├── .binder ├── apt.txt └── requirements.txt ├── .flake8 ├── .github ├── labeler.yml ├── release-drafter.yml ├── semantic.yml └── workflows │ ├── check-docs.yml │ ├── pr-labeler.yml │ ├── publish-doc-to-remote.yml │ ├── release-drafter.yml │ ├── run-test.yml │ └── upload-to-pypi.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── code-reference │ ├── base-reference.md │ ├── costs │ │ ├── costautoregressive-reference.md │ │ ├── costclinear-reference.md │ │ ├── costcosine-reference.md │ │ ├── costl1-reference.md │ │ ├── costl2-reference.md │ │ ├── costlinear-reference.md │ │ ├── costml-reference.md │ │ ├── costnormal-reference.md │ │ ├── costrank-reference.md │ │ └── costrbf-reference.md │ ├── datasets │ │ ├── pw_constant-reference.md │ │ ├── pw_linear-reference.md │ │ ├── pw_normal-reference.md │ │ └── pw_wavy-reference.md │ ├── detection │ │ ├── binseg-reference.md │ │ ├── bottomup-reference.md │ │ ├── dynp-reference.md │ │ ├── kernelcpd-reference.md │ │ ├── pelt-reference.md │ │ └── window-reference.md │ ├── index.md │ ├── metrics │ │ ├── hausdorff.md │ │ ├── precisionrecall.md │ │ └── randindex.md │ └── show │ │ └── display.md ├── contributing.md ├── custom-cost-function.md ├── data │ └── text-segmentation-data.txt ├── examples │ ├── basic-usage.ipynb │ ├── introduction.md │ ├── kernel-cpd-performance-comparison.ipynb │ ├── merging-cost-functions.ipynb │ ├── music-segmentation.ipynb │ └── text-segmentation.ipynb ├── fit-and-predict.md ├── getting-started │ └── basic-usage.ipynb ├── images │ ├── Jupyter_logo.svg │ ├── correlation_shift.png │ ├── example-display.png │ ├── example_readme.png │ ├── hausdorff.png │ ├── precision_recall.png │ ├── randindex.png │ ├── schema_binseg.png │ ├── schema_fenetre.png │ ├── schema_tree.png │ └── sum_of_sines.png ├── index.md ├── install.md ├── javascripts │ ├── configs.js │ └── mathjax.js ├── license.md ├── release-notes.md ├── user-guide │ 
├── costs │ │ ├── costautoregressive.md │ │ ├── costclinear.md │ │ ├── costcosine.md │ │ ├── costcustom.md │ │ ├── costl1.md │ │ ├── costl2.md │ │ ├── costlinear.md │ │ ├── costml.md │ │ ├── costnormal.md │ │ ├── costrank.md │ │ └── costrbf.md │ ├── datasets │ │ ├── pw_constant.md │ │ ├── pw_linear.md │ │ ├── pw_normal.md │ │ └── pw_wavy.md │ ├── detection │ │ ├── binseg.md │ │ ├── bottomup.md │ │ ├── dynp.md │ │ ├── kernelcpd.md │ │ ├── pelt.md │ │ └── window.md │ ├── evaluation.md │ ├── index.md │ ├── metrics │ │ ├── hausdorff.md │ │ ├── precisionrecall.md │ │ └── randindex.md │ └── show │ │ └── display.md └── what-is-cpd.md ├── images ├── example_readme.png ├── pw_constant.png ├── pw_constantdp.png ├── pw_linear.png └── pw_linearpelt.png ├── mkdocs.yml ├── mkdocs_macros.py ├── pyproject.toml ├── setup.cfg ├── setup.py ├── src └── ruptures │ ├── __init__.py │ ├── base.py │ ├── costs │ ├── __init__.py │ ├── costautoregressive.py │ ├── costclinear.py │ ├── costcosine.py │ ├── costl1.py │ ├── costl2.py │ ├── costlinear.py │ ├── costml.py │ ├── costnormal.py │ ├── costrank.py │ ├── costrbf.py │ └── factory.py │ ├── datasets │ ├── __init__.py │ ├── pw_constant.py │ ├── pw_linear.py │ ├── pw_normal.py │ └── pw_wavy.py │ ├── detection │ ├── __init__.py │ ├── _detection │ │ ├── __init__.py │ │ ├── ekcpd.pxd │ │ ├── ekcpd.pyx │ │ ├── ekcpd_computation.c │ │ ├── ekcpd_computation.h │ │ ├── ekcpd_pelt_computation.c │ │ ├── ekcpd_pelt_computation.h │ │ ├── kernels.c │ │ └── kernels.h │ ├── binseg.py │ ├── bottomup.py │ ├── dynp.py │ ├── kernelcpd.py │ ├── pelt.py │ └── window.py │ ├── exceptions.py │ ├── metrics │ ├── __init__.py │ ├── hamming.py │ ├── hausdorff.py │ ├── precisionrecall.py │ ├── randindex.py │ ├── sanity_check.py │ └── timeerror.py │ ├── show │ ├── __init__.py │ └── display.py │ └── utils │ ├── __init__.py │ ├── _utils │ ├── __init__.py │ ├── convert_path_matrix.pxd │ ├── convert_path_matrix.pyx │ ├── convert_path_matrix_c.c │ └── convert_path_matrix_c.h │ 
├── bnode.py │ ├── drawbkps.py │ └── utils.py └── tests ├── __init__.py ├── test_bnode.py ├── test_costs.py ├── test_datasets.py ├── test_detection.py ├── test_display.py └── test_metrics.py /.binder/apt.txt: -------------------------------------------------------------------------------- 1 | libsndfile1-dev 2 | -------------------------------------------------------------------------------- /.binder/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | ruptures 3 | librosa 4 | nltk 5 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 127 3 | max-complexity = 10 4 | select = E9,F63,F7,F82,F401 5 | per-file-ignores = __init__.py:F401 -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | # Config file for the labeler Github Action 2 | # https://hub.docker.com/r/jimschubert/labeler-action 3 | # labeler "full" schema 4 | 5 | # enable labeler on issues, prs, or both. 6 | enable: 7 | issues: true 8 | prs: true 9 | 10 | # comments object allows you to specify a different message for issues and prs 11 | 12 | # comments: 13 | # issues: | 14 | # Thanks for opening this issue! 15 | # I have applied any labels matching special text in your title and description. 16 | 17 | # Please review the labels and make any necessary changes. 18 | # prs: | 19 | # Thanks for the contribution! 20 | # I have applied any labels matching special text in your title and description. 21 | 22 | # Please review the labels and make any necessary changes. 
23 | 24 | # Labels is an object where: 25 | # - keys are labels 26 | # - values are objects of { include: [ pattern ], exclude: [ pattern ] } 27 | # - pattern must be a valid regex, and is applied globally to 28 | # title + description of issues and/or prs (see enabled config above) 29 | # - 'include' patterns will associate a label if any of these patterns match 30 | # - 'exclude' patterns will ignore this label if any of these patterns match 31 | labels: 32 | 'Type: Fix': 33 | include: 34 | - '^(bug|fix)(\(.*\))?:(.*)' 35 | exclude: [] 36 | 'Type: Feature': 37 | include: 38 | - '^feat(\(.*\))?:(.*)' 39 | exclude: [] 40 | 'Type: Build': 41 | include: 42 | - '^build(\(.*\))?:(.*)' 43 | exclude: [] 44 | 'Type: Documentation': 45 | include: 46 | - '^docs(\(.*\))?:(.*)' 47 | exclude: [] 48 | 'Type: Refactoring': 49 | include: 50 | - '^(refactor|style)(\(.*\))?:(.*)' 51 | exclude: [] 52 | 'Type: Testing': 53 | include: 54 | - '^test(\(.*\))?:(.*)' 55 | exclude: [] 56 | 'Type: Maintenance': 57 | include: 58 | - '^(chore|mnt)(\(.*\))?:(.*)' 59 | exclude: [] 60 | 'Type: CI': 61 | include: 62 | - '^ci(\(.*\))?:(.*)' 63 | exclude: [] 64 | 'Type: Performance': 65 | include: 66 | - '^perf(\(.*\))?:(.*)' 67 | exclude: [] 68 | 'Type: Revert': 69 | include: 70 | - '^revert(\(.*\))?:(.*)' 71 | exclude: [] 72 | 'skip-changelog': 73 | include: 74 | - '^(chore: pre-commit autoupdate)' 75 | exclude: [] 76 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$RESOLVED_VERSION 🌈' 2 | tag-template: 'v$RESOLVED_VERSION' 3 | categories: 4 | - title: '🚀 Features' 5 | labels: 6 | - 'Type: Feature' 7 | - 'Type: Performance' 8 | - title: '🐛 Bug Fixes' 9 | labels: 10 | - 'Type: Fix' 11 | - title: '📚 Documentation' 12 | label: 'Type: Documentation' 13 | - title: '🧰 Maintenance' 14 | label: 15 | - 'Type: Maintenance' 16 | - 'Type: 
Build' 17 | - 'Type: Refactoring' 18 | - 'Type: CI' 19 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 20 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 21 | version-resolver: 22 | major: 23 | labels: 24 | - 'major' 25 | minor: 26 | labels: 27 | - 'minor' 28 | patch: 29 | labels: 30 | - 'patch' 31 | default: patch 32 | exclude-labels: 33 | - 'skip-changelog' 34 | template: | 35 | ## Changes 36 | 37 | $CHANGES 38 | -------------------------------------------------------------------------------- /.github/semantic.yml: -------------------------------------------------------------------------------- 1 | # Always validate the PR title, and ignore the commits 2 | titleOnly: true 3 | 4 | # By default types specified in commitizen/conventional-commit-types is used. 5 | # See: https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json 6 | # You can override the valid types 7 | types: 8 | - feat 9 | - fix 10 | - docs 11 | - style 12 | - refactor 13 | - perf 14 | - test 15 | - build 16 | - ci 17 | - chore 18 | - revert 19 | -------------------------------------------------------------------------------- /.github/workflows/check-docs.yml: -------------------------------------------------------------------------------- 1 | name: "docs" 2 | on: 3 | pull_request: 4 | branches: [ master ] 5 | paths-ignore: 6 | - '.pre-commit-config.yaml' 7 | 8 | jobs: 9 | docs: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python 3.8 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: '3.8' 17 | - name: Install sndfile library # for librosa, see https://github.com/deepcharles/ruptures/pull/121 18 | run: | 19 | sudo apt-get install libsndfile1-dev 20 | - name: Install ruptures and dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | python -m pip install .[docs] 24 | - name: Run notebooks 25 | run: | 26 | find ./docs -name '*.ipynb' | 
xargs -P 3 -I % jupyter nbconvert --inplace --to notebook --ExecutePreprocessor.kernel_name=python --execute % 27 | - name: Build documentation 28 | run: | 29 | mkdocs build 30 | - uses: actions/upload-artifact@v4 31 | with: 32 | name: DocumentationHTML 33 | path: site/ 34 | -------------------------------------------------------------------------------- /.github/workflows/pr-labeler.yml: -------------------------------------------------------------------------------- 1 | name: Label PRs and issues 2 | on: 3 | issues: 4 | types: [opened, edited, milestoned] 5 | pull_request_target: 6 | types: [opened] 7 | 8 | jobs: 9 | 10 | labeler: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Check Labels 15 | id: labeler 16 | uses: jimschubert/labeler-action@v2 17 | with: 18 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} -------------------------------------------------------------------------------- /.github/workflows/publish-doc-to-remote.yml: -------------------------------------------------------------------------------- 1 | # Publish docs to website on new release (or manual trigger) 2 | name: "Publish docs online" 3 | 4 | on: 5 | release: 6 | types: [created] 7 | workflow_dispatch: 8 | 9 | jobs: 10 | docs: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 3.x 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: '3.8' 18 | - name: Install sndfile library # for librosa, see https://github.com/deepcharles/ruptures/pull/121 19 | run: | 20 | sudo apt-get install libsndfile1-dev 21 | - name: Install ruptures and dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | python -m pip install .[docs] 25 | - name: Run notebooks 26 | run: | 27 | find ./docs -name '*.ipynb' | xargs -P 3 -I % jupyter nbconvert --inplace --to notebook --ExecutePreprocessor.kernel_name=python --execute % 28 | - name: Build documentation 29 | run: | 30 | mkdocs build 31 | - name: Publish documentation to remote repo 32 | 
uses: selenehyun/gh-push@master 33 | env: 34 | GITHUB_TOKEN: ${{ secrets.DOC_PUSHER_ACCESS_TOKEN }} 35 | COMMIT_FILES: site/* 36 | REPO_FULLNAME: centre-borelli/ruptures-docs 37 | BRANCH: master 38 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release drafter 2 | 3 | on: 4 | push: 5 | # branches to consider in the event; optional, defaults to all 6 | branches: 7 | - master 8 | 9 | jobs: 10 | update_release_draft: 11 | runs-on: ubuntu-latest 12 | steps: 13 | # Drafts your next Release notes as Pull Requests are merged into "master" 14 | - uses: release-drafter/release-drafter@v5 15 | # with: 16 | # (Optional) specify config name to use, relative to .github/. Default: release-drafter.yml 17 | # config-name: my-config.yml 18 | env: 19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 20 | -------------------------------------------------------------------------------- /.github/workflows/run-test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: build 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | paths-ignore: 12 | - '.pre-commit-config.yaml' 13 | - 'docs/**' 14 | - 'images/**' 15 | - '**.md' 16 | - 'mkdocs.yml' 17 | 18 | jobs: 19 | tests: 20 | strategy: 21 | matrix: 22 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 23 | os: [ubuntu-latest, windows-latest, macos-latest] 24 | runs-on: ${{ matrix.os }} 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v2 29 | with: 30 | python-version: ${{ 
matrix.python-version }} 31 | - name: Install ruptures 32 | run: | 33 | python -m pip install --upgrade pip 34 | python -m pip install .[test] 35 | - name: Test with pytest 36 | run: | 37 | python -m pytest --no-cov 38 | 39 | coverage: 40 | runs-on: ubuntu-latest 41 | steps: 42 | - uses: actions/checkout@v2 43 | - name: Set up Python 3.10 44 | uses: actions/setup-python@v2 45 | with: 46 | python-version: '3.10' 47 | - name: Install ruptures 48 | run: | 49 | python -m pip install --upgrade pip 50 | python -m pip install .[test,display] 51 | - name: Test with pytest 52 | run: | 53 | python -m pytest --cov --cov-report=xml --cov-report=term:skip-covered 54 | - name: Upload coverage to Codecov 55 | uses: codecov/codecov-action@v5 56 | # env: 57 | # CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 58 | with: 59 | files: ./coverage.xml 60 | flags: unittests 61 | fail_ci_if_error: true 62 | - uses: actions/upload-artifact@v4 63 | with: 64 | path: coverage.xml 65 | -------------------------------------------------------------------------------- /.github/workflows/upload-to-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Upload package to Pypi 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | overrideVersion: 7 | description: Manually force a version 8 | 9 | env: 10 | CIBW_BUILD_VERBOSITY: 3 11 | SETUPTOOLS_SCM_PRETEND_VERSION: ${{ github.event.inputs.overrideVersion }} 12 | # Run the package tests using `pytest` 13 | CIBW_TEST_REQUIRES: pytest 14 | CIBW_TEST_COMMAND: pytest {project}/tests 15 | 16 | jobs: 17 | make_sdist: 18 | name: Make SDist 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Setup Python 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: "3.10" 26 | - name: Install deps 27 | run: python -m pip install build twine 28 | - name: Build SDist 29 | run: python -m build --sdist 30 | - uses: actions/upload-artifact@v2 31 | with: 32 | path: dist/*.tar.gz 33 | - 
name: Check metadata 34 | run: twine check dist/* 35 | 36 | build_wheels: 37 | name: Build wheels on ${{ matrix.os }} 38 | runs-on: ${{ matrix.os }} 39 | strategy: 40 | matrix: 41 | os: [ubuntu-latest, windows-latest, macos-latest] 42 | steps: 43 | - uses: actions/checkout@v2 44 | 45 | # Used to host cibuildwheel 46 | - uses: actions/setup-python@v2 47 | 48 | - name: Install cibuildwheel 49 | run: python -m pip install cibuildwheel 50 | 51 | - name: Build wheels 52 | run: python -m cibuildwheel --output-dir wheelhouse 53 | env: 54 | # Disable explicitly building PyPI wheels for specific configurations 55 | CIBW_SKIP: pp* cp{38,39,310,311,312}-manylinux_i686 *-musllinux_* cp{38,39,310,311,312}-win32 56 | CIBW_PRERELEASE_PYTHONS: False 57 | # Manually force a version (and avoid building local wheels) 58 | CIBW_ENVIRONMENT: "SETUPTOOLS_SCM_PRETEND_VERSION=${{ github.event.inputs.overrideVersion }}" 59 | CIBW_ARCHS_MACOS: x86_64 arm64 60 | 61 | - uses: actions/upload-artifact@v2 62 | with: 63 | path: wheelhouse/*.whl 64 | 65 | build_aarch64_wheels: 66 | name: Build wheels manylinux_aarch64 67 | runs-on: ubuntu-latest 68 | strategy: 69 | matrix: 70 | python: [36, 37, 38, 39, 310, 311, 312] 71 | include: 72 | - os: ubuntu-latest 73 | arch: aarch64 74 | platform_id: manylinux_aarch64 75 | steps: 76 | - uses: actions/checkout@v2 77 | 78 | - name: Set up QEMU 79 | uses: docker/setup-qemu-action@v1 80 | 81 | - name: Install cibuildwheel 82 | run: python -m pip install cibuildwheel 83 | 84 | - name: Build wheels 85 | run: python -m cibuildwheel --output-dir wheelhouse 86 | env: 87 | CIBW_ARCHS_LINUX: ${{matrix.arch}} 88 | CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} 89 | # Manually force a version (and avoid building local wheels) 90 | CIBW_ENVIRONMENT: "SETUPTOOLS_SCM_PRETEND_VERSION=${{ github.event.inputs.overrideVersion }}" 91 | - uses: actions/upload-artifact@v2 92 | with: 93 | path: wheelhouse/*.whl 94 | 95 | upload_all: 96 | needs: [build_wheels, 
build_aarch64_wheels, make_sdist] 97 | runs-on: ubuntu-latest 98 | steps: 99 | - uses: actions/download-artifact@v2 100 | with: 101 | name: artifact 102 | path: dist 103 | - uses: pypa/gh-action-pypi-publish@release/v1 104 | with: 105 | user: ${{ secrets.PYPI_USERNAME }} 106 | password: ${{ secrets.PYPI_PASSWORD }} 107 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # Custom 62 | .coverage_conflict-20151112-152034 63 | .ipynb_checkpoints/ 64 | Untitled.ipynb 65 | mem 66 | .vscode/* 67 | Demonstration.ipynb 68 | docs.zip 69 | make.bat 70 | ruptures.sublime-project 71 | ruptures.sublime-workspace 72 | test.ipynb 73 | ruptures/detection/.circleci/run-build-locally.sh 74 | .circleci/run-build-locally.sh 75 | docs/build/* 76 | 77 | # OS generated files 78 | .DS_Store 79 | ./**/.DS_Store 80 | .DS_Store? 
81 | .Spotlight-V100 82 | .Trashes 83 | ehthumbs.db 84 | Thumbs.db 85 | site/ 86 | src/ruptures/version.py 87 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 25.1.0 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v5.0.0 9 | hooks: 10 | - id: trailing-whitespace # This hook trims trailing whitespace. 11 | - id: check-docstring-first # Checks a common error of defining a docstring after code. 12 | - id: check-merge-conflict # Check for files that contain merge conflict strings. 13 | - id: check-yaml # This hook checks yaml files for parseable syntax. 14 | - id: detect-private-key # Detects the presence of private keys. 15 | - id: check-symlinks 16 | - id: check-toml 17 | - repo: https://github.com/pre-commit/pygrep-hooks 18 | rev: v1.10.0 19 | hooks: 20 | - id: python-no-eval # A quick check for the eval() built-in function. 
21 | - repo: https://github.com/PyCQA/docformatter 22 | rev: eb1df347edd128b30cd3368dddc3aa65edcfac38 # Don't autoupdate until https://github.com/PyCQA/docformatter/issues/293 is fixed 23 | hooks: 24 | - id: docformatter 25 | exclude: mkdocs_macros.py 26 | args: [--in-place] 27 | - repo: https://github.com/PyCQA/flake8 28 | rev: 7.2.0 29 | hooks: 30 | - id: flake8 31 | # additional_dependencies: [flake8-docstrings, flake8-bugbear, flake8-spellcheck, flake8-import-order] 32 | - repo: https://github.com/kynan/nbstripout 33 | rev: 0.8.1 34 | hooks: 35 | - id: nbstripout 36 | - repo: https://github.com/asottile/blacken-docs 37 | rev: 1.19.1 38 | hooks: 39 | - id: blacken-docs 40 | - repo: https://github.com/nbQA-dev/nbQA 41 | rev: 1.9.1 42 | hooks: 43 | - id: nbqa-black 44 | args: [--nbqa-mutate] 45 | 46 | ci: 47 | autoupdate_commit_msg: 'chore: pre-commit autoupdate' 48 | autoupdate_schedule: 'quarterly' 49 | 50 | 51 | # https://github.com/PyCQA/docformatter/pull/287 -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | The latest release notes are available directly in Github: [ruptures/releases](https://github.com/deepcharles/ruptures/releases). 4 | 5 | Earlier releases are documented below ⬇️. 
6 | 7 | ## [1.1.2] - 2020-12-01 8 | 9 | ### Added 10 | 11 | - 12cbc9e feat: add piecewise linear cpd (#91) 12 | - a12b215 test: add code coverage badge (#97) 13 | - 2e9b17f docs: add binder for notebooks (#94) 14 | - da7544f docs(costcosine): add entry for CostCosine in docs (#93) 15 | - 8c9aa35 build(setup.py/cfg): add build_ext to setup.py (#88) 16 | - 10ef8e8 build(python39): add py39 to supported versions (#87) 17 | 18 | ### Changed 19 | 20 | - 069bd41 fix(kernelcpd): bug fix in pelt (#95) 21 | - b4abc34 fix: memory leak in KernelCPD (#89) 22 | 23 | ## [1.1.1] - 2020-11-26 24 | 25 | No change to the code compared to the previous version. 26 | The package was only partly published to Pypi because of the failure of one provider in the CI. 27 | Since Pypi's policy prevents re-uploading twice the same version, we have to increment the version number. 28 | 29 | ## [1.1.0] - 2020-11-23 30 | 31 | ### Added 32 | 33 | - modify publishing process to Pypi PR#83 34 | - add cosine kernel (cost function and in KernelCPD)PR#74 35 | - add faster kernel change point detection (`KernelCPD`, C implementation) PR#74 36 | - add manual trigger to publish to Pypi PR#72 37 | 38 | ### Changed 39 | 40 | ## [1.0.6] - 2020-10-23 41 | ### Added 42 | 43 | - Correct minor error in Dynp (about min_size) PR#74 44 | - Fix legacy formatting errors PR#69 45 | - New documentation (from Sphinx to Mkdocs) PR#64 46 | - Separate requirements.txt and requirements-dev.txt PR#64 47 | - A changelog file ([link](https://github.com/deepcharles/ruptures/blob/master/CHANGELOG.md)) 48 | - New Github actions for automatic generation of documentation 49 | - Pre-commit code formatting using [black](https://github.com/psf/black) 50 | 51 | ### Changed 52 | 53 | - Correction of display function test #64 54 | - Add badges in the README (Github repo) PR#62: pypi version, python version, code style, contributor list 55 | - Typo in documentation ([PR#60](https://github.com/deepcharles/ruptures/pull/60)) by @gjaeger 56 | 
- Documentation theme 57 | - Documentation site 58 | 59 | ## [1.0.5] - 2020-07-22 60 | ### Changed 61 | - Link to documentation in PyPi description 62 | 63 | [Unreleased]: https://github.com/deepcharles/ruptures/compare/v1.1.2...HEAD 64 | [1.1.2]: https://github.com/deepcharles/ruptures/compare/v1.1.1...v1.1.2 65 | [1.1.1]: https://github.com/deepcharles/ruptures/compare/v1.1.0...v1.1.1 66 | [1.1.0]: https://github.com/deepcharles/ruptures/compare/v1.0.6...v1.1.0 67 | [1.0.6]: https://github.com/deepcharles/ruptures/compare/v1.0.5...v1.0.6 68 | [1.0.5]: https://github.com/deepcharles/ruptures/compare/v1.0.4...v1.0.5 69 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | 4 | ## Before contributing 5 | 6 | In all following steps, it is highly recommended to use a virtual environment. 7 | Build and installation are performed using `pip` so be sure to have the latest version available. 8 | 9 | ``` 10 | python -m pip install --upgrade pip 11 | ``` 12 | 13 | ### Install the development version 14 | 15 | It is important that you contribute to the latest version of the code. 16 | To that end, start by cloning the Github repository. 17 | 18 | ``` 19 | git clone https://github.com/deepcharles/ruptures 20 | cd ruptures 21 | ``` 22 | 23 | Then install the downloaded package with `pip`. 24 | 25 | ``` 26 | python -m pip install --editable .[dev] 27 | ``` 28 | 29 | Note that `python -m` can be omitted most of the times, but within virtualenvs, it can prevent certain errors. 30 | Also, in certain terminals (such as `zsh`), the square brackets must be escaped, e.g. replace `.[dev]` by `.\[dev\]`. 31 | 32 | In addition to `numpy`, `scipy` and `ruptures`, this command will install all packages needed to develop `ruptures`. 
33 | The exact list of libraries can be found in the [`setup.cfg` file](https://github.com/deepcharles/ruptures/blob/master/setup.cfg) (section `[options.extras_require]`). 34 | 35 | ### Pre-commit hooks 36 | 37 | We use `pre-commit` to run Git hooks before submitting the code to review. 38 | These hook scripts perform simple tasks before each commit (code formatting mostly). 39 | To activate the hooks, simply run the following command in your terminal. 40 | 41 | ``` 42 | pre-commit install 43 | ``` 44 | 45 | If you try to commit a non-compliant (i.e. badly formatted) file, `pre-commit` will modify this file and make the commit fail. 46 | However you need to stage the new changes **yourself** as `pre-commit` will not do that for you (this is by design; see [here](https://github.com/pre-commit/pre-commit/issues/806) or [here](https://github.com/pre-commit/pre-commit/issues/747)). 47 | Fortunately, `pre-commit` outputs useful messages. 48 | 49 | The list of hooks (and their options) can be found in [`.pre-commit-config.yaml`](https://github.com/deepcharles/ruptures/blob/master/.pre-commit-config.yaml). 50 | For more information, see [their website](https://pre-commit.com/). 51 | If you want to manually run all pre-commit hooks on a repository, run `pre-commit run --all-files`. To run individual hooks use `pre-commit run `. 52 | 53 | ## Contribute to the code 54 | 55 | ### Write tests 56 | 57 | The following command executes the test suite. 58 | 59 | ``` 60 | python -m pytest 61 | ``` 62 | 63 | ### Write docstrings 64 | 65 | ## Contribute to the documentation 66 | 67 | Use [MkDocs](https://www.mkdocs.org/). 68 | 69 | Use `mkdocs serve` to preview your changes. 70 | Once you are satisfied, no need to build the documentation, the CI will take care of that and publish it online at the next release of the package (if the pull request has been merged). 
71 | 72 | ### Add examples to the gallery 73 | 74 | An easy way to showcase your work with `ruptures` is to write a narrative example. 75 | To that end, simply put a [Jupyter notebook](https://jupyter.org/) in the `docs/examples` folder. 76 | To make it appear in the documentation, add a reference in `mkdocs.yml` (`nav > Gallery of examples`): if the notebook's name is `my_notebook.ipynb`, it will be available as `examples/my_notebook.ipynb`. 77 | It will be rendered automatically when [MkDocs](https://www.mkdocs.org/) builds the documentation. 78 | 79 | !!! note 80 | To automatically add a [Binder](https://mybinder.org/v2/gh/deepcharles/ruptures/master) link and a download link to your notebook, simply add the following line of code. 81 | ```markdown 82 | {{ '' }} 83 | ``` 84 | Ideally, place this code below the title of the notebook (same cell) and it will be rendered as in [here](examples/kernel-cpd-performance-comparison.ipynb). 85 | 86 | We welcome any interesting work about a new cost function, algorithm, data, calibration method, etc. 87 | Any other package can be used in combination with `ruptures`. 88 | However, each example should be clearly explained with text and figures. 89 | The amount of raw code should also remain limited for readability. 90 | 91 | 92 | ## Miscellaneous 93 | 94 | ### Naming convention 95 | 96 | We try to follow (roughly) a consistent naming convention of modules, classes, functions, etc. 97 | When in doubt, you can refer to the [PEP 8 style guide for Python code](https://www.python.org/dev/peps/pep-0008/#naming-conventions). 98 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017-2021, ENS Paris-Saclay, CNRS 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | prune * 2 | graft src 3 | graft tests 4 | 5 | include LICENSE README.md pyproject.toml setup.py setup.cfg 6 | exclude CHANGELOG.md CONTRIBUTING.md mkdocs.yml mkdocs_macros.py 7 | global-exclude __pycache__ *.py[cod] .* 8 | -------------------------------------------------------------------------------- /docs/code-reference/base-reference.md: -------------------------------------------------------------------------------- 1 | # Base classes (ruptures.base) 2 | 3 | ::: ruptures.base -------------------------------------------------------------------------------- /docs/code-reference/costs/costautoregressive-reference.md: -------------------------------------------------------------------------------- 1 | # Autoregressive model change (CostAutoregressive) 2 | 3 | ::: ruptures.costs.costautoregressive 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/costs/costclinear-reference.md: -------------------------------------------------------------------------------- 1 | # Continuous linear change (CostCLinear) 2 | 3 | ::: ruptures.costs.costclinear.CostCLinear 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/costs/costcosine-reference.md: -------------------------------------------------------------------------------- 1 | # Kernelized mean change (CostCosine) 2 | 3 | ::: ruptures.costs.costcosine.CostCosine 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/costs/costl1-reference.md: -------------------------------------------------------------------------------- 1 | # CostL1 (least absolute deviation) 2 
| 3 | ::: ruptures.costs.costl1.CostL1 4 | rendering: 5 | show_root_heading: true 6 | 7 | -------------------------------------------------------------------------------- /docs/code-reference/costs/costl2-reference.md: -------------------------------------------------------------------------------- 1 | # CostL2 (least squared deviation) 2 | 3 | ::: ruptures.costs.costl2.CostL2 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/costs/costlinear-reference.md: -------------------------------------------------------------------------------- 1 | # Linear model change (CostLinear) 2 | 3 | ::: ruptures.costs.costlinear.CostLinear 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/costs/costml-reference.md: -------------------------------------------------------------------------------- 1 | # Mahalanobis-type change (CostMl) 2 | 3 | ::: ruptures.costs.costml.CostMl 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/costs/costnormal-reference.md: -------------------------------------------------------------------------------- 1 | # Gaussian process change (CostNormal) 2 | 3 | ::: ruptures.costs.costnormal.CostNormal 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/costs/costrank-reference.md: -------------------------------------------------------------------------------- 1 | # Rank-based change (CostRank) 2 | 3 | ::: ruptures.costs.costrank.CostRank 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/costs/costrbf-reference.md: 
-------------------------------------------------------------------------------- 1 | # Kernelized mean change (CostRbf) 2 | 3 | ::: ruptures.costs.costrbf.CostRbf 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/datasets/pw_constant-reference.md: -------------------------------------------------------------------------------- 1 | # Piecewise constant (pw_constant) 2 | 3 | ::: ruptures.datasets.pw_constant.pw_constant 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/datasets/pw_linear-reference.md: -------------------------------------------------------------------------------- 1 | # Piecewise linear (pw_linear) 2 | 3 | ::: ruptures.datasets.pw_linear.pw_linear 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/datasets/pw_normal-reference.md: -------------------------------------------------------------------------------- 1 | # Piecewise Gaussian (pw_normal) 2 | 3 | ::: ruptures.datasets.pw_normal.pw_normal 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/datasets/pw_wavy-reference.md: -------------------------------------------------------------------------------- 1 | # Piecewise wavy (pw_wavy) 2 | 3 | ::: ruptures.datasets.pw_wavy.pw_wavy 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/detection/binseg-reference.md: -------------------------------------------------------------------------------- 1 | # Binary segmentation 2 | 3 | ::: ruptures.detection.binseg.Binseg 4 | rendering: 5 | show_root_heading: true 6 | 7 | 
-------------------------------------------------------------------------------- /docs/code-reference/detection/bottomup-reference.md: -------------------------------------------------------------------------------- 1 | # Bottom-up segmentation 2 | 3 | ::: ruptures.detection.bottomup.BottomUp 4 | rendering: 5 | show_root_heading: true 6 | 7 | -------------------------------------------------------------------------------- /docs/code-reference/detection/dynp-reference.md: -------------------------------------------------------------------------------- 1 | # Dynamic programming 2 | 3 | ::: ruptures.detection.dynp.Dynp 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/detection/kernelcpd-reference.md: -------------------------------------------------------------------------------- 1 | # Efficient kernel change point detection 2 | 3 | ::: ruptures.detection.kernelcpd.KernelCPD 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/detection/pelt-reference.md: -------------------------------------------------------------------------------- 1 | # Pelt 2 | 3 | ::: ruptures.detection.pelt.Pelt 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/detection/window-reference.md: -------------------------------------------------------------------------------- 1 | # Window-based change point detection 2 | 3 | ::: ruptures.detection.window.Window 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/index.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This section describes the API of all functions and classes in the `ruptures` package. 
4 | For a more intuitive description of each method, please refer to the [User guide](../user-guide/index.md). 5 | 6 | Roughly, each module corresponds to a certain type of procedure: 7 | 8 | - `ruptures.base`: base classes; 9 | - `ruptures.detection`: search methods; 10 | - `ruptures.costs`: cost functions; 11 | - `ruptures.datasets`: data set generating utilities; 12 | - `ruptures.metrics`: evaluation metrics; 13 | - `ruptures.show`: display functions. 14 | -------------------------------------------------------------------------------- /docs/code-reference/metrics/hausdorff.md: -------------------------------------------------------------------------------- 1 | # Hausdorff metric (`hausdorff`) 2 | 3 | ::: ruptures.metrics.hausdorff.hausdorff 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/metrics/precisionrecall.md: -------------------------------------------------------------------------------- 1 | # Precision and recall (`precision_recall`) 2 | 3 | ::: ruptures.metrics.precisionrecall.precision_recall 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/metrics/randindex.md: -------------------------------------------------------------------------------- 1 | # Rand index (`randindex`) 2 | 3 | ::: ruptures.metrics.randindex.randindex 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/show/display.md: -------------------------------------------------------------------------------- 1 | # Display (`display`) 2 | 3 | ::: ruptures.show.display.display 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 |
../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/custom-cost-function.md: -------------------------------------------------------------------------------- 1 | # Creating a custom cost function 2 | 3 | In order to define custom cost functions, simply create a class that inherits from 4 | `ruptures.base.BaseCost` and implement the methods `.fit(signal)` and `.error(start, end)`: 5 | 6 | - The method `.fit(signal)` takes a signal as input and sets parameters. It returns `'self'`. 7 | - The method `.error(start, end)` takes two indexes `'start'` and `'end'` and returns the cost on the segment start:end. 8 | 9 | !!! example 10 | See this [custom cost example](user-guide/costs/costcustom.md). 11 | 12 | -------------------------------------------------------------------------------- /docs/examples/basic-usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Basic usage\n", 8 | "\n", 9 | "\n", 10 | "\n", 11 | "Let us start with a simple example to illustrate the use of `ruptures`: generate a 3-dimensional piecewise constant signal with noise and estimate the change points.\n", 12 | "\n", 13 | "## Setup\n", 14 | "First, we make the necessary imports." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import matplotlib.pyplot as plt # for display purposes\n", 24 | "\n", 25 | "import ruptures as rpt # our package" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Generate and display the signal\n", 33 | "\n", 34 | "Let us generate a 3-dimensional piecewise constant signal with Gaussian noise." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "n_samples, n_dims, sigma = 1000, 3, 2\n", 44 | "n_bkps = 4 # number of breakpoints\n", 45 | "signal, bkps = rpt.pw_constant(n_samples, n_dims, n_bkps, noise_std=sigma)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "The true change points of this synthetic signal are available in the `bkps` variable." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "print(bkps)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Note that the first four element are change point indexes while the last is simply the number of samples.\n", 69 | "(This is a technical convention so that functions in `ruptures` always know the length of the signal at hand.)\n", 70 | "\n", 71 | "It is also possible to plot our \\(\\mathbb{R}^3\\)-valued signal along with the true change points with the `rpt.display` function.\n", 72 | "In the following image, the color changes whenever the mean of the signal shifts." 
73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "fig, ax_array = rpt.display(signal, bkps)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Change point detection\n", 89 | "We can now perform change point detection, meaning that we find the indexes where the signal mean changes.\n", 90 | "To that end, we minimize the sum of squared errors when approximating the signal by a piecewise constant signal.\n", 91 | "Formally, for a signal \\( y_0 , y_1 , \\dots , y_{T-1} \\) (\\( T \\) samples), we solve the following optimization problem, over all possible change positions \\( t_1 < t_2 < \\dots < t_K \\)\n", 92 | "where the number \\( K \\) of changes is defined by the user:\n", 93 | "\n", 94 | "\\[\n", 95 | " \\hat{t}_1, \\hat{t}_2,\\dots,\\hat{t}_K = \\arg\\min_{t_1,\\dots,t_K} V(t_1,t_2,\\dots,t_K)\n", 96 | "\\]\n", 97 | "\n", 98 | "with\n", 99 | "\n", 100 | "\\[\n", 101 | " V(t_1,t_2,\\dots,t_K) := \\sum_{k=0}^K\\sum_{t=t_k}^{t_{k+1}-1} \\|y_t-\\bar{y}_{t_k..t_{k+1}}\\|^2\n", 102 | "\\]\n", 103 | "\n", 104 | "\n", 105 | "where \\( \\bar{y}_{t_k..t_{k+1}} \\) is the empirical mean of the sub-signal \\( y_{t_k}, y_{t_k+1},\\dots,y_{t_{k+1}-1} \\).\n", 106 | "(By convention \\( t_0=0 \\) and \\( t_{K+1}=T \\).)\n", 107 | "\n", 108 | "This optimization is solved with dynamic programming, using the [`Dynp`](../user-guide/detection/dynp.md) class. 
(More information in the section [What is change point detection?](/what-is-cpd) and the [User guide](/user-guide).)\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# detection\n", 118 | "algo = rpt.Dynp(model=\"l2\").fit(signal)\n", 119 | "result = algo.predict(n_bkps=4)\n", 120 | "\n", 121 | "print(result)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Again the first elements are change point indexes and the last is the number of samples." 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## Display the results" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "To visually compare the true segmentation (`bkps`) and the estimated one (`result`), we can resort to `rpt.display` a second time.\n", 143 | "In the following image, the alternating colors indicate the true breakpoints and the dashed vertical lines, the estimated breakpoints." 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# display\n", 153 | "rpt.display(signal, bkps, result)\n", 154 | "plt.show()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "In this simple example, both are quite similar and almost indistinguishable."
162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "Python 3", 168 | "language": "python", 169 | "name": "python3" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.9.0" 182 | } 183 | }, 184 | "nbformat": 4, 185 | "nbformat_minor": 4 186 | } 187 | -------------------------------------------------------------------------------- /docs/examples/introduction.md: -------------------------------------------------------------------------------- 1 | # Gallery of examples 2 | 3 | These examples illustrate the main features of the `ruptures` package. 4 | Simple examples are direct applications of the library's functions on simulated data. 5 | Advanced examples deal with more complex tasks, such as calibration and real-world data. -------------------------------------------------------------------------------- /docs/fit-and-predict.md: -------------------------------------------------------------------------------- 1 | # Fitting and prediction: estimator basics 2 | 3 | `ruptures` has an object-oriented modelling approach (largely inspired by [scikit-learn](https://scikit-learn.org/stable/getting_started.html)): change point detection algorithms are broken down into two conceptual objects that inherits from base classes: `BaseEstimator` and 4 | `BaseCost`. 5 | 6 | 7 | ## Initializing a new estimator 8 | 9 | Each change point detection algorithm inherits from the base class `ruptures.base.BaseEstimator`. 10 | When a class that inherits from the base estimator is created, the `.__init__()` method initializes 11 | an estimator with the following arguments: 12 | 13 | * `model`: "l1", "l2", "normal", "rbf", "linear", etc. Cost function to use to compute the approximation error. 
14 | * `cost`: a custom cost function to the detection algorithm. Should be a `BaseCost` instance. 15 | * `jump`: reduce the set of possible change point indexes; predicted change points can only be a multiple of `jump`. 16 | * `min_size`: minimum number of samples between two change points. 17 | 18 | ## Making a prediction 19 | 20 | The main methods are `.fit()`, `.predict()`, `.fit_predict()`: 21 | 22 | - `.fit()`: generally takes a signal as input and fit the algorithm to the data. 23 | - `.predict()`: performs the change point detection. This method returns a list of indexes corresponding to the end of each regimes. By design, the last element of this list is the number of samples. 24 | - ``.fit_predict()``: helper method which calls ``.fit()`` and ``.predict()`` successively. -------------------------------------------------------------------------------- /docs/getting-started/basic-usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Basic usage\n", 8 | "\n", 9 | "\n", 10 | "\n", 11 | "Let us start with a simple example to illustrate the use of `ruptures`: generate a 3-dimensional piecewise constant signal with noise and estimate the change points.\n", 12 | "\n", 13 | "## Setup\n", 14 | "First, we make the necessary imports." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import matplotlib.pyplot as plt # for display purposes\n", 24 | "\n", 25 | "import ruptures as rpt # our package" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Generate and display the signal\n", 33 | "\n", 34 | "Let us generate a 3-dimensional piecewise constant signal with Gaussian noise." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "n_samples, n_dims, sigma = 1000, 3, 2\n", 44 | "n_bkps = 4 # number of breakpoints\n", 45 | "signal, bkps = rpt.pw_constant(n_samples, n_dims, n_bkps, noise_std=sigma)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "The true change points of this synthetic signal are available in the `bkps` variable." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "print(bkps)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Note that the first four element are change point indexes while the last is simply the number of samples.\n", 69 | "(This is a technical convention so that functions in `ruptures` always know the length of the signal at hand.)\n", 70 | "\n", 71 | "It is also possible to plot our \\(\\mathbb{R}^3\\)-valued signal along with the true change points with the `rpt.display` function.\n", 72 | "In the following image, the color changes whenever the mean of the signal shifts." 
73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "fig, ax_array = rpt.display(signal, bkps)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Change point detection\n", 89 | "We can now perform change point detection, meaning that we find the indexes where the signal mean changes.\n", 90 | "To that end, we minimize the sum of squared errors when approximating the signal by a piecewise constant signal.\n", 91 | "Formally, for a signal \\( y_0 , y_1 , \\dots , y_{T-1} \\) (\\( T \\) samples), we solve the following optimization problem, over all possible change positions \\( t_1 < t_2 < \\dots < t_K \\)\n", 92 | "where the number \\( K \\) of changes is defined by the user:\n", 93 | "\n", 94 | "\\[\n", 95 | " \\hat{t}_1, \\hat{t}_2,\\dots,\\hat{t}_K = \\arg\\min_{t_1,\\dots,t_K} V(t_1,t_2,\\dots,t_K)\n", 96 | "\\]\n", 97 | "\n", 98 | "with\n", 99 | "\n", 100 | "\\[\n", 101 | " V(t_1,t_2,\\dots,t_K) := \\sum_{k=0}^K\\sum_{t=t_k}^{t_{k+1}-1} \\|y_t-\\bar{y}_{t_k..t_{k+1}}\\|^2\n", 102 | "\\]\n", 103 | "\n", 104 | "\n", 105 | "where \\( \\bar{y}_{t_k..t_{k+1}} \\) is the empirical mean of the sub-signal \\( y_{t_k}, y_{t_k+1},\\dots,y_{t_{k+1}-1} \\).\n", 106 | "(By convention \\( t_0=0 \\) and \\( t_{K+1}=T \\).)\n", 107 | "\n", 108 | "This optimization is solved with dynamic programming, using the [`Dynp`](../user-guide/detection/dynp.md) class. 
(More information in the section [What is change point detection?](/what-is-cpd) and the [User guide](/user-guide).)\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# detection\n", 118 | "algo = rpt.Dynp(model=\"l2\").fit(signal)\n", 119 | "result = algo.predict(n_bkps=4)\n", 120 | "\n", 121 | "print(result)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Again the first elements are change point indexes and the last is the number of samples." 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## Display the results" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "To visually compare the true segmentation (`bkps`) and the estimated one (`result`), we can resort to `rpt.display` a second time.\n", 143 | "In the following image, the alternating colors indicate the true breakpoints and the dashed vertical lines, the estimated breakpoints." 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# display\n", 153 | "rpt.display(signal, bkps, result)\n", 154 | "plt.show()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "In this simple example, both are quite similar and almost indistinguishable."
162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "Python 3", 168 | "language": "python", 169 | "name": "python3" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.9.0" 182 | } 183 | }, 184 | "nbformat": 4, 185 | "nbformat_minor": 4 186 | } 187 | -------------------------------------------------------------------------------- /docs/images/correlation_shift.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/correlation_shift.png -------------------------------------------------------------------------------- /docs/images/example-display.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/example-display.png -------------------------------------------------------------------------------- /docs/images/example_readme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/example_readme.png -------------------------------------------------------------------------------- /docs/images/hausdorff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/hausdorff.png -------------------------------------------------------------------------------- /docs/images/precision_recall.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/precision_recall.png -------------------------------------------------------------------------------- /docs/images/randindex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/randindex.png -------------------------------------------------------------------------------- /docs/images/schema_binseg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/schema_binseg.png -------------------------------------------------------------------------------- /docs/images/schema_fenetre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/schema_fenetre.png -------------------------------------------------------------------------------- /docs/images/schema_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/schema_tree.png -------------------------------------------------------------------------------- /docs/images/sum_of_sines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/sum_of_sines.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ../README.md 
-------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | This library requires Python >=3.6 and the following packages: `numpy`, `scipy` and `matplotlib` (the last one is optional and only for display purposes). 4 | You can either install the latest stable release or the development version. 5 | 6 | ## Stable release 7 | 8 | To install the latest stable release, use `pip` or `conda`. 9 | 10 | === "With pip" 11 | ``` 12 | python -m pip install ruptures 13 | ``` 14 | 15 | === "With conda" 16 | `ruptures` can be installed from the `conda-forge` channel (run `conda config --add channels conda-forge` to add it): 17 | ``` 18 | conda install ruptures 19 | ``` 20 | 21 | ## Development release 22 | 23 | Alternatively, you can install the development version of `ruptures` which can contain features that have not yet been integrated into the stable release. 24 | To that end, refer to the [contributing guide](contributing.md). 25 | 26 | ## Upgrade 27 | 28 | Show the current version of the package. 29 | 30 | ``` 31 | python -m pip show ruptures 32 | ``` 33 | 34 | In order to upgrade to the latest version, use the following command.
35 | 36 | ``` 37 | python -m pip install -U ruptures 38 | ``` 39 | 40 | -------------------------------------------------------------------------------- /docs/javascripts/configs.js: -------------------------------------------------------------------------------- 1 | window.MathJax = { 2 | tex: { 3 | inlineMath: [ 4 | ["\\(", "\\)"] 5 | ], 6 | displayMath: [ 7 | ["\\[", "\\]"] 8 | ], 9 | processEscapes: true, 10 | processEnvironments: true 11 | }, 12 | options: { 13 | ignoreHtmlClass: ".*|", 14 | processHtmlClass: "arithmatex" 15 | } 16 | }; -------------------------------------------------------------------------------- /docs/javascripts/mathjax.js: -------------------------------------------------------------------------------- 1 | window.MathJax = { 2 | tex: { 3 | inlineMath: [["\\(", "\\)"]], 4 | displayMath: [["\\[", "\\]"]], 5 | processEscapes: true, 6 | processEnvironments: true 7 | }, 8 | options: { 9 | ignoreHtmlClass: ".*|", 10 | processHtmlClass: "arithmatex" 11 | } 12 | }; 13 | 14 | document$.subscribe(() => { 15 | MathJax.startup.output.clearCache() 16 | MathJax.typesetClear() 17 | MathJax.texReset() 18 | MathJax.typesetPromise() 19 | }) -------------------------------------------------------------------------------- /docs/license.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | This project is under BSD license. 
4 | 5 | ``` 6 | --8<-- "LICENSE" 7 | ``` 8 | -------------------------------------------------------------------------------- /docs/release-notes.md: -------------------------------------------------------------------------------- 1 | --8<-- "CHANGELOG.md" 2 | -------------------------------------------------------------------------------- /docs/user-guide/costs/costautoregressive.md: -------------------------------------------------------------------------------- 1 | # Autoregressive model change (`CostAR`) 2 | 3 | ## Description 4 | 5 | Let $0 < t_1 < t_2 < \dots < t_K < T$ be unknown change point indexes. 6 | This cost function detects changes in the coefficients of a piecewise autoregressive process: 7 | 8 | $$ 9 | y_t = z_t' \delta_j + \varepsilon_t, \quad \forall t = t_j, \dots, t_{j+1}-1 10 | $$ 11 | 12 | where $j>1$ is the segment number, $z_t=[y_{t-1}, y_{t-2},\dots,y_{t-p}]$ is the lag vector, and $p>0$ is the order of the process. 13 | 14 | The least-squares estimates of the break dates are obtained by minimizing the sum of squared 15 | residuals [[Bai2000]](#Bai2000). 16 | Formally, the associated cost function on an interval $I$ is 17 | 18 | $$ 19 | c(y_{I}) = \min_{\delta\in\mathbb{R}^p} \sum_{t\in I} \|y_t - \delta' z_t \|_2^2. 20 | $$ 21 | 22 | Currently, this function is limited to 1D signals. 23 | 24 | ## Usage 25 | 26 | Start with the usual imports and create a signal with piecewise linear trends.
27 | 28 | ```python 29 | from itertools import cycle 30 | import numpy as np 31 | import matplotlib.pylab as plt 32 | import ruptures as rpt 33 | 34 | # creation of data 35 | n = 2000 36 | n_bkps, sigma = 4, 0.5  # number of change points, noise standard deviation 37 | bkps = [400, 1000, 1300, 1800, n] 38 | f1 = np.array([0.075, 0.1]) 39 | f2 = np.array([0.1, 0.125]) 40 | freqs = np.zeros((n, 2)) 41 | for sub, val in zip(np.split(freqs, bkps[:-1]), cycle([f1, f2])): 42 | sub += val 43 | tt = np.arange(n) 44 | signal = np.sum([np.sin(2 * np.pi * tt * f) for f in freqs.T], axis=0) 45 | signal += np.random.normal(scale=sigma, size=signal.shape) 46 | # display signal 47 | rpt.show.display(signal, bkps, figsize=(10, 6)) 48 | plt.show() 49 | ``` 50 | 51 | Then create a [CostAR][ruptures.costs.costautoregressive.CostAR] instance and print the cost of the sub-signal 52 | `signal[50:150]`. 53 | The autoregressive order can be specified through the keyword ``'order'``. 54 | 55 | ```python 56 | c = rpt.costs.CostAR(order=10).fit(signal) 57 | print(c.error(50, 150)) 58 | ``` 59 | 60 | You can also compute the sum of costs for a given list of change points. 61 | 62 | ```python 63 | print(c.sum_of_costs(bkps)) 64 | print(c.sum_of_costs([10, 100, 200, 250, n])) 65 | ``` 66 | 67 | In order to use this cost class in a change point detection algorithm (inheriting from 68 | [BaseEstimator][ruptures.base.BaseEstimator]), either pass a [CostAR][ruptures.costs.costautoregressive.CostAR] instance (through the argument 69 | ``'custom_cost'``) or set `model="ar"`. 70 | Additional parameters can be passed to the cost instance through the keyword ``'params'``. 71 | 72 | ```python 73 | c = rpt.costs.CostAR(order=10) 74 | algo = rpt.Dynp(custom_cost=c) 75 | # is equivalent to 76 | algo = rpt.Dynp(model="ar", params={"order": 10}) 77 | ``` 78 | 79 | ## Reference 80 | 81 | [Bai2000] 82 | Bai, J. (2000).
Vector autoregressive models with structural changes in regression coefficients and in variance–covariance matrices. Annals of Economics and Finance, 1(2), 301–336. -------------------------------------------------------------------------------- /docs/user-guide/costs/costclinear.md: -------------------------------------------------------------------------------- 1 | # Continuous linear change (`CostCLinear`) 2 | 3 | ## Description 4 | 5 | For a given set of indexes (also called knots) $t_k$ ($k=1,\dots,K$), a linear spline $f$ is such that: 6 | 7 | 1. $f$ is affine on each interval $t_k..t_{k+1}$, i.e. $f(t)=\alpha_k (t-t_k) + \beta_k$ ($\alpha_k, \beta_k \in \mathbb{R}^d$) for all $t=t_k,t_k+1,\dots,t_{k+1}-1$; 8 | 2. $f$ is continuous. 9 | 10 | The cost function [`CostCLinear`][ruptures.costs.costclinear.CostCLinear] measures the error when approximating the signal with a linear spline. 11 | Formally, it is defined for $0[Hearst1994] 64 | Hearst, M. A. (1994). Multi-paragraph segmentation of expository text. In Proceedings of the Annual Meeting of the Association for Computational Linguistics (pp. 9–16). Las Cruces, New Mexico, USA. 65 | 66 | [Cooper2002] 67 | Cooper, M., & Foote, J. (2002). Automatic music summarization via similarity analysis. In Proceedings of the International Conference on Music Information Retrieval (ISMIR) (pp. 81–85). Paris, France. 68 | 69 | [Arlot2019] 70 | Arlot, S., Celisse, A., & Harchaoui, Z. (2019). A kernel multiple change-point algorithm via model selection. Journal of Machine Learning Research, 20(162), 1–56. 71 | -------------------------------------------------------------------------------- /docs/user-guide/costs/costcustom.md: -------------------------------------------------------------------------------- 1 | # Custom cost class 2 | 3 | Users who are interested in detecting a specific type of change can easily do so by creating a custom cost function. 
Provided that they subclass the base cost function [`BaseCost`][ruptures.base.BaseCost], they will be able to seamlessly run the algorithms implemented in `ruptures`.
warning 54 | For compatibility reasons, the static attributes `model` and `min_size` must be explicitly specified: 55 | 56 | - `model` is simply a string containing the name of the cost function (can be empty); 57 | - `min_size` is a positive integer that indicates the minimum segment size (in number of samples) on which the cost function can be applied. 58 | 59 | This cost function can now be used with all algorithms from `ruptures`. 60 | For instance, 61 | 62 | ```python 63 | import numpy as np 64 | import matplotlib.pylab as plt 65 | import ruptures as rpt 66 | 67 | # creation of data 68 | a = np.random.exponential(scale=1, size=100) 69 | b = np.random.exponential(scale=2, size=200) 70 | signal, bkps = np.r_[a, b, a], [100, 300, 400] 71 | # cost 72 | algo = rpt.Pelt(custom_cost=MyCost()).fit(signal) 73 | my_bkps = algo.predict(pen=10) 74 | # display 75 | rpt.display(signal, bkps, my_bkps) 76 | plt.show() 77 | ``` 78 | -------------------------------------------------------------------------------- /docs/user-guide/costs/costl1.md: -------------------------------------------------------------------------------- 1 | # Least absolute deviation (`CostL1`) 2 | 3 | ## Description 4 | 5 | This cost function detects changes in the median of a signal. 6 | Overall, it is a robust estimator of a shift in the central point (mean, median, mode) of a distribution [[Bai1995]](#Bai1995). 7 | Formally, for a signal $\{y_t\}_t$ on an interval $I$, 8 | 9 | $$ 10 | c(y_{I}) = \sum_{t\in I} \|y_t - \bar{y}\|_1 11 | $$ 12 | 13 | where $\bar{y}$ is the componentwise median of $\{y_t\}_{t\in I}$. 14 | 15 | ## Usage 16 | 17 | Start with the usual imports and create a signal. 
18 | 19 | ```python 20 | import numpy as np 21 | import matplotlib.pylab as plt 22 | import ruptures as rpt 23 | 24 | # creation of data 25 | n, dim = 500, 3 # number of samples, dimension 26 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 27 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 28 | ``` 29 | 30 | Then create a [`CostL1`][ruptures.costs.costl1.CostL1] instance and print the cost of the sub-signal `signal[50:150]`. 31 | 32 | ```python 33 | c = rpt.costs.CostL1().fit(signal) 34 | print(c.error(50, 150)) 35 | ``` 36 | 37 | You can also compute the sum of costs for a given list of change points. 38 | 39 | ```python 40 | print(c.sum_of_costs(bkps)) 41 | print(c.sum_of_costs([10, 100, 200, 250, n])) 42 | ``` 43 | 44 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator], either pass a [`CostL1`][ruptures.costs.costl1.CostL1] instance (through the argument `custom_cost`) or set `model="l1"`. 45 | 46 | ```python 47 | c = rpt.costs.CostL1() 48 | algo = rpt.Dynp(custom_cost=c) 49 | # is equivalent to 50 | algo = rpt.Dynp(model="l1") 51 | ``` 52 | 53 | ## References 54 | 55 | [Bai1995] 56 | Bai, J. (1995). Least absolute deviation of a shift. Econometric Theory, 11(3), 403–436. -------------------------------------------------------------------------------- /docs/user-guide/costs/costl2.md: -------------------------------------------------------------------------------- 1 | # Least squared deviation (`CostL2`) 2 | 3 | ## Description 4 | 5 | This cost function detects mean-shifts in a signal. 6 | Formally, for a signal $\{y_t\}_t$ on an interval $I$, 7 | 8 | $$ 9 | c(y_{I}) = \sum_{t\in I} \|y_t - \bar{y}\|_2^2 10 | $$ 11 | 12 | where $\bar{y}$ is the mean of $\{y_t\}_{t\in I}$. 13 | 14 | ## Usage 15 | 16 | Start with the usual imports and create a signal. 
17 | 18 | ```python 19 | import numpy as np 20 | import matplotlib.pylab as plt 21 | import ruptures as rpt 22 | 23 | # creation of data 24 | n, dim = 500, 3 # number of samples, dimension 25 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 26 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 27 | ``` 28 | 29 | Then create a [`CostL2`][ruptures.costs.costl2.CostL2] instance and print the cost of the sub-signal `signal[50:150]`. 30 | 31 | ```python 32 | c = rpt.costs.CostL2().fit(signal) 33 | print(c.error(50, 150)) 34 | ``` 35 | 36 | You can also compute the sum of costs for a given list of change points. 37 | 38 | ```python 39 | print(c.sum_of_costs(bkps)) 40 | print(c.sum_of_costs([10, 100, 200, 250, n])) 41 | ``` 42 | 43 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator]), either pass a [`CostL2`][ruptures.costs.costl2.CostL2] instance (through the argument `custom_cost`) or set `model="l2"`. 44 | 45 | ```python 46 | c = rpt.costs.CostL2() 47 | algo = rpt.Dynp(custom_cost=c) 48 | # is equivalent to 49 | algo = rpt.Dynp(model="l2") 50 | ``` -------------------------------------------------------------------------------- /docs/user-guide/costs/costlinear.md: -------------------------------------------------------------------------------- 1 | # Linear model change (`CostLinear`) 2 | 3 | ## Description 4 | 5 | Let $0 < t_1 < t_2 < \dots < n$ be unknown change points indexes. 6 | Consider the following multiple linear regression model 7 | 8 | $$ 9 | y_t = x_t' \delta_j + \varepsilon_t, \quad \forall t=t_j,\dots,t_{j+1}-1 10 | $$ 11 | 12 | for $j>1$. 13 | Here, the observed dependant variable is $y_t\in\mathbb{R}$, the covariate vector is $x_t \in\mathbb{R}^p$, the disturbance is $\varepsilon_t\in\mathbb{R}$. 14 | The vectors $\delta_j\in\mathbb{R}^p$ are the parameter vectors (or regression coefficients). 
The least-squares estimates of the break dates are obtained by minimizing the sum of squared residuals [[Bai2003]](#Bai2003).
66 | 67 | ```python 68 | c = rpt.costs.CostLinear() 69 | algo = rpt.Dynp(custom_cost=c) 70 | # is equivalent to 71 | algo = rpt.Dynp(model="linear") 72 | ``` 73 | 74 | ## References 75 | 76 | [Bai2003] 77 | J. Bai and P. Perron. Critical values for multiple structural change tests. Econometrics Journal, 6(1):72–78, 2003. 78 | -------------------------------------------------------------------------------- /docs/user-guide/costs/costml.md: -------------------------------------------------------------------------------- 1 | # Change detection with a Mahalanobis-type metric (`CostMl`) 2 | 3 | ## Description 4 | 5 | Given a positive semi-definite matrix $M\in\mathbb{R}^{d\times d}$, 6 | this cost function detects changes in the mean of the embedded signal defined by the pseudo-metric 7 | 8 | $$ 9 | \| x - y \|_M^2 = (x-y)^t M (x-y). 10 | $$ 11 | 12 | Formally, for a signal $\{y_t\}_t$ on an interval $I$, the cost function is equal to 13 | 14 | $$ 15 | c(y_{I}) = \sum_{t\in I} \| y_t - \bar{\mu} \|_{M}^2 16 | $$ 17 | 18 | where $\bar{\mu}$ is the empirical mean of the sub-signal $\{y_t\}_{t\in I}$. 19 | The matrix $M$ can for instance be the result of a similarity learning algorithm [[Xing2003](#Xing2003), [Truong2019](#Truong2019)] or the inverse of the empirical covariance matrix (yielding the Mahalanobis distance). 20 | 21 | ## Usage 22 | 23 | Start with the usual imports and create a signal. 24 | 25 | ```python 26 | import numpy as np 27 | import matplotlib.pylab as plt 28 | import ruptures as rpt 29 | 30 | # creation of data 31 | n, dim = 500, 3 # number of samples, dimension 32 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 33 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 34 | ``` 35 | 36 | Then create a [`CostMl`][ruptures.costs.costml.CostMl] instance and print the cost of the sub-signal `signal[50:150]`. 
37 | 38 | ```python 39 | M = np.eye(dim) 40 | c = rpt.costs.CostMl(metric=M).fit(signal) 41 | print(c.error(50, 150)) 42 | ``` 43 | 44 | You can also compute the sum of costs for a given list of change points. 45 | 46 | ```python 47 | print(c.sum_of_costs(bkps)) 48 | print(c.sum_of_costs([10, 100, 200, 250, n])) 49 | ``` 50 | 51 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator]), either pass a [`CostMl`][ruptures.costs.costml.CostMl] instance (through the argument `custom_cost`) or set `model="mahalanobis"`. 52 | 53 | ```python 54 | c = rpt.costs.CostMl(metric=M) 55 | algo = rpt.Dynp(custom_cost=c) 56 | # is equivalent to 57 | algo = rpt.Dynp(model="mahalanobis", params={"metric": M}) 58 | ``` 59 | 60 | ## References 61 | 62 | [Xing2003] 63 | Xing, E. P., Jordan, M. I., & Russell, S. J. (2003). Distance metric learning, with application to clustering with side-Information. Advances in Neural Information Processing Systems (NIPS), 521–528. 64 | 65 | [Truong2019] 66 | Truong, C., Oudre, L., & Vayatis, N. (2019). Supervised kernel change point detection with partial annotations. Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 1–5. -------------------------------------------------------------------------------- /docs/user-guide/costs/costnormal.md: -------------------------------------------------------------------------------- 1 | # Gaussian process change (`CostNormal`) 2 | 3 | ## Description 4 | 5 | This cost function detects changes in the mean and covariance matrix of a sequence of multivariate Gaussian random variables. 
6 | Formally, for a signal $\{y_t\}_t$ on an interval $I$, 7 | $$ 8 | c(y_{I}) = |I| \log\det(\widehat{\Sigma}_I + \epsilon\text{Id}) 9 | $$ 10 | where $\widehat{\Sigma}_I$ is the empirical covariance matrix of the sub-signal $\{y_t\}_{t\in I}$ and $\epsilon>0$ is a small constant added to cope with badly conditioned covariance matrices (new in version 1.1.5, see [Issue 196](https://github.com/deepcharles/ruptures/issues/196)). 11 | It is robust to strongly dependant processes; for more information, see [[Lavielle1999]](#Lavielle1999) (univariate case) and [[Lavielle2006]](#Lavielle2006) (multivariate case). 12 | 13 | 14 | ## Usage 15 | 16 | Start with the usual imports and create a signal. 17 | 18 | ```python 19 | import numpy as np 20 | import matplotlib.pylab as plt 21 | import ruptures as rpt 22 | 23 | # creation of data 24 | n, dim = 500, 3 # number of samples, dimension 25 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 26 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 27 | ``` 28 | 29 | Then create a [`CostNormal`][ruptures.costs.costnormal.CostNormal] instance and print the cost of the sub-signal `signal[50:150]`. 30 | 31 | ```python 32 | c = rpt.costs.CostNormal().fit(signal) 33 | print(c.error(50, 150)) 34 | ``` 35 | 36 | You can also compute the sum of costs for a given list of change points. 37 | 38 | ```python 39 | print(c.sum_of_costs(bkps)) 40 | print(c.sum_of_costs([10, 100, 200, 250, n])) 41 | ``` 42 | 43 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator]), either pass a [`CostNormal`][ruptures.costs.costnormal.CostNormal] instance (through the argument `custom_cost`) or set `model="normal"`. 
44 | 45 | ```python 46 | c = rpt.costs.CostNormal() 47 | algo = rpt.Dynp(custom_cost=c) 48 | # is equivalent to 49 | algo = rpt.Dynp(model="normal") 50 | ``` 51 | 52 | To set the small diagonal bias to 0 (default behaviour in versions 1.1.4 and before), simply do the following (change `Dynp` by the search method you need). 53 | ```python 54 | c = rpt.costs.CostNormal(add_small_diag=False) 55 | algo = rpt.Dynp(custom_cost=c) 56 | # or, equivalently, 57 | algo = rpt.Dynp(model="normal", params={"add_small_diag": False}) 58 | ``` 59 | 60 | ## References 61 | 62 | [Lavielle1999] 63 | Lavielle, M. (1999). Detection of multiples changes in a sequence of dependant variables. Stochastic Processes and Their Applications, 83(1), 79–102. 64 | 65 | [Lavielle2006] 66 | Lavielle, M., & Teyssière, G. (2006). Detection of multiple change-points in multivariate time series. Lithuanian Mathematical Journal, 46(3). -------------------------------------------------------------------------------- /docs/user-guide/costs/costrank.md: -------------------------------------------------------------------------------- 1 | # Rank-based cost function (`CostRank`) 2 | 3 | ## Description 4 | 5 | This cost function detects general distribution changes in multivariate signals, using a rank transformation [[Lung-Yut-Fong2015]](#Lung-Yut-Fong2015). 6 | Formally, for a signal $\{y_t\}_t$ on an interval $[a, b)$, 7 | 8 | $$ 9 | c_{rank}(a, b) = -(b - a) \bar{r}_{a..b}' \hat{\Sigma}_r^{-1} \bar{r}_{a..b} 10 | $$ 11 | 12 | where $\bar{r}_{a..b}$ is the empirical mean of the sub-signal $\{r_t\}_{t=a+1}^b$, and $\hat{\Sigma}_r$ is the covariance matrix of the complete rank signal $r$. 13 | 14 | ## Usage 15 | 16 | Start with the usual imports and create a signal. 
17 | 18 | ```python 19 | import numpy as np 20 | import matplotlib.pylab as plt 21 | import ruptures as rpt 22 | 23 | # creation of data 24 | n, dim = 500, 3 # number of samples, dimension 25 | n_bkps, sigma = 3, 5 # number of change points, noise standard deviation 26 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 27 | ``` 28 | 29 | Then create a [`CostRank`][ruptures.costs.costrank.CostRank] instance and print the cost of the sub-signal `signal[50:150]`. 30 | 31 | ```python 32 | c = rpt.costs.CostRank().fit(signal) 33 | print(c.error(50, 150)) 34 | ``` 35 | 36 | You can also compute the sum of costs for a given list of change points. 37 | 38 | ```python 39 | print(c.sum_of_costs(bkps)) 40 | print(c.sum_of_costs([10, 100, 200, 250, n])) 41 | ``` 42 | 43 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator]), either pass a [`CostRank`][ruptures.costs.costrank.CostRank] instance (through the argument `custom_cost`) or set `model="rank"`. 44 | 45 | ```python 46 | c = rpt.costs.CostRank() 47 | algo = rpt.Dynp(custom_cost=c) 48 | # is equivalent to 49 | algo = rpt.Dynp(model="rank") 50 | ``` 51 | 52 | ## References 53 | 54 | [Lung-Yut-Fong2015] 55 | Lung-Yut-Fong, A., Lévy-Leduc, C., & Cappé, O. (2015). Homogeneity and change-point detection tests for multivariate data using rank statistics. Journal de La Société Française de Statistique, 156(4), 133–162. 
-------------------------------------------------------------------------------- /docs/user-guide/costs/costrbf.md: -------------------------------------------------------------------------------- 1 | # Kernelized mean change (`CostRbf`) 2 | 3 | ## Description 4 | 5 | Given a positive semi-definite kernel $k(\cdot, \cdot) : \mathbb{R}^d\times \mathbb{R}^d \mapsto \mathbb{R}$ and its associated feature map $\Phi:\mathbb{R}^d \mapsto \mathcal{H}$ (where $\mathcal{H}$ is an appropriate Hilbert space), this cost function detects changes in the mean of the embedded signal $\{\Phi(y_t)\}_t$ [[Garreau2018](#Garreau2018), [Arlot2019](#Arlot2019)]. 6 | Formally, for a signal $\{y_t\}_t$ on an interval $I$, 7 | 8 | $$ 9 | c(y_{I}) = \sum_{t\in I} \| \Phi(y_t) - \bar{\mu} \|_{\mathcal{H}}^2 10 | $$ 11 | 12 | where $\bar{\mu}$ is the empirical mean of the embedded sub-signal $\{\Phi(y_t)\}_{t\in I}$. 13 | Here the kernel is the radial basis function (rbf): 14 | 15 | $$ 16 | k(x, y) = \exp(-\gamma \| x - y \|^2 ) 17 | $$ 18 | 19 | where $\| \cdot \|$ is the Euclidean norm and $\gamma>0$ is the so-called bandwidth parameter and is determined according to median heuristics (i.e. equal to the inverse of median of all pairwise distances). 20 | 21 | In a nutshell, this cost function is able to detect changes in the distribution of an iid sequence of random variables. 22 | Because it is non-parametric, it is performs reasonably well on a wide range of tasks. 23 | 24 | ## Usage 25 | 26 | Start with the usual imports and create a signal. 
27 | 28 | ```python 29 | import numpy as np 30 | import matplotlib.pylab as plt 31 | import ruptures as rpt 32 | 33 | # creation of data 34 | n, dim = 500, 3 # number of samples, dimension 35 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 36 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 37 | ``` 38 | 39 | Then create a [`CostRbf`][ruptures.costs.costrbf.CostRbf] instance and print the cost of the sub-signal `signal[50:150]`. 40 | 41 | ```python 42 | c = rpt.costs.CostRbf().fit(signal) 43 | print(c.error(50, 150)) 44 | ``` 45 | 46 | You can also compute the sum of costs for a given list of change points. 47 | 48 | ```python 49 | print(c.sum_of_costs(bkps)) 50 | print(c.sum_of_costs([10, 100, 200, 250, n])) 51 | ``` 52 | 53 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator]), either pass a [`CostRbf`][ruptures.costs.costrbf.CostRbf] instance (through the argument `custom_cost`) or set `model="rbf"`. 54 | 55 | ```python 56 | c = rpt.costs.CostRbf() 57 | algo = rpt.Dynp(custom_cost=c) 58 | # is equivalent to 59 | algo = rpt.Dynp(model="rbf") 60 | ``` 61 | 62 | ## References 63 | 64 | [Garreau2018] 65 | Garreau, D., & Arlot, S. (2018). Consistent change-point detection with kernels. Electronic Journal of Statistics, 12(2), 4440–4486. 66 | 67 | [Arlot2019] 68 | Arlot, S., Celisse, A., & Harchaoui, Z. (2019). A kernel multiple change-point algorithm via model selection. Journal of Machine Learning Research, 20(162), 1–56. 
69 | -------------------------------------------------------------------------------- /docs/user-guide/datasets/pw_constant.md: -------------------------------------------------------------------------------- 1 | # Piecewise constant (`pw_constant`) 2 | 3 | ## Description 4 | 5 | For a given number of samples $T$, number $K$ of change points and noise variance $\sigma^2$, the function [`pw_constant`][ruptures.datasets.pw_constant.pw_constant] generates change point dexes $0 < t_1 < \dots < t_K < T$ and a piecewise constant signal $\{y_t\}_t$ with additive Gaussian noise. 6 | 7 | ## Usage 8 | 9 | Start with the usual imports and create a signal. 10 | 11 | ```python 12 | import numpy as np 13 | import matplotlib.pylab as plt 14 | import ruptures as rpt 15 | 16 | # creation of data 17 | n, dim = 500, 3 # number of samples, dimension 18 | n_bkps, sigma = 3, 5 # number of change points, noise standard deviation 19 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 20 | rpt.display(signal, bkps) 21 | ``` 22 | 23 | The mean shift amplitude is uniformly drawn from an interval that can be changed through the keyword `delta`. 24 | 25 | ```python 26 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma, delta=(1, 10)) 27 | ``` 28 | -------------------------------------------------------------------------------- /docs/user-guide/datasets/pw_linear.md: -------------------------------------------------------------------------------- 1 | # Piecewise linear (`pw_linear`) 2 | 3 | ## Description 4 | 5 | This function [`pw_linear`][ruptures.datasets.pw_linear.pw_linear] simulates a piecewise linear model (see [Cost linear](../costs/costlinear.md)). 6 | The covariates are standard Gaussian random variables. 7 | The response variable is a (piecewise) linear combination of the covariates. 8 | 9 | ## Usage 10 | 11 | Start with the usual imports and create a signal. 
12 | 13 | ```python 14 | import numpy as np 15 | import matplotlib.pylab as plt 16 | import ruptures as rpt 17 | 18 | # creation of data 19 | n, dim = 500, 3 # number of samples, dimension of the covariates 20 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 21 | signal, bkps = rpt.pw_linear(n, dim, n_bkps, noise_std=sigma) 22 | rpt.display(signal, bkps) 23 | ``` -------------------------------------------------------------------------------- /docs/user-guide/datasets/pw_normal.md: -------------------------------------------------------------------------------- 1 | # Piecewise 2D Gaussian process (`pw_normal`) 2 | 3 | ## Description 4 | 5 | The function [`pw_normal`][ruptures.datasets.pw_normal.pw_normal] simulates a 2D signal of Gaussian i.i.d. random variables with zero mean and covariance matrix alternating between $[[1, 0.9], [0.9, 1]]$ and $[[1, -0.9], [-0.9, 1]]$ at every change point. 6 | 7 | ![](../../images/correlation_shift.png) 8 |
Top and middle: 2D signal example. Bottom: Scatter plot for each regime type
9 | 10 | ## Usage 11 | 12 | Start with the usual imports and create a signal. 13 | 14 | ```python 15 | import numpy as np 16 | import matplotlib.pylab as plt 17 | import ruptures as rpt 18 | 19 | # creation of data 20 | n = 500 # number of samples 21 | n_bkps = 3 # number of change points 22 | signal, bkps = rpt.pw_normal(n, n_bkps) 23 | rpt.display(signal, bkps) 24 | ``` -------------------------------------------------------------------------------- /docs/user-guide/datasets/pw_wavy.md: -------------------------------------------------------------------------------- 1 | # Piecewise sinusoidal signal (`pw_wavy`) 2 | 3 | ## Description 4 | 5 | The function [`pw_wavy`][ruptures.datasets.pw_wavy.pw_wavy] simulates a sum-of-sine signal $y_t=\sin(2\pi f_1 t)+\sin(2\pi f_2 t)$ where $t=0,\dots,T-1$. 6 | The frequency vector $[f_1, f_2]$ alternates between $[0.075, 0.1]$ and $[0.1, 0.125]$ at each change point index. 7 | Gaussian white noise can be added to the signal. 8 | 9 | ![](../../images/sum_of_sines.png) 10 |
Top: signal example. Bottom: associated spectrogram.
11 | 12 | ## Usage 13 | 14 | Start with the usual imports and create a signal. 15 | 16 | ```python 17 | import numpy as np 18 | import matplotlib.pylab as plt 19 | import ruptures as rpt 20 | 21 | # creation of data 22 | n, dim = 500, 3 # number of samples, dimension 23 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 24 | signal, bkps = rpt.pw_wavy(n, n_bkps, noise_std=sigma) 25 | rpt.display(signal, bkps) 26 | ``` -------------------------------------------------------------------------------- /docs/user-guide/detection/binseg.md: -------------------------------------------------------------------------------- 1 | # Binary segmentation (`Binseg`) 2 | 3 | ## Description 4 | 5 | Binary change point detection is used to perform fast signal segmentation and is implemented in [`Binseg`][ruptures.detection.binseg.Binseg]. 6 | It is a sequential approach: first, one change point is detected in the complete input signal, then series is split around this change point, then the operation is repeated on the two resulting sub-signals. 7 | For a theoretical and algorithmic analysis of [`Binseg`][ruptures.detection.binseg.Binseg], see for instance [[Bai1997]](#Bai1997) and [[Fryzlewicz2014]](#Fryzlewicz2014). 8 | The benefits of binary segmentation includes low complexity (of the order of $\mathcal{O}(Cn\log n)$, where $n$ is the number of samples and $C$ the complexity of calling the considered cost function on one sub-signal), the fact that it can extend any single change point detection method to detect multiple changes points and that it can work whether the number of regimes is known beforehand or not. 9 | 10 | ![](../../images/schema_binseg.png) 11 |
Schematic view of the binary segmentation algorithm
12 | 13 | 14 | ## Usage 15 | 16 | Start with the usual imports and create a signal. 17 | 18 | ```python 19 | import numpy as np 20 | import matplotlib.pylab as plt 21 | import ruptures as rpt 22 | 23 | # creation of data 24 | n = 500 # number of samples 25 | n_bkps, sigma = 3, 5 # number of change points, noise standard deviation 26 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 27 | ``` 28 | 29 | To perform a binary segmentation of a signal, initialize a [`BinSeg`][ruptures.detection.binseg.Binseg] instance. 30 | 31 | ```python 32 | # change point detection 33 | model = "l2" # "l1", "rbf", "linear", "normal", "ar",... 34 | algo = rpt.Binseg(model=model).fit(signal) 35 | my_bkps = algo.predict(n_bkps=3) 36 | 37 | # show results 38 | rpt.show.display(signal, bkps, my_bkps, figsize=(10, 6)) 39 | plt.show() 40 | ``` 41 | In the situation in which the number of change points is unknown, one can specify a penalty using 42 | the `pen` parameter or a threshold on the residual norm using `epsilon`. 43 | 44 | ```python 45 | my_bkps = algo.predict(pen=np.log(n) * dim * sigma**2) 46 | # or 47 | my_bkps = algo.predict(epsilon=3 * n * sigma**2) 48 | ``` 49 | 50 | For faster predictions, one can modify the `jump` parameter during initialization. 51 | The higher it is, the faster the prediction is achieved (at the expense of precision). 52 | 53 | ```python 54 | algo = rpt.Binseg(model=model, jump=10).fit(signal) 55 | ``` 56 | 57 | ## References 58 | 59 | [Bai1997] 60 | Bai, J. (1997). Estimating multiple breaks one at a time. Econometric Theory, 13(3), 315–352. 61 | 62 | [Fryzlewicz2014] 63 | Fryzlewicz, P. (2014). Wild binary segmentation for multiple change-point detection. The Annals of Statistics, 42(6), 2243–2281. 
-------------------------------------------------------------------------------- /docs/user-guide/detection/bottomup.md: -------------------------------------------------------------------------------- 1 | # Bottom-up segmentation (`BottomUp`) 2 | 3 | ## Description 4 | 5 | Bottom-up change point detection is used to perform fast signal segmentation and is implemented in 6 | [`BottomUp`][ruptures.detection.bottomup.BottomUp] in a sequential manner. 7 | Contrary to binary segmentation, which is a greedy procedure, bottom-up segmentation is generous: 8 | it starts with many change points and successively deletes the less significant ones. 9 | First, the signal is divided in many sub-signals along a regular grid. 10 | Then contiguous segments are successively merged according to a measure of how similar they are. 11 | See for instance [[Keogh2001]](#Keogh2001) or [[Fryzlewicz2007]](#Fryzlewicz2007) for an algorithmic 12 | analysis of [`BottomUp`][ruptures.detection.bottomup.BottomUp]. 13 | The benefits of bottom-up segmentation includes low complexity (of the order of 14 | $\mathcal{O}(n\log n)$, where $n$ is the number of samples), the fact that it can extend 15 | any single change point detection method to detect multiple changes points and that it can work 16 | whether the number of regimes is known beforehand or not. 17 | 18 | ![](../../images/schema_tree.png) 19 |
Schematic view of the bottom-up segmentation algorithm
20 | 21 | ## Usage 22 | 23 | Start with the usual imports and create a signal. 24 | 25 | ```python 26 | import numpy as np 27 | import matplotlib.pylab as plt 28 | import ruptures as rpt 29 | 30 | # creation of data 31 | n, dim = 500, 3 # number of samples, dimension 32 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 33 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 34 | ``` 35 | 36 | To perform a bottom-up segmentation of a signal, initialize a [`BottomUp`][ruptures.detection.bottomup.BottomUp] 37 | instance. 38 | 39 | ```python 40 | # change point detection 41 | model = "l2" # "l1", "rbf", "linear", "normal", "ar" 42 | algo = rpt.BottomUp(model=model).fit(signal) 43 | my_bkps = algo.predict(n_bkps=3) 44 | 45 | # show results 46 | rpt.show.display(signal, bkps, my_bkps, figsize=(10, 6)) 47 | plt.show() 48 | ``` 49 | 50 | In the situation in which the number of change points is unknown, one can specify a penalty using 51 | the `pen` parameter or a threshold on the residual norm using `epsilon`. 52 | 53 | ```python 54 | my_bkps = algo.predict(pen=np.log(n) * dim * sigma**2) 55 | # or 56 | my_bkps = algo.predict(epsilon=3 * n * sigma**2) 57 | ``` 58 | 59 | For faster predictions, one can modify the `jump` parameter during initialization. 60 | The higher it is, the faster the prediction is achieved (at the expense of precision). 61 | 62 | ```python 63 | algo = rpt.BottomUp(model=model, jump=10).fit(signal) 64 | ``` 65 | 66 | ## References 67 | 68 | 69 | [Keogh2001] 70 | Keogh, E., Chu, S., Hart, D., & Pazzani, M. (2001). An online algorithm for segmenting time series. Proceedings of the IEEE International Conference on Data Mining (ICDM), 289–296. 71 | 72 | [Fryzlewicz2007] 73 | Fryzlewicz, P. (2007). Unbalanced Haar technique for nonparametric function estimation. Journal of the American Statistical Association, 102(480), 1318–1327. 
-------------------------------------------------------------------------------- /docs/user-guide/detection/dynp.md: -------------------------------------------------------------------------------- 1 | # Dynamic programming (`Dynp`) 2 | 3 | ## Description 4 | 5 | The method is implemented in both [`Dynp`][ruptures.detection.dynp.Dynp], which is a full native python implementation for which the user can choose any cost functions defined in `ruptures.costs` 6 | 7 | It finds the (exact) minimum of the sum of costs by computing the cost of all subsequences of a given signal. 8 | It is called "dynamic programming" because the search over all possible segmentations is ordered using a dynamic programming approach. 9 | 10 | In order to work, **the user must specify in advance the number of changes to detect**. 11 | (Consider using penalized methods when this number is unknown.) 12 | 13 | The complexity of the dynamic programming approach is of the order $\mathcal{O}(CKn^2)$, where $K$ is the number of change points to detect, $n$ the number of samples and $C$ the complexity of calling the considered cost function on one sub-signal. 14 | Consequently, piecewise constant models (`model=l2`) are significantly faster than linear or autoregressive models. 15 | 16 | To reduce the computational cost, you can consider only a subsample of possible change point indexes, by changing the `min_size` and `jump` arguments when instantiating [Dynp](#ruptures.detection.Dynp): 17 | 18 | - `min_size` controls the minimum distance between change points; for instance, if `min_size=10`, all change points will be at least 10 samples apart. 19 | - `jump` controls the grid of possible change points; for instance, if `jump=k`, only changes at `k, 2*k, 3*k,...` are considered. 
20 | 21 | ## Usage 22 | 23 | ```python 24 | import numpy as np 25 | import matplotlib.pylab as plt 26 | import ruptures as rpt 27 | 28 | # creation of data 29 | n, dim = 500, 3 30 | n_bkps, sigma = 3, 5 31 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 32 | 33 | # change point detection 34 | model = "l1" # "l2", "rbf" 35 | algo = rpt.Dynp(model=model, min_size=3, jump=5).fit(signal) 36 | my_bkps = algo.predict(n_bkps=3) 37 | 38 | # show results 39 | rpt.show.display(signal, bkps, my_bkps, figsize=(10, 6)) 40 | plt.show() 41 | ``` -------------------------------------------------------------------------------- /docs/user-guide/detection/kernelcpd.md: -------------------------------------------------------------------------------- 1 | # Kernel change point detection 2 | 3 | ## Problem formulation 4 | 5 | In this section, the kernel change point detection setting is briefly described. 6 | The interested reader can refer to [[Celisse2018](#Celisse2018), [Arlot2019](#Arlot2019)] for a more complete introduction.
7 | Let $y = \{y_0,y_1,\dots,y_{T-1}\}$ denote a $\mathbb{R}^d$-valued signal with $T$ samples. 8 | This signal is mapped onto a [reproducing Hilbert space (rkhs)](https://en.wikipedia.org/wiki/Reproducing_kernel_Hilbert_space) $\mathcal{H}$ associated with a user-defined kernel function $k(\cdot, \cdot):\mathbb{R}^d\times\mathbb{R}^d\rightarrow\mathbb{R}$. 9 | The mapping function $\phi:\mathbb{R}^d\rightarrow\mathcal{H}$ onto this rkhs is implicitly defined by $\phi(y_t) = k(y_t, \cdot)\in\mathcal{H}$ resulting in the following inner-product and norm: 10 | 11 | $$ 12 | \langle\phi(y_s)\mid\phi(y_t)\rangle_{\mathcal{H}} = k(y_s,y_t) 13 | $$ 14 | 15 | and 16 | 17 | $$ 18 | \|\phi(y_t)\|_{\mathcal{H}}^2 = k(y_t,y_t) 19 | $$ 20 | 21 | for any samples $y_s,y_t\in\mathbb{R}^d$. 22 | Kernel change point detection consists in finding mean-shifts in the mapped signal $\phi(y)$ by minimizing $V(\cdot)$ where 23 | 24 | $$ 25 | V(t_1,\dots,t_K) := \sum_{k=0}^K\sum_{t=t_k}^{t_{k+1}-1} \|\phi(y_t)-\bar{\mu}_{t_k..t_{k+1}}\|^2_{\mathcal{H}} 26 | $$ 27 | 28 | where $\bar{\mu}_{t_k..t_{k+1}}$ is the empirical mean of the sub-signal $\phi(y_{t_k}), \phi(y_{t_k+1}),\dots,\phi(y_{t_{k+1}-1})$, and $t_1,t_2,\dots,t_K$ are change point indexes, in increasing order. 29 | (By convention $t_0=0$ and $t_{K+1}=T$.) 30 | 31 | **If the number of changes is known beforehand**, we solve the following optimization problem, over all possible change positions $t_10$ is the smoothing parameter (provided by the user) and $\hat{K}$ is the estimated number of change points. 46 | Higher values of $\beta$ produce lower $\hat{K}$. 47 | The exact optimization procedure is described in [[Killick2012]](#Killick2012). 48 | 49 | ## Available kernels 50 | We list below a number of kernels that are already implemented in `ruptures`. 51 | In the following, $u$ and $v$ are two d-dimensional vectors and $\|\cdot\|$ is the Euclidean norm. 
52 | 53 | | Kernel | Description | Cost function | 54 | | -------------------------- | --------------------------------------------------------------------------------------------------- | ---------------------------------------------------- | 55 | | Linear
`model="linear"` | $k_{\text{linear}}(u, v) = u^T v$. | [`CostL2`](../../user-guide/costs/costl2.md) | 56 | | Gaussian
`model="rbf"` | $k_{\text{Gaussian}}(u,v)=\exp(-\gamma \|u-v\|^2)$
where $\gamma>0$ is a user-defined parameter. | [`CostRbf`](../../user-guide/costs/costrbf.md) | 57 | | Cosine
`model="cosine"` | $k_{\text{cosine}}(u, v) = (u^T v)/(\|u\|\|v\|)$ | [`CostCosine`](../../user-guide/costs/costcosine.md) | 58 | 59 | 60 | ## Implementation and usage 61 | 62 | Kernel change point detection is implemented in the class [`KernelCPD`][ruptures.detection.kernelcpd.KernelCPD], which is a C implementation of dynamic programming and PELT. 63 | To see it in action, please look at the gallery of examples, in particular: 64 | 65 | - [Kernel change point detection: a performance comparison](../../examples/kernel-cpd-performance-comparison.ipynb) 66 | 67 | The exact class API is available [here][ruptures.detection.kernelcpd.KernelCPD]. 68 | 69 | ## References 70 | 71 | [Gretton2012] 72 | Gretton, A., Borgwardt, K. M., Rasch, M. J., Schölkopf, B., & Smola, A. (2012). A kernel two-sample test. The Journal of Machine Learning Research, 13, 723–773. 73 | 74 | [Killick2012] 75 | Killick, R., Fearnhead, P., & Eckley, I. (2012). Optimal detection of changepoints with a linear computational cost. Journal of the American Statistical Association, 107(500), 1590–1598. 76 | 77 | [Celisse2018] 78 | Celisse, A., Marot, G., Pierre-Jean, M., & Rigaill, G. (2018). New efficient algorithms for multiple change-point detection with reproducing kernels. Computational Statistics and Data Analysis, 128, 200–220. 79 | 80 | [Arlot2019] 81 | Arlot, S., Celisse, A., & Harchaoui, Z. (2019). A kernel multiple change-point algorithm via model selection. Journal of Machine Learning Research, 20(162), 1–56. 82 | -------------------------------------------------------------------------------- /docs/user-guide/detection/pelt.md: -------------------------------------------------------------------------------- 1 | # Linearly penalized segmentation (`Pelt`) 2 | 3 | ## Description 4 | 5 | The method is implemented in [`Pelt`][ruptures.detection.pelt.Pelt]. 6 | 7 | Because the enumeration of all possible partitions is impossible, the algorithm relies on a pruning rule. 
8 | Many indexes are discarded, greatly reducing the computational cost while retaining the 9 | ability to find the optimal segmentation. 10 | The implementation follows [[Killick2012]](#Killick2012). 11 | In addition, under certain conditions on the change point distribution, the average computational complexity is of the order of $\mathcal{O}(CKn)$, where $K$ is the number of change points to detect, $n$ the number of samples and $C$ the complexity of calling the considered cost function on one sub-signal. 12 | Consequently, piecewise constant models (`model=l2`) are significantly faster than linear or autoregressive models. 13 | 14 | To reduce the computational cost, you can consider only a subsample of possible change point indexes, by changing the `min_size` and `jump` arguments when instantiating [Pelt](#ruptures.detection.Pelt): 15 | 16 | - `min_size` controls the minimum distance between change points; for instance, if `min_size=10`, all change points will be at least 10 samples apart. 17 | - `jump` controls the grid of possible change points; for instance, if `jump=k`, only changes at `k, 2*k, 3*k,...` are considered. 18 | 19 | 20 | ## Usage 21 | 22 | ```python 23 | import numpy as np 24 | import matplotlib.pylab as plt 25 | import ruptures as rpt 26 | 27 | # creation of data 28 | n, dim = 500, 3 29 | n_bkps, sigma = 3, 1 30 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 31 | 32 | # change point detection 33 | model = "l1" # "l2", "rbf" 34 | algo = rpt.Pelt(model=model, min_size=3, jump=5).fit(signal) 35 | my_bkps = algo.predict(pen=3) 36 | 37 | # show results 38 | fig, ax_arr = rpt.display(signal, bkps, my_bkps, figsize=(10, 6)) 39 | plt.show() 40 | ``` 41 | 42 | ## References 43 | 44 | [Killick2012] 45 | Killick, R., Fearnhead, P., & Eckley, I. (2012). Optimal detection of changepoints with a linear computational cost. Journal of the American Statistical Association, 107(500), 1590–1598. 
46 | -------------------------------------------------------------------------------- /docs/user-guide/detection/window.md: -------------------------------------------------------------------------------- 1 | # Window-based change point detection (`Window`) 2 | 3 | ## Description 4 | 5 | Window-based change point detection is used to perform fast signal segmentation and is implemented in [`Window`][ruptures.detection.window.Window]. 6 | The algorithm uses two windows which slide along the data stream. 7 | The statistical properties of the signals within each window are compared with a discrepancy measure. 8 | For a given cost function $c(\cdot)$, a discrepancy measure is derived $d(\cdot,\cdot)$ as follows: 9 | 10 | $$ 11 | d(y_{u..v}, y_{v..w}) = c(y_{u..w}) - c(y_{u..v}) - c(y_{v..w}) 12 | $$ 13 | 14 | where $\{y_t\}_t$ is the input signal and $u < v < w$ are indexes. 15 | The discrepancy is the cost gain of splitting the sub-signal $y_{u..w}$ at the index $v$. 16 | If the sliding windows $u..v$ and $v..w$ both fall into a segment, their statistical properties are similar and the discrepancy between the first window and the second window is low. 17 | If the sliding windows fall into two dissimilar segments, the discrepancy is significantly higher, suggesting that the boundary between windows is a change point. 18 | The discrepancy curve is the curve, defined for all indexes $t$ between $w/2$ and $n-w/2$ ($n$ is the number of samples), 19 | 20 | $$ 21 | \big(t, d(y_{t-w/2..t}, y_{t..t+w/2})\big) 22 | $$ 23 | 24 | where $w$ is the window length. 25 | A sequential peak search is performed on the discrepancy curve in order to detect change points. 
26 | 27 | The benefits of window-based segmentation include low complexity (of the order of $\mathcal{O}(n w)$, where $n$ is the number of samples), the fact that it can extend any single change point detection method to detect multiple change points and that it can work whether the number of regimes is known beforehand or not. 28 | 29 | ![](../../images/schema_fenetre.png) 30 |
Schematic view of the window sliding segmentation algorithm
31 | 32 | ## Usage 33 | 34 | Start with the usual imports and create a signal. 35 | 36 | ```python 37 | import numpy as np 38 | import matplotlib.pylab as plt 39 | import ruptures as rpt 40 | 41 | # creation of data 42 | n, dim = 500, 3 # number of samples, dimension 43 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 44 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 45 | ``` 46 | 47 | To perform a binary segmentation of a signal, initialize a [`Window`][ruptures.detection.window.Window] 48 | instance. 49 | 50 | ```python 51 | # change point detection 52 | model = "l2" # "l1", "rbf", "linear", "normal", "ar" 53 | algo = rpt.Window(width=40, model=model).fit(signal) 54 | my_bkps = algo.predict(n_bkps=3) 55 | 56 | # show results 57 | rpt.show.display(signal, bkps, my_bkps, figsize=(10, 6)) 58 | plt.show() 59 | ``` 60 | 61 | The window length (in number of samples) is modified through the argument `width`. 62 | Usual methods assume that the window length is smaller than the smallest regime length. 63 | 64 | In the situation in which the number of change points is unknown, one can specify a penalty using 65 | the `pen` parameter or a threshold on the residual norm using `epsilon`. 66 | 67 | ```python 68 | my_bkps = algo.predict(pen=np.log(n) * dim * sigma**2) 69 | # or 70 | my_bkps = algo.predict(epsilon=3 * n * sigma**2) 71 | ``` 72 | 73 | For faster predictions, one can modify the `jump` parameter during initialization. 74 | The higher it is, the faster the prediction is achieved (at the expense of precision). 
75 | 76 | ```python 77 | algo = rpt.Window(model=model, jump=10).fit(signal) 78 | ``` -------------------------------------------------------------------------------- /docs/user-guide/evaluation.md: -------------------------------------------------------------------------------- 1 | # Evaluation and visualization 2 | 3 | -------------------------------------------------------------------------------- /docs/user-guide/index.md: -------------------------------------------------------------------------------- 1 | # User guide 2 | 3 | This section describes the algorithms and utility functions of `ruptures`. 4 | Each entry of the user guide is linked to a companion entry in the [Code reference](../code-reference/index.md) section, where the API is detailed. -------------------------------------------------------------------------------- /docs/user-guide/metrics/hausdorff.md: -------------------------------------------------------------------------------- 1 | # Hausdorff metric (`hausdorff`) 2 | 3 | ## Description 4 | 5 | The [`hausdorff`][ruptures.metrics.hausdorff.hausdorff] function computes the Hausdorff metric which measures the worst prediction error. 6 | Assume a set of change point indexes $t_1,t_2,\dots$ and their estimates $\hat{t}_1, \hat{t}_2,\dots$. 7 | The Hausdorff metric is then equal to 8 | 9 | $$ 10 | \text{Hausdorff}(\{t_k\}_k, \{\hat{t}_k\}_k) := \max \{ \max_k \min_l |t_k - \hat{t}_l| \, , \max_k \min_l |\hat{t}_k - t_l|\}. 11 | $$ 12 | 13 | ![](../../images/hausdorff.png) 14 |
Schematic example: true segmentation in gray, estimated segmentation in dashed lines. Here, Hausdorff is equal to $\max(\Delta t_1, \Delta t_2, \Delta t_3)$.
15 | 16 | ## Usage 17 | 18 | Start with the usual imports and create two segmentations to compare. 19 | 20 | ```python 21 | from ruptures.metrics import hausdorff 22 | 23 | bkps1, bkps2 = [100, 200, 500], [105, 115, 350, 400, 500] 24 | print(hausdorff(bkps1, bkps2)) 25 | ``` -------------------------------------------------------------------------------- /docs/user-guide/metrics/precisionrecall.md: -------------------------------------------------------------------------------- 1 | # Precision and recall (`precision_recall`) 2 | 3 | ## Description 4 | 5 | The precision and recall of an estimated segmentation is computed by the function [`precision_recall`][ruptures.metrics.precisionrecall.precision_recall] as follows. 6 | A true change point is declared "detected" (or positive) if there is at least one computed change point at less than "margin" points from it. 7 | Formally, assume a set of change point indexes $t_1,t_2,\dots$ and their estimates $\hat{t}_1, \hat{t}_2,\dots$ 8 | In the context of change point detection, precision and recall are defined as follows: 9 | 10 | $$ 11 | \text{precision}:=|\text{TP}|/|\{\hat{t}_l\}_l| \quad \text{and}\quad\text{recall}:=|\text{TP}|/|\{t_k\}_k| 12 | $$ 13 | 14 | where, for a given margin $M$, true positives $\text{TP}$ are true change points for which there is an estimated one at less than $M$ samples, i.e. 15 | 16 | $$ 17 | \text{TP}:= \{t_k\,|\, \exists\, \hat{t}_l\,\, \text{s.t.}\, |\hat{t}_l - t_k|Schematic example: true segmentation in gray, estimated segmentation in dashed lines and margin in dashed areas. Here, precision is 2/3 and recall is 2/2. 22 | 23 | ## Usage 24 | 25 | Start with the usual imports and create two change point sets to compare. 
26 | 27 | ```python 28 | from ruptures.metrics import precision_recall 29 | 30 | bkps1, bkps2 = [100, 200, 500], [105, 115, 350, 400, 500] 31 | p, r = precision_recall(bkps1, bkps2) 32 | print((p, r)) 33 | ``` 34 | 35 | The margin parameter $M$ can be changed through the keyword `margin` (default is 10 samples). 36 | 37 | ```python 38 | p, r = precision_recall(bkps1, bkps2, margin=10) 39 | print((p, r)) 40 | p, r = precision_recall(bkps1, bkps2, margin=20) 41 | print((p, r)) 42 | ``` -------------------------------------------------------------------------------- /docs/user-guide/metrics/randindex.md: -------------------------------------------------------------------------------- 1 | # Rand index (`randindex`) 2 | 3 | ## Description 4 | 5 | The Rand index ($RI$) measures the similarity between two segmentations and is 6 | equal to the proportion of agreement between two partitions. 7 | Formally, for $\mathcal{T}_1$ and $\mathcal{T}_2$ two partitions of $\{1, 2,\dots,T\}$, 8 | 9 | $$ 10 | RI := \frac{N_0 + N_1}{T(T+1)/2} 11 | $$ 12 | 13 | where 14 | 15 | - $N_0$ is the number of pairs of samples that belong to the same segment 16 | according to $\mathcal{T}_1$ and $\mathcal{T}_2$, 17 | - $N_1$ is the number of pairs of samples that belong to different segments 18 | according to $\mathcal{T}_1$ and $\mathcal{T}_2$. 19 | 20 | $RI$ is between 0 (total disagreement) and 1 (total agreement). 21 | It is available in the [`randindex`][ruptures.metrics.randindex.randindex] 22 | function which uses the efficient implementation of [[Prates2021]](#Prates2021). 23 | 24 | ## Usage 25 | 26 | Start with the usual imports and create two segmentations to compare. 27 | 28 | ```python 29 | from ruptures.metrics import randindex 30 | 31 | bkps1, bkps2 = [100, 200, 500], [105, 115, 350, 400, 500] 32 | print(randindex(bkps1, bkps2)) 33 | ``` 34 | 35 | ## References 36 | 37 | [Prates2021] 38 | Prates, L. (2021). 
A more efficient algorithm to compute the Rand Index for 39 | change-point problems. ArXiv:2112.03738. -------------------------------------------------------------------------------- /docs/user-guide/show/display.md: -------------------------------------------------------------------------------- 1 | # Display (`display`) 2 | 3 | ## Description 4 | 5 | The function [`display`][ruptures.show.display.display] displays a signal and the change points provided in alternating colors. 6 | If another set of change point indexes is provided, they are displayed with dashed vertical dashed lines. 7 | 8 | ## Usage 9 | 10 | Start with the usual imports and create a signal. 11 | 12 | ```python 13 | import numpy as np 14 | import matplotlib.pylab as plt 15 | import ruptures as rpt 16 | 17 | # creation of data 18 | n, dim = 500, 2 # number of samples, dimension 19 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 20 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 21 | rpt.display(signal, bkps) 22 | ``` 23 | 24 | If we computed another set of change points, for instance `[110, 150, 320, 500]`, we can easily compare the two segmentations. 25 | 26 | ```python 27 | rpt.display(signal, bkps, [110, 150, 320, 500]) 28 | ``` 29 | 30 | ![](../../images/example-display.png) 31 |
Example output of the function [`display`][ruptures.show.display.display].
-------------------------------------------------------------------------------- /docs/what-is-cpd.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | ## What is change point detection? 4 | 5 | Under construction. 6 | In the meantime, you can refer to the associated review of methods [[Truong2020]](#Truong2020). 7 | 8 | ## References 9 | 10 | [Truong2020] 11 | Truong, C., Oudre, L., & Vayatis, N. (2020). Selective review of offline change point detection methods. *Signal Processing*, 167. [[abstract]](https://deepcharles.github.io/publication/sp-review-2020) [[doi]](https://doi.org/10.1016/j.sigpro.2019.107299) [[pdf]](http://deepcharles.github.io/files/sp-review-2020.pdf) -------------------------------------------------------------------------------- /images/example_readme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/images/example_readme.png -------------------------------------------------------------------------------- /images/pw_constant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/images/pw_constant.png -------------------------------------------------------------------------------- /images/pw_constantdp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/images/pw_constantdp.png -------------------------------------------------------------------------------- /images/pw_linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/images/pw_linear.png 
-------------------------------------------------------------------------------- /images/pw_linearpelt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/images/pw_linearpelt.png -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: ruptures 2 | repo_url: https://github.com/deepcharles/ruptures 3 | repo_name: deepcharles/ruptures 4 | docs_dir: docs 5 | site_dir: site 6 | plugins: 7 | - search 8 | - mkdocstrings: 9 | watch: 10 | - ruptures 11 | handlers: 12 | python: 13 | options: 14 | filters: 15 | - "!^_" # exclude all members starting with _ 16 | - "^__init__$" # but always include __init__ modules and methods 17 | - mknotebooks: 18 | execute: false 19 | binder: false 20 | - macros: 21 | module_name: mkdocs_macros 22 | - section-index 23 | markdown_extensions: 24 | - pymdownx.snippets: 25 | - pymdownx.tabbed: 26 | - pymdownx.emoji: 27 | - pymdownx.superfences: 28 | - pymdownx.arithmatex: 29 | generic: true 30 | - admonition: 31 | - toc: 32 | permalink: "#" 33 | - codehilite 34 | nav: 35 | - Home: 36 | - index.md 37 | - Getting started: 38 | - what-is-cpd.md 39 | - 'Installation': install.md 40 | - 'Basic usage': getting-started/basic-usage.ipynb 41 | - 'Fitting and predicting': fit-and-predict.md 42 | - 'Custom cost function': custom-cost-function.md 43 | - User guide: 44 | - user-guide/index.md 45 | - Search methods: 46 | - 'Dynamic programming': user-guide/detection/dynp.md 47 | - 'Pelt': user-guide/detection/pelt.md 48 | - 'Kernel change detection': user-guide/detection/kernelcpd.md 49 | - 'Binary segmentation': user-guide/detection/binseg.md 50 | - 'Bottom-up segmentation': user-guide/detection/bottomup.md 51 | - 'Window sliding segmentation': user-guide/detection/window.md 52 | - Cost functions: 
53 | - 'CostL1': user-guide/costs/costl1.md 54 | - 'CostL2': user-guide/costs/costl2.md 55 | - 'CostNormal': user-guide/costs/costnormal.md 56 | - 'CostRbf': user-guide/costs/costrbf.md 57 | - 'CostCosine': user-guide/costs/costcosine.md 58 | - 'CostLinear': user-guide/costs/costlinear.md 59 | - 'CostCLinear': user-guide/costs/costclinear.md 60 | - 'CostRank': user-guide/costs/costrank.md 61 | - 'CostMl': user-guide/costs/costml.md 62 | - 'CostAR': user-guide/costs/costautoregressive.md 63 | - 'Custom cost': user-guide/costs/costcustom.md 64 | - Generate signals: 65 | - 'Piecewise constant': user-guide/datasets/pw_constant.md 66 | - 'Piecewise linear': user-guide/datasets/pw_linear.md 67 | - 'Piecewise Gaussian': user-guide/datasets/pw_normal.md 68 | - 'Piecewise sinusoidal': user-guide/datasets/pw_wavy.md 69 | - Evaluation metrics: 70 | - 'Precision and recall': user-guide/metrics/precisionrecall.md 71 | - 'Hausdorff metric': user-guide/metrics/hausdorff.md 72 | - 'Rand index': user-guide/metrics/randindex.md 73 | - Display: 74 | - user-guide/show/display.md 75 | - Gallery of examples: 76 | - examples/introduction.md 77 | - 'Simple usages': 78 | - 'Basic usage': examples/basic-usage.ipynb 79 | - 'Advanced usages': 80 | - 'Combining cost functions': examples/merging-cost-functions.ipynb 81 | - 'Kernel change point detection: a performance comparison': examples/kernel-cpd-performance-comparison.ipynb 82 | - 'Music segmentation': examples/music-segmentation.ipynb 83 | - 'Text segmentation': examples/text-segmentation.ipynb 84 | 85 | - Code reference: 86 | - code-reference/index.md 87 | - Base classes: code-reference/base-reference.md 88 | - Search methods: 89 | - Dynp: code-reference/detection/dynp-reference.md 90 | - KernelCPD: code-reference/detection/kernelcpd-reference.md 91 | - Pelt: code-reference/detection/pelt-reference.md 92 | - Binseg: code-reference/detection/binseg-reference.md 93 | - BottomUp: code-reference/detection/bottomup-reference.md 94 | - Window: 
code-reference/detection/window-reference.md 95 | - Cost functions: 96 | - 'CostL1': code-reference/costs/costl1-reference.md 97 | - 'CostL2': code-reference/costs/costl2-reference.md 98 | - 'CostNormal': code-reference/costs/costnormal-reference.md 99 | - 'CostRbf': code-reference/costs/costrbf-reference.md 100 | - 'CostCosine': code-reference/costs/costcosine-reference.md 101 | - 'CostLinear': code-reference/costs/costlinear-reference.md 102 | - 'CostCLinear': code-reference/costs/costclinear-reference.md 103 | - 'CostRank': code-reference/costs/costrank-reference.md 104 | - 'CostMl': code-reference/costs/costml-reference.md 105 | - 'CostAR': code-reference/costs/costautoregressive-reference.md 106 | - Data sets: 107 | - 'Piecewise constant': code-reference/datasets/pw_constant-reference.md 108 | - 'Piecewise linear': code-reference/datasets/pw_linear-reference.md 109 | - 'Piecewise normal': code-reference/datasets/pw_normal-reference.md 110 | - 'Piecewise wavy': code-reference/datasets/pw_wavy-reference.md 111 | - Metrics: 112 | - 'Precision and recall': code-reference/metrics/precisionrecall.md 113 | - 'Hausdorff metric': code-reference/metrics/hausdorff.md 114 | - 'Rand index': code-reference/metrics/randindex.md 115 | - Display: 116 | - 'Display function': code-reference/show/display.md 117 | - About: 118 | - 'Release notes': release-notes.md 119 | - 'Contributing': contributing.md 120 | - 'License': license.md 121 | theme: 122 | name: material 123 | features: 124 | - navigation.tabs 125 | - search.highlight 126 | extra_javascript: 127 | - javascripts/mathjax.js 128 | - https://polyfill.io/v3/polyfill.min.js?features=es6 129 | - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js 130 | - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js 131 | -------------------------------------------------------------------------------- /mkdocs_macros.py: -------------------------------------------------------------------------------- 1 | """This module contains 
Mkdocs macros to modify the documentation. The 2 | documentation on mkdocs plugins is available. 3 | 4 | [here](https://mkdocs-macros-plugin.readthedocs.io/en/latest/). 5 | """ 6 | 7 | BINDER_URL = "https://mybinder.org/v2/gh" 8 | BRANCH = "master" 9 | BINDER_TEMPLATE = """ 10 | --> 11 | !!! info 12 | - Try this notebook in an executable environment with [Binder]({binder_link}). 13 | - Download this notebook [here]({download_link}). 14 | D_{s..t} = D[t] - D[s] 78 | // S{s..t} has been stored in S[s] 79 | c_cost = D[t] - D[s] - S[s] / (t - s); 80 | n_bkps_max = int_min(n_bkps, s / min_size); // integer division: s / min_size = floor(s / min_size) 81 | for (k = 1; k <= n_bkps_max; k++) 82 | { 83 | // With k break points on y_{0..t}, sum cost with (k-1) break points on y_{0..s} and cost on y_{s..t} 84 | c_cost_sum = M_V[s * (n_bkps + 1) + (k - 1)] + c_cost; 85 | if (s == k * min_size) 86 | { 87 | // k is the smallest possibility for s in order to have k break points in y_{0..s}. 88 | // It means that y_0, y_1, ..., y_k are break points. 
89 | M_V[t * (n_bkps + 1) + k] = c_cost_sum; 90 | M_path[t * (n_bkps + 1) + k] = s; 91 | continue; 92 | } 93 | // Compare to current min 94 | if (M_V[t * (n_bkps + 1) + k] > c_cost_sum) 95 | { 96 | M_V[t * (n_bkps + 1) + k] = c_cost_sum; 97 | M_path[t * (n_bkps + 1) + k] = s; 98 | } 99 | } 100 | } 101 | } 102 | 103 | // Free memory 104 | free(D); 105 | free(S); 106 | free(M_V); 107 | 108 | return; 109 | } 110 | -------------------------------------------------------------------------------- /src/ruptures/detection/_detection/ekcpd_computation.h: -------------------------------------------------------------------------------- 1 | void ekcpd_compute(double *signal, int n_samples, int n_dims, int n_bkps, int min_size, void *kernelDescObj, int *M_path_res); -------------------------------------------------------------------------------- /src/ruptures/detection/_detection/ekcpd_pelt_computation.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "kernels.h" 6 | static inline int max_int(int a, int b) 7 | { 8 | if (a > b) 9 | return a; 10 | return b; 11 | } 12 | 13 | /** 14 | * @brief Efficient kernel change point detection 15 | * 16 | * @param signal shape (n_samples*n_dims,) 17 | * @param n_samples number of samples 18 | * @param n_dims number of dimensions 19 | * @param beta smoothing parameter 20 | * @param min_size minimum size of a segment 21 | * @param kernelDescObj describe the selected kernel 22 | * @param M_path path matrix of shape (n_samples+1), filled by the function 23 | */ 24 | void ekcpd_pelt_compute(double *signal, int n_samples, int n_dims, double beta, int min_size, void *kernelDescObj, int *M_path) 25 | { 26 | int t, s; 27 | int s_min = 0; 28 | 29 | // Allocate memory 30 | double *D, *S, *M_V, *M_pruning; 31 | double c_cost, c_cost_sum, c_r, diag_element; 32 | 33 | // Initialize and allocate memory 34 | // Allocate memory 35 | D = (double *)malloc((n_samples + 1) * 
sizeof(double)); 36 | S = (double *)malloc((n_samples + 1) * sizeof(double)); 37 | M_V = (double *)malloc((n_samples + 1) * sizeof(double)); 38 | M_pruning = (double *)malloc((n_samples + 1) * sizeof(double)); 39 | 40 | // D, S, M_V and M_path 41 | for (t = 0; t < (n_samples + 1); t++) 42 | { 43 | D[t] = 0.0; 44 | S[t] = 0.0; 45 | M_V[t] = 0.0; 46 | M_path[t] = 0; 47 | M_pruning[t] = 0.0; 48 | } 49 | 50 | // for t<2*min_size, there cannot be any change point. 51 | for (t = 1; t < 2 * min_size; t++) 52 | { 53 | diag_element = kernel_value_by_name(&(signal[(t - 1) * n_dims]), &(signal[(t - 1) * n_dims]), n_dims, kernelDescObj); 54 | D[t] = D[t - 1] + diag_element; 55 | 56 | // Compute S[t-1] = S_{t-1, t}, S[t-2] = S_{t-2, t}, ..., S[0] = S_{0, t} 57 | // S_{t-1, t} can be computed with S_{t-1, t-1}. 58 | // S_{t-1, t-1} was stored in S[t-1] 59 | // S_{t-1, t} will be stored in S[t-1] as well 60 | c_r = 0.0; 61 | for (s = t - 1; s >= 0; s--) 62 | { 63 | c_r += kernel_value_by_name(&(signal[s * n_dims]), &(signal[(t - 1) * n_dims]), n_dims, kernelDescObj); 64 | S[s] += 2 * c_r - diag_element; 65 | } 66 | c_cost = D[t] - D[0] - S[0] / t; 67 | M_V[t] = c_cost + beta; 68 | } 69 | 70 | // Computation loop 71 | // Handle y_{0..t} = {y_0, ..., y_{t-1}} 72 | for (t = 2 * min_size; t < (n_samples + 1); t++) 73 | { 74 | diag_element = kernel_value_by_name(&(signal[(t - 1) * n_dims]), &(signal[(t - 1) * n_dims]), n_dims, kernelDescObj); 75 | D[t] = D[t - 1] + diag_element; 76 | 77 | // Compute S[t-1] = S_{t-1, t}, S[t-2] = S_{t-2, t}, ..., S[0] = S_{0, t} 78 | // S_{t-1, t} can be computed with S_{t-1, t-1}. 
79 | // S_{t-1, t-1} was stored in S[t-1] 80 | // S_{t-1, t} will be stored in S[t-1] as well 81 | c_r = 0.0; 82 | for (s = t - 1; s >= s_min; s--) 83 | { 84 | c_r += kernel_value_by_name(&(signal[s * n_dims]), &(signal[(t - 1) * n_dims]), n_dims, kernelDescObj); 85 | S[s] += 2 * c_r - diag_element; 86 | } 87 | 88 | // Compute segmentations 89 | // Store the total cost on y_{0..t} with 0 break points in M_V[t, 0] 90 | // init 91 | s = s_min; 92 | c_cost = D[t] - D[s] - S[s] / (t - s); 93 | c_cost_sum = M_V[s] + c_cost; 94 | M_pruning[s] = c_cost_sum; 95 | c_cost_sum += beta; 96 | M_V[t] = c_cost_sum; 97 | M_path[t] = s; 98 | // search for minimum (penalized) sum of cost 99 | for (s = max_int(s_min + 1, min_size); s < t - min_size + 1; s++) 100 | { 101 | // Compute cost on y_{s..t} 102 | // D_{s..t} = D_{0..t} - D{0..s} <--> D_{s..t} = D[t] - D[s] 103 | // S{s..t} has been stored in S[s] 104 | c_cost = D[t] - D[s] - S[s] / (t - s); 105 | c_cost_sum = M_V[s] + c_cost; 106 | M_pruning[s] = c_cost_sum; 107 | c_cost_sum += beta; 108 | // Compare to current min 109 | if (M_V[t] > c_cost_sum) 110 | { 111 | M_V[t] = c_cost_sum; 112 | M_path[t] = s; 113 | } 114 | } 115 | // Pruning 116 | while ((M_pruning[s_min] >= M_V[t]) && (s_min < t - min_size + 1)) 117 | { 118 | if (s_min == 0) 119 | { 120 | s_min += min_size; 121 | } 122 | else 123 | { 124 | s_min++; 125 | } 126 | } 127 | } 128 | 129 | // Free memory 130 | free(D); 131 | free(S); 132 | free(M_V); 133 | free(M_pruning); 134 | 135 | return; 136 | } 137 | -------------------------------------------------------------------------------- /src/ruptures/detection/_detection/ekcpd_pelt_computation.h: -------------------------------------------------------------------------------- 1 | void ekcpd_pelt_compute(double *signal, int n_samples, int n_dims, double beta, int min_size, void *kernelDescObj, int *M_path); 2 | -------------------------------------------------------------------------------- 
/src/ruptures/detection/_detection/kernels.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "kernels.h" 5 | 6 | /************************************* 7 | * 8 | * Utils 9 | * 10 | *************************************/ 11 | 12 | static inline float min_f(float a, float b) 13 | { 14 | if (a > b) 15 | return b; 16 | return a; 17 | } 18 | 19 | static inline float max_f(float a, float b) 20 | { 21 | if (a > b) 22 | return a; 23 | return b; 24 | } 25 | 26 | float clip(float n, float lower, float upper) 27 | { 28 | return max_f(lower, min_f(n, upper)); 29 | } 30 | 31 | /************************************* 32 | * 33 | * Kernels 34 | * 35 | *************************************/ 36 | 37 | static inline double linear_kernel(double *x, double *y, int n_dims) 38 | { 39 | double kernel_value = 0.0; 40 | int dim; 41 | for (dim = 0; dim < n_dims; dim++) 42 | { 43 | kernel_value = kernel_value + x[dim] * y[dim]; 44 | } 45 | return (kernel_value); 46 | } 47 | 48 | static inline double gaussian_kernel(double *x, double *y, int n_dims, double gamma) 49 | { 50 | double squared_distance = 0.0; 51 | int t; 52 | for (t = 0; t < n_dims; t++) 53 | { 54 | squared_distance = squared_distance + (x[t] - y[t]) * (x[t] - y[t]); 55 | } 56 | // clipping to avoid exp under/overflow 57 | return exp(-clip(gamma * squared_distance, 0.01, 100)); 58 | } 59 | 60 | static inline double cosine_similarity(double *x, double *y, int n_dims) 61 | { 62 | double dot = 0.0, denom_x = 0.0, denom_y = 0.0 ; 63 | int i; 64 | for(i = 0; i < n_dims; i++) { 65 | dot += x[i] * y[i] ; 66 | denom_x += x[i] * x[i] ; 67 | denom_y += y[i] * y[i] ; 68 | } 69 | return dot / (sqrt(denom_x) * sqrt(denom_y)) ; 70 | } 71 | 72 | 73 | 74 | // Hub function that select proper kernel accoridng kernelObj 75 | double kernel_value_by_name(double *x, double *y, int n_dims, void *kernelObj) 76 | { 77 | if (strcmp(((KernelLinear *)kernelObj)->pBaseObj->name, 
r"""Binary segmentation."""

from functools import lru_cache

import numpy as np
from ruptures.base import BaseCost, BaseEstimator
from ruptures.costs import cost_factory
from ruptures.exceptions import BadSegmentationParameters
from ruptures.utils import pairwise, sanity_check


class Binseg(BaseEstimator):
    """Binary segmentation."""

    def __init__(self, model="l2", custom_cost=None, min_size=2, jump=5, params=None):
        """Initialize a Binseg instance.

        Args:
            model (str, optional): segment model, ["l1", "l2", "rbf",...]. Not used if ``'custom_cost'`` is not None.
            custom_cost (BaseCost, optional): custom cost function. Defaults to None.
            min_size (int, optional): minimum segment length. Defaults to 2 samples.
            jump (int, optional): subsample (one every *jump* points). Defaults to 5 samples.
            params (dict, optional): a dictionary of parameters for the cost instance.
        """
        if custom_cost is not None and isinstance(custom_cost, BaseCost):
            self.cost = custom_cost
        else:
            if params is None:
                self.cost = cost_factory(model=model)
            else:
                self.cost = cost_factory(model=model, **params)
        # the cost function may impose a larger minimum segment length
        self.min_size = max(min_size, self.cost.min_size)
        self.jump = jump
        self.n_samples = None
        self.signal = None

    def _seg(self, n_bkps=None, pen=None, epsilon=None):
        """Compute the binary segmentation.

        The stopping rule depends on the parameter passed to the function.

        Args:
            n_bkps (int): number of breakpoints to find before stopping.
            pen (float): penalty value (>0).
            epsilon (float): reconstruction budget (>0).

        Returns:
            dict: partition dict {(start, end): cost value,...}
        """
        # initialization: a single segment covering the whole signal
        bkps = [self.n_samples]
        stop = False
        while not stop:
            stop = True
            # best split of each current segment
            new_bkps = [
                self.single_bkp(start, end) for start, end in pairwise([0] + bkps)
            ]
            bkp, gain = max(new_bkps, key=lambda x: x[1])

            if bkp is None:  # all possible configurations have been explored
                break

            if n_bkps is not None:
                if len(bkps) - 1 < n_bkps:
                    stop = False
            elif pen is not None:
                if gain > pen:
                    stop = False
            elif epsilon is not None:
                error = self.cost.sum_of_costs(bkps)
                if error > epsilon:
                    stop = False

            if not stop:
                bkps.append(bkp)
                bkps.sort()
        partition = {
            (start, end): self.cost.error(start, end)
            for start, end in pairwise([0] + bkps)
        }
        return partition

    @lru_cache(maxsize=None)
    def single_bkp(self, start, end):
        """Return the optimal breakpoint of [start:end] (if it exists)."""
        segment_cost = self.cost.error(start, end)
        if np.isneginf(segment_cost):  # cost is -inf: splitting cannot help
            return None, 0
        gain_list = list()
        for bkp in range(start, end, self.jump):
            if bkp - start >= self.min_size and end - bkp >= self.min_size:
                gain = (
                    segment_cost
                    - self.cost.error(start, bkp)
                    - self.cost.error(bkp, end)
                )
                gain_list.append((gain, bkp))
        try:
            gain, bkp = max(gain_list)
        except ValueError:  # no admissible breakpoint (empty sub-sampling)
            return None, 0
        return bkp, gain

    def fit(self, signal) -> "Binseg":
        """Compute params to segment signal.

        Args:
            signal (array): signal to segment. Shape (n_samples, n_features) or (n_samples,).

        Returns:
            self
        """
        # update some params
        if signal.ndim == 1:
            self.signal = signal.reshape(-1, 1)
        else:
            self.signal = signal
        self.n_samples, _ = self.signal.shape
        self.cost.fit(signal)
        # cached splits refer to the previous signal: drop them
        self.single_bkp.cache_clear()

        return self

    def predict(self, n_bkps=None, pen=None, epsilon=None):
        """Return the optimal breakpoints.

        Must be called after the fit method. The breakpoints are associated with the
        signal passed to [`fit()`][ruptures.detection.binseg.Binseg.fit].
        The stopping rule depends on the parameter passed to the function.

        Args:
            n_bkps (int): number of breakpoints to find before stopping.
            pen (float): penalty value (>0)
            epsilon (float): reconstruction budget (>0)

        Raises:
            AssertionError: if none of `n_bkps`, `pen`, `epsilon` is set.
            BadSegmentationParameters: in case of impossible segmentation
                configuration

        Returns:
            list: sorted list of breakpoints
        """
        msg = "Give a parameter."
        assert any(param is not None for param in (n_bkps, pen, epsilon)), msg

        # raise an exception in case of impossible segmentation configuration
        if not sanity_check(
            n_samples=self.cost.signal.shape[0],
            n_bkps=0 if n_bkps is None else n_bkps,
            jump=self.jump,
            min_size=self.min_size,
        ):
            raise BadSegmentationParameters

        partition = self._seg(n_bkps=n_bkps, pen=pen, epsilon=epsilon)
        bkps = sorted(e for s, e in partition.keys())
        return bkps

    def fit_predict(self, signal, n_bkps=None, pen=None, epsilon=None):
        """Fit to the signal and return the optimal breakpoints.

        Helper method to call fit and predict once

        Args:
            signal (array): signal. Shape (n_samples, n_features) or (n_samples,).
            n_bkps (int): number of breakpoints.
            pen (float): penalty value (>0)
            epsilon (float): reconstruction budget (>0)

        Returns:
            list: sorted list of breakpoints
        """
        self.fit(signal)
        return self.predict(n_bkps=n_bkps, pen=pen, epsilon=epsilon)
r"""Dynamic programming."""

from functools import lru_cache

from ruptures.utils import sanity_check
from ruptures.costs import cost_factory
from ruptures.base import BaseCost, BaseEstimator
from ruptures.exceptions import BadSegmentationParameters


class Dynp(BaseEstimator):
    """Find optimal change points using dynamic programming.

    Given a segment model, it computes the best partition for which the
    sum of errors is minimum.
    """

    def __init__(self, model="l2", custom_cost=None, min_size=2, jump=5, params=None):
        """Creates a Dynp instance.

        Args:
            model (str, optional): segment model, ["l1", "l2", "rbf"]. Not used if ``'custom_cost'`` is not None.
            custom_cost (BaseCost, optional): custom cost function. Defaults to None.
            min_size (int, optional): minimum segment length.
            jump (int, optional): subsample (one every *jump* points).
            params (dict, optional): a dictionary of parameters for the cost instance.
        """
        if custom_cost is not None and isinstance(custom_cost, BaseCost):
            self.cost = custom_cost
        else:
            # NOTE(review): `model_name` is only set on this branch — when a
            # custom cost is given, the attribute does not exist; confirm no
            # caller reads it in that case.
            self.model_name = model
            if params is None:
                self.cost = cost_factory(model=model)
            else:
                self.cost = cost_factory(model=model, **params)
        # the cost function may impose a larger minimum segment length
        self.min_size = max(min_size, self.cost.min_size)
        self.jump = jump
        self.n_samples = None

    @lru_cache(maxsize=None)
    def seg(self, start, end, n_bkps):
        """Recurrence to find the optimal partition of signal[start:end].

        This method is to be memoized and then used.

        Args:
            start (int): start of the segment (inclusive)
            end (int): end of the segment (exclusive)
            n_bkps (int): number of breakpoints

        Returns:
            dict: {(start, end): cost value, ...}
        """
        jump, min_size = self.jump, self.min_size

        if n_bkps == 0:
            # base case: no change point, the whole-segment cost is the answer
            cost = self.cost.error(start, end)
            return {(start, end): cost}
        elif n_bkps > 0:
            # Let's fill the list of admissible last breakpoints
            multiple_of_jump = (k for k in range(start, end) if k % jump == 0)
            admissible_bkps = list()
            for bkp in multiple_of_jump:
                n_samples = bkp - start
                # first check if left subproblem is possible
                if sanity_check(
                    n_samples=n_samples,
                    n_bkps=n_bkps - 1,
                    jump=jump,
                    min_size=min_size,
                ):
                    # second check if the right subproblem has enough points
                    if end - bkp >= min_size:
                        admissible_bkps.append(bkp)

            assert (
                len(admissible_bkps) > 0
            ), "No admissible last breakpoints found.\
start, end: ({},{}), n_bkps: {}.".format(
                start, end, n_bkps
            )

            # Compute the subproblems: split on each admissible last
            # breakpoint and recurse on the left part (memoized).
            sub_problems = list()
            for bkp in admissible_bkps:
                left_partition = self.seg(start, bkp, n_bkps - 1)
                right_partition = self.seg(bkp, end, 0)
                tmp_partition = dict(left_partition)
                tmp_partition[(bkp, end)] = right_partition[(bkp, end)]
                sub_problems.append(tmp_partition)

            # Find the optimal partition (minimum total cost)
            return min(sub_problems, key=lambda d: sum(d.values()))

    def fit(self, signal) -> "Dynp":
        """Create the cache associated with the signal.

        Dynamic programming is a recurrence; intermediate results are cached to speed up
        computations. This method sets up the cache.

        Args:
            signal (array): signal. Shape (n_samples, n_features) or (n_samples,).

        Returns:
            self
        """
        # clear cache (it refers to the previously fitted signal)
        self.seg.cache_clear()
        # update some params
        self.cost.fit(signal)
        self.n_samples = signal.shape[0]
        return self

    def predict(self, n_bkps):
        """Return the optimal breakpoints.

        Must be called after the fit method. The breakpoints are associated with the signal passed
        to [`fit()`][ruptures.detection.dynp.Dynp.fit].

        Args:
            n_bkps (int): number of breakpoints.

        Raises:
            BadSegmentationParameters: in case of impossible segmentation
                configuration

        Returns:
            list: sorted list of breakpoints
        """
        # raise an exception in case of impossible segmentation configuration
        if not sanity_check(
            n_samples=self.cost.signal.shape[0],
            n_bkps=n_bkps,
            jump=self.jump,
            min_size=self.min_size,
        ):
            raise BadSegmentationParameters
        partition = self.seg(0, self.n_samples, n_bkps)
        bkps = sorted(e for s, e in partition.keys())
        return bkps

    def fit_predict(self, signal, n_bkps):
        """Fit to the signal and return the optimal breakpoints.

        Helper method to call fit and predict once

        Args:
            signal (array): signal. Shape (n_samples, n_features) or (n_samples,).
            n_bkps (int): number of breakpoints.

        Returns:
            list: sorted list of breakpoints
        """
        self.fit(signal)
        return self.predict(n_bkps)
r"""Pelt."""

from math import floor

from ruptures.costs import cost_factory
from ruptures.base import BaseCost, BaseEstimator
from ruptures.exceptions import BadSegmentationParameters
from ruptures.utils import sanity_check


class Pelt(BaseEstimator):
    """Penalized change point detection.

    For a given model and penalty level, computes the segmentation which
    minimizes the constrained sum of approximation errors.
    """

    def __init__(self, model="l2", custom_cost=None, min_size=2, jump=5, params=None):
        """Initialize a Pelt instance.

        Args:
            model (str, optional): segment model, ["l1", "l2", "rbf"]. Not used if ``'custom_cost'`` is not None.
            custom_cost (BaseCost, optional): custom cost function. Defaults to None.
            min_size (int, optional): minimum segment length.
            jump (int, optional): subsample (one every *jump* points).
            params (dict, optional): a dictionary of parameters for the cost instance.
        """
        if custom_cost is not None and isinstance(custom_cost, BaseCost):
            self.cost = custom_cost
        else:
            if params is None:
                self.cost = cost_factory(model=model)
            else:
                self.cost = cost_factory(model=model, **params)
        # the cost function may impose a larger minimum segment length
        self.min_size = max(min_size, self.cost.min_size)
        self.jump = jump
        self.n_samples = None

    def _seg(self, pen):
        """Compute the segmentation for a given penalty using PELT.

        Args:
            pen (float): penalty value

        Returns:
            dict: partition dict {(start, end): cost value,...}
        """
        # initialization
        # partitions[t] contains the optimal partition of signal[0:t]
        partitions = dict()  # this dict will be recursively filled
        partitions[0] = {(0, 0): 0}
        admissible = []

        # Recursion: consider each candidate segmentation end point in turn
        ind = [k for k in range(0, self.n_samples, self.jump) if k >= self.min_size]
        ind += [self.n_samples]
        for bkp in ind:
            # adding a point to the admissible set from the previous loop.
            new_adm_pt = floor((bkp - self.min_size) / self.jump)
            new_adm_pt *= self.jump
            admissible.append(new_adm_pt)

            subproblems = list()
            for t in admissible:
                # left partition
                try:
                    tmp_partition = partitions[t].copy()
                except KeyError:  # no partition of 0:t exists
                    continue
                # we update with the right partition
                tmp_partition.update({(t, bkp): self.cost.error(t, bkp) + pen})
                subproblems.append(tmp_partition)

            # finding the optimal partition
            partitions[bkp] = min(subproblems, key=lambda d: sum(d.values()))
            # trimming the admissible set (PELT pruning rule): a candidate
            # start point whose best cost already exceeds the current optimum
            # plus the penalty can never become optimal later and is dropped.
            admissible = [
                t
                for t, partition in zip(admissible, subproblems)
                if sum(partition.values()) <= sum(partitions[bkp].values()) + pen
            ]

        best_partition = partitions[self.n_samples]
        # drop the artificial empty segment used to seed the recursion
        del best_partition[(0, 0)]
        return best_partition

    def fit(self, signal) -> "Pelt":
        """Set params.

        Args:
            signal (array): signal to segment. Shape (n_samples, n_features) or (n_samples,).

        Returns:
            self
        """
        # update params
        self.cost.fit(signal)
        if signal.ndim == 1:
            (n_samples,) = signal.shape
        else:
            n_samples, _ = signal.shape
        self.n_samples = n_samples
        return self

    def predict(self, pen):
        """Return the optimal breakpoints.

        Must be called after the fit method. The breakpoints are associated with the signal passed
        to [`fit()`][ruptures.detection.pelt.Pelt.fit].

        Args:
            pen (float): penalty value (>0)

        Raises:
            BadSegmentationParameters: in case of impossible segmentation
                configuration

        Returns:
            list: sorted list of breakpoints
        """
        # raise an exception in case of impossible segmentation configuration
        if not sanity_check(
            n_samples=self.cost.signal.shape[0],
            n_bkps=0,
            jump=self.jump,
            min_size=self.min_size,
        ):
            raise BadSegmentationParameters

        partition = self._seg(pen)
        bkps = sorted(e for s, e in partition.keys())
        return bkps

    def fit_predict(self, signal, pen):
        """Fit to the signal and return the optimal breakpoints.

        Helper method to call fit and predict once

        Args:
            signal (array): signal. Shape (n_samples, n_features) or (n_samples,).
            pen (float): penalty value (>0)

        Returns:
            list: sorted list of breakpoints
        """
        self.fit(signal)
        return self.predict(pen)


# ----- file: src/ruptures/exceptions.py -----

"""The `ruptures.exceptions` module includes all custom warnings and error
classes used across ruptures."""


class NotEnoughPoints(Exception):
    """Raise this exception when there are not enough points to calculate a
    cost function."""


class BadSegmentationParameters(Exception):
    """Raise this exception when a segmentation is not possible given the
    parameters."""
"""Hamming metric for segmentation."""

from ruptures.metrics.randindex import randindex


def hamming(bkps1, bkps2):
    """Modified Hamming distance for partitions.

    For every pair of distinct points (x, y), counts how often the two
    partitions disagree on whether x and y belong to the same regime; the
    result is scaled to lie within 0 and 1.

    Args:
        bkps1 (list): list of the last index of each regime.
        bkps2 (list): list of the last index of each regime.

    Returns:
        float: Hamming distance.
    """
    # The normalized Hamming distance is the complement of the Rand index.
    agreement = randindex(bkps1=bkps1, bkps2=bkps2)
    return 1 - agreement
r"""Hausdorff metric."""

import numpy as np
from scipy.spatial.distance import cdist
from ruptures.metrics.sanity_check import sanity_check


def hausdorff(bkps1, bkps2):
    """Compute the Hausdorff distance between changepoints.

    Args:
        bkps1 (list): list of the last index of each regime.
        bkps2 (list): list of the last index of each regime.

    Returns:
        float: Hausdorff distance.
    """
    sanity_check(bkps1, bkps2)
    # The trailing index (signal length) is common to both partitions and
    # is excluded from the distance computation.
    first = np.array(bkps1[:-1]).reshape(-1, 1)
    second = np.array(bkps2[:-1]).reshape(-1, 1)
    dist_matrix = cdist(first, second)
    # Directed distances in both directions; the Hausdorff distance is the
    # larger of the two.
    d_first_to_second = dist_matrix.min(axis=1).max()
    d_second_to_first = dist_matrix.min(axis=0).max()
    return max(d_second_to_first, d_first_to_second)
r"""Precision and recall."""

from itertools import product

from ruptures.metrics.sanity_check import sanity_check


def precision_recall(true_bkps, my_bkps, margin=10):
    """Calculate the precision/recall of an estimated segmentation compared
    with the true segmentation.

    Args:
        true_bkps (list): list of the last index of each regime (true
            partition).
        my_bkps (list): list of the last index of each regime (computed
            partition).
        margin (int, optional): allowed error (in points).

    Returns:
        tuple: (precision, recall)
    """
    sanity_check(true_bkps, my_bkps)
    assert margin > 0, "Margin of error must be positive (margin = {})".format(margin)

    # no estimated breakpoint at all (only the trailing index)
    if len(my_bkps) == 1:
        return 0, 0

    # Greedy matching: scan all (true, predicted) pairs in order; a predicted
    # breakpoint may be matched at most once, and a true breakpoint counts as
    # found when at least one prediction falls strictly within the margin.
    matched_preds = set()
    true_pos = set()
    for true_b, my_b in product(true_bkps[:-1], my_bkps[:-1]):
        if my_b - margin < true_b < my_b + margin and my_b not in matched_preds:
            matched_preds.add(my_b)
            true_pos.add(true_b)

    tp_ = len(true_pos)
    precision = tp_ / (len(my_bkps) - 1)
    recall = tp_ / (len(true_bkps) - 1)
    return precision, recall
r"""Rand index (`randindex`)"""

from ruptures.metrics.sanity_check import sanity_check


def randindex(bkps1, bkps2):
    """Computes the Rand index (between 0 and 1) between two segmentations.

    The Rand index (RI) measures the similarity between two segmentations and
    is equal to the proportion of aggreement between two partitions.

    RI is between 0 (total disagreement) and 1 (total agreement).
    This function uses the efficient implementation of [1].

    [1] Prates, L. (2021). A more efficient algorithm to compute the Rand Index for
    change-point problems. ArXiv:2112.03738.

    Args:
        bkps1 (list): sorted list of the last index of each regime.
        bkps2 (list): sorted list of the last index of each regime.

    Returns:
        float: Rand index
    """
    sanity_check(bkps1, bkps2)
    n_samples = bkps1[-1]
    # prepend 0 so that each segment is (bkps_with_0[i], bkps_with_0[i + 1])
    bkps1_with_0 = [0] + bkps1
    bkps2_with_0 = [0] + bkps2
    n_bkps1 = len(bkps1)
    n_bkps2 = len(bkps2)

    # Each pair of overlapping segments (one per partition) contributes
    # nij * |end1 - end2| disagreeing sample pairs, where nij is the overlap
    # length (Prates 2021, Thm. 2). Only overlapping segment pairs are
    # visited, giving O(n_bkps1 + n_bkps2) inner iterations in total.
    disagreement = 0
    beginj: int = 0  # avoids unnecessary computations
    for index_bkps1 in range(n_bkps1):
        start1: int = bkps1_with_0[index_bkps1]
        end1: int = bkps1_with_0[index_bkps1 + 1]
        for index_bkps2 in range(beginj, n_bkps2):
            start2: int = bkps2_with_0[index_bkps2]
            end2: int = bkps2_with_0[index_bkps2 + 1]
            nij = max(min(end1, end2) - max(start1, start2), 0)
            disagreement += nij * abs(end1 - end2)

            # we can skip the rest of the iteration, nij will be 0
            if end1 < end2:
                break
            else:
                # segment index_bkps2 ends at or before end1: it cannot
                # overlap any later segment of partition 1 either
                beginj = index_bkps2 + 1

    # normalize by the total number of sample pairs
    disagreement /= n_samples * (n_samples - 1) / 2
    return 1.0 - disagreement
"""Helper function to check if two breakpoints list are comparable."""


class BadPartitions(Exception):
    """Exception raised when the partition is bad."""

    pass


def sanity_check(bkps1, bkps2):
    """Checks if two partitions are indeed partitions of the same signal.

    Args:
        bkps1 (list): list of the last index of each regime.
        bkps2 (list): list of the last index of each regime.

    Raises:
        BadPartitions: whenever a partition does not respect some conditions.

    Returns:
        None:
    """
    # checks if empty.
    for position, bkps in (("first", bkps1), ("second", bkps2)):
        if len(bkps) == 0:
            raise BadPartitions("The {} partition is empty.".format(position))
    # checks if both ends with the same index.
    if max(bkps1) != max(bkps2):
        raise BadPartitions(
            "The end of the last regime is not the same for each of the "
            "partitions:\n{}\n{}".format(bkps1, bkps2)
        )
    # checks if there is repetition.
    for bkps in (bkps1, bkps2):
        if len(set(bkps)) != len(bkps):
            raise BadPartitions("Some indexes are repeated: {}".format(bkps))
"""Mean time error."""

import numpy as np
from scipy.spatial.distance import cdist

from ruptures.metrics.sanity_check import sanity_check


def meantime(true_bkps, my_bkps):
    """For each computed changepoint, the mean time error is the average number
    of points to the closest true changepoint. Not a symetric funtion.

    Args:
        true_bkps (list): list of the last index of each regime (true
            partition).
        my_bkps (list): list of the last index of each regime (computed
            partition)

    Returns:
        float: mean time error.
    """
    sanity_check(true_bkps, my_bkps)
    # the trailing index (signal length) is shared and excluded
    true_arr = np.array(true_bkps[:-1]).reshape(-1, 1)
    pred_arr = np.array(my_bkps[:-1]).reshape(-1, 1)
    # distance of each computed breakpoint to its closest true breakpoint
    dist_from_true = cdist(true_arr, pred_arr).min(axis=0)
    assert len(dist_from_true) == len(my_bkps) - 1

    return dist_from_true.mean()
r"""Display."""

from itertools import cycle

import numpy as np

from ruptures.utils import pairwise

COLOR_CYCLE = ["#4286f4", "#f44174"]


class MatplotlibMissingError(RuntimeError):
    """Raised when matplotlib (an optional dependency) is not installed."""


def display(
    signal,
    true_chg_pts,
    computed_chg_pts=None,
    computed_chg_pts_color="k",
    computed_chg_pts_linewidth=3,
    computed_chg_pts_linestyle="--",
    computed_chg_pts_alpha=1.0,
    **kwargs
):
    """Display a signal and the change points provided in alternating colors.
    If another set of change point is provided, they are displayed with dashed
    vertical dashed lines. The following matplotlib subplots options is set by
    default, but can be changed when calling `display`):

    - figure size `figsize`, defaults to `(10, 2 * n_features)`.

    Args:
        signal (array): signal array, shape (n_samples,) or (n_samples, n_features).
        true_chg_pts (list): list of change point indexes.
        computed_chg_pts (list, optional): list of change point indexes.
        computed_chg_pts_color (str, optional): color of the lines indicating
            the computed_chg_pts. Defaults to "k".
        computed_chg_pts_linewidth (int, optional): linewidth of the lines
            indicating the computed_chg_pts. Defaults to 3.
        computed_chg_pts_linestyle (str, optional): linestyle of the lines
            indicating the computed_chg_pts. Defaults to "--".
        computed_chg_pts_alpha (float, optional): alpha of the lines indicating
            the computed_chg_pts. Defaults to "1.0".
        **kwargs : all additional keyword arguments are passed to the plt.subplots call.

    Raises:
        MatplotlibMissingError: if matplotlib is not installed.

    Returns:
        tuple: (figure, axarr) with a :class:`matplotlib.figure.Figure` object and an array of Axes objects.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # fixed typo in the user-facing message ("matpotlib" -> "matplotlib")
        raise MatplotlibMissingError(
            "This feature requires the optional dependency matplotlib, you can install it using `pip install matplotlib`."
        )

    # fixed type check: use isinstance instead of `type(signal) != np.ndarray`
    if not isinstance(signal, np.ndarray):
        # Try to get array from Pandas dataframe
        signal = signal.values

    if signal.ndim == 1:
        signal = signal.reshape(-1, 1)
    n_samples, n_features = signal.shape

    # let's set a sensible default size for the subplots
    matplotlib_options = {
        "figsize": (10, 2 * n_features),  # figure size
    }
    # add/update the options given by the user
    matplotlib_options.update(kwargs)

    # create plots
    fig, axarr = plt.subplots(n_features, sharex=True, **matplotlib_options)
    if n_features == 1:
        # a single Axes object is returned; normalize to an iterable
        axarr = [axarr]

    for axe, sig in zip(axarr, signal.T):
        color_cycle = cycle(COLOR_CYCLE)
        # plot s
        axe.plot(range(n_samples), sig)

        # color each (true) regime
        bkps = [0] + sorted(true_chg_pts)
        alpha = 0.2  # transparency of the colored background

        for (start, end), col in zip(pairwise(bkps), color_cycle):
            axe.axvspan(max(0, start - 0.5), end - 0.5, facecolor=col, alpha=alpha)
        # vertical lines to mark the computed_chg_pts
        if computed_chg_pts is not None:
            for bkp in computed_chg_pts:
                if bkp != 0 and bkp < n_samples:
                    axe.axvline(
                        x=bkp - 0.5,
                        color=computed_chg_pts_color,
                        linewidth=computed_chg_pts_linewidth,
                        linestyle=computed_chg_pts_linestyle,
                        alpha=computed_chg_pts_alpha,
                    )

    fig.tight_layout()

    return fig, axarr
10 | convert_pm.convert_path_matrix_c(&path_matrix_flat[0], n_bkps, n_samples, n_bkps_max, jump, &bkps_list[0]) 11 | except: 12 | print("An exception occurred.") 13 | return np.asarray(bkps_list).tolist() -------------------------------------------------------------------------------- /src/ruptures/utils/_utils/convert_path_matrix_c.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void convert_path_matrix_c(int *path_matrix, int n_bkps, int n_samples, int n_bkps_max, int jump, int *bkps_list) 4 | { 5 | int q = (int)ceil((float)n_samples / (float)jump); 6 | bkps_list[n_bkps] = q; 7 | int k = 0; 8 | while (k++ < n_bkps) 9 | { 10 | bkps_list[n_bkps - k] = path_matrix[bkps_list[n_bkps - k + 1] * (n_bkps_max + 1) + (n_bkps - k + 1)]; 11 | } 12 | for (k = 0 ; k < n_bkps + 1 ; k++) 13 | { 14 | bkps_list[k] = bkps_list[k] * jump; 15 | } 16 | bkps_list[n_bkps] = n_samples; 17 | return; 18 | } 19 | -------------------------------------------------------------------------------- /src/ruptures/utils/_utils/convert_path_matrix_c.h: -------------------------------------------------------------------------------- 1 | void convert_path_matrix_c(int *path_matrix, int n_bkps, int n_samples, int n_bkps_max, int jump, int *bkps_list); 2 | -------------------------------------------------------------------------------- /src/ruptures/utils/bnode.py: -------------------------------------------------------------------------------- 1 | """Binary node.""" 2 | 3 | import functools 4 | import numpy as np 5 | 6 | 7 | @functools.total_ordering 8 | class Bnode: 9 | """Binary node. 10 | 11 | In binary segmentation, each segment [start, end) is a binary node. 
12 | """ 13 | 14 | def __init__(self, start, end, val, left=None, right=None, parent=None): 15 | self.start = start 16 | self.end = end 17 | self.val = val 18 | self.left = left 19 | self.right = right 20 | self.parent = parent 21 | 22 | @property 23 | def gain(self): 24 | """Return the cost decrease when splitting this node.""" 25 | if self.left is None or self.right is None: 26 | return 0 27 | elif np.isinf(self.val) and self.val < 0: 28 | return 0 29 | return self.val - (self.left.val + self.right.val) 30 | 31 | def __lt__(self, other): 32 | return self.start < other.start 33 | 34 | def __eq__(self, other): 35 | return ( 36 | isinstance(other, self.__class__) 37 | and self.start == other.start 38 | and self.end == other.end 39 | ) 40 | 41 | def __hash__(self): 42 | return hash((self.__class__, self.start, self.end)) 43 | -------------------------------------------------------------------------------- /src/ruptures/utils/drawbkps.py: -------------------------------------------------------------------------------- 1 | r"""Draw a random partition.""" 2 | 3 | import numpy as np 4 | 5 | 6 | def draw_bkps(n_samples=100, n_bkps=3, seed=None): 7 | """Draw a random partition with specified number of samples and specified 8 | number of changes.""" 9 | rng = np.random.default_rng(seed=seed) 10 | alpha = np.ones(n_bkps + 1) / (n_bkps + 1) * 2000 11 | bkps = np.cumsum(rng.dirichlet(alpha) * n_samples).astype(int).tolist() 12 | bkps[-1] = n_samples 13 | return bkps 14 | -------------------------------------------------------------------------------- /src/ruptures/utils/utils.py: -------------------------------------------------------------------------------- 1 | """Miscellaneous functions for ruptures.""" 2 | 3 | from itertools import tee 4 | from math import ceil 5 | 6 | 7 | def pairwise(iterable): 8 | """S -> (s0,s1), (s1,s2), (s2, s3), ...""" 9 | a, b = tee(iterable) 10 | next(b, None) 11 | return zip(a, b) 12 | 13 | 14 | def unzip(seq): 15 | """Reverse zip.""" 16 | return 
zip(*seq) 17 | 18 | 19 | def sanity_check(n_samples, n_bkps, jump, min_size): 20 | """Check if a partition if possible given some segmentation parameters. 21 | 22 | Args: 23 | n_samples (int): number of point in the signal 24 | n_bkps (int): number of breakpoints 25 | jump (int): the start index of each regime can only be a multiple of 26 | "jump" (and the end index = -1 modulo "jump"). 27 | min_size (int): minimum size of a segment. 28 | 29 | Returns: 30 | bool: True if there exists a potential configuration of 31 | breakpoints for the given parameters. False if it does not. 32 | """ 33 | n_adm_bkps = n_samples // jump # number of admissible breakpoints 34 | 35 | # Are there enough points for the given number of regimes? 36 | if n_bkps > n_adm_bkps: 37 | return False 38 | if n_bkps * ceil(min_size / jump) * jump + min_size > n_samples: 39 | return False 40 | return True 41 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_bnode.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ruptures.utils import Bnode 3 | 4 | 5 | def test_bnode(): 6 | left = Bnode(start=100, end=120, val=1) 7 | right = Bnode(start=120, end=200, val=1) 8 | 9 | # bad merging, no right leaf 10 | merged_node = Bnode(start=left.start, end=right.end, left=left, right=None, val=3) 11 | assert merged_node.gain == 0 12 | 13 | # bad merging, no left leaf 14 | merged_node = Bnode(start=left.start, end=right.end, left=None, right=right, val=3) 15 | assert merged_node.gain == 0 16 | 17 | # bad merging, negative infinit val 18 | merged_node = Bnode( 19 | start=left.start, end=right.end, left=left, 
right=right, val=-np.inf 20 | ) 21 | assert merged_node.gain == 0 22 | 23 | # normal merging 24 | merged_node = Bnode( 25 | start=left.start, 26 | end=right.end, 27 | left=left, 28 | right=right, 29 | val=left.val + right.val + 1, 30 | ) 31 | assert merged_node.gain == merged_node.val - (left.val + right.val) 32 | -------------------------------------------------------------------------------- /tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | 3 | import pytest 4 | import numpy as np 5 | 6 | from ruptures.datasets import pw_constant, pw_linear, pw_normal, pw_wavy 7 | 8 | 9 | @pytest.mark.parametrize("func", [pw_constant, pw_linear, pw_normal, pw_wavy]) 10 | def test_empty_arg(func): 11 | func() 12 | 13 | 14 | @pytest.mark.parametrize( 15 | "func, n_samples, n_features, n_bkps, noise_std", 16 | product([pw_constant], range(20, 1000, 200), range(1, 4), [2, 5, 3], [None, 1, 2]), 17 | ) 18 | def test_constant(func, n_samples, n_features, n_bkps, noise_std): 19 | signal, bkps = func( 20 | n_samples=n_samples, n_features=n_features, n_bkps=n_bkps, noise_std=noise_std 21 | ) 22 | assert signal.shape == (n_samples, n_features) 23 | assert len(bkps) == n_bkps + 1 24 | assert bkps[-1] == n_samples 25 | 26 | 27 | def test_seed(n_samples=200, n_features=3, n_bkps=5, noise_std=1, seed=12345): 28 | # pw_constant 29 | signal1, bkps1 = pw_constant( 30 | n_samples=n_samples, 31 | n_features=n_features, 32 | n_bkps=n_bkps, 33 | noise_std=noise_std, 34 | seed=seed, 35 | ) 36 | signal2, bkps2 = pw_constant( 37 | n_samples=n_samples, 38 | n_features=n_features, 39 | n_bkps=n_bkps, 40 | noise_std=noise_std, 41 | seed=seed, 42 | ) 43 | assert np.allclose(signal1, signal2) 44 | assert bkps1 == bkps2 45 | 46 | # pw_normal 47 | signal1, bkps1 = pw_normal(n_samples=n_samples, n_bkps=n_bkps, seed=seed) 48 | signal2, bkps2 = pw_normal(n_samples=n_samples, n_bkps=n_bkps, seed=seed) 49 | assert 
np.allclose(signal1, signal2) 50 | assert bkps1 == bkps2 51 | 52 | # pw_linear 53 | signal1, bkps1 = pw_linear( 54 | n_samples=n_samples, 55 | n_features=n_features, 56 | n_bkps=n_bkps, 57 | noise_std=noise_std, 58 | seed=seed, 59 | ) 60 | signal2, bkps2 = pw_linear( 61 | n_samples=n_samples, 62 | n_features=n_features, 63 | n_bkps=n_bkps, 64 | noise_std=noise_std, 65 | seed=seed, 66 | ) 67 | assert np.allclose(signal1, signal2) 68 | assert bkps1 == bkps2 69 | 70 | # pw_wavy 71 | signal1, bkps1 = pw_wavy( 72 | n_samples=n_samples, n_bkps=n_bkps, noise_std=noise_std, seed=seed 73 | ) 74 | signal2, bkps2 = pw_wavy( 75 | n_samples=n_samples, n_bkps=n_bkps, noise_std=noise_std, seed=seed 76 | ) 77 | assert np.allclose(signal1, signal2) 78 | assert bkps1 == bkps2 79 | 80 | 81 | @pytest.mark.parametrize( 82 | "func, n_samples, n_features, n_bkps, noise_std", 83 | product([pw_linear], range(20, 1000, 200), range(1, 4), [2, 5, 3], [None, 1, 2]), 84 | ) 85 | def test_linear(func, n_samples, n_features, n_bkps, noise_std): 86 | signal, bkps = func( 87 | n_samples=n_samples, n_features=n_features, n_bkps=n_bkps, noise_std=noise_std 88 | ) 89 | assert signal.shape == (n_samples, n_features + 1) 90 | assert len(bkps) == n_bkps + 1 91 | assert bkps[-1] == n_samples 92 | 93 | 94 | @pytest.mark.parametrize( 95 | "func, n_samples, n_bkps, noise_std", 96 | product([pw_wavy], range(20, 1000, 200), [2, 5, 3], [None, 1, 2]), 97 | ) 98 | def test_wavy(func, n_samples, n_bkps, noise_std): 99 | signal, bkps = func(n_samples=n_samples, n_bkps=n_bkps, noise_std=noise_std) 100 | assert signal.shape == (n_samples,) 101 | assert len(bkps) == n_bkps + 1 102 | assert bkps[-1] == n_samples 103 | 104 | 105 | @pytest.mark.parametrize( 106 | "func, n_samples, n_bkps", product([pw_normal], range(20, 1000, 200), [2, 5, 3]) 107 | ) 108 | def test_normal(func, n_samples, n_bkps): 109 | signal, bkps = func(n_samples=n_samples, n_bkps=n_bkps) 110 | assert signal.shape == (n_samples, 2) 111 | assert 
len(bkps) == n_bkps + 1 112 | assert bkps[-1] == n_samples 113 | -------------------------------------------------------------------------------- /tests/test_display.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ruptures.datasets import pw_constant 4 | from ruptures.show import display 5 | from ruptures.show.display import MatplotlibMissingError 6 | 7 | 8 | @pytest.fixture(scope="module") 9 | def signal_bkps(): 10 | signal, bkps = pw_constant() 11 | return signal, bkps 12 | 13 | 14 | def test_display_with_options(signal_bkps): 15 | try: 16 | signal, bkps = signal_bkps 17 | fig, axarr = display(signal, bkps) 18 | fig, axarr = display(signal, bkps, bkps) 19 | figsize = (20, 10) # figure size 20 | fig, axarr = display( 21 | signal, 22 | bkps, 23 | figsize=figsize, 24 | ) 25 | fig, axarr = display( 26 | signal[:, 0], 27 | bkps, 28 | figsize=figsize, 29 | ) 30 | except MatplotlibMissingError: 31 | pytest.skip("matplotlib is not installed") 32 | 33 | 34 | def test_display_without_options(signal_bkps): 35 | try: 36 | signal, bkps = signal_bkps 37 | fig, axarr = display(signal, bkps) 38 | fig, axarr = display(signal, bkps, bkps) 39 | figsize = (20, 10) # figure size 40 | fig, axarr = display(signal, bkps) 41 | fig, axarr = display(signal[:, 0], bkps) 42 | except MatplotlibMissingError: 43 | pytest.skip("matplotlib is not installed") 44 | 45 | 46 | def test_display_with_new_options(signal_bkps): 47 | try: 48 | signal, bkps = signal_bkps 49 | fig, axarr = display(signal, bkps) 50 | fig, axarr = display(signal, bkps, bkps) 51 | 52 | fig, axarr = display(signal, bkps, facecolor="k", edgecolor="b") 53 | fig, axarr = display(signal[:, 0], bkps, facecolor="k", edgecolor="b") 54 | except MatplotlibMissingError: 55 | pytest.skip("matplotlib is not installed") 56 | 57 | 58 | def test_display_with_computed_chg_pts_options(signal_bkps): 59 | try: 60 | signal, bkps = signal_bkps 61 | fig, axarr = display(signal, 
bkps) 62 | fig, axarr = display(signal, bkps, bkps) 63 | 64 | fig, axarr = display(signal, bkps, bkps, computed_chg_pts_color="k") 65 | fig, axarr = display( 66 | signal, bkps, bkps, computed_chg_pts_color="k", computed_chg_pts_linewidth=3 67 | ) 68 | fig, axarr = display( 69 | signal, 70 | bkps, 71 | bkps, 72 | computed_chg_pts_color="k", 73 | computed_chg_pts_linewidth=3, 74 | computed_chg_pts_linestyle="--", 75 | ) 76 | fig, axarr = display( 77 | signal, 78 | bkps, 79 | bkps, 80 | computed_chg_pts_color="k", 81 | computed_chg_pts_linewidth=3, 82 | computed_chg_pts_linestyle="--", 83 | computed_chg_pts_alpha=1.0, 84 | ) 85 | except MatplotlibMissingError: 86 | pytest.skip("matplotlib is not installed") 87 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ruptures.metrics import ( 4 | hamming, 5 | hausdorff, 6 | meantime, 7 | precision_recall, 8 | randindex, 9 | ) 10 | from ruptures.metrics.sanity_check import BadPartitions 11 | 12 | 13 | @pytest.fixture(scope="module") 14 | def b_mb(): 15 | return [100, 200, 350, 400, 500], [101, 201, 301, 401, 500] 16 | 17 | 18 | def test_hausdorff(b_mb): 19 | b, mb = b_mb 20 | m = hausdorff(b, mb) 21 | assert m > 0 22 | m = hausdorff(b, b) 23 | assert m == 0 24 | 25 | 26 | def test_randindex(b_mb): 27 | b, mb = b_mb 28 | m = randindex(b, mb) 29 | assert 1 > m > 0 30 | m = randindex(b, b) 31 | assert m == 1 32 | 33 | 34 | def test_meantime(b_mb): 35 | b, mb = b_mb 36 | m = meantime(b, mb) 37 | assert m > 0 38 | m = meantime(b, b) 39 | assert m == 0 40 | 41 | 42 | @pytest.mark.parametrize("margin", range(1, 20, 2)) 43 | def test_precision_recall(b_mb, margin): 44 | b, mb = b_mb 45 | p, r = precision_recall(b, mb, margin=margin) 46 | assert 0 <= p < 1 47 | assert 0 <= r < 1 48 | p, r = precision_recall(b, b, margin=margin) 49 | assert (p, r) == (1, 1) 50 | p, r = 
precision_recall(b, [b[-1]], margin=margin) 51 | 52 | 53 | @pytest.mark.parametrize( 54 | "metric", [hamming, hausdorff, meantime, precision_recall, randindex] 55 | ) 56 | def test_exception(b_mb, metric): 57 | true_bkps, my_bkps = b_mb 58 | with pytest.raises(BadPartitions): 59 | m = metric(true_bkps, []) 60 | with pytest.raises(BadPartitions): 61 | m = metric([], my_bkps) 62 | with pytest.raises(BadPartitions): 63 | m = metric([10, 10, 500], [10, 500]) 64 | with pytest.raises(BadPartitions): 65 | m = metric([10, 500], [10, 501]) 66 | --------------------------------------------------------------------------------