├── .github └── workflows │ ├── doc.yml │ ├── release.yml │ └── test.yml ├── .gitignore ├── .python-version ├── ISSUE_TEMPLATE.md ├── LICENSE ├── README.md ├── doc ├── Makefile ├── pyannote-metrics.pdf └── source │ ├── api.rst │ ├── basics.rst │ ├── changelog.rst │ ├── cli.rst │ ├── conf.py │ ├── images │ ├── diagnostic.png │ ├── pipeline.png │ └── segmentation.png │ ├── index.rst │ ├── pyplots │ └── tutorial.py │ ├── reference.rst │ └── tutorial.rst ├── notebooks ├── .ipynb_checkpoints │ ├── Better purity metric-checkpoint.ipynb │ └── Untitled-checkpoint.ipynb ├── Better purity metric.ipynb ├── Untitled.ipynb ├── index.ipynb ├── pyannote.metrics.diarization.ipynb └── pyannote.metrics.identification.ipynb ├── pyproject.toml ├── setup.py ├── src └── pyannote │ └── metrics │ ├── __init__.py │ ├── base.py │ ├── binary_classification.py │ ├── cli.py │ ├── detection.py │ ├── diarization.py │ ├── errors │ ├── __init__.py │ ├── identification.py │ └── segmentation.py │ ├── identification.py │ ├── matcher.py │ ├── plot │ ├── __init__.py │ └── binary_classification.py │ ├── py.typed │ ├── segmentation.py │ ├── spotting.py │ ├── types.py │ └── utils.py ├── tests ├── test_detection.py ├── test_diarization.py └── test_identification.py └── uv.lock /.github/workflows/doc.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | on: 3 | push: 4 | branches: 5 | - master 6 | 7 | jobs: 8 | build-and-deploy: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v4 13 | with: 14 | persist-credentials: false 15 | fetch-depth: 0 16 | - name: Install uv 17 | uses: astral-sh/setup-uv@v5 18 | with: 19 | enable-cache: true 20 | cache-dependency-glob: uv.lock 21 | 22 | - name: Install the project 23 | run: uv sync --extra doc 24 | 25 | - name: Build documentation 26 | run: | 27 | make --directory=doc html 28 | touch ./doc/build/html/.nojekyll 29 | - name: Deploy 30 | uses: peaceiris/actions-gh-pages@v3 31 | with: 32 
| github_token: ${{ secrets.GITHUB_TOKEN }} 33 | publish_dir: ./doc/build/html 34 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | name: Build distribution 📦 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v4 12 | with: 13 | persist-credentials: false 14 | fetch-depth: 0 15 | - name: Install uv 16 | uses: astral-sh/setup-uv@v5 17 | with: 18 | enable-cache: true 19 | cache-dependency-glob: uv.lock 20 | - name: Set up Python 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version-file: ".python-version" 24 | - name: Build 25 | run: uv build 26 | - name: Store the distribution packages 27 | uses: actions/upload-artifact@v4 28 | with: 29 | name: python-package-distributions 30 | path: dist/ 31 | 32 | publish-to-pypi: 33 | name: >- 34 | Publish Python 🐍 distribution 📦 to PyPI 35 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes 36 | needs: 37 | - build 38 | runs-on: ubuntu-latest 39 | environment: 40 | name: pypi 41 | permissions: 42 | id-token: write 43 | steps: 44 | - name: Download all the dists 45 | uses: actions/download-artifact@v4 46 | with: 47 | name: python-package-distributions 48 | path: dist/ 49 | - name: Install uv 50 | uses: astral-sh/setup-uv@v5 51 | with: 52 | enable-cache: true 53 | cache-dependency-glob: uv.lock 54 | - name: Publish distribution 📦 to PyPI 55 | run: uv publish --trusted-publishing always --publish-url https://upload.pypi.org/legacy/ 56 | 57 | 58 | github-release: 59 | name: >- 60 | Sign the Python 🐍 distribution 📦 with Sigstore 61 | and upload them to GitHub Release 62 | needs: 63 | - publish-to-pypi 64 | runs-on: ubuntu-latest 65 | 66 | permissions: 67 | contents: write # IMPORTANT: mandatory for making GitHub Releases 
68 | id-token: write # IMPORTANT: mandatory for sigstore 69 | 70 | steps: 71 | - name: Download all the dists 72 | uses: actions/download-artifact@v4 73 | with: 74 | name: python-package-distributions 75 | path: dist/ 76 | - name: Sign the dists with Sigstore 77 | uses: sigstore/gh-action-sigstore-python@v3.0.0 78 | with: 79 | inputs: >- 80 | ./dist/*.tar.gz 81 | ./dist/*.whl 82 | - name: Create GitHub Release 83 | env: 84 | GITHUB_TOKEN: ${{ github.token }} 85 | run: >- 86 | gh release create 87 | "$GITHUB_REF_NAME" 88 | --repo "$GITHUB_REPOSITORY" 89 | --notes "" 90 | - name: Upload artifact signatures to GitHub Release 91 | env: 92 | GITHUB_TOKEN: ${{ github.token }} 93 | # Upload to GitHub Release using the `gh` CLI. 94 | # `dist/` contains the built packages, and the 95 | # sigstore-produced signatures and certificates. 96 | run: >- 97 | gh release upload 98 | "$GITHUB_REF_NAME" dist/** 99 | --repo "$GITHUB_REPOSITORY" 100 | 101 | # publish-to-testpypi: 102 | # name: Publish Python 🐍 distribution 📦 to TestPyPI 103 | # needs: 104 | # - build 105 | # runs-on: ubuntu-latest 106 | # 107 | # environment: 108 | # name: testpypi 109 | # 110 | # permissions: 111 | # id-token: write # IMPORTANT: mandatory for trusted publishing 112 | # 113 | # steps: 114 | # - name: Download all the dists 115 | # uses: actions/download-artifact@v4 116 | # with: 117 | # name: python-package-distributions 118 | # path: dist/ 119 | # - name: Install uv 120 | # uses: astral-sh/setup-uv@v5 121 | # with: 122 | # enable-cache: true 123 | # cache-dependency-glob: uv.lock 124 | # - name: Publish distribution 📦 to PyPI 125 | # run: uv publish --trusted-publishing always --publish-url https://test.pypi.org/legacy/ 126 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - develop 7 | push: 8 | 
branches: 9 | - develop 10 | - master 11 | - release/* 12 | 13 | 14 | jobs: 15 | test: 16 | name: Test 17 | runs-on: ubuntu-latest 18 | strategy: 19 | matrix: 20 | python-version: 21 | - "3.10" 22 | - "3.11" 23 | - "3.12" 24 | env: 25 | UV_PYTHON: ${{ matrix.python-version }} 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - name: Install uv 30 | uses: astral-sh/setup-uv@v5 31 | 32 | - name: Install the project 33 | run: uv sync --extra test 34 | 35 | - name: Run tests 36 | run: uv run pytest tests -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | MANIFEST 10 | .Python 11 | env/ 12 | bin/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # Installer logs 27 | pip-log.txt 28 | pip-delete-this-directory.txt 29 | 30 | # Unit test / coverage reports 31 | htmlcov/ 32 | .tox/ 33 | .coverage 34 | .cache 35 | nosetests.xml 36 | coverage.xml 37 | 38 | # Translations 39 | *.mo 40 | 41 | # Mr Developer 42 | .mr.developer.cfg 43 | .project 44 | .pydevproject 45 | 46 | # Rope 47 | .ropeproject 48 | 49 | # Django stuff: 50 | *.log 51 | *.pot 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | doc/.ipynb_checkpoints 57 | 58 | .mypy_cache/ -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | #### Description 3 | 4 | 5 | #### Steps/Code to Reproduce 6 | 30 | 31 
| #### Expected Results 32 | 33 | 34 | #### Actual Results 35 | 36 | 37 | #### Versions 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 CNRS (Hervé BREDIN - http://herve.niderb.fr) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyannote.metrics 2 | 3 | > a toolkit for reproducible evaluation, diagnostic, and error analysis of speaker diarization systems 4 | 5 | An overview of `pyannote.metrics` is available as an [InterSpeech 2017 paper](docs/pyannote-metrics.pdf): it is recommended to read it first, to quickly get an idea whether this tool is for you. 
6 | 7 | ## Installation 8 | 9 | ```bash 10 | $ pip install pyannote.metrics 11 | ``` 12 | 13 | ## Documentation 14 | 15 | The documentation is available at [http://pyannote.github.io/pyannote-metrics](http://pyannote.github.io/pyannote-metrics). 16 | 17 | Sample notebooks are available [here](http://nbviewer.ipython.org/github/pyannote/pyannote-metrics/blob/master/notebooks/index.ipynb). 18 | 19 | ## Citation 20 | 21 | If you use `pyannote.metrics` in your research, please use the following citation: 22 | 23 | ```bibtex 24 | @inproceedings{pyannote.metrics, 25 | author = {Herv\'e Bredin}, 26 | title = {{pyannote.metrics: a toolkit for reproducible evaluation, diagnostic, and error analysis of speaker diarization systems}}, 27 | booktitle = {{Interspeech 2017, 18th Annual Conference of the International Speech Communication Association}}, 28 | year = {2017}, 29 | month = {August}, 30 | address = {Stockholm, Sweden}, 31 | url = {http://pyannote.github.io/pyannote-metrics}, 32 | } 33 | ``` 34 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = pyannotemetrics 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | uv run --extra doc $(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | uv run --extra doc $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /doc/pyannote-metrics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyannote/pyannote-metrics/584d177a4862a3ab3f89e9ca6639495f7293065e/doc/pyannote-metrics.pdf -------------------------------------------------------------------------------- /doc/source/api.rst: -------------------------------------------------------------------------------- 1 | Getting started 2 | =============== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | tutorial 8 | basics 9 | -------------------------------------------------------------------------------- /doc/source/basics.rst: -------------------------------------------------------------------------------- 1 | Principles 2 | ========== 3 | 4 | `pyannote.metrics` provides a set of classes to compare the output of speaker diarization (hereafter called `hypothesis`) systems to manual annotations (`reference`). Let us first instantiate a sample `reference` and `hypothesis`. 5 | 6 | .. ipython:: 7 | 8 | In [10]: from pyannote.core import Segment, Timeline, Annotation 9 | 10 | 11 | In [11]: reference = Annotation(uri='file1') 12 | ....: reference[Segment(0, 10)] = 'A' 13 | ....: reference[Segment(12, 20)] = 'B' 14 | ....: reference[Segment(24, 27)] = 'A' 15 | ....: reference[Segment(30, 40)] = 'C' 16 | 17 | In [12]: hypothesis = Annotation(uri='file1') 18 | ....: hypothesis[Segment(2, 13)] = 'a' 19 | ....: hypothesis[Segment(13, 14)] = 'd' 20 | ....: hypothesis[Segment(14, 20)] = 'b' 21 | ....: hypothesis[Segment(22, 38)] = 'c' 22 | ....: hypothesis[Segment(38, 40)] = 'd' 23 | 24 | 25 | .. plot:: pyplots/tutorial.py 26 | 27 | This basically tells us that, according to the manual annotation, speaker `A` speaks in timeranges [0s, 10s] and [24s, 27s]. 28 | 29 | 30 | .. 
note:: 31 | 32 | Overlapping segments are supported. See :mod:`pyannote.core` documentation for more details. 33 | 34 | `pyannote.metrics` follows an object-oriented paradigm. 35 | Most evaluation metrics (e.g. :class:`DiarizationErrorRate` below) inherit from :class:`BaseMetric`. 36 | As such, they share a common set of methods. 37 | 38 | For instance, once instantiated, they can be called directly to compute the value of the evaluation metric. 39 | 40 | .. ipython:: 41 | :okwarning: 42 | 43 | In [10]: from pyannote.metrics.diarization import DiarizationErrorRate 44 | 45 | In [1]: metric = DiarizationErrorRate() 46 | 47 | In [1]: metric(reference, hypothesis) 48 | 49 | 50 | Accumulation & reporting 51 | ------------------------ 52 | 53 | The same metric instance can be used to evaluate multiple files. 54 | 55 | .. ipython:: 56 | :okwarning: 57 | 58 | In [11]: other_reference = Annotation(uri='file2') 59 | ....: other_reference[Segment(0, 5)] = 'A' 60 | ....: other_reference[Segment(6, 10)] = 'B' 61 | ....: other_reference[Segment(12, 13)] = 'B' 62 | ....: other_reference[Segment(15, 20)] = 'A' 63 | 64 | In [12]: other_hypothesis = Annotation(uri='file2') 65 | ....: other_hypothesis[Segment(1, 6)] = 'a' 66 | ....: other_hypothesis[Segment(6, 7)] = 'b' 67 | ....: other_hypothesis[Segment(7, 10)] = 'c' 68 | ....: other_hypothesis[Segment(11, 19)] = 'b' 69 | ....: other_hypothesis[Segment(19, 20)] = 'a' 70 | 71 | In [12]: metric = DiarizationErrorRate() 72 | 73 | In [12]: metric(reference, hypothesis) 74 | 75 | In [12]: metric(other_reference, other_hypothesis) 76 | 77 | 78 | You do not need to keep track of the result of each call yourself: this is done automatically. 79 | For instance, once you have evaluated all files, you can use the overridden :func:`~pyannote.metrics.base.BaseMetric.__abs__` operator to get the accumulated value: 80 | 81 | ..
ipython:: 82 | 83 | In [12]: abs(metric) 84 | 85 | :func:`~pyannote.metrics.base.BaseMetric.report` provides a convenient summary of the result: 86 | 87 | .. ipython:: 88 | 89 | In [12]: report = metric.report(display=True) 90 | 91 | 92 | The internal accumulator can be reset using the :func:`~pyannote.metrics.base.BaseMetric.reset` method: 93 | 94 | .. ipython:: 95 | 96 | In [12]: metric.reset() 97 | 98 | 99 | Evaluation map 100 | -------------- 101 | 102 | Though audio files can always be processed entirely (from beginning to end), there are cases where reference annotations are only available for some regions of the audio files. 103 | All metrics support the provision of an evaluation map that indicates which part of the audio file should be evaluated. 104 | 105 | .. ipython:: 106 | 107 | In [2]: uem = Timeline([Segment(0, 10), Segment(15, 20)]) 108 | 109 | In [2]: metric(reference, hypothesis, uem=uem) 110 | 111 | 112 | Components 113 | ---------- 114 | 115 | Most metrics are computed as the combination of several components. 116 | For instance, the diarization error rate is the combination of false alarm (non-speech regions classified as speech), missed detection (speech regions classified as non-speech) and confusion between speakers. 117 | 118 | Using ``detailed=True`` will return the value of each component: 119 | 120 | .. ipython:: 121 | :okwarning: 122 | 123 | In [13]: metric(reference, hypothesis, detailed=True) 124 | 125 | The accumulated value of each component can also be obtained using the overridden :func:`~pyannote.metrics.base.BaseMetric.__getitem__` operator: 126 | 127 | .. ipython:: 128 | :okwarning: 129 | 130 | In [13]: metric(other_reference, other_hypothesis) 131 | 132 | In [13]: metric['confusion'] 133 | 134 | In [13]: metric[:] 135 | 136 | 137 | Define your own metric 138 | ---------------------- 139 | 140 | It is possible (and encouraged) to develop and contribute new evaluation metrics.
141 | 142 | All you have to do is inherit from :class:`BaseMetric` and implement a few methods: 143 | ``metric_name``, ``metric_components``, ``compute_components``, and ``compute_metric``: 144 | 145 | .. code-block:: python 146 | 147 | def is_male(speaker_name): 148 | # black magic that returns True if speaker is a man, False otherwise 149 | pass 150 | 151 | class MyMetric(BaseMetric): 152 | # This dummy metric computes the ratio between male and female speakers. 153 | # It does not actually use the reference annotation... 154 | 155 | @classmethod 156 | def metric_name(cls): 157 | # Return human-readable name of the metric 158 | 159 | return 'male / female ratio' 160 | 161 | @classmethod 162 | def metric_components(cls): 163 | # Return component names from which the metric is computed 164 | 165 | return ['male', 'female'] 166 | 167 | def compute_components(self, reference, hypothesis, **kwargs): 168 | # Actually compute the value of each component 169 | 170 | components = {'male': 0., 'female': 0.} 171 | 172 | for segment, _, speaker_name in hypothesis.itertracks(yield_label=True): 173 | if is_male(speaker_name): 174 | components['male'] += segment.duration 175 | else: 176 | components['female'] += segment.duration 177 | 178 | return components 179 | 180 | def compute_metric(self, components): 181 | # Actually compute the metric based on the component values 182 | 183 | return components['male'] / components['female'] 184 | 185 | 186 | See :class:`pyannote.metrics.base.BaseMetric` for more details.
187 | -------------------------------------------------------------------------------- /doc/source/changelog.rst: -------------------------------------------------------------------------------- 1 | ######### 2 | Changelog 3 | ######### 4 | 5 | Version 4.0.0rc2 (2025-02-19) 6 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 7 | 8 | - fix: remove deprecated use of `np.NaN` 9 | - BREAKING: drop support to `Python` < 3.10 10 | - BREAKING: switch to native namespace package 11 | - setup: switch to `uv` 12 | 13 | Version 3.3.0 (2025-01-12) 14 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 15 | 16 | - BREAKING: improve diarization purity and coverage to account for overlapping regions 17 | - chore: use `bool` instead of deprecated `np.bool` 18 | 19 | Version 3.2.1 (2022-06-20) 20 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 21 | 22 | - fix: fix corner case for confidence interval 23 | - doc: add type hinting (@hadware) 24 | 25 | Version 3.2 (2022-01-12) 26 | ~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | - feat: add option to override existing "uri" 29 | - feat: add support for missing "uri" 30 | 31 | Version 3.1 (2021-09-27) 32 | ~~~~~~~~~~~~~~~~~~~~~~~~ 33 | 34 | - BREAKING: remove (buggy) support for parallel processing 35 | - fix: fix documentation deployment 36 | 37 | Version 3.0.1 (2020-07-02) 38 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 39 | 40 | - setup: switch to pyannote.database 4.0+ 41 | 42 | Version 3.0 (2020-06-15) 43 | ~~~~~~~~~~~~~~~~~~~~~~~~ 44 | 45 | - feat: add DetectionCostFunction detection metric (@nryant) 46 | - BREAKING: rename pyannote-metrics.py CLI to pyannote-metrics 47 | 48 | Version 2.3 (2020-02-26) 49 | ~~~~~~~~~~~~~~~~~~~~~~~~ 50 | 51 | - feat: add DetectionPrecisionRecallFMeasure compound metric (@MarvinLvn) 52 | - fix: fix corner "in f-measure" case when both precision and recall are zero (@MarvinLvn) 53 | - fix: fix a typo in documentation (@wq2012) 54 | 55 | Version 2.2 (2019-12-13) 56 | ~~~~~~~~~~~~~~~~~~~~~~~~ 57 | 58 | - feat: add support for evaluation of overlapped speech detection 59 | - feat: setup 
continuous integration 60 | - setup: switch to pyannote.core 3.2 61 | 62 | Version 2.1 (2019-06-24) 63 | ~~~~~~~~~~~~~~~~~~~~~~~~ 64 | 65 | - chore: rewrite mapping and matching routines 66 | - fix: remove buggy xarray dependency 67 | - setup: switch to pyannote.core 3.0 68 | 69 | Version 2.0.2 (2019-04-15) 70 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 71 | 72 | - fix: avoid division by zero 73 | 74 | Version 2.0.1 (2019-03-20) 75 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 76 | 77 | - BREAKING: drop support for all file formats but RTTM 78 | - BREAKING: drop Python 2.7 support 79 | - setup: switch to pyannote.database 2.0 80 | - setup: switch to pyannote.core 2.1 81 | 82 | Version 1.8.1 (2018-11-19) 83 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 84 | 85 | - setup: switch to pyannote.core 2.0 86 | 87 | Version 1.8 (2018-09-03) 88 | ~~~~~~~~~~~~~~~~~~~~~~~~ 89 | 90 | - feat: add compound segmentation metric SegmentationPurityCoverageFMeasure (@diego-fustes) 91 | - fix: fix typo in IdentificationErrorAnalysis (@benjisympa) 92 | 93 | Version 1.7.1 (2018-09-03) 94 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 95 | 96 | - fix: fix broken images in documentation 97 | 98 | Version 1.7 (2018-03-17) 99 | ~~~~~~~~~~~~~~~~~~~~~~~~ 100 | 101 | - feat: add option to filter out target trials in "spotting" mode 102 | - chore: default to "parallel=False" 103 | 104 | Version 1.6.1 (2018-02-05) 105 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 106 | 107 | - fix: fix Diarization{Purity | Coverage} with empty references 108 | - improve: improve support for speaker spotting experiments 109 | - chore: (temporarily?) remove parallel processing in pyannote.metrics.py 110 | - setup: drop support for Python 2 111 | 112 | Version 1.5 (2017-10-20) 113 | ~~~~~~~~~~~~~~~~~~~~~~~~ 114 | 115 | - feat: add fixed vs. 
variable latency switch for LLSS 116 | 117 | Version 1.4.3 (2017-10-17) 118 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 119 | 120 | - fix: add more safety checks to pyannote-metrics.py "spotting" mode 121 | - setup: switch to pyannote.core 1.2, pyannote.database 1.1, pyannote.parser 0.7 122 | 123 | Version 1.4.2 (2017-10-13) 124 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 125 | 126 | - improve: set latency of missed detections to maximum possible value 127 | - improve: improve instructions in pyannote-metrics.py --help 128 | 129 | Version 1.4.1 (2017-10-02) 130 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 131 | 132 | - feat: add LowLatencySpeakerSpotting metric 133 | - feat: add "spotting" mode to pyannote-metrics.py 134 | - setup: switch to pyannote.database 1.0 135 | 136 | Version 1.3 (2017-09-19) 137 | ~~~~~~~~~~~~~~~~~~~~~~~~ 138 | 139 | - feat: add "skip_overlap" option to not evaluate overlapping speech regions 140 | - improve: bring performance improvement to diarization metrics 141 | - fix: fix a bug where collar was applied twice in DiarizationErrorRate 142 | - fix: add collar support to purity/coverage/homogeneity/completeness 143 | - fix: fix a bug happening in 'uemify' when both reference and hypothesis are empty 144 | - fix: fix a "division by zero" error in homogeneity/completeness 145 | - setup: switch to pyannote.core 1.1 (major performance improvements) 146 | 147 | Version 1.2 (2017-07-21) 148 | ~~~~~~~~~~~~~~~~~~~~~~~~ 149 | 150 | - feat: add method DiarizationPurityCoverageFMeasure.compute_metrics to get 151 | purity, coverage, and their F-measure (all at once) 152 | 153 | Version 1.1 (2017-07-20) 154 | ~~~~~~~~~~~~~~~~~~~~~~~~ 155 | 156 | - feat: add new metric 'DiarizationPurityCoverageFMeasure' 157 | - doc: update installation instructions 158 | - setup: switch to pyannote.core 1.0.4 159 | 160 | Version 1.0 (2017-07-04) 161 | ~~~~~~~~~~~~~~~~~~~~~~~~ 162 | 163 | - setup: switch to pyannote.core 1.0 164 | - feat: add score calibration for binary classification tasks 165 | - doc: update 
citation 166 | 167 | Version 0.14.4 (2017-03-27) 168 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 169 | 170 | - doc: update notebook to latest version 171 | 172 | Version 0.14.3 (2017-03-27) 173 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 174 | 175 | - doc: add Sphinx documentation 176 | 177 | Version 0.14.2 (2017-03-21) 178 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 179 | 180 | - feat: better README and technical report 181 | 182 | Version 0.14.1 (2017-03-16) 183 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 184 | 185 | - chore: rename SegmentationError to SegmentationErrorAnalysis 186 | - fix: fix DetectionErrorRate support for kwargs 187 | 188 | Version 0.14 (2017-02-06) 189 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 190 | 191 | - feat: add "parallel" option to not use multiprocessing 192 | - feat: add "accuracy" in "detection" report 193 | - setup: switch to pyannote.core 0.13 194 | - setup: switch to pyannote.parser 0.6.5 195 | 196 | Version 0.13.2 (2017-01-30) 197 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 198 | 199 | - feat: add pyannote-metrics.py evaluation script 200 | - fix: fix BaseMetric.report() for metric without a 'total' component 201 | - fix: fix (Greedy)DiarizationErrorRate uem handling 202 | - fix: fix (Greedy)DiarizationErrorRate parallel processing 203 | - setup: switch to pyannote.core 0.12 204 | - setup: update munkres & matplotlib dependencies 205 | 206 | Version 0.12.1 (2017-01-27) 207 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 208 | 209 | - feat: support for multiprocessing 210 | - feat: add report() method 211 | - feat: travis continuous integration (finally!) 
212 | - improve: speed up detection metrics 213 | - feat: add unit tests for detection metrics 214 | - fix: fix python 3 support 215 | - setup: remove dependency to pyannote.algorithms 216 | - setup: switch to pyannote.core 0.11 217 | 218 | Version 0.11 (2016-12-13) 219 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 220 | 221 | - feat: add pyannote.metrics.binary_classification module 222 | 223 | Version 0.10.3 (2016-11-28) 224 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 225 | 226 | - fix: fix (greedy) diarization error rate 227 | - feat: add support for 'collar' to (greedy) diarization error rate 228 | 229 | Version 0.10.2 (2016-11-10) 230 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 231 | 232 | - fix: fix default "xlim" in "plot_distributions" 233 | - setup: switch to pyannote.core 0.8 and pyannote.algorithms 0.6.6 234 | 235 | Version 0.10.1 (2016-11-05) 236 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 237 | 238 | - feat: add "uem" support to diarization metrics 239 | 240 | Version 0.9 (2016-09-23) 241 | ~~~~~~~~~~~~~~~~~~~~~~~~ 242 | 243 | - feat: add plotting functions for binary classification tasks 244 | 245 | Version 0.8 (2016-08-25) 246 | ~~~~~~~~~~~~~~~~~~~~~~~~ 247 | 248 | - feat: detection accuracy 249 | - refactor: detection metrics 250 | - setup: update to pyannote.core 0.7.2 251 | 252 | Version 0.7.1 (2016-06-24) 253 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 254 | 255 | - setup: update to pyannote.core 0.6.6 256 | 257 | Version 0.7 (2016-04-04) 258 | ~~~~~~~~~~~~~~~~~~~~~~~~ 259 | 260 | - feat: greedy diarization error rate 261 | 262 | Version 0.6.0 (2016-03-29) 263 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 264 | 265 | - feat: Python 3 support 266 | - feat: unit tests 267 | - wip: travis 268 | 269 | Version 0.5.1 (2016-02-19) 270 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 271 | 272 | - refactor: diarization metrics 273 | 274 | Version 0.4.1 (2014-11-20) 275 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 276 | 277 | - fix: identification error analysis matrix confusion 278 | 279 | Version 0.4 (2014-10-31) 280 | ~~~~~~~~~~~~~~~~~~~~~~~~ 281 | 282 | - feat(error): 
identification regression analysis 283 | - feat: new pyannote_eval.py CLI 284 | 285 | Version 0.3 (2014-10-01) 286 | ~~~~~~~~~~~~~~~~~~~~~~~~ 287 | 288 | - feat(error): segmentation error analysis 289 | 290 | Version 0.2 (2014-08-05) 291 | ~~~~~~~~~~~~~~~~~~~~~~~~ 292 | 293 | - feat(detection): add precision and recall 294 | - fix(identification): fix precision and recall 295 | 296 | Version 0.1 (2014-06-27) 297 | ~~~~~~~~~~~~~~~~~~~~~~~~ 298 | 299 | - feat(segmentation): add precision and recall 300 | - feat(identification): add support for NIST collar 301 | - feat(error): add module for detailed error analysis 302 | 303 | Version 0.0.1 (2014-06-04) 304 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 305 | 306 | - first public version 307 | -------------------------------------------------------------------------------- /doc/source/cli.rst: -------------------------------------------------------------------------------- 1 | ################# 2 | Command line tool 3 | ################# 4 | 5 | There are two main issues that may arise with results reported in the literature: 6 | 7 | * Even though the same public datasets are used, the actual evaluation protocol may differ slightly from one paper to another. 8 | * The implementation of the reported evaluation metrics may also differ. 9 | 10 | The first objective of the `pyannote.metrics` library is to address these two problems, and provide a convenient way for researchers to evaluate their approaches in a reproducible and comparable manner. 11 | 12 | Here is an example use of the command line interface that is provided to solve this problem. 13 | 14 | .. code-block:: bash 15 | 16 | $ pyannote-metrics diarization --subset=development Etape.SpeakerDiarization.TV hypothesis.rttm 17 | 18 | Diarization (collar = 0 ms) error purity coverage total correct % fa. % miss. % conf. 
% 19 | -------------------------------------- ------- -------- ---------- -------- --------- ----- ------ ----- ------- ---- ------- ----- 20 | BFMTV_BFMStory_2011-03-17_175900 14.64 94.74 90.00 2582.08 2300.22 89.08 96.16 3.72 80.14 3.10 201.72 7.81 21 | LCP_CaVousRegarde_2011-02-17_204700 17.80 89.13 86.90 3280.72 2848.42 86.82 151.78 4.63 208.29 6.35 224.01 6.83 22 | LCP_EntreLesLignes_2011-03-18_192900 23.46 79.52 79.03 1704.97 1337.80 78.46 32.89 1.93 157.14 9.22 210.03 12.32 23 | LCP_EntreLesLignes_2011-03-25_192900 26.75 76.97 75.86 1704.13 1292.83 75.86 44.61 2.62 158.38 9.29 252.92 14.84 24 | LCP_PileEtFace_2011-03-17_192900 10.73 93.33 92.30 1611.49 1487.32 92.30 48.73 3.02 55.49 3.44 68.67 4.26 25 | LCP_TopQuestions_2011-03-23_213900 18.28 98.25 94.20 727.26 668.65 91.94 74.36 10.22 16.41 2.26 42.20 5.80 26 | LCP_TopQuestions_2011-04-05_213900 27.97 97.95 79.81 818.03 638.68 78.08 49.45 6.04 17.46 2.13 161.89 19.79 27 | TV8_LaPlaceDuVillage_2011-03-14_172834 21.43 92.89 89.64 996.12 892.04 89.55 109.36 10.98 11.80 1.18 92.28 9.26 28 | TV8_LaPlaceDuVillage_2011-03-21_201334 66.23 77.24 70.64 1296.86 691.76 53.34 253.80 19.57 29.16 2.25 575.95 44.41 29 | TOTAL 23.27 88.18 84.55 14721.65 12157.71 82.58 861.14 5.85 734.28 4.99 1829.67 12.43 30 | 31 | 32 | Tasks 33 | ----- 34 | 35 | Not only can ``pyannote-metrics`` command line tool be used to compute the diarization error rate using NIST implementation, one can also evaluate the typical four sub-modules used in most speaker diarization systems: 36 | 37 | .. image:: images/pipeline.png 38 | 39 | Practically, the first positional argument (e.g. ``diarization``, above) is a flag indicating which task should be evaluated. 40 | 41 | Apart from the ``diarization`` flag that is used for evaluating speaker diarization results, other available flags are ``detection`` (speech activity detection), ``segmentation`` (speaker change detection), and ``identification`` (supervised speaker identification). 
42 | Depending on the task, a different set of evaluation metrics is computed. 43 | 44 | Datasets and protocols 45 | ---------------------- 46 | 47 | ``pyannote.metrics`` provides an easy way to ensure the same protocol (i.e. manual groundtruth and training/development/test split) is used for evaluation. 48 | 49 | Internally, it relies on a collection of Python packages that all derive from the ``pyannote.database`` main package, which provides a convenient API to define training/development/test splits, along with groundtruth annotations. 50 | In the example above, the `development` set of the `TV` evaluation protocol of the ETAPE dataset is used. 51 | 52 | Results are both reported for each file in the selected subset, and aggregated into one final metric value. 53 | 54 | As of March 2017, ``pyannote.database`` packages exist for the ETAPE corpus, the REPERE corpus, and the AMI corpus. As more people contribute new ``pyannote.database`` packages, they will be added to the `pyannote` ecosystem. 55 | 56 | File formats 57 | ------------ 58 | 59 | Hypothesis files must use the `Rich Transcription Time Marked <https://web.archive.org/web/20170119114252/http://www.itl.nist.gov/iad/mig/tests/rt/2009/docs/rt09-meeting-eval-plan-v2.pdf>`_ (RTTM) format. 60 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # pyannote.metrics documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Jan 24 15:45:55 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default.
14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | 20 | # allow pyannote.core import 21 | import os 22 | import sys 23 | 24 | sys.path.insert(0, os.path.abspath("../..")) 25 | 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [ 37 | "sphinx.ext.autodoc", 38 | "sphinx.ext.napoleon", 39 | "sphinx.ext.intersphinx", 40 | "sphinx.ext.todo", 41 | "sphinx.ext.coverage", 42 | "sphinx.ext.mathjax", 43 | "sphinx.ext.viewcode", 44 | "sphinx.ext.githubpages", 45 | "matplotlib.sphinxext.plot_directive", 46 | "IPython.sphinxext.ipython_directive", 47 | ] 48 | 49 | 50 | # Napoleon settings 51 | napoleon_google_docstring = True 52 | napoleon_numpy_docstring = True 53 | napoleon_include_init_with_doc = False 54 | napoleon_include_private_with_doc = False 55 | napoleon_include_special_with_doc = False 56 | napoleon_use_admonition_for_examples = False 57 | napoleon_use_admonition_for_notes = False 58 | napoleon_use_admonition_for_references = False 59 | napoleon_use_ivar = False 60 | napoleon_use_param = True 61 | napoleon_use_rtype = True 62 | napoleon_use_keyword = True 63 | 64 | # Add any paths that contain templates here, relative to this directory. 65 | templates_path = ["_templates"] 66 | 67 | # The suffix(es) of source filenames. 68 | # You can specify multiple suffix as a list of string: 69 | # 70 | # source_suffix = ['.rst', '.md'] 71 | source_suffix = {".rst": "restructuredtext"} 72 | 73 | # The master toctree document. 
74 | master_doc = "index" 75 | 76 | # General information about the project. 77 | project = "pyannote.metrics" 78 | copyright = "2017, CNRS" 79 | author = "Hervé Bredin" 80 | 81 | # The version info for the project you're documenting, acts as replacement for 82 | # |version| and |release|, also used in various other places throughout the 83 | # built documents. 84 | 85 | import pyannote.metrics 86 | 87 | # The short X.Y version. 88 | version = pyannote.metrics.__version__.split("+")[0] 89 | # The full version, including alpha/beta/rc tags. 90 | release = pyannote.metrics.__version__ 91 | 92 | # The language for content autogenerated by Sphinx. Refer to documentation 93 | # for a list of supported languages. 94 | # 95 | # This is also used if you do content translation via gettext catalogs. 96 | # Usually you set "language" from the command line for these cases. 97 | language = "en" 98 | 99 | # List of patterns, relative to source directory, that match files and 100 | # directories to ignore when looking for source files. 101 | # This patterns also effect to html_static_path and html_extra_path 102 | exclude_patterns = [] 103 | 104 | # The name of the Pygments (syntax highlighting) style to use. 105 | pygments_style = "sphinx" 106 | 107 | # If true, `todo` and `todoList` produce output, else they produce nothing. 108 | todo_include_todos = True 109 | 110 | 111 | # -- Options for HTML output ---------------------------------------------- 112 | 113 | # The theme to use for HTML and HTML Help pages. See the documentation for 114 | # a list of builtin themes. 115 | # 116 | html_theme = "sphinx_rtd_theme" 117 | 118 | # Theme options are theme-specific and customize the look and feel of a theme 119 | # further. For a list of options available for each theme, see the 120 | # documentation. 121 | # 122 | # html_theme_options = {} 123 | 124 | # Add any paths that contain custom static files (such as style sheets) here, 125 | # relative to this directory. 
They are copied after the builtin static files, 126 | # so a file named "default.css" will overwrite the builtin "default.css". 127 | html_static_path = ["_static"] 128 | 129 | 130 | # -- Options for HTMLHelp output ------------------------------------------ 131 | 132 | # Output file base name for HTML help builder. 133 | htmlhelp_basename = "pyannotemetricsdoc" 134 | 135 | 136 | # -- Options for LaTeX output --------------------------------------------- 137 | 138 | latex_elements = { 139 | # The paper size ('letterpaper' or 'a4paper'). 140 | # 141 | # 'papersize': 'letterpaper', 142 | # The font size ('10pt', '11pt' or '12pt'). 143 | # 144 | # 'pointsize': '10pt', 145 | # Additional stuff for the LaTeX preamble. 146 | # 147 | # 'preamble': '', 148 | # Latex figure (float) alignment 149 | # 150 | # 'figure_align': 'htbp', 151 | } 152 | 153 | # Grouping the document tree into LaTeX files. List of tuples 154 | # (source start file, target name, title, 155 | # author, documentclass [howto, manual, or own class]). 156 | latex_documents = [ 157 | ( 158 | master_doc, 159 | "pyannotemetrics.tex", 160 | "pyannote.metrics Documentation", 161 | "Hervé Bredin", 162 | "manual", 163 | ), 164 | ] 165 | 166 | 167 | # -- Options for manual page output --------------------------------------- 168 | 169 | # One entry per manual page. List of tuples 170 | # (source start file, name, description, authors, manual section). 171 | man_pages = [ 172 | (master_doc, "pyannotemetrics", "pyannote.metrics Documentation", [author], 1) 173 | ] 174 | 175 | 176 | # -- Options for Texinfo output ------------------------------------------- 177 | 178 | # Grouping the document tree into Texinfo files. 
List of tuples 179 | # (source start file, target name, title, author, 180 | # dir menu entry, description, category) 181 | texinfo_documents = [ 182 | ( 183 | master_doc, 184 | "pyannotemetrics", 185 | "pyannote.metrics Documentation", 186 | author, 187 | "pyannotemetrics", 188 | "One line description of project.", 189 | "Miscellaneous", 190 | ), 191 | ] 192 | 193 | # Example configuration for intersphinx: refer to the Python standard library. 194 | intersphinx_mapping = { 195 | "python": ("https://docs.python.org/", None), 196 | "pyannote.core": ("https://pyannote.github.io/pyannote-core", None), 197 | "pyannote.database": ("https://pyannote.github.io/pyannote-database", None), 198 | } 199 | 200 | # plot_pre_code = """import matplotlib.pyplot as plt 201 | # from pyannote.core import notebook 202 | # from pyannote.core import Segment 203 | # from pyannote.core import Timeline 204 | # from pyannote.core import Annotation 205 | # from pyannote.core import SlidingWindow 206 | # from pyannote.core import SlidingWindowFeature""" 207 | 208 | # ipython_savefig_dir = "../../build/html/_static" 209 | -------------------------------------------------------------------------------- /doc/source/images/diagnostic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyannote/pyannote-metrics/584d177a4862a3ab3f89e9ca6639495f7293065e/doc/source/images/diagnostic.png -------------------------------------------------------------------------------- /doc/source/images/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyannote/pyannote-metrics/584d177a4862a3ab3f89e9ca6639495f7293065e/doc/source/images/pipeline.png -------------------------------------------------------------------------------- /doc/source/images/segmentation.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pyannote/pyannote-metrics/584d177a4862a3ab3f89e9ca6639495f7293065e/doc/source/images/segmentation.png -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. pyannote.metrics documentation master file, created by 2 | sphinx-quickstart on Thu Jan 19 11:54:52 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | ################ 7 | pyannote.metrics 8 | ################ 9 | 10 | "A toolkit for reproducible evaluation, diagnostic, and error analysis of speaker diarization systems" 11 | ------------------------------------------------------------------------------------------------------ 12 | 13 | `pyannote.metrics` is an open-source Python library aimed at researchers working in the wide area of speaker diarization. It provides a command line interface (CLI) to improve reproducibility and comparison of speaker diarization research results. Through its application programming interface (API), a large set of evaluation metrics is available for diagnostic purposes of all modules of typical speaker diarization pipelines (speech activity detection, speaker change detection, clustering, and identification). Finally, thanks to `pyannote.core` visualization capabilities, it can also be used for detailed error analysis purposes. 
14 | 15 | 16 | Installation 17 | ============ 18 | 19 | :: 20 | 21 | $ pip install pyannote.metrics 22 | 23 | Citation 24 | ======== 25 | 26 | If you use `pyannote.metrics` in your research, please use the following citation: 27 | 28 | :: 29 | 30 | @inproceedings{pyannote.metrics, 31 | author = {Herv\'e Bredin}, 32 | title = {{pyannote.metrics: a toolkit for reproducible evaluation, diagnostic, and error analysis of speaker diarization systems}}, 33 | booktitle = {{Interspeech 2017, 18th Annual Conference of the International Speech Communication Association}}, 34 | year = {2017}, 35 | month = {August}, 36 | address = {Stockholm, Sweden}, 37 | url = {http://pyannote.github.io/pyannote-metrics}, 38 | } 39 | 40 | 41 | User guide 42 | ========== 43 | 44 | .. toctree:: 45 | :maxdepth: 3 46 | 47 | api 48 | cli 49 | 50 | API documentation 51 | ================= 52 | 53 | .. toctree:: 54 | :maxdepth: 3 55 | 56 | reference 57 | changelog 58 | 59 | 60 | Indices and tables 61 | ================== 62 | 63 | * :ref:`genindex` 64 | * :ref:`modindex` 65 | * :ref:`search` 66 | -------------------------------------------------------------------------------- /doc/source/pyplots/tutorial.py: -------------------------------------------------------------------------------- 1 | 2 | notebook.width = 10 3 | plt.rcParams['figure.figsize'] = (notebook.width, 3) 4 | 5 | # only display [0, 20] timerange 6 | notebook.crop = Segment(0, 40) 7 | 8 | # plot reference 9 | plt.subplot(211) 10 | reference = Annotation() 11 | reference[Segment(0, 10)] = 'A' 12 | reference[Segment(12, 20)] = 'B' 13 | reference[Segment(24, 27)] = 'A' 14 | reference[Segment(30, 40)] = 'C' 15 | notebook.plot_annotation(reference, legend=True, time=False) 16 | plt.gca().text(0.6, 0.15, 'reference', fontsize=16) 17 | 18 | # plot hypothesis 19 | plt.subplot(212) 20 | hypothesis = Annotation() 21 | hypothesis[Segment(2, 13)] = 'a' 22 | hypothesis[Segment(13, 14)] = 'd' 23 | hypothesis[Segment(14, 20)] = 'b' 24 | 
hypothesis[Segment(22, 38)] = 'c' 25 | hypothesis[Segment(38, 40)] = 'd' 26 | notebook.plot_annotation(hypothesis, legend=True, time=True) 27 | plt.gca().text(0.6, 0.15, 'hypothesis', fontsize=16) 28 | 29 | plt.show() 30 | -------------------------------------------------------------------------------- /doc/source/reference.rst: -------------------------------------------------------------------------------- 1 | ######### 2 | Reference 3 | ######### 4 | 5 | 6 | ****************** 7 | Evaluation metrics 8 | ****************** 9 | 10 | .. toctree:: 11 | :maxdepth: 3 12 | 13 | Here is a typical speaker diarization pipeline: 14 | 15 | .. image:: images/pipeline.png 16 | 17 | The first step is usually dedicated to speech activity detection, where the objective is to get rid of all non-speech regions. 18 | Then, speaker change detection aims at segmenting speech regions into homogeneous segments. 19 | The subsequent clustering step tries to group those speech segments according to the identity of the speaker. 20 | Finally, an optional supervised classification step may be applied to actually identify every speaker cluster in a supervised way. 21 | 22 | Looking at the final performance of the system is usually not enough for diagnostic purposes. 23 | In particular, it is often necessary to evaluate the performance of each module separately to identify their strengths and weaknesses, or to estimate the influence of their errors on the complete pipeline. 24 | 25 | Here, we provide the list of metrics that were implemented in `pyannote.metrics` with that very goal in mind. 26 | 27 | Because manual annotations cannot be precise at the audio sample level, it is common in speaker diarization research to remove from evaluation a 500ms collar around each speaker turn boundary (250ms before and after). 28 | Most of the metrics available in `pyannote.metrics` support a `collar` parameter, which defaults to 0.
29 | 30 | Moreover, though audio files can always be processed entirely (from beginning to end), there are cases where reference annotations are only available for some regions of the audio files. 31 | All metrics support the provision of an evaluation map that indicates which parts of the audio file should be evaluated. 32 | 33 | 34 | 35 | Detection 36 | --------- 37 | 38 | The two primary metrics for evaluating speech activity detection modules are detection error rate and detection cost function. 39 | 40 | Detection error rate (not to be confused with diarization error rate) is defined as: 41 | 42 | .. math:: 43 | \text{detection error rate} = \frac{\text{false alarm} + \text{missed detection}}{\text{total}} 44 | 45 | where :math:`\text{false alarm}` is the duration of non-speech incorrectly classified as speech, :math:`\text{missed detection}` is the duration of speech incorrectly classified as non-speech, and :math:`\text{total}` is the total duration of speech in the reference. 46 | 47 | Alternatively, speech activity module output may be evaluated in terms of detection cost function, which is defined as: 48 | 49 | .. math:: 50 | \text{detection cost function} = 0.25 \times \text{false alarm rate} + 0.75 \times \text{miss rate} 51 | 52 | where :math:`\text{false alarm rate}` is the proportion of non-speech incorrectly classified as speech and :math:`\text{miss rate}` is the proportion of speech incorrectly classified as non-speech. 53 | 54 | Additionally, detection may be evaluated in terms of accuracy (proportion of the input signal correctly classified), precision (proportion of detected speech that is speech), and recall (proportion of speech that is detected). 55 | 56 | .. automodule:: pyannote.metrics.detection 57 | :members: 58 | 59 | Segmentation 60 | ------------ 61 | 62 | Change detection modules can be evaluated using two pairs of dual metrics: precision and recall, or purity and coverage. 63 | 64 | ..
image:: images/segmentation.png 65 | 66 | Precision and recall are standard metrics based on the number of correctly detected speaker boundaries. Recall is 75% because 3 out of 4 reference boundaries were correctly detected, and precision is 100% because all hypothesized boundaries are correct. 67 | 68 | The main weakness of that pair of metrics (and their combination into an f-score) is that it is very sensitive to the `tolerance` parameter, i.e. the maximum distance between two boundaries for them to be matched. From one segmentation paper to another, authors may use very different values, thus making the approaches difficult to compare. 69 | 70 | We think that segment-wise purity and coverage should be used instead. 71 | They have several advantages over precision and recall, including the fact that they do not depend on any `tolerance` parameter, and that they directly relate to the cluster-wise purity and coverage used for evaluating speaker diarization. 72 | 73 | Segment-wise coverage is computed for each segment in the reference as the ratio of the duration of the intersection with the most co-occurring hypothesis segment and the duration of the reference segment. 74 | For instance, coverage for reference segment 1 is 100% because it is entirely covered by hypothesis segment A. 75 | 76 | Purity is the dual metric that indicates how `pure` hypothesis segments are. For instance, segment A is only 65% pure because it is covered at 65% by segment 1 and 35% by segment 2. 77 | 78 | The final values are duration-weighted averages over all segments. 79 | 80 | .. automodule:: pyannote.metrics.segmentation 81 | :members: 82 | 83 | Diarization 84 | ----------- 85 | 86 | Diarization error rate (DER) is the *de facto* standard metric for evaluating and comparing speaker diarization systems. 87 | It is defined as follows: 88 | 89 | ..
math:: 90 | 91 | \text{DER} = \frac{\text{false alarm} + \text{missed detection} + \text{confusion}}{\text{total}} 92 | 93 | where :math:`\text{false alarm}` is the duration of non-speech incorrectly classified as speech, :math:`\text{missed detection}` is the duration of 94 | speech incorrectly classified as non-speech, :math:`\text{confusion}` is the duration of speaker confusion, and :math:`\text{total}` is the sum over all speakers of their reference speech duration. 95 | 96 | Note that this metric does take overlapping speech into account, potentially leading to increased missed detection in case the speaker diarization system does not include an overlapping speech detection module. 97 | 98 | Optimal vs. greedy 99 | ****************** 100 | 101 | Two implementations of the diarization error rate are available (optimal and greedy), depending on how the one-to-one mapping between reference and hypothesized speakers is computed. 102 | 103 | The `optimal` version uses the Hungarian algorithm to compute the mapping that minimizes the confusion term, while the `greedy` version operates in a greedy manner, mapping reference and hypothesized speakers iteratively, by decreasing value of their co-occurrence duration. 104 | 105 | In practice, the `greedy` version is much faster than the `optimal` one, especially for files with a large number of speakers -- though it may slightly over-estimate the value of the diarization error rate. 106 | 107 | Purity and coverage 108 | ******************* 109 | 110 | While the diarization error rate provides a convenient way to compare different diarization approaches, it is usually not enough to understand the type of errors committed by the system. 111 | 112 | Purity and coverage are two dual evaluation metrics that provide additional insight on the behavior of the system. 113 | 114 | ..
math:: 115 | 116 | \text{purity} & = & \frac{\displaystyle \sum_{\text{cluster}} \max_{\text{speaker}} |\text{cluster} \cap \text{speaker}| }{\displaystyle \sum_{\text{cluster}} |\text{cluster}|} \\ 117 | \text{coverage} & = & \frac{\displaystyle \sum_{\text{speaker}} \max_{\text{cluster}} |\text{speaker} \cap \text{cluster}| }{\displaystyle \sum_{\text{speaker}} |\text{speaker}|} \\ 118 | 119 | where :math:`|\text{speaker}|` (respectively :math:`|\text{cluster}|`) is the speech duration of this particular reference speaker (resp. hypothesized cluster), and :math:`|\text{speaker} \cap \text{cluster}|` is the duration of their intersection. 120 | 121 | Over-segmented results (e.g. too many speaker clusters) tend to lead to high purity and low coverage, while under-segmented results (e.g. when two speakers are merged into one large cluster) lead to low purity and higher coverage. 122 | 123 | Use case 124 | ******** 125 | 126 | This figure depicts the evolution of a multi-stage speaker diarization system applied on the ETAPE dataset. 127 | It is roughly made of four consecutive modules (segmentation, BIC clustering, Viterbi resegmentation, and CLR clustering). 128 | 129 | .. image:: images/diagnostic.png 130 | 131 | From the upper part of the figure (DER as a function of the module), it is clear that each module improves the output of the previous one. 132 | 133 | Yet, the lower part of the figure clarifies the role of each module. 134 | BIC clustering tends to increase the size of the speaker clusters, at the expense of purity (-7%). 135 | Viterbi resegmentation addresses this limitation and greatly improves cluster purity (+5%), with very little impact on the actual cluster coverage (+2%). 136 | Finally, CLR clustering brings an additional +5% coverage improvement. 137 | 138 | 139 | .. automodule:: pyannote.metrics.diarization 140 | :members: 141 | 142 | .. 
automodule:: pyannote.metrics.matcher 143 | :members: 144 | 145 | Identification 146 | -------------- 147 | 148 | In case prior speaker models are available, the speech turn clustering module may be followed by a supervised speaker recognition module for cluster-wise supervised classification. 149 | 150 | `pyannote.metrics` also provides a collection of evaluation metrics for this identification task. This includes precision, recall, and identification error rate (IER): 151 | 152 | .. math:: 153 | \text{IER} = \frac{\text{false alarm} + \text{missed detection} + \text{confusion}}{\text{total}} 154 | 155 | which is similar to the diarization error rate (DER) introduced previously, except that the :math:`\texttt{confusion}` term is computed directly by comparing reference and hypothesis labels, and does not rely on a prior one-to-one matching. 156 | 157 | .. automodule:: pyannote.metrics.identification 158 | :members: 159 | 160 | ************** 161 | Error analysis 162 | ************** 163 | 164 | Segmentation 165 | ============ 166 | 167 | .. automodule:: pyannote.metrics.errors.segmentation 168 | :members: 169 | 170 | Identification 171 | ============== 172 | 173 | .. automodule:: pyannote.metrics.errors.identification 174 | :members: 175 | 176 | ***** 177 | Plots 178 | ***** 179 | 180 | Binary classification 181 | ===================== 182 | 183 | .. automodule:: pyannote.metrics.plot.binary_classification 184 | :members: 185 | -------------------------------------------------------------------------------- /doc/source/tutorial.rst: -------------------------------------------------------------------------------- 1 | ######### 2 | Tutorial 3 | ######### 4 | 5 | This tutorial will guide you through a simple example on how to use `pyannote.metrics` for evaluation purposes. 6 | 7 | `pyannote.metrics` internally relies on :class:`pyannote.core.Annotation` data structure to store reference and hypothesis annotations. 8 | 9 | .. 
plot:: pyplots/tutorial.py 10 | 11 | 12 | .. ipython:: 13 | 14 | In [10]: from pyannote.core import Segment, Timeline, Annotation 15 | 16 | In [11]: reference = Annotation() 17 | ....: reference[Segment(0, 10)] = 'A' 18 | ....: reference[Segment(12, 20)] = 'B' 19 | ....: reference[Segment(24, 27)] = 'A' 20 | ....: reference[Segment(30, 40)] = 'C' 21 | 22 | In [12]: hypothesis = Annotation() 23 | ....: hypothesis[Segment(2, 13)] = 'a' 24 | ....: hypothesis[Segment(13, 14)] = 'd' 25 | ....: hypothesis[Segment(14, 20)] = 'b' 26 | ....: hypothesis[Segment(22, 38)] = 'c' 27 | ....: hypothesis[Segment(38, 40)] = 'd' 28 | 29 | 30 | Several evaluation metrics are available, including the diarization error rate: 31 | 32 | 33 | .. ipython:: 34 | :okwarning: 35 | 36 | In [13]: from pyannote.metrics.diarization import DiarizationErrorRate 37 | 38 | In [14]: metric = DiarizationErrorRate() 39 | 40 | In [15]: metric(reference, hypothesis) 41 | Out[15]: 0.516 42 | 43 | That's it for the tutorial. 44 | `pyannote.metrics` can do much more than that! Keep reading... 
45 | -------------------------------------------------------------------------------- /notebooks/.ipynb_checkpoints/Better purity metric-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 5 6 | } 7 | -------------------------------------------------------------------------------- /notebooks/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "11bb7327", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "WAV = '/Users/hbredin/Development/pyannote/pyannote-audio/tutorials/assets/sample.wav'\n", 11 | "REF = '/Users/hbredin/Development/pyannote/pyannote-audio/tutorials/assets/sample.rttm'" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "id": "7a655749", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from pyannote.database.util import load_rttm\n", 22 | "reference = load_rttm(REF)['sample']" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "id": "cad117ea", 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABiYAAAEcCAYAAABDHgySAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAf1klEQVR4nO3dfZBV5Z0n8N9tGpoGuptuXpp3gkLQFTQRk7iaMeIA29RqRMmuGXXGLjNOUkHWJGbWmB0JxpFUqNWtclAnq4CVBFei8SWFExxHBSZxE3xdnSlgQgIjii1vDTRvjS13/zCiyFt3c/s5t+nPp+pW2+fec54v175Pn77fe87J5fP5fAAAAAAAACRQknUAAAAAAACg61BMAAAAAAAAySgmAAAAAACAZBQTAAAAAABAMooJAAAAAAAgGcUEAAAAAACQjGICAAAAAABIRjEBAAAAAAAko5gAAAAAAACSUUwAAAAAAADJKCYAAAAAAIBkFBMAAAAAAEAyigkAAAAAACAZxQQAAAAAAJCMYgIAAAAAAEhGMQEAAAAAACTTZYuJ559/Prp16xZ1dXVZRyl69fX1kcvlIpfLRffu3aO2tjYmT54cCxYsiAMHDmQdr6g1NDTEDTfcEKNHj46ePXtGbW1tfP7zn4+///u/jz179mQdr+jU19fHtGnTso5RlI703DzyyCPRs2fPmDt3bjahAAAAAKAdSgu/ycbCb/KYqtu11oIFC2LmzJlx//33xxtvvBEjRowocK7W27t3b9LxysvL27xOXV1dLFy4MN5777145513YunSpXHDDTfEI488Er/4xS+itLQDfpQ6uT/84Q9x/vnnR9++fWPOnDkxfvz4aGlpiX/7t3+LBQsWxJAhQ+KLX/xi1jHppO6///6YMWNG3H333fGXf/mXWccBAAAAgFbrgHeTJxd+k8f0YpvX2L17d/zsZz+LF154IRoaGuKBBx6IWbNmdUC21vnJT36SdLy/+qu/avM6ZWVlMWjQoIiIGDp0aJx99tlx7rnnxp/+6Z/GAw884I3RI/j6178epaWl8eKLL0bv3r0PLh8/fnxMnz498vl8hunozObOnRuzZs2KBx98MKZPn551HAAAAABoky75MffFixfH2LFjY+zYsXH11VfHzJkz45ZbbolcLpd1tE7loosuirPOOiseffTR5MXEtn3bonHftmTjVfesiZqeNa1+/NatW+Mf//EfY86cOYeUEh+Vyc9b09vv31KoGPz+rRPY27gp9jZuTjZeefWAKK8e2K51v/Od78Tdd98dS5YsiUmTJhU4GQAAAAB0vC5ZTMyfPz+uvvrqiHj/FEW7du2KZ555xpt87XDaaafFa6+9lnzcpet+GQ+teTDZeF8ee2VcefpVrX782rVrI5/Px9ixYw9Z3r9//9i3b19ERMyYMSN++MMfFjTncb34o4jlt6YZ6wvfi5g4O81YJ2jtP/0s/uWRu5ONN+5LM2L8f7m+zev98pe/jCeeeCKeeeaZuOiiizogGQAAAAB0vC5XTKxZsyZWrlwZjz76aERElJaWxhVXXBELFixQTLRDPp/P5JP/daOmxucGfy7ZeNVtOFrioz7+3KxcuTIOHDgQV111VTQ3NxciWtuc89WIsYmua9FJjpaIiBg96b/G0AkTk41XXj2gXeudeeaZsWXLlpg1a1Z85jOfiYqKigInAwAAAICO1wHFxNOF32QBzZ8/P1paWmLo0KEHl+Xz+ejevXs0NjZGdXX7LqZ9Iv78z/88+ZiFsmrVqhg1alTycWvaeGql1EaPHh25XC5Wr159yPJTTjklItp3AfKC6ESnV0qpvHpgu0+tlNLQoUPj5z//eUycODHq6upi6dKlygkAAAAAOp0OKCbSv7HfWi0tLfHjH/847rjjjpgyZcoh902fPj0WLVoU11/f9tOrnKjM3qQ+Qc8++2y8/vrr8c1vfjPrKEWnX79+MXny5Jg3b17MnDnzqNeZgLYaMWJELF++PCZOnBh
TpkyJp556KiorK7OOBQAAAACtVpJ1gJSWLFkSjY2N8ZWvfCXGjRt3yO1LX/pSzJ8/P+uIRau5uTkaGhrirbfeipdffjnmzJkTl156aVx88cXxF3/xF1nHK0r33HNPtLS0xDnnnBOLFy+OVatWxZo1a+KnP/1prF69Orp165Z1RDqpYcOGxbJly2Lr1q0xZcqU2LFjR9aRAAAAAKDVulQxMX/+/Jg0aVJUVVUddt/06dPj1VdfjZdffjmDZMVv6dKlMXjw4PjEJz4RdXV18dxzz8Vdd90VTzzxhDfYj+LUU0+NV155JSZNmhQ333xznHXWWXHOOefE3/3d38W3v/3tuO2227KOSCc2dOjQWL58eWzfvj0mT54c27dvzzoSAAAAALRKLp/P57MOAQAAAAAAdA1d6ogJAAAAAAAgW4oJAAAAAAAgGcUEAAAAAACQjGICAAAAAABIRjEBAAAAAAAko5gAAAAAAACSUUwAAAAAAADJKCYAAAAAAIBkFBMAAAAAAEAyigkAAAAAACCZLllM1NfXRy6XO3jr169f1NXVxWuvvZZ1tKL08efrg1tdXV3W0YpafX19TJs27bDly5Yti1wuF9u3b0+eqdgd7TnDcwMAAADAyaO00Bvct3NboTd5TD0ra9q1Xl1dXSxcuDAiIhoaGuJv/uZv4uKLL4433nijkPFOGh99vj5QVlaWURoAAAAAADqrghcTj113fqE3eUx/tnhVu9YrKyuLQYMGRUTEoEGD4qabbooLLrggNm/eHAMGDChkxJPCR58vAAAAAABor4IXE53Rrl27YtGiRTF69Ojo169f8vHfe+edeG/TpmTjdRs4MLrV1iYbryPs3rYn9jTuTTZer+ry6F3TK9l4HWfLH28p9P/jrfjlm5ojv2t/svFyfXpErsIRRwAAAAB0TV22mFiyZEn06dMnIiJ2794dgwcPjiVLlkRJSfrLbuz+6aJouvN/JRuv4lvfjMobv9WmdT76fH3gpptuiltuuaWQ0Vpt1VO/i5ceej3ZeBO+PD7O+bOz2rzekZ639957r1Cx2uHnEXFforGui4ivJhrrxLS8tDHeW/HvycbrdsHI6H7hqGTjAQAAAEAx6bLFxMSJE+Pee++NiIht27bFPffcE1OnTo2VK1fGyJEjk2bpffVV0XPK5GTjdRs4sM3rfPT5+kBNTfuu71EIp/+nMTHys8OSjderurxd6x3pefvtb38bV199dSFitcP0iPhCorE6x9ESERGlE4ZEt7Hp8ub69Eg2FgAAAAAUm4IXE5fd9+tCb7JD9O7dO0aPHn3w+wkTJkRVVVXcd9998bd/+7dJs3SrrS36Uyt9/PnKWu+aXp3i1EpHet7efPPNjNJEdKbTK6WUqyhzaiUAAAAASKTgxUTPyuw+RX8icrlclJSUxN696a5bAAAAAAAAXU2XPZVTc3NzNDQ0REREY2NjzJs3L3bt2hWXXHJJxsmK00efrw+UlpZG//4+fQ+p7NixI1599dVDltXU1MSIESOyCQQAAAAA7dBli4mlS5fG4MGDIyKioqIiTjvttHj44YfjwgsvzDZYkfro8/WBsWPHxurVqzNKxMnowIEDUVraZael41q2bFl8+tOfPmTZNddcEw888EA2gQAAAACgHXL5fD6fdQiAiIi6uroYPXp0zJs3L+soAAAAAEAHKck6AEBjY2M8+eSTsWzZspg0aVLWcQAAAACADuScKUDmrr322njhhRfixhtvjEsvvTTrOAAAAABAB3IqJwAAAAAAIBmncgIAAAAAAJJRTAAAAAAAAMkoJgAAAAAAgGQUEwAAAAAAQDKKCQAAAAAAIBnFBAAAAAAAkEyXLSYaGhpi5syZccopp0RZWVkMHz48LrnkknjmmWeyjlZ06uvrI5fLHXZbu3Zt1tEAAAAAAOhkSrMOkIX169fH+eefH3379o25c+fGmWeeGe+++2489dRTMWPGjFi9enXWEYtOXV1dLFy48JBlAwYMyCgNAAA
AAACdVcGLifzu/YXe5DHlevdo8zpf//rXI5fLxcqVK6N3794Hl59xxhlx7bXXFjLeSaOsrCwGDRqUdQwAAAAAADq5ghcTzXc8X+hNHlPPWRe26fHbtm2LpUuXxu23335IKfGBvn37FiYYAAAAAABwmC53Kqe1a9dGPp+P0047LesoB21pao4tTc3JxutfURb9K8ratM6SJUuiT58+B7+fOnVqPPzww4WOBgAAAADASa7LFRP5fD4iInK5XMZJPvTYixti/rLfJxvvKxeeGtdNHN2mdSZOnBj33nvvwe+PdLQJAAAAAAAcT5crJsaMGRO5XC5WrVoV06ZNyzpORERcds7w+JOxA5ON19ajJSLeLyJGj25bmQEAAAAAAB+Xy39wCEGBdIaLX0+dOjVef/31WLNmzWGf/N++fbvrTHxMfX19bN++PR5//PGsowAAAAAA0MkV/IiJ9hQFqd1zzz1x3nnnxWc/+9n4/ve/H2eeeWa0tLTE008/Hffee2+sWrUq64gAAAAAAHBS6nKncoqIGDVqVLz88stx++23x4033hhvv/12DBgwICZMmHDIdRQAAAAAAIDCKvipnAAAAAAAAI6mJOsAAAAAAABA16GYAAAAAAAAklFMAAAAAAAAySgmAAAAAACAZBQTAAAAAABAMooJAAAAAAAgGcUEAAAAAACQTGl7Vzxw4EBs3LgxKioqIpfLFTITAAAAAADQyeTz+WhqaoohQ4ZEScnRj4todzGxcePGGD58eHtXBwAAAAAATkIbNmyIYcOGHfX+dhcTFRUVBweorKxs72YAAAAAAICTwM6dO2P48OEH+4OjaXcx8cHpmyorKxUTAAAAAABARMRxL//g4tcAAAAAAEAyigkAAAAAACAZxQQAAAAAAJCMYgIAAAAAAEhGMQEAAAAAACSjmAAAAAAAAJJRTAAAAAAAAMkoJgAAAAAAgGQUEwAAAAAAQDKKCQAAAAAAIBnFBAAAAAAAkIxiAgAAAAAASEYxAQAAAAAAJKOYAAAAAAAAklFMAAAAAAAAySgmAAAAAACAZBQTAAAAAABAMooJAAAAAAAgGcUEAAAAAACQjGICAAAAAABIRjEBAAAAAAAko5gAAAAAAACSUUwAAAAAAADJKCYAAAAAAIBkFBMAAAAAAEAyigkAAAAAACAZxQQAAAAAAJCMYgIAAAAAAEhGMQEAAAAAACSjmAAAAAAAAJJRTAAAAAAAAMkoJgAAAAAAgGQUEwAAAAAAQDKKCQAAAAAAIBnFBAAAAAAAkIxiAgAAAAAASEYxAQAAAAAAJKOYAAAAAAAAklFMAAAAAAAAySgmAAAAAACAZApQTGw98U200t7G1fHyj6+Ml388O/Y2burw8fJNzfHusnWRb2ru8LE6yu5NjbHykX+K3Zsas47Ccexavz5W3/jt2LV+fdZRMrd5w5pYuOD62LxhTUREbNu3LR5ctSi27duWcTLoGnZv2xMv/p//F7u37ck6CtAGrXrtNr0d8dzs978CxacAr9G9jZvi9YfnJfmbGQCKid+BnUsnKyZ+H2uefCXWPLk49jZu7vDx8rv2x3sr/j3yu/Z3+FgdZffWnfHqtj/E7q07s47CcezZsCEqHlocezZsyDpK5ra+sy4eq1kXW99ZFxERjfu2xUNrHoxGxQQksadxb7z00Ouxp3Fv1lGANmjVa7fp7YjltyomoFgV4DW6t3Fz/Msjdyf5mxkAionfgZ2LUzkBAAAAAADJKCYAAAAAAIBkFBMAAAAAAEAypSe+iZ0RkerCyrsTjXOo/N53I7+7c15nIt/cEhERze/uj717nSu8mO1v3h+5rEMAAAAAAHSwAhQTN0ZEtxPfTKu8l2icQ73709cyGbcQWkr3RfSP+OVvnov4TdZpOJbKt9+OP8k6BAAAAABABytAMQEUUn7dutjft2/WMTLV8saGiJKIN/dviu7b18aGpg1ZR4IuqfHNHVlHANqgTa/ZLas6LgjQfgV8be586w8F2xY
AdAZ+93UuigkoMi03/4/YnHWIjDUNL4/4zpi4a9PiiE2Ls44DXdZzdz6fdQSgozx6ddYJgA72f+f996wjAAAclWICikzpD26P6k9/OusYmWr83W8i4vH4bwOviFH/4bzY0LQh7nzpf2YdC7qcid86L6qHVWUdA2ilxjd3tL5QvPynEf1P79hAQNttWVWw4vA/Xj83KoeeUpBtAUBnsPOtPyjmO5ECFBN3RESqN1GfjojvJRrrQ92vPjNKavskH7cQSn+3IWLlGzH13InRf8ywrONwDFt//XzE/74/cqNGRY/x47OOk6nS5rci3owY1mNgnNp3dNZxoMuqHlYVA07tl3UMoCP0Pz1iyNlZpwA6UOXQU6LmlDOyjgEAcEQFKCYqI6L6xDfTKr0TjXOoXHn3yPXukcnYJypX9v7/4rLuPaK8vDzjNBxLj7Ie8W7WIQAAAAAAOlhJ1gEAAAAAAICuQzEBAAAAAAAko5gAAAAAAACSUUwAAAAAAADJFODi1/1OfBOtVF59aoz9z5+OiE9GefWADh8v16dHdLtgZOT6dM4LX0dE9O5XGZ+qOSV696vMOgrH0Wv48Hjzy1fEsOHDs46SuX61o+Ky10ZFvwmjIiKiumdNfHnslVHdsybjZNA19KoujwlfHh+9qsuzjgK0QateuxWDI77wvfe/AsWnAK/R8uoBMe5LM5L8zQwAxcTvwM4ll8/n8+1ZcefOnVFVVRU7duyIykpvegMAAAAAQFfW2t7AqZwAAAAAAIBkFBMAAAAAAEAyigkAAAAAACAZxQQAAAAAAJCMYgIAAAAAAEhGMQEAAAAAACSjmAAAAAAAAJJRTAAAAAAAAMkoJgAAAAAAgGQUEwAAAAAAQDKKCQAAAAAAIBnFBAAAAAAAkIxiAgAAAAAASEYxAQAAAAAAJKOYAAAAAAAAklFMAAAAAAAAySgmAAAAAACAZBQTAAAAAABAMooJAAAAAAAgGcUEAAAAAACQjGICAAAAAABIRjEBAAAAAAAko5gAAAAAAACSUUwAAAAAAADJKCYAAAAAAIBkFBMAAAAAAEAyigkAAAAAACAZxQQAAAAAAJCMYgIAAAAAAEhGMQEAAAAAACSjmAAAAAAAAJJRTAAAAAAAAMkoJgAAAAAAgGQUEwAAAAAAQDKKCeCItjQ1x33PrY0tTc1ZRwG6CPMOFIstEfGjP34tpm0BAJBavqk53l22LvL+TqOVWvuzopgAjmhLU3PMX/Z7bxACyZh3oFhsiYj7onDFRKG2BQBAavld++O9Ff8e+V37s45CJ5Hf3bqfFcUEAAAAAACQjGICAAAAAABIRjEBAAAAAAAko5gAAAAAAACSKc06AFDc1m/elXUEoIsw30CxWVck2wAAIGsHtuzJOgKdxIGte1v1OMUEcEyzH3096wgAQCZuyToAAABFouWxVVlHoJNo2be7VY9TTADHNPvy8fGJAX2yjgF0Aes371KGQlG5LSJGneA21oWCAwCg8yu97PQo6d8r6xh0AqXr34n4QSse1/FRgM7sEwP6xGlDKrOOAQAkNyoiTss6BAAARaCkf68oGVyRdQw6gZKmna17XAfnAAAAAAAAOEgxAQAAAAAAJKOYAAAAAAAAklFMAAAAAAAAySgmAAAAAACAZBQTwBH1ryiLr1x4avSvKMs6CtBFmHegWPSPiOv++LWYtgUAQGq5Pj2i2wUjI9enR9ZR6CRyvVv3s5LL5/P59gywc+fOqKqqih07dkRlZWV7NgEAAAAAAJwkWtsbOGICAAAAAABIRjEBAAAAAAAko5gAAAAAAACSUUwAAAAAAADJKCYAAAAAAIBkFBMAAAAAAEAyigkAAAAAACAZxQQAAAAAAJCMYgIAAAAAAEhGMQEAAAAAACSjmAAAAAAAAJJRTAAAAAAAAMkoJgAAAAAAgGQUEwAAAAAAQDKKCQAAAAAAIBnFBAAAAAAAkIxiAgAAAAAASEYxAQAAAAAAJKOYAAAAAAAAklFMAAAAAAA
AySgmAAAAAACAZBQTAAAAAABAMooJAAAAAAAgGcUEAAAAAACQjGICAAAAAABIRjEBAAAAAAAko5gAAAAAAACSUUwAAAAAAADJKCYAAAAAAIBkFBMAAAAAAEAyigkAAAAAACAZxQQAAAAAAJCMYgIAAAAAAEhGMQEAAAAAACSjmAAAAAAAAJJRTAAAAAAAAMkoJgAAAAAAgGQUEwAAAAAAQDKKCQAAAAAAIBnFBAAAAAAAkIxiAgAAAAAASKa0vSvm8/mIiNi5c2fBwgAAAAAAAJ3TB33BB/3B0bS7mGhqaoqIiOHDh7d3EwAAAAAAwEmmqakpqqqqjnp/Ln+86uIoDhw4EBs3boyKiorI5XLtDggc3c6dO2P48OGxYcOGqKyszDoOQFEzZwK0jXkToG3MmwDHl8/no6mpKYYMGRIlJUe/kkS7j5goKSmJYcOGtXd1oA0qKyvt9AC0kjkToG3MmwBtY94EOLZjHSnxARe/BgAAAAAAklFMAAAAAAAAySgmoIiVlZXF9773vSgrK8s6CkDRM2cCtI15E6BtzJsAhdPui18DAAAAAAC0lSMmAAAAAACAZBQTAAAAAABAMooJAAAAAAAgGcUEAAAAAACQjGICisCKFSvikksuiSFDhkQul4vHH3/8kPvz+XzMnj07hgwZEuXl5XHhhRfGv/7rv2YTFiBjx5sz6+vrI5fLHXI799xzswkLkLEf/OAH8ZnPfCYqKipi4MCBMW3atFizZs0hj7GvCfCh1syb9jcBTpxiAorA7t2746yzzop58+Yd8f65c+fGnXfeGfPmzYsXXnghBg0aFJMnT46mpqbESQGyd7w5MyKirq4u3n777YO3f/iHf0iYEKB4LF++PGbMmBG/+c1v4umnn46WlpaYMmVK7N69++Bj7GsCfKg182aE/U2AE5XL5/P5rEMAH8rlcvHYY4/FtGnTIuL9T7ANGTIkvvGNb8RNN90UERHNzc1RW1sbP/zhD+OrX/1qhmkBsvXxOTPi/U+wbd++/bAjKQCI2Lx5cwwcODCWL18eF1xwgX1NgOP4+LwZYX8ToBAcMQFFbt26ddHQ0BBTpkw5uKysrCy+8IUvxPPPP59hMoDitWzZshg4cGB88pOfjOuuuy42bdqUdSSAorBjx46IiKipqYkI+5oAx/PxefMD9jcBToxiAopcQ0NDRETU1tYesry2tvbgfQB8aOrUqbFo0aJ49tln44477ogXXnghLrroomhubs46GkCm8vl8fOtb34rPf/7zMW7cuIiwrwlwLEeaNyPsbwIUQmnWAYDWyeVyh3yfz+cPWwZAxBVXXHHwv8eNGxfnnHNOjBw5Mp588sm4/PLLM0wGkK3rr78+XnvttfjVr3512H32NQEOd7R50/4mwIlzxAQUuUGDBkVEHPaJtU2bNh32yTYADjd48OAYOXJk/O53v8s6CkBmZs6cGb/4xS/iueeei2HDhh1cbl8T4MiONm8eif1NgLZTTECRGzVqVAwaNCiefvrpg8v2798fy5cvj/POOy/DZACdw9atW2PDhg0xePDgrKMAJJfP5+P666+PRx99NJ599tkYNWrUIffb1wQ41PHmzSOxvwnQdk7lBEVg165dsXbt2oPfr1u3Ll599dWoqamJESNGxDe+8Y2YM2dOjBkzJsaMGRNz5syJXr16xZVXXplhaoBsHGvOrKmpidmzZ8f06dNj8ODBsX79+vjud78b/fv3j8suuyzD1ADZmDFjRjz44IPxxBNPREVFxcEjI6qqqqK8vDxyuZx9TYCPON68uWvXLvubAAWQy+fz+axDQFe3bNmymDhx4mHLr7nmmnjggQcin8/HrbfeGj/60Y+isbExPve5z8Xdd999yMW3ALqKY82Z9957b0ybNi1eeeWV2L59ewwePDgmTpwYt912WwwfPjyDtADZOtp1IhYuXBj19fUREfY1AT7iePPm3r177W8CFIBiAgAAAAAASMY1JgAAAAAAgGQUEwAAAAAAQDK
KCQAAAAAAIBnFBAAAAAAAkIxiAgAAAAAASEYxAQAAAAAAJKOYAAAAAAAAklFMAAAAxzV79uz41Kc+lXUMAADgJJDL5/P5rEMAAADZyeVyx7z/mmuuiXnz5kVzc3P069cvUSoAAOBkpZgAAIAurqGh4eB/L168OGbNmhVr1qw5uKy8vDyqqqqyiAYAAJyEnMoJAAC6uEGDBh28VVVVRS6XO2zZx0/lVF9fH9OmTYs5c+ZEbW1t9O3bN2699dZoaWmJv/7rv46ampoYNmxYLFiw4JCx3nrrrbjiiiuiuro6+vXrF5deemmsX78+7T8YAADIlGICAABol2effTY2btwYK1asiDvvvDNmz54dF198cVRXV8dvf/vb+NrXvhZf+9rXYsOGDRERsWfPnpg4cWL06dMnVqxYEb/61a+iT58+UVdXF/v378/4XwMAAKSimAAAANqlpqYm7rrrrhg7dmxce+21MXbs2NizZ09897vfjTFjxsTNN98cPXr0iF//+tcREfHQQw9FSUlJ3H///TF+/Pg4/fTTY+HChfHGG2/EsmXLsv3HAAAAyZRmHQAAAOiczjjjjCgp+fCzTrW1tTFu3LiD33fr1i369esXmzZtioiIl156KdauXRsVFRWHbGffvn3x+9//Pk1oAAAgc4oJAACgXbp3737I97lc7ojLDhw4EBERBw4ciAkTJsSiRYsO29aAAQM6LigAAFBUFBMAAEASZ599dixevDgGDhwYlZWVWccBAAAy4hoTAABAEldddVX0798/Lr300vjnf/7nWLduXSxfvjxuuOGGePPNN7OOBwAAJKKYAAAAkujVq1esWLEiRowYEZdffnmcfvrpce2118bevXsdQQEAAF1ILp/P57MOAQAAAAAAdA2OmAAAAAAAAJJRTAAAAAAAAMkoJgAAAAAAgGQUEwAAAAAAQDKKCQAAAAAAIBnFBAAAAAAAkIxiAgAAAAAASEYxAQAAAAAAJKOYAAAAAAAAklFMAAAAAAAAySgmAAAAAACAZBQTAAAAAABAMv8fzgepDxOQaNkAAAAASUVORK5CYII=\n", 34 | "text/plain": [ 35 | "" 36 | ] 37 | }, 38 | "execution_count": null, 39 | "metadata": {}, 40 | "output_type": "execute_result" 41 | } 42 | ], 43 | "source": [ 44 | "# here, I simulate your segmentation pipeline \n", 45 | "# but you should obviously use your own instead\n", 46 | "from pyannote.audio import Pipeline\n", 47 | "import torch\n", 48 | "pipeline = Pipeline.from_pretrained(\"pyannote/speaker-diarization-3.1\").to(torch.device('mps'))\n", 49 | "segmentation = pipeline(WAV).relabel_tracks()\n", 50 | "segmentation" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "35a0ac7c", 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABiYAAADyCAYAAADJJ33UAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAbqklEQVR4nO3dfZAV5Z0v8O8BYRxgeBmQYUZwZLPItaLXl7jXRb2iq2K8Zkt82SCarMTdqKtuJFvLpqJZhahYmtXdiuYmVKVMKsZE2cRAajVljAhoqFCGxJiXKja4IiTAxY28DgMT5dw/LCdBFAYYnzMwn0/V1Mzp7tP9OzNPP/3M+Z7urlSr1WoAAAAAAAAK6FPrAgAAAAAAgN5DMAEAAAAAABQjmAAAAAAAAIoRTAAAAAAAAMUIJgAAAAAAgGIEEwAAAAAAQDGCCQAAAAAAoBjBBAAAAAAAUIxgAgAAAAAAKEYwAQAAAAAAFCOYAAAAAAAAihFMAAAAAAAAxQgmAAAAAACAYgQTAAAAAABAMYIJAAAAAACgGMFEDUybNi2TJ0+udRkcBLQVAAAAAOBQI5g4hGzZsiXTp09Pa2tr6uvrc9ppp+X555/fZZlqtZqZM2empaUl9fX1Oeuss/LLX/6yRhVTK11pK4899ljOP//8jBgxIpVKJS+88EJtigUAAAAADimHdfcK2zdt7+5V7lH9kMOLbq8n+v3vf59+/frlb//2b/OLX/wiDz30UFpaWvL1r3895557bn71q1/lyCOPTJLcc889ue+++/LVr341xxxzTO64446cd955Wb58eRoaGsoX3/Zq2e0NPKLs9nqYfWkrbW1tOf300/NXf/VX+fjHP17jygEAAACAQ0WlWq1Wu3OFcy76eneubq+unf+RfX7Ot771rcyaNSsrVqzIgAEDctJJJ2X+/Pm54YYbsnHjxpx00kn5whe+kO3bt2fq1Km5//77079//yRvnnHwuc99Ll/60peydu3aHHPMMfnnf/7nXHbZZUmSN954I9dcc00WLFiQdevW5aijjsr111+fm266qXP706ZNy8aNGzNv3rwkybJly3LBBRfkpptuyi233JJNmzZlxowZmTdvXrZv355TTjkl//qv/5oTTjghSTJz5szMmzcvn/jEJ3LHHXdk5cqVaWtrS0NDQ+bPn58LL7ywc1snnnhiPvShD+WOO+5ItVpNS0tLpk+fnk996lNJkh07dqSpqSl33313rr322v36GxyQmZXC29u35t5b28ofW7lyZcaOHZuf/vSnOfHEE/f1Nw4AAAAAsItuP2Oip1u7dm2mTp2ae+65JxdffHG2bNmSZ599Nm/lM08//XQOP/zwPPPMM1m5cmU+9rGPZcSIEbnzzjuTJJ/5zGfy2GOP5Ytf/GLGjRuXxYsX5yMf+UiOOOKITJw4MTt37szo0aMzd+7cjBgxIkuWLMk111yT5ubmfPjDH96tnoULF2by5Mm566678nd/93epVqu58MIL09jYmCeeeCJDhgzJnDlzcs455+Q///M/09jYmCRZsWJF5s6dm29/+9vp27dvXn/99bzxxhs5/PBdzyCpr6/Pc889lyR5+eWXs27dukyaNKlzfl1dXSZOnJglS5bUJpjowXpzWwEAAAAAeK/0ymDi9ddfzyWXXJLW1tYkyfHHH985v3///nnwwQczYMCAvP/9789nP/vZzJgxI7fffnva29tz3333ZcGCBZkwYUKS5E/+5E/y3HPPZc6cOZk4cWL69euXWbNmda5v7NixWbJkSebOnbvbm83z58/PRz/60cyZMydTp05NkjzzzDP5+c9/nvXr16euri5J8i//8i+ZN29evvWtb+Waa65JknR0dOShhx7KEUf84dJEEyZMyO23355jjz02TU1N+eY3v5mlS5dm3LhxSZJ169YlSZqamnapo6mpKa+88sqB/3IPMb25rQAAAAAAvFd6XTBxwgkn5Jxzzsnxxx+f888/P5MmTcpll12WYcO
Gdc4fMGBA5/ITJkzI1q1bs3r16qxfvz7bt2/Peeedt8s6Ozo6ctJJJ3U+/tKXvpQvf/nLeeWVV9Le3p6Ojo7dLoGzdOnS/Md//Ef+/d//PRdffHHn9GXLlmXr1q0ZPnz4Lsu3t7fnpZde6nzc2tq6yxvNSfLQQw/l6quvzpFHHpm+ffvm5JNPzhVXXJGf/OQnuyxXqex6+aRqtbrbNLQVAAAAAID3QrcHE3/9tcu6e5Xdqm/fvnnqqaeyZMmSfP/738/999+fW265JUuXLt3j8yqVSnbu3JkkefzxxztvEPyWtz6xPnfu3Hzyk5/MvffemwkTJqShoSGf+9zndlv/+973vgwfPjwPPvhgLrzwws77EuzcuTPNzc1ZuHDhbjUMHTq08+eBAwfuNv9973tfFi1alLa2tmzevDnNzc2ZMmVKxo4dmyQZNWpUkjfPnGhubu583vr163c7i6KYGetrs90u6M1tBQAAAADgvdLtwUT9kMP3vlCNVSqVnH766Tn99NNz6623prW1Nd/5zneSJD/72c/S3t6e+vr6JMmPfvSjDBo0KKNHj86wYcNSV1eXVatWZeLEie+47meffTannXZarr/++s5pf/zp9beMGDEijz32WM4666xMmTIlc+fOTb9+/XLyySdn3bp1Oeyww3L00Ufv1+sbOHBgBg4cmA0bNuTJJ5/MPffck+TNSwWNGjUqTz31VOen9js6OrJo0aLcfffd+7WtAzbwiL0vU0O9ta0AAAAAALxXet2lnJYuXZqnn346kyZNysiRI7N06dK8+uqrOfbYY/Piiy+mo6Mjf/M3f5PPfOYzeeWVV3LbbbflxhtvTJ8+fdLQ0JB//Md/zCc/+cns3LkzZ5xxRjZv3pwlS5Zk0KBBueqqq/Knf/qn+drXvpYnn3wyY8eOzUMPPZTnn3/+HT+JPnLkyCxYsCBnn312pk6dmkceeSTnnntuJkyYkMmTJ+fuu+/O+PHjs2bNmjzxxBOZPHlyTjnllHd9bU8++WSq1WrGjx+fFStWZMaMGRk/fnw+9rGPJXnzTfbp06dn9uzZGTduXMaNG5fZs2dnwIABueKKK96z3/nBqje3lSR57bXXsmrVqqxZsyZJsnz58iRvnnnz1tk3AAAAAAD7qtcFE4MHD87ixYvzb//2b9m8eXNaW1tz77335oILLsijjz6ac845J+PGjcuZZ56ZHTt25PLLL8/MmTM7n3/77bdn5MiRueuuu/Jf//VfGTp0aE4++eTcfPPNSZLrrrsuL7zwQqZMmZJKpZKpU6fm+uuvz/e+9713rGfUqFFZsGBBzjrrrFx55ZX5xje+kSeeeCK33HJLrr766rz66qsZNWpUzjzzzL1ebmnTpk359Kc/nd/85jdpbGzMpZdemjvvvDP9+vXrXOaf/umf0t7enuuvvz4bNmzIqaeemu9///tpaGg48F/uIaa3t5Xvfve7uwQVl19+eZLktttu2+V1AgAAAADsi0q1Wq3WuoieYtq0adm4cWPmzZtX61Lo4bQVAAAAAID906fWBQAAAAAAAL2HYAIAAAAAACjGpZwAAAAAAIBinDEBAAAAAAAUI5gAAAAAAACKEUwAAAAAAADFHLa/T9y5c2fWrFmThoaGVCqV7qwJAAAAAAA4yFSr1WzZsiUtLS3p0+fdz4vY72BizZo1GTNmzP4+HQAAAAAAOAStXr06o0ePftf5+x1MNDQ0dG5g8ODB+7saAAAAAADgELB58+aMGTOmMz94N/sdTLx1+abBgwcLJgAAAAAAgCTZ6+0f3PwaAAAAAAAoRjABAAAAAAAUI5gAAAAAAACKEUwAAAAAAADFCCYAAAAAAIBiBBMAAAAAAEAxggkAAAAAAKAYwQQAAAAAAFCMYAIAAAAAAChGMAEAAAAAABQjmAAAAAAAAIoRTAAAAAAAAMUIJgAAAAAAgGIEEwAAAAAAQDGCCQAAAAAAoBjBBAAAAAAAUIxgAgAAAAAAKEYwAQAAAAAAFCO
YAAAAAAAAihFMAAAAAAAAxQgmAAAAAACAYgQTAAAAAABAMYIJAAAAAACgGMEEAAAAAABQjGACAAAAAAAoRjABAAAAAAAUI5gAAAAAAACKEUwAAAAAAADFCCYAAAAAAIBiBBMAAAAAAEAxggkAAAAAAKAYwQQAAAAAAFCMYAIAAAAAAChGMAEAAAAAABQjmAAAAAAAAIoRTAAAAAAAAMUIJgAAAAAAgGIEEwAAAAAAQDGCCQAAAAAAoBjBBAAAAAAAUMwBBxNtr23rjjq65KVfvZSP/J+P5hM3fCJr16597ze4ZW3yzMw3vx+k2latzI9nfz5tq1bWuhT2Ytvql7Lm/uuybfVLtS6l5t7ebtte25Yff/NnRfsb6NUOgeMf9Epd2HcdU6Fn65Z91HEcgN7KMfCgcsDBRPvG7d1RR5es/PUrefh7X8/9//f+csHEolkHdWPetua3Wba0MdvW/LbWpbAXO9a+kpbfzcmOta/UupSae3u73bahPcse+Xm2bWivcWXQSxwCxz/olbqw7zqmQs/WLfuo4zgAvZVj4EHFpZwAAAAAAIBiBBMAAAAAAEAxggkAAAAAAKCYww50BTvaOtK+qcx9Jjraf19kO7vZviFpe7U22z5QO7a8+a3tjWJ/J/ZPZ/vu2Hjwtrfu8rZ2u2NrR40Lgl7qYD7+QW+0fUOXF92xtdwYHui6bh33Oo4D0Nvsw3iY2jvgYOLJOxelvl99d9SyV6s2riyynd187dzabLc7bGtNckce//yqJKtqXQ17MKJ+ZS79H8mwH1ya/KDW1dSYdgs9w8F8/AP26PFbn651CcB7zXEcAOjBXMoJAAAAAAAoRjABAAAAAAAUI5gAAAAAAACKOeB7TJx/y8QcfdxR3VHLXi383qJkcZFN7eqvf5A0/c8abLgb/HhZsvy/c+EnjsrwU/5XrathDza/sCh5Jtlw7rcz7KT/Xetyautt7fZ3Kze4FjbUwsF8/IPe6P+92OVryl/42XMy/Ohh73FBwL7q1nGv4zgAvc0+jIepvQMOJuoG9k/9kMO7o5a96l/fr8h2dnP4sGTgEbXZ9oGqa0jy36kb2LfY34n9s/2t9t1/6MHb3rrL29pt3aD+ta4IeqeD+fgHvdHhXQ8a6gaVG8MDXdet417HcQB6m30YD1N7LuUEAAAAAAAUI5gAAAAAAACKEUwAAAAAAADFCCYAAAAAAIBiDvjm1/VDy9007+hxrbnygo+kceywNDc3v/cbbGhOJt725veD1ICWI/OBU5dlQMsZtS6Fvahrbs2a4ddmaHNrrUupube32wHD6vOBy4/PgGH1Na4MeolD4PgHvVIX9l3HVOjZumUfdRwHoLdyDDyoVKrVanV/nrh58+YMGTIkmzZtyuDBg7u7LgAAAAAA4CDS1dzApZwAAAAAAIBiBBMAAAAAAEAxggkAAAAAAKAYwQQAAAAAAFCMYAIAAAAAAChGMAEAAAAAABQjmAAAAAAAAIoRTAAAAAAAAMUIJgAAAAAAgGIEEwAAAAAAQDGCCQAAAAAAoBjBBAAAAAAAUIxgAgAAAAAAKEYwAQAAAAAAFCOYAAAAAAAAihFMAAAAAAAAxQgmAAAAAACAYgQTAAAAAABAMYIJAAAAAACgGMEEAAAAAABQjGACAAAAAAAoRjABAAAAAAAUI5gAAAAAAACKEUwAAAAAAADFCCYAAAAAAIBiBBMAAAAAAEAxggkAAAAAAKAYwQQAAAAAAFCMYAIAAAAAAChGMAEAAAAAABQjmAAAAAAAAIoRTAAAAAAAAMUIJgAAAAAAgGIEEwAAAAAAQDGCCeAdtb22LT/+5s/S9tq2WpcC9BL6HegZunNftF8DABzcjOfYV11tK4IJ4B1t29CeZY/8PNs2tNe6FKCX0O9Az9Cd+6L9GgDg4GY8x75q37i9S8sJJgAAAAAAgGIEEwAAAAAAQDGH1boAoGfbsbUj7Zu6dgoWwIHYsbWj1iUAf6Q7xgD2awCAQ4P3h+iqHW1
d+x9AMAHs0eO3Pl3rEgCAGjAGAADgLcaGdFX777t2PxKXcgIAAAAAAIoRTAAAAAAAAMUIJgAAAAAAgGLcYwLYows/e06GHz2s1mUAvcDvVm5w3VLoQbpjDGC/BgA4NHh/iK5a+YtVyff2vpxgAtijukH9Uz/k8FqXAfQCdYP617oE4I90xxjAfg0AcGjw/hBdVTewa/8DuJQTAAAAAABQjGACAAAAAAAoRjABAAAAAAAUI5gAAAAAAACKEUwA72jAsPp84PLjM2BYfa1LAXoJ/Q70DN25L9qvAQAObsZz7Kv6oV27SXqlWq1W92cDmzdvzpAhQ7Jp06YMHjx4f1YBAAAAAAAcIrqaGzhjAgAAAAAAKEYwAQAAAAAAFCOYAAAAAAAAihFMAAAAAAAAxQgmAAAAAACAYgQTAAAAAABAMYIJAAAAAACgGMEEAAAAAABQjGACAAAAAAAoRjABAAAAAAAUI5gAAAAAAACKEUwAAAAAAADFCCYAAAAAAIBiBBMAAAAAAEAxggkAAAAAAKAYwQQAAAAAAFCMYAIAAAAAAChGMAEAAAAAABQjmAAAAAAAAIoRTAAAAAAAAMUIJgAAAAAAgGIEEwAAAAAAQDGCCQAAAAAAoBjBBAAAAAAAUIxgAgAAAAAAKEYwAQAAAAAAFCOYAAAAAAAAihFMAAAAAAAAxQgmAAAAAACAYgQTAAAAAABAMYIJAAAAAACgGMEEAAAAAABQjGACAAAAAAAoRjABAAAAAAAUI5gAAAAAAACKEUwAAAAAAADFCCYAAAAAAIBiBBMAAAAAAEAxggkAAAAAAKAYwQQAAAAAAFCMYAIAAAAAACjmsP19YrVaTZJs3ry524oBAAAAAAAOTm/lBW/lB+9mv4OJLVu2JEnGjBmzv6sAAAAAAAAOMVu2bMmQIUPedX6lurfo4l3s3Lkza9asSUNDQyqVyn4XCLy7zZs3Z8yYMVm9enUGDx5c63IAejR9JsC+0W8C7Bv9JsDeVavVbNmyJS0tLenT593vJLHfZ0z06dMno0eP3t+nA/tg8ODBBj0AXaTPBNg3+k2AfaPfBNizPZ0p8RY3vwYAAAAAAIoRTAAAAAAAAMUIJqAHq6ury2233Za6urpalwLQ4+kzAfaNfhNg3+g3AbrPft/8GgAAAAAAYF85YwIAAAAAAChGMAEAAAAAABQjmAAAAAAAAIoRTAAAAAAAAMUIJqAHWLx4cf7yL/8yLS0tqVQqmTdv3i7zq9VqZs6cmZaWltTX1+ess87KL3/5y9oUC1Bje+szp02blkqlssvXn//5n9emWIAau+uuu/Jnf/ZnaWhoyMiRIzN58uQsX758l2WMNQH+oCv9pvEmwIETTEAP0NbWlhNOOCEPPPDAO86/5557ct999+WBBx7I888/n1GjRuW8887Lli1bClcKUHt76zOT5IMf/GDWrl3b+fXEE08UrBCg51i0aFFuuOGG/OhHP8pTTz2V119/PZMmTUpbW1vnMsaaAH/QlX4zMd4EOFCVarVarXURwB9UKpV85zvfyeTJk5O8+Qm2lpaWTJ8+PZ/61KeSJDt27EhTU1PuvvvuXHvttTWsFqC23t5nJm9+gm3jxo27nUkBQPLqq69m5MiRWbRoUc4880xjTYC9eHu/mRhvAnQHZ0xAD/fyyy9n3bp1mTRpUue0urq6TJw4MUuWLKlhZQA918KFCzNy5Mgcc8wx+fjHP57169fXuiSAHmHTpk1JksbGxiTGmgB78/Z+8y3GmwAHRjABPdy6deuSJE1NTbtMb2pq6pwHwB9ccMEFefjhh7NgwYLce++9ef755/MXf/EX2bFjR61LA6iparWaf/iHf8gZZ5yR4447LomxJsCevFO/mRhvAnSHw2pdANA1lUpll8fVanW3aQAkU6ZM6fz5uOOOyymnnJLW1tY8/vjjueSSS2pYGUBt3XjjjXnxxRfz3HPP7TbPWBNgd+/WbxpvAhw4Z0xADzdq1Kgk2e0Ta+vXr9/
tk20A7K65uTmtra359a9/XetSAGrm7//+7/Pd7343zzzzTEaPHt053VgT4J29W7/5Tow3AfadYAJ6uLFjx2bUqFF56qmnOqd1dHRk0aJFOe2002pYGcDB4Xe/+11Wr16d5ubmWpcCUFy1Ws2NN96Yxx57LAsWLMjYsWN3mW+sCbCrvfWb78R4E2DfuZQT9ABbt27NihUrOh+//PLLeeGFF9LY2Jijjjoq06dPz+zZszNu3LiMGzcus2fPzoABA3LFFVfUsGqA2thTn9nY2JiZM2fm0ksvTXNzc1auXJmbb745I0aMyMUXX1zDqgFq44Ybbsg3vvGNzJ8/Pw0NDZ1nRgwZMiT19fWpVCrGmgB/ZG/95tatW403AbpBpVqtVmtdBPR2CxcuzNlnn73b9Kuuuipf/epXU61WM2vWrMyZMycbNmzIqaeemi984Qu73HwLoLfYU5/5xS9+MZMnT85Pf/rTbNy4Mc3NzTn77LNz++23Z8yYMTWoFqC23u0+EV/5ylcybdq0JDHWBPgje+s329vbjTcBuoFgAgAAAAAAKMY9JgAAAAAAgGIEEwAAAAAAQDGCCQAAAAAAoBjBBAAAAAAAUIxgAgAAAAAAKEYwAQAAAAAAFCOYAAAAAAAAihFMAAAAezVz5syceOKJtS4DAAA4BFSq1Wq11kUAAAC1U6lU9jj/qquuygMPPJAdO3Zk+PDhhaoCAAAOVYIJAADo5datW9f586OPPppbb701y5cv75xWX1+fIUOG1KI0AADgEORSTgAA0MuNGjWq82vIkCGpVCq7TXv7pZymTZuWyZMnZ/bs2WlqasrQoUMza9asvP7665kxY0YaGxszevToPPjgg7ts67e//W2mTJmSYcOGZfjw4bnooouycuXKsi8YAACoKcEEAACwXxYsWJA1a9Zk8eLFue+++zJz5sx86EMfyrBhw7J06dJcd911ue6667J69eokybZt23L22Wdn0KBBWbx4cZ577rkMGjQoH/zgB9PR0VHjVwMAAJQimAAAAPZLY2NjPv/5z2f8+PG5+uqrM378+Gzbti0333xzxo0bl09/+tPp379/fvjDHyZJHnnkkfTp0ydf/vKXc/zxx+fYY4/NV77ylaxatSoLFy6s7YsBAACKOazWBQAAAAen97///enT5w+fdWpqaspxxx3X+bhv374ZPnx41q9fnyRZtmxZVqxYkYaGhl3Ws3379rz00ktligYAAGpOMAEAAOyXfv367fK4Uqm847SdO3cmSXbu3JkPfOADefjhh3db1xFHHPHeFQoAAPQoggkAAKCIk08+OY8++mhGjhyZwYMH17ocAACgRtxjAgAAKOLKK6/MiBEjctFFF+XZZ5/Nyy+/nEWLFuWmm27Kb37zm1qXBwAAFCKYAAAAihgwYEAWL16co446KpdcckmOPfbYXH311Wlvb3cGBQAA9CKVarVarXURAAAAAABA7+CMCQAAAAAAoBjBBAAAAAAAUIxgAgAAAAAAKEYwAQAAAAAAFCOYAAAAAAAAihFMAAAAAAAAxQgmAAAAAACAYgQTAAAAAABAMYIJAAAAAACgGMEEAAAAAABQjGACAAAAAAAoRjABAAAAAAAU8/8BScTBSzcVCDwAAAAASUVORK5CYII=\n", 62 | "text/plain": [ 63 | "" 64 | ] 65 | }, 66 | "execution_count": null, 67 | "metadata": {}, 68 | "output_type": "execute_result" 69 | } 70 | ], 71 | "source": [ 72 | "# simulate oracle clustering by assigning to each segment\n", 73 | "# the label from reference coocurring the most.\n", 74 | "\n", 75 | "segmentation_with_oracle_clustering = 
segmentation.empty()\n", 76 | "for segment, track in segmentation.itertracks():\n", 77 | " label = reference.argmax(segment)\n", 78 | " segmentation_with_oracle_clustering[segment, track] = label\n", 79 | "\n", 80 | "segmentation_with_oracle_clustering" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "id": "8b1eac76", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# we should do better than that (see the two overlapping purple segments around time t=18s)\n", 91 | "# but, with the time we have left before the Interspeech deadline, that is our only option\n", 92 | "# I guess. Ideally, we should prevent two overlapping segments (from our segmentation) to\n", 93 | "# end up in the same cluster after oracle clustering." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "0d97481b", 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "0.05345995893223817" 106 | ] 107 | }, 108 | "execution_count": null, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "# now, we can compute a DER with oracle clustering\n", 115 | "# to compute a nice new Figure 7 (same format as Figures 5 and 6)\n", 116 | "# that could be cited in the conclusion to explain a bit better \n", 117 | "# the future work related to incremental clustering\n", 118 | "from pyannote.metrics.diarization import DiarizationErrorRate\n", 119 | "metric = DiarizationErrorRate()\n", 120 | "metric(reference, segmentation_with_oracle_clustering)" 121 | ] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "Python 3 (ipykernel)", 127 | "language": "python", 128 | "name": "python3" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 5 133 | } 134 | -------------------------------------------------------------------------------- /notebooks/index.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "* Diarization ([pyannote.metrics.diarization](pyannote.metrics.diarization.ipynb))\n", 11 | "* Identification ([pyannote.metrics.identification](pyannote.metrics.identification.ipynb))" 12 | ] 13 | } 14 | ], 15 | "metadata": { 16 | "kernelspec": { 17 | "display_name": "Python 3", 18 | "language": "python", 19 | "name": "python3" 20 | }, 21 | "language_info": { 22 | "codemirror_mode": { 23 | "name": "ipython", 24 | "version": 3 25 | }, 26 | "file_extension": ".py", 27 | "mimetype": "text/x-python", 28 | "name": "python", 29 | "nbconvert_exporter": "python", 30 | "pygments_lexer": "ipython3", 31 | "version": "3.4.3" 32 | } 33 | }, 34 | "nbformat": 4, 35 | "nbformat_minor": 0 36 | } 37 | -------------------------------------------------------------------------------- /notebooks/pyannote.metrics.identification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [ 12 | { 13 | "name": "stdout", 14 | "output_type": "stream", 15 | "text": [ 16 | "Populating the interactive namespace from numpy and matplotlib\n" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "%pylab inline" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": { 27 | "deletable": true, 28 | "editable": true 29 | }, 30 | "source": [ 31 | "# Identification evaluation metrics" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": { 38 | "collapsed": false, 39 | "deletable": true, 40 | "editable": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "from pyannote.core import Annotation, Segment" 45 | ] 46 | }, 47 | 
{ 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": { 51 | "collapsed": false, 52 | "deletable": true, 53 | "editable": true 54 | }, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "image/png": "iVBORw0KGgoAAAANSUhEUgAABHQAAACsCAYAAAAaLvvnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADPxJREFUeJzt3X+sJWdZB/DvQxeksSCWbQrZ1qxWsDaVrnYhCzSmbVCq\nJW2JxNCg6R8kQIKKiaJgYkpNiJqopX8YDQIWE7UQK6URYktKBUwsdBe2LQWrFEvqpnRFIdKIFdrH\nP86U3u6v3rN7t3Pfez+fZHNm5s7sPmf32fec8z0z71R3BwAAAIBxPG3uAgAAAABYjkAHAAAAYDAC\nHQAAAIDBCHQAAAAABiPQAQAAABiMQAcAAABgMAIdAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAw\nQwY6VXVZVXVVnTl3LWupqh6pqr1VdUdVfbaqXjZ3TWulqp5XVddV1b1VtaeqPlpVL5y7LgAAABjR\nkIFOksuT/OP0uJF8q7t3dPc5Sd6e5HfnLmgtVFUl+VCSf+juM7r73Cye36nzVgYAAABjGi7QqaqT\nkpyX5PVJXjtzOcfTs5N8fe4i1sgFSb7d3X/62IbuvqO7PzVjTQAAADCsLXMXcBQuTfL33f0vVfWf\nVXVud++Zu6g1cmJV7U3yzCTPT3LhzPWslbOTbJR/IwAAAJjdMQU6+7ad3mtVSJJs23d/rWK3y5Nc\nMy1fN62veViw68qb1vS53XbVK1fz3L7V3TuSpKpemuQvqurs7l6zWi654eI1fV43XvaR1TwvAAAA\nYA0NdYZOVZ2cxVkrP1ZVneSEJF1Vb13L0GM96O5/qqqtSU5Jsn/ueo7R3UleM3cRAAAAsFHUSDlI\nVb0hybnd/cYV2z6R5Le7+5PzVbY2quqh7j5pWj4zi4mfT+3uR+at7NhMkyLfluS93f3uaduLknyf\neXQAAABgeaNNinx5FndLWun6bJy7XZ043bZ8b5IPJLli9DAnSaazp16d5BXTbcvvzuIOXl+dtzIA\nAAAY01Bn6AAAAAAw3hk6AAAAAJueQAcAAABgMAIdAAAAgMEIdAAAAAAGI9ABAAAAGMyWZXbeunVr\nb9++/TiVAgAAALD57Nmz52vdfcoyxywV6Gzfvj27d+9erioAAAAADquqvrLsMS65AgAAABiMQAcA\nAABgMAIdAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAwAh0AAACAwQh0AAAAAAYj0AEAAAAYjEAH\nAAAAYDACHQAAAIDBCHQAAAAABiPQAQAAABiMQAcAAABgMAIdAAAAgMEIdAAAAAAGI9ABAAAAGIxA\nBwAAAGAwAh0AAACAwQh0AAAAAAYj0AEAAAAYjEAHAAAAYDACHQAAAIDBCHQAAAAABiPQAQAAABiM\nQAcAAABgMAIdAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAwAh0AAACAwQh0AAAAAAYj0AEAAAAY\njEAHAAAAYDACHQAAAIDBCHQAAAAABiPQAQAAABiMQAcAAABgMAIdAAAAgMEIdAAAAAAGI9ABAAAA\nGIxABwAAAGAwAh0AAACAwQh0AAAAAAYj0AEAAAAYjEAHAAAAYDACHQAAAIDBLBXofPvOO49XHcAm\ntW/b6dm37fS5yzhudl15U3ZdedPcZcCm4//euDb668JTzd/n2rrkhotzyQ0Xz10GsMEc7bjiDB0A\nAACAwQh0AAAAAAYj0AEAAAAYzJZlD3ANLsDyzOUBsBzvOVnPzKMDrAfO0AEAAAAYzNJn6Gzb
d//x\nqAPYpDbLN7C3XfXKuUuATcVZcePznnNtbJbX2afajZd9ZO4SgA3EXa4AAAAANgmBDgAAAMBgBDoA\nAAAAgxHoAAAAAAymunvVO+/cubN37959HMsBAAAA2Fyqak9371zmGGfoAAAAAAxGoAMAAAAwGIEO\nAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiB\nDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAY\ngQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAw\nGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAA\nMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAA\nADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAg6nuXv3OVd9Mcs/x\nK4cNZGuSr81dBMPQL6yWXmEZ+oXV0issQ7+wWnqFZfxIdz9rmQO2LPkH3NPdO5c8hk2oqnbrFVZL\nv7BaeoVl6BdWS6+wDP3CaukVllFVu5c9xiVXAAAAAIMR6AAAAAAMZtlA593HpQo2Ir3CMvQLq6VX\nWIZ+YbX0CsvQL6yWXmEZS/fLUpMiAwAAADA/l1wBAAAADGZVgU5VXVRV91TVl6rqbce7KMZWVfdV\n1V1VtfdoZupmY6uq91XV/qr6/IptJ1fVx6rqX6fH75+zRtaHw/TKO6pq3zS+7K2qn52zRtaHqjq9\nqm6tqi9U1d1V9ZZpu7GFgxyhX4wvPEFVPbOqPlNVd0y9ctW0/Qer6tPTZ6MPVNUz5q6V+R2hX66t\nqn9bMbbsmLtW1oeqOqGqPldVfzetLz22PGmgU1UnJPnjJD+T5Kwkl1fVWcdaPBveBd29w236OIRr\nk1x0wLa3Jbmlu1+Q5JZpHa7Nwb2SJFdP48uO7v7oU1wT69N3kvxad5+VZFeSN0/vVYwtHMrh+iUx\nvvBEDye5sLvPSbIjyUVVtSvJ72fRKz+c5OtJXj9jjawfh+uXJHnrirFl73wlss68JckXV6wvPbas\n5gydlyT5Und/ubv/L8l1SS49imIB0t2fTPJfB2y+NMn7p+X3J7nsKS2KdekwvQIH6e4Huvuz0/I3\ns3hztC3GFg7hCP0CT9ALD02rT59+dZILk/zNtN3YQpIj9gscpKpOS3JxkvdM65WjGFtWE+hsS3L/\nivV/jxc9jqyT3FxVe6rqDXMXwxBO7e4HpuWvJjl1zmJY936pqu6cLslyCQ1PUFXbk/x4kk/H2MKT\nOKBfEuMLB5guidibZH+SjyW5N8k3uvs70y4+G/FdB/ZLdz82trxzGluurqrvmbFE1o93JfmNJI9O\n68/NUYwtJkXmeDivu38ii8v03lxVPzl3QYyjF7fe820Gh/MnSc7I4lTmB5L84bzlsJ5U1UlJrk/y\nq9393yt/ZmzhQIfoF+MLB+nuR7p7R5LTsrhy4cyZS2IdO7BfqursJG/Pom9enOTkJL85Y4msA1X1\nqiT7u3vPsf5eqwl09iU5fcX6adM2OKTu3jc97k/yoSxe/OBIHqyq5yfJ9Lh/5npYp7r7wenN0qNJ\n/izGFyZV9fQsPpz/ZXf/7bTZ2MIhHapfjC8cSXd/I8mtSV6a5DlVtWX6kc9GHGRFv1w0XebZ3f1w\nkj+PsYXk5Ukuqar7spjS5sIk1+QoxpbVBDq3J3nBNOPyM5K8NsmNR1M1G19VfW9VPeux5SQ/neTz\nRz4KcmOSK6blK5J8eMZaWMce+3A+eXWML+S7152/N8kX
u/uPVvzI2MJBDtcvxhcOVFWnVNVzpuUT\nk/xUFnMu3ZrkNdNuxhaSHLZf/nnFFwuVxZwoxpZNrrvf3t2ndff2LPKVj3f363IUY0stzkB+kp0W\nt218V5ITkryvu995lLWzwVXVD2VxVk6SbEnyV/qFlarqr5Ocn2RrkgeTXJnkhiQfTPIDSb6S5Oe7\n22S4m9xheuX8LC6H6CT3JXnjijlS2KSq6rwkn0pyVx6/Fv23spgXxdjCExyhXy6P8YUVqupFWUxM\nekIWX4R/sLt/Z3q/e10Wl898LskvTGdfsIkdoV8+nuSUJJVkb5I3rZg8mU2uqs5P8uvd/aqjGVtW\nFegAAAAAsH6YFBkAAABgMAIdAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAwW+YuAADgyVTVc5Pc\nMq0+L8kjSf5jWv+f7n7ZLIUBAMzEbcsBgKFU1TuSPNTdfzB3LQAAc3HJFQAwtKp6aHo8v6o+UVUf\nrqovV9XvVdXrquozVXVXVZ0x7XdKVV1fVbdPv14+7zMAAFieQAcA2EjOSfKmJD+a5BeTvLC7X5Lk\nPUl+edrnmiRXd/eLk/zc9DMAgKGYQwcA2Ehu7+4HkqSq7k1y87T9riQXTMuvSHJWVT12zLOr6qTu\nfugprRQA4BgIdACAjeThFcuPrlh/NI+/73lakl3d/b9PZWEAAGvJJVcAwGZzcx6//CpVtWPGWgAA\njopABwDYbH4lyc6qurOqvpDFnDsAAENx23IAAACAwThDBwAAAGAwAh0AAACAwQh0AAAAAAYj0AEA\nAAAYjEAHAAAAYDACHQAAAIDBCHQAAAAABiPQAQAAABjM/wNLTDmmMc0M8QAAAABJRU5ErkJggg==\n", 59 | "text/plain": [ 60 | "" 61 | ] 62 | }, 63 | "execution_count": 3, 64 | "metadata": {}, 65 | "output_type": "execute_result" 66 | } 67 | ], 68 | "source": [ 69 | "reference = Annotation()\n", 70 | "reference[Segment(0, 10)] = 'A'\n", 71 | "reference[Segment(12, 20)] = 'B'\n", 72 | "reference[Segment(24, 27)] = 'A'\n", 73 | "reference[Segment(30, 40)] = 'C'\n", 74 | "reference" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 4, 80 | "metadata": { 81 | "collapsed": false, 82 | "deletable": true, 83 | "editable": true 84 | }, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABHQAAACsCAYAAAAaLvvnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADY5JREFUeJzt3W2MpWdZB/D/RQtCLIjQ5cVtzQqCtUFYZSHlJVoasdWS\ntkRiaMDwgaQlAYUEkZfEFEyIkigvH4wWedWChYi2jRCBAAFUCuzClvIiCliCm9oVhUijoLSXH85T\nOp196ZzZ2Xnmnvn9ks08zzPn7Fwn59p7z/nPfd+nujsAAAAAjOMecxcAAAAAwHIEOgAAAACDEegA\nAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxky\n0KmqS6qqq+qsuWvZSFV1W1UdrKobquozVfXEuWvaKFX1kKq6uqq+WlUHqup9VfXIues6USuesy9M\nz9uLq2rIf1cAAACMo7p77hqWVlXvSvJjST7c3VfMXc9Gqapbu/u06fj8JK/o7l+YuawTVlWV5B+S\nvL27/2S69pgk9+vuj89a3Ala9Zw9KMk7k/z9dupLAAAAtp7hZhJU1WlJnpzkuUmeOXM5J9P9knxr\n7iI2yFOS/N8dYU6SdPcNo4c5q3X34SSXJXnBFGIBAADASXHq3AWsw8VJ/ra7/6mq/qOqHtvdB+Yu\naoPcp6oOJrl3kocmOW/mejbKo5Jsl+fouLr7a1V1SpIHJbll7noAAADYnk4o0Dm0+8wNXa+1+9A3\n1jKr4dIkb5iOr57ONzwsOOeK92/oY7v+Veev5bH9T3fvTZKqekKSP6uqR/UGrou76JoLN/RxXXfJ\ne7fMTJQrL75qQx/b5dc+e8s8NgAAAFhpqBk6VfWALGat/ExVdZJTknRVvWQjQ4+toLs/UVWnJ9mV\n5PDc9ZygLyR5xtxFbIaqeliS2zL+cwYAAMAWNtSmyFV1WZLHdvflK659NMnvdPfH5qtsY6zaYPes\nJH+X5MHdfdu8lZ2YaT+Z65O8ubvfOF17dJIfGX0fnVXP2a4k70jyCZsiAwAAcDINNUMni+VVr1l1\n7T3T9eEDndy5h06SVJLnjB7mJEl3d1U9Pcnrq+qlSb6b5KYkL5q1sI1xx3N2zyTfT/LnSV47b0kA\nAABsd0PN0AEAAABgwI8tBwAAANjpBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGCW+tjy008/\nvffs2XOSSgEAAADYeQ4cOPDN7t61zH2WCnT27NmT/fv3L1cVAAAAAMdUVV9f9j6WXAEAAAAMRqAD\nAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEag\nAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxG\noAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAM\nRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAA\nDEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAA\nAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6k0O7z8yh3WfO\nXQbAul158VW58uKrNu3nnXPF+3POFe/ftJ8Hy9ju/XnRNRfmomsunLsMANgUm/06d7Ot97EJdAAA\nAAAGI9ABAAAAGIxABwAAAGAwp85dwFZjHx1gWPt+L8n61+Aube+uJNnW+5TAVmcfHQB2ggtzaZJN\nfJ07CDN0AAAAAAZjhs4quw99Y+4SANZn+o3F5dc+e1N+3Funm
TnXv+r8Tfl5sIydMnPsukveO3cJ\nAHDSXfnWzX2du9l8yhUAAADADiHQAQAAABiMQAcAAABgMAIdAAAAgMFUd6/5xvv27ev9+/efxHIA\nAAAAdpaqOtDd+5a5jxk6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxG\noAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAM\nRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAA\nDEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAA\nAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAA\nAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegA\nAAAADEagAwAAADAYgQ4AAADAYKq7137jqu8k+fLJK4dt5PQk35y7CIahX1grvcIy9AtrpVdYhn5h\nrfQKy/ip7r7vMnc4dckf8OXu3rfkfdiBqmq/XmGt9AtrpVdYhn5hrfQKy9AvrJVeYRlVtX/Z+1hy\nBQAAADAYgQ4AAADAYJYNdN54UqpgO9IrLEO/sFZ6hWXoF9ZKr7AM/cJa6RWWsXS/LLUpMgAAAADz\ns+QKAAAAYDBrCnSq6oKq+nJVfaWqXnayi2JsVXVTVd1YVQfXs1M321tVvaWqDlfV51dce0BVfbCq\n/nn6+qNz1sjWcIxeeWVVHZrGl4NV9Stz1sjWUFVnVtVHquqLVfWFqnrhdN3YwhGO0y/GF+6iqu5d\nVZ+qqhumXnnVdP0nquqT03ujd1XVveaulfkdp1/eVlX/smJs2Tt3rWwNVXVKVX22qv5mOl96bLnb\nQKeqTknyR0l+OcnZSS6tqrNPtHi2vad0914f08dRvC3JBauuvSzJh7r7EUk+NJ3D23JkryTJ66bx\nZW93v2+Ta2Jr+n6SF3f32UnOSfL86bWKsYWjOVa/JMYX7up7Sc7r7sck2Zvkgqo6J8lrsuiVn0zy\nrSTPnbFGto5j9UuSvGTF2HJwvhLZYl6Y5EsrzpceW9YyQ+fxSb7S3V/r7v9NcnWSi9dRLEC6+2NJ\n/nPV5YuTvH06fnuSSza1KLakY/QKHKG7b+7uz0zH38nixdHuGFs4iuP0C9xFL9w6nd5z+tNJzkvy\nl9N1YwtJjtsvcISqOiPJhUneNJ1X1jG2rCXQ2Z3kGyvO/zX+0+P4OskHqupAVV02dzEM4cHdffN0\n/G9JHjxnMWx5L6iqz01Lsiyh4S6qak+Sn03yyRhbuBur+iUxvrDKtCTiYJLDST6Y5KtJvt3d359u\n4r0RP7C6X7r7jrHl1dPY8rqq+qEZS2TreH2S305y+3T+wKxjbLEpMifDk7v757JYpvf8qvr5uQti\nHL346D2/zeBY/jjJw7OYynxzkj+ctxy2kqo6Lcl7kryou/9r5feMLax2lH4xvnCE7r6tu/cmOSOL\nlQtnzVwSW9jqfqmqRyV5eRZ987gkD0jy0hlLZAuoqqclOdzdB07071pLoHMoyZkrzs+YrsFRdfeh\n6evhJH+dxX9+cDy3VNVDk2T6enjmetiiuvuW6cXS7Un+NMYXJlV1zyzenL+ju/9qumxs4aiO1i/G\nF46nu7+d5CNJnpDk/lV16vQt7404wop+uWBa5tnd/b0kb42xheRJSS6qqpuy2NLmvCRvyDrGlrUE\nOp9O8ohpx+V7JXlmkuvWUzXbX1X9cFXd947jJL+U5PPHvxfkuiTPmY6fk+TaGWthC7vjzfnk6TG+\nkB+sO39zki9192tXfMvYw
hGO1S/GF1arql1Vdf/p+D5JnprFnksfSfKM6WbGFpIcs1/+ccUvFiqL\nPVGMLTtcd7+8u8/o7j1Z5Csf7u5nZR1jSy1mIN/NjRYf2/j6JKckeUt3v3qdtbPNVdXDspiVkySn\nJnmnfmGlqvqLJOcmOT3JLUmuSHJNkncn+fEkX0/ya91tM9wd7hi9cm4WyyE6yU1JLl+xRwo7VFU9\nOcnHk9yYO9eivyKLfVGMLdzFcfrl0hhfWKGqHp3FxqSnZPGL8Hd39+9Or3evzmL5zGeTPHuafcEO\ndpx++XCSXUkqycEkz1uxeTI7XFWdm+S3uvtp6xlb1hToAAAAALB12BQZAAAAYDACHQAAAIDBCHQA\nAAAABiPQAQAAABiMQAcAAABgMKfOXQAAwN2pqgcm+dB0+pAktyX59+n8v7v7ibMUBgAwEx9bDgAM\npapemeTW7v6DuWsBAJiLJVcAwNCq6tbp67lV9dGquraqvlZVv19Vz6qqT1XVjVX18Ol2u6rqPVX1\n6enPk+Z9BAAAyxPoAADbyWOSPC/JTyf59SSP7O7HJ3lTkt+YbvOGJK/r7scl+dXpewAAQ7GHDgCw\nnXy6u29Okqr6apIPTNdvTPKU6fgXk5xdVXfc535VdVp337qplQIAnACBDgCwnXxvxfHtK85vz52v\ne+6R5Jzu/u5mFgYAsJEsuQIAdpoP5M7lV6mqvTPWAgCwLgIdAGCn+c0k+6rqc1X1xSz23AEAGIqP\nLQcAAAAYjBk6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAw\nGIEOAAAAwGD+H/JZbvqSS5q9AAAAAElFTkSuQmCC\n", 89 | "text/plain": [ 90 | "" 91 | ] 92 | }, 93 | "execution_count": 4, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "hypothesis = Annotation()\n", 100 | "hypothesis[Segment(2, 13)] = 'A'\n", 101 | "hypothesis[Segment(13, 14)] = 'D'\n", 102 | "hypothesis[Segment(14, 20)] = 'B'\n", 103 | "hypothesis[Segment(22, 38)] = 'C'\n", 104 | "hypothesis[Segment(38, 40)] = 'D'\n", 105 | "hypothesis" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": { 111 | "deletable": true, 112 | "editable": true 113 | }, 114 | "source": [ 115 | "## Identification error rate" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 5, 121 | "metadata": { 122 | "collapsed": false, 123 | "deletable": true, 124 | "editable": true 125 | }, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "IER = 0.516\n" 132 | ] 133 | } 134 | ], 135 | "source": [ 136 | "from pyannote.metrics.identification import IdentificationErrorRate\n", 137 | "identificationErrorRate = IdentificationErrorRate()\n", 
138 | "print(\"IER = {0:.3f}\".format(identificationErrorRate(reference, hypothesis, uem=Segment(0, 40))))" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": { 144 | "deletable": true, 145 | "editable": true 146 | }, 147 | "source": [ 148 | "## Confusion matrix" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 6, 154 | "metadata": { 155 | "collapsed": false, 156 | "deletable": true, 157 | "editable": true 158 | }, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWIAAAD0CAYAAAChFCyQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFgJJREFUeJzt3X2wZVV95vHvQ9PQgi+o7UCHJoIJQwadCHiLtJpJEZWI\nHYtOVZgZmImCpdV5kUSnUjPBpAIZq6bGTE0l0WBC3UJGnBgk6WjsWK0EFItYDq0XbIEGiQ3JSLct\nTYPy4gvQ9z7546xDjjfn3LMvd52z94XnU7Wq9z7nx17rHvr87uq111pbtomIiPYc1nYDIiKe7ZKI\nIyJalkQcEdGyJOKIiJYlEUdEtCyJOCKiZUnEERETIOm/SNot6Q5J10haNyo2iTgiojJJxwO/AczY\nfgWwBjh/VHwScUTEZBwOPEfS4cBRwDdHBSYRR0RUZnsf8L+BbwD7gYdt/+2o+MOn1bCIiC57488e\n7Qcfmm8Ue8ttj+8GfjDw0qzt2f6JpBcCW4CTgO8Afynpl2z/2bDrJRFHRAAHH5pn53UbG8Wu3XDP\nD2zPLBHyBuAfbD8AIOnjwGuAJOKIiNHMvBdqXewbwCZJRwHfB14PzI0KTiKOiAAMLFBnN0rbOyVt\nA24FDgFfAWZHxa8oEUt6EXAtcCLwj8B/sP3tIXHzwO3l9Bu2z11JvRERtRnzpJuNETe6nn0ZcFmT\n2JXOmrgE+Kztk4HPlvNhvm/7tFKShCOikxZwo1LbShPxFuDqcnw18AsrvF5ERCsMzONGpbaVJuJj\nbe8vx98Cjh0Rt07SnKSbJSVZR0QntdUjHjtGLOkG4Lghb/3O4IltSxrVwpfa3ifpZcDnJN1u+54h\ndW0FtgIcfZRe9RM/fsTYH+DZ4O9vO6rtJnTGk8ce3XYTOmPt/d9tuwmd8SjfPmj7JSu5hoH5lh4d\nNzYR237DqPck3S9pg+39kjYAB0ZcY1/5815JnwdOB/5FIi4TomcBZl65zl+67oRGP8Qz3Rt/5LS2\nm9AZ3/rPr2m7CZ1x3B9+se0mdMYN3vb/V3oNY56cQG+3iZUOTWwHLizHFwKfXBwg6YWSjizH64HX\nAneusN6IiLoM8w1LbStNxO8Dzpb0dXorSd4HIGlG0pUl5t8Ac5K+CtwIvM92EnFEdEpvHnGzUtuK\n5hHbfpDeipHFr88B7yjHXwT+7UrqiYiYPDGPWqk5K+siIig94naGiJOIIyL60iOOiGiRgSfdzhbt\nScQREfRX1qVHHBHRGiPmW3poURJxRESx4PSIIyJak6GJiIjWifncrIuIaI+BJ1nTSt1JxBERgN1e\nj7idWiMiOmgBNSrjSDpF0q6B8oikd4+KT484IoL+zbo6fVPbdwOnAUhaA+wDPjEqPok4IgKY4M26\n1wP32B65Z3IScUQE/SXOE7lZdz5wzVIBScQRESx7Zd16SXMD57PlCUM/RNIRwLn
Ae5a6WBJxRESx\n0Hxo4qDtmQZxbwJutX3/UkFJxBER1L1ZN+ACxgxLQBJxRARQhiYq7jUh6WjgbOCXx8UmEUdEFAsV\ne8S2vwu8uElsEnFEBL2VdROaNTFWEnFEBGWMOJv+RES0KxvDR0S0yCgbw0dEtC094oiIFpllLeio\nqkqtks6RdLekPZIuGfL+kZKuLe/vlHRijXojImoxvVkTTUptK07EZYu3D9JbyncqcIGkUxeFvR34\ntu0fB/4Q+P2V1hsRUds8alRqq9EjPhPYY/te208AHwO2LIrZAlxdjrcBr5fUzqh4RMQQtljwYY1K\nbTWueDxw38D53vLa0Bjbh4CHGbLiRNJWSXOS5h54cL5C0yIimpv3YY1KbZ16VJLtWdsztmde8uJ2\nVrhExLOTqfeopOWqMWtiH3DCwPnG8tqwmL2SDgdeADxYoe6IiCqMeHKhnQ5gjR7xl4GTJZ1UNkE+\nH9i+KGY7cGE5Pg/4nG1XqDsiopp5DmtUaltxj9j2IUkXA9cBa4CrbO+W9F5gzvZ24EPA/5W0B3iI\nXrKOiOiMVb+yzvYOYMei1y4dOP4B8O9r1BURMSk1t8Fcjqysi4gAbKpuDL8cScQREcWqHpqIiFjt\n+kuc25BEHBFBf9OfdnrEnVrQERHRnrpLnCUdI2mbpK9JukvSq0fFpkccEVFUXjX3fuAzts8rayyO\nGhWYRBwRQd1ZE5JeAPwMcFHv2n4CeGJUfBJxRAS9m3WHmi9xXi9pbuB81vbswPlJwAPA/5H0SuAW\n4F22vzvsYknEERHFMoYmDtqeWeL9w4EzgF+3vVPS+4FLgN8dFpybdRER/POsiSalgb3AXts7y/k2\neol5qCTiiIii1qwJ298C7pN0Snnp9cCdo+IzNBERAdC8t9vUrwMfLTMm7gXeNiowiTgign/eGL7a\n9exdwFLjyE9JIo6IoJeIDy1k97WIiFZl05+IiBat+o3hIyKeCSbxYNAmkogjIgCcoYmIiFa1uQ1m\nEnFEBP29JjJrIiKiVU6POCKiXblZFxHRIudmXURE+9oamqgyMi3pHEl3S9oj6ZIh718k6QFJu0p5\nR416IyLqEfMLhzUqta24RyxpDfBB4Gx6e3B+WdJ224u3fLvW9sUrrS8iYhJW+1OczwT22L63PJfp\nY8CWCteNiJge98aJm5TaaiTi44H7Bs73ltcW+0VJt5XHS59Qod6IiKoWUKNS27Ru1v0NcI3txyX9\nMnA18LrFQZK2AlsB1q19PpvP/o9Tal63/cjN32y7Cd2x6Yttt6Az1rz8lPFBzxZ3rPwSZnXfrNsH\nDPZwN5bXnmL7QduPl9MrgVcNu5DtWdsztmeOWHNUhaZFRDTV7Hl1kxhHrpGIvwycLOmk8kiQ84Ht\ngwGSNgycngvcVaHeiIiqFhbUqNS24qEJ24ckXQxcB6wBrrK9W9J7gTnb24HfkHQucAh4CLhopfVG\nRNTUuxFXL8lK+kfgUWAeOGR75GOTqowR294B7Fj02qUDx+8B3lOjroiISZnAsMPP2j44Ligr6yIi\niklMTWuinT3fIiI6yFajAqyXNDdQtg67HPC3km4Z8f5T0iOOiKC3H/EyxogPLjXmW/y07X2S/hVw\nvaSv2b5pWGB6xBER8NSjkmpNX7O9r/x5APgEvVXIQyURR0T0uWEZQ9LRkp7XPwZ+jiWWnWRoIiKi\nqDh97VjgE5Kgl2f/3PZnRgUnEUdEFLVmTdi+F3hl0/gk4ogI2t1rIok4IgJ622BOYPlyE0nEERF9\nLS3oSCKOiABY3jziqpKIIyL60iOOiGhR5d3XliOJOCKiLz3iiIiWpUccEdGy9IgjIlpk0iOOiGhb\nWxvDJxFHRPQlEUdEtCxDExERLTJooZ2qk4gjIgBQesQREa3LGHFERMuSiCMiWtZSIq7y8FBJV0k6\nIGnow/HU8wFJeyTdJumMGvVGRFRj0IIalSY
krZH0FUmfGhdb6ynOHwbOWeL9NwEnl7IV+NNK9UZE\n1FPpKc7Fu4C7mgRWScS2bwIeWiJkC/AR99wMHCNpQ426IyK6RtJG4OeBK5vE1+oRj3M8cN/A+d7y\n2g+RtFXSnKS5J+a/N6WmRUT0yM0KsL6fq0rZuuhSfwT8N6DRzORO3ayzPQvMArzgORtaGjaPiGet\n5vOID9qeGfaGpDcDB2zfIumsJhebViLeB5wwcL6xvBYR0Q3LG/9dymuBcyVtBtYBz5f0Z7Z/adR/\nMK2hie3AW8vsiU3Aw7b3T6nuiIhGtNCsLMX2e2xvtH0icD7wuaWSMFTqEUu6BjiL3rjJXuAyYG1p\n1BXADmAzsAf4HvC2GvVGRFS1mhd02L5gzPsG3lmjroiIiamciG1/Hvj8uLhO3ayLiGjLwIyIqUsi\njojoy+5rEREtS484IqJd2Rg+IqJNGSOOiOiAJOKIiJYlEUdEtCtDExERbUsijohoUW7WRUR0QBJx\nRETLkogjItojMjQREdG+JOKIiBY5S5wjItqXHnFERLsyRhwR0bYk4oiIFtV7ijOS1gE3AUfSy7Pb\nbF82Kj6JOCKiqHiz7nHgdbYfk7QW+IKkT9u+eVhwEnFERFFrjLg8MPmxcrq2lJFXP6xOtRERzwBu\nWBqQtEbSLuAAcL3tnaNik4gjIqB5Eu4l4vWS5gbK1n9xOXve9mnARuBMSa8YVXWGJiIiKEucm4cf\ntD3TJND2dyTdCJwD3DEspkqPWNJVkg5IGlqJpLMkPSxpVymX1qg3IqKqSkMTkl4i6Zhy/BzgbOBr\no+Jr9Yg/DFwOfGSJmL+z/eZK9UVEVFdx1sQG4GpJa+h1eP/C9qdGBVdJxLZvknRijWtFRLSm3qyJ\n24DTm8ZP82bdqyV9VdKnJb18ivVGRIxXntDRpNQ2rZt1twIvLZObNwN/DZy8OKjcedwKsI6jmN99\n95Sa123f3NR2C7rjum/uarsJnfHv3vlTbTehO4benXoaWlriPJUese1HbD9WjncAayWtHxI3a3vG\n9sxajpxG0yIintJWj3gqiVjScZJUjs8s9T44jbojIhqruKBjOaoMTUi6BjiL3iTnvcBl9Jb0YfsK\n4DzgVyUdAr4PnF+WAEZEdMNq3xje9gVj3r+c3vS2iIjuyjaYERHtycNDIyK6IIk4IqJdaunWVRJx\nRASs/pt1ERHPCBmaiIhoV27WRUS0LYk4IqJFE1q+3EQScUREXxJxRER7BGgh09ciIlqVoYmIiDZN\naGe1Jqb5hI6IiE7TQrMy9jrSCZJulHSnpN2S3rVUfHrEERF99XrEh4DftH2rpOcBt0i63vadw4KT\niCMiilpjxLb3A/vL8aOS7gKOB5KIIyJG8mRmTZQn3J8O7BwVk0QcEdHXPA+vlzQ3cD5re3ZxkKTn\nAn8FvNv2I6MulkQcEcGyN4Y/aHtmyetJa+kl4Y/a/vhSsUnEEREAdq9UUB6W/CHgLtt/MC4+09ci\nIgq5WWngtcBbgNdJ2lXK5lHB6RFHRBS1Noa3/QV6ox2NJBFHREDvRl32moiIaFn2moiIaFc2/YmI\naFtLT3Fe8ayJJptbqOcDkvZIuk3SGSutNyKitoqzJpalRo+4yeYWbwJOLuWngD8tf0ZEdIImtMS5\niRX3iG3vt31rOX4U6G9uMWgL8BH33AwcI2nDSuuOiKhqoWGprOoY8RKbWxwP3Ddwvre8tn/Rf78V\n2AqwjqNqNi0iYiyt1jHivqabWyzF9qztGdszazmyVtMiIsbzMkplVXrEDTa32AecMHC+sbwWEdER\n9faaWK4asyaabG6xHXhrmT2xCXi4bJwcEdEZq3nWRH9zi9sl7Sqv/TbwowC2rwB2AJuBPcD3gLdV\nqDcioh6D5lfpEucmm1vYNvDOldYVETFRLQ1NZGVdRERfljhHRLSrrelrScQREX1JxBER7ZG9em/W\nRUQ8Y6z
WecQREc8Y/QeIjitjSLpK0gFJdzSpNok4IgLKo5IalvE+DJzTtOoMTUREFLVmTdi+qWyC\n1kgScUREX2ZNRES0yIaFxpsNr5c0N3A+a3v26VadRBwR0dd80/eDtmdqVZtEHBFRrPqN4SMiVr16\n09euAf4fcIqkvZLevlR8esQREVCmr1WbNXHBcuKTiCMiAFjWzbqqkogjIvoyfS0iokUVhyaWK4k4\nIgLoPTw0QxMREe3K0ERERIsyNBER0QGZNRER0aZmizUmIYk4IgLK0ER6xBER7Vqte01IOkHSjZLu\nlLRb0ruGxJwl6WFJu0q5dKX1RkRUV2mvieWq0SM+BPym7VslPQ+4RdL1tu9cFPd3tt9cob6IiAnw\n6p01YXs/sL8cPyrpLuB4YHEijojoLoPn51upuuo2mOUZTacDO4e8/WpJX5X0aUkvr1lvREQVq3ho\nAgBJzwX+Cni37UcWvX0r8FLbj0naDPw1cPKQa2wFtpbTx2/wtkaPop6w9cDBtAHoQDvWbGi/DUUH\n2vFfO9AGoBOfBaes+ArLe1RSVVUSsaS19JLwR21/fPH7g4nZ9g5JfyJpve2Di+JmgdlyzbmajyJ5\nurrQji60oSvt6EIbutKOLrShK+1Y9Py4p2+1ziOWJOBDwF22/2BEzHHA/bYt6Ux6QyIPrrTuiIia\nvIp7xK8F3gLcLmlXee23gR8FsH0FcB7wq5IOAd8Hzrdb+tUTETGMDfOrNBHb/gKgMTGXA5cv89JP\n+9HUlXWhHV1oA3SjHV1oA3SjHV1oA3SjHXXa0NI2mErHNCICnn/Yi73p8Dc2ir3+yWtuGTcuLukc\n4P3AGuBK2+8bFZunOEdEQJmattCsjCFpDfBB4E3AqcAFkk4dFd+ZRCzpRZKul/T18ucLR8TNDyyV\n3l6x/nMk3S1pj6RLhrx/pKRry/s7y5zpqhq04SJJDwz8/O+YQBuuknRA0tCpg+r5QGnjbZLOaKEN\nU1ky33D5/kQ/j65sISBpnaQvlbUAuyX99yExE/2ONGzDir4jXnCj0sCZwB7b99p+AvgYsGV0xXYn\nCvC/gEvK8SXA74+Ie2wCda8B7gFeBhwBfBU4dVHMrwFXlOPzgWtbaMNFwOUT/v/wM8AZwB0j3t8M\nfJrefYFNwM4W2nAW8KlJfg6lng3AGeX4ecDfD/l/MtHPo2EbJv55lJ/vueV4Lb1FW5sWxUz6O9Kk\nDU/7OwJ8BphrWO5YdL510bXOozcc0T9/y1Lt6tLua1vo/YUCuBr4PPBbU6r7qd9eAJL6v70Gl2lv\nAX6vHG8DLpckl095Sm2YONs3jenJbAE+Un7umyUdI2mDe0vdp9WGqXCz5fsT/TwatmHiys/3WDld\nW8riv/sT/Y40bMNKrn9OrWstV2eGJoBjB/7yfgs4dkTcOklzkm6W9AuV6j4euG/gfG95bWiM7UPA\nw8CLK9XftA0Av1j+CbxN0gkV62+qaTsnbapL5jV6+f7UPo8l2gBT+DwkrSlTVA8A19se+VlM6DvS\npA3Q/ncEYB8wWPfG8tpQU03Ekm6QdMeQ8kNjJ+U336jfdC91727lfwL+SNKPTbrdHfI3wIm2fxK4\nnt6/HJ6N+kvmXwn8Mb0l8xOjpZfvT8WYNkzl87A9b/s0eknlTEmvmEQ9K2xDV74jXwZOlnSSpCPo\nDdWMvKc11URs+w22XzGkfBK4X9IGgPLngRHX2Ff+vJfe8MXpFZrW5LfXUzGSDgdeQN3VgWPbYPtB\n24+X0yuBV1Wsv6ll/aafBNuP2H6sHO8A1kpaP4m6NGb5PlP4PMa1YZqfR6njO8CNwOJ/yk/6OzK2\nDR35jvT/RXAxcB1wF/AXtnePiu/S0MR24MJyfCHwycUBkl4o6chyvJ7eqr4aY2VNfnsNtu884HMV\nx4cbtaH/i6o4l97/4GnbDry1zBbYBDxcc3y4CUnHSVI5ntiS+VLHksv3m
fDn0aQN0/g8JL1E0jHl\n+DnA2cDXFoVN9DvSpA0d+Y4AvV+Ktv+17R+z/T/GBXei0BtL+izwdeAG4EXl9RnK3UfgNcDt9GYU\n3A68vWL9m+ndkb4H+J3y2nuBc8vxOuAvgT3Al4CXTeAzGNeG/wnsLj//jcBPTKAN19C7OfQkvfHO\ntwO/AvxKeV/05kfeU/4fzLTQhosHPoebgddM6O/kT9MbIrsN2FXK5ml+Hg3bMPHPA/hJ4CulHXcA\nl077O9KwDRP/jkyiZGVdRETLujQ0ERHxrJREHBHRsiTiiIiWJRFHRLQsiTgiomVJxBERLUsijoho\nWRJxRETL/gkIloarMJJ5WAAAAABJRU5ErkJggg==\n", 163 | "text/plain": [ 164 | "" 165 | ] 166 | }, 167 | "metadata": {}, 168 | "output_type": "display_data" 169 | } 170 | ], 171 | "source": [ 172 | "imshow(reference * hypothesis, interpolation='nearest'); colorbar();" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "deletable": true, 179 | "editable": true 180 | }, 181 | "source": [ 182 | "## Precision and coverage" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 7, 188 | "metadata": { 189 | "collapsed": false, 190 | "deletable": true, 191 | "editable": true 192 | }, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "Precision = 0.611\n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | "from pyannote.metrics.identification import IdentificationPrecision\n", 204 | "precision = IdentificationPrecision()\n", 205 | "print(\"Precision = {0:.3f}\".format(precision(reference, hypothesis, uem=Segment(0, 40))))" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 8, 211 | "metadata": { 212 | "collapsed": false, 213 | "deletable": true, 214 | "editable": true 215 | }, 216 | "outputs": [ 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "Recall = 0.710\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "from pyannote.metrics.identification import IdentificationRecall\n", 227 | "recall = IdentificationRecall()\n", 228 | "print(\"Recall = {0:.3f}\".format(recall(reference, hypothesis, uem=Segment(0, 40))))" 229 | ] 230 | } 231 | ], 232 | "metadata": { 233 | "kernelspec": { 234 | 
"display_name": "Python 3", 235 | "language": "python", 236 | "name": "python3" 237 | }, 238 | "language_info": { 239 | "codemirror_mode": { 240 | "name": "ipython", 241 | "version": 3 242 | }, 243 | "file_extension": ".py", 244 | "mimetype": "text/x-python", 245 | "name": "python", 246 | "nbconvert_exporter": "python", 247 | "pygments_lexer": "ipython3", 248 | "version": "3.4.3" 249 | } 250 | }, 251 | "nbformat": 4, 252 | "nbformat_minor": 0 253 | } 254 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pyannote-metrics" 3 | description = "A toolkit for reproducible evaluation, diagnostic, and error analysis of speaker diarization systems" 4 | readme = "README.md" 5 | authors = [ 6 | { name = "Hervé BREDIN", email = "herve@pyannote.ai" } 7 | ] 8 | requires-python = ">=3.10" 9 | 10 | dynamic = [ 11 | "version", 12 | ] 13 | 14 | dependencies = [ 15 | "numpy>=2.2.2", 16 | "pandas>=2.2.3", 17 | "pyannote-core>=5.0.0", 18 | "pyannote-database>=5.1.3", 19 | "scikit-learn>=1.6.1", 20 | "scipy>=1.15.1", 21 | ] 22 | 23 | [project.scripts] 24 | pyannote-metrics = "pyannote.metrics.cli:main" 25 | 26 | 27 | [project.optional-dependencies] 28 | test = [ 29 | "pytest>=8.3.4", 30 | ] 31 | doc = [ 32 | "sphinx-rtd-theme>=3.0.2", 33 | "sphinx>=8.1.3", 34 | "ipython>=8.32.0", 35 | "matplotlib>=3.10.0", 36 | ] 37 | cli = [ 38 | "docopt>=0.6.2", 39 | "sympy>=1.13.3", 40 | "tabulate>=0.9.0", 41 | ] 42 | plot = [ 43 | "matplotlib>=3.10.0", 44 | ] 45 | 46 | [build-system] 47 | requires = ["hatchling", "hatch-vcs"] 48 | build-backend = "hatchling.build" 49 | 50 | [tool.hatch.version] 51 | source = "vcs" 52 | 53 | [tool.hatch.build.targets.wheel] 54 | packages = ["src/pyannote"] 55 | 56 | [dependency-groups] 57 | dev = [ 58 | "ipykernel>=6.29.5", 59 | ] 60 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2014-2020 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | 26 | # AUTHORS 27 | # Herve BREDIN - http://herve.niderb.fr 28 | 29 | import versioneer 30 | 31 | versioneer.versionfile_source = "pyannote/metrics/_version.py" 32 | versioneer.versionfile_build = versioneer.versionfile_source 33 | versioneer.tag_prefix = "" 34 | versioneer.parentdir_prefix = "pyannote-metrics-" 35 | 36 | from setuptools import setup, find_packages 37 | 38 | setup( 39 | # package 40 | namespace_packages=["pyannote"], 41 | packages=find_packages(), 42 | entry_points={ 43 | "console_scripts": [ 44 | "pyannote-metrics=pyannote.metrics.cli:main", 45 | ], 46 | }, 47 | install_requires=[ 48 | "pyannote.core >= 4.1", 49 | "pyannote.database >= 4.0.1", 50 | "pandas >= 0.19", 51 | "scipy >= 1.1.0", 52 | "scikit-learn >= 0.17.1", 53 | "docopt >= 0.6.2", 54 | "tabulate >= 0.7.7", 55 | "matplotlib >= 2.0.0", 56 | "sympy >= 1.1", 57 | "numpy", 58 | ], 59 | # versioneer 60 | version=versioneer.get_version(), 61 | cmdclass=versioneer.get_cmdclass(), 62 | # PyPI 63 | name="pyannote.metrics", 64 | description=( 65 | "a toolkit for reproducible evaluation, diagnostic, and error analysis of speaker diarization systems" 66 | ), 67 | author="Herve Bredin", 68 | author_email="bredin@limsi.fr", 69 | url="https://pyannote.github.io/pyannote-metrics", 70 | classifiers=[ 71 | "Development Status :: 4 - Beta", 72 | "Intended Audience :: Science/Research", 73 | "License :: OSI Approved :: MIT License", 74 | "Natural Language :: English", 75 | "Programming Language :: Python :: 3", 76 | "Programming Language :: Python :: 3.7", 77 | "Programming Language :: Python :: 3.8", 78 | "Topic :: Scientific/Engineering", 79 | ], 80 | extras_require={ 81 | "tests": {"pytest"}, 82 | "docs": ["Sphinx==2.2.2", "ipython==7.10.1", "sphinx_rtd_theme==0.4.3"], 83 | }, 84 | ) 85 | -------------------------------------------------------------------------------- /src/pyannote/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2012-2021 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | from .base import f_measure 30 | 31 | import importlib.metadata 32 | __version__ = importlib.metadata.version("pyannote-metrics") 33 | 34 | __all__ = ["f_measure"] 35 | -------------------------------------------------------------------------------- /src/pyannote/metrics/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2012- CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | from typing import List, Union, Optional, Set, Tuple 29 | 30 | import warnings 31 | import numpy as np 32 | import pandas as pd 33 | import scipy.stats 34 | from pyannote.core import Annotation, Timeline 35 | 36 | from pyannote.metrics.types import Details, MetricComponents 37 | 38 | 39 | class BaseMetric: 40 | """ 41 | :class:`BaseMetric` is the base class for most pyannote evaluation metrics. 42 | 43 | Attributes 44 | ---------- 45 | name : str 46 | Human-readable name of the metric (eg. 'diarization error rate') 47 | """ 48 | 49 | @classmethod 50 | def metric_name(cls) -> str: 51 | raise NotImplementedError( 52 | cls.__name__ + " is missing a 'metric_name' class method. " 53 | "It should return the name of the metric as string." 54 | ) 55 | 56 | @classmethod 57 | def metric_components(cls) -> MetricComponents: 58 | raise NotImplementedError( 59 | cls.__name__ + " is missing a 'metric_components' class method. " 60 | "It should return the list of names of metric components." 61 | ) 62 | 63 | def __init__(self, **kwargs): 64 | super(BaseMetric, self).__init__() 65 | self.metric_name_ = self.__class__.metric_name() 66 | self.components_: Set[str] = set(self.__class__.metric_components()) 67 | self.reset() 68 | 69 | def init_components(self): 70 | return {value: 0.0 for value in self.components_} 71 | 72 | def reset(self): 73 | """Reset accumulated components and metric values""" 74 | self.accumulated_: Details = dict() 75 | self.results_: List = list() 76 | for value in self.components_: 77 | self.accumulated_[value] = 0.0 78 | 79 | @property 80 | def name(self): 81 | """Metric name.""" 82 | return self.metric_name() 83 | 84 | # TODO: use joblib/locky to allow parallel processing? 85 | # TODO: signature could be something like __call__(self, reference_iterator, hypothesis_iterator, ...) 
86 | 87 | def __call__(self, reference: Union[Timeline, Annotation], 88 | hypothesis: Union[Timeline, Annotation], 89 | detailed: bool = False, uri: Optional[str] = None, **kwargs): 90 | """Compute metric value and accumulate components 91 | 92 | Parameters 93 | ---------- 94 | reference : type depends on the metric 95 | Manual `reference` 96 | hypothesis : type depends on the metric 97 | Evaluated `hypothesis` 98 | uri : optional 99 | Override uri. 100 | detailed : bool, optional 101 | By default (False), return metric value only. 102 | Set `detailed` to True to return dictionary where keys are 103 | components names and values are component values 104 | 105 | Returns 106 | ------- 107 | value : float (if `detailed` is False) 108 | Metric value 109 | components : dict (if `detailed` is True) 110 | `components` updated with metric value 111 | """ 112 | 113 | # compute metric components 114 | components = self.compute_components(reference, hypothesis, **kwargs) 115 | 116 | # compute rate based on components 117 | components[self.metric_name_] = self.compute_metric(components) 118 | 119 | # keep track of this computation 120 | uri = uri or getattr(reference, "uri", "NA") 121 | self.results_.append((uri, components)) 122 | 123 | # accumulate components 124 | for name in self.components_: 125 | self.accumulated_[name] += components[name] 126 | 127 | if detailed: 128 | return components 129 | 130 | return components[self.metric_name_] 131 | 132 | def report(self, display: bool = False) -> pd.DataFrame: 133 | """Evaluation report 134 | 135 | Parameters 136 | ---------- 137 | display : bool, optional 138 | Set to True to print the report to stdout. 139 | 140 | Returns 141 | ------- 142 | report : pandas.DataFrame 143 | Dataframe with one column per metric component, one row per 144 | evaluated item, and one final row for accumulated results. 
145 | """ 146 | 147 | report = [] 148 | uris = [] 149 | 150 | percent = "total" in self.metric_components() 151 | 152 | for uri, components in self.results_: 153 | row = {} 154 | if percent: 155 | total = components["total"] 156 | for key, value in components.items(): 157 | if key == self.name: 158 | row[key, "%"] = 100 * value 159 | elif key == "total": 160 | row[key, ""] = value 161 | else: 162 | row[key, ""] = value 163 | if percent: 164 | if total > 0: 165 | row[key, "%"] = 100 * value / total 166 | else: 167 | row[key, "%"] = np.nan 168 | 169 | report.append(row) 170 | uris.append(uri) 171 | 172 | row = {} 173 | components = self.accumulated_ 174 | 175 | if percent: 176 | total = components["total"] 177 | 178 | for key, value in components.items(): 179 | if key == self.name: 180 | row[key, "%"] = 100 * value 181 | elif key == "total": 182 | row[key, ""] = value 183 | else: 184 | row[key, ""] = value 185 | if percent: 186 | if total > 0: 187 | row[key, "%"] = 100 * value / total 188 | else: 189 | row[key, "%"] = np.nan 190 | 191 | row[self.name, "%"] = 100 * abs(self) 192 | report.append(row) 193 | uris.append("TOTAL") 194 | 195 | df = pd.DataFrame(report) 196 | 197 | df["item"] = uris 198 | df = df.set_index("item") 199 | 200 | df.columns = pd.MultiIndex.from_tuples(df.columns) 201 | 202 | df = df[[self.name] + self.metric_components()] 203 | 204 | if display: 205 | print( 206 | df.to_string( 207 | index=True, 208 | sparsify=False, 209 | justify="right", 210 | float_format=lambda f: "{0:.2f}".format(f), 211 | ) 212 | ) 213 | 214 | return df 215 | 216 | def __str__(self): 217 | report = self.report(display=False) 218 | return report.to_string( 219 | sparsify=False, float_format=lambda f: "{0:.2f}".format(f) 220 | ) 221 | 222 | def __abs__(self): 223 | """Compute metric value from accumulated components""" 224 | return self.compute_metric(self.accumulated_) 225 | 226 | def __getitem__(self, component: str) -> Union[float, Details]: 227 | """Get value of 
accumulated `component`. 228 | 229 | Parameters 230 | ---------- 231 | component : str 232 | Name of `component` 233 | 234 | Returns 235 | ------- 236 | value : type depends on the metric 237 | Value of accumulated `component` 238 | 239 | """ 240 | if component == slice(None, None, None): 241 | return dict(self.accumulated_) 242 | else: 243 | return self.accumulated_[component] 244 | 245 | def __iter__(self): 246 | """Iterator over the accumulated (uri, value)""" 247 | for uri, component in self.results_: 248 | yield uri, component 249 | 250 | def compute_components(self, 251 | reference: Union[Timeline, Annotation], 252 | hypothesis: Union[Timeline, Annotation], 253 | **kwargs) -> Details: 254 | """Compute metric components 255 | 256 | Parameters 257 | ---------- 258 | reference : type depends on the metric 259 | Manual `reference` 260 | hypothesis : same as `reference` 261 | Evaluated `hypothesis` 262 | 263 | Returns 264 | ------- 265 | components : dict 266 | Dictionary where keys are component names and values are component 267 | values 268 | 269 | """ 270 | raise NotImplementedError( 271 | self.__class__.__name__ + " is missing a 'compute_components' method." 272 | "It should return a dictionary where keys are component names " 273 | "and values are component values." 274 | ) 275 | 276 | def compute_metric(self, components: Details): 277 | """Compute metric value from computed `components` 278 | 279 | Parameters 280 | ---------- 281 | components : dict 282 | Dictionary where keys are components names and values are component 283 | values 284 | 285 | Returns 286 | ------- 287 | value : type depends on the metric 288 | Metric value 289 | """ 290 | raise NotImplementedError( 291 | self.__class__.__name__ + " is missing a 'compute_metric' method. " 292 | "It should return the actual value of the metric based " 293 | "on the precomputed component dictionary given as input." 
294 | ) 295 | 296 | def confidence_interval(self, alpha: float = 0.9) \ 297 | -> Tuple[float, Tuple[float, float]]: 298 | """Compute confidence interval on accumulated metric values 299 | 300 | Parameters 301 | ---------- 302 | alpha : float, optional 303 | Probability that the returned confidence interval contains 304 | the true metric value. 305 | 306 | Returns 307 | ------- 308 | (center, (lower, upper)) 309 | with center the mean of the conditional pdf of the metric value 310 | and (lower, upper) is a confidence interval centered on the median, 311 | containing the estimate to a probability alpha. 312 | 313 | See Also: 314 | --------- 315 | scipy.stats.bayes_mvs 316 | 317 | """ 318 | 319 | values = [r[self.metric_name_] for _, r in self.results_] 320 | 321 | if len(values) == 0: 322 | raise ValueError("Please evaluate a bunch of files before computing confidence interval.") 323 | 324 | elif len(values) == 1: 325 | warnings.warn("Cannot compute a reliable confidence interval out of just one file.") 326 | center = lower = upper = values[0] 327 | return center, (lower, upper) 328 | 329 | else: 330 | return scipy.stats.bayes_mvs(values, alpha=alpha)[0] 331 | 332 | 333 | PRECISION_NAME = "precision" 334 | PRECISION_RETRIEVED = "# retrieved" 335 | PRECISION_RELEVANT_RETRIEVED = "# relevant retrieved" 336 | 337 | 338 | class Precision(BaseMetric): 339 | """ 340 | :class:`Precision` is a base class for precision-like evaluation metrics. 341 | 342 | It defines two components '# retrieved' and '# relevant retrieved' and the 343 | compute_metric() method to compute the actual precision: 344 | 345 | Precision = # retrieved / # relevant retrieved 346 | 347 | Inheriting classes must implement compute_components(). 
348 | """ 349 | 350 | @classmethod 351 | def metric_name(cls): 352 | return PRECISION_NAME 353 | 354 | @classmethod 355 | def metric_components(cls) -> MetricComponents: 356 | return [PRECISION_RETRIEVED, PRECISION_RELEVANT_RETRIEVED] 357 | 358 | def compute_metric(self, components: Details) -> float: 359 | """Compute precision from `components`""" 360 | numerator = components[PRECISION_RELEVANT_RETRIEVED] 361 | denominator = components[PRECISION_RETRIEVED] 362 | if denominator == 0.0: 363 | if numerator == 0: 364 | return 1.0 365 | else: 366 | raise ValueError("") 367 | else: 368 | return numerator / denominator 369 | 370 | 371 | RECALL_NAME = "recall" 372 | RECALL_RELEVANT = "# relevant" 373 | RECALL_RELEVANT_RETRIEVED = "# relevant retrieved" 374 | 375 | 376 | class Recall(BaseMetric): 377 | """ 378 | :class:`Recall` is a base class for recall-like evaluation metrics. 379 | 380 | It defines two components '# relevant' and '# relevant retrieved' and the 381 | compute_metric() method to compute the actual recall: 382 | 383 | Recall = # relevant retrieved / # relevant 384 | 385 | Inheriting classes must implement compute_components(). 
386 | """ 387 | 388 | @classmethod 389 | def metric_name(cls): 390 | return RECALL_NAME 391 | 392 | @classmethod 393 | def metric_components(cls) -> MetricComponents: 394 | return [RECALL_RELEVANT, RECALL_RELEVANT_RETRIEVED] 395 | 396 | def compute_metric(self, components: Details) -> float: 397 | """Compute recall from `components`""" 398 | numerator = components[RECALL_RELEVANT_RETRIEVED] 399 | denominator = components[RECALL_RELEVANT] 400 | if denominator == 0.0: 401 | if numerator == 0: 402 | return 1.0 403 | else: 404 | raise ValueError("") 405 | else: 406 | return numerator / denominator 407 | 408 | 409 | def f_measure(precision: float, recall: float, beta=1.0) -> float: 410 | """Compute f-measure 411 | 412 | f-measure is defined as follows: 413 | F(P, R, b) = (1+b²).P.R / (b².P + R) 414 | 415 | where P is `precision`, R is `recall` and b is `beta` 416 | """ 417 | if precision + recall == 0.0: 418 | return 0 419 | return (1 + beta * beta) * precision * recall / (beta * beta * precision + recall) 420 | -------------------------------------------------------------------------------- /src/pyannote/metrics/binary_classification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016-2017 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 
17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | from collections import Counter 30 | from typing import Tuple 31 | 32 | import numpy as np 33 | import sklearn.metrics 34 | from numpy.typing import ArrayLike 35 | from sklearn.base import BaseEstimator 36 | from sklearn.calibration import CalibratedClassifierCV 37 | from sklearn.model_selection._split import _CVIterableWrapper 38 | 39 | from .types import CalibrationMethod 40 | 41 | 42 | def det_curve( 43 | y_true: ArrayLike, scores: ArrayLike, distances: bool = False 44 | ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, float]: 45 | """DET curve 46 | 47 | Parameters 48 | ---------- 49 | y_true : (n_samples, ) array-like 50 | Boolean reference. 51 | scores : (n_samples, ) array-like 52 | Predicted score. 53 | distances : boolean, optional 54 | When True, indicate that `scores` are actually `distances` 55 | 56 | Returns 57 | ------- 58 | fpr : numpy array 59 | False alarm rate 60 | fnr : numpy array 61 | False rejection rate 62 | thresholds : numpy array 63 | Corresponding thresholds 64 | eer : float 65 | Equal error rate 66 | """ 67 | 68 | if distances: 69 | scores = -scores 70 | 71 | # compute false positive and false negative rates 72 | # (a.k.a. 
false alarm and false rejection rates) 73 | fpr, tpr, thresholds = sklearn.metrics.roc_curve(y_true, scores, pos_label=True) 74 | fnr = 1 - tpr 75 | if distances: 76 | thresholds = -thresholds 77 | 78 | # estimate equal error rate 79 | eer_index = np.where(fpr > fnr)[0][0] 80 | eer = 0.25 * ( 81 | fpr[eer_index - 1] + fpr[eer_index] + fnr[eer_index - 1] + fnr[eer_index] 82 | ) 83 | 84 | return fpr, fnr, thresholds, eer 85 | 86 | 87 | def precision_recall_curve( 88 | y_true: ArrayLike, scores: ArrayLike, distances: bool = False 89 | ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, float]: 90 | """Precision-recall curve 91 | 92 | Parameters 93 | ---------- 94 | y_true : (n_samples, ) array-like 95 | Boolean reference. 96 | scores : (n_samples, ) array-like 97 | Predicted score. 98 | distances : boolean, optional 99 | When True, indicate that `scores` are actually `distances` 100 | 101 | Returns 102 | ------- 103 | precision : numpy array 104 | Precision 105 | recall : numpy array 106 | Recall 107 | thresholds : numpy array 108 | Corresponding thresholds 109 | auc : float 110 | Area under curve 111 | 112 | """ 113 | 114 | if distances: 115 | scores = -scores 116 | 117 | precision, recall, thresholds = sklearn.metrics.precision_recall_curve( 118 | y_true, scores, pos_label=True 119 | ) 120 | 121 | if distances: 122 | thresholds = -thresholds 123 | 124 | auc = sklearn.metrics.auc(precision, recall, reorder=True) 125 | 126 | return precision, recall, thresholds, auc 127 | 128 | 129 | class _Passthrough(BaseEstimator): 130 | """Dummy binary classifier used by score Calibration class""" 131 | 132 | def __init__(self): 133 | super().__init__() 134 | self.classes_ = np.array([False, True], dtype=bool) 135 | 136 | def fit(self, scores, y_true): 137 | return self 138 | 139 | def decision_function(self, scores: ArrayLike): 140 | """Returns the input scores unchanged""" 141 | return scores 142 | 143 | 144 | class Calibration: 145 | """Probability calibration for binary 
classification tasks 146 | 147 | Parameters 148 | ---------- 149 | method : {'isotonic', 'sigmoid'}, optional 150 | See `CalibratedClassifierCV`. Defaults to 'isotonic'. 151 | equal_priors : bool, optional 152 | Set to True to force equal priors. Default behavior is to estimate 153 | priors from the data itself. 154 | 155 | Examples 156 | -------- 157 | >>> calibration = Calibration() 158 | >>> calibration.fit(train_score, train_y) 159 | >>> test_probability = calibration.transform(test_score) 160 | 161 | See also 162 | -------- 163 | CalibratedClassifierCV 164 | 165 | """ 166 | 167 | def __init__( 168 | self, equal_priors: bool = False, method: CalibrationMethod = "isotonic" 169 | ): 170 | self.method = method 171 | self.equal_priors = equal_priors 172 | 173 | def fit(self, scores: ArrayLike, y_true: ArrayLike): 174 | """Train calibration 175 | 176 | Parameters 177 | ---------- 178 | scores : (n_samples, ) array-like 179 | Uncalibrated scores. 180 | y_true : (n_samples, ) array-like 181 | True labels (dtype=bool). 182 | """ 183 | 184 | # to force equal priors, randomly select (and average over) 185 | # up to fifty balanced (i.e. #true == #false) calibration sets. 
186 | if self.equal_priors: 187 | counter = Counter(y_true) 188 | positive, negative = counter[True], counter[False] 189 | 190 | if positive > negative: 191 | majority, minority = True, False 192 | n_majority, n_minority = positive, negative 193 | else: 194 | majority, minority = False, True 195 | n_majority, n_minority = negative, positive 196 | 197 | n_splits = min(50, n_majority // n_minority + 1) 198 | 199 | minority_index = np.where(y_true == minority)[0] 200 | majority_index = np.where(y_true == majority)[0] 201 | 202 | cv = [] 203 | for _ in range(n_splits): 204 | test_index = np.hstack( 205 | [ 206 | np.random.choice( 207 | majority_index, size=n_minority, replace=False 208 | ), 209 | minority_index, 210 | ] 211 | ) 212 | cv.append(([], test_index)) 213 | cv = _CVIterableWrapper(cv) 214 | 215 | # to estimate priors from the data itself, use the whole set 216 | else: 217 | cv = "prefit" 218 | 219 | self.calibration_ = CalibratedClassifierCV( 220 | base_estimator=_Passthrough(), method=self.method, cv=cv 221 | ) 222 | self.calibration_.fit(scores.reshape(-1, 1), y_true) 223 | 224 | return self 225 | 226 | def transform(self, scores: ArrayLike): 227 | """Calibrate scores into probabilities 228 | 229 | Parameters 230 | ---------- 231 | scores : (n_samples, ) array-like 232 | Uncalibrated scores. 233 | 234 | Returns 235 | ------- 236 | probabilities : (n_samples, ) array-like 237 | Calibrated scores (i.e. 
probabilities) 238 | """ 239 | return self.calibration_.predict_proba(scores.reshape(-1, 1))[:, 1] 240 | -------------------------------------------------------------------------------- /src/pyannote/metrics/errors/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2012-2019 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | """Error analysis""" 30 | -------------------------------------------------------------------------------- /src/pyannote/metrics/errors/identification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2012-2019 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | # Benjamin MAURICE - maurice@limsi.fr 29 | from typing import Optional, TYPE_CHECKING 30 | 31 | import numpy as np 32 | from pyannote.core import Annotation, Timeline 33 | from scipy.optimize import linear_sum_assignment 34 | 35 | from ..identification import UEMSupportMixin 36 | from ..matcher import LabelMatcher 37 | from ..matcher import MATCH_CORRECT, MATCH_CONFUSION, \ 38 | MATCH_MISSED_DETECTION, MATCH_FALSE_ALARM 39 | 40 | if TYPE_CHECKING: 41 | from xarray import DataArray 42 | 43 | REFERENCE_TOTAL = 'reference' 44 | HYPOTHESIS_TOTAL = 'hypothesis' 45 | 46 | REGRESSION = 'regression' 47 | IMPROVEMENT = 'improvement' 48 | BOTH_CORRECT = 'both_correct' 49 | BOTH_INCORRECT = 'both_incorrect' 50 | 51 | 52 | class IdentificationErrorAnalysis(UEMSupportMixin): 53 | """ 54 | 55 | Parameters 56 | ---------- 57 | collar : float, optional 58 | Duration (in seconds) of collars removed from evaluation around 59 | boundaries of reference segments. 60 | skip_overlap : bool, optional 61 | Set to True to not evaluate overlap regions. 62 | Defaults to False (i.e. keep overlap regions). 63 | """ 64 | 65 | def __init__(self, collar: float = 0., skip_overlap: bool = False): 66 | 67 | super().__init__() 68 | self.matcher = LabelMatcher() 69 | self.collar = collar 70 | self.skip_overlap = skip_overlap 71 | 72 | def difference(self, 73 | reference: Annotation, 74 | hypothesis: Annotation, 75 | uem: Optional[Timeline] = None, 76 | uemified: bool = False): 77 | """Get error analysis as `Annotation` 78 | 79 | Labels are (status, reference_label, hypothesis_label) tuples. 80 | `status` is either 'correct', 'confusion', 'missed detection' or 81 | 'false alarm'. 82 | `reference_label` is None in case of 'false alarm'. 83 | `hypothesis_label` is None in case of 'missed detection'. 84 | 85 | Parameters 86 | ---------- 87 | uemified : bool, optional 88 | Returns "uemified" version of reference and hypothesis. 
89 | Defaults to False. 90 | 91 | Returns 92 | ------- 93 | errors : `Annotation` 94 | 95 | """ 96 | 97 | R, H, common_timeline = self.uemify( 98 | reference, hypothesis, uem=uem, 99 | collar=self.collar, skip_overlap=self.skip_overlap, 100 | returns_timeline=True) 101 | 102 | errors = Annotation(uri=reference.uri, modality=reference.modality) 103 | 104 | # loop on all segments 105 | for segment in common_timeline: 106 | 107 | # list of labels in reference segment 108 | rlabels = R.get_labels(segment, unique=False) 109 | 110 | # list of labels in hypothesis segment 111 | hlabels = H.get_labels(segment, unique=False) 112 | 113 | _, details = self.matcher(rlabels, hlabels) 114 | 115 | for r, h in details[MATCH_CORRECT]: 116 | track = errors.new_track(segment, prefix=MATCH_CORRECT) 117 | errors[segment, track] = (MATCH_CORRECT, r, h) 118 | 119 | for r, h in details[MATCH_CONFUSION]: 120 | track = errors.new_track(segment, prefix=MATCH_CONFUSION) 121 | errors[segment, track] = (MATCH_CONFUSION, r, h) 122 | 123 | for r in details[MATCH_MISSED_DETECTION]: 124 | track = errors.new_track(segment, 125 | prefix=MATCH_MISSED_DETECTION) 126 | errors[segment, track] = (MATCH_MISSED_DETECTION, r, None) 127 | 128 | for h in details[MATCH_FALSE_ALARM]: 129 | track = errors.new_track(segment, prefix=MATCH_FALSE_ALARM) 130 | errors[segment, track] = (MATCH_FALSE_ALARM, None, h) 131 | 132 | if uemified: 133 | return reference, hypothesis, errors 134 | else: 135 | return errors 136 | 137 | def _match_errors(self, before, after): 138 | b_type, b_ref, b_hyp = before 139 | a_type, a_ref, a_hyp = after 140 | return (b_ref == a_ref) * (1 + (b_type == a_type) + (b_hyp == a_hyp)) 141 | 142 | # TODO : return type 143 | def regression(self, 144 | reference: Annotation, 145 | before: Annotation, 146 | after: Annotation, 147 | uem: Optional[Timeline] = None, 148 | uemified: bool = False): 149 | 150 | _, before, errors_before = self.difference( 151 | reference, before, uem=uem, uemified=True) 152 
| 153 | reference, after, errors_after = self.difference( 154 | reference, after, uem=uem, uemified=True) 155 | 156 | behaviors = Annotation(uri=reference.uri, modality=reference.modality) 157 | 158 | # common (up-sampled) timeline 159 | common_timeline = errors_after.get_timeline().union( 160 | errors_before.get_timeline()) 161 | common_timeline = common_timeline.segmentation() 162 | 163 | # align 'before' errors on common timeline 164 | B = self._tagger(errors_before, common_timeline) 165 | 166 | # align 'after' errors on common timeline 167 | A = self._tagger(errors_after, common_timeline) 168 | 169 | for segment in common_timeline: 170 | 171 | old_errors = B.get_labels(segment, unique=False) 172 | new_errors = A.get_labels(segment, unique=False) 173 | 174 | n1 = len(old_errors) 175 | n2 = len(new_errors) 176 | n = max(n1, n2) 177 | 178 | match = np.zeros((n, n), dtype=int) 179 | for i1, e1 in enumerate(old_errors): 180 | for i2, e2 in enumerate(new_errors): 181 | match[i1, i2] = self._match_errors(e1, e2) 182 | 183 | for i1, i2 in zip(*linear_sum_assignment(-match)): 184 | 185 | if i1 >= n1: 186 | track = behaviors.new_track(segment, 187 | candidate=REGRESSION, 188 | prefix=REGRESSION) 189 | behaviors[segment, track] = ( 190 | REGRESSION, None, new_errors[i2]) 191 | 192 | elif i2 >= n2: 193 | track = behaviors.new_track(segment, 194 | candidate=IMPROVEMENT, 195 | prefix=IMPROVEMENT) 196 | behaviors[segment, track] = ( 197 | IMPROVEMENT, old_errors[i1], None) 198 | 199 | elif old_errors[i1][0] == MATCH_CORRECT: 200 | 201 | if new_errors[i2][0] == MATCH_CORRECT: 202 | track = behaviors.new_track(segment, 203 | candidate=BOTH_CORRECT, 204 | prefix=BOTH_CORRECT) 205 | behaviors[segment, track] = ( 206 | BOTH_CORRECT, old_errors[i1], new_errors[i2]) 207 | 208 | else: 209 | track = behaviors.new_track(segment, 210 | candidate=REGRESSION, 211 | prefix=REGRESSION) 212 | behaviors[segment, track] = ( 213 | REGRESSION, old_errors[i1], new_errors[i2]) 214 | 215 | else: 
216 | 217 | if new_errors[i2][0] == MATCH_CORRECT: 218 | track = behaviors.new_track(segment, 219 | candidate=IMPROVEMENT, 220 | prefix=IMPROVEMENT) 221 | behaviors[segment, track] = ( 222 | IMPROVEMENT, old_errors[i1], new_errors[i2]) 223 | 224 | else: 225 | track = behaviors.new_track(segment, 226 | candidate=BOTH_INCORRECT, 227 | prefix=BOTH_INCORRECT) 228 | behaviors[segment, track] = ( 229 | BOTH_INCORRECT, old_errors[i1], new_errors[i2]) 230 | 231 | behaviors = behaviors.support() 232 | 233 | if uemified: 234 | return reference, before, after, behaviors 235 | else: 236 | return behaviors 237 | 238 | def matrix(self, 239 | reference: Annotation, 240 | hypothesis: Annotation, 241 | uem: Optional[Timeline] = None) -> 'DataArray': 242 | 243 | reference, hypothesis, errors = self.difference( 244 | reference, hypothesis, uem=uem, uemified=True) 245 | 246 | chart = errors.chart() 247 | 248 | # rLabels contains reference labels 249 | # hLabels contains hypothesis labels confused with a reference label 250 | # falseAlarmLabels contains false alarm hypothesis labels that do not 251 | # exist in reference labels // corner case // 252 | 253 | falseAlarmLabels = set(hypothesis.labels()) - set(reference.labels()) 254 | hLabels = set(reference.labels()) | set(hypothesis.labels()) 255 | rLabels = set(reference.labels()) 256 | 257 | # sort these sets of labels 258 | cmp_func = reference._cmp_labels 259 | falseAlarmLabels = sorted(falseAlarmLabels, cmp=cmp_func) 260 | rLabels = sorted(rLabels, cmp=cmp_func) 261 | hLabels = sorted(hLabels, cmp=cmp_func) 262 | 263 | # append false alarm labels as last 'reference' labels 264 | # (make sure to mark them as such) 265 | rLabels = rLabels + [(MATCH_FALSE_ALARM, hLabel) 266 | for hLabel in falseAlarmLabels] 267 | 268 | # prepend duration columns before the detailed confusion matrix 269 | hLabels = [ 270 | REFERENCE_TOTAL, HYPOTHESIS_TOTAL, 271 | MATCH_CORRECT, MATCH_CONFUSION, 272 | MATCH_FALSE_ALARM, MATCH_MISSED_DETECTION 273 | ] + 
hLabels 274 | 275 | # initialize empty matrix 276 | 277 | try: 278 | from xarray import DataArray 279 | except ImportError: 280 | msg = ( 281 | "Please install xarray dependency to use class " 282 | "'IdentificationErrorAnalysis'." 283 | ) 284 | raise ImportError(msg) 285 | 286 | matrix = DataArray( 287 | np.zeros((len(rLabels), len(hLabels))), 288 | coords=[('reference', rLabels), ('hypothesis', hLabels)]) 289 | 290 | # loop on chart 291 | for (status, rLabel, hLabel), duration in chart: 292 | 293 | # increment correct 294 | if status == MATCH_CORRECT: 295 | matrix.loc[rLabel, hLabel] += duration 296 | matrix.loc[rLabel, MATCH_CORRECT] += duration 297 | 298 | # increment confusion matrix 299 | if status == MATCH_CONFUSION: 300 | matrix.loc[rLabel, hLabel] += duration 301 | matrix.loc[rLabel, MATCH_CONFUSION] += duration 302 | if hLabel in falseAlarmLabels: 303 | matrix.loc[(MATCH_FALSE_ALARM, hLabel), rLabel] += duration 304 | matrix.loc[(MATCH_FALSE_ALARM, hLabel), MATCH_CONFUSION] += duration 305 | else: 306 | matrix.loc[hLabel, rLabel] += duration 307 | matrix.loc[hLabel, MATCH_CONFUSION] += duration 308 | 309 | if status == MATCH_FALSE_ALARM: 310 | # hLabel is also a reference label 311 | if hLabel in falseAlarmLabels: 312 | matrix.loc[(MATCH_FALSE_ALARM, hLabel), MATCH_FALSE_ALARM] += duration 313 | else: 314 | matrix.loc[hLabel, MATCH_FALSE_ALARM] += duration 315 | 316 | if status == MATCH_MISSED_DETECTION: 317 | matrix.loc[rLabel, MATCH_MISSED_DETECTION] += duration 318 | 319 | # total reference and hypothesis duration 320 | for rLabel in rLabels: 321 | 322 | if isinstance(rLabel, tuple) and rLabel[0] == MATCH_FALSE_ALARM: 323 | r = 0. 
324 | h = hypothesis.label_duration(rLabel[1]) 325 | else: 326 | r = reference.label_duration(rLabel) 327 | h = hypothesis.label_duration(rLabel) 328 | 329 | matrix.loc[rLabel, REFERENCE_TOTAL] = r 330 | matrix.loc[rLabel, HYPOTHESIS_TOTAL] = h 331 | 332 | return matrix 333 | -------------------------------------------------------------------------------- /src/pyannote/metrics/errors/segmentation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2012-2017 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | from typing import Union 29 | 30 | from pyannote.core import Annotation, Timeline 31 | 32 | 33 | class SegmentationErrorAnalysis: 34 | 35 | def __init__(self): 36 | super().__init__() 37 | 38 | def __call__(self, reference: Union[Timeline, Annotation], 39 | hypothesis: Union[Timeline, Annotation]) -> Annotation: 40 | 41 | if isinstance(reference, Annotation): 42 | reference = reference.get_timeline() 43 | 44 | if isinstance(hypothesis, Annotation): 45 | hypothesis = hypothesis.get_timeline() 46 | 47 | # over-segmentation 48 | over = Timeline(uri=reference.uri) 49 | prev_r = reference[0] 50 | intersection = [] 51 | for r, h in reference.co_iter(hypothesis): 52 | 53 | if r != prev_r: 54 | intersection = sorted(intersection) 55 | for _, segment in intersection[:-1]: 56 | over.add(segment) 57 | intersection = [] 58 | prev_r = r 59 | 60 | segment = r & h 61 | intersection.append((segment.duration, segment)) 62 | 63 | intersection = sorted(intersection) 64 | for _, segment in intersection[:-1]: 65 | over.add(segment) 66 | 67 | # under-segmentation 68 | under = Timeline(uri=reference.uri) 69 | prev_h = hypothesis[0] 70 | intersection = [] 71 | for h, r in hypothesis.co_iter(reference): 72 | 73 | if h != prev_h: 74 | intersection = sorted(intersection) 75 | for _, segment in intersection[:-1]: 76 | under.add(segment) 77 | intersection = [] 78 | prev_h = h 79 | 80 | segment = h & r 81 | intersection.append((segment.duration, segment)) 82 | 83 | intersection = sorted(intersection) 84 | for _, segment in intersection[:-1]: 85 | under.add(segment) 86 | 87 | # extent 88 | extent = reference.extent() 89 | 90 | # frontier error (both under- and over-segmented) 91 | frontier = under.crop(over) 92 | 93 | # under-segmented 94 | not_over = over.gaps(support=extent) 95 | only_under = under.crop(not_over) 96 | 97 | # over-segmented 98 | not_under = under.gaps(support=extent) 99 | only_over = over.crop(not_under) 100 
| 101 | status = Annotation(uri=reference.uri) 102 | for segment in frontier: 103 | status[segment, '_'] = 'shift' 104 | for segment in only_over: 105 | status[segment, '_'] = 'over-segmentation' 106 | for segment in only_under: 107 | status[segment, '_'] = 'under-segmentation' 108 | 109 | return status.support() 110 | -------------------------------------------------------------------------------- /src/pyannote/metrics/identification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2012-2019 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
# AUTHORS
# Hervé BREDIN - http://herve.niderb.fr
from typing import Optional

from pyannote.core import Annotation, Timeline

from .base import BaseMetric
from .base import Precision, PRECISION_RETRIEVED, PRECISION_RELEVANT_RETRIEVED
from .base import Recall, RECALL_RELEVANT, RECALL_RELEVANT_RETRIEVED
from .matcher import LabelMatcher, \
    MATCH_TOTAL, MATCH_CORRECT, MATCH_CONFUSION, \
    MATCH_MISSED_DETECTION, MATCH_FALSE_ALARM
from .types import MetricComponents, Details
from .utils import UEMSupportMixin

# TODO: can't we put these as class attributes?
IER_TOTAL = MATCH_TOTAL
IER_CORRECT = MATCH_CORRECT
IER_CONFUSION = MATCH_CONFUSION
IER_FALSE_ALARM = MATCH_FALSE_ALARM
IER_MISS = MATCH_MISSED_DETECTION
IER_NAME = 'identification error rate'


class IdentificationErrorRate(UEMSupportMixin, BaseMetric):
    """Identification error rate

    ``ier = (wc x confusion + wf x false_alarm + wm x miss) / total``

    where
    - `confusion` is the total confusion duration in seconds
    - `false_alarm` is the total duration (in seconds) of hypothesis labels
      matched with no reference label
    - `miss` is the total duration (in seconds) of reference labels matched
      with no hypothesis label
    - `total` is the total duration of all tracks
    - wc, wf and wm are optional weights (default to 1)

    Parameters
    ----------
    collar : float, optional
        Duration (in seconds) of collars removed from evaluation around
        boundaries of reference segments.
    skip_overlap : bool, optional
        Set to True to not evaluate overlap regions.
        Defaults to False (i.e. keep overlap regions).
    confusion, miss, false_alarm: float, optional
        Optional weights for confusion, miss and false alarm respectively.
        Default to 1. (no weight)
    """

    @classmethod
    def metric_name(cls) -> str:
        return IER_NAME

    @classmethod
    def metric_components(cls) -> MetricComponents:
        # accumulated (duration-weighted) counts
        return [
            IER_TOTAL,
            IER_CORRECT,
            IER_FALSE_ALARM, IER_MISS,
            IER_CONFUSION]

    def __init__(self,
                 confusion: float = 1.,
                 miss: float = 1.,
                 false_alarm: float = 1.,
                 collar: float = 0.,
                 skip_overlap: bool = False,
                 **kwargs):

        super().__init__(**kwargs)
        # one-to-one label matcher (Hungarian algorithm)
        self.matcher_ = LabelMatcher()
        # per-error-type weights
        self.confusion = confusion
        self.miss = miss
        self.false_alarm = false_alarm
        self.collar = collar
        self.skip_overlap = skip_overlap

    def compute_components(self,
                           reference: Annotation,
                           hypothesis: Annotation,
                           uem: Optional[Timeline] = None,
                           collar: Optional[float] = None,
                           skip_overlap: Optional[bool] = None,
                           **kwargs) -> Details:
        """Accumulate duration-weighted match counts over the common timeline.

        Parameters
        ----------
        collar : float, optional
            Override self.collar
        skip_overlap : bool, optional
            Override self.skip_overlap

        See also
        --------
        :class:`pyannote.metric.diarization.DiarizationErrorRate` uses these
        two options in its `compute_components` method.

        """

        detail = self.init_components()

        # fall back to instance-level settings when not overridden
        if collar is None:
            collar = self.collar
        if skip_overlap is None:
            skip_overlap = self.skip_overlap

        # crop reference/hypothesis to UEM and build a common segmentation
        R, H, common_timeline = self.uemify(
            reference, hypothesis, uem=uem,
            collar=collar, skip_overlap=skip_overlap,
            returns_timeline=True)

        # loop on all segments
        for segment in common_timeline:
            # segment duration
            duration = segment.duration

            # list of IDs in reference segment
            r = R.get_labels(segment, unique=False)

            # list of IDs in hypothesis segment
            h = H.get_labels(segment, unique=False)

            counts, _ = self.matcher_(r, h)

            # weight every matched/unmatched label by the segment duration
            detail[IER_TOTAL] += duration * counts[IER_TOTAL]
            detail[IER_CORRECT] += duration * counts[IER_CORRECT]
            detail[IER_CONFUSION] += duration * counts[IER_CONFUSION]
            detail[IER_MISS] += duration * counts[IER_MISS]
            detail[IER_FALSE_ALARM] += duration * counts[IER_FALSE_ALARM]

        return detail

    def compute_metric(self, detail: Details) -> float:
        # weighted error durations over total reference duration
        numerator = 1. * (
            self.confusion * detail[IER_CONFUSION] +
            self.false_alarm * detail[IER_FALSE_ALARM] +
            self.miss * detail[IER_MISS]
        )
        denominator = 1. * detail[IER_TOTAL]
        # empty reference: perfect (0.) only when there are no errors either
        if denominator == 0.:
            if numerator == 0:
                return 0.
            else:
                return 1.
        else:
            return numerator / denominator


class IdentificationPrecision(UEMSupportMixin, Precision):
    """Identification Precision

    Parameters
    ----------
    collar : float, optional
        Duration (in seconds) of collars removed from evaluation around
        boundaries of reference segments.
    skip_overlap : bool, optional
        Set to True to not evaluate overlap regions.
        Defaults to False (i.e. keep overlap regions).
    """

    def __init__(self, collar: float = 0., skip_overlap: bool = False, **kwargs):
        super().__init__(**kwargs)
        self.collar = collar
        self.skip_overlap = skip_overlap
        self.matcher_ = LabelMatcher()

    def compute_components(self,
                           reference: Annotation,
                           hypothesis: Annotation,
                           uem: Optional[Timeline] = None,
                           **kwargs) -> Details:
        # retrieved = duration-weighted number of hypothesis labels;
        # relevant retrieved = duration-weighted number of correct matches
        detail = self.init_components()

        R, H, common_timeline = self.uemify(
            reference, hypothesis, uem=uem,
            collar=self.collar, skip_overlap=self.skip_overlap,
            returns_timeline=True)

        # loop on all segments
        for segment in common_timeline:
            # segment duration
            duration = segment.duration

            # list of IDs in reference segment
            r = R.get_labels(segment, unique=False)

            # list of IDs in hypothesis segment
            h = H.get_labels(segment, unique=False)

            counts, _ = self.matcher_(r, h)

            detail[PRECISION_RETRIEVED] += duration * len(h)
            detail[PRECISION_RELEVANT_RETRIEVED] += \
                duration * counts[IER_CORRECT]

        return detail


class IdentificationRecall(UEMSupportMixin, Recall):
    """Identification Recall

    Parameters
    ----------
    collar : float, optional
        Duration (in seconds) of collars removed from evaluation around
        boundaries of reference segments.
    skip_overlap : bool, optional
        Set to True to not evaluate overlap regions.
        Defaults to False (i.e. keep overlap regions).
    """

    def __init__(self, collar: float = 0., skip_overlap: bool = False, **kwargs):
        super().__init__(**kwargs)
        self.collar = collar
        self.skip_overlap = skip_overlap
        self.matcher_ = LabelMatcher()

    def compute_components(self,
                           reference: Annotation,
                           hypothesis: Annotation,
                           uem: Optional[Timeline] = None,
                           **kwargs) -> Details:
        # relevant = duration-weighted number of reference labels;
        # relevant retrieved = duration-weighted number of correct matches
        detail = self.init_components()

        R, H, common_timeline = self.uemify(
            reference, hypothesis, uem=uem,
            collar=self.collar, skip_overlap=self.skip_overlap,
            returns_timeline=True)

        # loop on all segments
        for segment in common_timeline:
            # segment duration
            duration = segment.duration

            # list of IDs in reference segment
            r = R.get_labels(segment, unique=False)

            # list of IDs in hypothesis segment
            h = H.get_labels(segment, unique=False)

            counts, _ = self.matcher_(r, h)

            detail[RECALL_RELEVANT] += duration * counts[IER_TOTAL]
            detail[RECALL_RELEVANT_RETRIEVED] += duration * counts[IER_CORRECT]

        return detail
--------------------------------------------------------------------------------
/src/pyannote/metrics/matcher.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

# The MIT License (MIT)

# Copyright (c) 2012-2019 CNRS

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this
permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | from typing import Dict, Tuple, Iterable, List, TYPE_CHECKING 29 | 30 | import numpy as np 31 | from pyannote.core import Annotation 32 | from scipy.optimize import linear_sum_assignment 33 | 34 | if TYPE_CHECKING: 35 | from pyannote.core.utils.types import Label 36 | 37 | MATCH_CORRECT = 'correct' 38 | MATCH_CONFUSION = 'confusion' 39 | MATCH_MISSED_DETECTION = 'missed detection' 40 | MATCH_FALSE_ALARM = 'false alarm' 41 | MATCH_TOTAL = 'total' 42 | 43 | 44 | class LabelMatcher: 45 | """ 46 | ID matcher base class mixin. 47 | 48 | All ID matcher classes must inherit from this class and implement 49 | .match() -- ie return True if two IDs match and False 50 | otherwise. 51 | """ 52 | 53 | def match(self, rlabel: 'Label', hlabel: 'Label') -> bool: 54 | """ 55 | Parameters 56 | ---------- 57 | rlabel : 58 | Reference label 59 | hlabel : 60 | Hypothesis label 61 | 62 | Returns 63 | ------- 64 | match : bool 65 | True if labels match, False otherwise. 
66 | 67 | """ 68 | # Two IDs match if they are equal to each other 69 | return rlabel == hlabel 70 | 71 | def __call__(self, rlabels: Iterable['Label'], hlabels: Iterable['Label']) \ 72 | -> Tuple[Dict[str, int], 73 | Dict[str, List['Label']]]: 74 | """ 75 | 76 | Parameters 77 | ---------- 78 | rlabels, hlabels : iterable 79 | Reference and hypothesis labels 80 | 81 | Returns 82 | ------- 83 | counts : dict 84 | details : dict 85 | 86 | """ 87 | 88 | # counts and details 89 | counts = { 90 | MATCH_CORRECT: 0, 91 | MATCH_CONFUSION: 0, 92 | MATCH_MISSED_DETECTION: 0, 93 | MATCH_FALSE_ALARM: 0, 94 | MATCH_TOTAL: 0 95 | } 96 | 97 | details = { 98 | MATCH_CORRECT: [], 99 | MATCH_CONFUSION: [], 100 | MATCH_MISSED_DETECTION: [], 101 | MATCH_FALSE_ALARM: [] 102 | } 103 | # this is to make sure rlabels and hlabels are lists 104 | # as we will access them later by index 105 | rlabels = list(rlabels) 106 | hlabels = list(hlabels) 107 | 108 | NR = len(rlabels) 109 | NH = len(hlabels) 110 | N = max(NR, NH) 111 | 112 | # corner case 113 | if N == 0: 114 | return counts, details 115 | 116 | # initialize match matrix 117 | # with True if labels match and False otherwise 118 | match = np.zeros((N, N), dtype=bool) 119 | for r, rlabel in enumerate(rlabels): 120 | for h, hlabel in enumerate(hlabels): 121 | match[r, h] = self.match(rlabel, hlabel) 122 | 123 | # find one-to-one mapping that maximize total number of matches 124 | # using the Hungarian algorithm and computes error accordingly 125 | for r, h in zip(*linear_sum_assignment(~match)): 126 | 127 | # hypothesis label is matched with unexisting reference label 128 | # ==> this is a false alarm 129 | if r >= NR: 130 | counts[MATCH_FALSE_ALARM] += 1 131 | details[MATCH_FALSE_ALARM].append(hlabels[h]) 132 | 133 | # reference label is matched with unexisting hypothesis label 134 | # ==> this is a missed detection 135 | elif h >= NH: 136 | counts[MATCH_MISSED_DETECTION] += 1 137 | details[MATCH_MISSED_DETECTION].append(rlabels[r]) 138 
class HungarianMapper:
    """Optimal one-to-one label mapping based on co-occurrence duration."""

    def __call__(self, A: Annotation, B: Annotation) -> Dict['Label', 'Label']:
        """Map labels of `A` to labels of `B`, maximizing total co-occurrence.

        Parameters
        ----------
        A, B : Annotation

        Returns
        -------
        mapping : dict
            Maps a label of `A` to a label of `B`; labels with no actual
            co-occurrence are left unmapped.
        """

        # co-occurrence duration of every (label of A, label of B) pair
        overlap = A * B
        labels_a, labels_b = A.labels(), B.labels()

        # Hungarian algorithm on negated durations == maximize total overlap;
        # keep only pairs that actually co-occur
        rows, cols = linear_sum_assignment(-overlap)
        return {labels_a[i]: labels_b[j]
                for i, j in zip(rows, cols)
                if overlap[i, j] > 0}
class GreedyMapper:
    """Greedy label mapping: repeatedly pair the two most co-occurring labels."""

    def __call__(self, A: Annotation, B: Annotation) -> Dict['Label', 'Label']:
        """Map labels of `A` to labels of `B` greedily.

        At each step, the (A label, B label) pair with the largest remaining
        co-occurrence duration is mapped, then both labels are removed from
        further consideration.

        Parameters
        ----------
        A, B : Annotation

        Returns
        -------
        mapping : dict
        """

        overlap = A * B
        n_a, n_b = overlap.shape
        labels_a, labels_b = A.labels(), B.labels()

        mapping = {}

        # at most min(n_a, n_b) one-to-one pairs are possible
        for _ in range(min(n_a, n_b)):

            # most co-occurring pair still available
            a, b = np.unravel_index(np.argmax(overlap), (n_a, n_b))

            if overlap[a, b] <= 0:
                # nothing left that actually co-occurs
                break

            mapping[labels_a[a]] = labels_b[b]

            # remove both labels from further consideration
            overlap[a, :] = 0.
            overlap[:, b] = 0.

        return mapping
25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | -------------------------------------------------------------------------------- /src/pyannote/metrics/plot/binary_classification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
def plot_distributions(
    y_true: ArrayLike,
    scores: ArrayLike,
    save_to: str,
    xlim: Optional[Tuple[float, float]] = None,
    nbins: int = 100,
    ymax: float = 3.0,
    dpi: int = 150,
) -> bool:
    """Plot positive vs. negative score distributions.

    This function will create (and overwrite) the following files:
    - {save_to}.scores.png
    - {save_to}.scores.eps

    Parameters
    ----------
    y_true : (n_samples, ) array-like
        Boolean reference.
    scores : (n_samples, ) array-like
        Predicted score.
    save_to : str
        Files path prefix
    xlim : (float, float) tuple, optional
        Score axis range. Defaults to (min, max) of `scores`.
    nbins : int, optional
        Number of histogram bins. Defaults to 100.
    ymax : float, optional
        Upper limit of the y axis. Defaults to 3.
    dpi : int, optional
        Resolution of .png file. Defaults to 150.

    Returns
    -------
    success : bool
        Always True.
    """

    # coerce to arrays so boolean masking below also works on plain lists
    y_true = np.asarray(y_true, dtype=bool)
    scores = np.asarray(scores)

    plt.figure(figsize=(12, 12))

    if xlim is None:
        xlim = (np.min(scores), np.max(scores))

    bins = np.linspace(xlim[0], xlim[1], nbins)
    # `normed` was deprecated in matplotlib 2.1 and removed in 3.1;
    # `density=True` is the direct replacement
    plt.hist(scores[y_true], bins=bins, color="g", alpha=0.5, density=True)
    plt.hist(scores[~y_true], bins=bins, color="r", alpha=0.5, density=True)

    # TODO heuristic to estimate ymax from nbins and xlim
    plt.ylim(0, ymax)
    plt.tight_layout()
    plt.savefig(save_to + ".scores.png", dpi=dpi)
    plt.savefig(save_to + ".scores.eps")
    plt.close()

    return True
def plot_det_curve(
    y_true: ArrayLike,
    scores: ArrayLike,
    save_to: str,
    distances: bool = False,
    dpi: int = 150,
) -> float:
    """DET curve

    This function will create (and overwrite) the following files:
    - {save_to}.det.png
    - {save_to}.det.eps
    - {save_to}.det.txt

    Parameters
    ----------
    y_true : (n_samples, ) array-like
        Boolean reference.
    scores : (n_samples, ) array-like
        Predicted score.
    save_to : str
        Files path prefix.
    distances : boolean, optional
        When True, indicate that `scores` are actually `distances`
    dpi : int, optional
        Resolution of .png file. Defaults to 150.

    Returns
    -------
    eer : float
        Equal error rate
    """

    fpr, fnr, thresholds, eer = det_curve(y_true, scores, distances=distances)

    # DET curve in log/log space, with the EER point highlighted
    plt.figure(figsize=(12, 12))
    plt.loglog(fpr, fnr, "b")
    plt.loglog([eer], [eer], "bo")
    plt.xlabel("False Positive Rate")
    plt.ylabel("False Negative Rate")
    plt.xlim(1e-2, 1.0)
    plt.ylim(1e-2, 1.0)
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(save_to + ".det.png", dpi=dpi)
    plt.savefig(save_to + ".det.eps")
    plt.close()

    # dump the raw curve as "threshold fpr fnr" lines
    with open(save_to + ".det.txt", "w") as f:
        for threshold, fp, fn in zip(thresholds, fpr, fnr):
            f.write(f"{threshold:.6f} {fp:.6f} {fn:.6f}\n")

    return eer
def plot_precision_recall_curve(
    y_true: ArrayLike,
    scores: ArrayLike,
    save_to: str,
    distances: bool = False,
    dpi: int = 150,
) -> float:
    """Precision/recall curve

    This function will create (and overwrite) the following files:
    - {save_to}.precision_recall.png
    - {save_to}.precision_recall.eps
    - {save_to}.precision_recall.txt

    Parameters
    ----------
    y_true : (n_samples, ) array-like
        Boolean reference.
    scores : (n_samples, ) array-like
        Predicted score.
    save_to : str
        Files path prefix.
    distances : boolean, optional
        When True, indicate that `scores` are actually `distances`
    dpi : int, optional
        Resolution of .png file. Defaults to 150.

    Returns
    -------
    auc : float
        Area under precision/recall curve
    """

    precision, recall, thresholds, auc = precision_recall_curve(
        y_true, scores, distances=distances
    )

    # P/R curve on the unit square
    plt.figure(figsize=(12, 12))
    plt.plot(recall, precision, "b")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.tight_layout()
    plt.savefig(save_to + ".precision_recall.png", dpi=dpi)
    plt.savefig(save_to + ".precision_recall.eps")
    plt.close()

    # dump the raw curve as "threshold precision recall" lines
    with open(save_to + ".precision_recall.txt", "w") as f:
        for threshold, p, r in zip(thresholds, precision, recall):
            f.write(f"{threshold:.6f} {p:.6f} {r:.6f}\n")

    return auc
175 | 176 | Returns 177 | ------- 178 | auc : float 179 | Area under precision/recall curve 180 | """ 181 | 182 | precision, recall, thresholds, auc = precision_recall_curve( 183 | y_true, scores, distances=distances 184 | ) 185 | 186 | # plot P/R curve 187 | plt.figure(figsize=(12, 12)) 188 | plt.plot(recall, precision, "b") 189 | plt.xlabel("Recall") 190 | plt.ylabel("Precision") 191 | plt.xlim(0, 1) 192 | plt.ylim(0, 1) 193 | plt.tight_layout() 194 | plt.savefig(save_to + ".precision_recall.png", dpi=dpi) 195 | plt.savefig(save_to + ".precision_recall.eps") 196 | plt.close() 197 | 198 | # save P/R curve in text file 199 | txt = save_to + ".precision_recall.txt" 200 | line = "{t:.6f} {p:.6f} {r:.6f}\n" 201 | with open(txt, "w") as f: 202 | for i, (t, p, r) in enumerate(zip(thresholds, precision, recall)): 203 | f.write(line.format(t=t, p=p, r=r)) 204 | 205 | return auc 206 | -------------------------------------------------------------------------------- /src/pyannote/metrics/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyannote/pyannote-metrics/584d177a4862a3ab3f89e9ca6639495f7293065e/src/pyannote/metrics/py.typed -------------------------------------------------------------------------------- /src/pyannote/metrics/segmentation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2012-2019 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following 
conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | # Camille Guinaudeau - https://sites.google.com/site/cguinaudeau/ 29 | # Mamadou Doumbia 30 | # Diego Fustes diego.fustes at toptal.com 31 | from typing import Tuple, Union, Optional 32 | 33 | import numpy as np 34 | from pyannote.core import Segment, Timeline, Annotation 35 | from pyannote.core.utils.generators import pairwise 36 | 37 | from .base import BaseMetric, f_measure 38 | from .types import MetricComponents, Details 39 | from .utils import UEMSupportMixin 40 | 41 | #  TODO: can't we put these as class attributes? 
class SegmentationCoverage(BaseMetric):
    """Segmentation coverage

    Compares two segmentations by partitioning both with the same set of
    boundaries, then measuring how well each reference chunk is covered by
    its best co-occurring hypothesis chunk.

    Parameters
    ----------
    tolerance : float, optional
        When provided, preprocess reference by filling intra-label gaps shorter
        than `tolerance` (in seconds).

    """

    def __init__(self, tolerance: float = 0.500, **kwargs):
        super().__init__(**kwargs)
        # intra-label gaps shorter than this (in seconds) are filled beforehand
        self.tolerance = tolerance

    def _partition(self,
                   timeline: Timeline,
                   coverage: Timeline) -> Annotation:
        """Chop `coverage` at every boundary of `timeline` segments."""

        # boundaries (as set of timestamps)
        boundaries = set([])
        for segment in timeline:
            boundaries.add(segment.start)
            boundaries.add(segment.end)

        # partition (as annotation): one segment between each pair of
        # consecutive boundaries, all sharing a placeholder label
        partition = Annotation()
        for start, end in pairwise(sorted(boundaries)):
            segment = Segment(start, end)
            partition[segment] = '_'

        # restrict to `coverage` and give every track its own unique label
        return partition.crop(coverage, mode='intersection').relabel_tracks()

    def _preprocess(self, reference: Annotation,
                    hypothesis: Union[Annotation, Timeline]) \
            -> Tuple[Annotation, Annotation]:
        """Turn reference and hypothesis into comparable partitions.

        Raises
        ------
        TypeError
            When `reference` is not an Annotation.
        """

        if not isinstance(reference, Annotation):
            raise TypeError('reference must be an instance of `Annotation`')

        if isinstance(hypothesis, Annotation):
            hypothesis: Timeline = hypothesis.get_timeline()

        # reference where short intra-label gaps are removed
        filled = Timeline()
        for label in reference.labels():
            label_timeline = reference.label_timeline(label)
            for gap in label_timeline.gaps():
                if gap.duration < self.tolerance:
                    label_timeline.add(gap)

            for segment in label_timeline.support():
                filled.add(segment)

        # reference coverage after filling gaps
        coverage = filled.support()

        reference_partition = self._partition(filled, coverage)
        hypothesis_partition = self._partition(hypothesis, coverage)

        return reference_partition, hypothesis_partition

    def _process(self, reference: Annotation, hypothesis: Annotation) -> Details:
        """Accumulate total and (best-match) intersection durations."""

        detail = self.init_components()

        # cooccurrence matrix (duration of overlap between every pair of
        # reference / hypothesis chunks)
        K = reference * hypothesis
        detail[PTY_CVG_TOTAL] = np.sum(K).item()
        # for each reference chunk, keep only its best hypothesis match
        detail[PTY_CVG_INTER] = np.sum(np.max(K, axis=1)).item()

        return detail

    @classmethod
    def metric_name(cls):
        return COVERAGE_NAME

    @classmethod
    def metric_components(cls) -> MetricComponents:
        return [PTY_CVG_TOTAL, PTY_CVG_INTER]

    def compute_components(self, reference: Annotation,
                           hypothesis: Union[Annotation, Timeline], **kwargs):
        reference, hypothesis = self._preprocess(reference, hypothesis)
        return self._process(reference, hypothesis)

    def compute_metric(self, detail: Details) -> float:
        # NOTE(review): raises ZeroDivisionError when total duration is 0;
        # sibling SegmentationPurityCoverageFMeasure returns 1. in that case —
        # confirm which behavior is intended
        return detail[PTY_CVG_INTER] / detail[PTY_CVG_TOTAL]
class SegmentationPurity(SegmentationCoverage):
    """Segmentation purity

    Dual of segmentation coverage: reference and hypothesis simply swap
    roles before the coverage computation.

    Parameters
    ----------
    tolerance : float, optional
        When provided, preprocess reference by filling intra-label gaps shorter
        than `tolerance` (in seconds).

    """

    @classmethod
    def metric_name(cls) -> str:
        return PURITY_NAME

    # TODO : Use type from parent class
    def compute_components(self, reference: Annotation,
                           hypothesis: Union[Annotation, Timeline],
                           **kwargs) -> Details:
        # same preprocessing as coverage...
        reference, hypothesis = self._preprocess(reference, hypothesis)
        # ...but with reference and hypothesis roles swapped
        return self._process(hypothesis, reference)


class SegmentationPurityCoverageFMeasure(SegmentationCoverage):
    """
    Compute segmentation purity and coverage, and return their F-score.

    Parameters
    ----------
    tolerance : float, optional
        When provided, preprocess reference by filling intra-label gaps shorter
        than `tolerance` (in seconds).

    beta : float, optional
        When beta > 1, greater importance is given to coverage.
        When beta < 1, greater importance is given to purity.
        Defaults to 1.

    See also
    --------
    pyannote.metrics.segmentation.SegmentationPurity
    pyannote.metrics.segmentation.SegmentationCoverage
    pyannote.metrics.base.f_measure
    """

    def __init__(self, tolerance=0.500, beta=1, **kwargs):
        super().__init__(tolerance=tolerance, **kwargs)
        self.beta = beta

    def _process(self, reference: Annotation,
                 hypothesis: Union[Annotation, Timeline]) -> Details:
        reference, hypothesis = self._preprocess(reference, hypothesis)

        detail = self.init_components()

        # cooccurrence matrix between both partitions
        K = reference * hypothesis
        total = np.sum(K).item()

        # coverage: each reference chunk against its best hypothesis match
        detail[CVG_TOTAL] = total
        detail[CVG_INTER] = np.sum(np.max(K, axis=1)).item()

        # purity: each hypothesis chunk against its best reference match
        detail[PTY_TOTAL] = total
        detail[PTY_INTER] = np.sum(np.max(K, axis=0)).item()

        return detail

    def compute_components(self, reference: Annotation,
                           hypothesis: Union[Annotation, Timeline],
                           **kwargs) -> Details:
        return self._process(reference, hypothesis)

    def compute_metric(self, detail: Details) -> float:
        _, _, value = self.compute_metrics(detail=detail)
        return value

    def compute_metrics(self, detail: Optional[Details] = None) \
            -> Tuple[float, float, float]:
        """Return (purity, coverage, f-measure) from given or accumulated details."""
        detail = self.accumulated_ if detail is None else detail

        # degenerate (zero-duration) case defaults to a perfect score
        purity = detail[PTY_INTER] / detail[PTY_TOTAL] \
            if detail[PTY_TOTAL] != 0. else 1.

        coverage = detail[CVG_INTER] / detail[CVG_TOTAL] \
            if detail[CVG_TOTAL] != 0. else 1.

        return purity, coverage, f_measure(purity, coverage, beta=self.beta)

    @classmethod
    def metric_name(cls) -> str:
        return PURITY_COVERAGE_NAME

    @classmethod
    def metric_components(cls) -> MetricComponents:
        return [PTY_TOTAL, PTY_INTER, CVG_TOTAL, CVG_INTER]
class SegmentationPrecision(UEMSupportMixin, BaseMetric):
    """Segmentation precision

    Fraction of hypothesis boundaries that match a reference boundary
    (within `tolerance` seconds).

    >>> from pyannote.core import Timeline, Segment
    >>> from pyannote.metrics.segmentation import SegmentationPrecision
    >>> precision = SegmentationPrecision()

    >>> reference = Timeline()
    >>> reference.add(Segment(0, 1))
    >>> reference.add(Segment(1, 2))
    >>> reference.add(Segment(2, 4))

    >>> hypothesis = Timeline()
    >>> hypothesis.add(Segment(0, 1))
    >>> hypothesis.add(Segment(1, 2))
    >>> hypothesis.add(Segment(2, 3))
    >>> hypothesis.add(Segment(3, 4))
    >>> precision(reference, hypothesis)
    0.6666666666666666

    >>> hypothesis = Timeline()
    >>> hypothesis.add(Segment(0, 4))
    >>> precision(reference, hypothesis)
    1.0

    """

    @classmethod
    def metric_name(cls):
        return PRECISION_NAME

    @classmethod
    def metric_components(cls):
        return [PR_MATCHES, PR_BOUNDARIES]

    def __init__(self, tolerance=0., **kwargs):
        super().__init__(**kwargs)
        # maximum temporal gap (in seconds) for two boundaries to match
        self.tolerance = tolerance

    def compute_components(self,
                           reference: Union[Annotation, Timeline],
                           hypothesis: Union[Annotation, Timeline],
                           **kwargs) -> Details:

        # extract timeline if needed
        if isinstance(reference, Annotation):
            reference = reference.get_timeline()
        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        detail = self.init_components()

        # number of matches so far...
        n_matches = 0.  # make sure it is a float (for later ratio)

        # number of internal boundaries in reference and hypothesis
        N = len(reference) - 1
        M = len(hypothesis) - 1

        # number of boundaries in hypothesis
        detail[PR_BOUNDARIES] = M

        # corner case (no boundary in hypothesis or in reference)
        if M == 0 or N == 0:
            detail[PR_MATCHES] = 0.
            return detail

        # internal boundaries (the final segment end is not one)
        ref_boundaries = [segment.end for segment in reference][:-1]
        hyp_boundaries = [segment.end for segment in hypothesis][:-1]

        # (N, M) matrix of absolute temporal gaps between all boundary pairs
        # (vectorized equivalent of the double loop over both lists)
        delta = np.abs(np.subtract.outer(ref_boundaries, hyp_boundaries))

        # make sure boundaries too far apart from each other cannot be matched
        # (this is what np.inf is used for)
        delta[delta > self.tolerance] = np.inf

        # h always contains the minimum value in delta matrix
        # h == np.inf means that no boundary can be matched
        h = np.amin(delta)

        # greedily match closest boundaries first
        while h < np.inf:
            # increment match count
            n_matches += 1

            # find boundaries to match
            k = np.argmin(delta)
            i = k // M
            j = k % M

            # make sure they cannot be matched again
            delta[i, :] = np.inf
            delta[:, j] = np.inf

            # update minimum value in delta
            h = np.amin(delta)

        detail[PR_MATCHES] = n_matches
        return detail

    def compute_metric(self, detail: Details) -> float:

        numerator = detail[PR_MATCHES]
        denominator = detail[PR_BOUNDARIES]

        if denominator == 0.:
            if numerator == 0:
                # no hypothesis boundary at all: precision defaults to 1
                return 1.
            # matches cannot exceed hypothesis boundaries; reaching this
            # branch means the accumulated components are inconsistent
            raise ValueError(
                'Found %g matches with no hypothesis boundary.' % numerator)
        return numerator / denominator
class SegmentationRecall(SegmentationPrecision):
    """Segmentation recall

    Fraction of reference boundaries that match a hypothesis boundary:
    precision with the roles of reference and hypothesis swapped.

    >>> from pyannote.core import Timeline, Segment
    >>> from pyannote.metrics.segmentation import SegmentationRecall
    >>> recall = SegmentationRecall()

    >>> reference = Timeline()
    >>> reference.add(Segment(0, 1))
    >>> reference.add(Segment(1, 2))
    >>> reference.add(Segment(2, 4))

    >>> hypothesis = Timeline()
    >>> hypothesis.add(Segment(0, 1))
    >>> hypothesis.add(Segment(1, 2))
    >>> hypothesis.add(Segment(2, 3))
    >>> hypothesis.add(Segment(3, 4))
    >>> recall(reference, hypothesis)
    1.0

    >>> hypothesis = Timeline()
    >>> hypothesis.add(Segment(0, 4))
    >>> recall(reference, hypothesis)
    0.0

    """

    @classmethod
    def metric_name(cls):
        return RECALL_NAME

    def compute_components(self, reference: Union[Annotation, Timeline],
                           hypothesis: Union[Annotation, Timeline],
                           **kwargs) -> Details:
        # recall == precision with reference and hypothesis swapped
        return super().compute_components(hypothesis, reference)
2017-2019 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | import sys 30 | from typing import Union, Iterable, Optional, Tuple, List, Dict 31 | 32 | import numpy as np 33 | from numpy.typing import ArrayLike 34 | from pyannote.core import Segment, Annotation, SlidingWindowFeature, Timeline 35 | 36 | from .base import BaseMetric 37 | from .binary_classification import det_curve 38 | from .types import MetricComponents, Details 39 | 40 | SPOTTING_TARGET = "target" 41 | SPOTTING_SPK_LATENCY = 'speaker_latency' 42 | SPOTTING_SPK_SCORE = 'spk_score' 43 | SPOTTING_ABS_LATENCY = 'absolute_latency' 44 | SPOTTING_ABS_SCORE = "abs_score" 45 | SPOTTING_SCORE = "score" 46 | 47 | 48 | class LowLatencySpeakerSpotting(BaseMetric): 49 | """Evaluation of low-latency speaker spotting (LLSS) systems 50 | 51 | LLSS systems can be evaluated in two ways: with fixed or variable latency. 52 | 53 | * When latency is fixed a priori (default), only scores reported by the 54 | system within the requested latency range are considered. Varying the 55 | detection threshold has no impact on the actual latency of the system. It 56 | only impacts the detection performance. 57 | 58 | * In variable latency mode, the whole stream of scores is considered. 59 | Varying the detection threshold will impact both the detection performance 60 | and the detection latency. Each trial will result in the alarm being 61 | triggered with a different latency. In case the alarm is not triggered at 62 | all (missed detection), the latency is arbitrarily set to the value one 63 | would obtain if it were triggered at the end of the last target speech 64 | turn. The reported latency is the average latency over all target trials. 65 | 66 | Parameters 67 | ---------- 68 | latencies : float iterable, optional 69 | Switch to fixed latency mode, using provided `latencies`. 70 | Defaults to [1, 5, 10, 30, 60] (in seconds). 
71 | thresholds : float iterable, optional 72 | Switch to variable latency mode, using provided detection `thresholds`. 73 | Defaults to fixed latency mode. 74 | """ 75 | 76 | @classmethod 77 | def metric_name(cls) -> str: 78 | return "Low-latency speaker spotting" 79 | 80 | def metric_components(self) -> Dict[str, float]: 81 | return {'target': 0.} 82 | 83 | def __init__(self, 84 | thresholds: Optional[ArrayLike] = None, 85 | latencies: Optional[ArrayLike] = None): 86 | super().__init__() 87 | 88 | if thresholds is None and latencies is None: 89 | latencies = [1, 5, 10, 30, 60] 90 | 91 | if thresholds is not None and latencies is not None: 92 | raise ValueError( 93 | 'One must choose between fixed and variable latency.') 94 | 95 | if thresholds is not None: 96 | self.thresholds = np.sort(thresholds) 97 | 98 | if latencies is not None: 99 | latencies = np.sort(latencies) 100 | 101 | self.latencies = latencies 102 | 103 | def compute_metric(self, detail: MetricComponents): 104 | return None 105 | 106 | def _fixed_latency(self, reference: Timeline, 107 | timestamps: List[float], scores: List[float]) -> Details: 108 | 109 | if not reference: 110 | target_trial = False 111 | spk_score = np.max(scores) * np.ones((len(self.latencies), 1)) 112 | abs_score = spk_score 113 | 114 | else: 115 | target_trial = True 116 | 117 | # cumulative target speech duration after each speech turn 118 | total = np.cumsum([segment.duration for segment in reference]) 119 | 120 | # maximum score in timerange [0, t] 121 | # where t is when latency is reached 122 | spk_score = [] 123 | abs_score = [] 124 | 125 | # index of speech turn when given latency is reached 126 | for i, latency in zip(np.searchsorted(total, self.latencies), 127 | self.latencies): 128 | 129 | # maximum score in timerange [0, t] 130 | # where t is when latency is reached 131 | try: 132 | t = reference[i].end - (total[i] - latency) 133 | up_to = np.searchsorted(timestamps, t) 134 | if up_to < 1: 135 | s = 
-sys.float_info.max 136 | else: 137 | s = np.max(scores[:up_to]) 138 | except IndexError: 139 | s = np.max(scores) 140 | spk_score.append(s) 141 | 142 | # maximum score in timerange [0, t + latency] 143 | # where t is when target speaker starts speaking 144 | t = reference[0].start + latency 145 | 146 | up_to = np.searchsorted(timestamps, t) 147 | if up_to < 1: 148 | s = -sys.float_info.max 149 | else: 150 | s = np.max(scores[:up_to]) 151 | abs_score.append(s) 152 | 153 | spk_score = np.array(spk_score).reshape((-1, 1)) 154 | abs_score = np.array(abs_score).reshape((-1, 1)) 155 | 156 | return { 157 | SPOTTING_TARGET: target_trial, 158 | SPOTTING_SPK_LATENCY: self.latencies, 159 | SPOTTING_SCORE: spk_score, 160 | SPOTTING_ABS_LATENCY: self.latencies, 161 | SPOTTING_ABS_SCORE: abs_score, 162 | } 163 | 164 | def _variable_latency(self, reference: Union[Timeline, Annotation], 165 | timestamps: List[float], scores: List[float], 166 | **kwargs) -> Details: 167 | 168 | # pre-compute latencies 169 | speaker_latency = np.nan * np.ones((len(timestamps), 1)) 170 | absolute_latency = np.nan * np.ones((len(timestamps), 1)) 171 | if isinstance(reference, Annotation): 172 | reference = reference.get_timeline(copy=False) 173 | if reference: 174 | first_time = reference[0].start 175 | for i, t in enumerate(timestamps): 176 | so_far = Segment(first_time, t) 177 | speaker_latency[i] = reference.crop(so_far).duration() 178 | absolute_latency[i] = max(0, so_far.duration) 179 | # TODO | speed up latency pre-computation 180 | 181 | # for every threshold, compute when (if ever) alarm is triggered 182 | maxcum = (np.maximum.accumulate(scores)).reshape((-1, 1)) 183 | triggered = maxcum > self.thresholds 184 | indices = np.array([np.searchsorted(triggered[:, i], True) 185 | for i, _ in enumerate(self.thresholds)]) 186 | 187 | if reference: 188 | 189 | target_trial = True 190 | 191 | absolute_latency = np.take(absolute_latency, indices, mode='clip') 192 | speaker_latency = 
    def _variable_latency(self, reference: Union[Timeline, Annotation],
                          timestamps: List[float], scores: List[float],
                          **kwargs) -> Details:
        """Evaluate one trial in variable-latency mode.

        For every detection threshold, determine when (if ever) the alarm is
        triggered and derive the corresponding speaker/absolute latencies.

        Parameters
        ----------
        reference : Timeline or Annotation
            Target speech turns (empty for non-target trials).
        timestamps, scores : list of float
            System output, sorted by increasing timestamp.

        Returns
        -------
        components : dict
        """

        # pre-compute latencies, one per timestamp:
        # - speaker latency: target speech duration elapsed so far
        # - absolute latency: wall-clock time elapsed since first target turn
        speaker_latency = np.nan * np.ones((len(timestamps), 1))
        absolute_latency = np.nan * np.ones((len(timestamps), 1))
        if isinstance(reference, Annotation):
            reference = reference.get_timeline(copy=False)
        if reference:
            first_time = reference[0].start
            for i, t in enumerate(timestamps):
                so_far = Segment(first_time, t)
                speaker_latency[i] = reference.crop(so_far).duration()
                absolute_latency[i] = max(0, so_far.duration)
        # TODO: speed up latency pre-computation

        # for every threshold, compute when (if ever) alarm is triggered:
        # running maximum of scores crosses each threshold at most once,
        # so `triggered` columns are sorted and searchsorted finds the
        # index of the first True (== first trigger)
        maxcum = (np.maximum.accumulate(scores)).reshape((-1, 1))
        triggered = maxcum > self.thresholds
        indices = np.array([np.searchsorted(triggered[:, i], True)
                            for i, _ in enumerate(self.thresholds)])

        if reference:

            target_trial = True

            # mode='clip' maps "never triggered" (index == len) to the
            # last pre-computed latency; overwritten just below anyway
            absolute_latency = np.take(absolute_latency, indices, mode='clip')
            speaker_latency = np.take(speaker_latency, indices, mode='clip')

            # is alarm triggered at all?
            positive = triggered[-1, :]

            # in case alarm is not triggered, set absolute latency to duration
            # between first and last speech turn of the target speaker...
            absolute_latency[~positive] = reference.extent().duration

            # ...and set speaker latency to target's total speech duration
            speaker_latency[~positive] = reference.duration()

        else:

            target_trial = False

            # the notion of "latency" is not applicable to non-target trials
            absolute_latency = np.nan
            speaker_latency = np.nan

        return {
            SPOTTING_TARGET: target_trial,
            SPOTTING_ABS_LATENCY: absolute_latency,
            SPOTTING_SPK_LATENCY: speaker_latency,
            SPOTTING_SCORE: np.max(scores)
        }
figure out return type 254 | def det_curve(self, 255 | cost_miss: float = 100, 256 | cost_fa: float = 1, 257 | prior_target: float = 0.01, 258 | return_latency: bool = False): 259 | """DET curve 260 | 261 | Parameters 262 | ---------- 263 | cost_miss : float, optional 264 | Cost of missed detections. Defaults to 100. 265 | cost_fa : float, optional 266 | Cost of false alarms. Defaults to 1. 267 | prior_target : float, optional 268 | Target trial prior. Defaults to 0.5. 269 | return_latency : bool, optional 270 | Set to True to return latency. 271 | Has no effect when latencies are given at initialization time. 272 | 273 | Returns 274 | ------- 275 | thresholds : numpy array 276 | Detection thresholds 277 | fpr : numpy array 278 | False alarm rate 279 | fnr : numpy array 280 | False rejection rate 281 | eer : float 282 | Equal error rate 283 | cdet : numpy array 284 | Cdet cost function 285 | speaker_latency : numpy array 286 | absolute_latency : numpy array 287 | Speaker and absolute latency when return_latency is set to True. 288 | """ 289 | 290 | if self.latencies is None: 291 | 292 | y_true = np.array([trial[SPOTTING_TARGET] for _, trial in self]) 293 | scores = np.array([trial[SPOTTING_SCORE] for _, trial in self]) 294 | fpr, fnr, thresholds, eer = det_curve(y_true, scores, distances=False) 295 | fpr, fnr, thresholds = fpr[::-1], fnr[::-1], thresholds[::-1] 296 | cdet = cost_miss * fnr * prior_target + \ 297 | cost_fa * fpr * (1. - prior_target) 298 | 299 | if return_latency: 300 | # needed to align the thresholds used in the DET curve 301 | # with (self.)thresholds used to compute latencies. 
302 | indices = np.searchsorted(thresholds, self.thresholds, side='left') 303 | 304 | thresholds = np.take(thresholds, indices, mode='clip') 305 | fpr = np.take(fpr, indices, mode='clip') 306 | fnr = np.take(fnr, indices, mode='clip') 307 | cdet = np.take(cdet, indices, mode='clip') 308 | return thresholds, fpr, fnr, eer, cdet, \ 309 | self.speaker_latency, self.absolute_latency 310 | 311 | else: 312 | return thresholds, fpr, fnr, eer, cdet 313 | 314 | else: 315 | 316 | y_true = np.array([trial[SPOTTING_TARGET] for _, trial in self]) 317 | spk_scores = np.array([trial[SPOTTING_SPK_SCORE] for _, trial in self]) 318 | abs_scores = np.array([trial[SPOTTING_ABS_SCORE] for _, trial in self]) 319 | 320 | result = {} 321 | for key, scores in {'speaker': spk_scores, 322 | 'absolute': abs_scores}.items(): 323 | 324 | result[key] = {} 325 | 326 | for i, latency in enumerate(self.latencies): 327 | fpr, fnr, theta, eer = det_curve(y_true, scores[:, i], 328 | distances=False) 329 | fpr, fnr, theta = fpr[::-1], fnr[::-1], theta[::-1] 330 | cdet = cost_miss * fnr * prior_target + \ 331 | cost_fa * fpr * (1. 
- prior_target) 332 | result[key][latency] = theta, fpr, fnr, eer, cdet 333 | 334 | return result 335 | -------------------------------------------------------------------------------- /src/pyannote/metrics/types.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | from typing_extensions import Literal 4 | 5 | MetricComponent = str 6 | CalibrationMethod = Literal["isotonic", "sigmoid"] 7 | MetricComponents = List[MetricComponent] 8 | Details = Dict[MetricComponent, float] -------------------------------------------------------------------------------- /src/pyannote/metrics/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2012-2019 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | import warnings 30 | from typing import Optional, Tuple, Union 31 | 32 | from pyannote.core import Timeline, Segment, Annotation 33 | 34 | 35 | class UEMSupportMixin: 36 | """Provides 'uemify' method with optional (à la NIST) collar""" 37 | 38 | def extrude(self, 39 | uem: Timeline, 40 | reference: Annotation, 41 | collar: float = 0.0, 42 | skip_overlap: bool = False) -> Timeline: 43 | """Extrude reference boundary collars from uem 44 | 45 | reference |----| |--------------| |-------------| 46 | uem |---------------------| |-------------------------------| 47 | extruded |--| |--| |---| |-----| |-| |-----| |-----------| |-----| 48 | 49 | Parameters 50 | ---------- 51 | uem : Timeline 52 | Evaluation map. 53 | reference : Annotation 54 | Reference annotation. 55 | collar : float, optional 56 | When provided, set the duration of collars centered around 57 | reference segment boundaries that are extruded from both reference 58 | and hypothesis. Defaults to 0. (i.e. no collar). 59 | skip_overlap : bool, optional 60 | Set to True to not evaluate overlap regions. 61 | Defaults to False (i.e. keep overlap regions). 62 | 63 | Returns 64 | ------- 65 | extruded_uem : Timeline 66 | """ 67 | 68 | if collar == 0. 
and not skip_overlap: 69 | return uem 70 | 71 | collars, overlap_regions = [], [] 72 | 73 | # build list of collars if needed 74 | if collar > 0.: 75 | # iterate over all segments in reference 76 | for segment in reference.itersegments(): 77 | # add collar centered on start time 78 | t = segment.start 79 | collars.append(Segment(t - .5 * collar, t + .5 * collar)) 80 | 81 | # add collar centered on end time 82 | t = segment.end 83 | collars.append(Segment(t - .5 * collar, t + .5 * collar)) 84 | 85 | # build list of overlap regions if needed 86 | if skip_overlap: 87 | # iterate over pair of intersecting segments 88 | for (segment1, track1), (segment2, track2) in reference.co_iter(reference): 89 | if segment1 == segment2 and track1 == track2: 90 | continue 91 | # add their intersection 92 | overlap_regions.append(segment1 & segment2) 93 | 94 | segments = collars + overlap_regions 95 | 96 | return Timeline(segments=segments).support().gaps(support=uem) 97 | 98 | def common_timeline(self, reference: Annotation, hypothesis: Annotation) \ 99 | -> Timeline: 100 | """Return timeline common to both reference and hypothesis 101 | 102 | reference |--------| |------------| |---------| |----| 103 | hypothesis |--------------| |------| |----------------| 104 | timeline |--|-----|----|---|-|------| |-|---------|----| |----| 105 | 106 | Parameters 107 | ---------- 108 | reference : Annotation 109 | hypothesis : Annotation 110 | 111 | Returns 112 | ------- 113 | timeline : Timeline 114 | """ 115 | timeline = reference.get_timeline(copy=True) 116 | timeline.update(hypothesis.get_timeline(copy=False)) 117 | return timeline.segmentation() 118 | 119 | def project(self, annotation: Annotation, timeline: Timeline) -> Annotation: 120 | """Project annotation onto timeline segments 121 | 122 | reference |__A__| |__B__| 123 | |____C____| 124 | 125 | timeline |---|---|---| |---| 126 | 127 | projection |_A_|_A_|_C_| |_B_| 128 | |_C_| 129 | 130 | Parameters 131 | ---------- 132 | annotation : 
Annotation 133 | timeline : Timeline 134 | 135 | Returns 136 | ------- 137 | projection : Annotation 138 | """ 139 | projection = annotation.empty() 140 | timeline_ = annotation.get_timeline(copy=False) 141 | for segment_, segment in timeline_.co_iter(timeline): 142 | for track_ in annotation.get_tracks(segment_): 143 | track = projection.new_track(segment, candidate=track_) 144 | projection[segment, track] = annotation[segment_, track_] 145 | return projection 146 | 147 | def uemify(self, 148 | reference: Annotation, 149 | hypothesis: Annotation, 150 | uem: Optional[Timeline] = None, 151 | collar: float = 0., 152 | skip_overlap: bool = False, 153 | returns_uem: bool = False, 154 | returns_timeline: bool = False) \ 155 | -> Union[ 156 | Tuple[Annotation, Annotation], 157 | Tuple[Annotation, Annotation, Timeline], 158 | Tuple[Annotation, Annotation, Timeline, Timeline], 159 | ]: 160 | """Crop 'reference' and 'hypothesis' to 'uem' support 161 | 162 | Parameters 163 | ---------- 164 | reference, hypothesis : Annotation 165 | Reference and hypothesis annotations. 166 | uem : Timeline, optional 167 | Evaluation map. 168 | collar : float, optional 169 | When provided, set the duration of collars centered around 170 | reference segment boundaries that are extruded from both reference 171 | and hypothesis. Defaults to 0. (i.e. no collar). 172 | skip_overlap : bool, optional 173 | Set to True to not evaluate overlap regions. 174 | Defaults to False (i.e. keep overlap regions). 175 | returns_uem : bool, optional 176 | Set to True to return extruded uem as well. 177 | Defaults to False (i.e. only return reference and hypothesis) 178 | returns_timeline : bool, optional 179 | Set to True to oversegment reference and hypothesis so that they 180 | share the same internal timeline. 
181 | 182 | Returns 183 | ------- 184 | reference, hypothesis : Annotation 185 | Extruded reference and hypothesis annotations 186 | uem : Timeline 187 | Extruded uem (returned only when 'returns_uem' is True) 188 | timeline : Timeline: 189 | Common timeline (returned only when 'returns_timeline' is True) 190 | """ 191 | 192 | # when uem is not provided, use the union of reference and hypothesis 193 | # extents -- and warn the user about that. 194 | if uem is None: 195 | r_extent = reference.get_timeline().extent() 196 | h_extent = hypothesis.get_timeline().extent() 197 | extent = r_extent | h_extent 198 | uem = Timeline(segments=[extent] if extent else [], 199 | uri=reference.uri) 200 | warnings.warn( 201 | "'uem' was approximated by the union of 'reference' " 202 | "and 'hypothesis' extents.") 203 | 204 | # extrude collars (and overlap regions) from uem 205 | uem = self.extrude(uem, reference, collar=collar, 206 | skip_overlap=skip_overlap) 207 | 208 | # extrude regions outside of uem 209 | reference = reference.crop(uem, mode='intersection') 210 | hypothesis = hypothesis.crop(uem, mode='intersection') 211 | 212 | # project reference and hypothesis on common timeline 213 | if returns_timeline: 214 | timeline = self.common_timeline(reference, hypothesis) 215 | reference = self.project(reference, timeline) 216 | hypothesis = self.project(hypothesis, timeline) 217 | 218 | result = (reference, hypothesis) 219 | if returns_uem: 220 | result += (uem,) 221 | 222 | if returns_timeline: 223 | result += (timeline,) 224 | 225 | return result 226 | -------------------------------------------------------------------------------- /tests/test_detection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2020 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated 
documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 

# AUTHORS
# Hervé BREDIN - http://herve.niderb.fr


import pytest

import pyannote.core
from pyannote.core import Annotation
from pyannote.core import Segment
from pyannote.core import Timeline
from pyannote.metrics.detection import DetectionCostFunction
from pyannote.metrics.detection import DetectionErrorRate
from pyannote.metrics.detection import DetectionPrecision
from pyannote.metrics.detection import DetectionRecall
from pyannote.metrics.detection import DetectionAccuracy
from pyannote.metrics.detection import DetectionPrecisionRecallFMeasure


import numpy.testing as npt

# Time       0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
# Reference  |--------------|  |-----------|     |-----|  |--------------|

# Time       0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
# Hypothesis    |-----------------|-----|     |-----------------|  |-----|
#                                 |--------|

# Time       0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
# UEM           |--------------------------------------|


@pytest.fixture
def reference():
    annotation = Annotation()
    annotation[Segment(0, 5)] = 'A'
    annotation[Segment(6, 10)] = 'B'
    annotation[Segment(12, 14)] = 'A'
    annotation[Segment(15, 20)] = 'C'
    return annotation


@pytest.fixture
def hypothesis():
    annotation = Annotation()
    annotation[Segment(1, 7)] = 'A'
    annotation[Segment(7, 9)] = 'D'
    annotation[Segment(7, 10)] = 'B'
    annotation[Segment(11, 17)] = 'C'
    annotation[Segment(18, 20)] = 'D'
    return annotation


@pytest.fixture
def uem():
    return Timeline([Segment(1, 14)])


def test_error_rate(reference, hypothesis):
    metric = DetectionErrorRate()
    npt.assert_almost_equal(metric(reference, hypothesis),
                            0.3125, decimal=7)


def test_detailed(reference, hypothesis):
    metric = DetectionErrorRate()
    details = metric(reference, hypothesis, detailed=True)

    npt.assert_almost_equal(details['detection error rate'],
                            0.3125, decimal=7)
    npt.assert_almost_equal(details['false alarm'], 3.0, decimal=7)
    npt.assert_almost_equal(details['miss'], 2.0, decimal=7)
    npt.assert_almost_equal(details['total'], 16.0, decimal=7)


def test_accuracy(reference, hypothesis):
    # 15 correct / 20 total
    metric = DetectionAccuracy()
    npt.assert_almost_equal(metric(reference, hypothesis),
                            0.75, decimal=3)


def test_precision(reference, hypothesis):
    # 14 true positive / 17 detected
    metric = DetectionPrecision()
    npt.assert_almost_equal(metric(reference, hypothesis),
                            0.8235, decimal=3)


def test_recall(reference, hypothesis):
    # 14 true positive / 16 expected
    metric = DetectionRecall()
    npt.assert_almost_equal(metric(reference, hypothesis),
                            0.875, decimal=3)


def test_fscore(reference, hypothesis):
    # expected 28/33 since it
    # is computed as :
    # 2*precision*recall / (precision+recall)
    metric = DetectionPrecisionRecallFMeasure()
    npt.assert_almost_equal(metric(reference, hypothesis),
                            0.848, decimal=3)


def test_decision_cost_function(reference, hypothesis, uem):
    # No UEM.
    dcf = DetectionCostFunction(fa_weight=0.25, miss_weight=0.75)
    npt.assert_almost_equal(dcf(reference, hypothesis),
                            0.28125, decimal=7)

    # UEM.
144 | expected = 1/6. 145 | dcf = DetectionCostFunction(fa_weight=0.25, miss_weight=0.75) 146 | actual = dcf(reference, hypothesis, uem=uem) 147 | npt.assert_almost_equal(actual, expected, decimal=7) 148 | -------------------------------------------------------------------------------- /tests/test_diarization.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pyannote.core 4 | from pyannote.core import Annotation 5 | from pyannote.core import Segment 6 | from pyannote.core import Timeline 7 | from pyannote.metrics.diarization import DiarizationErrorRate 8 | from pyannote.metrics.diarization import DiarizationPurity 9 | from pyannote.metrics.diarization import DiarizationCoverage 10 | 11 | import numpy.testing as npt 12 | 13 | 14 | @pytest.fixture 15 | def reference(): 16 | reference = Annotation() 17 | reference[Segment(0, 10)] = "A" 18 | reference[Segment(12, 20)] = "B" 19 | reference[Segment(24, 27)] = "A" 20 | reference[Segment(30, 40)] = "C" 21 | return reference 22 | 23 | 24 | @pytest.fixture 25 | def reference_with_overlap(): 26 | reference = Annotation() 27 | reference[Segment(0, 13)] = "A" 28 | reference[Segment(12, 20)] = "B" 29 | reference[Segment(24, 27)] = "A" 30 | reference[Segment(30, 40)] = "C" 31 | return reference 32 | 33 | 34 | @pytest.fixture 35 | def hypothesis(): 36 | hypothesis = Annotation() 37 | hypothesis[Segment(2, 13)] = "a" 38 | hypothesis[Segment(13, 14)] = "d" 39 | hypothesis[Segment(14, 20)] = "b" 40 | hypothesis[Segment(22, 38)] = "c" 41 | hypothesis[Segment(38, 40)] = "d" 42 | return hypothesis 43 | 44 | 45 | def test_error_rate(reference, hypothesis): 46 | diarizationErrorRate = DiarizationErrorRate() 47 | error_rate = diarizationErrorRate(reference, hypothesis) 48 | npt.assert_almost_equal(error_rate, 0.5161290322580645, decimal=7) 49 | 50 | 51 | def test_optimal_mapping(reference, hypothesis): 52 | diarizationErrorRate = DiarizationErrorRate() 53 | mapping = 
diarizationErrorRate.optimal_mapping(reference, hypothesis) 54 | assert mapping == {"a": "A", "b": "B", "c": "C"} 55 | 56 | 57 | def test_detailed(reference, hypothesis): 58 | diarizationErrorRate = DiarizationErrorRate() 59 | details = diarizationErrorRate(reference, hypothesis, detailed=True) 60 | 61 | confusion = details["confusion"] 62 | npt.assert_almost_equal(confusion, 7.0, decimal=7) 63 | 64 | correct = details["correct"] 65 | npt.assert_almost_equal(correct, 22.0, decimal=7) 66 | 67 | rate = details["diarization error rate"] 68 | npt.assert_almost_equal(rate, 0.5161290322580645, decimal=7) 69 | 70 | false_alarm = details["false alarm"] 71 | npt.assert_almost_equal(false_alarm, 7.0, decimal=7) 72 | 73 | missed_detection = details["missed detection"] 74 | npt.assert_almost_equal(missed_detection, 2.0, decimal=7) 75 | 76 | total = details["total"] 77 | npt.assert_almost_equal(total, 31.0, decimal=7) 78 | 79 | 80 | def test_purity(reference, hypothesis): 81 | diarizationPurity = DiarizationPurity() 82 | purity = diarizationPurity(reference, hypothesis) 83 | npt.assert_almost_equal(purity, 0.6666, decimal=3) 84 | 85 | 86 | def test_coverage(reference, hypothesis): 87 | diarizationCoverage = DiarizationCoverage() 88 | coverage = diarizationCoverage(reference, hypothesis) 89 | npt.assert_almost_equal(coverage, 0.7096, decimal=3) 90 | 91 | 92 | def test_skip_overlap(reference_with_overlap, hypothesis): 93 | metric = DiarizationErrorRate(skip_overlap=True) 94 | total = metric(reference_with_overlap, hypothesis, detailed=True)["total"] 95 | npt.assert_almost_equal(total, 32, decimal=3) 96 | 97 | 98 | def test_leep_overlap(reference_with_overlap, hypothesis): 99 | metric = DiarizationErrorRate(skip_overlap=False) 100 | total = metric(reference_with_overlap, hypothesis, detailed=True)["total"] 101 | npt.assert_almost_equal(total, 34, decimal=3) 102 | 103 | 104 | def test_bug_16(): 105 | reference = Annotation() 106 | reference[Segment(0, 10)] = "A" 107 | hypothesis = 
Annotation() 108 | 109 | metric = DiarizationErrorRate(collar=1) 110 | total = metric(reference, hypothesis, detailed=True)["total"] 111 | npt.assert_almost_equal(total, 9, decimal=3) 112 | 113 | metric = DiarizationErrorRate(collar=0) 114 | total = metric(reference, hypothesis, detailed=True)["total"] 115 | npt.assert_almost_equal(total, 10, decimal=3) 116 | -------------------------------------------------------------------------------- /tests/test_identification.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pyannote.core 4 | from pyannote.core import Annotation 5 | from pyannote.core import Segment 6 | from pyannote.core import Timeline 7 | from pyannote.metrics.identification import IdentificationErrorRate 8 | from pyannote.metrics.identification import IdentificationPrecision 9 | from pyannote.metrics.identification import IdentificationRecall 10 | 11 | import numpy.testing as npt 12 | 13 | 14 | @pytest.fixture 15 | def reference(): 16 | reference = Annotation() 17 | reference[Segment(0, 10)] = 'A' 18 | reference[Segment(12, 20)] = 'B' 19 | reference[Segment(24, 27)] = 'A' 20 | reference[Segment(30, 40)] = 'C' 21 | return reference 22 | 23 | 24 | @pytest.fixture 25 | def hypothesis(): 26 | hypothesis = Annotation() 27 | hypothesis[Segment(2, 13)] = 'A' 28 | hypothesis[Segment(13, 14)] = 'D' 29 | hypothesis[Segment(14, 20)] = 'B' 30 | hypothesis[Segment(22, 38)] = 'C' 31 | hypothesis[Segment(38, 40)] = 'D' 32 | return hypothesis 33 | 34 | 35 | def test_error_rate(reference, hypothesis): 36 | identificationErrorRate = IdentificationErrorRate() 37 | error_rate = identificationErrorRate(reference, hypothesis) 38 | npt.assert_almost_equal(error_rate, 0.5161290322580645, decimal=7) 39 | 40 | 41 | def test_detailed(reference, hypothesis): 42 | identificationErrorRate = IdentificationErrorRate() 43 | details = identificationErrorRate(reference, hypothesis, detailed=True) 44 | 45 | confusion = 
details['confusion'] 46 | npt.assert_almost_equal(confusion, 7.0, decimal=7) 47 | 48 | correct = details['correct'] 49 | npt.assert_almost_equal(correct, 22.0, decimal=7) 50 | 51 | rate = details['identification error rate'] 52 | npt.assert_almost_equal(rate, 0.5161290322580645, decimal=7) 53 | 54 | false_alarm = details['false alarm'] 55 | npt.assert_almost_equal(false_alarm, 7.0, decimal=7) 56 | 57 | missed_detection = details['missed detection'] 58 | npt.assert_almost_equal(missed_detection, 2.0, decimal=7) 59 | 60 | total = details['total'] 61 | npt.assert_almost_equal(total, 31.0, decimal=7) 62 | 63 | 64 | def test_precision(reference, hypothesis): 65 | identificationPrecisions = IdentificationPrecision() 66 | precision = identificationPrecisions(reference, hypothesis) 67 | npt.assert_almost_equal(precision, 0.611, decimal=3) 68 | 69 | 70 | def test_recall(reference, hypothesis): 71 | identificationRecall = IdentificationRecall() 72 | recall = identificationRecall(reference, hypothesis) 73 | npt.assert_almost_equal(recall, 0.710, decimal=3) 74 | --------------------------------------------------------------------------------