├── .gitattributes ├── .github ├── FUNDING.yml └── workflows │ ├── doc.yml │ ├── release.yml │ └── test.yml ├── .gitignore ├── .python-version ├── LICENSE ├── README.md ├── doc ├── Makefile ├── make.bat └── source │ ├── changelog.rst │ ├── conf.py │ └── index.rst ├── pyproject.toml ├── setup.cfg ├── setup.py ├── src └── pyannote │ └── database │ ├── __init__.py │ ├── cli.py │ ├── custom.py │ ├── database.py │ ├── file_finder.py │ ├── loader.py │ ├── protocol │ ├── __init__.py │ ├── collection.py │ ├── protocol.py │ ├── segmentation.py │ ├── speaker_diarization.py │ ├── speaker_identification.py │ ├── speaker_recognition.py │ ├── speaker_spotting.py │ └── speaker_verification.py │ ├── py.typed │ ├── registry.py │ └── util.py ├── tests ├── data │ ├── audio │ │ ├── filename1.wav │ │ └── filename2.wav │ ├── ctms │ │ ├── filename1.ctm │ │ └── filename2.ctm │ ├── database.yml │ ├── lists │ │ └── train.lst │ ├── mapping │ │ ├── domain.map │ │ └── duration.map │ ├── rttms │ │ └── train.rttm │ ├── trial │ │ └── train.trial │ └── uems │ │ └── train.uem ├── test.py ├── test_registry.py └── trial.py └── uv.lock /.gitattributes: -------------------------------------------------------------------------------- 1 | pyannote/database/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [hbredin] 4 | -------------------------------------------------------------------------------- /.github/workflows/doc.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | on: 3 | push: 4 | branches: 5 | - master 6 | 7 | jobs: 8 | build-and-deploy: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v4 13 | with: 14 | persist-credentials: false 15 | fetch-depth: 0 16 | - name: Install uv 17 | uses: astral-sh/setup-uv@v5 18 | with: 19 | enable-cache: true 20 | cache-dependency-glob: uv.lock 21 | 22 | - name: Install the project 23 | run: uv sync --extra doc 24 | 25 | - name: Build documentation 26 | run: | 27 | make --directory=doc html 28 | touch ./doc/build/html/.nojekyll 29 | - name: Deploy 30 | uses: peaceiris/actions-gh-pages@v3 31 | with: 32 | github_token: ${{ secrets.GITHUB_TOKEN }} 33 | publish_dir: ./doc/build/html 34 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | name: Build distribution 📦 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v4 12 | with: 13 | persist-credentials: false 14 | fetch-depth: 0 15 | - name: Install uv 16 | uses: astral-sh/setup-uv@v5 17 | with: 18 | enable-cache: true 19 | cache-dependency-glob: uv.lock 20 | - name: Set up Python 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version-file: ".python-version" 24 | - name: Build 25 | run: uv build 26 | - name: Store the distribution packages 27 | uses: actions/upload-artifact@v4 28 | with: 29 | name: python-package-distributions 30 | path: dist/ 31 | 32 | publish-to-pypi: 33 | name: >- 34 | Publish Python 🐍 distribution 📦 to PyPI 35 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes 36 | needs: 37 | - build 38 | runs-on: ubuntu-latest 39 | environment: 40 | name: pypi 41 | permissions: 42 | id-token: write 43 | steps: 44 | - name: Download all the dists 45 | uses: actions/download-artifact@v4 46 | with: 47 | name: python-package-distributions 48 | path: dist/ 49 | - name: Install uv 50 | uses: astral-sh/setup-uv@v5 51 | with: 52 | enable-cache: true 53 | cache-dependency-glob: uv.lock 54 | - name: Publish distribution 📦 to PyPI 55 | run: uv publish --trusted-publishing always --publish-url https://upload.pypi.org/legacy/ 56 | 57 | 58 | github-release: 59 | name: >- 60 | Sign the Python 🐍 distribution 📦 with Sigstore 61 | and upload them to GitHub Release 62 | needs: 63 | - publish-to-pypi 64 | runs-on: ubuntu-latest 65 | 66 | permissions: 67 | contents: write # IMPORTANT: mandatory for making GitHub Releases 68 | id-token: write # IMPORTANT: mandatory for sigstore 69 | 70 | steps: 71 | - name: Download all the dists 72 | uses: actions/download-artifact@v4 73 | with: 74 | name: python-package-distributions 75 | path: dist/ 76 | - name: Sign the dists with Sigstore 77 | uses: sigstore/gh-action-sigstore-python@v3.0.0 78 | with: 79 | inputs: >- 80 | ./dist/*.tar.gz 81 | ./dist/*.whl 82 | - name: Create GitHub Release 83 | env: 84 | GITHUB_TOKEN: ${{ github.token }} 85 | run: >- 86 | gh release create 87 | "$GITHUB_REF_NAME" 88 | --repo "$GITHUB_REPOSITORY" 89 | --notes "" 90 | - name: Upload artifact signatures to GitHub Release 91 | env: 92 | GITHUB_TOKEN: ${{ github.token }} 93 | # Upload to GitHub Release using the `gh` CLI. 94 | # `dist/` contains the built packages, and the 95 | # sigstore-produced signatures and certificates. 96 | run: >- 97 | gh release upload 98 | "$GITHUB_REF_NAME" dist/** 99 | --repo "$GITHUB_REPOSITORY" 100 | 101 | # publish-to-testpypi: 102 | # name: Publish Python 🐍 distribution 📦 to TestPyPI 103 | # needs: 104 | # - build 105 | # runs-on: ubuntu-latest 106 | # 107 | # environment: 108 | # name: testpypi 109 | # 110 | # permissions: 111 | # id-token: write # IMPORTANT: mandatory for trusted publishing 112 | # 113 | # steps: 114 | # - name: Download all the dists 115 | # uses: actions/download-artifact@v4 116 | # with: 117 | # name: python-package-distributions 118 | # path: dist/ 119 | # - name: Install uv 120 | # uses: astral-sh/setup-uv@v5 121 | # with: 122 | # enable-cache: true 123 | # cache-dependency-glob: uv.lock 124 | # - name: Publish distribution 📦 to PyPI 125 | # run: uv publish --trusted-publishing always --publish-url https://test.pypi.org/legacy/ 126 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - develop 7 | push: 8 | branches: 9 | - develop 10 | - master 11 | - release/* 12 | 13 | 14 | jobs: 15 | test: 16 | name: Test 17 | runs-on: ubuntu-latest 18 | strategy: 19 | matrix: 20 | python-version: 21 | - "3.10" 22 | - "3.11" 23 | - "3.12" 24 | env: 25 | UV_PYTHON: ${{ matrix.python-version }} 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - name: Install uv 30 | uses: astral-sh/setup-uv@v5 31 | 32 | - name: Install the project 33 | run: uv sync --extra test 34 | 35 | - name: Run tests 36 | run: uv run pytest tests -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | .mypy_cache 92 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 CNRS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = pyannotedatabase 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | uv run --extra doc $(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | uv run --extra doc $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=pyannotedatabase 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /doc/source/changelog.rst: -------------------------------------------------------------------------------- 1 | ######### 2 | Changelog 3 | ######### 4 | 5 | Version 6.0.0rc1 (2025-02-10) 6 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 7 | 8 | - BREAKING: drop support to `Python` < 3.10 9 | - BREAKING: switch to native namespace package 10 | - setup: switch to `uv` 11 | 12 | Version 5.1.3 (2025-01-15) 13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | - chore: remove deprecated use of `delim_whitespace` 16 | - chore: use `importlib.metadata` instead of `pkg_resources` 17 | 18 | Version 5.1.0 (2024-04-05) 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | - BREAKING: switch to python 3.9+ (no support for 3.12 yet) 22 | - feat(registry): make registry iterable (yields protocol names) 23 | - fix(setup): fix `typer`` dependency 24 | 25 | Version 5.0.1 (2023-04-21) 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | - fix: fix support for numeric values in custom protocols (e.g. `channel: 0`) 29 | - improve: do not complain about missing scope for meta protocols 30 | 31 | Version 5.0.0 (2023-03-27) 32 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 33 | 34 | - feat(registry): add pyannote.database.registry to load multiple YAML configuration file 35 | - BREAKING: turn pyannote.database.get_{database|protocol|protocols} function into registry methods 36 | - BREAKING: remove support for database plugins (via "pyannote.database.database" entrypoint) 37 | - feat(protocol): add generic segmentation protocol with "classes" metadata 38 | - feat(protocol): add support for label "scope" in speaker diarization protocols 39 | - improve(protocol): make ProtocolFile pickable 40 | - feat(loader): add support for LAB and STM files 41 | - feat(loader): add option to filter RTTM lines by "type" 42 | - fix(cli): fix corner case when annotation key is missing 43 | 44 | Version 4.1.3 (2022-02-11) 45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | - fix: fix corner case with missing uris in RTTM and UEM files 48 | - fix(CI): fix documentation deployment 49 | 50 | Version 4.1.1 (2021-08-4) 51 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 52 | 53 | - fix: fix support for meta verification protocol 54 | 55 | Version 4.1 (2021-04-07) 56 | ~~~~~~~~~~~~~~~~~~~~~~~~ 57 | 58 | - feat: make {RTTM|UEM}Loader usable as preprocessors 59 | - improve: remove the need for underscore prefix in database.yml 60 | - fix: fix ProtocolFile.__iter__ (dictionary changed size during iteration) 61 | 62 | Version 4.0.4 (2020-12-14) 63 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 64 | 65 | - fix: fix ProtocolFile.__iter__ (dictionary changed size during iteration) 66 | 67 | Version 4.0.3 (2020-10-11) 68 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 | 70 | - fix: make custom protocols pickable 71 | 72 | Version 4.0.2 (2020-10-02) 73 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 74 | 75 | - feat: automagically crop "annotated" to file "duration" when available 76 | - fix: fix a bug in SpeakerDiarizationProtocols 77 | - improve: avoid loading ProtocolFile (lazy) keys in "get_unique_identifier" 78 | 79 | Version 4.0.1 (2020-06-26) 80 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 81 | 82 | - feat: set "name" attribute in get_protocol 83 | - fix: display warning only when precomputed value is modified 84 | 85 | Version 4.0 (2020-06-15) 86 | ~~~~~~~~~~~~~~~~~~~~~~~~ 87 | 88 | - feat: add support for custom speaker verification protocols 89 | - feat: add pyannote.database.loader entrypoint 90 | - feat: add pyannote-database CLI 91 | - feat: add a few dataloaders (RTTM, UEM, CTM, MAP) 92 | - feat: add support for nested ProtocolFile 93 | - doc: major documentation update (README and docstrings) 94 | - BREAKING: custom protocols must define a "uri" section 95 | - BREAKING: remove support for "preprocessors" in Database constructor 96 | - BREAKING: remove support for progress bars 97 | 98 | Version 3.0.1 (2020-03-31) 99 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 100 | 101 | - BREAKING (feat): if a "database.yml" file exists in current working directory, it will be used even if PYANNOTE_DATABASE_CONFIG is set to another value. 102 | - feat: add support in FileFinder for paths relative to "database.yml" 103 | - BREAKING: rename "config_yml" option to "database_yml" in FileFinder 104 | - feat: add support in custom protocols for paths relative to "database.yml" (@PaulLerner) 105 | - BREAKING (feat): use "annotated" to crop "annotation" in custom protocols (@PaulLerner) 106 | - fix: add support for int-like protocol name in custom protocols (@PaulLerner) 107 | 108 | Version 2.5 (2020-02-04) 109 | ~~~~~~~~~~~~~~~~~~~~~~~~ 110 | 111 | - BREAKING: refactor {current | protocol}_file_iter 112 | - BREAKING: only rely on "uri" to decide if a ProtocolFile contains multiple files 113 | - BREAKING: deprecate FileFinder.current_file_iter in favor of ProtocolFile.files 114 | - BREAKING: deprecate FileFinder.protocol_file_iter in favor of Protocol.files 115 | - fix: fix support for lazy preprocessors in {Protocol | ProtocolFile}.files 116 | 117 | Version 2.4.3 (2020-01-24) 118 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 119 | 120 | - fix: fix infinite recursion in "ProtocolFile" lazy evaluation 121 | 122 | Version 2.4.2 (2020-01-06) 123 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 124 | 125 | - fix: add support for int-like database name in custom protocol 126 | 127 | Version 2.4.1 (2019-12-20) 128 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 129 | 130 | - fix: make ProtocolFile thread-safe 131 | 132 | Version 2.4 (2019-12-17) 133 | ~~~~~~~~~~~~~~~~~~~~~~~~ 134 | 135 | - feat: make preprocessors lazy 136 | - fix: pandas would convert a label to NaN ([@PaulLerner](https://github.com/PaulLerner)) 137 | - feat: setup continuous integration 138 | - setup: switch to pyannote.core 3.2 139 | 140 | Version 2.3.1 (2019-09-04) 141 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 142 | 143 | - fix: fix support for MDTM files in `pyannote.database.custom.subset_iter` ([#23](https://github.com/pyannote/pyannote-database/issues/23)) 144 | 145 | Version 2.3 (2019-07-19) 146 | ~~~~~~~~~~~~~~~~~~~~~~~~ 147 | 148 | - feat: add LabelMapper preprocessor ([@MarvinLvn](https://github.com/MarvinLvn)) 149 | - chore: replace (deprecated) pandas.read_table with pandas.read_csv ([@V-assim](https://github.com/V-assim)) 150 | - chore: use YAML safe loader ([@V-assim](https://github.com/V-assim)) 151 | 152 | Version 2.2 (2019-06-26) 153 | ~~~~~~~~~~~~~~~~~~~~~~~~ 154 | 155 | - setup: switch to pyannote.core 3.0 156 | - feat: add RTTMLoader preprocessor 157 | 158 | Version 2.1 (2019-04-04) 159 | ~~~~~~~~~~~~~~~~~~~~~~~~ 160 | 161 | - feat: add support for "domain" key in generic protocol 162 | 163 | Version 2.0 (2019-03-20) 164 | ~~~~~~~~~~~~~~~~~~~~~~~~ 165 | 166 | - BREAKING: change location and format of pyannote.database configuration file 167 | - feat: add support for PYANNOTE_DATABASE_CONFIG environment variable 168 | 169 | Version 1.6 (2019-03-12) 170 | ~~~~~~~~~~~~~~~~~~~~~~~~ 171 | 172 | - feat: add support for [file-based speaker diarization protocols](https://github.com/pyannote/pyannote-database/tree/develop#generic-speaker-diarization-protocols) 173 | - setup: switch to pyannote.core 2.1 174 | 175 | Version 1.5.5 (2018-11-30) 176 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 177 | 178 | - fix: fix Collection.files when progress=True 179 | 180 | Version 1.5.4 (2018-11-14) 181 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 182 | 183 | - fix: skip files with no "uri" entry in FileFinder.protocol_file_iter 184 | 185 | Version 1.5.3 (2018-11-08) 186 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 187 | 188 | - fix: fix broken SpeakerVerificationProtocol 189 | 190 | Version 1.5.1 (2018-10-16) 191 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 192 | 193 | - fix: fix support for string preprocessors 194 | 195 | Version 1.5 (2018-09-25) 196 | ~~~~~~~~~~~~~~~~~~~~~~~~ 197 | 198 | - BREAKING: simplify SpeakerVerificationProtocol with {subset}_trial methods 199 | 200 | Version 1.4 (2018-07-13) 201 | ~~~~~~~~~~~~~~~~~~~~~~~~ 202 | 203 | - feat: add raw collection protocol 204 | 205 | Version 1.3.2 (2018-05-16) 206 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 207 | 208 | - fix: fix regression introduced in 1.3.1 209 | 210 | Version 1.3.1 (2018-05-11) 211 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 212 | 213 | - fix: fix bug in `FileFinder.protocol_file_iter` with empty iterators 214 | 215 | Version 1.3 (2018-02-04) 216 | ~~~~~~~~~~~~~~~~~~~~~~~~ 217 | 218 | - feat: add `extra_keys` parameter to `{protocol | current}_file_iter` 219 | 220 | Version 1.2.1 (2018-02-03) 221 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 222 | 223 | - setup: drop support for Python 2 224 | - feat: add `protocol_file_iter` and `current_file_iter` to FileFinder 225 | - feat: add `get_label_identifier` utility function 226 | - fix: fix "get_unique_identifier" when "database" or "channel" is None 227 | 228 | Version 1.1 (2017-10-13) 229 | ~~~~~~~~~~~~~~~~~~~~~~~~ 230 | 231 | - feat: add speaker identification protocol 232 | - feat: add speaker verification protocols 233 | - feat: add support for list of uris in FileFinder 234 | 235 | Version 1.0 (2017-10-02) 236 | ~~~~~~~~~~~~~~~~~~~~~~~~ 237 | 238 | - feat: add support for "meta" protocols 239 | - feat: add speaker spotting protocol 240 | - setup: switch to pyannote.core 1.1 241 | 242 | Version 0.12 (2017-06-28) 243 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 244 | 245 | - feat: add utility functions at package root 246 | - doc: improve documentation 247 | - doc: add link to pyannote-db-template repository 248 | 249 | Version 0.11.2 (2017-03-15) 250 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 251 | 252 | - fix: fix a bug with string template preprocessors 253 | - doc: improve documentation 254 | 255 | Version 0.11.1 (2017-01_16) 256 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 257 | 258 | - feat: add 'get_protocol' helper function 259 | 260 | Version 0.11 (2017-01-11) 261 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 262 | 263 | - feat: add support for validation on training set to speaker recognition protocols 264 | - feat: add 'get_annotated' helper function 265 | 266 | Version 0.10.2 (2017-01-04) 267 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 268 | 269 | - fix: fix bug in FileFinder 270 | 271 | Version 0.10.1 (2016-12-17) 272 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 273 | 274 | - improve: change signature of preprocessor.__call__ 275 | 276 | Version 0.9 (2016-12-14) 277 | ~~~~~~~~~~~~~~~~~~~~~~~~ 278 | 279 | - feat: add "get_unique_identifier" utility function 280 | 281 | Version 0.8.1 (2016-12-12) 282 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 283 | 284 | - fix: fix progress bar support 285 | 286 | Version 0.8 (2016-12-06) 287 | ~~~~~~~~~~~~~~~~~~~~~~~~ 288 | 289 | - feat: add progress bar support 290 | 291 | Version 0.7.1 (2016-12-03) 292 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 293 | 294 | - fix: add 'yield_name' parameter to speaker recognition generators 295 | 296 | Version 0.7 (2016-12-02) 297 | ~~~~~~~~~~~~~~~~~~~~~~~~ 298 | 299 | - feat: add speaker recognition protocol 300 | 301 | Version 0.6.1 (2016-12-02) 302 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 303 | 304 | - feat: add FileFinder utility class 305 | - fix: fix SpeakerDiarizationProtocol.stats() 306 | 307 | Version 0.5 (2016-12-01) 308 | ~~~~~~~~~~~~~~~~~~~~~~~~ 309 | 310 | - BREAKING: replace 'medium_template' by (more generic) 'preprocessors' 311 | 312 | Version 0.4.1 (2016-11-17) 313 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 314 | 315 | - fix: rename 'speakers' to 'labels' in statistics dictionary 316 | 317 | Version 0.4 (2016-10-27) 318 | ~~~~~~~~~~~~~~~~~~~~~~~~ 319 | 320 | - feat: add a method providing global statistics about a subset 321 | 322 | Version 0.3 (2016-09-22) 323 | ~~~~~~~~~~~~~~~~~~~~~~~~ 324 | 325 | - feat: add support for multiple media 326 | 327 | Version 0.2 (2016-09-21) 328 | ~~~~~~~~~~~~~~~~~~~~~~~~ 329 | 330 | - feat: add support for 'medium_template' attribute 331 | 332 | Version 0.1 (2016-09-20) 333 | ~~~~~~~~~~~~~~~~~~~~~~~~ 334 | 335 | - first public version 336 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # pyannote.database documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Jan 24 15:45:55 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | 20 | # allow pyannote.database import 21 | import os 22 | import sys 23 | sys.path.insert(0, os.path.abspath('../..')) 24 | 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = ['sphinx.ext.autodoc', 36 | 'sphinx.ext.napoleon', 37 | 'sphinx.ext.intersphinx', 38 | 'sphinx.ext.todo', 39 | 'sphinx.ext.coverage', 40 | 'sphinx.ext.mathjax', 41 | 'sphinx.ext.viewcode', 42 | 'sphinx.ext.githubpages', 43 | ] 44 | 45 | 46 | # Napoleon settings 47 | napoleon_google_docstring = True 48 | napoleon_numpy_docstring = True 49 | napoleon_include_init_with_doc = False 50 | napoleon_include_private_with_doc = False 51 | napoleon_include_special_with_doc = False 52 | napoleon_use_admonition_for_examples = False 53 | napoleon_use_admonition_for_notes = False 54 | napoleon_use_admonition_for_references = False 55 | napoleon_use_ivar = False 56 | napoleon_use_param = True 57 | napoleon_use_rtype = True 58 | napoleon_use_keyword = True 59 | 60 | # Add any paths that contain templates here, relative to this directory. 61 | templates_path = ['_templates'] 62 | 63 | # The suffix(es) of source filenames. 64 | # You can specify multiple suffix as a list of string: 65 | # 66 | # source_suffix = ['.rst', '.md'] 67 | source_suffix = {'.rst': 'restructuredtext'} 68 | 69 | # The master toctree document. 70 | master_doc = 'index' 71 | 72 | # General information about the project. 73 | project = u'pyannote.database' 74 | copyright = u'2019, CNRS' 75 | author = u'Hervé Bredin' 76 | 77 | # The version info for the project you're documenting, acts as replacement for 78 | # |version| and |release|, also used in various other places throughout the 79 | # built documents. 80 | 81 | import pyannote.database 82 | # The short X.Y version. 83 | version = pyannote.database.__version__.split('+')[0] 84 | # The full version, including alpha/beta/rc tags. 85 | release = pyannote.database.__version__ 86 | 87 | # The language for content autogenerated by Sphinx. Refer to documentation 88 | # for a list of supported languages. 89 | # 90 | # This is also used if you do content translation via gettext catalogs. 91 | # Usually you set "language" from the command line for these cases. 92 | language = "en" 93 | 94 | # List of patterns, relative to source directory, that match files and 95 | # directories to ignore when looking for source files. 96 | # This patterns also effect to html_static_path and html_extra_path 97 | exclude_patterns = [] 98 | 99 | # The name of the Pygments (syntax highlighting) style to use. 100 | pygments_style = 'sphinx' 101 | 102 | # If true, `todo` and `todoList` produce output, else they produce nothing. 103 | todo_include_todos = True 104 | 105 | 106 | # -- Options for HTML output ---------------------------------------------- 107 | 108 | # The theme to use for HTML and HTML Help pages. See the documentation for 109 | # a list of builtin themes. 110 | # 111 | html_theme = "sphinx_rtd_theme" 112 | 113 | # Theme options are theme-specific and customize the look and feel of a theme 114 | # further. For a list of options available for each theme, see the 115 | # documentation. 116 | # 117 | # html_theme_options = {} 118 | 119 | # Add any paths that contain custom static files (such as style sheets) here, 120 | # relative to this directory. They are copied after the builtin static files, 121 | # so a file named "default.css" will overwrite the builtin "default.css". 122 | html_static_path = ['_static'] 123 | 124 | 125 | # -- Options for HTMLHelp output ------------------------------------------ 126 | 127 | # Output file base name for HTML help builder. 128 | htmlhelp_basename = 'pyannotedatabasedoc' 129 | 130 | 131 | # -- Options for LaTeX output --------------------------------------------- 132 | 133 | latex_elements = { 134 | # The paper size ('letterpaper' or 'a4paper'). 135 | # 136 | # 'papersize': 'letterpaper', 137 | 138 | # The font size ('10pt', '11pt' or '12pt'). 139 | # 140 | # 'pointsize': '10pt', 141 | 142 | # Additional stuff for the LaTeX preamble. 143 | # 144 | # 'preamble': '', 145 | 146 | # Latex figure (float) alignment 147 | # 148 | # 'figure_align': 'htbp', 149 | } 150 | 151 | # Grouping the document tree into LaTeX files. List of tuples 152 | # (source start file, target name, title, 153 | # author, documentclass [howto, manual, or own class]). 154 | latex_documents = [ 155 | (master_doc, 'pyannotedatabase.tex', u'pyannote.database Documentation', 156 | u'Hervé Bredin', 'manual'), 157 | ] 158 | 159 | 160 | # -- Options for manual page output --------------------------------------- 161 | 162 | # One entry per manual page. List of tuples 163 | # (source start file, name, description, authors, manual section). 164 | man_pages = [ 165 | (master_doc, 'pyannotedatabase', u'pyannote.database Documentation', 166 | [author], 1) 167 | ] 168 | 169 | 170 | # -- Options for Texinfo output ------------------------------------------- 171 | 172 | # Grouping the document tree into Texinfo files. List of tuples 173 | # (source start file, target name, title, author, 174 | # dir menu entry, description, category) 175 | texinfo_documents = [ 176 | (master_doc, 'pyannotedatabase', u'pyannote.database Documentation', 177 | author, 'pyannotedatabase', 'One line description of project.', 178 | 'Miscellaneous'), 179 | ] 180 | 181 | intersphinx_mapping = { 182 | 'python': ('https://docs.python.org/', None), 183 | 'pyannote.core': ('https://pyannote.github.io/pyannote-core', None), 184 | } 185 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. pyannote.core documentation master file, created by 2 | sphinx-quickstart on Thu Jan 19 13:25:34 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | ################# 7 | pyannote.database 8 | ################# 9 | 10 | `pyannote.database` is an open-source Python library that provides a common interface to multimedia databases and associated experimental protocol. 11 | 12 | Installation 13 | ============ 14 | 15 | :: 16 | 17 | $ pip install pyannote.database 18 | 19 | 20 | API documentation 21 | ================= 22 | 23 | .. toctree:: 24 | :maxdepth: 2 25 | 26 | changelog 27 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pyannote-database" 3 | description = "Interface to multimedia databases and experimental protocols" 4 | readme = "README.md" 5 | authors = [ 6 | { name = "Hervé BREDIN", email = "herve@pyannote.ai" } 7 | ] 8 | requires-python = ">=3.10" 9 | 10 | dynamic = [ 11 | "version", 12 | ] 13 | 14 | dependencies = [ 15 | "pandas>=2.2.3", 16 | "pyannote-core>=5.0.0", 17 | "pyyaml>=6.0.2", 18 | ] 19 | 20 | [project.scripts] 21 | pyannote-database = "pyannote.database.cli:main" 22 | 23 | [project.entry-points.'pyannote.database.loader'] 24 | ".rttm" = "pyannote.database.loader:RTTMLoader" 25 | ".uem" = "pyannote.database.loader:UEMLoader" 26 | ".ctm" = "pyannote.database.loader:CTMLoader" 27 | ".map" = "pyannote.database.loader:MAPLoader" 28 | ".lab" = "pyannote.database.loader:LABLoader" 29 | ".stm" = "pyannote.database.loader:STMLoader" 30 | 31 | [project.optional-dependencies] 32 | cli = [ 33 | "typer>=0.15.1", 34 | ] 35 | test = [ 36 | "pytest>=8.3.4", 37 | ] 38 | doc = [ 39 | "sphinx-rtd-theme>=3.0.2", 40 | "sphinx>=8.1.3", 41 | ] 42 | 43 | [build-system] 44 | requires = ["hatchling", "hatch-vcs"] 45 | build-backend = "hatchling.build" 46 | 47 | [tool.hatch.version] 48 | source = "vcs" 49 | 50 | [tool.hatch.build.targets.wheel] 51 | packages = ["src/pyannote"] 52 | 53 | [dependency-groups] 54 | dev = [ 55 | "ipykernel>=6.29.5", 56 | ] 57 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | 2 | # See the docstring in versioneer.py for instructions. Note that you must 3 | # re-run 'versioneer.py setup' after changing this section, and commit the 4 | # resulting files. 5 | 6 | [versioneer] 7 | VCS = git 8 | style = pep440 9 | versionfile_source = pyannote/database/_version.py 10 | versionfile_build = pyannote/database/_version.py 11 | tag_prefix = 12 | parentdir_prefix = pyannote-database- 13 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016-2020 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | import versioneer 30 | from setuptools import setup, find_packages 31 | 32 | setup( 33 | # package 34 | namespace_packages=["pyannote"], 35 | packages=find_packages(), 36 | install_requires=[ 37 | "pyannote.core >= 4.1", 38 | "pyYAML >= 3.12", 39 | "pandas >= 0.19", 40 | "typer >= 0.12.1", 41 | "typing_extensions >= 3.7.4;python_version < '3.8'", 42 | ], 43 | entry_points={ 44 | "console_scripts": [ 45 | "pyannote-database=pyannote.database.cli:main", 46 | ], 47 | "pyannote.database.loader": [ 48 | ".rttm = pyannote.database.loader:RTTMLoader", 49 | ".uem = pyannote.database.loader:UEMLoader", 50 | ".ctm = pyannote.database.loader:CTMLoader", 51 | ".map = pyannote.database.loader:MAPLoader", 52 | ".lab = pyannote.database.loader:LABLoader", 53 | ".stm = pyannote.database.loader:STMLoader", 54 | ], 55 | }, 56 | # versioneer 57 | version=versioneer.get_version(), 58 | cmdclass=versioneer.get_cmdclass(), 59 | # PyPI 60 | name="pyannote.database", 61 | description=("Interface to multimedia databases and experimental protocols"), 62 | author="Hervé Bredin", 63 | author_email="bredin@limsi.fr", 64 | url="http://pyannote.github.io/", 65 | classifiers=[ 66 | "Development Status :: 4 - Beta", 67 | "Intended Audience :: Science/Research", 68 | "License :: OSI Approved :: MIT License", 69 | "Natural Language :: English", 70 | "Programming Language :: Python :: 3.9", 71 | "Programming Language :: Python :: 3.10", 72 | "Programming Language :: Python :: 3.11", 73 | "Topic :: Scientific/Engineering", 74 | ], 75 | extras_require={ 76 | "testing": ["pytest", "flake8==3.7.9"], 77 | "doc": [ 78 | "matplotlib >= 2.0.0", 79 | "Sphinx == 2.2.2", 80 | "ipython == 7.16.3", 81 | "sphinx_rtd_theme == 0.4.3", 82 | ], 83 | }, 84 | ) 85 | -------------------------------------------------------------------------------- /src/pyannote/database/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016- CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | # Alexis PLAQUET 29 | 30 | """pyannote.database""" 31 | 32 | 33 | from typing import Optional 34 | import warnings 35 | 36 | from .registry import registry, LoadingMode 37 | 38 | from .database import Database 39 | 40 | from .protocol.protocol import Protocol 41 | from .protocol.protocol import ProtocolFile 42 | from .protocol.protocol import Subset 43 | from .protocol.protocol import Preprocessors 44 | 45 | from .file_finder import FileFinder 46 | from .util import get_annotated 47 | from .util import get_unique_identifier 48 | from .util import get_label_identifier 49 | 50 | import importlib.metadata 51 | __version__ = importlib.metadata.version("pyannote-database") 52 | 53 | 54 | def get_protocol(name, preprocessors: Optional[Preprocessors] = None) -> Protocol: 55 | """Get protocol by full name 56 | 57 | name : str 58 | Protocol full name (e.g. "Etape.SpeakerDiarization.TV") 59 | preprocessors : dict or (key, preprocessor) iterable 60 | When provided, each protocol item (dictionary) are preprocessed, such 61 | that item[key] = preprocessor(item). In case 'preprocessor' is not 62 | callable, it should be a string containing placeholder for item keys 63 | (e.g. {'audio': '/path/to/{uri}.wav'}) 64 | 65 | Returns 66 | ------- 67 | protocol : Protocol 68 | Protocol instance 69 | """ 70 | warnings.warn( 71 | "`get_protocol` has been deprecated in favor of `pyannote.database.registry.get_protocol`.", 72 | DeprecationWarning) 73 | return registry.get_protocol(name, preprocessors=preprocessors) 74 | 75 | 76 | __all__ = [ 77 | "registry", 78 | "get_protocol", 79 | "LoadingMode", 80 | "Database", 81 | "Protocol", 82 | "ProtocolFile", 83 | "Subset", 84 | "FileFinder", 85 | "get_annotated", 86 | "get_unique_identifier", 87 | "get_label_identifier", 88 | ] 89 | -------------------------------------------------------------------------------- /src/pyannote/database/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2020- CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | # Alexis PLAQUET 29 | 30 | 31 | import typer 32 | from enum import Enum 33 | import math 34 | from typing import Text 35 | from pyannote.database import Database 36 | from pyannote.database import registry 37 | from pyannote.database.protocol import CollectionProtocol 38 | from pyannote.database.protocol import SpeakerDiarizationProtocol 39 | from pyannote.core import Annotation 40 | 41 | app = typer.Typer() 42 | 43 | 44 | class Task(str, Enum): 45 | Any = "Any" 46 | Protocol = "Protocol" 47 | Collection = "Collection" 48 | SpeakerDiarization = "SpeakerDiarization" 49 | SpeakerVerification = "SpeakerVerification" 50 | 51 | 52 | @app.command("database") 53 | def database(): 54 | """Print list of databases""" 55 | for database in registry.databases: 56 | typer.echo(f"{database}") 57 | 58 | 59 | @app.command("task") 60 | def task( 61 | database: str = typer.Option( 62 | "", 63 | "--database", 64 | "-d", 65 | metavar="DATABASE", 66 | help="Filter tasks by DATABASE.", 67 | case_sensitive=False, 68 | ) 69 | ): 70 | """Print list of tasks""" 71 | 72 | if database == "": 73 | tasks = [] 74 | else: 75 | db: Database = registry.get_database(database) 76 | tasks = db.get_tasks() 77 | 78 | for task in tasks: 79 | typer.echo(f"{task}") 80 | 81 | 82 | @app.command("protocol") 83 | def protocol( 84 | database: str = typer.Option( 85 | "", 86 | "--database", 87 | "-d", 88 | metavar="DATABASE", 89 | help="Filter protocols by DATABASE.", 90 | case_sensitive=False, 91 | ), 92 | task: Task = typer.Option( 93 | "Any", "--task", "-t", help="Filter protocols by TASK.", case_sensitive=False, 94 | ), 95 | ): 96 | """Print list of protocols""" 97 | 98 | if database == "": 99 | databases = list(registry.databases) 100 | else: 101 | databases = [database] 102 | 103 | for database_name in databases: 104 | db: Database = registry.get_database(database_name) 105 | tasks = db.get_tasks() if task == "Any" else [task] 106 | for task_name in tasks: 107 | try: 108 | protocols = db.get_protocols(task_name) 109 | except KeyError: 110 | continue 111 | for protocol in protocols: 112 | typer.echo(f"{database_name}.{task_name}.{protocol}") 113 | 114 | 115 | def duration_to_str(seconds: float) -> Text: 116 | hours = math.floor(seconds / 3600) 117 | minutes = math.floor((seconds - 3600 * hours) / 60) 118 | return f"{hours}h{minutes:02d}m" 119 | 120 | 121 | @app.command("info") 122 | def info(protocol: str): 123 | """Print protocol detailed information""" 124 | 125 | p = registry.get_protocol(protocol) 126 | 127 | if isinstance(p, SpeakerDiarizationProtocol): 128 | subsets = ["train", "development", "test"] 129 | skip_annotation = False 130 | skip_annotated = False 131 | elif isinstance(p, CollectionProtocol): 132 | subsets = ["files"] 133 | skip_annotation = True 134 | skip_annotated = True 135 | else: 136 | typer.echo("Only collections and speaker diarization protocols are supported.") 137 | typer.Exit(code=1) 138 | 139 | for subset in subsets: 140 | 141 | num_files = 0 142 | speakers = set() 143 | duration = 0.0 144 | speech = 0.0 145 | 146 | def iterate(): 147 | try: 148 | for file in getattr(p, subset)(): 149 | yield file 150 | except (AttributeError, NotImplementedError): 151 | return 152 | 153 | for file in iterate(): 154 | num_files += 1 155 | 156 | if not skip_annotation: 157 | annotation = file["annotation"] or Annotation(uri=file["uri"]) 158 | speakers.update(annotation.labels()) 159 | speech += annotation.get_timeline().support().duration() 160 | 161 | if not skip_annotated: 162 | annotated = file["annotated"] 163 | duration += annotated.duration() 164 | 165 | if num_files > 0: 166 | typer.secho( 167 | f"{subset}", fg=typer.colors.BRIGHT_GREEN, underline=True, bold=True 168 | ) 169 | typer.echo(f" {num_files} files") 170 | if not skip_annotated: 171 | typer.echo(f" {duration_to_str(duration)} annotated") 172 | 173 | if not skip_annotation: 174 | typer.echo( 175 | f" {duration_to_str(speech)} of speech ({100 * speech / duration:.0f}%)" 176 | ) 177 | typer.echo(f" {len(speakers)} speakers") 178 | 179 | 180 | def main(): 181 | app() 182 | -------------------------------------------------------------------------------- /src/pyannote/database/custom.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2019-2020 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | # Pavel KORSHUNOV - https://www.idiap.ch/~pkorshunov/ 29 | # Paul LERNER 30 | # Vincent BRIGNATZ 31 | # Alexis PLAQUET 32 | 33 | """Custom protocols 34 | 35 | Protocols: 36 | MyDatabase: 37 | Collection: 38 | MyProtocol: 39 | train: 40 | uris: xxx.lst 41 | annotation: xxx.rttm 42 | annotated: xxx.uem 43 | """ 44 | 45 | from pathlib import Path 46 | import string 47 | 48 | 49 | from . import protocol as protocol_module 50 | 51 | from pyannote.database.protocol.protocol import ProtocolFile 52 | 53 | 54 | import warnings 55 | from numbers import Number 56 | from typing import Text, Dict, Callable, Any, Union 57 | import functools 58 | 59 | from .protocol.protocol import Subset 60 | from .protocol.segmentation import SegmentationProtocol 61 | from .protocol.speaker_diarization import SpeakerDiarizationProtocol 62 | 63 | from importlib.metadata import entry_points 64 | 65 | from .util import get_annotated 66 | 67 | from .loader import load_lst, load_trial 68 | 69 | # All "Loader" classes types (eg RTTMLoader, UEMLoader, ...) retrieved from the entry point. 70 | LOADERS = { 71 | ep.name: ep 72 | for ep in entry_points(group="pyannote.database.loader") 73 | } 74 | 75 | 76 | def Template(template: Text, database_yml: Path) -> Callable[[ProtocolFile], Any]: 77 | """Get data loader based on template 78 | 79 | Parameters 80 | ---------- 81 | template : str 82 | Path format template (e.g. "/path/to/{uri}.csv"). 83 | Extension (here ".csv") determined which data loader to use. 84 | database_yml : Path 85 | Path to YAML configuration file, to which `template` is relative. 86 | Defaults to assume that `template` is absolute or relative to 87 | current working directory. 88 | 89 | Returns 90 | ------- 91 | data_loader : Callable[[ProtocolFile], Any] 92 | Callable that takes a ProtocolFile and returns some data. 93 | 94 | See also 95 | -------- 96 | pyannote.database.loader 97 | """ 98 | 99 | path = Path(template) 100 | if path.suffix not in LOADERS: 101 | msg = f"No loader for files with '{path.suffix}' suffix" 102 | raise ValueError(msg) 103 | 104 | Loader = LOADERS[path.suffix].load() 105 | 106 | def load(current_file: ProtocolFile): 107 | path = resolve_path(Path(template.format(**abs(current_file))), database_yml) 108 | 109 | # check if file exists 110 | if not path.is_file(): 111 | msg = f"No such file or directory: '{path}' (via '{template}' template)." 112 | raise FileNotFoundError(msg) 113 | 114 | loader = Loader(path) 115 | return loader(current_file) 116 | 117 | return load 118 | 119 | 120 | def NumericValue(value): 121 | def load(current_file: ProtocolFile): 122 | return value 123 | return load 124 | 125 | 126 | def resolve_path(path: Path, database_yml: Path) -> Path: 127 | """Resolve path 128 | 129 | Parameters 130 | ---------- 131 | path : `Path` 132 | Path. Can be either absolute, relative to current working directory, 133 | or relative to `database_yml` parent directory. 134 | database_yml : `Path` 135 | Path to YAML configuration file. 136 | 137 | Returns 138 | ------- 139 | resolved_path: `Path` 140 | Resolved path. 141 | """ 142 | 143 | path = path.expanduser() 144 | 145 | if path.is_file(): 146 | return path 147 | 148 | else: 149 | relative_path = database_yml.parent / path 150 | if relative_path.is_file(): 151 | return relative_path 152 | 153 | msg = f'Could not find file "{path}".' 154 | raise FileNotFoundError(msg) 155 | 156 | 157 | def meta_subset_iter( 158 | meta_database: Text, 159 | meta_task: Text, 160 | meta_protocol: Text, 161 | meta_subset: Subset, 162 | subset_entries: Dict, 163 | database_yml: Path, 164 | ): 165 | """Meta-protocol method that iterates over a subset 166 | 167 | Parameters 168 | ---------- 169 | meta_database : str 170 | "X" 171 | meta_task : str 172 | Task name (e.g. SpeakerDiarization, SpeakerVerification) 173 | meta_protocol : str 174 | Protocol name (e.g. MyProtocol) 175 | meta_subset : {"train", "development", "test"} 176 | Subset 177 | subset_entries : dict 178 | Subset entries. 179 | Etape.SpeakerDiarization.TV: [train] 180 | REPERE.SpeakerDiarization.Phase1: [train, development] 181 | REPERE.SpeakerDiarization.Phase2: [train, development] 182 | """ 183 | 184 | # this is imported here to avoid circular imports 185 | from . import registry 186 | 187 | for protocol, subsets in subset_entries.items(): 188 | partial_protocol = registry.get_protocol(protocol) 189 | for subset in subsets: 190 | method_name = f"{subset}_iter" 191 | for file in getattr(partial_protocol, method_name)(): 192 | yield file 193 | 194 | 195 | def gather_loaders( 196 | entries: Dict, 197 | database_yml: Path, 198 | ) -> dict: 199 | """Loads all Loaders for data type specified in 'entries' into a dict. 200 | 201 | Parameters 202 | ---------- 203 | entries : Dict, optional 204 | Subset entries (eg 'uri', 'annotated', 'annotation', ...) 205 | database_yml : Path, optional 206 | Path to the 'database.yml' file 207 | 208 | Returns 209 | ------- 210 | dict 211 | A dictionary mapping each key of entry (except 'uri' and 'trial') 212 | to a function that given a ProtocolFile returns the data type 213 | related to this entry. 214 | """ 215 | lazy_loader = dict() 216 | 217 | for key, value in entries.items(): 218 | 219 | if key == "uri" or key == "trial": 220 | continue 221 | 222 | if isinstance(value, Number): 223 | lazy_loader[key] = NumericValue(value) 224 | continue 225 | 226 | # check whether value (path) contains placeholders such as {uri} or {subset} 227 | _, placeholders, _, _ = zip(*string.Formatter().parse(value)) 228 | is_template = len(set(placeholders) - set([None])) > 0 229 | 230 | if is_template: 231 | 232 | # make sure old database.yml specifications still work but warn the user 233 | # that they can now get rid of this "_" prefix 234 | if value.startswith("_"): 235 | msg = ( 236 | "Since version 4.1, pyannote.database is smart enough to know " 237 | "when paths defined in 'database.yml' contains placeholders. " 238 | "Remove the underscore (_) prefix to get rid of this warning." 239 | ) 240 | warnings.warn(msg) 241 | value = value[1:] 242 | 243 | lazy_loader[key] = Template(value, database_yml) 244 | 245 | else: 246 | 247 | path = resolve_path(Path(value), database_yml) 248 | 249 | # check if file exists 250 | if not path.is_file(): 251 | msg = f"No such file or directory: '{path}'" 252 | raise FileNotFoundError(msg) 253 | 254 | # check if loader exists 255 | if path.suffix not in LOADERS: 256 | msg = f"No loader for file with '{path.suffix}' suffix" 257 | raise TypeError(msg) 258 | 259 | # load custom loader class 260 | Loader = LOADERS[path.suffix].load() 261 | 262 | # TODO: As it is right now, every call to "subset_iter" also calls "Loader(path)". 263 | # However, calling "Loader(path)" might be time consuming so we should probably cache it: 264 | # Current behavior: 265 | # for _ in protocol.train(): pass # first call is slow (compute Loader(path)) 266 | # for _ in protocol.train(): pass # subsequent calls are equally slow (compute Loader(path)) 267 | # Proposed behavior: 268 | # for _ in protocol.train(): pass # first call is slow (compute and cache Loader(path)) 269 | # for _ in protocol.train(): pass # subsequent calls are fast (use cached Loader(path)) 270 | lazy_loader[key] = Loader(path) 271 | return lazy_loader 272 | 273 | 274 | def subset_iter( 275 | self, 276 | database: Text = None, 277 | task: Text = None, 278 | protocol: Text = None, 279 | subset: Subset = None, 280 | entries: Dict = None, 281 | database_yml: Path = None, 282 | **metadata, 283 | ): 284 | """ 285 | 286 | Parameters 287 | ---------- 288 | database : str 289 | Database name (e.g. MyDatabase) 290 | task : str 291 | Task name (e.g. SpeakerDiarization, SpeakerVerification) 292 | protocol : str 293 | Protocol name (e.g. MyProtocol) 294 | subset : {"train", "development", "test"} 295 | Subset 296 | entries : dict 297 | Subset entries. 298 | database_yml : `Path` 299 | Path to the 'database.yml' file 300 | metadata : dict 301 | Additional metadata to be added to each ProtocolFile (such 302 | as "scope" or "classes") 303 | """ 304 | 305 | if "uri" in entries: 306 | uri = entries["uri"] 307 | 308 | elif "uris" in entries: 309 | uri = entries["uris"] 310 | msg = ( 311 | f"Found deprecated 'uris' entry in {database}.{task}.{protocol}.{subset}. " 312 | f"Please use 'uri' (singular) instead, in '{database_yml}'." 313 | ) 314 | warnings.warn(msg, DeprecationWarning) 315 | 316 | else: 317 | msg = f"Missing mandatory 'uri' entry in {database}.{task}.{protocol}.{subset}" 318 | raise ValueError(msg) 319 | 320 | uris = load_lst(resolve_path(Path(uri), database_yml)) 321 | 322 | lazy_loader = gather_loaders(entries=entries, database_yml=database_yml) 323 | 324 | for uri in uris: 325 | yield ProtocolFile( 326 | {"uri": uri, "database": database, "subset": subset, **metadata}, lazy=lazy_loader 327 | ) 328 | 329 | def subset_trial( 330 | self, 331 | database: Text = None, 332 | task: Text = None, 333 | protocol: Text = None, 334 | subset: Subset = None, 335 | entries: Dict = None, 336 | database_yml: Path = None, 337 | ): 338 | """ 339 | 340 | Parameters 341 | ---------- 342 | database : str 343 | Database name (e.g. MyDatabase) 344 | task : str 345 | Task name (e.g. SpeakerDiarization, SpeakerVerification) 346 | protocol : str 347 | Protocol name (e.g. MyProtocol) 348 | subset : {"train", "development", "test"} 349 | Subset 350 | entries : dict 351 | Subset entries. 352 | database_yml : `Path` 353 | Path to the 'database.yml' file 354 | """ 355 | 356 | lazy_loader = gather_loaders(entries=entries, database_yml=database_yml) 357 | lazy_loader["try_with"] = get_annotated 358 | 359 | # meant to store and cache one `ProtocolFile` instance per file 360 | files: Dict[Text, ProtocolFile] = dict() 361 | 362 | # iterate trials and use preloaded test files 363 | for trial in load_trial(resolve_path(Path(entries["trial"]), database_yml)): 364 | # create `ProtocolFile` only the first time this uri is encountered 365 | uri1, uri2 = trial["uri1"], trial["uri2"] 366 | if uri1 not in files: 367 | files[uri1] = self.preprocess( 368 | ProtocolFile( 369 | {"uri": uri1, "database": database, "subset": subset}, 370 | lazy=lazy_loader, 371 | ) 372 | ) 373 | if uri2 not in files: 374 | files[uri2] = self.preprocess( 375 | ProtocolFile( 376 | {"uri": uri2, "database": database, "subset": subset}, 377 | lazy=lazy_loader, 378 | ) 379 | ) 380 | 381 | yield { 382 | "reference": trial["reference"], 383 | "file1": files[uri1], 384 | "file2": files[uri2], 385 | } 386 | 387 | 388 | def get_init(protocols): 389 | def init(self): 390 | super(self.__class__, self).__init__() 391 | for protocol in protocols: 392 | self.register_protocol(*protocol) 393 | 394 | return init 395 | 396 | 397 | def get_custom_protocol_class_name(database: Text, task: Text, protocol: Text): 398 | return f"{database}__{task}__{protocol}" 399 | 400 | 401 | def create_protocol( 402 | database: Text, 403 | task: Text, 404 | protocol: Text, 405 | protocol_entries: Dict, 406 | database_yml: Path, 407 | ) -> Union[type, None]: 408 | """Create new protocol class 409 | 410 | Parameters 411 | ---------- 412 | database : str 413 | task : str 414 | protocol : str 415 | protocol_entries : dict 416 | 417 | Returns 418 | ------- 419 | CustomProtocol : type or None 420 | 421 | """ 422 | 423 | 424 | try: 425 | base_class = getattr( 426 | protocol_module, "Protocol" if task == "Protocol" else f"{task}Protocol" 427 | ) 428 | 429 | except AttributeError: 430 | msg = ( 431 | f"Ignoring '{database}.{task}' protocols found in {database_yml} " 432 | f"because '{task}' tasks are not supported yet." 433 | ) 434 | print(msg) 435 | return None 436 | 437 | # Collections do not define subsets, so we artificially create one (called "files") 438 | # 439 | # MyCollection: 440 | # uri: /path/to/collection.lst 441 | # 442 | # becomes 443 | # 444 | # MyCollection: 445 | # files: 446 | # uri: /path/to/collection.lst 447 | if task == "Collection": 448 | protocol_entries = {"files": protocol_entries} 449 | 450 | metadata = dict() 451 | 452 | if issubclass(base_class, SegmentationProtocol): 453 | if "classes" in protocol_entries: 454 | metadata["classes"] = protocol_entries.pop("classes") 455 | 456 | if issubclass(base_class, SpeakerDiarizationProtocol): 457 | scope = protocol_entries.pop("scope", None) 458 | 459 | if scope is None and database != "X": 460 | msg = ( 461 | f"'{database}.{task}.{protocol}' found in {database_yml} does not define " 462 | f"the 'scope' of speaker labels (file, database, or global). Setting it to 'file'." 463 | ) 464 | print(msg) 465 | metadata["scope"] = "file" 466 | 467 | else: 468 | metadata["scope"] = scope 469 | 470 | methods = dict() 471 | for subset, subset_entries in protocol_entries.items(): 472 | 473 | if subset not in [ 474 | "files", 475 | "train", 476 | "development", 477 | "test", 478 | "train_trial", 479 | "development_trial", 480 | "test_trial", 481 | ]: 482 | msg = ( 483 | f"Ignoring '{database}.{task}.{protocol}.{subset}' found in {database_yml} " 484 | f"because '{subset}' entries are not supported yet." 485 | ) 486 | warnings.warn(msg) 487 | continue 488 | 489 | method_name = f"{subset}_iter" 490 | if database == "X": 491 | methods[method_name] = functools.partial( 492 | meta_subset_iter, 493 | database, 494 | task, 495 | protocol, 496 | subset, 497 | subset_entries, 498 | database_yml, 499 | ) 500 | else: 501 | 502 | methods[method_name] = functools.partialmethod( 503 | subset_iter, 504 | database=database, 505 | task=task, 506 | protocol=protocol, 507 | subset=subset, 508 | entries=subset_entries, 509 | database_yml=database_yml, 510 | **metadata, 511 | ) 512 | 513 | if "trial" in subset_entries.keys(): 514 | methods[f"{subset}_trial"] = functools.partialmethod( 515 | subset_trial, 516 | database=database, 517 | task=task, 518 | protocol=protocol, 519 | subset=subset, 520 | entries=subset_entries, 521 | database_yml=database_yml, 522 | ) 523 | 524 | #  making custom protocol pickable by adding it to pyannote.database.custom module 525 | custom_protocol_class_name = get_custom_protocol_class_name( 526 | database, task, protocol 527 | ) 528 | CustomProtocolClass = type(custom_protocol_class_name, (base_class,), methods) 529 | globals()[custom_protocol_class_name] = CustomProtocolClass 530 | 531 | return CustomProtocolClass 532 | -------------------------------------------------------------------------------- /src/pyannote/database/database.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | 30 | from typing import Optional 31 | from .protocol.protocol import Preprocessors 32 | import warnings 33 | 34 | 35 | class Database: 36 | """Base database 37 | 38 | This class should be inherited from, not used directly. 39 | 40 | """ 41 | 42 | def __init__(self, preprocessors=None): 43 | 44 | if preprocessors is not None: 45 | database_name = self.__class__.__name__ 46 | msg = ( 47 | f"Ignoring deprecated 'preprocessors' argument in {database_name}.__init__. " 48 | f"Pass it to 'get_protocol' instead." 49 | ) 50 | warnings.warn(msg) 51 | 52 | super(Database, self).__init__() 53 | 54 | def register_protocol(self, task_name, protocol_name, protocol): 55 | if not hasattr(self, "protocols_"): 56 | self.protocols_ = {} 57 | if task_name not in self.protocols_: 58 | self.protocols_[task_name] = {} 59 | self.protocols_[task_name][protocol_name] = protocol 60 | # TODO / register globally. 61 | 62 | def _get_tasks(self): 63 | try: 64 | tasks = self.protocols_ 65 | except AttributeError: 66 | message = "This database does not implement any protocol." 67 | raise AttributeError(message) 68 | return tasks 69 | 70 | def get_tasks(self): 71 | tasks = self._get_tasks() 72 | return sorted(tasks) 73 | 74 | def get_protocols(self, task): 75 | return sorted(self.protocols_[task].keys()) 76 | 77 | def get_protocol( 78 | self, task, protocol, preprocessors: Optional[Preprocessors] = None 79 | ): 80 | return self.protocols_[task][protocol](preprocessors=preprocessors) 81 | 82 | def __str__(self): 83 | return self.__doc__ 84 | -------------------------------------------------------------------------------- /src/pyannote/database/file_finder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016- CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | # Alexis PLAQUET 29 | 30 | import warnings 31 | from pathlib import Path 32 | from typing import Text 33 | from pyannote.database.protocol.protocol import ProtocolFile 34 | from .registry import registry as global_registry 35 | from .registry import Registry 36 | 37 | 38 | class FileFinder: 39 | """Database file finder. 40 | 41 | Retrieve media files by URI. 42 | 43 | Parameters 44 | ---------- 45 | registry : Registry, optional 46 | Database registry. Defaults to `pyannote.database.registry`. 47 | """ 48 | 49 | def __init__( 50 | self, 51 | registry: Registry = None, 52 | database_yml: Text = None): 53 | super().__init__() 54 | if registry is None: 55 | if database_yml is None: 56 | registry = global_registry 57 | else: 58 | warnings.warn("Passing `database.yml` to `FileFinder` is deprecated in favor of `registry`.") 59 | registry = Registry() 60 | registry.load_database(database_yml) 61 | self.registry = registry 62 | 63 | def __call__(self, current_file: ProtocolFile) -> Path: 64 | """Look for current file 65 | 66 | Parameter 67 | --------- 68 | current_file : ProtocolFile 69 | Protocol file. 70 | 71 | Returns 72 | ------- 73 | path : Path 74 | Path to file. 75 | 76 | Raises 77 | ------ 78 | FileNotFoundError when the file could not be found or when more than one 79 | matching file were found. 80 | """ 81 | 82 | uri = current_file["uri"] 83 | database = current_file["database"] 84 | 85 | path_templates = self.registry.sources[database] 86 | if isinstance(path_templates, Text): 87 | path_templates = [path_templates] 88 | 89 | searched = [] 90 | found = [] 91 | 92 | for path_template in path_templates: 93 | path = Path(path_template.format(uri=uri, database=database)) 94 | searched.append(path) 95 | 96 | # paths with "*" or "**" patterns are split into two parts, 97 | # - the root part (from the root up to the first occurrence of *) 98 | # - the pattern part (from the first occurrence of * to the end) 99 | # which is looked for (inside root) using Path.glob 100 | # Example with path = '/path/to/**/*/file.wav' 101 | # root = '/path/to' 102 | # pattern = '**/*/file.wav' 103 | 104 | if "*" in str(path): 105 | parts = path.parent.parts 106 | for p, part in enumerate(parts): 107 | if "*" in part: 108 | break 109 | 110 | root = path.parents[len(parts) - p] 111 | pattern = str(path.relative_to(root)) 112 | found_ = root.glob(pattern) 113 | found.extend(found_) 114 | 115 | # a path without "*" patterns is supposed to be an actual file 116 | elif path.is_file(): 117 | found.append(path) 118 | 119 | if len(found) == 1: 120 | return found[0] 121 | 122 | if len(found) == 0: 123 | msg = f'Could not find file "{uri}" in any of the following location(s):' 124 | for path in searched: 125 | msg += f"\n - {path}" 126 | raise FileNotFoundError(msg) 127 | 128 | if len(found) > 1: 129 | msg = ( 130 | f'Looked for file "{uri}" and found more than one ' 131 | f"({len(found)}) matching locations: " 132 | ) 133 | for path in found: 134 | msg += f"\n - {path}" 135 | raise FileNotFoundError(msg) 136 | -------------------------------------------------------------------------------- /src/pyannote/database/loader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2020- CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | # Vincent BRIGNATZ 29 | 30 | """Data loaders""" 31 | 32 | from typing import Text 33 | from pathlib import Path 34 | import string 35 | from pyannote.database.util import load_rttm, load_uem, load_lab, load_stm 36 | import pandas as pd 37 | from pyannote.core import Segment, Timeline, Annotation 38 | from pyannote.database.protocol.protocol import ProtocolFile 39 | from typing import Union, Any 40 | import warnings 41 | 42 | 43 | try: 44 | from spacy.tokens import Token 45 | from spacy.tokens import Doc 46 | 47 | Token.set_extension("time_start", default=None) 48 | Token.set_extension("time_end", default=None) 49 | Token.set_extension("confidence", default=0.0) 50 | 51 | except ImportError: 52 | pass 53 | 54 | 55 | def load_lst(file_lst): 56 | """Load LST file 57 | 58 | LST files provide a list of URIs (one line per URI) 59 | 60 | Parameter 61 | --------- 62 | file_lst : `str` 63 | Path to LST file. 64 | 65 | Returns 66 | ------- 67 | uris : `list` 68 | List or uris 69 | """ 70 | 71 | with open(file_lst, mode="r") as fp: 72 | lines = fp.readlines() 73 | return [line.strip() for line in lines] 74 | 75 | 76 | def load_trial(file_trial): 77 | """Load trial file 78 | 79 | Trial files provide a list of two URIs and their reference 80 | 81 | Parameter 82 | --------- 83 | file_trial : `str` 84 | Path to trial file. 85 | 86 | Returns 87 | ------- 88 | list_trial : `list` 89 | List of trial 90 | """ 91 | 92 | trials = pd.read_table( 93 | file_trial, sep="\s+", names=["reference", "uri1", "uri2"] 94 | ) 95 | 96 | for _, reference, uri1, uri2 in trials.itertuples(): 97 | yield {"reference": reference, "uri1": uri1, "uri2": uri2} 98 | 99 | 100 | class RTTMLoader: 101 | """RTTM loader 102 | 103 | Can be used as a preprocessor. 104 | 105 | Parameters 106 | ---------- 107 | path : str 108 | Path to RTTM file with optional ProtocolFile key placeholders 109 | (e.g. "/path/to/{database}/{subset}/{uri}.rttm") 110 | """ 111 | 112 | def __init__(self, path: Text = None): 113 | super().__init__() 114 | 115 | self.path = str(path) 116 | 117 | _, placeholders, _, _ = zip(*string.Formatter().parse(self.path)) 118 | self.placeholders_ = set(placeholders) - set([None]) 119 | self.loaded_ = dict() if self.placeholders_ else load_rttm(self.path) 120 | 121 | def __call__(self, file: ProtocolFile) -> Annotation: 122 | 123 | uri = file["uri"] 124 | 125 | if uri in self.loaded_: 126 | return self.loaded_[uri] 127 | 128 | sub_file = {key: file[key] for key in self.placeholders_} 129 | loaded = load_rttm(self.path.format(**sub_file)) 130 | if uri not in loaded: 131 | loaded[uri] = Annotation(uri=uri) 132 | 133 | # do not cache annotations when there is one RTTM file per "uri" 134 | # since loading it should be quite fast 135 | if "uri" in self.placeholders_: 136 | return loaded[uri] 137 | 138 | # when there is more than one file in loaded RTTM, cache them all 139 | # so that loading future "uri" will be instantaneous 140 | self.loaded_.update(loaded) 141 | 142 | return self.loaded_[uri] 143 | 144 | 145 | class STMLoader: 146 | """STM loader 147 | 148 | Can be used as a preprocessor. 149 | 150 | Parameters 151 | ---------- 152 | path : str 153 | Path to STM file with optional ProtocolFile key placeholders 154 | (e.g. "/path/to/{database}/{subset}/{uri}.stm") 155 | """ 156 | 157 | def __init__(self, path: Text = None): 158 | super().__init__() 159 | 160 | self.path = str(path) 161 | 162 | _, placeholders, _, _ = zip(*string.Formatter().parse(self.path)) 163 | self.placeholders_ = set(placeholders) - set([None]) 164 | self.loaded_ = dict() if self.placeholders_ else load_stm(self.path) 165 | 166 | def __call__(self, file: ProtocolFile) -> Annotation: 167 | 168 | uri = file["uri"] 169 | 170 | if uri in self.loaded_: 171 | return self.loaded_[uri] 172 | 173 | sub_file = {key: file[key] for key in self.placeholders_} 174 | loaded = load_stm(self.path.format(**sub_file)) 175 | if uri not in loaded: 176 | loaded[uri] = Annotation(uri=uri) 177 | 178 | # do not cache annotations when there is one STM file per "uri" 179 | # since loading it should be quite fast 180 | if "uri" in self.placeholders_: 181 | return loaded[uri] 182 | 183 | # when there is more than one file in loaded STM, cache them all 184 | # so that loading future "uri" will be instantaneous 185 | self.loaded_.update(loaded) 186 | 187 | return self.loaded_[uri] 188 | 189 | 190 | class UEMLoader: 191 | """UEM loader 192 | 193 | Can be used as a preprocessor. 194 | 195 | Parameters 196 | ---------- 197 | path : str 198 | Path to UEM file with optional ProtocolFile key placeholders 199 | (e.g. "/path/to/{database}/{subset}/{uri}.uem") 200 | """ 201 | 202 | def __init__(self, path: Text = None): 203 | super().__init__() 204 | 205 | self.path = str(path) 206 | 207 | _, placeholders, _, _ = zip(*string.Formatter().parse(self.path)) 208 | self.placeholders_ = set(placeholders) - set([None]) 209 | self.loaded_ = dict() if self.placeholders_ else load_uem(self.path) 210 | 211 | def __call__(self, file: ProtocolFile) -> Timeline: 212 | 213 | uri = file["uri"] 214 | 215 | if uri in self.loaded_: 216 | return self.loaded_[uri] 217 | 218 | sub_file = {key: file[key] for key in self.placeholders_} 219 | loaded = load_uem(self.path.format(**sub_file)) 220 | if uri not in loaded: 221 | loaded[uri] = Timeline(uri=uri) 222 | 223 | # do not cache timelines when there is one UEM file per "uri" 224 | # since loading it should be quite fast 225 | if "uri" in self.placeholders_: 226 | return loaded[uri] 227 | 228 | # when there is more than one file in loaded UEM, cache them all 229 | # so that loading future "uri" will be instantaneous 230 | self.loaded_.update(loaded) 231 | 232 | return self.loaded_[uri] 233 | 234 | 235 | class LABLoader: 236 | """LAB loader 237 | 238 | Parameters 239 | ---------- 240 | path : str 241 | Path to LAB file with mandatory {uri} placeholder. 242 | (e.g. "/path/to/{uri}.lab") 243 | 244 | each .lab file contains the segments for a single audio file, in the following format: 245 | start end label 246 | 247 | ex. 248 | 0.0 12.3456 sing 249 | 12.3456 15.0 nosing 250 | ... 251 | """ 252 | 253 | def __init__(self, path: Text = None): 254 | super().__init__() 255 | 256 | self.path = str(path) 257 | 258 | _, placeholders, _, _ = zip(*string.Formatter().parse(self.path)) 259 | self.placeholders_ = set(placeholders) - set([None]) 260 | if "uri" not in self.placeholders_: 261 | raise ValueError("`path` must contain the {uri} placeholder.") 262 | 263 | def __call__(self, file: ProtocolFile) -> Annotation: 264 | 265 | uri = file["uri"] 266 | 267 | sub_file = {key: file[key] for key in self.placeholders_} 268 | return load_lab(self.path.format(**sub_file), uri=uri) 269 | 270 | 271 | class CTMLoader: 272 | """CTM loader 273 | 274 | Parameter 275 | --------- 276 | ctm : Path 277 | Path to CTM file 278 | """ 279 | 280 | def __init__(self, ctm: Path): 281 | self.ctm = ctm 282 | 283 | names = ["uri", "channel", "start", "duration", "word", "confidence"] 284 | dtype = { 285 | "uri": str, 286 | "start": float, 287 | "duration": float, 288 | "word": str, 289 | "confidence": float, 290 | } 291 | self.data_ = pd.read_csv( 292 | ctm, names=names, dtype=dtype, sep="\s+" 293 | ).groupby("uri") 294 | 295 | def __call__(self, current_file: ProtocolFile) -> Union["Doc", None]: 296 | 297 | try: 298 | from spacy.vocab import Vocab 299 | from spacy.tokens import Doc 300 | except ImportError: 301 | msg = "Cannot load CTM files because spaCy is not available." 302 | warnings.warn(msg) 303 | return None 304 | 305 | uri = current_file["uri"] 306 | 307 | try: 308 | lines = list(self.data_.get_group(uri).iterrows()) 309 | except KeyError: 310 | lines = [] 311 | 312 | words = [line.word for _, line in lines] 313 | doc = Doc(Vocab(), words=words) 314 | 315 | for token, (_, line) in zip(doc, lines): 316 | token._.time_start = line.start 317 | token._.time_end = line.start + line.duration 318 | token._.confidence = line.confidence 319 | 320 | return doc 321 | 322 | 323 | class MAPLoader: 324 | """Mapping loader 325 | 326 | For generic files with format : 327 | {uri} {value} 328 | 329 | Exemples : 330 | 331 | duration.map : 332 | 333 | filename1 60.0 334 | filename2 123.450 335 | filename3 32.400 336 | 337 | domain.map : 338 | 339 | filename1 radio 340 | filename2 radio 341 | filename3 phone 342 | 343 | Parameter 344 | --------- 345 | map : Path 346 | Path to mapping file 347 | """ 348 | 349 | def __init__(self, mapping: Path): 350 | self.mapping = mapping 351 | 352 | names = ["uri", "value"] 353 | dtype = { 354 | "uri": str, 355 | } 356 | self.data_ = pd.read_csv( 357 | mapping, names=names, dtype=dtype, sep="\s+" 358 | ) 359 | 360 | # get colum 'value' dtype, allowing us to acces it during subset 361 | self.dtype = self.data_.dtypes["value"] 362 | 363 | if self.data_.duplicated(["uri"]).any(): 364 | print(f"Found following duplicate key in file {mapping}") 365 | print(self.data_[self.data_.duplicated(["uri"], keep=False)]) 366 | raise ValueError() 367 | 368 | self.data_ = self.data_.groupby("uri") 369 | 370 | def __call__(self, current_file: ProtocolFile) -> Any: 371 | uri = current_file["uri"] 372 | 373 | try: 374 | value = self.data_.get_group(uri).value.item() 375 | except KeyError: 376 | msg = f"Couldn't find mapping for {uri} in {self.mapping}" 377 | raise KeyError(msg) 378 | 379 | return value 380 | -------------------------------------------------------------------------------- /src/pyannote/database/protocol/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016- CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | from .protocol import Protocol 30 | from .collection import CollectionProtocol 31 | from .segmentation import SegmentationProtocol 32 | from .speaker_diarization import SpeakerDiarizationProtocol 33 | from .speaker_spotting import SpeakerSpottingProtocol 34 | from .speaker_verification import SpeakerVerificationProtocol 35 | from .speaker_identification import SpeakerIdentificationProtocol 36 | from .speaker_recognition import SpeakerRecognitionProtocol 37 | 38 | 39 | __all__ = [ 40 | "Protocol", 41 | "CollectionProtocol", 42 | "SegmentationProtocol", 43 | "SpeakerDiarizationProtocol", 44 | "SpeakerVerificationProtocol", 45 | "SpeakerSpottingProtocol", 46 | "SpeakerIdentificationProtocol", 47 | "SpeakerRecognitionProtocol", 48 | ] 49 | 50 | -------------------------------------------------------------------------------- /src/pyannote/database/protocol/collection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2018-2020 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | 30 | from typing import Iterator, Dict 31 | from .protocol import Protocol 32 | 33 | 34 | class CollectionProtocol(Protocol): 35 | """A collection of files with no train/dev/test split 36 | 37 | A collection can be defined programmatically by creating a class that 38 | inherits from CollectionProtocol and implements the `files_iter` method: 39 | 40 | >>> class MyCollection(CollectionProtocol): 41 | ... def files_iter(self) -> Iterator[Dict]: 42 | ... yield {"uri": "filename1", "any_other_key": "..."} 43 | ... yield {"uri": "filename2", "any_other_key": "..."} 44 | ... yield {"uri": "filename3", "any_other_key": "..."} 45 | 46 | `files_iter` should return an iterator of dictionnaries with 47 | - a mandatory "uri" key that provides a unique file identifier (usually 48 | the filename), 49 | - any other key that the collection may provide. 50 | 51 | It can then be used in Python like this: 52 | 53 | >>> collection = MyCollection() 54 | >>> for file in collection.files(): 55 | ... print(file["uri"]) 56 | filename1 57 | filename2 58 | filename3 59 | 60 | A collection can also be defined using `pyannote.database` configuration 61 | file, whose (configurable) path defaults to "~/database.yml". 62 | 63 | ~~~ Content of ~/database.yml ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 64 | Protocols: 65 | MyDatabase: 66 | Collection: 67 | MyCollection: 68 | uri: /path/to/collection.lst 69 | any_other_key: ... # see custom loader documentation 70 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 71 | 72 | where "/path/to/collection.lst" contains the list of identifiers of the 73 | files in the collection: 74 | 75 | ~~~ Content of "/path/to/collection.lst ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 76 | filename1 77 | filename2 78 | filename3 79 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 80 | 81 | It can the be used in Python like this: 82 | 83 | >>> from pyannote.database import registry 84 | >>> collection = registry.get_protocol('MyDatabase.Collection.MyCollection') 85 | >>> for file in collection.files(): 86 | ... print(file["uri"]) 87 | filename1 88 | filename2 89 | filename3 90 | """ 91 | 92 | # this method should be overriden 93 | def files_iter(self) -> Iterator[Dict]: 94 | raise NotImplementedError() 95 | 96 | # this allows Protocol.files() to iterate over the collection 97 | def train_iter(self) -> Iterator[Dict]: 98 | return self.files_iter() 99 | -------------------------------------------------------------------------------- /src/pyannote/database/protocol/protocol.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016-2020 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | """ 30 | ######### 31 | Protocols 32 | ######### 33 | 34 | """ 35 | 36 | import warnings 37 | import collections 38 | import threading 39 | import itertools 40 | from typing import Union, Dict, Iterator, Callable, Any, Text, Optional 41 | 42 | try: 43 | from typing import Literal 44 | except ImportError: 45 | from typing_extensions import Literal 46 | 47 | Subset = Literal["train", "development", "test"] 48 | LEGACY_SUBSET_MAPPING = {"train": "trn", "development": "dev", "test": "tst"} 49 | Scope = Literal["file", "database", "global"] 50 | 51 | Preprocessor = Callable[["ProtocolFile"], Any] 52 | Preprocessors = Dict[Text, Preprocessor] 53 | 54 | 55 | class ProtocolFile(collections.abc.MutableMapping): 56 | """Protocol file with lazy preprocessors 57 | 58 | This is a dict-like data structure where some values may depend on other 59 | values, and are only computed if/when requested. Once computed, they are 60 | cached and never recomputed again. 61 | 62 | Parameters 63 | ---------- 64 | precomputed : dict 65 | Regular dictionary with precomputed values 66 | lazy : dict, optional 67 | Dictionary describing how lazy value needs to be computed. 68 | Values are callable expecting a dictionary as input and returning the 69 | computed value. 70 | 71 | """ 72 | 73 | def __init__(self, precomputed: Union[Dict, "ProtocolFile"], lazy: Dict = None): 74 | 75 | if lazy is None: 76 | lazy = dict() 77 | 78 | if isinstance(precomputed, ProtocolFile): 79 | # when 'precomputed' is a ProtocolFile, it may already contain lazy keys. 80 | 81 | # we use 'precomputed' precomputed keys as precomputed keys 82 | self._store: Dict = abs(precomputed) 83 | 84 | # we handle the corner case where the intersection of 'precomputed' lazy keys 85 | # and 'lazy' keys is not empty. this is currently achieved by "unlazying" the 86 | # 'precomputed' one (which is probably not the most efficient solution). 87 | for key in set(precomputed.lazy) & set(lazy): 88 | self._store[key] = precomputed[key] 89 | 90 | # we use the union of 'precomputed' lazy keys and provided 'lazy' keys as lazy keys 91 | compound_lazy = dict(precomputed.lazy) 92 | compound_lazy.update(lazy) 93 | self.lazy: Dict = compound_lazy 94 | 95 | else: 96 | # when 'precomputed' is a Dict, we use it directly as precomputed keys 97 | # and 'lazy' as lazy keys. 98 | self._store = dict(precomputed) 99 | self.lazy = dict(lazy) 100 | 101 | # re-entrant lock used below to make ProtocolFile thread-safe 102 | self.lock_ = threading.RLock() 103 | 104 | # this is needed to avoid infinite recursion 105 | # when a key is both in precomputed and lazy. 106 | # keys with evaluating_ > 0 are currently being evaluated 107 | # and therefore should be taken from precomputed 108 | self.evaluating_ = collections.Counter() 109 | 110 | # since RLock is not pickable, remove it before pickling... 111 | def __getstate__(self): 112 | d = dict(self.__dict__) 113 | del d["lock_"] 114 | return d 115 | 116 | # ... and add it back when unpickling 117 | def __setstate__(self, d): 118 | self.__dict__.update(d) 119 | self.lock_ = threading.RLock() 120 | 121 | def __abs__(self): 122 | with self.lock_: 123 | return dict(self._store) 124 | 125 | def __getitem__(self, key): 126 | with self.lock_: 127 | 128 | if key in self.lazy and self.evaluating_[key] == 0: 129 | 130 | # mark lazy key as being evaluated 131 | self.evaluating_.update([key]) 132 | 133 | # apply preprocessor once and remove it 134 | value = self.lazy[key](self) 135 | del self.lazy[key] 136 | 137 | # warn the user when a precomputed key is modified 138 | if key in self._store and value != self._store[key]: 139 | msg = 'Existing precomputed key "{key}" has been modified by a preprocessor.' 140 | warnings.warn(msg.format(key=key)) 141 | 142 | # store the output of the lazy computation 143 | # so that it is available for future access 144 | self._store[key] = value 145 | 146 | # lazy evaluation is finished for key 147 | self.evaluating_.subtract([key]) 148 | 149 | return self._store[key] 150 | 151 | def __setitem__(self, key, value): 152 | with self.lock_: 153 | 154 | if key in self.lazy: 155 | del self.lazy[key] 156 | 157 | self._store[key] = value 158 | 159 | def __delitem__(self, key): 160 | with self.lock_: 161 | 162 | if key in self.lazy: 163 | del self.lazy[key] 164 | 165 | del self._store[key] 166 | 167 | def __iter__(self): 168 | with self.lock_: 169 | 170 | store_keys = list(self._store) 171 | for key in store_keys: 172 | yield key 173 | 174 | lazy_keys = list(self.lazy) 175 | for key in lazy_keys: 176 | if key in self._store: 177 | continue 178 | yield key 179 | 180 | def __len__(self): 181 | with self.lock_: 182 | return len(set(self._store) | set(self.lazy)) 183 | 184 | def files(self) -> Iterator["ProtocolFile"]: 185 | """Iterate over all files 186 | 187 | When `current_file` refers to only one file, 188 | yield it and return. 189 | When `current_file` refers to a list of file (i.e. 'uri' is a list), 190 | yield each file separately. 191 | 192 | Examples 193 | -------- 194 | >>> current_file = ProtocolFile({ 195 | ... 'uri': 'my_uri', 196 | ... 'database': 'my_database'}) 197 | >>> for file in current_file.files(): 198 | ... print(file['uri'], file['database']) 199 | my_uri my_database 200 | 201 | >>> current_file = { 202 | ... 'uri': ['my_uri1', 'my_uri2', 'my_uri3'], 203 | ... 'database': 'my_database'} 204 | >>> for file in current_file.files(): 205 | ... print(file['uri'], file['database']) 206 | my_uri1 my_database 207 | my_uri2 my_database 208 | my_uri3 my_database 209 | 210 | """ 211 | 212 | uris = self["uri"] 213 | if not isinstance(uris, list): 214 | yield self 215 | return 216 | 217 | n_uris = len(uris) 218 | 219 | # iterate over precomputed keys and make sure 220 | 221 | precomputed = {"uri": uris} 222 | for key, value in abs(self).items(): 223 | 224 | if key == "uri": 225 | continue 226 | 227 | if not isinstance(value, list): 228 | precomputed[key] = itertools.repeat(value) 229 | 230 | else: 231 | if len(value) != n_uris: 232 | msg = ( 233 | f'Mismatch between number of "uris" ({n_uris}) ' 234 | f'and number of "{key}" ({len(value)}).' 235 | ) 236 | raise ValueError(msg) 237 | precomputed[key] = value 238 | 239 | keys = list(precomputed.keys()) 240 | for values in zip(*precomputed.values()): 241 | precomputed_one = dict(zip(keys, values)) 242 | yield ProtocolFile(precomputed_one, self.lazy) 243 | 244 | 245 | class Protocol: 246 | """Experimental protocol 247 | 248 | An experimental protocol usually defines three subsets: a training subset, 249 | a development subset, and a test subset. 250 | 251 | An experimental protocol can be defined programmatically by creating a 252 | class that inherits from Protocol and implements at least 253 | one of `train_iter`, `development_iter` and `test_iter` methods: 254 | 255 | >>> class MyProtocol(Protocol): 256 | ... def train_iter(self) -> Iterator[Dict]: 257 | ... yield {"uri": "filename1", "any_other_key": "..."} 258 | ... yield {"uri": "filename2", "any_other_key": "..."} 259 | 260 | `{subset}_iter` should return an iterator of dictionnaries with 261 | - "uri" key (mandatory) that provides a unique file identifier (usually 262 | the filename), 263 | - any other key that the protocol may provide. 264 | 265 | It can then be used in Python like this: 266 | 267 | >>> protocol = MyProtocol() 268 | >>> for file in protocol.train(): 269 | ... print(file["uri"]) 270 | filename1 271 | filename2 272 | 273 | An experimental protocol can also be defined using `pyannote.database` 274 | configuration file, whose (configurable) path defaults to "~/database.yml". 275 | 276 | ~~~ Content of ~/database.yml ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 277 | Protocols: 278 | MyDatabase: 279 | Protocol: 280 | MyProtocol: 281 | train: 282 | uri: /path/to/collection.lst 283 | any_other_key: ... # see custom loader documentation 284 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 285 | 286 | where "/path/to/collection.lst" contains the list of identifiers of the 287 | files in the collection: 288 | 289 | ~~~ Content of "/path/to/collection.lst ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 290 | filename1 291 | filename2 292 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 293 | 294 | It can then be used in Python like this: 295 | 296 | >>> from pyannote.database import registry 297 | >>> protocol = registry.get_protocol('MyDatabase.Protocol.MyProtocol') 298 | >>> for file in protocol.train(): 299 | ... print(file["uri"]) 300 | filename1 301 | filename2 302 | 303 | This class is usually inherited from, but can be used directly. 304 | 305 | Parameters 306 | ---------- 307 | preprocessors : dict 308 | Preprocess protocol files so that `file[key] = preprocessors[key](file)` 309 | for each key in `preprocessors`. In case `preprocessors[key]` is not 310 | callable, it should be a string containing placeholders for `file` keys 311 | (e.g. {'audio': '/path/to/{uri}.wav'}) 312 | """ 313 | 314 | def __init__(self, preprocessors: Optional[Preprocessors] = None): 315 | super().__init__() 316 | 317 | if preprocessors is None: 318 | preprocessors = dict() 319 | 320 | self.preprocessors = dict() 321 | for key, preprocessor in preprocessors.items(): 322 | 323 | if callable(preprocessor): 324 | self.preprocessors[key] = preprocessor 325 | 326 | # when `preprocessor` is not callable, it should be a string 327 | # containing placeholder for item key (e.g. '/path/to/{uri}.wav') 328 | elif isinstance(preprocessor, str): 329 | preprocessor_copy = str(preprocessor) 330 | 331 | def func(current_file): 332 | return preprocessor_copy.format(**current_file) 333 | 334 | self.preprocessors[key] = func 335 | 336 | else: 337 | msg = f'"{key}" preprocessor is neither a callable nor a string.' 338 | raise ValueError(msg) 339 | 340 | def preprocess(self, current_file: Union[Dict, ProtocolFile]) -> ProtocolFile: 341 | return ProtocolFile(current_file, lazy=self.preprocessors) 342 | 343 | def __str__(self): 344 | return self.__doc__ 345 | 346 | def train_iter(self) -> Iterator[Union[Dict, ProtocolFile]]: 347 | """Iterate over files in the training subset""" 348 | raise NotImplementedError() 349 | 350 | def development_iter(self) -> Iterator[Union[Dict, ProtocolFile]]: 351 | """Iterate over files in the development subset""" 352 | raise NotImplementedError() 353 | 354 | def test_iter(self) -> Iterator[Union[Dict, ProtocolFile]]: 355 | """Iterate over files in the test subset""" 356 | raise NotImplementedError() 357 | 358 | def subset_helper(self, subset: Subset) -> Iterator[ProtocolFile]: 359 | 360 | try: 361 | files = getattr(self, f"{subset}_iter")() 362 | except (AttributeError, NotImplementedError): 363 | # previous pyannote.database versions used `trn_iter` instead of 364 | # `train_iter`, `dev_iter` instead of `development_iter`, and 365 | # `tst_iter` instead of `test_iter`. therefore, we use the legacy 366 | # version when it is available (and the new one is not). 367 | subset_legacy = LEGACY_SUBSET_MAPPING[subset] 368 | try: 369 | files = getattr(self, f"{subset_legacy}_iter")() 370 | except AttributeError: 371 | msg = f"Protocol does not implement a {subset} subset." 372 | raise NotImplementedError(msg) 373 | 374 | for file in files: 375 | yield self.preprocess(file) 376 | 377 | def train(self) -> Iterator[ProtocolFile]: 378 | return self.subset_helper("train") 379 | 380 | def development(self) -> Iterator[ProtocolFile]: 381 | return self.subset_helper("development") 382 | 383 | def test(self) -> Iterator[ProtocolFile]: 384 | return self.subset_helper("test") 385 | 386 | def files(self) -> Iterator[ProtocolFile]: 387 | """Iterate over all files in `protocol`""" 388 | 389 | # imported here to avoid circular imports 390 | from pyannote.database.util import get_unique_identifier 391 | 392 | yielded_uris = set() 393 | 394 | for method in [ 395 | "development", 396 | "development_enrolment", 397 | "development_trial", 398 | "test", 399 | "test_enrolment", 400 | "test_trial", 401 | "train", 402 | "train_enrolment", 403 | "train_trial", 404 | ]: 405 | 406 | if not hasattr(self, method): 407 | continue 408 | 409 | def iterate(): 410 | try: 411 | for file in getattr(self, method)(): 412 | yield file 413 | except (AttributeError, NotImplementedError): 414 | return 415 | 416 | for current_file in iterate(): 417 | 418 | # skip "files" that do not contain a "uri" entry. 419 | # this happens for speaker verification trials that contain 420 | # two nested files "file1" and "file2" 421 | # see https://github.com/pyannote/pyannote-db-voxceleb/issues/4 422 | if "uri" not in current_file: 423 | continue 424 | 425 | for current_file_ in current_file.files(): 426 | 427 | # corner case when the same file is yielded several times 428 | uri = get_unique_identifier(current_file_) 429 | if uri in yielded_uris: 430 | continue 431 | 432 | yield current_file_ 433 | 434 | yielded_uris.add(uri) 435 | -------------------------------------------------------------------------------- /src/pyannote/database/protocol/segmentation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016- CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | 30 | from typing import Dict, Optional 31 | from .protocol import Protocol 32 | from .protocol import ProtocolFile 33 | from .protocol import Subset 34 | from .protocol import Preprocessor 35 | from .protocol import Preprocessors 36 | from pyannote.core import Annotation 37 | from pyannote.core import Timeline 38 | from pyannote.core import Segment 39 | import functools 40 | 41 | 42 | def crop_annotated( 43 | current_file: ProtocolFile, existing_preprocessor: Optional[Preprocessor] = None 44 | ) -> Timeline: 45 | """Preprocessor that crops 'annotated' according to 'duration' 46 | 47 | Returns 'annotated' unchanged if 'duration' is not available 48 | 49 | Parameters 50 | ---------- 51 | current_file : ProtocolFile 52 | Protocol file. 53 | existing_preprocessor : Preprocessor, optional 54 | When provided, this preprocessor must be used to get the initial 55 | 'annotated' instead of getting it from 'current_file["annotated"]' 56 | 57 | Returns 58 | ------- 59 | cropped_annotated : Timeline 60 | "annotated" cropped by "duration". 61 | """ 62 | 63 | if existing_preprocessor is None: 64 | annotated = current_file.get("annotated", None) 65 | else: 66 | annotated = existing_preprocessor(current_file) 67 | 68 | if annotated is None: 69 | return None 70 | 71 | duration = current_file.get("duration", None) 72 | if duration is None: 73 | return annotated 74 | 75 | # crop 'annotated' to 'duration' 76 | duration = Segment(0.0, duration) 77 | 78 | if annotated and not annotated.extent() in duration: 79 | return annotated.crop(duration, mode="intersection") 80 | 81 | return annotated 82 | 83 | 84 | def crop_annotation( 85 | current_file: ProtocolFile, existing_preprocessor: Optional[Preprocessor] = None 86 | ) -> Annotation: 87 | """Preprocessor that crops 'annotation' by 'annotated' 88 | 89 | Returns 'annotation' unchanged if 'annotated' is not available 90 | 91 | Parameters 92 | ---------- 93 | current_file : ProtocolFile 94 | Protocol file. 95 | existing_preprocessor : Preprocessor, optional 96 | When provided, this preprocessor must be used to get the initial 97 | 'annotation' instead of getting it from 'current_file["annotation"]' 98 | 99 | Returns 100 | ------- 101 | cropped_annotation : Annotation 102 | "annotation" cropped by "annotated". 103 | """ 104 | 105 | if existing_preprocessor is None: 106 | annotation = current_file.get("annotation", None) 107 | else: 108 | annotation = existing_preprocessor(current_file) 109 | 110 | if annotation is None: 111 | return None 112 | 113 | annotated = current_file.get("annotated", None) 114 | if annotated is None: 115 | return annotation 116 | 117 | # crop 'annotation' to 'annotated' extent 118 | if annotated and not annotated.covers(annotation.get_timeline()): 119 | return annotation.crop(annotated, mode="intersection") 120 | 121 | return annotation 122 | 123 | 124 | class SegmentationProtocol(Protocol): 125 | """A protocol for segmentation experiments 126 | 127 | A segmentation protocol can be defined programmatically by creating 128 | a class that inherits from SegmentationProtocol and implements at 129 | least one of `train_iter`, `development_iter` and `test_iter` methods: 130 | 131 | >>> class MySegmentationProtocol(SegmentationProtocol): 132 | ... def train_iter(self) -> Iterator[Dict]: 133 | ... yield {"uri": "filename1", 134 | ... "annotation": Annotation(...), 135 | ... "annotated": Timeline(...)} 136 | ... yield {"uri": "filename2", 137 | ... "annotation": Annotation(...), 138 | ... "annotated": Timeline(...)} 139 | 140 | `{subset}_iter` should return an iterator of dictionnaries with 141 | - "uri" key (mandatory) that provides a unique file identifier (usually 142 | the filename), 143 | - "annotation" key (mandatory for train and development subsets) that 144 | provides reference segmentation as a `pyannote.core.Annotation` 145 | instance, 146 | - "annotated" key (recommended) that describes which part of the file 147 | has been annotated, as a `pyannote.core.Timeline` instance. Any part 148 | of "annotation" that lives outside of the provided "annotated" will 149 | be removed. This is also used by `pyannote.metrics` to remove 150 | un-annotated regions from its evaluation report, and by 151 | `pyannote.audio` to not consider empty un-annotated regions as 152 | non-speech. 153 | - any other key that the protocol may provide. 154 | 155 | It can then be used in Python like this: 156 | 157 | >>> protocol = MySegmentationProtocol() 158 | >>> for file in protocol.train(): 159 | ... print(file["uri"]) 160 | filename1 161 | filename2 162 | 163 | A segmentation protocol can also be defined using `pyannote.database` 164 | configuration file, whose (configurable) path defaults to "~/database.yml". 165 | 166 | ~~~ Content of ~/database.yml ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 167 | Protocols: 168 | MyDatabase: 169 | Segmentation: 170 | MyProtocol: 171 | train: 172 | uri: /path/to/collection.lst 173 | annotation: /path/to/reference.rttm 174 | annotated: /path/to/reference.uem 175 | any_other_key: ... # see custom loader documentation 176 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 177 | 178 | where "/path/to/collection.lst" contains the list of identifiers of the 179 | files in the collection: 180 | 181 | ~~~ Content of "/path/to/collection.lst ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 182 | filename1 183 | filename2 184 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 185 | 186 | "/path/to/reference.rttm" contains the reference segmentation using 187 | RTTM format: 188 | 189 | ~~~ Content of "/path/to/reference.rttm ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 190 | SPEAKER filename1 1 3.168 0.800 music 191 | SPEAKER filename1 1 5.463 0.640 music 192 | SPEAKER filename1 1 5.496 0.574 speech 193 | SPEAKER filename1 1 10.454 0.499 speech 194 | SPEAKER filename2 1 2.977 0.391 noise 195 | SPEAKER filename2 1 18.705 0.964 noise 196 | SPEAKER filename2 1 22.269 0.457 music 197 | SPEAKER filename2 1 28.474 1.526 music 198 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 199 | 200 | "/path/to/reference.uem" describes the annotated regions using UEM format: 201 | 202 | ~~~ Content of "/path/to/reference.uem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 203 | filename1 NA 0.000 30.000 204 | filename2 NA 0.000 30.000 205 | filename2 NA 40.000 70.000 206 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 207 | 208 | It can then be used in Python like this: 209 | 210 | >>> from pyannote.database import registry 211 | >>> protocol = registry.get_protocol('MyDatabase.SpeakerDiarization.MyProtocol') 212 | >>> for file in protocol.train(): 213 | ... print(file["uri"]) 214 | filename1 215 | filename2 216 | """ 217 | 218 | def __init__(self, preprocessors: Optional[Preprocessors] = None): 219 | 220 | if preprocessors is None: 221 | preprocessors = dict() 222 | 223 | # wrap exisiting "annotated" preprocessor by crop_annotated so that 224 | # "annotated" is automatically cropped by file "duration" when provided 225 | preprocessors["annotated"] = functools.partial( 226 | crop_annotated, existing_preprocessor=preprocessors.get("annotated", None) 227 | ) 228 | 229 | # wrap exisiting "annotation" preprocessor by crop_annotation so that 230 | # "annotation" is automatically cropped by "annotated" when provided 231 | preprocessors["annotation"] = functools.partial( 232 | crop_annotation, existing_preprocessor=preprocessors.get("annotation", None) 233 | ) 234 | 235 | super().__init__(preprocessors=preprocessors) 236 | 237 | def stats(self, subset: Subset = "train") -> Dict: 238 | """Obtain global statistics on a given subset 239 | 240 | Parameters 241 | ---------- 242 | subset : {'train', 'development', 'test'} 243 | 244 | Returns 245 | ------- 246 | stats : dict 247 | Dictionary with the followings keys: 248 | * annotated: float 249 | total duration (in seconds) of the parts that were manually annotated 250 | * annotation: float 251 | total duration (in seconds) of actual annotations 252 | * n_files: int 253 | number of files in the subset 254 | * labels: dict 255 | maps classes with their total duration (in seconds) 256 | """ 257 | 258 | from ..util import get_annotated 259 | 260 | annotated_duration = 0.0 261 | annotation_duration = 0.0 262 | n_files = 0 263 | labels = {} 264 | 265 | for item in getattr(self, subset)(): 266 | 267 | annotated = get_annotated(item) 268 | annotated_duration += annotated.duration() 269 | 270 | # increment 'annotation' total duration 271 | annotation = item["annotation"] 272 | annotation_duration += annotation.get_timeline().duration() 273 | 274 | for label, duration in annotation.chart(): 275 | if label not in labels: 276 | labels[label] = 0.0 277 | labels[label] += duration 278 | n_files += 1 279 | 280 | stats = { 281 | "annotated": annotated_duration, 282 | "annotation": annotation_duration, 283 | "n_files": n_files, 284 | "labels": labels, 285 | } 286 | 287 | return stats 288 | -------------------------------------------------------------------------------- /src/pyannote/database/protocol/speaker_diarization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016- CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | from .segmentation import SegmentationProtocol 30 | 31 | class SpeakerDiarizationProtocol(SegmentationProtocol): 32 | """A protocol for speaker diarization experiments 33 | 34 | A speaker diarization protocol can be defined programmatically by creating 35 | a class that inherits from SpeakerDiarizationProtocol and implements at 36 | least one of `train_iter`, `development_iter` and `test_iter` methods: 37 | 38 | >>> class MySpeakerDiarizationProtocol(SpeakerDiarizationProtocol): 39 | ... def train_iter(self) -> Iterator[Dict]: 40 | ... yield {"uri": "filename1", 41 | ... "annotation": Annotation(...), 42 | ... "annotated": Timeline(...)} 43 | ... yield {"uri": "filename2", 44 | ... "annotation": Annotation(...), 45 | ... "annotated": Timeline(...)} 46 | 47 | `{subset}_iter` should return an iterator of dictionnaries with 48 | - "uri" key (mandatory) that provides a unique file identifier (usually 49 | the filename), 50 | - "annotation" key (mandatory for train and development subsets) that 51 | provides reference speaker diarization as a `pyannote.core.Annotation` 52 | instance, 53 | - "annotated" key (recommended) that describes which part of the file 54 | has been annotated, as a `pyannote.core.Timeline` instance. Any part 55 | of "annotation" that lives outside of the provided "annotated" will 56 | be removed. This is also used by `pyannote.metrics` to remove 57 | un-annotated regions from its evaluation report, and by 58 | `pyannote.audio` to not consider empty un-annotated regions as 59 | non-speech. 60 | - any other key that the protocol may provide. 61 | 62 | It can then be used in Python like this: 63 | 64 | >>> protocol = MySpeakerDiarizationProtocol() 65 | >>> for file in protocol.train(): 66 | ... print(file["uri"]) 67 | filename1 68 | filename2 69 | 70 | A speaker diarization protocol can also be defined using `pyannote.database` 71 | configuration file, whose (configurable) path defaults to "~/database.yml". 72 | 73 | ~~~ Content of ~/database.yml ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 74 | Protocols: 75 | MyDatabase: 76 | SpeakerDiarization: 77 | MyProtocol: 78 | train: 79 | uri: /path/to/collection.lst 80 | annotation: /path/to/reference.rttm 81 | annotated: /path/to/reference.uem 82 | any_other_key: ... # see custom loader documentation 83 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 84 | 85 | where "/path/to/collection.lst" contains the list of identifiers of the 86 | files in the collection: 87 | 88 | ~~~ Content of "/path/to/collection.lst ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 89 | filename1 90 | filename2 91 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 92 | 93 | "/path/to/reference.rttm" contains the reference speaker diarization using 94 | RTTM format: 95 | 96 | ~~~ Content of "/path/to/reference.rttm ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 97 | SPEAKER filename1 1 3.168 0.800 speaker_A 98 | SPEAKER filename1 1 5.463 0.640 speaker_A 99 | SPEAKER filename1 1 5.496 0.574 speaker_B 100 | SPEAKER filename1 1 10.454 0.499 speaker_B 101 | SPEAKER filename2 1 2.977 0.391 speaker_C 102 | SPEAKER filename2 1 18.705 0.964 speaker_C 103 | SPEAKER filename2 1 22.269 0.457 speaker_A 104 | SPEAKER filename2 1 28.474 1.526 speaker_A 105 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 106 | 107 | "/path/to/reference.uem" describes the annotated regions using UEM format: 108 | 109 | ~~~ Content of "/path/to/reference.uem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 110 | filename1 NA 0.000 30.000 111 | filename2 NA 0.000 30.000 112 | filename2 NA 40.000 70.000 113 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 114 | 115 | It can then be used in Python like this: 116 | 117 | >>> from pyannote.database import registry 118 | >>> protocol = registry.get_protocol('MyDatabase.SpeakerDiarization.MyProtocol') 119 | >>> for file in protocol.train(): 120 | ... print(file["uri"]) 121 | filename1 122 | filename2 123 | """ 124 | 125 | pass -------------------------------------------------------------------------------- /src/pyannote/database/protocol/speaker_identification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2017 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | 30 | from .speaker_verification import SpeakerVerificationProtocol 31 | 32 | 33 | class SpeakerIdentificationProtocol(SpeakerVerificationProtocol): 34 | """Speaker identification protocol 35 | 36 | Parameters 37 | ---------- 38 | preprocessors : dict or (key, preprocessor) iterable 39 | When provided, each protocol item (dictionary) are preprocessed, such 40 | that item[key] = preprocessor(item). In case 'preprocessor' is not 41 | callable, it should be a string containing placeholder for item keys 42 | (e.g. {'audio': '/path/to/{uri}.wav'}) 43 | """ 44 | 45 | pass 46 | -------------------------------------------------------------------------------- /src/pyannote/database/protocol/speaker_recognition.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016-2020 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | 30 | from .protocol import Protocol 31 | 32 | 33 | class SpeakerRecognitionProtocol(Protocol): 34 | """Speaker recognition protocol 35 | 36 | Parameters 37 | ---------- 38 | preprocessors : dict or (key, preprocessor) iterable 39 | When provided, each protocol item (dictionary) are preprocessed, such 40 | that item[key] = preprocessor(item). In case 'preprocessor' is not 41 | callable, it should be a string containing placeholder for item keys 42 | (e.g. {'wav': '/path/to/{uri}.wav'}) 43 | """ 44 | 45 | def trn_iter(self): 46 | raise NotImplementedError( 47 | "Custom speaker recognition protocol " 'should implement "trn_iter".' 48 | ) 49 | 50 | def trn_enroll_iter(self): 51 | raise NotImplementedError( 52 | "Custom speaker recognition protocol " 'should implement "trn_enroll_iter".' 53 | ) 54 | 55 | def trn_test_iter(self): 56 | raise NotImplementedError( 57 | "Custom speaker recognition protocol " 'should implement "trn_test_iter".' 58 | ) 59 | 60 | def trn_keys(self): 61 | raise NotImplementedError( 62 | "Custom speaker recognition protocol " 'should implement "trn_keys".' 63 | ) 64 | 65 | def dev_enroll_iter(self): 66 | raise NotImplementedError( 67 | "Custom speaker recognition protocol " 'should implement "dev_enroll_iter".' 68 | ) 69 | 70 | def dev_test_iter(self): 71 | raise NotImplementedError( 72 | "Custom speaker recognition protocol " 'should implement "dev_test_iter".' 73 | ) 74 | 75 | def dev_keys(self): 76 | raise NotImplementedError( 77 | "Custom speaker recognition protocol " 'should implement "dev_keys".' 78 | ) 79 | 80 | def tst_enroll_iter(self): 81 | raise NotImplementedError( 82 | "Custom speaker recognition protocol " 'should implement "tst_enroll_iter".' 83 | ) 84 | 85 | def tst_test_iter(self): 86 | raise NotImplementedError( 87 | "Custom speaker recognition protocol " 'should implement "tst_test_iter".' 88 | ) 89 | 90 | def tst_keys(self): 91 | raise NotImplementedError( 92 | "Custom speaker recognition protocol " 'should implement tst_keys".' 93 | ) 94 | 95 | def train(self, yield_name=False): 96 | """Iterate over the training set 97 | 98 | This will yield dictionaries with the followings keys: 99 | 100 | * database: str 101 | unique database identifier 102 | * uri: str 103 | unique recording identifier 104 | * channel: int 105 | index of resource channel to use 106 | * speaker: str 107 | unique speaker identifier 108 | 109 | as well as keys coming from the provided preprocessors. 110 | 111 | Usage 112 | ----- 113 | >>> for item in protocol.train(): 114 | ... uri = item['uri'] 115 | ... channel = item['channel'] 116 | ... speaker = item['speaker'] 117 | """ 118 | 119 | generator = self.trn_iter() 120 | 121 | for name, item in generator: 122 | if yield_name: 123 | yield name, self.preprocess(item) 124 | else: 125 | yield self.preprocess(item) 126 | 127 | def train_enroll(self, yield_name=True): 128 | """Iterate over the training set enrollments 129 | 130 | This will yield dictionaries with the followings keys: 131 | 132 | * database: str 133 | unique database identifier 134 | * uri: str 135 | uniform (or unique) resource identifier 136 | * channel: int 137 | index of resource channel to use 138 | 139 | as well as keys coming from the provided preprocessors. 140 | 141 | Usage 142 | ----- 143 | >>> for item in protocol.train_enroll(): 144 | ... uri = item['uri'] 145 | ... channel = item['channel'] 146 | """ 147 | 148 | generator = self.trn_enroll_iter() 149 | 150 | for name, item in generator: 151 | if yield_name: 152 | yield name, self.preprocess(item) 153 | else: 154 | yield self.preprocess(item) 155 | 156 | def train_test(self, yield_name=True): 157 | """Iterate over the training set tests 158 | 159 | This will yield dictionaries with the followings keys: 160 | 161 | * database: str 162 | unique database identifier 163 | * uri: str 164 | uniform (or unique) resource identifier 165 | * channel: int 166 | index of resource channel to use 167 | 168 | as well as keys coming from the provided preprocessors. 169 | 170 | Usage 171 | ----- 172 | >>> for item in protocol.train_test(): 173 | ... uri = item['uri'] 174 | ... channel = item['channel'] 175 | """ 176 | 177 | generator = self.trn_test_iter() 178 | 179 | for name, item in generator: 180 | if yield_name: 181 | yield name, self.preprocess(item) 182 | else: 183 | yield self.preprocess(item) 184 | 185 | def train_keys(self): 186 | return self.trn_keys() 187 | 188 | def development_enroll(self, yield_name=True): 189 | """Iterate over the development set enrollments 190 | 191 | This will yield dictionaries with the followings keys: 192 | 193 | * database: str 194 | unique database identifier 195 | * uri: str 196 | uniform (or unique) resource identifier 197 | * channel: int 198 | index of resource channel to use 199 | 200 | as well as keys coming from the provided preprocessors. 201 | 202 | Usage 203 | ----- 204 | >>> for item in protocol.development_enroll(): 205 | ... uri = item['uri'] 206 | ... channel = item['channel'] 207 | """ 208 | 209 | generator = self.dev_enroll_iter() 210 | 211 | for name, item in generator: 212 | if yield_name: 213 | yield name, self.preprocess(item) 214 | else: 215 | yield self.preprocess(item) 216 | 217 | def development_test(self, yield_name=True): 218 | """Iterate over the development set tests 219 | 220 | This will yield dictionaries with the followings keys: 221 | 222 | * database: str 223 | unique database identifier 224 | * uri: str 225 | uniform (or unique) resource identifier 226 | * channel: int 227 | index of resource channel to use 228 | 229 | as well as keys coming from the provided preprocessors. 230 | 231 | Usage 232 | ----- 233 | >>> for item in protocol.development_test(): 234 | ... uri = item['uri'] 235 | ... channel = item['channel'] 236 | """ 237 | 238 | generator = self.dev_test_iter() 239 | 240 | for name, item in generator: 241 | if yield_name: 242 | yield name, self.preprocess(item) 243 | else: 244 | yield self.preprocess(item) 245 | 246 | def development_keys(self): 247 | return self.dev_keys() 248 | 249 | def test_enroll(self, yield_name=True): 250 | """Iterate over the test set targets 251 | 252 | This will yield dictionaries with the followings keys: 253 | 254 | * database: str 255 | unique database identifier 256 | * uri: str 257 | uniform (or unique) resource identifier 258 | * channel: int 259 | index of resource channel to use 260 | * speaker: str 261 | unique speaker identifier 262 | 263 | as well as keys coming from the provided preprocessors. 264 | 265 | Usage 266 | ----- 267 | >>> for item in protocol.test_enroll(): 268 | ... uri = item['uri'] 269 | ... channel = item['channel'] 270 | ... speaker = item['speaker'] 271 | """ 272 | 273 | generator = self.tst_enroll_iter() 274 | 275 | for name, item in generator: 276 | if yield_name: 277 | yield name, self.preprocess(item) 278 | else: 279 | yield self.preprocess(item) 280 | 281 | def test_test(self, yield_name=True): 282 | """Iterate over the test set tests 283 | 284 | This will yield dictionaries with the followings keys: 285 | 286 | * database: str 287 | unique database identifier 288 | * uri: str 289 | uniform (or unique) resource identifier 290 | * channel: int 291 | index of resource channel to use 292 | 293 | as well as keys coming from the provided preprocessors. 294 | 295 | Usage 296 | ----- 297 | >>> for item in protocol.test_test(): 298 | ... uri = item['uri'] 299 | ... channel = item['channel'] 300 | """ 301 | 302 | generator = self.tst_test_iter() 303 | 304 | for name, item in generator: 305 | if yield_name: 306 | yield name, self.preprocess(item) 307 | else: 308 | yield self.preprocess(item) 309 | 310 | def test_keys(self): 311 | return self.tst_keys() 312 | -------------------------------------------------------------------------------- /src/pyannote/database/protocol/speaker_spotting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2017-2020 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | 30 | from .speaker_diarization import SpeakerDiarizationProtocol 31 | 32 | 33 | class SpeakerSpottingProtocol(SpeakerDiarizationProtocol): 34 | """Speaker spotting protocol 35 | 36 | Parameters 37 | ---------- 38 | preprocessors : dict or (key, preprocessor) iterable 39 | When provided, each protocol item (dictionary) are preprocessed, such 40 | that item[key] = preprocessor(item). In case 'preprocessor' is not 41 | callable, it should be a string containing placeholder for item keys 42 | (e.g. {'audio': '/path/to/{uri}.wav'}) 43 | """ 44 | 45 | def trn_iter(self): 46 | raise NotImplementedError( 47 | 'Custom speaker spotting protocol should implement "trn_iter".' 48 | ) 49 | 50 | def trn_enrol_iter(self): 51 | pass 52 | 53 | def trn_try_iter(self): 54 | pass 55 | 56 | def dev_iter(self): 57 | raise NotImplementedError( 58 | 'Custom speaker spotting protocol should implement "dev_iter".' 59 | ) 60 | 61 | def dev_enrol_iter(self): 62 | raise NotImplementedError( 63 | 'Custom speaker spotting protocol should implement "dev_enrol_iter".' 64 | ) 65 | 66 | def dev_try_iter(self): 67 | raise NotImplementedError( 68 | 'Custom speaker spotting protocol should implement "dev_try_iter".' 69 | ) 70 | 71 | def tst_iter(self): 72 | raise NotImplementedError( 73 | 'Custom speaker spotting protocol should implement "tst_iter".' 74 | ) 75 | 76 | def tst_enrol_iter(self): 77 | raise NotImplementedError( 78 | 'Custom speaker spotting protocol should implement "tst_enrol_iter".' 79 | ) 80 | 81 | def tst_try_iter(self): 82 | raise NotImplementedError( 83 | 'Custom speaker spotting protocol should implement "tst_try_iter".' 84 | ) 85 | 86 | def train_enrolment(self): 87 | """Iterate over the enrolments of the train set 88 | 89 | Yields dictionaries with the followings keys: 90 | 91 | * uri: str 92 | unique audio file identifier 93 | * database: str 94 | unique database identifier 95 | * model_id: str 96 | unique model identifier (the same speaker might have different models) 97 | * enrol_with: pyannote.core.Timeline 98 | parts of the audio file to use for enrolment 99 | 100 | as well as keys coming from the provided preprocessors (e.g. 'audio') 101 | 102 | Usage 103 | ----- 104 | >>> models = {} 105 | >>> for enrolment in protocol.train_enrolment(): 106 | ... # obtain path to audio file 107 | ... audio = enrolment['audio'] 108 | ... # obtain parts of the audio file to use for enrolment 109 | ... enrol_with = enrolment['enrol_with'] 110 | ... # this is where enrolment actually happens 111 | ... model = do_something(audio, enrol_with) 112 | ... # store models for later use 113 | ... model_id = enrolment['model_id'] 114 | ... models[model_id] = model 115 | 116 | """ 117 | 118 | generator = self.trn_enrol_iter() 119 | 120 | for current_enrolment in generator: 121 | yield self.preprocess(current_enrolment) 122 | 123 | def train_trial(self): 124 | """Iterate over the trials of the train set 125 | 126 | Yields dictionaries with the followings keys: 127 | 128 | * uri: str 129 | unique audio file identifier 130 | * database: str 131 | unique database identifier 132 | * try_with: pyannote.core.Segment, optional 133 | parts of the audio file where to look for the target speaker. 134 | default is to use the whole audio file 135 | * model_id: str 136 | unique identifier of the target 137 | * reference: pyannote.core.Timeline 138 | parts of the audio file where the target actually speaks. 139 | it might be empty in case of impostor trials. 140 | in case of genuine trials, it should be contained in `try_with` 141 | 142 | as well as keys coming from the provided preprocessors (e.g. 'audio') 143 | 144 | Usage 145 | ----- 146 | >>> for trial in protocol.train_trial(): 147 | ... # obtain path to audio file 148 | ... audio = trial['audio'] 149 | ... # obtain parts of the audio file to use for trial 150 | ... try_with = trial['try_with'] 151 | ... # this is where the trial actually happens 152 | ... model_id = trial['model_id'] 153 | ... score = do_something(audio, try_with, model_id) 154 | ... # optionally perform evaluation 155 | ... reference = trial['reference'] 156 | ... metric(reference, score) 157 | 158 | """ 159 | 160 | generator = self.trn_try_iter() 161 | 162 | for current_trial in generator: 163 | yield self.preprocess(current_trial) 164 | 165 | def development_enrolment(self): 166 | """Iterate over the enrolments of the development set 167 | 168 | Yields dictionaries with the followings keys: 169 | 170 | * uri: str 171 | unique audio file identifier 172 | * database: str 173 | unique database identifier 174 | * model_id: str 175 | unique model identifier (the same speaker might have different models) 176 | * enrol_with: pyannote.core.Timeline 177 | parts of the audio file to use for enrolment 178 | 179 | as well as keys coming from the provided preprocessors (e.g. 'audio') 180 | 181 | Usage 182 | ----- 183 | >>> models = {} 184 | >>> for enrolment in protocol.development_enrolment(): 185 | ... # obtain path to audio file 186 | ... audio = enrolment['audio'] 187 | ... # obtain parts of the audio file to use for enrolment 188 | ... enrol_with = enrolment['enrol_with'] 189 | ... # this is where enrolment actually happens 190 | ... model = do_something(audio, enrol_with) 191 | ... # store models for later use 192 | ... model_id = enrolment['model_id'] 193 | ... models[model_id] = model 194 | 195 | """ 196 | 197 | generator = self.dev_enrol_iter() 198 | 199 | for current_enrolment in generator: 200 | yield self.preprocess(current_enrolment) 201 | 202 | def development_trial(self): 203 | """Iterate over the trials of the development set 204 | 205 | Yields dictionaries with the followings keys: 206 | 207 | * uri: str 208 | unique audio file identifier 209 | * database: str 210 | unique database identifier 211 | * try_with: pyannote.core.Segment, optional 212 | parts of the audio file where to look for the target speaker. 213 | default is to use the whole audio file 214 | * model_id: str 215 | unique identifier of the target 216 | * reference: pyannote.core.Timeline 217 | parts of the audio file where the target actually speaks. 218 | it might be empty in case of impostor trials. 219 | in case of genuine trials, it should be contained in `try_with` 220 | 221 | as well as keys coming from the provided preprocessors (e.g. 'audio') 222 | 223 | Usage 224 | ----- 225 | >>> for trial in protocol.development_trial(): 226 | ... # obtain path to audio file 227 | ... audio = trial['audio'] 228 | ... # obtain parts of the audio file to use for trial 229 | ... try_with = trial['try_with'] 230 | ... # this is where the trial actually happens 231 | ... model_id = trial['model_id'] 232 | ... score = do_something(audio, try_with, model_id) 233 | ... # optionally perform evaluation 234 | ... reference = trial['reference'] 235 | ... metric(reference, score) 236 | 237 | """ 238 | 239 | generator = self.dev_try_iter() 240 | 241 | for current_trial in generator: 242 | yield self.preprocess(current_trial) 243 | 244 | def test_enrolment(self): 245 | """Iterate over the enrolments of the test set 246 | 247 | Yields dictionaries with the followings keys: 248 | 249 | * uri: str 250 | unique audio file identifier 251 | * database: str 252 | unique database identifier 253 | * model_id: str 254 | unique model identifier (the same speaker might have different models) 255 | * enrol_with: pyannote.core.Timeline 256 | parts of the audio file to use for enrolment 257 | 258 | as well as keys coming from the provided preprocessors (e.g. 'audio') 259 | 260 | Usage 261 | ----- 262 | >>> models = {} 263 | >>> for enrolment in protocol.test_enrolment(): 264 | ... # obtain path to audio file 265 | ... audio = enrolment['audio'] 266 | ... # obtain parts of the audio file to use for enrolment 267 | ... enrol_with = enrolment['enrol_with'] 268 | ... # this is where enrolment actually happens 269 | ... model = do_something(audio, enrol_with) 270 | ... # store models for later use 271 | ... model_id = enrolment['model_id'] 272 | ... models[model_id] = model 273 | 274 | """ 275 | 276 | generator = self.tst_enrol_iter() 277 | 278 | for current_enrolment in generator: 279 | yield self.preprocess(current_enrolment) 280 | 281 | def test_trial(self): 282 | """Iterate over the trials of the test set 283 | 284 | Yields dictionaries with the followings keys: 285 | 286 | * uri: str 287 | unique audio file identifier 288 | * database: str 289 | unique database identifier 290 | * try_with: pyannote.core.Segment, optional 291 | parts of the audio file where to look for the target speaker. 292 | default is to use the whole audio file 293 | * model_id: str 294 | unique identifier of the target 295 | * reference: pyannote.core.Timeline 296 | parts of the audio file where the target actually speaks. 297 | it might be empty in case of impostor trials. 298 | in case of genuine trials, it should be contained in `try_with` 299 | 300 | as well as keys coming from the provided preprocessors (e.g. 'audio') 301 | 302 | Usage 303 | ----- 304 | >>> for trial in protocol.test_trial(): 305 | ... # obtain path to audio file 306 | ... audio = trial['audio'] 307 | ... # obtain parts of the audio file to use for trial 308 | ... try_with = trial['try_with'] 309 | ... # this is where the trial actually happens 310 | ... model_id = trial['model_id'] 311 | ... score = do_something(audio, try_with, model_id) 312 | ... # optionally perform evaluation 313 | ... reference = trial['reference'] 314 | ... metric(reference, score) 315 | 316 | """ 317 | 318 | generator = self.tst_try_iter() 319 | 320 | for current_trial in generator: 321 | yield self.preprocess(current_trial) 322 | -------------------------------------------------------------------------------- /src/pyannote/database/protocol/speaker_verification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2017-2020 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | 30 | from typing import Dict, Iterator 31 | from .speaker_diarization import SpeakerDiarizationProtocol 32 | from .protocol import Subset 33 | from .protocol import LEGACY_SUBSET_MAPPING 34 | 35 | 36 | class SpeakerVerificationProtocol(SpeakerDiarizationProtocol): 37 | """A protocol for speaker verification experiments 38 | 39 | A speaker verification protocol can be defined programmatically by creating 40 | a class that inherits from `SpeakerVerificationProtocol` and implement at 41 | least one of `train_trial_iter`, `development_trial_iter` and 42 | `test_trial_iter` methods: 43 | 44 | >>> class MySpeakerVerificationProtocol(SpeakerVerificationProtocol): 45 | ... def train_trial_iter(self) -> Iterator[Dict]: 46 | ... yield {"reference": 0, 47 | ... "file1": { 48 | ... "uri":"filename1", 49 | ... "try_with":Timeline(...) 50 | ... }, 51 | ... "file2": { 52 | ... "uri":"filename3", 53 | ... "try_with":Timeline(...) 54 | ... }, 55 | ... } 56 | 57 | `{subset}_trial_iter` should return an iterator of dictionnaries with 58 | 59 | - `reference` key (mandatory) that provides an int portraying whether 60 | `file1` and `file2` are uttered by the same speaker (1 is same, 0 is 61 | different), 62 | - `file1` key (mandatory) that provides the first file, 63 | - `file2` key (mandatory) that provides the second file. 64 | 65 | Both `file1` and `file2` should be provided as dictionaries or ProtocolFile 66 | instances with 67 | 68 | - `uri` key (mandatory), 69 | - `try_with` key (mandatory) that describes which part of the file should 70 | be used in the validation process, as a `pyannote.core.Timeline` instance. 71 | - any other key that the protocol may provide. 72 | 73 | It can then be used in Python like this: 74 | 75 | >>> protocol = MySpeakerVerificationProtocol() 76 | ... for trial in protocol.train_trial(): 77 | ... print(f"{trial['reference']} {trial['file1']['uri']} {trial['file2']['uri']}") 78 | 1 filename1 filename2 79 | 0 filename1 filename3 80 | 81 | A speaker verification protocol can also be defined using `pyannote.database` 82 | configuration file, whose (configurable) path defaults to "~/database.yml". 83 | 84 | ~~~ Content of ~/database.yml ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 85 | Protocols: 86 | MyDatabase: 87 | SpeakerVerification: 88 | MyProtocol: 89 | train: 90 | uri: /path/to/train.lst 91 | duration: /path/to/duration.map 92 | trial: /path/to/trial.txt 93 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 94 | 95 | where `/path/to/train.lst` contains the list of identifiers of the 96 | files in the collection: 97 | 98 | ~~~ Content of /path/to/train.lst~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 99 | filename1 100 | filename2 101 | filename3 102 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 103 | 104 | `/path/to/duration.map` contains the duration of the files: 105 | 106 | ~~~ Content of /path/to/duration.map ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 107 | filename1 30.000 108 | filename2 30.000 109 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 110 | 111 | `/path/to/trial.txt` contains a list of trials : 112 | 113 | ~~~ Content of /path/to/trial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 114 | 1 filename1 filename2 115 | 0 filename1 filename3 116 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 117 | 118 | `1` stands for _target_ trials and `0` for _non-target_ trials. 119 | In the example below, it means that the same speaker uttered files 120 | `filename1` and `filename2` and that `filename1` and `filename3` are from 121 | two different speakers. 122 | 123 | It can then be used in Python like this: 124 | 125 | >>> from pyannote.database import registry 126 | >>> protocol = registry.get_protocol('MyDatabase.SpeakerVerification.MyProtocol') 127 | >>> for trial in protocol.train_trial(): 128 | ... print(f"{trial['reference']} {trial['file1']['uri']} {trial['file2']['uri']}") 129 | 1 filename1 filename2 130 | 0 filename1 filename3 131 | 132 | Note that speaker verification protocols (`SpeakerVerificationProtocol`) 133 | are a subclass of speaker diarization protocols (`SpeakerDiarizationProtocol`). 134 | As such, they also define regular `{subset}` methods. 135 | """ 136 | 137 | def subset_trial_helper(self, subset: Subset) -> Iterator[Dict]: 138 | 139 | try: 140 | trials = getattr(self, f"{subset}_trial_iter")() 141 | except (AttributeError, NotImplementedError): 142 | # previous pyannote.database versions used `trn_try_iter` instead 143 | # of `train_trial_iter`, `dev_try_iter` instead of 144 | # `development_trial_iter`, and `tst_try_iter` instead of 145 | # `test_iter`. therefore, we use the legacy version when it is 146 | # available (and the new one is not). 147 | subset_legacy = LEGACY_SUBSET_MAPPING[subset] 148 | try: 149 | trials = getattr(self, f"{subset_legacy}_try_iter")() 150 | except AttributeError: 151 | msg = f"{subset}_trial_iter is not implemented." 152 | raise AttributeError(msg) 153 | 154 | for trial in trials: 155 | trial["file1"] = self.preprocess(trial["file1"]) 156 | trial["file2"] = self.preprocess(trial["file2"]) 157 | yield trial 158 | 159 | def train_trial_iter(self) -> Iterator[Dict]: 160 | """Iterate over trials in the train subset""" 161 | raise NotImplementedError() 162 | 163 | def development_trial_iter(self) -> Iterator[Dict]: 164 | """Iterate over trials in the development subset""" 165 | raise NotImplementedError() 166 | 167 | def test_trial_iter(self) -> Iterator[Dict]: 168 | """Iterate over trials in the test subset""" 169 | raise NotImplementedError() 170 | 171 | def train_trial(self) -> Iterator[Dict]: 172 | return self.subset_trial_helper("train") 173 | 174 | def development_trial(self) -> Iterator[Dict]: 175 | return self.subset_trial_helper("development") 176 | 177 | def test_trial(self) -> Iterator[Dict]: 178 | return self.subset_trial_helper("test") 179 | -------------------------------------------------------------------------------- /src/pyannote/database/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyannote/pyannote-database/e5b8a5581bfe4ec05ac7bbfc1be9cd66f41b8f36/src/pyannote/database/py.typed -------------------------------------------------------------------------------- /src/pyannote/database/registry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2022- CNRS 7 | # Copyright (c) 2022- Université Paul Sabatier 8 | 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | # SOFTWARE. 26 | 27 | # AUTHORS 28 | # Hervé BREDIN - http://herve.niderb.fr 29 | # Alexis PLAQUET 30 | 31 | from enum import Enum 32 | import os 33 | from pathlib import Path 34 | from typing import Dict, List, Set, Optional, Text, Tuple, Type, Union 35 | import warnings 36 | 37 | from pyannote.database.protocol.protocol import Preprocessors, Protocol 38 | from .custom import create_protocol, get_init 39 | from .database import Database 40 | import yaml 41 | 42 | 43 | # controls what to do in case of protocol name conflict 44 | class LoadingMode(Enum): 45 | OVERRIDE = 0 # override existing protocol 46 | KEEP = 1 # keep existing protocol 47 | ERROR = 2 # raise an error 48 | 49 | 50 | # To ease the understanding of future me, all comments inside Registry codebase 51 | # assume the existence of the following database.yml files. 52 | 53 | # ====================================== 54 | # Content of /path/to/first/database.yml 55 | # ====================================== 56 | # Databases: 57 | # DatabaseA: 58 | # - relative/path/A/trn/{uri}.wav 59 | # - relative/path/A/dev/{uri}.wav 60 | # - relative/path/A/tst/{uri}.wav 61 | # DatabaseB: /absolute/path/B/{uri}.wav 62 | # 63 | # Protocols: 64 | # DatabaseA: 65 | # SpeakerDiarization: 66 | # ProtocolA: 67 | # train: 68 | # uri: relative/path/A/trn.lst 69 | # development: 70 | # uri: relative/path/A/dev.lst 71 | # test: 72 | # uri: relative/path/A/tst.lst 73 | # ProtocolB: 74 | # ... 75 | # DatabaseB: 76 | # SpeakerDiarization: 77 | # Protocol: 78 | # ... 79 | # X: 80 | # SpeakerDiarization: 81 | # A_and_B: 82 | # train: ... 83 | # development: ... 84 | # test: ... 85 | 86 | # ====================================== 87 | # Content of /path/to/second/database.yml 88 | # ====================================== 89 | # Databases: 90 | # DatabaseC: /absolute/path/C/{uri}.wav 91 | # DatabaseB: /absolute/path/B/{uri}.wav 92 | # Protocols: 93 | # DatabaseB: 94 | # SpeakerDiarization: 95 | # Protocol: 96 | # ... 97 | # DatabaseC: 98 | # SpeakerDiarization: 99 | # Protocol: 100 | # ... 101 | 102 | 103 | class Registry: 104 | """Database and experimental protocols registry 105 | 106 | Usage 107 | ----- 108 | >>> from pyannote.database import registry 109 | >>> registry.load_database("/path/to/first/database.yml") 110 | >>> registry.load_database("/path/to/second/database.yml") 111 | """ 112 | 113 | def __init__(self) -> None: 114 | # Mapping of database.yml paths to their config in a dictionary 115 | # Example after loading both database.yml: 116 | # {"/path/to/first/database.yml": { 117 | # "Databases":{ 118 | # "DatabaseA": ["relative/path/A/trn/{uri}.wav", "relative/path/A/dev/{uri}.wav", relative/path/A/tst/{uri}.wav] 119 | # "DatabaseB": "/absolute/path/B/{uri}.wav" 120 | # }, 121 | # "Protocols":{ 122 | # "DatabaseA":{ 123 | # "SpeakerDiarization": { 124 | # "ProtocolA": { 125 | # "train": {"uri": "relative/path/A/trn.lst"}, 126 | # "development": {"uri": "relative/path/A/dev.lst"}, 127 | # "test": {"uri"; "relative/path/A/tst.lst"} 128 | # } 129 | # } 130 | # }, 131 | # "DatabaseB":{"SpeakerDiarization":{"Protocol": {...}}}, 132 | # "X":{"SpeakerDiarization":{"A_and_B":{...}}} 133 | # } 134 | # }, 135 | # "/path/to/second/database.yml": { 136 | # "Databases":{ 137 | # "DatabaseC": /absolute/path/C/{uri}.wav 138 | # "DatabaseB": "/absolute/path/B/{uri}.wav" 139 | # }, 140 | # "Protocols":{ 141 | # "DatabaseB":{"SpeakerDiarization": {"Protocol": {...}}}, 142 | # "DatabaseC":{...} 143 | # } 144 | # } 145 | # } 146 | self.configs: Dict[Path, Dict] = dict() 147 | 148 | # Content of the "Database" root item (= where to find file content) 149 | # Example after loading both database.yml: 150 | # { 151 | # "DatabaseA": [ 152 | # "/path/to/first/relative/path/A/trn/{uri}.wav", 153 | # "/path/to/first/relative/path/A/dev/{uri}.wav", 154 | # /path/to/first/relative/path/A/tst/{uri}.wav 155 | # ], 156 | # "DatabaseB": ["/absolute/path/B/{uri}.wav"], 157 | # "DatabaseC": ["/absolute/path/C/{uri}.wav"] 158 | # } 159 | self.sources: Dict[Text, List[Text]] = dict() 160 | 161 | # Mapping of database names to a type that inherits from Database 162 | # Example after loading both database.yml: 163 | # {"DatabaseA": pyannote.database.registry.DatabaseA, 164 | # "DatabaseB": pyannote.database.registry.DatabaseB, 165 | # "DatabaseC": pyannote.database.registry.DatabaseC, 166 | # "X": pyannote.database.registry.X} 167 | self.databases: Dict[Text, Type] = dict() 168 | 169 | def load_database( 170 | self, 171 | path: Union[Text, Path], 172 | mode: LoadingMode = LoadingMode.OVERRIDE, 173 | ): 174 | """Load YAML configuration file into the registry 175 | 176 | Parameters 177 | ---------- 178 | path : str or Path 179 | Path to YAML configuration file. 180 | mode : LoadingMode, optional 181 | Controls how to handle conflicts in protocol names. 182 | Defaults to overriding the existing protocol. 183 | 184 | Usage 185 | ----- 186 | >>> from pyannote.database import registry 187 | >>> registry.load_database("/path/to/database.yml") 188 | """ 189 | 190 | self._load_database_helper(path, mode=mode, loading=set()) 191 | self._reload_meta_protocols() 192 | 193 | def _load_database_helper( 194 | self, 195 | database_yml: Union[Text, Path], 196 | mode: LoadingMode = LoadingMode.KEEP, 197 | loading: Set[Path] = set(), 198 | ): 199 | """Helper function for recursive loading 200 | 201 | Parameters 202 | ---------- 203 | database_yml : Union[Text, Path] 204 | Path to the database.yml 205 | mode : LoadingMode, optional 206 | Controls how to handle conflicts in protocol names. 207 | Defaults to overriding the existing protocol. 208 | """ 209 | 210 | # make path absolute 211 | database_yml = Path(database_yml).expanduser().resolve() 212 | 213 | # stop here if configuration file is already being loaded 214 | # (possibly because of circular requirements) 215 | if database_yml in loading: 216 | return 217 | 218 | # mark it as currently being loaded (to avoid future circular requirements) 219 | loading.add(database_yml) 220 | 221 | # load configuration 222 | with open(database_yml, "r") as f: 223 | config = yaml.load(f, Loader=yaml.SafeLoader) 224 | 225 | # load every requirement 226 | requirements = config.pop("Requirements", list()) 227 | if not isinstance(requirements, list): 228 | requirements = [requirements] 229 | for requirement_yaml in requirements: 230 | requirement_yaml = Path(requirement_yaml) 231 | if not requirement_yaml.is_absolute(): 232 | requirement_yaml = database_yml.parent / requirement_yaml 233 | 234 | self._load_database_helper(requirement_yaml, mode=mode, loading=loading) 235 | 236 | # process "Protocols" section 237 | protocols = config.get("Protocols", dict()) 238 | 239 | # make sure meta-protocols are processed last (relies on the fact that 240 | # dicts are iterated in insertion order since Python 3.6) 241 | x = protocols.pop("X", None) 242 | if x is not None: 243 | protocols["X"] = x 244 | 245 | # load protocols of each database 246 | for db_name, db_entries in protocols.items(): 247 | self._load_protocols(db_name, db_entries, database_yml, mode=mode) 248 | 249 | # process "Databases" section 250 | databases = config.get("Databases", dict()) 251 | for db_name, value in databases.items(): 252 | if not isinstance(value, list): 253 | value = [value] 254 | 255 | path_list: List[str] = list() 256 | for p in value: 257 | path = Path(p) 258 | if not path.is_absolute(): 259 | path = database_yml.parent / path 260 | path_list.append(str(path)) 261 | self.sources[str(db_name)] = path_list 262 | 263 | # save configuration for later reloading of meta-protocols 264 | self.configs[database_yml] = config 265 | 266 | def get_database(self, database_name, **kwargs) -> Database: 267 | """Get database by name 268 | 269 | Parameters 270 | ---------- 271 | database_name : str 272 | Database name. 273 | 274 | Returns 275 | ------- 276 | database : Database 277 | Database instance 278 | """ 279 | 280 | try: 281 | database = self.databases[database_name] 282 | 283 | except KeyError: 284 | if database_name == "X": 285 | msg = ( 286 | "Could not find any meta-protocol. Please refer to " 287 | "pyannote.database documentation to learn how to define them: " 288 | "https://github.com/pyannote/pyannote-database" 289 | ) 290 | else: 291 | msg = ( 292 | 'Could not find any protocol for "{name}" database. Please ' 293 | "refer to pyannote.database documentation to learn how to " 294 | "define them: https://github.com/pyannote/pyannote-database" 295 | ) 296 | msg = msg.format(name=database_name) 297 | raise ValueError(msg) 298 | 299 | return database(**kwargs) 300 | 301 | def get_protocol( 302 | self, name, preprocessors: Optional[Preprocessors] = None 303 | ) -> Protocol: 304 | """Get protocol by full name 305 | 306 | Parameters 307 | ---------- 308 | name : str 309 | Protocol full name (e.g. "Etape.SpeakerDiarization.TV") 310 | preprocessors : dict or (key, preprocessor) iterable 311 | When provided, each protocol item (dictionary) are preprocessed, such 312 | that item[key] = preprocessor(item). In case 'preprocessor' is not 313 | callable, it should be a string containing placeholder for item keys 314 | (e.g. {'audio': '/path/to/{uri}.wav'}) 315 | 316 | Returns 317 | ------- 318 | protocol : Protocol 319 | Protocol instance 320 | """ 321 | 322 | database_name, task_name, protocol_name = name.split(".") 323 | database = self.get_database(database_name) 324 | protocol = database.get_protocol( 325 | task_name, protocol_name, preprocessors=preprocessors 326 | ) 327 | protocol.name = name 328 | return protocol 329 | 330 | # iterate over all protocols by name 331 | def __iter__(self): 332 | for database_name in self.databases: 333 | database = self.get_database(database_name) 334 | for task_name in database.get_tasks(): 335 | for protocol_name in database.get_protocols(task_name): 336 | yield f"{database_name}.{task_name}.{protocol_name}" 337 | 338 | def _load_protocols( 339 | self, 340 | db_name, 341 | db_entries: dict, 342 | database_yml: Union[Text, Path] = None, 343 | mode: LoadingMode = LoadingMode.OVERRIDE, 344 | ): 345 | """Load all protocols from this database into the registry. 346 | 347 | Parameters 348 | ---------- 349 | db_name : _type_ 350 | Name of the database 351 | db_entries : dict 352 | Dict of all entries under this database (this should be tasks) 353 | database_yml : Union[Text, Path], optional 354 | Path to the database.yml file. Not required for X protocols, by default None 355 | """ 356 | 357 | db_name = str(db_name) 358 | 359 | # maps tuple (task,protocol) to the custom protocol class 360 | protocols: Dict[Tuple[Text, Text], Type] = dict() 361 | 362 | for task_name, task_entries in db_entries.items(): 363 | for protocol, protocol_entries in task_entries.items(): 364 | protocol = str(protocol) 365 | CustomProtocol = create_protocol( 366 | db_name, task_name, protocol, protocol_entries, database_yml 367 | ) 368 | if CustomProtocol is None: 369 | continue 370 | 371 | protocols[(task_name, protocol)] = CustomProtocol 372 | 373 | # If needed, merge old protocols dict with the new one (according to current override rules) 374 | if db_name in self.databases: 375 | old_protocols = self.databases[db_name]._protocols 376 | _merge_protocols_inplace( 377 | protocols, old_protocols, mode, db_name, database_yml 378 | ) 379 | 380 | # create database class on-the-fly 381 | protocol_list = [ 382 | (task, p_name, p_type) for (task, p_name), p_type in protocols.items() 383 | ] 384 | self.databases[db_name] = type( 385 | db_name, 386 | (Database,), 387 | {"__init__": get_init(protocol_list), "_protocols": protocols}, 388 | ) 389 | 390 | def _reload_meta_protocols(self): 391 | """Reloads all meta protocols from all database.yml files loaded.""" 392 | 393 | # TODO: decide how to handle X protocol overriding. 394 | 395 | self.databases.pop("X", None) 396 | 397 | for db_yml, config in self.configs.items(): 398 | databases = config.get("Protocols", dict()) 399 | if "X" in databases: 400 | self._load_protocols( 401 | "X", databases["X"], db_yml, mode=LoadingMode.OVERRIDE 402 | ) 403 | 404 | 405 | def _env_config_paths() -> List[Path]: 406 | """Parse PYANNOTE_DATABASE_CONFIG environment variable 407 | 408 | PYANNOTE_DATABASE_CONFIG may contain multiple paths separation by ";". 409 | 410 | Returns 411 | ------- 412 | paths : list of Path 413 | List of all YAML database file defined in PYANNOTE_DATABASE_CONF 414 | """ 415 | 416 | content = os.environ.get("PYANNOTE_DATABASE_CONFIG", "") 417 | 418 | paths = [] 419 | for path in content.split(";"): 420 | path = Path(path).expanduser() 421 | if path.is_file(): 422 | paths.append(path) 423 | return paths 424 | 425 | 426 | def _find_default_ymls() -> List[Path]: 427 | """Get paths to default YAML configuration files 428 | 429 | * $HOME/.pyannote/database.yml 430 | * $CWD/database.yml 431 | * PYANNOTE_DATABASE_CONFIG environment variable 432 | 433 | Returns 434 | ------- 435 | paths : list of Path 436 | List of existing default YAML configuration files 437 | """ 438 | 439 | paths: List[Path] = [] 440 | 441 | home_db_yml = Path("~/.pyannote/database.yml").expanduser() 442 | if home_db_yml.is_file(): 443 | paths.append(home_db_yml) 444 | 445 | cwd_db_yml = Path.cwd() / "database.yml" 446 | if cwd_db_yml.is_file(): 447 | paths.append(cwd_db_yml) 448 | 449 | paths += _env_config_paths() 450 | 451 | return paths 452 | 453 | 454 | def _merge_protocols_inplace( 455 | new_protocols: Dict[Tuple[Text, Text], Type], 456 | old_protocols: Dict[Tuple[Text, Text], Type], 457 | mode: LoadingMode, 458 | db_name: str, 459 | database_yml: str, 460 | ): 461 | """Merge new and old protocols inplace into the passed new_protocol. 462 | 463 | Warning, merging order might be counterintuitive : "KEEP" strategy keeps element from the OLD protocol 464 | and MODIFIES the new protocol. 465 | 466 | TODO: make it intuitive :) 467 | 468 | Parameters 469 | ---------- 470 | new_protocols : Dict[Tuple[Text, Text], Type] 471 | New protocol dict 472 | Maps (task,protocol) tuples to custom protocol classes 473 | old_protocols : Dict[Tuple[Text, Text], Type] 474 | Old protocols dict 475 | Maps (task,protocol) tuples to custom protocol classes. 476 | mode : LoadingMode 477 | How to handle override 478 | db_name : _type_ 479 | Name of the database (for logging/warning purposes) 480 | database_yml : str 481 | Path of the database.yml file (for logging/warning purposes) 482 | """ 483 | 484 | # for all previously defined protocol (in old_protocols) 485 | for p_id, old_p in old_protocols.items(): 486 | # if this protocol is redefined 487 | if p_id in new_protocols: 488 | t_name, p_name = p_id 489 | realname = f"{db_name}.{t_name}.{p_name}" 490 | 491 | # raise an error 492 | if mode == LoadingMode.ERROR: 493 | raise RuntimeError( 494 | f"Cannot load {realname} protocol from '{database_yml}' as it already exists." 495 | ) 496 | 497 | # keep the new protocol 498 | elif mode == LoadingMode.OVERRIDE: 499 | warnings.warn( 500 | f"Replacing existing {realname} protocol by the one defined in '{database_yml}'." 501 | ) 502 | pass 503 | 504 | # keep the old protocol 505 | elif mode == LoadingMode.KEEP: 506 | warnings.warn( 507 | f"Skipping {realname} protocol defined in '{database_yml}' as it already exists." 508 | ) 509 | new_protocols[p_id] = old_p 510 | 511 | # no conflit : keep the previously defined protocol 512 | else: 513 | new_protocols[p_id] = old_p 514 | 515 | 516 | # initialize the registry singleton 517 | registry = Registry() 518 | 519 | # load all database yaml files found at startup 520 | for yml in _find_default_ymls(): 521 | registry.load_database(yml) 522 | -------------------------------------------------------------------------------- /src/pyannote/database/util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2016-2020 CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | import yaml 30 | from pathlib import Path 31 | import warnings 32 | import pandas as pd 33 | from pyannote.core import Segment, Timeline, Annotation 34 | from .protocol.protocol import ProtocolFile 35 | 36 | from typing import Text 37 | from typing import Union 38 | from typing import Dict 39 | from typing import List 40 | 41 | DatabaseName = Text 42 | PathTemplate = Text 43 | 44 | 45 | def get_unique_identifier(item): 46 | """Return unique item identifier 47 | 48 | The complete format is {database}/{uri}_{channel}: 49 | * prefixed by "{database}/" only when `item` has a 'database' key. 50 | * suffixed by "_{channel}" only when `item` has a 'channel' key. 51 | 52 | Parameters 53 | ---------- 54 | item : dict 55 | Item as yielded by pyannote.database protocols 56 | 57 | Returns 58 | ------- 59 | identifier : str 60 | Unique item identifier 61 | """ 62 | 63 | IDENTIFIER = "" 64 | 65 | # {database}/{uri}_{channel} 66 | database = item.get("database", None) 67 | if database is not None: 68 | IDENTIFIER += f"{database}/" 69 | IDENTIFIER += item["uri"] 70 | channel = item.get("channel", None) 71 | if channel is not None: 72 | IDENTIFIER += f"_{channel:d}" 73 | 74 | return IDENTIFIER 75 | 76 | 77 | # This function is used in custom.py 78 | def get_annotated(current_file): 79 | """Get part of the file that is annotated. 80 | 81 | Parameters 82 | ---------- 83 | current_file : `dict` 84 | File generated by a `pyannote.database` protocol. 85 | 86 | Returns 87 | ------- 88 | annotated : `pyannote.core.Timeline` 89 | Part of the file that is annotated. Defaults to 90 | `current_file["annotated"]`. When it does not exist, try to use the 91 | full audio extent. When that fails, use "annotation" extent. 92 | """ 93 | 94 | # if protocol provides 'annotated' key, use it 95 | if "annotated" in current_file: 96 | annotated = current_file["annotated"] 97 | return annotated 98 | 99 | # if it does not, but does provide 'audio' key 100 | # try and use wav duration 101 | 102 | if "duration" in current_file: 103 | try: 104 | duration = current_file["duration"] 105 | except ImportError: 106 | pass 107 | else: 108 | annotated = Timeline([Segment(0, duration)]) 109 | msg = '"annotated" was approximated by [0, audio duration].' 110 | warnings.warn(msg) 111 | return annotated 112 | 113 | extent = current_file["annotation"].get_timeline().extent() 114 | annotated = Timeline([extent]) 115 | 116 | msg = ( 117 | '"annotated" was approximated by "annotation" extent. ' 118 | 'Please provide "annotated" directly, or at the very ' 119 | 'least, use a "duration" preprocessor.' 120 | ) 121 | warnings.warn(msg) 122 | 123 | return annotated 124 | 125 | 126 | def get_label_identifier(label, current_file): 127 | """Return unique label identifier 128 | 129 | Parameters 130 | ---------- 131 | label : str 132 | Database-internal label 133 | current_file 134 | Yielded by pyannote.database protocols 135 | 136 | Returns 137 | ------- 138 | unique_label : str 139 | Global label 140 | """ 141 | 142 | # TODO. when the "true" name of a person is used, 143 | # do not preprend database name. 144 | database = current_file["database"] 145 | return database + "|" + label 146 | 147 | 148 | def load_rttm(file_rttm, keep_type="SPEAKER"): 149 | """Load RTTM file 150 | 151 | Parameter 152 | --------- 153 | file_rttm : `str` 154 | Path to RTTM file. 155 | keep_type : str, optional 156 | Only keep lines with this type (field #1 in RTTM specs). 157 | Defaults to "SPEAKER". 158 | 159 | Returns 160 | ------- 161 | annotations : `dict` 162 | Speaker diarization as a {uri: pyannote.core.Annotation} dictionary. 163 | """ 164 | 165 | names = [ 166 | "type", 167 | "uri", 168 | "NA2", 169 | "start", 170 | "duration", 171 | "NA3", 172 | "NA4", 173 | "speaker", 174 | "NA5", 175 | "NA6", 176 | ] 177 | dtype = {"uri": str, "start": float, "duration": float, "speaker": str} 178 | data = pd.read_csv( 179 | file_rttm, 180 | names=names, 181 | dtype=dtype, 182 | sep="\s+", 183 | keep_default_na=True, 184 | ) 185 | 186 | annotations = dict() 187 | for uri, turns in data.groupby("uri"): 188 | annotation = Annotation(uri=uri) 189 | for i, turn in turns.iterrows(): 190 | if turn.type != keep_type: 191 | continue 192 | segment = Segment(turn.start, turn.start + turn.duration) 193 | annotation[segment, i] = turn.speaker 194 | annotations[uri] = annotation 195 | 196 | return annotations 197 | 198 | 199 | def load_stm(file_stm): 200 | """Load STM file (speaker-info only) 201 | 202 | Parameter 203 | --------- 204 | file_stm : str 205 | Path to STM file 206 | 207 | Returns 208 | ------- 209 | annotations : `dict` 210 | Speaker diarization as a {uri: pyannote.core.Annotation} dictionary. 211 | """ 212 | 213 | dtype = {"uri": str, "speaker": str, "start": float, "end": float} 214 | data = pd.read_csv( 215 | file_stm, 216 | sep="\s+", 217 | usecols=[0, 2, 3, 4], 218 | dtype=dtype, 219 | names=list(dtype), 220 | ) 221 | 222 | annotations = dict() 223 | for uri, turns in data.groupby("uri"): 224 | annotation = Annotation(uri=uri) 225 | for i, turn in turns.iterrows(): 226 | segment = Segment(turn.start, turn.end) 227 | annotation[segment, i] = turn.speaker 228 | annotations[uri] = annotation 229 | 230 | return annotations 231 | 232 | 233 | def load_mdtm(file_mdtm): 234 | """Load MDTM file 235 | 236 | Parameter 237 | --------- 238 | file_mdtm : `str` 239 | Path to MDTM file. 240 | 241 | Returns 242 | ------- 243 | annotations : `dict` 244 | Speaker diarization as a {uri: pyannote.core.Annotation} dictionary. 245 | """ 246 | 247 | names = ["uri", "NA1", "start", "duration", "NA2", "NA3", "NA4", "speaker"] 248 | dtype = {"uri": str, "start": float, "duration": float, "speaker": str} 249 | data = pd.read_csv( 250 | file_mdtm, 251 | names=names, 252 | dtype=dtype, 253 | sep="\s+", 254 | keep_default_na=False, 255 | ) 256 | 257 | annotations = dict() 258 | for uri, turns in data.groupby("uri"): 259 | annotation = Annotation(uri=uri) 260 | for i, turn in turns.iterrows(): 261 | segment = Segment(turn.start, turn.start + turn.duration) 262 | annotation[segment, i] = turn.speaker 263 | annotations[uri] = annotation 264 | 265 | return annotations 266 | 267 | 268 | def load_uem(file_uem): 269 | """Load UEM file 270 | 271 | Parameter 272 | --------- 273 | file_uem : `str` 274 | Path to UEM file. 275 | 276 | Returns 277 | ------- 278 | timelines : `dict` 279 | Evaluation map as a {uri: pyannote.core.Timeline} dictionary. 280 | """ 281 | 282 | names = ["uri", "NA1", "start", "end"] 283 | dtype = {"uri": str, "start": float, "end": float} 284 | data = pd.read_csv(file_uem, names=names, dtype=dtype, sep="\s+") 285 | 286 | timelines = dict() 287 | for uri, parts in data.groupby("uri"): 288 | segments = [Segment(part.start, part.end) for i, part in parts.iterrows()] 289 | timelines[uri] = Timeline(segments=segments, uri=uri) 290 | 291 | return timelines 292 | 293 | 294 | def load_lab(path, uri: str = None) -> Annotation: 295 | """Load LAB file 296 | 297 | Parameter 298 | --------- 299 | file_lab : `str` 300 | Path to LAB file 301 | 302 | Returns 303 | ------- 304 | data : `pyannote.core.Annotation` 305 | """ 306 | 307 | names = ["start", "end", "label"] 308 | dtype = {"start": float, "end": float, "label": str} 309 | data = pd.read_csv(path, names=names, dtype=dtype, sep="\s+") 310 | 311 | annotation = Annotation(uri=uri) 312 | for i, turn in data.iterrows(): 313 | segment = Segment(turn.start, turn.end) 314 | annotation[segment, i] = turn.label 315 | 316 | return annotation 317 | 318 | 319 | def load_lst(file_lst): 320 | """Load LST file 321 | 322 | LST files provide a list of URIs (one line per URI) 323 | 324 | Parameter 325 | --------- 326 | file_lst : `str` 327 | Path to LST file. 328 | 329 | Returns 330 | ------- 331 | uris : `list` 332 | List or uris 333 | """ 334 | 335 | with open(file_lst, mode="r") as fp: 336 | lines = fp.readlines() 337 | return [line.strip() for line in lines] 338 | 339 | 340 | def load_mapping(mapping_txt): 341 | """Load mapping file 342 | 343 | Parameter 344 | --------- 345 | mapping_txt : `str` 346 | Path to mapping file 347 | 348 | Returns 349 | ------- 350 | mapping : `dict` 351 | {1st field: 2nd field} dictionary 352 | """ 353 | 354 | with open(mapping_txt, mode="r") as fp: 355 | lines = fp.readlines() 356 | 357 | mapping = dict() 358 | for line in lines: 359 | key, value, *left = line.strip().split() 360 | mapping[key] = value 361 | 362 | return mapping 363 | 364 | 365 | class LabelMapper(object): 366 | """Label mapper for use as pyannote.database preprocessor 367 | 368 | Parameters 369 | ---------- 370 | mapping : `dict` 371 | Mapping dictionary as used in `Annotation.rename_labels()`. 372 | keep_missing : `bool`, optional 373 | In case a label has no mapping, a `ValueError` will be raised. 374 | Set "keep_missing" to True to keep those labels unchanged instead. 375 | 376 | Usage 377 | ----- 378 | >>> mapping = {'Hadrien': 'MAL', 'Marvin': 'MAL', 379 | ... 'Wassim': 'CHI', 'Herve': 'GOD'} 380 | >>> preprocessors = {'annotation': LabelMapper(mapping=mapping)} 381 | >>> protocol = registry.get_protocol('AMI.SpeakerDiarization.MixHeadset', 382 | preprocessors=preprocessors) 383 | 384 | """ 385 | 386 | def __init__(self, mapping, keep_missing=False): 387 | self.mapping = mapping 388 | self.keep_missing = keep_missing 389 | 390 | def __call__(self, current_file): 391 | if not self.keep_missing: 392 | missing = set(current_file["annotation"].labels()) - set(self.mapping) 393 | if missing and not self.keep_missing: 394 | label = missing.pop() 395 | msg = ( 396 | f'No mapping found for label "{label}". Set "keep_missing" ' 397 | f"to True to keep labels with no mapping." 398 | ) 399 | raise ValueError(msg) 400 | 401 | return current_file["annotation"].rename_labels(mapping=self.mapping) 402 | -------------------------------------------------------------------------------- /tests/data/audio/filename1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyannote/pyannote-database/e5b8a5581bfe4ec05ac7bbfc1be9cd66f41b8f36/tests/data/audio/filename1.wav -------------------------------------------------------------------------------- /tests/data/audio/filename2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyannote/pyannote-database/e5b8a5581bfe4ec05ac7bbfc1be9cd66f41b8f36/tests/data/audio/filename2.wav -------------------------------------------------------------------------------- /tests/data/ctms/filename1.ctm: -------------------------------------------------------------------------------- 1 | filename1 A 0.2 0.3 hello 0.9 2 | filename1 A 0.3 0.4 world 0.8 3 | -------------------------------------------------------------------------------- /tests/data/ctms/filename2.ctm: -------------------------------------------------------------------------------- 1 | filename2 A 0.2 0.3 how 0.9 2 | filename2 A 0.5 0.4 are 0.8 3 | filename2 A 0.9 0.2 you 0.9 4 | -------------------------------------------------------------------------------- /tests/data/database.yml: -------------------------------------------------------------------------------- 1 | Protocols: 2 | MyDatabase: 3 | 4 | Collection: 5 | MyCollection: 6 | uri: lists/train.lst 7 | 8 | Protocol: 9 | MyProtocol: 10 | train: 11 | uri: lists/train.lst 12 | speaker: rttms/train.rttm 13 | transcription: _ctms/{uri}.ctm 14 | domain: mapping/domain.map 15 | 16 | SpeakerDiarization: 17 | MySpeakerDiarization: 18 | train: 19 | uri: lists/train.lst 20 | annotation: rttms/train.rttm 21 | annotated: uems/train.uem 22 | 23 | SpeakerVerification: 24 | MySpeakerVerification: 25 | train: 26 | uri: lists/train.lst 27 | annotation: rttms/train.rttm # we need the annotation to specify the speaker ! 28 | # duration: mapping/duration.map # can use duration or annotated 29 | annotated: uems/train.uem 30 | trial: trial/train.trial 31 | 32 | X: 33 | SpeakerDiarization: 34 | MyMetaProtocol: 35 | train: 36 | MyDatabase.Protocol.MyProtocol: [train, ] 37 | development: 38 | MyDatabase.SpeakerDiarization.MySpeakerDiarization: [train, ] 39 | MyDatabase.SpeakerVerification.MySpeakerVerification: [train, ] 40 | -------------------------------------------------------------------------------- /tests/data/lists/train.lst: -------------------------------------------------------------------------------- 1 | filename1 2 | filename2 -------------------------------------------------------------------------------- /tests/data/mapping/domain.map: -------------------------------------------------------------------------------- 1 | filename1 phone 2 | filename2 radio 3 | -------------------------------------------------------------------------------- /tests/data/mapping/duration.map: -------------------------------------------------------------------------------- 1 | filename1 2.0 2 | filename2 2.0 3 | -------------------------------------------------------------------------------- /tests/data/rttms/train.rttm: -------------------------------------------------------------------------------- 1 | SPEAKER filename1 1 0.2 0.7 speaker_A 2 | SPEAKER filename2 1 0.2 0.9 speaker_B 3 | -------------------------------------------------------------------------------- /tests/data/trial/train.trial: -------------------------------------------------------------------------------- 1 | 0 filename1 filename2 -------------------------------------------------------------------------------- /tests/data/uems/train.uem: -------------------------------------------------------------------------------- 1 | filename1 NA 0.000 2.000 2 | filename2 NA 0.000 2.000 3 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2023- CNRS 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | # AUTHORS 27 | # Hervé BREDIN - http://herve.niderb.fr 28 | 29 | 30 | from pyannote.database import registry 31 | from pyannote.database.protocol import CollectionProtocol 32 | from pyannote.database.protocol import Protocol 33 | from pyannote.database.protocol import SpeakerDiarizationProtocol 34 | from pyannote.database.protocol import SpeakerVerificationProtocol 35 | 36 | assert "MyDatabase" in registry.databases 37 | 38 | database = registry.get_database("MyDatabase") 39 | tasks = database.get_tasks() 40 | assert "Collection" in tasks 41 | assert "Protocol" in tasks 42 | assert "SpeakerDiarization" in tasks 43 | assert "SpeakerVerification" in tasks 44 | 45 | assert "MyCollection" in database.get_protocols("Collection") 46 | assert "MyProtocol" in database.get_protocols("Protocol") 47 | assert "MySpeakerDiarization" in database.get_protocols("SpeakerDiarization") 48 | assert "MySpeakerVerification" in database.get_protocols("SpeakerVerification") 49 | 50 | 51 | collection = registry.get_protocol("MyDatabase.Collection.MyCollection") 52 | assert isinstance(collection, CollectionProtocol) 53 | 54 | protocol = registry.get_protocol("MyDatabase.Protocol.MyProtocol") 55 | assert isinstance(protocol, Protocol) 56 | 57 | speaker_diarization = registry.get_protocol( 58 | "MyDatabase.SpeakerDiarization.MySpeakerDiarization" 59 | ) 60 | assert isinstance(speaker_diarization, SpeakerDiarizationProtocol) 61 | 62 | speaker_verification = registry.get_protocol( 63 | "MyDatabase.SpeakerVerification.MySpeakerVerification" 64 | ) 65 | assert isinstance(speaker_verification, SpeakerVerificationProtocol) 66 | 67 | 68 | files = list(collection.files()) 69 | assert len(files) == 2 70 | 71 | files = list(protocol.files()) 72 | assert len(files) == 2 73 | 74 | files = list(speaker_diarization.files()) 75 | assert len(files) == 2 76 | 77 | files = list(speaker_verification.files()) 78 | assert len(files) == 2 79 | 80 | 81 | meta_protocol = registry.get_protocol("X.SpeakerDiarization.MyMetaProtocol") 82 | files = list(meta_protocol.train()) 83 | assert len(files) == 2 84 | 85 | files = list(meta_protocol.development()) 86 | assert len(files) == 4 87 | -------------------------------------------------------------------------------- /tests/test_registry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2023- CNRS 7 | # Copyright (c) 2023- Université Paul Sabatier 8 | 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | # SOFTWARE. 26 | 27 | # AUTHORS 28 | # Alexis PLAQUET 29 | # Hervé BREDIN - http://herve.niderb.fr 30 | 31 | import warnings 32 | import pytest 33 | 34 | from pyannote.database.registry import LoadingMode, _merge_protocols_inplace 35 | 36 | def test_override_merging_disjoint(): 37 | protocols1 = { 38 | ("Task1", "Protocol1"): None, 39 | } 40 | protocols2 = { 41 | ("OtherTask", "Protocol1"): 42, 42 | } 43 | 44 | with warnings.catch_warnings(): 45 | warnings.simplefilter("error") # expect no warning 46 | _merge_protocols_inplace(protocols1, protocols2, LoadingMode.KEEP, "", "") 47 | 48 | assert ("Task1", "Protocol1",) in protocols1 49 | assert ("OtherTask", "Protocol1",) in protocols1 50 | assert len(protocols1) == 2 51 | 52 | def test_override_merging_identical(): 53 | 54 | protocols2 = { 55 | ("Task1", "Protocol1"): None, 56 | } # the "old" protocols dict. KEEP override options will keep these entries. 57 | 58 | # Expect warning and protocols1 to become protocols2 (keep old value) 59 | protocols1 = { 60 | ("Task1", "Protocol1"): 42, 61 | } 62 | with pytest.warns(Warning) as w: 63 | _merge_protocols_inplace(protocols1, protocols2, LoadingMode.KEEP, "", "") 64 | assert ("Task1", "Protocol1") in protocols1 65 | assert protocols1[("Task1", "Protocol1")] == None 66 | assert len(protocols1) == 1 67 | 68 | # Expect warning and protocols1 to keep its value (use new value) 69 | protocols1 = { 70 | ("Task1", "Protocol1"): 42, 71 | } 72 | with pytest.warns(Warning) as w: 73 | _merge_protocols_inplace(protocols1, protocols2, LoadingMode.OVERRIDE, "", "") 74 | assert ("Task1", "Protocol1") in protocols1 75 | assert protocols1[("Task1", "Protocol1")] == 42 76 | assert len(protocols1) == 1 77 | -------------------------------------------------------------------------------- /tests/trial.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2023- CNRS 7 | # Copyright (c) 2023- Université Paul Sabatier 8 | 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | # SOFTWARE. 26 | 27 | # AUTHORS 28 | # Hervé BREDIN - http://herve.niderb.fr 29 | # Alexis PLAQUET 30 | 31 | 32 | from pyannote.database import registry 33 | 34 | protocol = registry.get_protocol('MyDatabase.SpeakerVerification.MySpeakerVerification') 35 | for elt in protocol.train_trial(): 36 | print(elt) 37 | print(elt['file1']['try_with']) 38 | file1_annotation = elt['file1']['annotation'] 39 | print('annotation : ', file1_annotation) 40 | file1_annotated = elt['file1']['annotated'] 41 | print('annotated : ', file1_annotated) --------------------------------------------------------------------------------