├── .deepsource.toml ├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── main.yml │ ├── publish.yml │ └── publish_docker.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── Dockerfile ├── LICENSE ├── README.md ├── bin └── mfa_update ├── ci └── docker_environment.yaml ├── docs ├── Makefile ├── make.bat └── source │ ├── _static │ ├── MFA_default.svg │ ├── MFA_dnn.svg │ ├── MFA_dnn_ivectors.svg │ ├── MFA_paper_Interspeech2017.pdf │ ├── MFA_poster_LSA2017.pdf │ ├── because.svg │ ├── css │ │ └── mfa.css │ ├── favicon.ico │ ├── fonts │ │ ├── GentiumPlus-Bold.woff │ │ ├── GentiumPlus-Bold.woff2 │ │ ├── GentiumPlus-BoldItalic.woff │ │ ├── GentiumPlus-BoldItalic.woff2 │ │ ├── GentiumPlus-Italic.woff │ │ ├── GentiumPlus-Italic.woff2 │ │ ├── GentiumPlus-Regular.woff │ │ └── GentiumPlus-Regular.woff2 │ ├── interrogate_badge.svg │ ├── librispeech_textgrid.png │ ├── logo.svg │ ├── logo_dark.svg │ ├── logo_light.svg │ ├── logo_long.svg │ ├── logo_long_dark.svg │ ├── logo_long_light.svg │ ├── logo_stacked.svg │ ├── logo_stacked_dark.svg │ ├── logo_stacked_light.svg │ ├── lot.svg │ ├── multiple_speakers_output_textgrid.png │ ├── multiple_speakers_textgrid.png │ ├── sound_files │ │ ├── english_fast.svg │ │ ├── english_fast.wav │ │ ├── english_slow.svg │ │ ├── english_slow.wav │ │ ├── english_t.svg │ │ ├── english_t.wav │ │ ├── english_t_it's.svg │ │ ├── english_t_it.svg │ │ ├── english_t_itself.svg │ │ ├── english_t_just.svg │ │ ├── english_t_onto.svg │ │ ├── english_t_righted.svg │ │ ├── english_t_stop.svg │ │ ├── english_t_tipped.svg │ │ ├── english_t_to.svg │ │ ├── english_t_top.svg │ │ ├── english_t_truck.svg │ │ ├── japanese_fast.svg │ │ ├── japanese_fast.wav │ │ ├── japanese_slow.svg │ │ └── japanese_slow.wav │ └── the.svg │ ├── _templates │ ├── autosummary │ │ ├── attribute.rst │ │ ├── base.rst │ │ ├── class.rst │ │ ├── function.rst │ │ ├── method.rst │ │ └── property.rst │ └── version.html │ ├── changelog │ ├── changelog_1.0.rst │ ├── changelog_2.0.rst │ ├── changelog_2.0_pre_release.rst │ ├── changelog_2.1.rst │ ├── changelog_2.2.rst │ ├── changelog_3.0.rst │ ├── changelog_3.1.rst │ ├── changelog_3.2.rst │ ├── index.md │ ├── news_1.1.rst │ ├── news_2.0.rst │ ├── news_2.1.rst │ └── news_3.0.rst │ ├── conf.py │ ├── external_links.py │ ├── first_steps │ ├── example.rst │ ├── index.rst │ └── tutorials.rst │ ├── getting_started.rst │ ├── index.rst │ ├── installation.rst │ ├── reference │ ├── acoustic_modeling │ │ ├── helper.rst │ │ ├── index.rst │ │ └── training.rst │ ├── alignment │ │ ├── alignment.rst │ │ ├── helper.rst │ │ └── index.rst │ ├── core_index.rst │ ├── corpus │ │ └── index.rst │ ├── database │ │ └── index.rst │ ├── diarization │ │ ├── helper.rst │ │ ├── index.rst │ │ └── main.rst │ ├── dictionary │ │ ├── helper.rst │ │ ├── index.rst │ │ ├── main.rst │ │ └── training.rst │ ├── g2p │ │ ├── generator.rst │ │ ├── helper.rst │ │ └── index.rst │ ├── g2p_modeling │ │ ├── helper.rst │ │ ├── index.rst │ │ └── training.rst │ ├── helper │ │ ├── abc.rst │ │ ├── config.rst │ │ ├── data.rst │ │ ├── exceptions.rst │ │ ├── helper.rst │ │ ├── index.rst │ │ ├── textgrid.rst │ │ └── utils.rst │ ├── index.rst │ ├── ivector │ │ ├── helper.rst │ │ ├── index.rst │ │ └── training.rst │ ├── language_modeling │ │ ├── helper.rst │ │ ├── index.rst │ │ └── training.rst │ ├── segmentation │ │ ├── helper.rst │ │ ├── index.rst │ │ └── main.rst │ ├── server │ │ └── index.rst │ ├── tokenization │ │ ├── helper.rst │ │ ├── index.rst │ │ ├── 
tokenizer.rst │ │ └── training.rst │ ├── top_level_index.rst │ ├── transcription │ │ ├── helper.rst │ │ ├── index.rst │ │ └── main.rst │ └── validation │ │ ├── helper.rst │ │ ├── index.rst │ │ └── main.rst │ └── user_guide │ ├── commands.rst │ ├── concepts │ ├── features.md │ ├── fst.md │ ├── hmm.md │ ├── index.rst │ └── speaker_adaptation.md │ ├── configuration │ ├── acoustic_model_adapt.rst │ ├── acoustic_modeling.rst │ ├── diarization.rst │ ├── g2p.rst │ ├── global.rst │ ├── index.rst │ ├── ivector.rst │ ├── lm.rst │ ├── segment.rst │ └── transcription.rst │ ├── corpus_creation │ ├── anchor.rst │ ├── create_segments.rst │ ├── diarize_speakers.rst │ ├── index.rst │ ├── tokenize.rst │ ├── train_ivector.rst │ ├── train_tokenizer.rst │ ├── training_dictionary.rst │ ├── training_lm.rst │ └── transcribing.rst │ ├── corpus_structure.rst │ ├── data_validation.rst │ ├── dictionary.rst │ ├── dictionary_validation.rst │ ├── glossary.rst │ ├── implementations │ ├── alignment_analysis.md │ ├── alignment_evaluation.md │ ├── fine_tune.md │ ├── index.md │ ├── lexicon_probabilities.md │ ├── phone_groups.md │ ├── phone_models.md │ └── phonological_rules.md │ ├── index.rst │ ├── models │ └── index.rst │ ├── server │ └── index.rst │ ├── troubleshooting.rst │ └── workflows │ ├── adapt_acoustic_model.rst │ ├── alignment.rst │ ├── dictionary_generating.rst │ ├── g2p_train.rst │ ├── index.rst │ └── train_acoustic_model.rst ├── environment.yml ├── github_environment.yml ├── montreal_forced_aligner ├── __init__.py ├── __main__.py ├── abc.py ├── acoustic_modeling │ ├── __init__.py │ ├── base.py │ ├── lda.py │ ├── monophone.py │ ├── pronunciation_probabilities.py │ ├── sat.py │ ├── trainer.py │ └── triphone.py ├── alignment │ ├── __init__.py │ ├── adapting.py │ ├── base.py │ ├── mixins.py │ ├── multiprocessing.py │ └── pretrained.py ├── command_line │ ├── __init__.py │ ├── adapt.py │ ├── align.py │ ├── align_one.py │ ├── anchor.py │ ├── configure.py │ ├── create_segments.py │ ├── diarize_speakers.py │ ├── g2p.py │ ├── history.py │ ├── mfa.py │ ├── model.py │ ├── server.py │ ├── tokenize.py │ ├── train_acoustic_model.py │ ├── train_dictionary.py │ ├── train_g2p.py │ ├── train_ivector_extractor.py │ ├── train_lm.py │ ├── train_tokenizer.py │ ├── transcribe.py │ ├── utils.py │ └── validate.py ├── config.py ├── corpus │ ├── __init__.py │ ├── acoustic_corpus.py │ ├── base.py │ ├── classes.py │ ├── features.py │ ├── helper.py │ ├── ivector_corpus.py │ ├── multiprocessing.py │ └── text_corpus.py ├── data.py ├── db.py ├── diarization │ ├── __init__.py │ ├── multiprocessing.py │ └── speaker_diarizer.py ├── dictionary │ ├── __init__.py │ ├── mixins.py │ └── multispeaker.py ├── exceptions.py ├── g2p │ ├── __init__.py │ ├── generator.py │ ├── mixins.py │ ├── phonetisaurus_trainer.py │ └── trainer.py ├── helper.py ├── ivector │ ├── __init__.py │ ├── multiprocessing.py │ └── trainer.py ├── language_modeling │ ├── __init__.py │ ├── multiprocessing.py │ └── trainer.py ├── models.py ├── online │ ├── __init__.py │ ├── alignment.py │ └── transcription.py ├── textgrid.py ├── tokenization │ ├── __init__.py │ ├── chinese.py │ ├── english.py │ ├── japanese.py │ ├── korean.py │ ├── resources │ │ └── japanese │ │ │ ├── char.def │ │ │ ├── mfa_sudachi.dic │ │ │ ├── rewrite.def │ │ │ ├── sudachi_config.json │ │ │ └── unk.def │ ├── simple.py │ ├── spacy.py │ ├── thai.py │ ├── tokenizer.py │ └── trainer.py ├── transcription │ ├── __init__.py │ ├── models.py │ ├── multiprocessing.py │ └── transcriber.py ├── utils.py ├── vad │ ├── __init__.py │ 
├── models.py │ ├── multiprocessing.py │ └── segmenter.py └── validation │ ├── __init__.py │ ├── corpus_validator.py │ └── dictionary_validator.py ├── pyproject.toml ├── requirements.txt ├── rtd_environment.yml ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── conftest.py ├── data │ ├── am │ │ ├── acoustic_g2p_output_model.zip │ │ └── mono_model.zip │ ├── configs │ │ ├── acoustic │ │ │ ├── bad_topology.yaml │ │ │ ├── english_mfa_phone_groups.yaml │ │ │ ├── english_mfa_rules.yaml │ │ │ └── english_mfa_topology.yaml │ │ ├── bad_align_config.yaml │ │ ├── basic_align_config.yaml │ │ ├── basic_ipa_config.yaml │ │ ├── basic_segment_config.yaml │ │ ├── basic_train_config.yaml │ │ ├── basic_train_lm.yaml │ │ ├── different_punctuation_config.yaml │ │ ├── eval_mapping.yaml │ │ ├── g2p_config.yaml │ │ ├── ivector_train.yaml │ │ ├── lda_sat_train.yaml │ │ ├── lda_train.yaml │ │ ├── mono_align.yaml │ │ ├── mono_train.yaml │ │ ├── no_punctuation_config.yaml │ │ ├── out_of_order_config.yaml │ │ ├── pitch_tri_train.yaml │ │ ├── pron_train.yaml │ │ ├── sat_train.yaml │ │ ├── test_groups.yaml │ │ ├── test_rules.yaml │ │ ├── train_g2p_acoustic.yaml │ │ ├── train_g2p_config.yaml │ │ ├── transcribe.yaml │ │ ├── tri_train.yaml │ │ └── xsampa_train.yaml │ ├── dictionaries │ │ ├── acoustic_g2p_dictionary.yaml │ │ ├── english_us_mfa_reduced.dict │ │ ├── expected │ │ │ ├── graphemes.txt │ │ │ ├── lexicon.text.fst │ │ │ ├── phone_map.txt │ │ │ ├── phones.txt │ │ │ ├── phones │ │ │ │ ├── extra_questions.int │ │ │ │ ├── extra_questions.txt │ │ │ │ ├── roots.int │ │ │ │ ├── roots.txt │ │ │ │ ├── sets.int │ │ │ │ ├── sets.txt │ │ │ │ ├── word_boundary.int │ │ │ │ └── word_boundary.txt │ │ │ ├── topo │ │ │ └── words.txt │ │ ├── test_abstract.txt │ │ ├── test_acoustic.txt │ │ ├── test_basic.txt │ │ ├── test_chinese_dict.txt │ │ ├── test_extra_annotations.txt │ │ ├── test_frclitics.txt │ │ ├── test_hindi.txt │ │ ├── test_japanese.txt │ │ ├── test_mixed_format_dictionary.txt │ │ ├── test_tabbed_dictionary.txt │ │ ├── test_vietnamese_ipa.txt │ │ └── test_xsampa.txt │ ├── lab │ │ ├── 13697_11991_000000.lab │ │ ├── 61-70968-0000.lab │ │ ├── acoustic_corpus.lab │ │ ├── cold_corpus.lab │ │ ├── cold_corpus3.lab │ │ ├── cold_corpus3_extra.lab │ │ ├── common_voice_en_22058264.lab │ │ ├── common_voice_en_22058266.lab │ │ ├── common_voice_en_22058267.lab │ │ ├── common_voice_ja_24511055.lab │ │ ├── devanagari.lab │ │ ├── french_clitics.lab │ │ ├── japanese.lab │ │ ├── multilingual_ipa.txt │ │ ├── multilingual_ipa_2.txt │ │ ├── multilingual_ipa_3.txt │ │ ├── multilingual_ipa_4.txt │ │ ├── multilingual_ipa_5.txt │ │ ├── multilingual_ipa_us.txt │ │ ├── multilingual_ipa_us_2.txt │ │ ├── multilingual_ipa_us_3.txt │ │ ├── multilingual_ipa_us_4.txt │ │ ├── multilingual_ipa_us_5.txt │ │ ├── punctuated.lab │ │ ├── se10x016-08071999-1334_u0016001.lab │ │ ├── se10x016-08071999-1334_u0016002.lab │ │ ├── se10x016-08071999-1334_u0016003.lab │ │ ├── se10x016-08071999-1334_u0016004.lab │ │ ├── weird_words.lab │ │ ├── xsampa.lab │ │ └── 日本語.lab │ ├── lm │ │ ├── test_lm.arpa │ │ └── test_lm.zip │ ├── textgrid │ │ ├── 61-70968-0000.TextGrid │ │ ├── acoustic_corpus.TextGrid │ │ ├── cold_corpus.TextGrid │ │ ├── cold_corpus3.TextGrid │ │ ├── michaelandsickmichael.TextGrid │ │ ├── michaelandsickmichael_short_tg.TextGrid │ │ ├── multilingual_ipa.TextGrid │ │ ├── multilingual_ipa_2.TextGrid │ │ ├── multilingual_ipa_3.TextGrid │ │ ├── multilingual_ipa_4.TextGrid │ │ ├── multilingual_ipa_5.TextGrid │ │ ├── multilingual_ipa_us.TextGrid │ │ ├── 
multilingual_ipa_us_2.TextGrid │ │ ├── multilingual_ipa_us_3.TextGrid │ │ ├── multilingual_ipa_us_4.TextGrid │ │ ├── multilingual_ipa_us_5.TextGrid │ │ ├── short_segments.TextGrid │ │ └── vietnamese.TextGrid │ ├── tokenizer │ │ ├── test_tokenizer_model.zip │ │ └── test_tokenizer_model_phonetisaurus.zip │ └── wav │ │ ├── 13697_11991_000000.opus │ │ ├── 61-70968-0000.flac │ │ ├── acoustic_corpus.wav │ │ ├── cold_corpus.wav │ │ ├── cold_corpus3.wav │ │ ├── cold_corpus_24bit.wav │ │ ├── cold_corpus_32bit_float.wav │ │ ├── common_voice_en_22058264.mp3 │ │ ├── common_voice_en_22058266.mp3 │ │ ├── common_voice_en_22058267.mp3 │ │ ├── common_voice_ja_24511055.mp3 │ │ ├── dummy.mp3 │ │ ├── dummy.wav │ │ ├── falsetto.flac │ │ ├── falsetto2.flac │ │ ├── mfa_a.flac │ │ ├── mfa_affectation.flac │ │ ├── mfa_apex.flac │ │ ├── mfa_bottle.flac │ │ ├── mfa_breaths.flac │ │ ├── mfa_breathy.flac │ │ ├── mfa_buddy.flac │ │ ├── mfa_creaky.flac │ │ ├── mfa_crossword.flac │ │ ├── mfa_cutoff.flac │ │ ├── mfa_cutoffprogressive.flac │ │ ├── mfa_er.flac │ │ ├── mfa_erpause.flac │ │ ├── mfa_exaggerated.flac │ │ ├── mfa_falsetto.flac │ │ ├── mfa_her.flac │ │ ├── mfa_hes.flac │ │ ├── mfa_internalsil.flac │ │ ├── mfa_kmg.flac │ │ ├── mfa_laughter.flac │ │ ├── mfa_long.flac │ │ ├── mfa_longstop.flac │ │ ├── mfa_michael.flac │ │ ├── mfa_patty.flac │ │ ├── mfa_poofy.flac │ │ ├── mfa_pooty.flac │ │ ├── mfa_puddy.flac │ │ ├── mfa_putty.flac │ │ ├── mfa_puttynorm.flac │ │ ├── mfa_reallylong.flac │ │ ├── mfa_registershift.flac │ │ ├── mfa_surround.flac │ │ ├── mfa_the.flac │ │ ├── mfa_theapprox.flac │ │ ├── mfa_theinitialstop.flac │ │ ├── mfa_thenorm.flac │ │ ├── mfa_theother.flac │ │ ├── mfa_thestop.flac │ │ ├── mfa_thez.flac │ │ ├── mfa_thoughts.flac │ │ ├── mfa_uh.flac │ │ ├── mfa_uhuh.flac │ │ ├── mfa_uhum.flac │ │ ├── mfa_um.flac │ │ ├── mfa_unk.flac │ │ ├── mfa_whatscalled.flac │ │ ├── mfa_whisper.flac │ │ ├── mfa_words.flac │ │ ├── mfa_youknow.flac │ │ ├── michaelandsickmichael.wav │ │ ├── multilingual_ipa.flac │ │ ├── multilingual_ipa_2.flac │ │ ├── multilingual_ipa_3.flac │ │ ├── multilingual_ipa_4.flac │ │ ├── multilingual_ipa_5.flac │ │ ├── multilingual_ipa_us.flac │ │ ├── multilingual_ipa_us_2.flac │ │ ├── multilingual_ipa_us_3.flac │ │ ├── multilingual_ipa_us_4.flac │ │ ├── multilingual_ipa_us_5.flac │ │ ├── se10x016-08071999-1334_u0016001.wav │ │ ├── se10x016-08071999-1334_u0016002.wav │ │ ├── se10x016-08071999-1334_u0016003.wav │ │ ├── se10x016-08071999-1334_u0016004.wav │ │ ├── whisper.flac │ │ └── whisper2.flac ├── test_abc.py ├── test_acoustic_modeling.py ├── test_alignment_pretrained.py ├── test_commandline_adapt.py ├── test_commandline_align.py ├── test_commandline_configure.py ├── test_commandline_create_segments.py ├── test_commandline_diarize_speakers.py ├── test_commandline_g2p.py ├── test_commandline_history.py ├── test_commandline_lm.py ├── test_commandline_model.py ├── test_commandline_tokenize.py ├── test_commandline_train.py ├── test_commandline_train_dict.py ├── test_commandline_train_ivector.py ├── test_commandline_transcribe.py ├── test_commandline_validate.py ├── test_config.py ├── test_corpus.py ├── test_dict.py ├── test_g2p.py ├── test_gui.py ├── test_helper.py ├── test_segmentation.py └── test_validate.py └── tox.ini /.deepsource.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | 3 | test_patterns = ["tests/**"] 4 | 5 | [[analyzers]] 6 | name = "python" 7 | enabled = true 8 | 9 | [analyzers.meta] 10 | runtime_version = "3.x.x" 11 | 
max_line_length = 120 12 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | 2 | docs/* 3 | build/* 4 | .tox/* 5 | .github/* 6 | .pytest_cache/* 7 | tests/* 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: mmcauliffe 7 | 8 | --- 9 | 10 | **Debugging checklist** 11 | 12 | [ ] Have you read the troubleshooting page (https://montreal-forced-aligner.readthedocs.io/en/latest/user_guide/troubleshooting.html) and searched the documentation to ensure that your issue is not addressed there? 13 | [ ] Have you updated to the latest MFA version (check https://montreal-forced-aligner.readthedocs.io/en/latest/changelog/changelog_3.0.html)? What is the output of `mfa version`? 14 | [ ] Have you tried rerunning the command with the `--clean` flag? 15 | 16 | **Describe the issue** 17 | A clear and concise description of what the bug is. 18 | 19 | **Reproducing your issue** 20 | Please fill out the following: 21 | 22 | 1. Corpus structure 23 | * What language is the corpus in? 24 | * How many files/speakers? 25 | * Are you using lab files or TextGrid files for input? 26 | 2. Dictionary 27 | * Are you using a dictionary from MFA? If so, which one? 28 | * If it's a custom dictionary, what is the phoneset? 29 | 3. Acoustic model 30 | * If you're using an acoustic model, is it one downloaded through MFA? If so, which one? 31 | * If it's a model you've trained, what data was it trained on? 32 | 33 | **Log file** 34 | Please attach the log file for the run that encountered an error (by default these will be stored in `~/Documents/MFA`). 35 | 36 | **Desktop (please complete the following information):** 37 | - OS: [e.g. Windows, OSX, Linux] 38 | - Version [e.g. MacOSX 10.15, Ubuntu 20.04, Windows 10, etc] 39 | - Any other details about the setup (Cloud, Docker, etc) 40 | 41 | **Additional context** 42 | Add any other context about the problem here. 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest new functionality that would make MFA easier to use 4 | title: '' 5 | labels: enhancement 6 | assignees: mmcauliffe 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here.
21 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python distributions to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | build-n-publish: 9 | name: Build and publish to PyPI 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@main 13 | with: 14 | fetch-depth: 0 15 | - name: Set up Python 3.10 16 | uses: actions/setup-python@v3 17 | with: 18 | python-version: "3.10" 19 | 20 | - name: Install pypa/build 21 | run: >- 22 | python -m 23 | pip install 24 | build 25 | --user 26 | 27 | - name: Build a binary wheel and a source tarball 28 | run: >- 29 | python -m 30 | build 31 | --sdist 32 | --wheel 33 | --outdir dist/ 34 | . 35 | 36 | - name: Publish to PyPI 37 | uses: pypa/gh-action-pypi-publish@release/v1 38 | with: 39 | user: __token__ 40 | password: ${{ secrets.PYPI_API_TOKEN }} 41 | -------------------------------------------------------------------------------- /.github/workflows/publish_docker.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docker image 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | push_to_registries: 9 | name: Push Docker image to multiple registries 10 | runs-on: ubuntu-latest 11 | permissions: 12 | packages: write 13 | contents: read 14 | steps: 15 | - name: Check out the repo 16 | uses: actions/checkout@v3 17 | 18 | - name: Log in to Docker Hub 19 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 20 | with: 21 | username: ${{ secrets.DOCKER_USERNAME }} 22 | password: ${{ secrets.DOCKER_PASSWORD }} 23 | 24 | - name: Log in to the Container registry 25 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 26 | with: 27 | registry: ghcr.io 28 | username: ${{ github.actor }} 29 | password: ${{ secrets.GITHUB_TOKEN }} 30 | 31 | - name: Extract metadata (tags, labels) for Docker 32 | id: meta 33 | uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 34 | with: 35 | images: | 36 | mmcauliffe/montreal-forced-aligner 37 | ghcr.io/${{ github.repository }} 38 | 39 | - name: Build and push Docker images 40 | uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc 41 | with: 42 | context: . 
43 | push: true 44 | tags: ${{ steps.meta.outputs.tags }} 45 | labels: ${{ steps.meta.outputs.labels }} 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *.exe 5 | *.zip 6 | *.txt 7 | *.lab 8 | *.dict 9 | !tests/data/dictionaries/*.dict 10 | !tests/data/dictionaries/*.txt 11 | !tests/data/lab/*.txt 12 | !tests/data/lab/*.lab 13 | !tests/data/am/*.zip 14 | !tests/data/lm/*.zip 15 | report.txt 16 | 17 | .idea/ 18 | .pytest-cache/ 19 | # Mac BS 20 | .DS_Store 21 | 22 | 23 | generated/ 24 | 25 | pretrained_models/ 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | montreal_forced_aligner/version.py 32 | montreal_forced_aligner/_version.py 33 | .Python 34 | env/ 35 | build/ 36 | develop-eggs/ 37 | dist/ 38 | downloads/ 39 | eggs/ 40 | .eggs/ 41 | lib/ 42 | lib64/ 43 | parts/ 44 | sdist/ 45 | var/ 46 | *.egg-info/ 47 | .installed.cfg 48 | *.egg 49 | thirdparty/bin 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage.xml 69 | *,cover 70 | 71 | # Translations 72 | *.mo 73 | *.pot 74 | 75 | # Django stuff: 76 | *.log 77 | 78 | # Sphinx documentation 79 | docs/build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | 85 | *.dll 86 | 87 | .pytest_cache/ 88 | 89 | docs/source/api/ 90 | 91 | 92 | *.lprof 93 | 94 | *.pclprof 95 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: profile-check 5 | name: no profiling 6 | entry: '@profile' 7 | language: pygrep 8 | types: [ python ] 9 | - id: print-check 10 | name: no print statements 11 | entry: '\bprint\(' 12 | language: pygrep 13 | types: [ python ] 14 | files: ^montreal_forced_aligner/ 15 | exclude: ^montreal_forced_aligner/command_line/transcribe.py 16 | - repo: https://github.com/psf/black 17 | rev: 23.9.1 18 | hooks: 19 | - id: black 20 | - repo: https://github.com/pycqa/flake8 21 | rev: 7.0.0 22 | hooks: 23 | - id: flake8 24 | additional_dependencies: 25 | - pyproject-flake8 26 | - repo: https://github.com/pre-commit/mirrors-isort 27 | rev: v5.10.1 28 | hooks: 29 | - id: isort 30 | additional_dependencies: [toml] 31 | - repo: https://github.com/asottile/setup-cfg-fmt 32 | rev: v2.2.0 33 | hooks: 34 | - id: setup-cfg-fmt 35 | args: 36 | - --min-py3-version 37 | - "3.8" 38 | - repo: https://github.com/pre-commit/pre-commit-hooks 39 | rev: v4.0.1 40 | hooks: 41 | - id: check-ast 42 | - id: check-builtin-literals 43 | - id: check-docstring-first 44 | - id: check-merge-conflict 45 | - id: check-yaml 46 | - id: check-toml 47 | - id: debug-statements 48 | - id: end-of-file-fixer 49 | - id: trailing-whitespace 50 | - id: check-added-large-files 51 | args: ['--maxkb=2000'] 52 | - id: mixed-line-ending 53 | -------------------------------------------------------------------------------- /.readthedocs.yaml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-20.04" 5 | tools: 6 | python: "mambaforge-4.10" 7 | 8 | sphinx: 9 | configuration: docs/source/conf.py 10 | 11 | conda: 12 | environment: rtd_environment.yml 13 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM condaforge/mambaforge:22.11.1-4 as build 2 | 3 | COPY ci/docker_environment.yaml . 4 | RUN mkdir -p /mfa 5 | RUN useradd -ms /bin/bash mfauser 6 | RUN chown -R mfauser /mfa 7 | COPY . /pkg 8 | RUN mamba env create -p /env -f docker_environment.yaml && conda clean -afy && \ 9 | chown -R mfauser /env 10 | RUN conda run -p /env python -m pip install speechbrain && \ 11 | conda run -p /env python -m pip install --no-deps /pkg 12 | USER mfauser 13 | ENV MFA_ROOT_DIR=/mfa 14 | RUN conda run -p /env mfa server init 15 | 16 | RUN echo "source activate /env && mfa server start" > ~/.bashrc 17 | ENV PATH /env/bin:$PATH 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Montreal Corpus Tools 2 | 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a 5 | copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 13 | in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 18 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 19 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 20 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /bin/mfa_update: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import os 5 | import shutil 6 | import subprocess 7 | import sys 8 | from importlib.util import find_spec 9 | 10 | if __name__ == "__main__": 11 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 12 | parser.add_argument( 13 | "--install_3p", 14 | action="store_true", 15 | help="Install/update third party dependencies (Speechbrain and WhisperX)", 16 | ) 17 | args = parser.parse_args() 18 | anchor_found = find_spec("anchor") is not None 19 | speechbrain_found = find_spec("speechbrain") is not None 20 | whisperx_found = find_spec("whisperx") is not None 21 | 22 | conda_path = shutil.which("conda") 23 | if conda_path is None: 24 | print("Please install conda before running this command.") 25 | sys.exit(1) 26 | mamba_path = shutil.which("mamba") 27 | if mamba_path is None: 28 | print("No mamba found, installing first...") 29 | subprocess.call( 30 | [conda_path, "install", "-c", "conda-forge", "-y", "mamba"], env=os.environ 31 | ) 32 | package_list = ["montreal-forced-aligner", "kalpy", "kaldi=*=cpu*"] 33 | if anchor_found: 34 | package_list.append("anchor-annotator") 35 | subprocess.call( 36 | [mamba_path, "update", "-c", "conda-forge", "-y"] + package_list, env=os.environ 37 | ) 38 | if args.install_3p: 39 | channels = ["conda-forge", "pytorch", "nvidia", "anaconda"] 40 | package_list = ["pytorch", "torchaudio"] 41 | if not whisperx_found: 42 | package_list.extend(["cudnn=8", "transformers"]) 43 | command = [mamba_path, "install", "-y"] 44 | for c in channels: 45 | command.extend(["-c", c]) 46 | command += package_list 47 | subprocess.call(command, env=os.environ) 48 | command = ["pip", "install", "-U"] 49 | package_list = ["whisperx", "speechbrain", "pygtrie"] 50 | subprocess.call(command + package_list, env=os.environ) 51 | -------------------------------------------------------------------------------- /ci/docker_environment.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - pytorch 4 | - nvidia 5 | - anaconda 6 | dependencies: 7 | - python>=3.11 8 | - numpy 9 | - librosa 10 | - tqdm 11 | - requests 12 | - pyyaml 13 | - dataclassy 14 | - kaldi=*=*cpu* 15 | - pynini 16 | - openfst=1.8.3 17 | - scikit-learn 18 | - hdbscan 19 | - baumwelch 20 | - ngram 21 | - praatio=6.0.0 22 | - biopython 23 | - sqlalchemy>=2.0 24 | - git 25 | - pgvector 26 | - pgvector-python 27 | - postgresql 28 | - psycopg2 29 | - click 30 | - pytorch 31 | - torchaudio 32 | - setuptools_scm 33 | - kneed 34 | - matplotlib 35 | - seaborn 36 | - sqlite 37 | - rich 38 | - rich-click 39 | - kalpy 40 | - spacy 41 | - sudachipy 42 | - sudachidict-core 43 | - spacy-pkuseg 44 | - pip 45 | - pip: 46 | - speechbrain 47 | - python-mecab-ko 48 | - jamo 49 | - pythainlp 50 | - dragonmapper 51 | -------------------------------------------------------------------------------- /docs/source/_static/MFA_paper_Interspeech2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/MFA_paper_Interspeech2017.pdf -------------------------------------------------------------------------------- /docs/source/_static/MFA_poster_LSA2017.pdf:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/MFA_poster_LSA2017.pdf -------------------------------------------------------------------------------- /docs/source/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/favicon.ico -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Bold.woff -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Bold.woff2 -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-BoldItalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-BoldItalic.woff -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-BoldItalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-BoldItalic.woff2 -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Italic.woff -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Italic.woff2 -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Regular.woff -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Regular.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Regular.woff2 -------------------------------------------------------------------------------- /docs/source/_static/librispeech_textgrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/librispeech_textgrid.png -------------------------------------------------------------------------------- /docs/source/_static/multiple_speakers_output_textgrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/multiple_speakers_output_textgrid.png -------------------------------------------------------------------------------- /docs/source/_static/multiple_speakers_textgrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/multiple_speakers_textgrid.png -------------------------------------------------------------------------------- /docs/source/_static/sound_files/english_fast.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/sound_files/english_fast.wav -------------------------------------------------------------------------------- /docs/source/_static/sound_files/english_slow.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/sound_files/english_slow.wav -------------------------------------------------------------------------------- /docs/source/_static/sound_files/english_t.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/sound_files/english_t.wav -------------------------------------------------------------------------------- /docs/source/_static/sound_files/japanese_fast.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/sound_files/japanese_fast.wav -------------------------------------------------------------------------------- /docs/source/_static/sound_files/japanese_slow.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/sound_files/japanese_slow.wav -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/attribute.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | {{ fullname }} 4 | {{ underline }} 5 | 6 | .. currentmodule:: {{ module }} 7 | 8 | .. 
autoattribute:: {{ objname }} 9 | :type: 10 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/base.rst: -------------------------------------------------------------------------------- 1 | {{ objname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. auto{{ objtype }}:: {{ objname }} 6 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | :html_theme.sidebar_secondary.remove: 2 | 3 | {{ objname }} 4 | {{ underline }} 5 | 6 | .. currentmodule:: {{ module }} 7 | 8 | .. autoclass:: {{ objname }} 9 | :members: 10 | :show-inheritance: 11 | :no-inherited-members: 12 | :no-special-members: 13 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/function.rst: -------------------------------------------------------------------------------- 1 | :html_theme.sidebar_secondary.remove: 2 | 3 | {{ objname }} 4 | {{ underline }} 5 | 6 | .. currentmodule:: {{ module }} 7 | 8 | .. autofunction:: {{ objname }} 9 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/method.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | {{ fullname }} 4 | {{ underline }} 5 | 6 | .. currentmodule:: {{ module }} 7 | 8 | .. automethod:: {{ objname }} 9 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/property.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | {{ fullname }} 4 | {{ underline }} 5 | 6 | .. currentmodule:: {{ module }} 7 | 8 | .. autoproperty:: {{ objname }} 9 | -------------------------------------------------------------------------------- /docs/source/_templates/version.html: -------------------------------------------------------------------------------- 1 | {# This will display the version of the docs as a badge 2 | 3 | Colors from: 4 | 5 | Wong, B. Points of view: Color blindness. 6 | Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618 7 | 8 | #} 9 | 10 | 11 | {% if "dev" in version %} 12 | {# orange for dev #E69F00 #} 13 | 15 | {% elif versionwarning %} 16 | {# red for old #980F0F #} 17 | 19 | {% else %} 20 | {# green for stable #009E73 #} 21 | 23 | {% endif %} 24 | 25 | -------------------------------------------------------------------------------- /docs/source/changelog/changelog_2.1.rst: -------------------------------------------------------------------------------- 1 | 2 | .. 
_changelog_2.1: 3 | 4 | ************* 5 | 2.1 Changelog 6 | ************* 7 | 8 | 2.1.6 9 | ===== 10 | 11 | - Fix for issue with ignore_case flag not being respected 12 | - Fixed a hang in speaker diarization 13 | - Fixed an error related to paths ending in trailing slashes which caused MFA to try to connect to a database named after the local user 14 | - Partial migration to using :class:`pathlib.Path` instead of :mod:`os.path` 15 | 16 | 2.1.5 17 | ===== 18 | 19 | - Fix for improperly reset databases 20 | 21 | 2.1.4 22 | ===== 23 | 24 | - Change how database connections are made to remove pooling 25 | 26 | 2.1.3 27 | ===== 28 | 29 | - Fixed a bug with intervals after the end of the sound file having negative duration (they are now not parsed) 30 | - Fixed an issue where utterances were not properly assigned to the correct channels 31 | - Modified the logic for connections to attempt to solve error with too many clients 32 | 33 | 2.1.2 34 | ===== 35 | 36 | - Fixed a crash in training when the debug flag was not set 37 | - Set default postgresql port to 5433 to avoid conflicts with any system installations 38 | - Fixed a crash in textgrid export 39 | 40 | 2.1.1 41 | ===== 42 | 43 | - Fixed a bug with `mfa` command not working from the command line 44 | - Updated to be compatible with PraatIO 6.0 45 | 46 | 2.1.0 47 | ===== 48 | 49 | - Drop support for SQLite as a database backend 50 | - Fixed a bug where TextGrid parsing errors would cause MFA to crash rather than ignore those files 51 | - Updated CLI to use :xref:`click` rather than argparse 52 | - Added :code:`--use_phone_model` flag for :code:`mfa align` and :code:`mfa validate` commands. See :ref:`phone_models` for more details. 53 | - Added :code:`--phone_confidence` flag for :code:`mfa validate` commands. See :ref:`phone_models` for more details. 54 | - Added modeling of :code:`cutoff` phones via :code:`--use_cutoff_model` which adds progressive truncations of the next word, if it's not unknown or a non-speech word (silence, laughter, etc). See :ref:`cutoff_modeling` for more details. 55 | - Added support for using :xref:`speechbrain`'s VAD model in :ref:`create_segments` 56 | - Overhaul and update :ref:`train_ivector` 57 | - Overhaul and update :ref:`diarize_speakers` 58 | - Added support for using :xref:`speechbrain`'s SpeakerRecognition model in :ref:`diarize_speakers` 59 | -------------------------------------------------------------------------------- /docs/source/changelog/changelog_3.1.rst: -------------------------------------------------------------------------------- 1 | 2 | .. 
_changelog_3.1: 3 | 4 | ************* 5 | 3.1 Changelog 6 | ************* 7 | 8 | 3.1.4 9 | ----- 10 | 11 | - Optimized :code:`mfa g2p` to better use multiple processes 12 | - Added :code:`--export_scores` to :code:`mfa g2p` for adding a column representing the final weights of the generated pronunciations 13 | - Added :code:`--output_directory` to :code:`mfa validate` to save generated validation files rather than the temporary directory 14 | - Fixed a bug in cutoff modeling that was preventing them from being properly parsed 15 | 16 | 3.1.3 17 | ----- 18 | 19 | - Fixed an issue where silence probability being zero was not correctly removing silence 20 | - Compatibility with kalpy v0.6.5 21 | - Added API functionality for verifying transcripts with interjection words in alignment 22 | - Fixed an error in fine tuning that generated nonsensical boundaries 23 | 24 | 3.1.2 25 | ----- 26 | 27 | - Fixed a bug where hidden files and folders would be parsed as corpus data 28 | - Fixed a bug where validation would not respect :code:`--no_final_clean` 29 | - Fixed a rare crash in training when a job would not have utterances assigned to it 30 | - Fixed a bug where MFA would mistakenly report a dictionary and acoustic model phones did not match for older versions 31 | 32 | 3.1.1 33 | ----- 34 | 35 | - Fixed an issue with TextGrids missing intervals 36 | 37 | 3.1.0 38 | ----- 39 | 40 | - Fixed a bug where cutoffs were not properly modelled 41 | - Added additional filter on create subset to not include utterances with cutoffs in smaller subsets 42 | - Added the ability to specify HMM topologies for phones 43 | - Fixed issues caused by validators not cleaning up temporary files and databases 44 | - Added support for default and nonnative dictionaries generated from other dictionaries 45 | - Restricted initial training rounds to exclude default and nonnative dictionaries 46 | - Changed clustering of phones to not mix silence and non-silence phones 47 | - Optimized textgrid export 48 | - Added better memory management for collecting alignments 49 | -------------------------------------------------------------------------------- /docs/source/changelog/changelog_3.2.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _changelog_3.2: 3 | 4 | ************* 5 | 3.2 Changelog 6 | ************* 7 | 8 | 3.2.1 9 | ----- 10 | 11 | - Changed unicode normalization to default to composed forms unless overridden by :code:`--unicode_decomposition true` 12 | 13 | 3.2.0 14 | ----- 15 | 16 | - Added :code:`--subset_word_count` parameter to :ref:`train_acoustic_model` to add a minimum word count for an utterance to be included in training subsets 17 | - Added :code:`--minimum_utterance_length` parameter to :ref:`train_acoustic_model` to add a minimum word count for an utterance to be included in training at all 18 | - Improved memory usage in compiling training graphs for initial subsets 19 | - Add support for transcription via whisperx and speechbrain models 20 | - Update text normalization to normalize to decomposed forms 21 | - Compatibility with Kalpy 0.6.7 22 | -------------------------------------------------------------------------------- /docs/source/first_steps/tutorials.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _`filing an issue`: https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/issues 3 | 4 | .. 
_`Montreal Forced Aligner v2 Corpus Phonetics Tutorial`: https://eleanorchodroff.com/tutorial/montreal-forced-aligner.html 5 | 6 | .. _`Phonetic forced alignment with the Montreal Forced Aligner`: https://www.youtube.com/watch?v=Zhj-ccMDj_w 7 | 8 | .. _`How I used Montreal Forced Aligner for a New Language (Sinhalese)`: https://medium.com/breaktheloop/how-i-used-montreal-forced-aligner-for-a-new-language-sinhalese-8f2c22a65a22 9 | 10 | .. _`Bootstrapping an IPA dictionary for English using Montreal Forced Aligner 2.0`: https://mmcauliffe.medium.com/creating-english-ipa-dictionary-using-montreal-forced-aligner-2-0-242415dfee32 11 | 12 | .. _`Update on Montreal Forced Aligner performance`: https://memcauliffe.com/update-on-montreal-forced-aligner-performance.html 13 | .. _`Speaker dictionaries and multilingual IPA`: https://memcauliffe.com/speaker-dictionaries-and-multilingual-ipa.html 14 | 15 | .. _tutorials: 16 | 17 | External tutorials 18 | ================== 19 | 20 | I will try to keep this updated with a list of in-depth tutorials for using MFA. If you write up anything that could be included here, please let me know by `filing an issue`_ and I will add it. 21 | 22 | * `Montreal Forced Aligner v2 Corpus Phonetics Tutorial`_ (Now updated for 2.0!) 23 | * Courtesy of :xref:`chodroff` 24 | * `Phonetic forced alignment with the Montreal Forced Aligner`_ (YouTube recording) 25 | * Courtesy of :xref:`chodroff` and :xref:`rutgers_spanish_portuguese` 26 | * `How I used Montreal Forced Aligner for a New Language (Sinhalese)`_ 27 | * Courtesy of :xref:`dias` 28 | * `Bootstrapping an IPA dictionary for English using Montreal Forced Aligner 2.0`_ 29 | * `Update on Montreal Forced Aligner performance`_ 30 | * `Speaker dictionaries and multilingual IPA`_ 31 | -------------------------------------------------------------------------------- /docs/source/getting_started.rst: -------------------------------------------------------------------------------- 1 | 2 | *************** 3 | Getting started 4 | *************** 5 | 6 | 7 | Installation 8 | ------------ 9 | 10 | .. grid:: 2 11 | 12 | .. grid-item-card:: Installing with conda 13 | :text-align: center 14 | :columns: 12 15 | 16 | MFA is now on :xref:`conda_forge` and can be installed with Anaconda or Miniconda: 17 | 18 | .. code-block:: bash 19 | 20 | conda config --add channels conda-forge 21 | conda install montreal-forced-aligner 22 | 23 | +++ 24 | 25 | .. button-link:: https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html 26 | :color: primary 27 | :expand: 28 | 29 | Install Conda 30 | 31 | 32 | .. grid-item-card:: In-depth instructions 33 | :text-align: center 34 | 35 | Using :ref:`Docker `? Want to :ref:`install via source `? 36 | 37 | +++ 38 | 39 | .. button-ref:: installation 40 | :expand: 41 | :color: primary 42 | :ref-type: doc 43 | 44 | To the installation guide 45 | 46 | 47 | .. grid-item-card:: First steps 48 | :text-align: center 49 | 50 | First time using MFA? Want a walk-through of a specific use case? 51 | 52 | +++ 53 | 54 | .. button-ref:: first_steps 55 | :expand: 56 | :color: primary 57 | 58 | First steps 59 | 60 | 61 | .. toctree:: 62 | :maxdepth: 1 63 | :hidden: 64 | 65 | installation 66 | first_steps/index 67 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Montreal Forced Aligner documentation 3 | ===================================== 4 | 5 | .. 
grid:: 2 6 | 7 | .. grid-item-card:: Getting started 8 | :text-align: center 9 | 10 | :fas:`running;fa-6x i-navigation` 11 | 12 | ^^^ 13 | 14 | Install the Montreal Forced Aligner and get started with examples and tutorials. 15 | 16 | +++ 17 | 18 | .. button-ref:: getting_started 19 | :expand: 20 | :color: primary 21 | 22 | Install MFA 23 | 24 | .. grid-item-card:: First steps 25 | :text-align: center 26 | 27 | :fas:`terminal;fa-6x i-navigation` 28 | 29 | ^^^ 30 | 31 | Have a particular use case for MFA? 32 | 33 | Check out the first steps tutorials. 34 | 35 | +++ 36 | 37 | .. button-ref:: first_steps 38 | :expand: 39 | :color: primary 40 | 41 | First steps 42 | 43 | .. grid-item-card:: User guide 44 | :text-align: center 45 | 46 | :fas:`book-open;fa-6x i-navigation` 47 | 48 | ^^^ 49 | 50 | The User Guide gives more details on input formats, available commands, and details on the various workflows available. 51 | 52 | +++ 53 | 54 | .. button-ref:: user_guide 55 | :expand: 56 | :color: primary 57 | 58 | User guide 59 | 60 | .. grid-item-card:: API reference 61 | :text-align: center 62 | 63 | :fas:`file-code;fa-6x i-navigation` 64 | 65 | ^^^ 66 | 67 | The API guide lists all the inner workings of MFA, the modules and classes that you can import and use in your own scripts and projects, along with details about the Kaldi functionality used. 68 | 69 | +++ 70 | 71 | .. button-ref:: mfa_api 72 | :expand: 73 | :color: primary 74 | 75 | Reference guide 76 | 77 | .. toctree:: 78 | :hidden: 79 | 80 | Getting started 81 | User guide 82 | API reference 83 | Changelog 84 | -------------------------------------------------------------------------------- /docs/source/reference/acoustic_modeling/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper functionality 3 | ==================== 4 | 5 | Mixins 6 | ------ 7 | 8 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.base 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | AcousticModelTrainingMixin -- Basic mixin 14 | 15 | 16 | Multiprocessing workers and functions 17 | ------------------------------------- 18 | 19 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.monophone 20 | 21 | .. autosummary:: 22 | :toctree: generated/ 23 | 24 | MonoAlignEqualFunction 25 | 26 | 27 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.triphone 28 | 29 | .. autosummary:: 30 | :toctree: generated/ 31 | 32 | TreeStatsFunction 33 | ConvertAlignmentsFunction 34 | 35 | 36 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.lda 37 | 38 | .. autosummary:: 39 | :toctree: generated/ 40 | 41 | LdaAccStatsFunction 42 | CalcLdaMlltFunction 43 | 44 | 45 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.sat 46 | 47 | .. autosummary:: 48 | :toctree: generated/ 49 | 50 | AccStatsTwoFeatsFunction 51 | 52 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.trainer 53 | 54 | .. autosummary:: 55 | :toctree: generated/ 56 | 57 | TransitionAccFunction 58 | 59 | Multiprocessing argument classes 60 | -------------------------------- 61 | 62 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.monophone 63 | 64 | .. autosummary:: 65 | :toctree: generated/ 66 | 67 | MonoAlignEqualArguments 68 | 69 | 70 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.triphone 71 | 72 | .. autosummary:: 73 | :toctree: generated/ 74 | 75 | TreeStatsArguments 76 | ConvertAlignmentsArguments 77 | 78 | .. 
currentmodule:: montreal_forced_aligner.acoustic_modeling.lda 79 | 80 | .. autosummary:: 81 | :toctree: generated/ 82 | 83 | LdaAccStatsArguments 84 | CalcLdaMlltArguments 85 | 86 | 87 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.sat 88 | 89 | .. autosummary:: 90 | :toctree: generated/ 91 | 92 | AccStatsTwoFeatsArguments 93 | 94 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.trainer 95 | 96 | .. autosummary:: 97 | :toctree: generated/ 98 | 99 | TransitionAccArguments 100 | -------------------------------------------------------------------------------- /docs/source/reference/acoustic_modeling/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _acoustic_modeling_api: 3 | 4 | Acoustic models 5 | =============== 6 | 7 | :term:`Acoustic models` contain information about how phones are pronounced, and are trained over large (and not-so-large) corpora of speech. Currently only GMM-HMM style acoustic models are supported, which are generally good enough for alignment, but nowhere near state of the art for transcription. 8 | 9 | .. note:: 10 | 11 | As part of the training procedure, alignments are generated, and so can be exported at the end (the same as training an acoustic model and then using it with the :class:`~montreal_forced_aligner.alignment.pretrained.PretrainedAligner`). See :meth:`~montreal_forced_aligner.alignment.CorpusAligner.export_files` for the method and :ref:`train_acoustic_model` for the command line function. 12 | 13 | .. currentmodule:: montreal_forced_aligner.models 14 | 15 | .. autosummary:: 16 | :toctree: generated/ 17 | 18 | AcousticModel 19 | 20 | .. toctree:: 21 | :hidden: 22 | 23 | training 24 | helper 25 | -------------------------------------------------------------------------------- /docs/source/reference/acoustic_modeling/training.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _acoustic_model_training_api: 3 | 4 | Training acoustic models 5 | ======================== 6 | 7 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.trainer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | TrainableAligner 13 | 14 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling 15 | 16 | .. autosummary:: 17 | :toctree: generated/ 18 | 19 | MonophoneTrainer -- Monophone trainer 20 | TriphoneTrainer -- Triphone trainer 21 | LdaTrainer -- LDA trainer 22 | SatTrainer -- Speaker adapted trainer 23 | PronunciationProbabilityTrainer -- Pronunciation probability trainer 24 | -------------------------------------------------------------------------------- /docs/source/reference/alignment/alignment.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _aligners_api: 3 | 4 | Alignment classes 5 | ================= 6 | 7 | .. currentmodule:: montreal_forced_aligner.alignment 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | CorpusAligner -- Base aligner 13 | AdaptingAligner -- Adapting an acoustic model to new data 14 | PretrainedAligner -- Pretrained aligner 15 | --------------------------------------------------------------------------------
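The aligner classes listed above are the programmatic counterpart of the ``mfa align`` command, and connect to the note under :ref:`acoustic_modeling_api` about exporting alignments with :meth:`~montreal_forced_aligner.alignment.CorpusAligner.export_files`. The sketch below is only an illustration, not a recipe taken from this documentation: it assumes the :class:`~montreal_forced_aligner.alignment.pretrained.PretrainedAligner` constructor accepts the same corpus, dictionary, and acoustic model arguments as the command line interface, and every path in it is a placeholder.

.. code-block:: python

   from montreal_forced_aligner.alignment import PretrainedAligner

   # All paths below are placeholders; point them at your own corpus, dictionary,
   # and acoustic model. The keyword argument names are assumed to mirror the CLI.
   aligner = PretrainedAligner(
       corpus_directory="/data/my_corpus",
       dictionary_path="/data/english_us_arpa.dict",
       acoustic_model_path="/data/english_us_arpa.zip",
   )
   aligner.align()  # run the alignment workflow over the corpus
   aligner.export_files("/data/my_corpus_aligned")  # write TextGrids via export_files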
/docs/source/reference/alignment/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper functionality 3 | ==================== 4 | 5 | Mixins 6 | ------ 7 | 8 | .. currentmodule:: montreal_forced_aligner.alignment.mixins 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | AlignMixin -- Alignment mixin 14 | 15 | Multiprocessing workers and functions 16 | ------------------------------------- 17 | 18 | .. currentmodule:: montreal_forced_aligner.alignment.multiprocessing 19 | 20 | .. autosummary:: 21 | :toctree: generated/ 22 | 23 | AlignFunction 24 | FineTuneFunction 25 | CompileTrainGraphsFunction 26 | AccStatsFunction 27 | AlignmentExtractionFunction 28 | ExportTextGridProcessWorker 29 | PhoneConfidenceFunction 30 | 31 | 32 | Multiprocessing argument classes 33 | -------------------------------- 34 | 35 | .. currentmodule:: montreal_forced_aligner.alignment.multiprocessing 36 | 37 | .. autosummary:: 38 | :toctree: generated/ 39 | 40 | AlignArguments 41 | AccStatsArguments 42 | CompileTrainGraphsArguments 43 | AlignmentExtractionArguments 44 | ExportTextGridArguments 45 | FineTuneArguments 46 | PhoneConfidenceArguments 47 | -------------------------------------------------------------------------------- /docs/source/reference/alignment/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _alignment_api: 3 | 4 | Alignment 5 | ========= 6 | 7 | .. toctree:: 8 | 9 | alignment 10 | helper 11 | -------------------------------------------------------------------------------- /docs/source/reference/core_index.rst: -------------------------------------------------------------------------------- 1 | 2 | Core functionality 3 | ================== 4 | 5 | This section contains the core objects that are used as input to any top-level worker: the corpora, pronunciation dictionaries, and various types of MFA models. Each model's section contains the classes and functionality used to train them. 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | corpus/index 11 | database/index 12 | dictionary/index 13 | acoustic_modeling/index 14 | g2p_modeling/index 15 | language_modeling/index 16 | ivector/index 17 | -------------------------------------------------------------------------------- /docs/source/reference/corpus/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _corpus_api: 3 | 4 | Corpora 5 | ======= 6 | 7 | .. currentmodule:: montreal_forced_aligner.corpus.acoustic_corpus 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | AcousticCorpus 13 | 14 | .. currentmodule:: montreal_forced_aligner.corpus.text_corpus 15 | 16 | .. autosummary:: 17 | :toctree: generated/ 18 | 19 | TextCorpus 20 | 21 | .. currentmodule:: montreal_forced_aligner.corpus.classes 22 | 23 | .. autosummary:: 24 | :toctree: generated/ 25 | 26 | FileData -- Class for representing sound file/transcription file pairs in corpora 27 | UtteranceData -- Class for collecting information about utterances 28 | 29 | 30 | 31 | Helper classes and functions 32 | ============================ 33 | 34 | 35 | Multiprocessing 36 | --------------- 37 | 38 | .. currentmodule:: montreal_forced_aligner.corpus.multiprocessing 39 | 40 | .. autosummary:: 41 | :toctree: generated/ 42 | 43 | Job 44 | CorpusProcessWorker 45 | 46 | Mixins 47 | ------ 48 | 49 | .. currentmodule:: montreal_forced_aligner.corpus.base 50 | 51 | .. autosummary:: 52 | :toctree: generated/ 53 | 54 | CorpusMixin 55 | 56 | .. currentmodule:: montreal_forced_aligner.corpus.acoustic_corpus 57 | 58 | .. autosummary:: 59 | :toctree: generated/ 60 | 61 | AcousticCorpusMixin 62 | AcousticCorpusPronunciationMixin 63 | 64 | .. currentmodule:: montreal_forced_aligner.corpus.ivector_corpus 65 | 66 | .. autosummary:: 67 | :toctree: generated/ 68 | 69 | IvectorCorpusMixin 70 | 71 | .. currentmodule:: montreal_forced_aligner.corpus.text_corpus 72 | 73 | .. autosummary:: 74 | :toctree: generated/ 75 | 76 | TextCorpusMixin 77 | DictionaryTextCorpusMixin 78 | 79 | Features 80 | -------- 81 | 82 | .. currentmodule:: montreal_forced_aligner.corpus.features 83 | 84 | .. autosummary:: 85 | :toctree: generated/ 86 | 87 | FeatureConfigMixin 88 | MfccFunction 89 | MfccArguments 90 | CalcFmllrFunction 91 | CalcFmllrArguments 92 | IvectorConfigMixin 93 | VadConfigMixin 94 | ComputeVadFunction 95 | VadArguments 96 | 97 | Ivector 98 | ------- 99 | 100 | .. currentmodule:: montreal_forced_aligner.corpus.features 101 | 102 | .. autosummary:: 103 | :toctree: generated/ 104 | 105 | ExtractIvectorsFunction 106 | ExtractIvectorsArguments 107 | -------------------------------------------------------------------------------- /docs/source/reference/database/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _database_api: 3 | 4 | Database 5 | ======== 6 | 7 | MFA uses a PostgreSQL database to cache information during training/alignment runs. An issue with training larger corpora was memory bottlenecks: all of the corpus information was held in memory, and fMLLR estimations in later stages would crash. Additionally, there was always a trade-off between storing results for use in other applications like :xref:`anchor` or providing diagnostic information to users, and keeping the core MFA workflows as memory/time efficient as possible. Offloading to a database frees up memory, makes some computations more efficient, and should be optimized enough not to slow down regular processing. 8 | 9 | .. currentmodule:: montreal_forced_aligner.db 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | Dictionary 15 | Dialect 16 | Word 17 | Pronunciation 18 | Phone 19 | Grapheme 20 | File 21 | TextFile 22 | SoundFile 23 | Speaker 24 | Utterance 25 | WordInterval 26 | PhoneInterval 27 | CorpusWorkflow 28 | PhonologicalRule 29 | RuleApplication 30 | Job 31 | M2MSymbol 32 | M2M2Job 33 | Word2Job 34 | --------------------------------------------------------------------------------
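The ORM classes listed above appear to be SQLAlchemy mappings (sqlalchemy>=2.0 and psycopg2 are in the dependency lists earlier in this repository), so once a corpus has been loaded they can be queried directly. The sketch below is illustrative only: the connection URI is a placeholder rather than an MFA-documented default (only the 5433 port matches the default noted in the 2.1.2 changelog), and the database name would be whatever was created for your own corpus.

.. code-block:: python

   # Illustrative sketch only: the connection URI is a placeholder, not an MFA
   # default; substitute the host, port, and database name of your own MFA server.
   import sqlalchemy
   from sqlalchemy.orm import Session

   from montreal_forced_aligner.db import Speaker, Utterance

   engine = sqlalchemy.create_engine("postgresql+psycopg2://localhost:5433/my_corpus")
   with Session(engine) as session:
       # Quick sanity check on what has been cached for the corpus
       num_utterances = session.query(Utterance).count()
       num_speakers = session.query(Speaker).count()
       print(f"{num_utterances} utterances from {num_speakers} speakers")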
autosummary:: 67 | :toctree: generated/ 68 | 69 | IvectorCorpusMixin 70 | 71 | .. currentmodule:: montreal_forced_aligner.corpus.text_corpus 72 | 73 | .. autosummary:: 74 | :toctree: generated/ 75 | 76 | TextCorpusMixin 77 | DictionaryTextCorpusMixin 78 | 79 | Features 80 | -------- 81 | 82 | .. currentmodule:: montreal_forced_aligner.corpus.features 83 | 84 | .. autosummary:: 85 | :toctree: generated/ 86 | 87 | FeatureConfigMixin 88 | MfccFunction 89 | MfccArguments 90 | CalcFmllrFunction 91 | CalcFmllrArguments 92 | IvectorConfigMixin 93 | VadConfigMixin 94 | ComputeVadFunction 95 | VadArguments 96 | 97 | Ivector 98 | ------- 99 | 100 | .. currentmodule:: montreal_forced_aligner.corpus.features 101 | 102 | .. autosummary:: 103 | :toctree: generated/ 104 | 105 | ExtractIvectorsFunction 106 | ExtractIvectorsArguments 107 | -------------------------------------------------------------------------------- /docs/source/reference/database/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _database_api: 3 | 4 | Database 5 | ======== 6 | 7 | MFA uses a SQLite database to cache information during training/alignment runs. An issue with training larger corpora was running into memory bottlenecks as all the information in the corpus was stored in memory, and fMLLR estimations in later stages would crash. Additionally, there was always a trade off between storing results for use in other applications like :xref:`anchor` or providing diagnostic information to users, and ensuring that the core MFA workflows were as memory/time efficient as possible. Offloading to a database frees up some memory, and makes some computations more efficient, and should be optimized enough to not slow down regular processing. 8 | 9 | .. currentmodule:: montreal_forced_aligner.db 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | Dictionary 15 | Dialect 16 | Word 17 | Pronunciation 18 | Phone 19 | Grapheme 20 | File 21 | TextFile 22 | SoundFile 23 | Speaker 24 | Utterance 25 | WordInterval 26 | PhoneInterval 27 | CorpusWorkflow 28 | PhonologicalRule 29 | RuleApplication 30 | Job 31 | M2MSymbol 32 | M2M2Job 33 | Word2Job 34 | -------------------------------------------------------------------------------- /docs/source/reference/diarization/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper functions 3 | ================ 4 | 5 | .. currentmodule:: montreal_forced_aligner.diarization.multiprocessing 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | PldaClassificationFunction 11 | PldaClassificationArguments 12 | ComputeEerFunction 13 | ComputeEerArguments 14 | SpeechbrainEmbeddingFunction 15 | SpeechbrainClassificationFunction 16 | SpeechbrainArguments 17 | cluster_matrix 18 | -------------------------------------------------------------------------------- /docs/source/reference/diarization/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _diarization_api: 3 | 4 | Speaker diarization 5 | =================== 6 | 7 | Speaker diarization is the procedure to assign speaker labels to utterances. MFA can train and use ivector models (see :ref:`train_ivector`) or use :xref:`speechbrain`'s pretrained speaker classifier. 8 | 9 | .. 
toctree:: 10 | 11 | main 12 | helper 13 | -------------------------------------------------------------------------------- /docs/source/reference/diarization/main.rst: -------------------------------------------------------------------------------- 1 | 2 | Speaker Diarization 3 | =================== 4 | 5 | .. currentmodule:: montreal_forced_aligner.diarization.speaker_diarizer 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | SpeakerDiarizer 11 | -------------------------------------------------------------------------------- /docs/source/reference/dictionary/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper classes and functions 3 | ============================ 4 | 5 | Model 6 | ----- 7 | 8 | .. currentmodule:: montreal_forced_aligner.models 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | DictionaryModel 14 | 15 | Mixins 16 | ------ 17 | 18 | .. currentmodule:: montreal_forced_aligner.dictionary.mixins 19 | 20 | .. autosummary:: 21 | :toctree: generated/ 22 | 23 | DictionaryMixin 24 | TemporaryDictionaryMixin 25 | 26 | .. currentmodule:: montreal_forced_aligner.dictionary.multispeaker 27 | 28 | .. autosummary:: 29 | :toctree: generated/ 30 | 31 | MultispeakerDictionaryMixin 32 | 33 | 34 | Pronunciation probability functionality 35 | ======================================= 36 | 37 | Helper 38 | ------ 39 | 40 | .. currentmodule:: montreal_forced_aligner.alignment.multiprocessing 41 | 42 | .. autosummary:: 43 | :toctree: generated/ 44 | 45 | GeneratePronunciationsFunction 46 | GeneratePronunciationsArguments 47 | -------------------------------------------------------------------------------- /docs/source/reference/dictionary/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _dictionary_training_api: 3 | 4 | Pronunciation dictionaries 5 | ========================== 6 | 7 | .. toctree:: 8 | 9 | main 10 | helper 11 | training 12 | -------------------------------------------------------------------------------- /docs/source/reference/dictionary/main.rst: -------------------------------------------------------------------------------- 1 | 2 | Main classes 3 | ============ 4 | 5 | .. currentmodule:: montreal_forced_aligner.dictionary 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | MultispeakerDictionary -- Collection of pronunciation dictionaries that specify speaker-dictionary mappings 11 | -------------------------------------------------------------------------------- /docs/source/reference/dictionary/training.rst: -------------------------------------------------------------------------------- 1 | 2 | Training pronunciation probabilities 3 | ==================================== 4 | 5 | .. currentmodule:: montreal_forced_aligner.alignment.pretrained 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | DictionaryTrainer -- Train pronunciation probabilities from alignments 11 | -------------------------------------------------------------------------------- /docs/source/reference/g2p/generator.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _generating_dictionaries_api: 3 | 4 | Dictionary generation 5 | ===================== 6 | 7 | .. currentmodule:: montreal_forced_aligner.g2p.generator 8 | 9 | .. 
autosummary:: 10 | :toctree: generated/ 11 | 12 | PyniniCorpusGenerator -- Generator for Pynini G2P model 13 | PyniniWordListGenerator -- Generator for Pynini G2P model 14 | -------------------------------------------------------------------------------- /docs/source/reference/g2p/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper functionality 3 | ==================== 4 | 5 | Mixins 6 | ------ 7 | 8 | .. currentmodule:: montreal_forced_aligner.g2p.generator 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | PyniniGenerator 14 | 15 | Helper 16 | ------ 17 | 18 | .. currentmodule:: montreal_forced_aligner.g2p.generator 19 | 20 | .. autosummary:: 21 | :toctree: generated/ 22 | 23 | Rewriter 24 | RewriterWorker 25 | -------------------------------------------------------------------------------- /docs/source/reference/g2p/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _g2p_generate_api: 3 | 4 | Generating dictionaries 5 | ======================= 6 | 7 | .. toctree:: 8 | 9 | generator 10 | helper 11 | -------------------------------------------------------------------------------- /docs/source/reference/g2p_modeling/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper functionality 3 | ==================== 4 | 5 | 6 | Mixins 7 | ------ 8 | 9 | .. currentmodule:: montreal_forced_aligner.g2p.mixins 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | G2PMixin 15 | G2PTopLevelMixin 16 | 17 | .. currentmodule:: montreal_forced_aligner.g2p.trainer 18 | 19 | .. autosummary:: 20 | :toctree: generated/ 21 | 22 | G2PTrainer 23 | 24 | Helper 25 | ------ 26 | 27 | .. currentmodule:: montreal_forced_aligner.g2p.trainer 28 | 29 | .. autosummary:: 30 | :toctree: generated/ 31 | 32 | RandomStartWorker 33 | RandomStart 34 | -------------------------------------------------------------------------------- /docs/source/reference/g2p_modeling/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _g2p_modeling_api: 3 | 4 | Grapheme-to-Phoneme (G2P) models 5 | ================================ 6 | 7 | G2P models are used to generate pronunciations from orthographic spellings. The G2P models currently supported use Pynini weighted finite state transducers (wFST) trained on a pronunciation lexicon. 8 | 9 | .. currentmodule:: montreal_forced_aligner.models 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | G2PModel 15 | 16 | .. toctree:: 17 | 18 | training 19 | helper 20 | -------------------------------------------------------------------------------- /docs/source/reference/g2p_modeling/training.rst: -------------------------------------------------------------------------------- 1 | Training G2P models 2 | =================== 3 | 4 | Pynini Pair Ngram 5 | ----------------- 6 | 7 | 8 | .. currentmodule:: montreal_forced_aligner.g2p.trainer 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | PyniniTrainer -- Trainer for Pynini G2P model 14 | PyniniValidator -- Validator for Pynini G2P model 15 | 16 | Phonetisaurus-style models 17 | -------------------------- 18 | 19 | .. currentmodule:: montreal_forced_aligner.g2p.phonetisaurus_trainer 20 | 21 | .. autosummary:: 22 | :toctree: generated/ 23 | 24 | PhonetisaurusTrainer -- Trainer for Phonetisaurus G2P model 25 | 26 | Mixins 27 | ------ 28 | 29 | .. currentmodule:: montreal_forced_aligner.g2p.trainer 30 | 31 | .. 
autosummary:: 32 | :toctree: generated/ 33 | 34 | PyniniTrainerMixin 35 | 36 | .. currentmodule:: montreal_forced_aligner.g2p.phonetisaurus_trainer 37 | 38 | .. autosummary:: 39 | :toctree: generated/ 40 | 41 | PhonetisaurusTrainerMixin 42 | -------------------------------------------------------------------------------- /docs/source/reference/helper/abc.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.abc 2 | 3 | .. autosummary:: 4 | :toctree: generated/ 5 | 6 | KaldiFunction 7 | MfaModel -- Base model type for MFA 8 | MfaWorker -- Base worker class for MFA 9 | TopLevelMfaWorker -- MFA workers that have acoustic models 10 | TrainerMixin -- Trainer type interface 11 | TemporaryDirectoryMixin -- Mixin for temporary directory functionality 12 | DatabaseMixin -- Mixin for database functionality 13 | AdapterMixin -- Adaptation type interface 14 | ExporterMixin -- Abstract exporter type interface 15 | FileExporterMixin -- File exporter type interface 16 | ModelExporterMixin -- Model exporter type interface 17 | 18 | .. automodule:: montreal_forced_aligner.models 19 | 20 | .. autosummary:: 21 | :toctree: generated/ 22 | 23 | Archive 24 | -------------------------------------------------------------------------------- /docs/source/reference/helper/config.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.config 2 | 3 | .. autosummary:: 4 | :toctree: generated/ 5 | 6 | MfaConfiguration 7 | MfaProfile 8 | get_temporary_directory 9 | generate_config_path 10 | generate_command_history_path 11 | load_command_history 12 | update_command_history 13 | -------------------------------------------------------------------------------- /docs/source/reference/helper/data.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.data 2 | 3 | .. autosummary:: 4 | :toctree: generated/ 5 | 6 | MfaArguments 7 | TextFileType 8 | SoundFileType 9 | SoundFileInformation 10 | PhoneSetType 11 | WordData 12 | WordType 13 | PhoneType 14 | WorkflowType 15 | DatabaseImportData 16 | PronunciationProbabilityCounter 17 | CtmInterval 18 | -------------------------------------------------------------------------------- /docs/source/reference/helper/exceptions.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.exceptions 2 | 3 | .. autosummary:: 4 | :toctree: generated/ 5 | 6 | MFAError 7 | SoxError 8 | G2PError 9 | ConfigError 10 | LMError 11 | LanguageModelNotFoundError 12 | ModelExtensionError 13 | ThirdpartyError 14 | TrainerError 15 | ModelError 16 | CorpusError 17 | ModelLoadError 18 | CorpusReadError 19 | ArgumentError 20 | AlignmentExportError 21 | NoSuccessfulAlignments 22 | KaldiProcessingError 23 | TextParseError 24 | TextGridParseError 25 | DictionaryError 26 | NoDefaultSpeakerDictionaryError 27 | DictionaryPathError 28 | DictionaryFileError 29 | FileArgumentNotFoundError 30 | PretrainedModelNotFoundError 31 | MultipleModelTypesFoundError 32 | ModelTypeNotSupportedError 33 | PronunciationAcousticMismatchError 34 | RootDirectoryError 35 | -------------------------------------------------------------------------------- /docs/source/reference/helper/helper.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.helper 2 | 3 | .. 
autosummary:: 4 | :toctree: generated/ 5 | 6 | comma_join 7 | make_safe 8 | make_scp_safe 9 | load_scp 10 | load_scp_safe 11 | score_wer 12 | edit_distance 13 | output_mapping 14 | compare_labels 15 | overlap_scoring 16 | align_phones 17 | -------------------------------------------------------------------------------- /docs/source/reference/helper/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _helper_api: 3 | 4 | Helper 5 | ====== 6 | 7 | .. toctree:: 8 | 9 | abc 10 | config 11 | data 12 | exceptions 13 | helper 14 | textgrid 15 | utils 16 | -------------------------------------------------------------------------------- /docs/source/reference/helper/textgrid.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.textgrid 2 | 3 | .. autosummary:: 4 | :toctree: generated/ 5 | 6 | process_ctm_line 7 | export_textgrid 8 | construct_output_tiers 9 | construct_output_path 10 | output_textgrid_writing_errors 11 | -------------------------------------------------------------------------------- /docs/source/reference/helper/utils.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.utils 2 | 3 | .. autosummary:: 4 | :toctree: generated/ 5 | 6 | Counter 7 | run_kaldi_function 8 | thirdparty_binary 9 | log_kaldi_errors 10 | parse_logs 11 | -------------------------------------------------------------------------------- /docs/source/reference/ivector/helper.rst: -------------------------------------------------------------------------------- 1 | Training functionality 2 | ====================== 3 | 4 | Mixins 5 | ------ 6 | 7 | .. currentmodule:: montreal_forced_aligner.ivector.trainer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | IvectorModelTrainingMixin 13 | 14 | Helper 15 | ------ 16 | 17 | .. currentmodule:: montreal_forced_aligner.ivector.trainer 18 | 19 | .. autosummary:: 20 | :toctree: generated/ 21 | 22 | GmmGselectFunction 23 | GmmGselectArguments 24 | GaussToPostFunction 25 | GaussToPostArguments 26 | AccGlobalStatsFunction 27 | AccGlobalStatsArguments 28 | AccIvectorStatsFunction 29 | AccIvectorStatsArguments 30 | -------------------------------------------------------------------------------- /docs/source/reference/ivector/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _ivector_api: 3 | 4 | Ivector extraction 5 | ================== 6 | 7 | .. currentmodule:: montreal_forced_aligner.models 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | IvectorExtractorModel 13 | 14 | .. toctree:: 15 | 16 | training 17 | helper 18 | -------------------------------------------------------------------------------- /docs/source/reference/ivector/training.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _training_ivector_api: 3 | 4 | Training ivector extractors 5 | =========================== 6 | 7 | .. currentmodule:: montreal_forced_aligner.ivector.trainer 8 | 9 | .. 
autosummary:: 10 | :toctree: generated/ 11 | 12 | IvectorTrainer -- Training ivector extractor models 13 | DubmTrainer -- Training block for DUBM 14 | TrainableIvectorExtractor -- Top level worker for running Ivector training pipelines 15 | -------------------------------------------------------------------------------- /docs/source/reference/language_modeling/helper.rst: -------------------------------------------------------------------------------- 1 | Helper functionality 2 | ==================== 3 | 4 | Mixins 5 | ------ 6 | 7 | .. currentmodule:: montreal_forced_aligner.language_modeling.trainer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | LmTrainerMixin -- Mixin for language model training 13 | LmCorpusTrainerMixin -- Mixin for language model training on a corpus 14 | LmDictionaryCorpusTrainerMixin -- Mixin for language model training on a corpus with a pronunciation dictionary 15 | 16 | 17 | Helper 18 | ------ 19 | 20 | .. currentmodule:: montreal_forced_aligner.language_modeling.multiprocessing 21 | 22 | .. autosummary:: 23 | :toctree: generated/ 24 | 25 | TrainSpeakerLmFunction 26 | TrainSpeakerLmArguments 27 | -------------------------------------------------------------------------------- /docs/source/reference/language_modeling/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _language_modeling_api: 3 | 4 | Language models 5 | =============== 6 | 7 | Language models allow for transcription via Speech-to-Text when used alongside acoustic models and pronunciation dictionaries. 8 | 9 | .. currentmodule:: montreal_forced_aligner.models 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | LanguageModel 15 | 16 | .. toctree:: 17 | 18 | training 19 | helper 20 | -------------------------------------------------------------------------------- /docs/source/reference/language_modeling/training.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _language_model_training_api: 3 | 4 | Training language models 5 | ======================== 6 | 7 | .. currentmodule:: montreal_forced_aligner.language_modeling.trainer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | MfaLmCorpusTrainer -- Trainer for language model on text corpora 13 | MfaLmDictionaryCorpusTrainer -- Trainer for language model on text corpora 14 | MfaLmArpaTrainer -- Trainer for MFA language model on arpa format language model 15 | -------------------------------------------------------------------------------- /docs/source/reference/segmentation/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper functions 3 | ================ 4 | 5 | .. currentmodule:: montreal_forced_aligner.vad.multiprocessing 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | SegmentVadFunction 11 | SegmentVadArguments 12 | SegmentVadFunction 13 | SegmentVadArguments 14 | get_initial_segmentation 15 | merge_segments 16 | segment_utterance_transcript 17 | segment_utterance_vad 18 | segment_utterance_vad_speech_brain 19 | -------------------------------------------------------------------------------- /docs/source/reference/segmentation/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _segmentation_api: 3 | 4 | Segmentation 5 | ============ 6 | 7 | Segmentation aims to break long audio files into chunks of speech. 8 | 9 | .. 
note:: 10 | 11 | The current implementation of segmentation uses only Voice Activity Detection (VAD) features. There's been some work towards getting a full speaker diarization set up going with :ref:`training_ivector_api` but that's largely planned for 2.1. 12 | 13 | .. toctree:: 14 | 15 | main 16 | helper 17 | -------------------------------------------------------------------------------- /docs/source/reference/segmentation/main.rst: -------------------------------------------------------------------------------- 1 | 2 | Segmenter 3 | ========= 4 | 5 | .. currentmodule:: montreal_forced_aligner.vad.segmenter 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | VadSegmenter 11 | TranscriptionSegmenter 12 | -------------------------------------------------------------------------------- /docs/source/reference/server/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _server_api: 3 | 4 | Managing MFA servers 5 | ==================== 6 | 7 | Functions 8 | --------- 9 | 10 | .. currentmodule:: montreal_forced_aligner.command_line.utils 11 | 12 | .. autosummary:: 13 | :toctree: generated/ 14 | 15 | configure_pg 16 | initialize_server 17 | check_databases 18 | start_server 19 | stop_server 20 | delete_server 21 | -------------------------------------------------------------------------------- /docs/source/reference/tokenization/helper.rst: -------------------------------------------------------------------------------- 1 | Helper functionality 2 | ==================== 3 | 4 | Helper 5 | ------ 6 | 7 | .. currentmodule:: montreal_forced_aligner.tokenization.tokenizer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | TokenizerRewriter 13 | TokenizerArguments 14 | TokenizerFunction 15 | 16 | 17 | Helper 18 | ------ 19 | 20 | .. currentmodule:: montreal_forced_aligner.tokenization.simple 21 | 22 | .. autosummary:: 23 | :toctree: generated/ 24 | 25 | SanitizeFunction 26 | SplitWordsFunction 27 | -------------------------------------------------------------------------------- /docs/source/reference/tokenization/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _tokenization_api: 3 | 4 | Tokenizers 5 | ========== 6 | 7 | Tokenizers allow for adding spaces as word boundaries for orthographic systems that don't normally use them (i.e., Japanese, Chinese, Thai). 8 | 9 | .. currentmodule:: montreal_forced_aligner.models 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | TokenizerModel 15 | 16 | .. toctree:: 17 | 18 | training 19 | tokenizer 20 | helper 21 | -------------------------------------------------------------------------------- /docs/source/reference/tokenization/tokenizer.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _tokenizer_api: 3 | 4 | Corpus tokenizer 5 | ================= 6 | 7 | .. currentmodule:: montreal_forced_aligner.tokenization.tokenizer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | CorpusTokenizer 13 | TokenizerValidator 14 | 15 | Simple tokenizer 16 | ================ 17 | 18 | .. currentmodule:: montreal_forced_aligner.tokenization.simple 19 | 20 | .. autosummary:: 21 | :toctree: generated/ 22 | 23 | SimpleTokenizer 24 | -------------------------------------------------------------------------------- /docs/source/reference/tokenization/training.rst: -------------------------------------------------------------------------------- 1 | 2 | .. 
_tokenizer_model_training_api: 3 | 4 | Training tokenizer models 5 | ========================= 6 | 7 | .. currentmodule:: montreal_forced_aligner.tokenization.trainer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | TokenizerTrainer -- Trainer for language model on text corpora 13 | -------------------------------------------------------------------------------- /docs/source/reference/top_level_index.rst: -------------------------------------------------------------------------------- 1 | Workflows 2 | ========= 3 | 4 | .. toctree:: 5 | 6 | alignment/index 7 | validation/index 8 | g2p/index 9 | transcription/index 10 | segmentation/index 11 | diarization/index 12 | tokenization/index 13 | -------------------------------------------------------------------------------- /docs/source/reference/transcription/helper.rst: -------------------------------------------------------------------------------- 1 | Helper functions 2 | ================ 3 | 4 | Mixins 5 | ------ 6 | 7 | .. currentmodule:: montreal_forced_aligner.transcription.transcriber 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | TranscriberMixin 13 | 14 | Decoding graph 15 | -------------- 16 | 17 | .. currentmodule:: montreal_forced_aligner.transcription.multiprocessing 18 | 19 | .. autosummary:: 20 | :toctree: generated/ 21 | 22 | CreateHclgFunction 23 | CreateHclgArguments 24 | 25 | 26 | Speaker-independent transcription 27 | --------------------------------- 28 | 29 | .. currentmodule:: montreal_forced_aligner.transcription.multiprocessing 30 | 31 | .. autosummary:: 32 | :toctree: generated/ 33 | 34 | DecodeFunction 35 | DecodeArguments 36 | LmRescoreFunction 37 | LmRescoreArguments 38 | CarpaLmRescoreFunction 39 | CarpaLmRescoreArguments 40 | 41 | Speaker-adapted transcription 42 | ----------------------------- 43 | 44 | .. currentmodule:: montreal_forced_aligner.transcription.multiprocessing 45 | 46 | .. autosummary:: 47 | :toctree: generated/ 48 | 49 | InitialFmllrFunction 50 | InitialFmllrArguments 51 | FmllrRescoreFunction 52 | FmllrRescoreArguments 53 | FinalFmllrFunction 54 | FinalFmllrArguments 55 | -------------------------------------------------------------------------------- /docs/source/reference/transcription/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _transcription_api: 3 | 4 | Transcription 5 | ============= 6 | 7 | MFA can use trained acoustic models (see :ref:`acoustic_model_training_api`), trained language models (see :ref:`language_model_training_api`), and pronunciation dictionaries (see :ref:`generating_dictionaries_api`) in order to generate transcripts for audio files. 8 | 9 | .. toctree:: 10 | 11 | main 12 | helper 13 | -------------------------------------------------------------------------------- /docs/source/reference/transcription/main.rst: -------------------------------------------------------------------------------- 1 | Transcriber 2 | =========== 3 | 4 | .. currentmodule:: montreal_forced_aligner.transcription 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | 9 | Transcriber 10 | -------------------------------------------------------------------------------- /docs/source/reference/validation/helper.rst: -------------------------------------------------------------------------------- 1 | Helper functions 2 | ================ 3 | 4 | Mixins 5 | ------ 6 | 7 | .. currentmodule:: montreal_forced_aligner.validation 8 | 9 | .. 
autosummary:: 10 | :toctree: generated/ 11 | 12 | ValidationMixin 13 | -------------------------------------------------------------------------------- /docs/source/reference/validation/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _validation_api: 3 | 4 | Validation 5 | ========== 6 | 7 | The validation utilities are used to evaluate a dataset for either training an acoustic model, or performing alignment. They will detect issues with sound files, transcription files, unalignable utterances, and can perform some simplistic evaluation of transcripts. 8 | 9 | .. toctree:: 10 | 11 | main 12 | helper 13 | -------------------------------------------------------------------------------- /docs/source/reference/validation/main.rst: -------------------------------------------------------------------------------- 1 | Validators 2 | ========== 3 | 4 | .. currentmodule:: montreal_forced_aligner.validation 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | 9 | TrainingValidator 10 | PretrainedValidator 11 | DictionaryValidator 12 | -------------------------------------------------------------------------------- /docs/source/user_guide/concepts/features.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | (acoustic_features)= 5 | # Acoustic features 6 | ================= 7 | 8 | ```{warning} 9 | 10 | Still under construction, I hope to fill these sections out as I have time. 11 | ``` 12 | 13 | 14 | (features_mfcc)= 15 | ## Mel-Frequency Cepstrum Coefficients (MFCCs) 16 | 17 | ```{seealso} 18 | * https://kaldi-asr.org/doc/feat.html#feat_mfcc 19 | ``` 20 | 21 | 22 | (features_pitch)= 23 | ## Pitch 24 | -------------------------------------------------------------------------------- /docs/source/user_guide/concepts/fst.md: -------------------------------------------------------------------------------- 1 | 2 | (fst)= 3 | # Finite State Transducers 4 | 5 | ```{warning} 6 | 7 | Still under construction, I hope to fill these sections out as I have time. 8 | ``` 9 | 10 | ```{seealso} 11 | 12 | * [OpenFst Quick Tour](https://www.openfst.org/twiki/bin/view/FST/FstQuickTour) 13 | ``` 14 | 15 | (acceptor)= 16 | ## Acceptors 17 | 18 | (wfst)= 19 | 20 | ## Weighted Finite State Transducers 21 | 22 | 23 | (lexicon_fst)= 24 | # Lexicon FSTs 25 | 26 | MFA compiles input pronunciation dictionaries to a Weighted Finite State Transducer ({term}`WFST`), with phones as input symbols and words as output symbols. During alignment, the {term}`lexicon FST` is composed with a linear acceptor created from the 27 | 28 | 29 | (grammar_fst)= 30 | 31 | # Grammar FSTs 32 | 33 | 34 | (g2p_fst)= 35 | # G2P FSTs 36 | 37 | ```{seealso} 38 | 39 | * [Pynini documentation](https://www.openfst.org/twiki/bin/view/GRM/Pynini) 40 | * [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) 41 | ``` 42 | -------------------------------------------------------------------------------- /docs/source/user_guide/concepts/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _concepts: 3 | 4 | *************** 5 | Concepts in MFA 6 | *************** 7 | 8 | This section will attempt to provide a blend of technical and non-technical overviews of various components and concepts used in MFA. There are much more in-depth resources for learning about various components that will be linked if you are interested in learning more about them. 9 | 10 | .. 
warning:: 11 | 12 | Still under construction, I hope to fill these sections out as I have time. 13 | 14 | .. toctree:: 15 | :hidden: 16 | 17 | features 18 | speaker_adaptation 19 | fst 20 | hmm 21 | -------------------------------------------------------------------------------- /docs/source/user_guide/configuration/acoustic_model_adapt.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _configuration_adapting: 3 | 4 | Acoustic model adaptation options 5 | ================================= 6 | 7 | For the Kaldi recipe that acoustic model adaptation is based on, see :kaldi_steps:`train_map`. 8 | 9 | 10 | .. csv-table:: 11 | :widths: 20, 20, 60 12 | :header: "Parameter", "Default value", "Notes" 13 | 14 | "mapping_tau", 20, "Smoothing constant used in MAP estimation, corresponds to the number of 'fake counts' that we add for the old model. Larger tau corresponds to less aggressive re-estimation, and more smoothing. You might also want to try 10 or 15." 15 | -------------------------------------------------------------------------------- /docs/source/user_guide/configuration/diarization.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _configuration_diarization: 3 | 4 | Diarization options 5 | =================== 6 | 7 | .. csv-table:: 8 | :widths: 20, 20, 60 9 | :header: "Parameter", "Default value", "Notes" 10 | :stub-columns: 1 11 | 12 | "cluster_type", ``optics``, "Clustering algorithm in :xref:`scikit-learn` to use, one of ``optics``, ``dbscan``, ``affinity``, ``agglomerative``, ``spectral``, ``kmeans``" 13 | "expected_num_speakers", 0, "Number of speaker clusters to find, must be > 1 for ``agglomerative``, ``spectral``, and ``kmeans``" 14 | "sparse_threshold", 0.5, "Threshold on distance to limit precomputed sparse matrix" 15 | 16 | .. _default_diarization_config: 17 | 18 | Default diarization config file 19 | ------------------------------- 20 | 21 | .. code-block:: yaml 22 | 23 | cluster_type: optics 24 | expected_num_speakers: 0 25 | sparse_threshold: 0.5 26 | 27 | -------------------------------------------------------------------------------- /docs/source/user_guide/configuration/lm.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _configuration_language_modeling: 3 | 4 | ******************************* 5 | Language model training options 6 | ******************************* 7 | 8 | See also the :ref:`configuration_dictionary` for the options that control how text is normalized and parsed. 9 | 10 | 11 | .. csv-table:: 12 | :widths: 20, 20, 60 13 | :header: "Parameter", "Default value", "Notes" 14 | 15 | "order", 3, "Order of language model" 16 | "method", kneser_ney, "Method for smoothing" 17 | "prune_thresh_small", 0.0000003, "Threshold for pruning a small model, only used if ``prune`` is true" 18 | "prune_thresh_medium", 0.0000001, "Threshold for pruning a medium model, only used if ``prune`` is true" 19 | 20 | Default language model config 21 | ----------------------------- 22 | 23 | .. code-block:: yaml 24 | 25 | order: 3 26 | method: kneser_ney 27 | prune_thresh_small: 0.0000003 28 | prune_thresh_medium: 0.0000001 29 | -------------------------------------------------------------------------------- /docs/source/user_guide/configuration/segment.rst: -------------------------------------------------------------------------------- 1 | 2 | .. 
_configuration_segmentation: 3 | 4 | ******************** 5 | Segmentation options 6 | ******************** 7 | 8 | 9 | .. csv-table:: 10 | :widths: 20, 20, 60 11 | :header: "Parameter", "Default value", "Notes" 12 | 13 | "energy_threshold", 5.5, "Energy threshold above which a frame will be counted as voiced" 14 | "energy_mean_scale", 0.5, "Proportion of the mean energy of the file that should be added to the energy_threshold" 15 | "max_segment_length", 30, "Maximum length of segments before they do not get merged" 16 | "min_pause_duration", 0.05, "Minimum unvoiced duration to split speech segments" 17 | 18 | .. _default_segment_config: 19 | 20 | Default segmentation config file 21 | -------------------------------- 22 | 23 | .. code-block:: yaml 24 | 25 | energy_threshold: 5.5 26 | energy_mean_scale: 0.5 27 | max_segment_length: 30 28 | min_pause_duration: 0.05 29 | -------------------------------------------------------------------------------- /docs/source/user_guide/configuration/transcription.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _transcribe_config: 3 | 4 | ********************* 5 | Transcription options 6 | ********************* 7 | 8 | .. csv-table:: 9 | :widths: 20, 20, 60 10 | :header: "Parameter", "Default value", "Notes" 11 | 12 | "beam", 13, "Beam for decoding" 13 | "max_active", 7000, "Max active for decoding" 14 | "lattice_beam", 6, "Beam width for decoding lattices" 15 | "acoustic_scale", 0.083333, "Multiplier to scale acoustic costs" 16 | "silence_weight", 0.01, "Weight on silence in fMLLR estimation" 17 | "uses_speaker_adaptation", true, "Flag for whether to perform speaker adaptation" 18 | "first_beam", 10.0, "Beam for decoding in initial speaker-independent pass, only used if ``uses_speaker_adaptation`` is true" 19 | "first_max_active", 2000, "Max active for decoding in initial speaker-independent pass, only used if ``uses_speaker_adaptation`` is true" 20 | "fmllr_update_type", "full", "Type of fMLLR estimation" 21 | 22 | Default transcriber config 23 | -------------------------- 24 | 25 | .. code-block:: yaml 26 | 27 | beam: 13 28 | max_active: 7000 29 | lattice_beam: 6 30 | acoustic_scale: 0.083333 31 | silence_weight: 0.01 32 | fmllr: true 33 | first_beam: 10.0 # Beam used in initial, speaker-indep. pass 34 | first_max_active: 2000 # max-active used in initial pass. 35 | fmllr_update_type: full 36 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/anchor.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _`Anchor Annotator documentation`: https://anchor-annotator.readthedocs.io/en/latest/ 3 | 4 | .. _anchor: 5 | 6 | Anchor annotator ``(mfa anchor)`` 7 | ================================= 8 | 9 | The Anchor Annotator is a GUI utility for MFA that allows users to modify transcripts and add/change entries in the pronunciation dictionary to interactively fix out-of-vocabulary issues. 10 | 11 | .. attention:: 12 | 13 | Anchor is under development and is currently pre-alpha. Use at your own risk and please use version control or back up any critical data. 14 | 15 | 16 | To use the annotator, first install the anchor subpackage: 17 | 18 | .. code-block:: 19 | 20 | conda install montreal-forced-aligner[anchor] 21 | 22 | This will install MFA if it hasn't been installed already, along with all the packages that Anchor requires. Once installed, Anchor can be started with the ``mfa anchor`` subcommand, as in the short session sketched below. 
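For reference, a minimal console session might look like the following (a sketch only; it assumes the commands are run in the same conda environment where MFA itself is installed):

.. code-block::

    # Install the optional Anchor dependencies (only needed once)
    conda install montreal-forced-aligner[anchor]

    # Launch the Anchor annotator GUI
    mfa anchor
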
23 | 24 | See the `Anchor Annotator documentation`_ for more information. 25 | 26 | Command reference 27 | ================= 28 | 29 | 30 | .. click:: montreal_forced_aligner.command_line.anchor:anchor_cli 31 | :prog: mfa anchor 32 | :nested: full 33 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/create_segments.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _create_segments: 3 | 4 | Segment transcribed files ``(mfa segment)`` 5 | =========================================== 6 | 7 | The Montreal Forced Aligner can use Voice Activity Detection (VAD) capabilities from :xref:`speechbrain` to generate segments from 8 | a longer sound file, while attempting to segment transcripts as well. If you do not have transcripts, see :ref:`create_segments_vad`. 9 | 10 | .. note:: 11 | 12 | On Windows, if you get an ``OSError/WinError 1314`` during the run, follow `these instructions `_ to enable symbolic link creation permissions. 13 | 14 | Command reference 15 | ----------------- 16 | 17 | .. click:: montreal_forced_aligner.command_line.create_segments:create_segments_cli 18 | :prog: mfa segment 19 | :nested: full 20 | 21 | 22 | Configuration reference 23 | ----------------------- 24 | 25 | - :ref:`configuration_segmentation` 26 | 27 | API reference 28 | ------------- 29 | 30 | - :ref:`segmentation_api` 31 | 32 | .. _create_segments_vad: 33 | 34 | Segment untranscribed files ``(mfa segment_vad)`` 35 | ================================================= 36 | 37 | The Montreal Forced Aligner can use Voice Activity Detection (VAD) capabilities from :xref:`speechbrain` or energy based VAD to generate segments from 38 | a longer sound file. This command does not split transcripts, instead assigning a default label of "speech" to all identified speech segments. If you would like to preserve transcripts for each segment, see :ref:`create_segments`. 39 | 40 | .. note:: 41 | 42 | On Windows, if you get an ``OSError/WinError 1314`` during the run, follow `these instructions `_ to enable symbolic link creation permissions. 43 | 44 | Command reference 45 | ----------------- 46 | 47 | .. click:: montreal_forced_aligner.command_line.create_segments:create_segments_vad_cli 48 | :prog: mfa segment_vad 49 | :nested: full 50 | 51 | 52 | Configuration reference 53 | ----------------------- 54 | 55 | - :ref:`configuration_segmentation` 56 | 57 | API reference 58 | ------------- 59 | 60 | - :ref:`segmentation_api` 61 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/index.rst: -------------------------------------------------------------------------------- 1 | .. _corpus_creation: 2 | 3 | ************************* 4 | Corpus creation utilities 5 | ************************* 6 | 7 | MFA now contains several command line utilities for helping to create corpora from scratch. The main workflow is as follows: 8 | 9 | 1. If the corpus is made up of long sound files that need segmenting, :ref:`segment the audio files using VAD ` 10 | 2. If the corpus does not contain transcriptions, :ref:`transcribe utterances using existing acoustic models, 11 | language models, and dictionaries ` 12 | 3. Use the :ref:`Anchor annotator tool ` to manually correct errors in the transcriptions 13 | 4. As necessary, bootstrap better transcriptions: 14 | 15 | 1. :ref:`Train language model ` with updated transcriptions 16 | 2. 
:ref:`Add pronunciation and silence probabilities to the dictionary ` 17 | 18 | .. toctree:: 19 | :hidden: 20 | 21 | create_segments 22 | train_ivector 23 | diarize_speakers 24 | transcribing 25 | training_lm 26 | training_dictionary 27 | tokenize 28 | train_tokenizer 29 | anchor 30 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/tokenize.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _tokenize_cli: 3 | 4 | Tokenize utterances ``(mfa tokenize)`` 5 | ========================================= 6 | 7 | Use a model trained from :ref:`train_tokenizer_cli` to tokenize a corpus (i.e. insert spaces as word boundaries for orthographic systems that do not require them). 8 | 9 | Command reference 10 | ----------------- 11 | 12 | .. click:: montreal_forced_aligner.command_line.tokenize:tokenize_cli 13 | :prog: mfa tokenize 14 | :nested: full 15 | 16 | 17 | API reference 18 | ------------- 19 | 20 | - :ref:`tokenization_api` 21 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/train_ivector.rst: -------------------------------------------------------------------------------- 1 | .. _train_ivector: 2 | 3 | Train an ivector extractor ``(mfa train_ivector)`` 4 | ================================================== 5 | 6 | The Montreal Forced Aligner can train :term:`ivector extractors` using an acoustic model for generating alignments. As part of this training process, a classifier is built in that can be used as part of :ref:`diarize_speakers`. 7 | 8 | 9 | Command reference 10 | ----------------- 11 | 12 | .. click:: montreal_forced_aligner.command_line.train_ivector_extractor:train_ivector_cli 13 | :prog: mfa train_ivector 14 | :nested: full 15 | 16 | Configuration reference 17 | ----------------------- 18 | 19 | - :ref:`configuration_ivector` 20 | 21 | API reference 22 | ------------- 23 | 24 | - :ref:`ivector_api` 25 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/train_tokenizer.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _train_tokenizer_cli: 3 | 4 | Train a word tokenizer ``(mfa train_tokenizer)`` 5 | ================================================ 6 | 7 | Training a tokenizer uses a simplified sequence-to-sequence model like G2P, but with the following differences: 8 | 9 | * Both the input and output symbols are graphemes 10 | * Symbols can only output themselves 11 | * Only allow for inserting space characters 12 | 13 | Command reference 14 | ----------------- 15 | 16 | .. click:: montreal_forced_aligner.command_line.train_tokenizer:train_tokenizer_cli 17 | :prog: mfa train_tokenizer 18 | :nested: full 19 | 20 | 21 | API reference 22 | ------------- 23 | 24 | - :ref:`tokenization_api` 25 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/training_lm.rst: -------------------------------------------------------------------------------- 1 | .. _training_lm: 2 | 3 | Train a new language model ``(mfa train_lm)`` 4 | ============================================== 5 | 6 | MFA has a utility function for training ARPA-format ngram :term:`language models`, as well as merging with a pre-existing model. 7 | 8 | 9 | .. 
note:: 10 | 11 | As of version 2.0.6, users on Windows can run this command natively without requiring :xref:`wsl`; see :ref:`installation` for more details. 12 | 13 | Command reference 14 | ----------------- 15 | 16 | .. click:: montreal_forced_aligner.command_line.train_lm:train_lm_cli 17 | :prog: mfa train_lm 18 | :nested: full 19 | 20 | Configuration reference 21 | ----------------------- 22 | 23 | - :ref:`configuration_language_modeling` 24 | 25 | API reference 26 | ------------- 27 | 28 | - :ref:`language_modeling_api` 29 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/transcribing.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _transcribing: 3 | 4 | Transcribe audio files ``(mfa transcribe)`` 5 | =========================================== 6 | 7 | MFA has some limited ability to use its acoustic and language models for performing transcription. The intent of this functionality is largely to aid in offline corpus construction, and not as an online capability like most ASR systems. 8 | 9 | .. seealso:: 10 | 11 | See :ref:`train_acoustic_model` and :ref:`training_lm` for details on training MFA models to use in transcription. 12 | 13 | Unlike alignment, transcription does not require transcribed audio files (except when running in :ref:`transcription_evaluation`), but instead will use the combination of acoustic model, language model, and pronunciation dictionary to create a decoding lattice and find the best path through it. When training a language model for transcription, it is recommended to train one on text/speech transcripts that are in the same domain to minimize errors. 14 | 15 | .. warning:: 16 | 17 | The technology that MFA uses is several years out of date, and as such if you have other options available such as :xref:`coqui` or other production systems for :abbr:`STT (Speech to Text)`, we recommend using those. The transcription capabilities are more here for completeness. 18 | 19 | .. _transcription_evaluation: 20 | 21 | Evaluation mode 22 | --------------- 23 | 24 | Transcriptions can be compared to gold-standard references by transcribing a corpus in the same format as for alignment (i.e., each sound file has a corresponding TextGrid or lab file). Transcription will proceed as above, and then the resulting transcripts will be aligned with the gold transcriptions using the :mod:`Bio.pairwise2` alignment algorithm. From the aligned transcripts, Word Error Rate and Character Error Rate will be calculated for each utterance as follows: 25 | 26 | .. math:: 27 | 28 | Error \: rate = \frac{insertions + deletions + (2 * substitutions)} {length_{ref}} 29 | 30 | 31 | Command reference 32 | ----------------- 33 | 34 | .. click:: montreal_forced_aligner.command_line.transcribe:transcribe_corpus_cli 35 | :prog: mfa transcribe 36 | :nested: full 37 | 38 | Configuration reference 39 | ----------------------- 40 | 41 | - :ref:`transcribe_config` 42 | 43 | API reference 44 | ------------- 45 | 46 | - :ref:`transcription_api` 47 | -------------------------------------------------------------------------------- /docs/source/user_guide/data_validation.rst: -------------------------------------------------------------------------------- 1 | 2 | .. 
_validating_data: 3 | 4 | *************** 5 | Validating data 6 | *************** 7 | 8 | The validation utility will perform the basic set up that alignment would perform, but analyzes and reports any issues 9 | that the user may want to fix. 10 | 11 | First, the utility parses the corpus and dictionary, prints out summary information about the corpus, 12 | and logs any of the following issues: 13 | 14 | - If there are any words in transcriptions that are not in the dictionary, these are logged as out-of-vocabulary items (OOVs). 15 | A list of these OOVs and which utterances they appear in are saved to text files. 16 | - Any issues reading sound files 17 | - Any issues generating features, skipped if ``--ignore_acoustics`` is flagged 18 | - Mismatches in sound files and transcriptions 19 | - Any issues reading transcription files 20 | - Any unaligned files from trial alignment run, skipped if ``--ignore_acoustics`` is flagged 21 | - If no acoustic model is specified, a monophone model is trained for testing alignment 22 | 23 | - Any files that have deviations from their original transcription to decoded transcriptions using a simple language model when ``--test_transcriptions`` is supplied 24 | - Ngram language models for each speaker are generated and merged with models for each utterance for use in decoding utterances, which may help you find transcription or data inconsistency issues in the corpus 25 | 26 | .. _phone_confidence: 27 | 28 | Phone confidence 29 | ================ 30 | 31 | The phone confidence functionality of the validation utility is similar to :ref:`phone_models` in that both are trying to represent the "goodness" of the phone label for the given interval. Where phone models use the acoustic model in combination with a phone language model, phone confidence simply calculates the likelihoods of each phone for each frame 32 | 33 | .. _running_the_validator: 34 | 35 | Running the corpus validation utility 36 | ===================================== 37 | 38 | 39 | Command reference 40 | ----------------- 41 | 42 | .. click:: montreal_forced_aligner.command_line.validate:validate_corpus_cli 43 | :prog: mfa validate 44 | :nested: full 45 | -------------------------------------------------------------------------------- /docs/source/user_guide/dictionary_validation.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _validating_dictionaries: 3 | 4 | ************************************* 5 | Validating pronunciation dictionaries 6 | ************************************* 7 | 8 | 9 | .. _running_the_dictionary_validator: 10 | 11 | Running the dictionary validation utility 12 | ========================================= 13 | 14 | 15 | Command reference 16 | ----------------- 17 | 18 | .. click:: montreal_forced_aligner.command_line.validate:validate_dictionary_cli 19 | :prog: mfa validate_dictionary 20 | :nested: full 21 | -------------------------------------------------------------------------------- /docs/source/user_guide/implementations/alignment_evaluation.md: -------------------------------------------------------------------------------- 1 | 2 | (alignment_evaluation)= 3 | # Evaluating alignments 4 | 5 | Alignments can be compared to a gold-standard reference set by specifying the `--reference_directory` below. MFA will load all TextGrids and parse them as if they were exported by MFA (i.e., phone and speaker tiers per speaker). The phone intervals will be aligned using the {mod}`Bio.pairwise2` alignment algorithm. 
If the reference TextGrids use a different phone set, then a custom mapping yaml file can be specified via the `--custom_mapping_path` flag. As an example, the Buckeye reference alignments used in [Update on Montreal Forced Aligner performance](https://memcauliffe.com/update-on-montreal-forced-aligner-performance.html) use their own ARPA-based phone set that removes stress integers, is lower case, and has syllabic sonorants. To map alignments generated with the `english` model and dictionary that use standard ARPA, a yaml file like the following allows for a better alignment of reference phones to aligned phones. 6 | 7 | :::yaml 8 | N: [en, n] 9 | M: [em, m] 10 | L: [el, l] 11 | AA0: aa 12 | AE0: ae 13 | AH0: ah 14 | AO0: ao 15 | AW0: aw 16 | ::: 17 | 18 | Using the above file, both {ipa_inline}`en` and {ipa_inline}`n` phones in the Buckeye corpus will not be penalized when matched with {ipa_inline}`N` phones output by MFA. 19 | 20 | In addition to any custom mapping, phone boundaries are used in the cost function for the {mod}`Bio.pairwise2` alignment algorithm as follows: 21 | 22 | :::{math} 23 | Overlap \: cost = -1 * \biggl(\lvert begin_{aligned} - begin_{ref} \rvert + \lvert end_{aligned} - end_{ref} \rvert + \begin{cases} 24 | 0, & label_{1} = label_{2} \\ 25 | 2, & otherwise 26 | \end{cases}\biggr) 27 | ::: 28 | 29 | The two metrics calculated for each utterance are overlap score and phone error rate. Overlap score is calculated similarly to the above cost function for each phone (excluding phones that are aligned to silence or were inserted/deleted) and averaged over the utterance: 30 | 31 | :::{math} 32 | Alignment \: score = \frac{Overlap \: cost}{2} 33 | ::: 34 | 35 | Phone error rate is calculated as: 36 | 37 | :::{math} 38 | Phone \: error \: rate = \frac{insertions + deletions + (2 * substitutions)} {length_{ref}} 39 | ::: 40 | -------------------------------------------------------------------------------- /docs/source/user_guide/implementations/fine_tune.md: -------------------------------------------------------------------------------- 1 | 2 | (fine_tune_alignments)= 3 | 4 | # Fine-tuning alignments 5 | 6 | By default and standard in ASR, the frame step between feature frames is set to 10 ms, which limits the accuracy of MFA to a minimum of 0.01 seconds. When the `--fine_tune` flag is specified, the aligner does an extra fine-tuning step following alignment. The audio surrounding each interval's initial boundary is extracted with a frame step of 1 ms (0.001s) and is aligned using a simple phone dictionary combined with a transcript of the previous phone and the current phone. Extracting the phone alignment gives the possibility of higher degrees of accuracy (down to 1ms). 7 | 8 | :::{warning} 9 | 10 | The actual accuracy bound is not clear as each frame uses the surrounding 25ms to generate features, so each frame necessarily incorporates time-smeared acoustic information. 11 | ::: 12 | -------------------------------------------------------------------------------- /docs/source/user_guide/implementations/index.md: -------------------------------------------------------------------------------- 1 | 2 | # In depth guides 3 | 4 | :::{warning} 5 | This section is under construction! 
6 | ::: 7 | 8 | ```{toctree} 9 | :hidden: 10 | 11 | phone_groups 12 | phonological_rules 13 | lexicon_probabilities 14 | alignment_analysis 15 | alignment_evaluation 16 | fine_tune 17 | phone_models 18 | ``` 19 | -------------------------------------------------------------------------------- /docs/source/user_guide/implementations/phone_models.md: -------------------------------------------------------------------------------- 1 | 2 | (phone_models)= 3 | # Phone model alignments 4 | 5 | With the `--use_phone_model` flag, an ngram language model for phones will be constructed and used to generate phone transcripts with alignments. The phone language model uses bigrams and higher orders (up to 4), with no unigrams included to speed up transcription (and because the phonotactics of languages highly constrain the possible sequences of phones). The phone language model is trained on phone transcriptions extracted from alignments and includes silence and OOV phones. 6 | 7 | The phone transcription additionally uses speaker-adaptation transforms from the regular alignment as well to speed up transcription. From the phone transcription lattices, we extract phone-level alignments along with confidence score using {kaldi_src}`lattice-to-ctm-conf`. 8 | 9 | The alignments extracted from phone transcriptions are compared to the baseline alignments using the procedure outlined in {ref}`alignment_evaluation` above. 10 | -------------------------------------------------------------------------------- /docs/source/user_guide/models/index.rst: -------------------------------------------------------------------------------- 1 | .. _pretrained_models: 2 | 3 | ***************** 4 | Pretrained models 5 | ***************** 6 | 7 | The command for interacting with MFA models is :code:`mfa model`. The subcommands allow for inspecting currently saved pretrained models, downloading ones from MFA's model repo, and saving models you have trained to be used with a simple name rather than the full path each time. 8 | 9 | Following installation of MFA, :code:`mfa model list acoustic` will not list any models. If you want to download the default English model trained on LibriSpeech, you can run :code:`mfa model download acoustic english_us_arpa`. At which point, the previous ``list`` command will output "english_us_arpa" as an option. When referring to an acoustic model in another MFA command, rather than the full path to the acoustic model, you can now supply just ``english_us_arpa`` and MFA will resolve it to the saved path. 10 | 11 | Similarly, if you train a new model, you can run :code:`mfa model save acoustic /path/where/the/model/was/saved.zip`, then this model will be available via ``saved`` in the future. The name defaults to whatever the archive is called without the directory or extension. You can modify this name with the ``--name NEWNAME`` option 12 | 13 | There are a number of pretrained models for aligning and generating pronunciation dictionaries. The command 14 | for downloading these is :code:`mfa model download ` where ``model_type`` is one of ``acoustic``, ``g2p``, or 15 | ``dictionary``. 16 | 17 | .. note:: 18 | 19 | Please see the :xref:`mfa_models` site for information and statistics about various models. 20 | 21 | 22 | Command reference 23 | ----------------- 24 | 25 | .. 
click:: montreal_forced_aligner.command_line.model:model_cli 26 | :prog: mfa model 27 | :nested: full 28 | -------------------------------------------------------------------------------- /docs/source/user_guide/workflows/adapt_acoustic_model.rst: -------------------------------------------------------------------------------- 1 | .. _adapt_acoustic_model: 2 | 3 | Adapt acoustic model to new data ``(mfa adapt)`` 4 | ================================================ 5 | 6 | Functionality added in MFA 2.0 allows pretrained :term:`acoustic models` to be adapted to a new dataset. MFA will first align the dataset using the pretrained model, and then update the acoustic model's GMM means with those estimated from the data. See :kaldi_steps:`train_map` for the Kaldi script this functionality corresponds to. As part of the adaptation process, MFA can generate final alignments and export these files if an output directory is specified in the command. 7 | 8 | 9 | Command reference 10 | ----------------- 11 | 12 | .. click:: montreal_forced_aligner.command_line.adapt:adapt_model_cli 13 | :prog: mfa adapt 14 | :nested: full 15 | 16 | Configuration reference 17 | ----------------------- 18 | 19 | - :ref:`configuration_global` 20 | - :ref:`configuration_adapting` 21 | 22 | API reference 23 | ------------- 24 | 25 | - :class:`~montreal_forced_aligner.alignment.AdaptingAligner` 26 | -------------------------------------------------------------------------------- /docs/source/user_guide/workflows/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _workflows_index: 3 | 4 | Workflows available 5 | =================== 6 | 7 | The primary workflow in MFA is forced alignment, where text is aligned to speech along with phones derived from a pronunciation dictionary and an acoustic model. There are, however, other workflows for transcribing speech using speech-to-text functionality in Kaldi, pronunciation dictionary creation using Pynini, and some basic corpus creation utilities like VAD-based segmentation. Additionally, acoustic models, G2P models, and language models can be trained from your own data (and then used in alignment and other workflows). 8 | 9 | .. warning:: 10 | 11 | Speech-to-text functionality is fairly basic, and the models used in MFA are older GMM-HMM acoustic models and n-gram language models, so something like :xref:`coqui` or Kaldi's ``nnet`` functionality will likely yield better-quality transcriptions. 12 | 13 | .. hint:: 14 | 15 | See :ref:`pretrained_models` for details about commands to inspect, download, and save various pretrained MFA models. 16 | 17 | ..
toctree:: 18 | :hidden: 19 | 20 | alignment 21 | adapt_acoustic_model 22 | train_acoustic_model 23 | dictionary_generating 24 | g2p_train 25 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - pytorch 4 | - nvidia 5 | - anaconda 6 | dependencies: 7 | - python>=3.8 8 | - numpy 9 | - librosa 10 | - pysoundfile 11 | - tqdm 12 | - requests 13 | - pyyaml 14 | - dataclassy 15 | - kaldi=*=*cpu* 16 | - scipy 17 | - pynini 18 | - openfst=1.8.3 19 | - scikit-learn 20 | - hdbscan 21 | - baumwelch 22 | - ngram 23 | - praatio=6.0.0 24 | - biopython 25 | - sqlalchemy>=2.0 26 | - pgvector 27 | - pgvector-python 28 | - sqlite 29 | - postgresql 30 | - psycopg2 31 | - click 32 | - setuptools_scm 33 | - pytest 34 | - pytest-mypy 35 | - pytest-cov 36 | - pytest-timeout 37 | - mock 38 | - coverage 39 | - coveralls 40 | - interrogate 41 | - kneed 42 | - matplotlib 43 | - seaborn 44 | - pip 45 | - rich 46 | - rich-click 47 | - kalpy 48 | # Tokenization dependencies 49 | - spacy 50 | - sudachipy 51 | - sudachidict-core 52 | - spacy-pkuseg 53 | - pip: 54 | - build 55 | - twine 56 | # Tokenization dependencies 57 | - python-mecab-ko 58 | - jamo 59 | - pythainlp 60 | - hanziconv 61 | - dragonmapper 62 | -------------------------------------------------------------------------------- /github_environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python>=3.8 5 | - numpy 6 | - librosa 7 | - pysoundfile 8 | - tqdm 9 | - requests 10 | - pyyaml 11 | - dataclassy 12 | - kaldi=*=*cpu* 13 | - scipy 14 | - pynini 15 | - openfst=1.8.3 16 | - scikit-learn 17 | - hdbscan 18 | - baumwelch 19 | - ngram 20 | - praatio=6.0.0 21 | - biopython 22 | - sqlalchemy>=2.0 23 | - pgvector 24 | - pgvector-python 25 | - sqlite 26 | - postgresql 27 | - psycopg2 28 | - click 29 | - setuptools_scm 30 | - pytest 31 | - pytest-mypy 32 | - pytest-cov 33 | - pytest-timeout 34 | - mock 35 | - coverage 36 | - coveralls 37 | - interrogate 38 | - kneed 39 | - matplotlib 40 | - seaborn 41 | - rich 42 | - rich-click 43 | - kalpy 44 | -------------------------------------------------------------------------------- /montreal_forced_aligner/__init__.py: -------------------------------------------------------------------------------- 1 | """Montreal Forced Aligner is a package for aligning speech corpora through the use of acoustic models and 2 | dictionaries using Kaldi functionality.""" 3 | 4 | import montreal_forced_aligner.acoustic_modeling as acoustic_modeling 5 | import montreal_forced_aligner.alignment as alignment 6 | import montreal_forced_aligner.command_line as command_line 7 | import montreal_forced_aligner.corpus as corpus 8 | import montreal_forced_aligner.dictionary as dictionary 9 | import montreal_forced_aligner.exceptions as exceptions 10 | import montreal_forced_aligner.g2p as g2p 11 | import montreal_forced_aligner.helper as helper 12 | import montreal_forced_aligner.ivector as ivector 13 | import montreal_forced_aligner.language_modeling as language_modeling 14 | import montreal_forced_aligner.models as models 15 | import montreal_forced_aligner.textgrid as textgrid 16 | import montreal_forced_aligner.transcription as transcription 17 | import montreal_forced_aligner.utils as utils 18 | 19 | __all__ = [ 20 | "abc", 21 | "data", 22 | "acoustic_modeling", 23 | "alignment", 24 | 
"command_line", 25 | "config", 26 | "corpus", 27 | "dictionary", 28 | "exceptions", 29 | "g2p", 30 | "ivector", 31 | "language_modeling", 32 | "helper", 33 | "models", 34 | "transcription", 35 | "textgrid", 36 | "utils", 37 | ] 38 | -------------------------------------------------------------------------------- /montreal_forced_aligner/__main__.py: -------------------------------------------------------------------------------- 1 | from rich.traceback import install 2 | 3 | from montreal_forced_aligner.command_line.mfa import mfa_cli 4 | 5 | install(show_locals=True) 6 | mfa_cli() 7 | -------------------------------------------------------------------------------- /montreal_forced_aligner/acoustic_modeling/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Training acoustic models 3 | ======================== 4 | 5 | 6 | """ 7 | from montreal_forced_aligner.acoustic_modeling.base import AcousticModelTrainingMixin # noqa 8 | from montreal_forced_aligner.acoustic_modeling.lda import LdaTrainer # noqa 9 | from montreal_forced_aligner.acoustic_modeling.monophone import MonophoneTrainer # noqa 10 | from montreal_forced_aligner.acoustic_modeling.pronunciation_probabilities import ( # noqa 11 | PronunciationProbabilityTrainer, 12 | ) 13 | from montreal_forced_aligner.acoustic_modeling.sat import SatTrainer # noqa 14 | from montreal_forced_aligner.acoustic_modeling.trainer import TrainableAligner # noqa 15 | from montreal_forced_aligner.acoustic_modeling.triphone import TriphoneTrainer # noqa 16 | 17 | __all__ = [ 18 | "AcousticModelTrainingMixin", 19 | "LdaTrainer", 20 | "MonophoneTrainer", 21 | "SatTrainer", 22 | "TriphoneTrainer", 23 | "PronunciationProbabilityTrainer", 24 | "TrainableAligner", 25 | "base", 26 | "lda", 27 | "monophone", 28 | "sat", 29 | "triphone", 30 | "pronunciation_probabilities", 31 | "trainer", 32 | ] 33 | -------------------------------------------------------------------------------- /montreal_forced_aligner/alignment/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Aligners 3 | ======== 4 | 5 | """ 6 | from montreal_forced_aligner.alignment.adapting import AdaptingAligner 7 | from montreal_forced_aligner.alignment.base import CorpusAligner 8 | from montreal_forced_aligner.alignment.mixins import AlignMixin 9 | from montreal_forced_aligner.alignment.pretrained import DictionaryTrainer, PretrainedAligner 10 | 11 | __all__ = [ 12 | "AdaptingAligner", 13 | "PretrainedAligner", 14 | "CorpusAligner", 15 | "DictionaryTrainer", 16 | "adapting", 17 | "base", 18 | "pretrained", 19 | "mixins", 20 | "AlignMixin", 21 | "multiprocessing", 22 | ] 23 | -------------------------------------------------------------------------------- /montreal_forced_aligner/command_line/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Command line functionality 3 | ========================== 4 | 5 | """ 6 | 7 | from montreal_forced_aligner.command_line.adapt import adapt_model_cli 8 | from montreal_forced_aligner.command_line.align import align_corpus_cli 9 | from montreal_forced_aligner.command_line.anchor import anchor_cli 10 | from montreal_forced_aligner.command_line.configure import configure_cli 11 | from montreal_forced_aligner.command_line.create_segments import create_segments_cli 12 | from montreal_forced_aligner.command_line.diarize_speakers import diarize_speakers_cli 13 | from montreal_forced_aligner.command_line.g2p import 
g2p_cli 14 | from montreal_forced_aligner.command_line.history import history_cli 15 | from montreal_forced_aligner.command_line.mfa import mfa_cli 16 | from montreal_forced_aligner.command_line.model import model_cli 17 | from montreal_forced_aligner.command_line.train_acoustic_model import train_acoustic_model_cli 18 | from montreal_forced_aligner.command_line.train_dictionary import train_dictionary_cli 19 | from montreal_forced_aligner.command_line.train_g2p import train_g2p_cli 20 | from montreal_forced_aligner.command_line.train_ivector_extractor import train_ivector_cli 21 | from montreal_forced_aligner.command_line.train_lm import train_lm_cli 22 | from montreal_forced_aligner.command_line.transcribe import transcribe_corpus_cli 23 | from montreal_forced_aligner.command_line.validate import ( 24 | validate_corpus_cli, 25 | validate_dictionary_cli, 26 | ) 27 | 28 | __all__ = [ 29 | "adapt", 30 | "align", 31 | "anchor", 32 | "diarize_speakers", 33 | "create_segments", 34 | "g2p", 35 | "mfa", 36 | "model", 37 | "configure", 38 | "history", 39 | "train_acoustic_model", 40 | "train_dictionary", 41 | "train_g2p", 42 | "train_ivector_extractor", 43 | "train_lm", 44 | "transcribe", 45 | "utils", 46 | "validate", 47 | "adapt_model_cli", 48 | "align_corpus_cli", 49 | "diarize_speakers_cli", 50 | "create_segments_cli", 51 | "g2p_cli", 52 | "mfa_cli", 53 | "configure_cli", 54 | "history_cli", 55 | "anchor_cli", 56 | "model_cli", 57 | "train_acoustic_model_cli", 58 | "train_dictionary_cli", 59 | "train_g2p_cli", 60 | "train_ivector_cli", 61 | "train_lm_cli", 62 | "transcribe_corpus_cli", 63 | "validate_dictionary_cli", 64 | "validate_corpus_cli", 65 | ] 66 | -------------------------------------------------------------------------------- /montreal_forced_aligner/command_line/anchor.py: -------------------------------------------------------------------------------- 1 | """Command line functions for launching anchor annotation""" 2 | from __future__ import annotations 3 | 4 | import logging 5 | 6 | import requests 7 | import rich_click as click 8 | 9 | from montreal_forced_aligner import config 10 | 11 | __all__ = ["anchor_cli"] 12 | 13 | logger = logging.getLogger("mfa") 14 | 15 | 16 | @click.command(name="anchor", short_help="Launch Anchor") 17 | @click.help_option("-h", "--help") 18 | def anchor_cli(*args, **kwargs) -> None: # pragma: no cover 19 | """ 20 | Launch Anchor Annotator (if installed) 21 | """ 22 | from anchor.command_line import main # noqa 23 | 24 | if config.VERBOSE: 25 | try: 26 | from anchor._version import version 27 | 28 | response = requests.get( 29 | "https://api.github.com/repos/MontrealCorpusTools/Anchor-annotator/releases/latest" 30 | ) 31 | latest_version = response.json()["tag_name"].replace("v", "") 32 | if version < latest_version: 33 | click.echo( 34 | f"You are currently running an older version of Anchor annotator ({version}) than the latest available ({latest_version}). " 35 | f"To update, please run mfa_update." 
36 | ) 37 | except ImportError: 38 | pass 39 | main() 40 | -------------------------------------------------------------------------------- /montreal_forced_aligner/command_line/history.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | 4 | import rich_click as click 5 | 6 | from montreal_forced_aligner import config 7 | 8 | __all__ = ["history_cli"] 9 | 10 | logger = logging.getLogger("mfa") 11 | 12 | 13 | @click.command( 14 | "history", 15 | help="Show previously run mfa commands", 16 | ) 17 | @click.option("--depth", help="Number of commands to list, defaults to 10", type=int, default=10) 18 | @click.option( 19 | "--verbose/--no_verbose", 20 | "-v/-nv", 21 | "verbose", 22 | help=f"Output debug messages, default is {config.VERBOSE}", 23 | default=config.VERBOSE, 24 | ) 25 | @click.help_option("-h", "--help") 26 | def history_cli(depth: int, verbose: bool) -> None: 27 | """ 28 | List previous MFA commands 29 | """ 30 | history = config.load_command_history()[-depth:] 31 | if verbose: 32 | logger.info("command\tDate\tExecution time\tVersion\tExit code\tException") 33 | for h in history: 34 | execution_time = time.strftime("%H:%M:%S", time.gmtime(h["execution_time"])) 35 | d = h["date"].isoformat() 36 | logger.info( 37 | f"{h['command']}\t{d}\t{execution_time}\t{h.get('version', 'unknown')}\t{h['exit_code']}\t{h['exception']}" 38 | ) 39 | pass 40 | else: 41 | for h in history: 42 | logger.info(h["command"]) 43 | -------------------------------------------------------------------------------- /montreal_forced_aligner/command_line/tokenize.py: -------------------------------------------------------------------------------- 1 | """Command line functions for generating pronunciations using G2P models""" 2 | from __future__ import annotations 3 | 4 | from pathlib import Path 5 | 6 | import rich_click as click 7 | 8 | from montreal_forced_aligner import config 9 | from montreal_forced_aligner.command_line.utils import common_options, validate_tokenizer_model 10 | from montreal_forced_aligner.tokenization.tokenizer import CorpusTokenizer 11 | 12 | __all__ = ["tokenize_cli"] 13 | 14 | 15 | @click.command( 16 | name="tokenize", 17 | context_settings=dict( 18 | ignore_unknown_options=True, 19 | allow_extra_args=True, 20 | allow_interspersed_args=True, 21 | ), 22 | short_help="Tokenize utterances", 23 | ) 24 | @click.argument( 25 | "input_path", type=click.Path(exists=True, file_okay=True, dir_okay=True, path_type=Path) 26 | ) 27 | @click.argument("tokenizer_model_path", type=click.UNPROCESSED, callback=validate_tokenizer_model) 28 | @click.argument( 29 | "output_directory", type=click.Path(file_okay=False, dir_okay=True, path_type=Path) 30 | ) 31 | @click.option( 32 | "--config_path", 33 | "-c", 34 | help="Path to config file to use for training.", 35 | type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path), 36 | ) 37 | @common_options 38 | @click.help_option("-h", "--help") 39 | @click.pass_context 40 | def tokenize_cli(context, **kwargs) -> None: 41 | """ 42 | Tokenize utterances with a trained tokenizer model 43 | """ 44 | if kwargs.get("profile", None) is not None: 45 | config.profile = kwargs.pop("profile") 46 | config.update_configuration(kwargs) 47 | 48 | config_path = kwargs.get("config_path", None) 49 | input_path = kwargs["input_path"] 50 | tokenizer_model_path = kwargs["tokenizer_model_path"] 51 | output_directory = kwargs["output_directory"] 52 | 53 | tokenizer = CorpusTokenizer( 54 | 
corpus_directory=input_path, 55 | tokenizer_model_path=tokenizer_model_path, 56 | **CorpusTokenizer.parse_parameters(config_path, context.params, context.args), 57 | ) 58 | 59 | try: 60 | tokenizer.setup() 61 | tokenizer.tokenize_utterances() 62 | tokenizer.export_files(output_directory) 63 | except Exception: 64 | tokenizer.dirty = True 65 | raise 66 | finally: 67 | tokenizer.cleanup() 68 | -------------------------------------------------------------------------------- /montreal_forced_aligner/corpus/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Corpora 3 | ======= 4 | 5 | 6 | """ 7 | from __future__ import annotations 8 | 9 | from montreal_forced_aligner.corpus.acoustic_corpus import ( 10 | AcousticCorpus, 11 | AcousticCorpusMixin, 12 | AcousticCorpusPronunciationMixin, 13 | ) 14 | from montreal_forced_aligner.corpus.base import CorpusMixin 15 | from montreal_forced_aligner.corpus.text_corpus import ( 16 | DictionaryTextCorpusMixin, 17 | TextCorpus, 18 | TextCorpusMixin, 19 | ) 20 | 21 | __all__ = [ 22 | "base", 23 | "helper", 24 | "classes", 25 | "features", 26 | "multiprocessing", 27 | "CorpusMixin", 28 | "ivector_corpus", 29 | "acoustic_corpus", 30 | "AcousticCorpus", 31 | "AcousticCorpusMixin", 32 | "AcousticCorpusPronunciationMixin", 33 | "text_corpus", 34 | "TextCorpus", 35 | "TextCorpusMixin", 36 | "DictionaryTextCorpusMixin", 37 | ] 38 | -------------------------------------------------------------------------------- /montreal_forced_aligner/diarization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/montreal_forced_aligner/diarization/__init__.py -------------------------------------------------------------------------------- /montreal_forced_aligner/dictionary/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pronunciation dictionaries 3 | ========================== 4 | 5 | """ 6 | 7 | from montreal_forced_aligner.dictionary.mixins import DictionaryMixin 8 | from montreal_forced_aligner.dictionary.multispeaker import ( 9 | MultispeakerDictionary, 10 | MultispeakerDictionaryMixin, 11 | ) 12 | 13 | __all__ = [ 14 | "multispeaker", 15 | "mixins", 16 | "DictionaryMixin", 17 | "MultispeakerDictionary", 18 | "MultispeakerDictionaryMixin", 19 | ] 20 | -------------------------------------------------------------------------------- /montreal_forced_aligner/g2p/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Grapheme to phoneme (G2P) 3 | ========================= 4 | """ 5 | 6 | from montreal_forced_aligner.g2p.generator import PyniniCorpusGenerator, PyniniWordListGenerator 7 | from montreal_forced_aligner.g2p.phonetisaurus_trainer import PhonetisaurusTrainer 8 | from montreal_forced_aligner.g2p.trainer import PyniniTrainer 9 | 10 | __all__ = [ 11 | "generator", 12 | "trainer", 13 | "PyniniTrainer", 14 | "PyniniCorpusGenerator", 15 | "PyniniWordListGenerator", 16 | "PhonetisaurusTrainer", 17 | ] 18 | -------------------------------------------------------------------------------- /montreal_forced_aligner/ivector/__init__.py: -------------------------------------------------------------------------------- 1 | """Module for ivector extractor training""" 2 | 3 | from montreal_forced_aligner.ivector.trainer import ( 4 | DubmTrainer, 5 | IvectorTrainer, 6 | 
TrainableIvectorExtractor, 7 | ) 8 | 9 | __all__ = ["trainer", "DubmTrainer", "IvectorTrainer", "TrainableIvectorExtractor"] 10 | -------------------------------------------------------------------------------- /montreal_forced_aligner/language_modeling/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Language modeling 3 | ================= 4 | 5 | 6 | """ 7 | 8 | from montreal_forced_aligner.language_modeling.trainer import ( 9 | MfaLmArpaTrainer, 10 | MfaLmCorpusTrainer, 11 | MfaLmDictionaryCorpusTrainer, 12 | ) 13 | 14 | __all__ = ["MfaLmCorpusTrainer", "MfaLmDictionaryCorpusTrainer", "MfaLmArpaTrainer"] 15 | -------------------------------------------------------------------------------- /montreal_forced_aligner/online/__init__.py: -------------------------------------------------------------------------------- 1 | """Module for running MFA in online mode""" 2 | -------------------------------------------------------------------------------- /montreal_forced_aligner/tokenization/__init__.py: -------------------------------------------------------------------------------- 1 | """Tokenization classes""" 2 | 3 | from montreal_forced_aligner.tokenization.tokenizer import CorpusTokenizer, TokenizerValidator 4 | from montreal_forced_aligner.tokenization.trainer import TokenizerTrainer 5 | 6 | __all__ = ["TokenizerTrainer", "TokenizerValidator", "CorpusTokenizer"] 7 | -------------------------------------------------------------------------------- /montreal_forced_aligner/tokenization/korean.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | 5 | try: 6 | import jamo 7 | from mecab import MeCab 8 | 9 | KO_AVAILABLE = True 10 | except ImportError: 11 | KO_AVAILABLE = False 12 | MeCab = None 13 | jamo = None 14 | 15 | 16 | class KoreanTokenizer: 17 | def __init__(self, ignore_case: bool = True): 18 | self.ignore_case = ignore_case 19 | self.tokenizer = MeCab() 20 | 21 | def __call__(self, text): 22 | new_text = [] 23 | morphs = self.tokenizer.parse(text) 24 | pronunciations = [] 25 | for morph in morphs: 26 | normalized = morph.surface 27 | join = False 28 | m = re.search(r"[]})>][<({[]", normalized) 29 | if new_text and m: 30 | new_text[-1] += normalized[: m.start() + 1] 31 | normalized = normalized[m.end() - 1 :] 32 | elif new_text and re.match(r"^[<({\[].*", new_text[-1]): 33 | join = True 34 | elif new_text and re.match(r".*[-_~]$", new_text[-1]): 35 | join = True 36 | elif new_text and re.match(r".*[>)}\]]$", normalized): 37 | join = True 38 | elif new_text and re.match(r"^[-_~].*", normalized): 39 | join = True 40 | if new_text and any(new_text[-1].endswith(x) for x in {">", ")", "}", "]"}): 41 | join = False 42 | if join: 43 | new_text[-1] += normalized 44 | pronunciations[-1] += jamo.h2j(normalized) 45 | continue 46 | if morph.pos in {"SF", "SY", "SC"} and normalized not in {"<", "(", "{", "["}: 47 | continue 48 | new_text.append(normalized) 49 | pronunciations.append(jamo.h2j(normalized)) 50 | new_text = " ".join(new_text) 51 | pronunciations = " ".join(pronunciations) 52 | if self.ignore_case: 53 | new_text = new_text.lower() 54 | pronunciations = pronunciations.lower() 55 | return new_text, pronunciations 56 | 57 | 58 | def ko_spacy(ignore_case: bool = True): 59 | if not KO_AVAILABLE: 60 | raise ImportError("Please install Korean support via `pip install python-mecab-ko jamo`") 61 | return KoreanTokenizer(ignore_case) 62 | 
-------------------------------------------------------------------------------- /montreal_forced_aligner/tokenization/resources/japanese/mfa_sudachi.dic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/montreal_forced_aligner/tokenization/resources/japanese/mfa_sudachi.dic -------------------------------------------------------------------------------- /montreal_forced_aligner/tokenization/resources/japanese/sudachi_config.json: -------------------------------------------------------------------------------- 1 | {"userDict": ["mfa_sudachi.dic"], "characterDefinitionFile": "char.def", "inputTextPlugin": [{"class": "com.worksap.nlp.sudachi.DefaultInputTextPlugin"}, {"class": "com.worksap.nlp.sudachi.ProlongedSoundMarkPlugin", "prolongedSoundMarks": ["ー", "-", "⁓", "〜", "〰"], "replacementSymbol": "ー"}, {"class": "com.worksap.nlp.sudachi.IgnoreYomiganaPlugin", "leftBrackets": ["(", "("], "rightBrackets": [")", ")"], "maxYomiganaLength": 4}], "oovProviderPlugin": [{"class": "com.worksap.nlp.sudachi.MeCabOovPlugin", "charDef": "char.def", "unkDef": "unk.def"}, {"class": "com.worksap.nlp.sudachi.SimpleOovPlugin", "oovPOS": ["補助記号", "一般", "*", "*", "*", "*"], "leftId": 5968, "rightId": 5968, "cost": 3857}]} 2 | -------------------------------------------------------------------------------- /montreal_forced_aligner/tokenization/resources/japanese/unk.def: -------------------------------------------------------------------------------- 1 | DEFAULT,5968,5968,3857,補助記号,一般,*,*,*,* 2 | SPACE,5966,5966,6056,空白,*,*,*,*,* 3 | KANJI,5139,5139,14657,名詞,普通名詞,一般,*,*,* 4 | KANJI,5129,5129,17308,名詞,普通名詞,サ変可能,*,*,* 5 | KANJI,4785,4785,18181,名詞,固有名詞,一般,*,*,* 6 | KANJI,4787,4787,18086,名詞,固有名詞,人名,一般,*,* 7 | KANJI,4791,4791,19198,名詞,固有名詞,地名,一般,*,* 8 | SYMBOL,5129,5129,17094,名詞,普通名詞,サ変可能,*,*,* 9 | NUMERIC,4794,4794,12450,名詞,数詞,*,*,*,* 10 | ALPHA,5139,5139,11633,名詞,普通名詞,一般,*,*,* 11 | ALPHA,4785,4785,13620,名詞,固有名詞,一般,*,*,* 12 | ALPHA,4787,4787,14228,名詞,固有名詞,人名,一般,*,* 13 | ALPHA,4791,4791,15793,名詞,固有名詞,地名,一般,*,* 14 | ALPHA,5687,5687,15246,感動詞,一般,*,*,*,* 15 | HIRAGANA,5139,5139,16012,名詞,普通名詞,一般,*,*,* 16 | HIRAGANA,5129,5129,20012,名詞,普通名詞,サ変可能,*,*,* 17 | HIRAGANA,4785,4785,18282,名詞,固有名詞,一般,*,*,* 18 | HIRAGANA,4787,4787,18269,名詞,固有名詞,人名,一般,*,* 19 | HIRAGANA,4791,4791,20474,名詞,固有名詞,地名,一般,*,* 20 | HIRAGANA,5687,5687,17786,感動詞,一般,*,*,*,* 21 | KATAKANA,5139,5139,10980,名詞,普通名詞,一般,*,*,* 22 | KATAKANA,5129,5129,14802,名詞,普通名詞,サ変可能,*,*,* 23 | KATAKANA,4785,4785,13451,名詞,固有名詞,一般,*,*,* 24 | KATAKANA,4787,4787,13759,名詞,固有名詞,人名,一般,*,* 25 | KATAKANA,4791,4791,14554,名詞,固有名詞,地名,一般,*,* 26 | KATAKANA,5687,5687,15272,感動詞,一般,*,*,*,* 27 | KANJINUMERIC,4794,4794,14170,名詞,数詞,*,*,*,* 28 | GREEK,5139,5139,11051,名詞,普通名詞,一般,*,*,* 29 | GREEK,4785,4785,13353,名詞,固有名詞,一般,*,*,* 30 | GREEK,4787,4787,13671,名詞,固有名詞,人名,一般,*,* 31 | GREEK,4791,4791,14862,名詞,固有名詞,地名,一般,*,* 32 | CYRILLIC,5139,5139,11140,名詞,普通名詞,一般,*,*,* 33 | CYRILLIC,4785,4785,13174,名詞,固有名詞,一般,*,*,* 34 | CYRILLIC,4787,4787,13495,名詞,固有名詞,人名,一般,*,* 35 | CYRILLIC,4791,4791,14700,名詞,固有名詞,地名,一般,*,* 36 | -------------------------------------------------------------------------------- /montreal_forced_aligner/transcription/__init__.py: -------------------------------------------------------------------------------- 1 | """Transcription module for MFA""" 2 | from montreal_forced_aligner.transcription.transcriber import Transcriber 3 | 4 | __all__ = 
["Transcriber", "transcriber"] 5 | -------------------------------------------------------------------------------- /montreal_forced_aligner/vad/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/montreal_forced_aligner/vad/__init__.py -------------------------------------------------------------------------------- /montreal_forced_aligner/validation/__init__.py: -------------------------------------------------------------------------------- 1 | """Validation classes""" 2 | 3 | from montreal_forced_aligner.validation.corpus_validator import ( 4 | PretrainedValidator, 5 | TrainingValidator, 6 | ValidationMixin, 7 | ) 8 | from montreal_forced_aligner.validation.dictionary_validator import DictionaryValidator 9 | 10 | __all__ = ["PretrainedValidator", "TrainingValidator", "ValidationMixin", "DictionaryValidator"] 11 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=45", "wheel", "setuptools_scm>=6.2" 4 | ] 5 | build-backend = "setuptools.build_meta" 6 | 7 | [tool.setuptools_scm] 8 | write_to = "montreal_forced_aligner/_version.py" 9 | 10 | [tool.black] 11 | line-length = 99 12 | 13 | [tool.flake8] 14 | max-line-length = 99 15 | extend-ignore = ["D203", "E203", "E251", "E266", "E302", "E305", "E401", "E402", "E501", "F401", "F403", "W503"] 16 | exclude = [".git", "__pycache__", "dist", "build"] 17 | 18 | [tool.isort] 19 | line_length = 99 20 | profile = "black" 21 | known_first_party = [ 22 | "montreal_forced_aligner" 23 | ] 24 | 25 | [tool.interrogate] 26 | ignore-init-method = true 27 | ignore-init-module = false 28 | ignore-magic = false 29 | ignore-semiprivate = false 30 | ignore-private = false 31 | ignore-module = false 32 | ignore-property-decorators = false 33 | fail-under = 95 34 | exclude = [ 35 | "tests", 36 | "build", 37 | "dist", 38 | "setup.py", 39 | "docs" 40 | ] 41 | verbose = 100 42 | omit-covered-files = false 43 | quiet = false 44 | generate-badge = "docs/source/_static" 45 | badge-format = "svg" 46 | whitelist-regex = [] 47 | ignore-regex = [] 48 | color = true 49 | 50 | 51 | [tool.check-manifest] 52 | ignore = [ 53 | ".deepsource.toml", 54 | ".readthedocs.yaml", 55 | ] 56 | 57 | [tool.coverage.run] 58 | source = ["montreal_forced_aligner"] 59 | concurrency = ["multiprocessing"] 60 | branch = true 61 | parallel = true 62 | omit = [ 63 | ".tox/*" 64 | ] 65 | 66 | 67 | [tool.coverage.report] 68 | show_missing = true 69 | exclude_lines = [ 70 | "pragma: no cover", 71 | "if __name__ == .__main__.:", 72 | "raise AssertionError", 73 | "raise NotImplementedError", 74 | "pass", 75 | "if sys.platform", 76 | "except ImportError:", 77 | "except KeyboardInterrupt:", 78 | "except Exception as e:", 79 | "except Exception:", 80 | "if call_back", 81 | "if is_set", 82 | "if TYPE_CHECKING:", 83 | "def history_save_handler() -> None:", 84 | "class ExitHooks(object):", 85 | "def main() -> None:", 86 | "if os.path.exists", 87 | "@abstractmethod", 88 | 'if "MFA_ERROR"', 89 | ] 90 | fail_under = 50 91 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | praatio>=6.0.0 2 | tqdm 3 | pyyaml 4 | librosa 5 | numpy 6 | scipy 7 | 
scikit-learn 8 | requests 9 | biopython 10 | dataclassy 11 | sqlalchemy>=2.0 12 | click 13 | rich 14 | rich-click 15 | numpy 16 | pynini 17 | -------------------------------------------------------------------------------- /rtd_environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python>=3.9 5 | - numpy 6 | - librosa 7 | - tqdm 8 | - requests 9 | - pyyaml 10 | - praatio=6.0.0 11 | - dataclassy 12 | - sqlalchemy>=2.0 13 | - pynini 14 | - pgvector 15 | - pgvector-python 16 | - postgresql 17 | - scikit-learn 18 | - hdbscan 19 | - psycopg2 20 | - biopython 21 | - click 22 | - setuptools_scm 23 | - importlib_metadata 24 | - sphinx 25 | - numpydoc 26 | - sphinx-design 27 | - sphinx-click 28 | - sphinx-intl 29 | - pydata-sphinx-theme 30 | - myst-parser 31 | - mock 32 | - setuptools-scm 33 | - kneed 34 | - matplotlib 35 | - seaborn 36 | - rich 37 | - rich-click 38 | - kaldi =*=cpu* 39 | - kalpy 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools_scm # noqa 2 | from setuptools import setup 3 | 4 | setup() 5 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/__init__.py -------------------------------------------------------------------------------- /tests/data/am/acoustic_g2p_output_model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/am/acoustic_g2p_output_model.zip -------------------------------------------------------------------------------- /tests/data/am/mono_model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/am/mono_model.zip -------------------------------------------------------------------------------- /tests/data/configs/acoustic/bad_topology.yaml: -------------------------------------------------------------------------------- 1 | b: 2 | max_states: 3 3 | bʲ: 4 | max_states: 1 5 | -------------------------------------------------------------------------------- /tests/data/configs/acoustic/english_mfa_phone_groups.yaml: -------------------------------------------------------------------------------- 1 | - 2 | - p 3 | - pʷ 4 | - pʰ 5 | - pʲ 6 | - 7 | - kp 8 | - 9 | - b 10 | - bʲ 11 | - 12 | - ɡb 13 | - 14 | - f 15 | - fʷ 16 | - fʲ 17 | - 18 | - v 19 | - vʷ 20 | - vʲ 21 | - 22 | - θ 23 | - 24 | - t̪ 25 | - 26 | - ð 27 | - 28 | - d̪ 29 | - 30 | - t 31 | - tʷ 32 | - tʰ 33 | - tʲ 34 | - 35 | - ʈ 36 | - ʈʲ 37 | - ʈʷ 38 | - 39 | - ʔ 40 | - 41 | - d 42 | - dʲ 43 | - 44 | - ɖ 45 | - ɖʲ 46 | - 47 | - ɾ 48 | - ɾʲ 49 | - 50 | - tʃ 51 | - 52 | - dʒ 53 | - 54 | - ʃ 55 | - 56 | - ʒ 57 | - 58 | - s 59 | - 60 | - z 61 | - 62 | - ɹ 63 | - 64 | - m 65 | - 66 | - mʲ 67 | - 68 | - m̩ 69 | - 70 | - ɱ 71 | - 72 | - n 73 | - 74 | - n̩ 75 | - 76 | - ɲ 77 | - 78 | - ɾ̃ 79 | - 80 | - ŋ 81 | - 82 | - l 83 | - 84 | - ɫ 85 | - 86 | - ɫ̩ 87 | - 88 | - ʎ 89 | - 90 | - ɟ 91 | - ɟʷ 92 | - 
93 | - ɡ 94 | - ɡʷ 95 | - 96 | - c 97 | - cʷ 98 | - cʰ 99 | - 100 | - k 101 | - kʷ 102 | - kʰ 103 | - 104 | - ç 105 | - 106 | - h 107 | - 108 | - ɐ 109 | - 110 | - ə 111 | - 112 | - ɜː 113 | - ɜ 114 | - 115 | - ɝ 116 | - 117 | - ɚ 118 | - 119 | - ʊ 120 | - 121 | - ɪ 122 | - 123 | - ɑ 124 | - ɑː 125 | - 126 | - ɒ 127 | - ɒː 128 | - 129 | - ɔ 130 | - 131 | - aː 132 | - a 133 | - 134 | - æ 135 | - 136 | - aj 137 | - 138 | - aw 139 | - 140 | - i 141 | - iː 142 | - 143 | - j 144 | - 145 | - ɛː 146 | - ɛ 147 | - 148 | - e 149 | - eː 150 | - 151 | - ej 152 | - 153 | - ʉ 154 | - ʉː 155 | - 156 | - uː 157 | - u 158 | - 159 | - w 160 | - 161 | - ʋ 162 | - 163 | - ɔj 164 | - 165 | - ow 166 | - 167 | - əw 168 | - 169 | - o 170 | - oː 171 | -------------------------------------------------------------------------------- /tests/data/configs/acoustic/english_mfa_topology.yaml: -------------------------------------------------------------------------------- 1 | ɾ: 2 | max_states: 1 3 | min_states: 1 4 | ɾʲ: 5 | max_states: 1 6 | min_states: 1 7 | ɾ̃: 8 | max_states: 1 9 | min_states: 1 10 | ʔ: 11 | max_states: 1 12 | min_states: 1 13 | ə: 14 | max_states: 3 15 | ɚ: 16 | max_states: 3 17 | ɪ: 18 | max_states: 3 19 | e: 20 | max_states: 3 21 | eː: 22 | max_states: 3 23 | ɛ: 24 | max_states: 3 25 | ɛː: 26 | max_states: 3 27 | ɐ: 28 | max_states: 3 29 | i: 30 | max_states: 3 31 | iː: 32 | max_states: 3 33 | o: 34 | max_states: 3 35 | oː: 36 | max_states: 3 37 | u: 38 | max_states: 3 39 | uː: 40 | max_states: 3 41 | ɝ: 42 | max_states: 3 43 | j: 44 | max_states: 3 45 | w: 46 | max_states: 3 47 | -------------------------------------------------------------------------------- /tests/data/configs/bad_align_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 10 3 | -------------------------------------------------------------------------------- /tests/data/configs/basic_align_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 100 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | frame_shift: 10 7 | -------------------------------------------------------------------------------- /tests/data/configs/basic_ipa_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 40 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | multilingual_ipa: true 11 | 12 | digraphs: 13 | - "[dt][szʒʃʐʑʂɕç]" 14 | - "[a][job_name][u]" 15 | 16 | strip_diacritics: 17 | - 'ː' 18 | - 'ˑ' 19 | - '̩' 20 | - '̆' 21 | - '̑' 22 | - '̯' 23 | - '͡' 24 | - '‿' 25 | - '͜' 26 | 27 | training: 28 | - monophone: 29 | num_iterations: 5 30 | max_gaussians: 1000 31 | subset: 100 32 | 33 | - triphone: 34 | num_iterations: 3 35 | num_leaves: 250 36 | max_gaussians: 2000 37 | cluster_threshold: -1 38 | subset: 1000 39 | 40 | - lda: 41 | num_iterations: 2 42 | num_leaves: 500 43 | max_gaussians: 4000 44 | subset: 1000 45 | features: 46 | splice_left_context: 3 47 | splice_right_context: 3 48 | 49 | - sat: 50 | num_iterations: 2 51 | num_leaves: 500 52 | max_gaussians: 5000 53 | power: 0.2 54 | silence_weight: 0.0 55 | fmllr_update_type: "full" 56 | subset: 1000 57 | features: 58 | lda: true 59 | -------------------------------------------------------------------------------- /tests/data/configs/basic_segment_config.yaml: -------------------------------------------------------------------------------- 1 | 2 | 
energy_threshold: 9 3 | energy_mean_scale: 0.5 4 | max_segment_length: 5 5 | min_pause_duration: 0.25 6 | -------------------------------------------------------------------------------- /tests/data/configs/basic_train_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 100 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 1000 14 | subset: 100 15 | 16 | - triphone: 17 | num_iterations: 3 18 | num_leaves: 250 19 | max_gaussians: 2000 20 | cluster_threshold: -1 21 | subset: 1000 22 | 23 | - lda: 24 | num_iterations: 2 25 | num_leaves: 500 26 | max_gaussians: 4000 27 | subset: 1000 28 | features: 29 | splice_left_context: 3 30 | splice_right_context: 3 31 | 32 | - sat: 33 | num_iterations: 2 34 | num_leaves: 500 35 | max_gaussians: 5000 36 | power: 0.2 37 | silence_weight: 0.0 38 | fmllr_update_type: "full" 39 | subset: 1000 40 | features: 41 | lda: true 42 | -------------------------------------------------------------------------------- /tests/data/configs/basic_train_lm.yaml: -------------------------------------------------------------------------------- 1 | order: 3 2 | method: kneser_ney 3 | prune_thresh_small: 0.0000003 4 | prune_thresh_medium: 0.0000001 5 | -------------------------------------------------------------------------------- /tests/data/configs/different_punctuation_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | word_break_markers: .-'][ 4 | punctuation: .-'][ 5 | 6 | features: 7 | type: "mfcc" 8 | use_energy: true 9 | frame_shift: 10 10 | use_pitch: false 11 | 12 | training: 13 | - monophone: 14 | num_iterations: 3 15 | max_gaussians: 500 16 | subset: 1000 17 | -------------------------------------------------------------------------------- /tests/data/configs/eval_mapping.yaml: -------------------------------------------------------------------------------- 1 | ʔ: T 2 | h: HH 3 | ç: HH 4 | i: [IY0, IY2, IY1] 5 | iː: [IY0, IY2, IY1] 6 | ɚ: [ER0, ER2, ER1] 7 | ɝ: [ER0, ER2, ER1] 8 | ɝː: [ER0, ER2, ER1] 9 | dʒ: JH 10 | tʃ: CH 11 | ɑ: [AA0, AA2, AA1] 12 | ɑː: [AA0, AA2, AA1] 13 | ʊ: [UH0, UH2, UH1] 14 | ɛ: [EH0, EH2, EH1] 15 | oʊ: [OW0, OW2, OW1] 16 | ow: [OW0, OW2, OW1] 17 | aʊ: [AW0, AW2, AW1] 18 | aw: [AW0, AW2, AW1] 19 | aɪ: [AY0, AY2, AY1] 20 | aj: [AY0, AY2, AY1] 21 | ɔ: [AO0, AO2, AO1] 22 | ɒ: [AO0, AO2, AO1] 23 | ɔː: [AO0, AO2, AO1] 24 | ɒː: [AO0, AO2, AO1] 25 | ɔɪ: [OY0, OY2, OY1] 26 | ɔj: [OY0, OY2, OY1] 27 | u: [UW0, UW2, UW1] 28 | ʉ: [UW0, UW2, UW1] 29 | uː: [UW0, UW2, UW1] 30 | ʉː: [UW0, UW2, UW1] 31 | æ: [AE0, AE2, AE1] 32 | æː: [AE0, AE2, AE1] 33 | eɪ: [EY0, EY2, EY1] 34 | ej: [EY0, EY2, EY1] 35 | ɪ: [IH0, IH2, IH1] 36 | ð: DH 37 | ʃ: SH 38 | ʒ: ZH 39 | ɹ: R 40 | j: Y 41 | θ: TH 42 | ə: [AH0, AH2, AH1] 43 | ʌ: [AH0, AH2, AH1] 44 | ɐ: [AH0, AH2, AH1] 45 | n̩: N 46 | n: N 47 | m̩: M 48 | m: M 49 | mʲ: M 50 | ɱ: M 51 | v: V 52 | vʲ: V 53 | fʲ: F 54 | f: F 55 | l̩: L 56 | l: L 57 | ɫ̩: L 58 | ɫ: L 59 | ʎ: L 60 | ɾ: [D, T] 61 | pʰ: P 62 | pʲ: P 63 | p̚: P 64 | bʲ: B 65 | b̚: B 66 | tʰ: T 67 | t: T 68 | d: D 69 | d̚: D 70 | dʲ: D 71 | t̚: T 72 | tʲ: T 73 | kʰ: K 74 | k̚: K 75 | cʰ: K 76 | c̚: K 77 | ɡ: G 78 | ɡ̚: G 79 | ɟ̚: G 80 | ɟ: G 81 | ŋ: NG 82 | ɲ: [NG, N] 83 | -------------------------------------------------------------------------------- /tests/data/configs/g2p_config.yaml: 
-------------------------------------------------------------------------------- 1 | punctuation: "、。।,@<>\"(),.:;¿?¡!\\&%#*~【】,…‥「」『』〝〟″⟨⟩♪・‹›«»~′$+=" 2 | clitic_markers: "'’" 3 | compound_markers: "-" 4 | num_pronunciations: 1 5 | -------------------------------------------------------------------------------- /tests/data/configs/ivector_train.yaml: -------------------------------------------------------------------------------- 1 | 2 | features: 3 | type: "mfcc" 4 | use_energy: true 5 | frame_shift: 10 6 | 7 | training: 8 | - dubm: 9 | num_iterations_init: 4 10 | num_iterations: 2 11 | - ivector: 12 | num_iterations: 2 13 | gaussian_min_count: 2 14 | -------------------------------------------------------------------------------- /tests/data/configs/lda_sat_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 4 13 | max_gaussians: 1000 14 | subset: 1000 15 | 16 | - triphone: 17 | num_iterations: 2 18 | num_leaves: 1500 19 | max_gaussians: 2000 20 | cluster_threshold: -1 21 | subset: 3000 22 | boost_silence: 1.25 23 | power: 0.25 24 | 25 | - lda: 26 | num_iterations: 3 27 | num_leaves: 1500 28 | max_gaussians: 4000 29 | subset: 5000 30 | features: 31 | splice_left_context: 3 32 | splice_right_context: 3 33 | 34 | - sat: 35 | num_iterations: 2 36 | num_leaves: 1500 37 | max_gaussians: 8000 38 | power: 0.2 39 | silence_weight: 0.0 40 | fmllr_update_type: "full" 41 | subset: 5000 42 | features: 43 | lda: true 44 | -------------------------------------------------------------------------------- /tests/data/configs/lda_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 4 13 | max_gaussians: 100 14 | subset: 1000 15 | 16 | - lda: 17 | num_iterations: 15 18 | num_leaves: 500 19 | max_gaussians: 4000 20 | subset: 1000 21 | features: 22 | splice_left_context: 3 23 | splice_right_context: 3 24 | -------------------------------------------------------------------------------- /tests/data/configs/mono_align.yaml: -------------------------------------------------------------------------------- 1 | beam: 100 2 | retry_beam: 400 3 | -------------------------------------------------------------------------------- /tests/data/configs/mono_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: true 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 500 14 | subset: 1000 15 | -------------------------------------------------------------------------------- /tests/data/configs/no_punctuation_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | punctuation: 4 | word_break_markers: 5 | compound_markers: 6 | quote_markers: 7 | clitic_markers: 8 | 9 | features: 10 | type: "mfcc" 11 | use_energy: true 12 | frame_shift: 10 13 | use_pitch: false 14 | 15 | training: 16 | - monophone: 17 | num_iterations: 3 18 | max_gaussians: 500 19 | subset: 1000 20 | 
-------------------------------------------------------------------------------- /tests/data/configs/out_of_order_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 40 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - triphone: 12 | num_iterations: 35 13 | num_leaves: 2500 14 | max_gaussians: 20000 15 | cluster_threshold: -1 16 | subset: 30000 17 | boost_silence: 1.25 18 | power: 0.25 19 | 20 | - monophone: 21 | num_iterations: 40 22 | max_gaussians: 1000 23 | subset: 10000 24 | -------------------------------------------------------------------------------- /tests/data/configs/pitch_tri_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: true 9 | use_voicing: true 10 | 11 | training: 12 | - monophone: 13 | num_iterations: 5 14 | max_gaussians: 100 15 | subset: 1000 16 | 17 | - triphone: 18 | num_iterations: 3 19 | num_leaves: 250 20 | max_gaussians: 2000 21 | cluster_threshold: -1 22 | subset: 3000 23 | boost_silence: 1.25 24 | power: 0.25 25 | -------------------------------------------------------------------------------- /tests/data/configs/pron_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: true 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 500 14 | subset: 1000 15 | 16 | - pronunciation_probabilities: 17 | subset: 1000 18 | -------------------------------------------------------------------------------- /tests/data/configs/sat_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 500 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 500 14 | subset: 1000 15 | 16 | - triphone: 17 | num_iterations: 3 18 | num_leaves: 1500 19 | max_gaussians: 2000 20 | cluster_threshold: -1 21 | subset: 3000 22 | boost_silence: 1.25 23 | power: 0.25 24 | 25 | - sat: 26 | num_iterations: 5 27 | num_leaves: 2000 28 | max_gaussians: 10000 29 | power: 0.2 30 | silence_weight: 0.0 31 | fmllr_update_type: "full" 32 | subset: 1000 33 | -------------------------------------------------------------------------------- /tests/data/configs/test_groups.yaml: -------------------------------------------------------------------------------- 1 | bilabial_stops: 2 | - p 3 | - b 4 | labiodental_obstruents: 5 | - f 6 | - v 7 | dental_obstruents: 8 | - th 9 | - dh 10 | coronal_stops: 11 | - t 12 | - d 13 | coronal_affricates: 14 | - ch 15 | - jh 16 | coronal_fricatives: 17 | - sh 18 | - zh 19 | - s 20 | - z 21 | rhotics: 22 | - r 23 | nasals: 24 | - m 25 | - n 26 | - ng 27 | laterals: 28 | - l 29 | dorsal_obstruents: 30 | - g 31 | - k 32 | voiceless_glottals: 33 | - hh 34 | central_vowels: 35 | - ah 36 | - er 37 | - uh 38 | - ih 39 | front_diphthongs: 40 | - ay 41 | - oy 42 | back_diphthongs: 43 | - ow 44 | - aw 45 | low_vowels: 46 | - aa 47 | - ao 48 | high_front_vowels: 49 | - iy 50 | front_glides: 51 | - y 52 | mid_front_vowels: 53 | - ae 54 | - eh 55 | - ey 56 | high_back_vowels: 57 | - uw 58 | back_glides: 59 | - w 60 | 
-------------------------------------------------------------------------------- /tests/data/configs/test_rules.yaml: -------------------------------------------------------------------------------- 1 | rules: 2 | - following_context: '' 3 | preceding_context: '' 4 | replacement: ih 5 | segment: iy 6 | -------------------------------------------------------------------------------- /tests/data/configs/train_g2p_acoustic.yaml: -------------------------------------------------------------------------------- 1 | beam: 100 2 | retry_beam: 800 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 1000 14 | subset: 100 15 | 16 | - triphone: 17 | num_iterations: 3 18 | num_leaves: 250 19 | max_gaussians: 2000 20 | cluster_threshold: -1 21 | subset: 1000 22 | 23 | - lda: 24 | num_iterations: 2 25 | num_leaves: 500 26 | max_gaussians: 4000 27 | subset: 1000 28 | features: 29 | splice_left_context: 3 30 | splice_right_context: 3 31 | 32 | - sat: 33 | num_iterations: 2 34 | num_leaves: 500 35 | max_gaussians: 5000 36 | power: 0.2 37 | silence_weight: 0.0 38 | fmllr_update_type: "full" 39 | subset: 1000 40 | features: 41 | lda: true 42 | 43 | - pronunciation_probabilities: 44 | train_g2p: true 45 | num_iterations: 5 46 | 47 | - sat: 48 | num_iterations: 2 49 | num_leaves: 500 50 | max_gaussians: 5000 51 | power: 0.2 52 | silence_weight: 0.0 53 | fmllr_update_type: "full" 54 | subset: 1000 55 | features: 56 | lda: true 57 | -------------------------------------------------------------------------------- /tests/data/configs/train_g2p_config.yaml: -------------------------------------------------------------------------------- 1 | punctuation: "、。।,@<>\"(),.:;¿?¡!\\&%#*~【】,…‥「」『』〝〟″⟨⟩♪・‹›«»~′$+=" 2 | clitic_markers: "'’" 3 | compound_markers: "-" 4 | num_pronunciations: 1 # Used if running in validation mode 5 | order: 7 6 | random_starts: 25 7 | seed: 1917 8 | delta: 0.0009765 9 | lr: 1.0 10 | batch_size: 200 11 | num_iterations: 10 12 | smoothing_method: "kneser_ney" 13 | pruning_method: "relative_entropy" 14 | model_size: 1000000 15 | -------------------------------------------------------------------------------- /tests/data/configs/transcribe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/configs/transcribe.yaml -------------------------------------------------------------------------------- /tests/data/configs/tri_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 100 14 | subset: 1000 15 | 16 | - triphone: 17 | num_iterations: 3 18 | num_leaves: 250 19 | max_gaussians: 2000 20 | cluster_threshold: -1 21 | subset: 3000 22 | boost_silence: 1.25 23 | power: 0.25 24 | -------------------------------------------------------------------------------- /tests/data/configs/xsampa_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | ignore_case: false 4 | punctuation: .-'][ 5 | 6 | features: 7 | type: "mfcc" 8 | use_energy: true 9 | frame_shift: 10 10 | use_pitch: false 11 | 12 | training: 13 | - 
monophone: 14 | num_iterations: 10 15 | max_gaussians: 500 16 | subset: 1000 17 | -------------------------------------------------------------------------------- /tests/data/dictionaries/acoustic_g2p_dictionary.yaml: -------------------------------------------------------------------------------- 1 | default: english_us_mfa 2 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/graphemes.txt: -------------------------------------------------------------------------------- 1 | a 2 | b 3 | d 4 | o 5 | r 6 | w 7 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/lexicon.text.fst: -------------------------------------------------------------------------------- 1 | 0 1 0.6931471805599453 2 | 0 1 sil 0.6931471805599453 3 | 2 1 sil 4 | 1 1 sil_S !SIL 0.6931471805599453 5 | 1 2 sil_S !SIL 0.6931471805599453 6 | 1 1 spn_S 0.6931471805599453 7 | 1 2 spn_S 0.6931471805599453 8 | 1 3 phonea_B worda 9 | 3 1 phoneb_E 0.6931471805599453 10 | 3 2 phoneb_E 0.6931471805599453 11 | 1 4 phonea_B wordb 12 | 4 1 phonec_E 0.6931471805599453 13 | 4 2 phonec_E 0.6931471805599453 14 | 1 0 15 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phone_map.txt: -------------------------------------------------------------------------------- 1 | sil sil sil_B sil_E sil_I sil_S 2 | spn spn spn_B spn_E spn_I spn_S 3 | phoneb phoneb_B phoneb_E phoneb_I phoneb_S 4 | phonea phonea_B phonea_E phonea_I phonea_S 5 | phonec phonec_B phonec_E phonec_I phonec_S 6 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones.txt: -------------------------------------------------------------------------------- 1 | 0 2 | sil 1 3 | sil_B 2 4 | sil_E 3 5 | sil_I 4 6 | sil_S 5 7 | spn 6 8 | spn_B 7 9 | spn_E 8 10 | spn_I 9 11 | spn_S 10 12 | phonea_B 11 13 | phonea_E 12 14 | phonea_I 13 15 | phonea_S 14 16 | phoneb_B 15 17 | phoneb_E 16 18 | phoneb_I 17 19 | phoneb_S 18 20 | phonec_B 19 21 | phonec_E 20 22 | phonec_I 21 23 | phonec_S 22 24 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/extra_questions.int: -------------------------------------------------------------------------------- 1 | 1 2 3 4 5 6 7 8 9 10 2 | 11 12 13 14 15 16 17 18 19 20 21 22 3 | 11 15 19 4 | 12 16 20 5 | 13 17 21 6 | 14 18 22 7 | 1 6 8 | 2 7 9 | 3 8 10 | 4 9 11 | 5 10 12 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/extra_questions.txt: -------------------------------------------------------------------------------- 1 | sil sil_B sil_E sil_I sil_S spn spn_B spn_E spn_I spn_S 2 | phonea_B phonea_E phonea_I phonea_S phoneb_B phoneb_E phoneb_I phoneb_S phonec_B phonec_E phonec_I phonec_S 3 | phonea_B phoneb_B phonec_B 4 | phonea_E phoneb_E phonec_E 5 | phonea_I phoneb_I phonec_I 6 | phonea_S phoneb_S phonec_S 7 | sil spn 8 | sil_B spn_B 9 | sil_E spn_E 10 | sil_I spn_I 11 | sil_S spn_S 12 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/roots.int: -------------------------------------------------------------------------------- 1 | shared split 1 2 3 4 5 2 | shared split 6 7 8 9 10 3 | shared split 11 12 13 14 4 | shared split 15 16 17 18 5 | shared split 19 20 21 22 6 | 
-------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/roots.txt: -------------------------------------------------------------------------------- 1 | shared split sil sil_B sil_E sil_I sil_S 2 | shared split spn spn_B spn_E spn_I spn_S 3 | shared split phonea_B phonea_E phonea_I phonea_S 4 | shared split phoneb_B phoneb_E phoneb_I phoneb_S 5 | shared split phonec_B phonec_E phonec_I phonec_S 6 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/sets.int: -------------------------------------------------------------------------------- 1 | 1 2 3 4 5 2 | 6 7 8 9 10 3 | 11 12 13 14 4 | 15 16 17 18 5 | 19 20 21 22 6 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/sets.txt: -------------------------------------------------------------------------------- 1 | sil sil_B sil_E sil_I sil_S 2 | spn spn_B spn_E spn_I spn_S 3 | phonea_B phonea_E phonea_I phonea_S 4 | phoneb_B phoneb_E phoneb_I phoneb_S 5 | phonec_B phonec_E phonec_I phonec_S 6 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/word_boundary.int: -------------------------------------------------------------------------------- 1 | 1 nonword 2 | 2 begin 3 | 3 end 4 | 4 internal 5 | 5 singleton 6 | 6 nonword 7 | 7 begin 8 | 8 end 9 | 9 internal 10 | 10 singleton 11 | 11 begin 12 | 12 end 13 | 13 internal 14 | 14 singleton 15 | 15 begin 16 | 16 end 17 | 17 internal 18 | 18 singleton 19 | 19 begin 20 | 20 end 21 | 21 internal 22 | 22 singleton 23 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/word_boundary.txt: -------------------------------------------------------------------------------- 1 | sil nonword 2 | sil_B begin 3 | sil_E end 4 | sil_I internal 5 | sil_S singleton 6 | spn nonword 7 | spn_B begin 8 | spn_E end 9 | spn_I internal 10 | spn_S singleton 11 | phonea_B begin 12 | phonea_E end 13 | phonea_I internal 14 | phonea_S singleton 15 | phoneb_B begin 16 | phoneb_E end 17 | phoneb_I internal 18 | phoneb_S singleton 19 | phonec_B begin 20 | phonec_E end 21 | phonec_I internal 22 | phonec_S singleton 23 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/topo: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 12 13 14 15 16 17 18 19 20 21 22 5 | 6 | 0 0 0 0.75 1 0.25 7 | 1 1 1 0.75 2 0.25 8 | 2 2 2 0.75 3 0.25 9 | 3 10 | 11 | 12 | 13 | 1 2 3 4 5 6 7 8 9 10 14 | 15 | 0 0 0 0.25 1 0.25 2 0.25 3 0.25 16 | 1 1 1 0.25 2 0.25 3 0.25 4 0.25 17 | 2 2 1 0.25 2 0.25 3 0.25 4 0.25 18 | 3 3 1 0.25 2 0.25 3 0.25 4 0.25 19 | 4 4 4 0.75 5 0.25 20 | 5 21 | 22 | 23 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/words.txt: -------------------------------------------------------------------------------- 1 | 0 2 | !SIL 1 3 | 2 4 | worda 3 5 | wordb 4 6 | #0 5 7 | 6 8 | 7 9 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_abstract.txt: -------------------------------------------------------------------------------- 1 | worda phonea phoneb 2 | wordb phonea phonec 3 | wordc phonec 4 | 
-------------------------------------------------------------------------------- /tests/data/dictionaries/test_acoustic.txt: -------------------------------------------------------------------------------- 1 | this dh ih s 2 | is ih z 3 | the dh ah 4 | acoustic ah k uw s t ih k 5 | corpus k ao r p us 6 | i'm ay m 7 | talking t aa k ih ng 8 | pretty p r eh t iy 9 | fast f ae s t 10 | here hh iy r 11 | there's dh eh r z 12 | nothing n ah th ih ng 13 | going g ow ih ng 14 | else eh l s 15 | on ah n 16 | we're w iy r 17 | just j ah s t 18 | yknow y ah n ow 19 | some s ah m 20 | speech s p iy ch 21 | errors eh r ao r z 22 | but b ah t 23 | who hh uw 24 | cares k ae r z 25 | me m iy 26 | really r iy l iy 27 | slow s l ow 28 | and ae n d 29 | slightly s l ay t l iy 30 | lower l ow w er 31 | in ih n 32 | intensity ih n t eh n s ih t iy 33 | saying s ey ih ng 34 | words w er d z 35 | here's h iy r z 36 | more m ao r 37 | um ah m 38 | that dh ae t 39 | should sh uh d 40 | be b iy 41 | all aa l 42 | thanks th ae ng k s 43 | just jh ah s t 44 | sound s aw n d 45 | environment eh n v ay r ah n m eh n t 46 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_basic.txt: -------------------------------------------------------------------------------- 1 | 'm m 2 | ’m m 3 | i’m ay m ih 4 | this dh ih s 5 | is ih z 6 | the dh ah 7 | acoustic ah k uw s t ih k 8 | corpus k ao r p ah s 9 | i'm ay m 10 | talking t aa k ih ng 11 | pretty p r eh t iy 12 | fast f ae s t 13 | here hh iy r 14 | there's dh eh r z 15 | nothing n ah th ih ng 16 | going g ow ih ng 17 | else eh l s 18 | on ah n 19 | we're w iy r 20 | just jh ah s t 21 | yknow j ah n ow 22 | some s ah m 23 | speech s p iy ch 24 | errors eh r ao r z 25 | but b ah t 26 | who hh uw 27 | cares k ae r z 28 | me m iy 29 | really r iy l iy 30 | slow s l ow 31 | and ae n d 32 | slightly s l ay t l iy 33 | lower l ow w er 34 | in ih n 35 | intensity ih n t eh n s ih t iy 36 | saying s ey ih ng 37 | words w er d z 38 | here's hh iy r z 39 | more m ao r 40 | um ah m 41 | that dh ae t 42 | should sh uh d 43 | be b iy 44 | all aa l 45 | thanks th ae ng k s 46 | uh ah 47 | so s ow 48 | sick s ih k 49 | i ay 50 | have hh ae v 51 | a ah 52 | cold k ow l d 53 | probably p r aa b ah b l iy 54 | sound s aw n d 55 | quite k w ay t 56 | different d ih f er ah n t 57 | than dh ae n 58 | recording r iy k ao r d ih ng 59 | environment eh n v ay r ah n m eh n t 60 | also aa l s ow 61 | bunch b ah n ch 62 | did d ih d 63 | not n aa t 64 | original ao r ih g ih n ah l 65 | one w ah n 66 | long l aa n g 67 | pause p aa z 68 | think th ih ng k 69 | good g uh d 70 | alright aa l r ay t 71 | much m ah ch 72 | since s ih n s 73 | quality k w aa l ih t iy 74 | of ah v 75 | gonna g ah n ah 76 | cough k aa f 77 | for f ao r 78 | little l ih t ah l 79 | bit b ih t 80 | just jh ah s t 81 | to t uw 82 | yup j ah p 83 | happened hh ae p ah n d 84 | that's dh ae t s 85 | hopefully hh ow p f uh l iy 86 | levels l eh v ah l z 87 | okay ow k ay 88 | lot l aa t 89 | yeah j ae 90 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_extra_annotations.txt: -------------------------------------------------------------------------------- 1 | worda phonea phoneb 2 | wordb phonea phonec 3 | wordc phonec 4 | {LG} laugh 5 | {SL} sil 6 | sil 7 | {VN} vocnoise 8 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_frclitics.txt: 
-------------------------------------------------------------------------------- 1 | aujourd'hui o zh u r d w i 2 | c'est s e 3 | est e 4 | c' s 5 | c s e 6 | m' m 7 | m 3 m 8 | appelle a p 3 l 9 | vingt-cinq v ae~ s ae~ k 10 | vingt v ae~ 11 | six s i s 12 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_hindi.txt: -------------------------------------------------------------------------------- 1 | हैं ɦ ɛ̃ː 2 | हूं ɦ ũː 3 | हौंसला ɦ ɔ̃ː s̪ l̪ aː 4 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_japanese.txt: -------------------------------------------------------------------------------- 1 | はい h a i 2 | はい h aː 3 | 何 n a ɴ 4 | 何 n a ɲ i 5 | でしょう d e ɕ oː 6 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_mixed_format_dictionary.txt: -------------------------------------------------------------------------------- 1 | 'm 1.0 m 2 | ’m m 3 | i’m 0.01 ay m ih 4 | this 1.0 0.43 1.23 0.85 dh ih s 5 | is 1.0 0.5 1.0 1.0 ih z 6 | the 1.0 0.5 1.0 1.0 dh ah 7 | acoustic ah k uw s t ih k 8 | corpus k ao r p ah s 9 | i'm ay m 10 | talking t aa k ih ng 11 | pretty p r eh t iy 12 | fast f ae s t 13 | here hh iy r 14 | there's dh eh r z 15 | nothing n ah th ih ng 16 | going g ow ih ng 17 | else eh l s 18 | on ah n 19 | we're w iy r 20 | just jh ah s t 21 | yknow y ah n ow 22 | some s ah m 23 | speech s p iy ch 24 | errors eh r ao r z 25 | but b ah t 26 | who hh uw 27 | cares k ae r z 28 | me m iy 29 | really r iy l iy 30 | slow s l ow 31 | and ae n d 32 | slightly s l ay t l iy 33 | lower l ow w er 34 | in ih n 35 | intensity ih n t eh n s ih t iy 36 | saying s ey ih ng 37 | words w er d z 38 | here's hh iy r z 39 | more m ao r 40 | um ah m 41 | that dh ae t 42 | should sh uh d 43 | be b iy 44 | all aa l 45 | thanks th ae ng k s 46 | uh ah 47 | so s ow 48 | sick s ih k 49 | i ay 50 | have hh ae v 51 | a ah 52 | cold k ow l d 53 | probably p r aa b ah b l iy 54 | sound s aw n d 55 | quite k w ay t 56 | different d ih f er ah n t 57 | than dh ae n 58 | recording r iy k ao r d ih ng 59 | environment eh n v ay r ah n m eh n t 60 | also aa l s ow 61 | bunch b ah n ch 62 | did d ih d 63 | not n aa t 64 | original ao r ih g ih n ah l 65 | one w ah n 66 | long l aa n g 67 | pause p aa z 68 | think th ih ng k 69 | good g uh d 70 | alright aa l r ay t 71 | much m ah ch 72 | since s ih n s 73 | quality k w aa l ih t iy 74 | of ah v 75 | gonna g ah n ah 76 | cough k aa f 77 | for f ao r 78 | little l ih t ah l 79 | bit b ih t 80 | to t uw 81 | yup y ah p 82 | happened hh ae p ah n d 83 | that's dh ae t s 84 | hopefully hh ow p f uh l iy 85 | levels l eh v ah l z 86 | okay ow k ay 87 | lot l aa t 88 | yeah y ae 89 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_tabbed_dictionary.txt: -------------------------------------------------------------------------------- 1 | 'm 1.0 m 2 | ’m m 3 | i’m 0.01 ay m ih 4 | this 1.0 0.43 1.23 0.85 dh ih s 5 | is 1.0 0.5 1.0 1.0 ih z 6 | the 1.0 0.5 1.0 1.0 dh ah 7 | acoustic ah k uw s t ih k 8 | corpus k ao r p us 9 | i'm ay m 10 | talking t aa k ih ng 11 | pretty p r eh t iy 12 | fast f ae s t 13 | here hh iy r 14 | there's dh eh r z 15 | nothing n ah th ih ng 16 | going g ow ih ng 17 | else eh l s 18 | on ah n 19 | we're w iy r 20 | just j ah s t 21 | yknow y ah n ow 22 | some s ah m 23 | speech s p iy ch 24 | errors eh r ao 
r z 25 | but b ah t 26 | who hh uw 27 | cares k ae r z 28 | me m iy 29 | really r iy l iy 30 | slow s l ow 31 | and ae n d 32 | slightly s l ay t l iy 33 | lower l ow w er 34 | in ih n 35 | intensity ih n t eh n s ih t iy 36 | saying s ey ih ng 37 | words w er d z 38 | here's hh iy r z 39 | more m ao r 40 | um ah m 41 | that dh ae t 42 | should sh uh d 43 | be b iy 44 | all aa l 45 | thanks th ae ng k s 46 | uh ah 47 | so s ow 48 | sick s ih k 49 | i ay 50 | have hh ae v 51 | a ah 52 | cold k ow l d 53 | probably p r aa b ah b l iy 54 | sound s aw n d 55 | quite k w ay t 56 | different d ih f er ah n t 57 | than dh ae n 58 | recording r iy k ao r d ih ng 59 | environment eh n v ay r ah n m eh n t 60 | also aa l s ow 61 | bunch b ah n ch 62 | did d ih d 63 | not n aa t 64 | original ao r ih g ih n ah l 65 | one w ah n 66 | long l aa n g 67 | pause p aa z 68 | think th ih ng k 69 | good g uh d 70 | alright aa l r ay t 71 | much m ah ch 72 | since s ih n s 73 | quality k w aa l ih t iy 74 | of ah v 75 | gonna g ah n ah 76 | cough k aa f 77 | for f ao r 78 | little l ih t ah l 79 | bit b ih t 80 | just j ah s t 81 | to t uw 82 | yup y ah p 83 | happened hh ae p ah n d 84 | that's dh ae t s 85 | hopefully hh ow p f uh l iy 86 | levels l eh v ah l z 87 | okay ow k ay 88 | lot l aa t 89 | yeah y ae 90 | -------------------------------------------------------------------------------- /tests/data/lab/13697_11991_000000.lab: -------------------------------------------------------------------------------- 1 | la sorpresa y el disgusto que produjo a wentworth la substitución de una cuñada por otra el gesto que se dibujó en su fisonomía el asombro que manifestó y las palabras a duras penas reprimidas que asomaron a sus labios mientras le hablaba carlos 2 | -------------------------------------------------------------------------------- /tests/data/lab/61-70968-0000.lab: -------------------------------------------------------------------------------- 1 | HE BEGAN A CONFUSED COMPLAINT AGAINST THE WIZARD WHO HAD VANISHED BEHIND THE CURTAIN ON THE LEFT 2 | -------------------------------------------------------------------------------- /tests/data/lab/acoustic_corpus.lab: -------------------------------------------------------------------------------- 1 | this is the acoustic corpus i'm talking pretty fast here there's nothing going else going on we're just yknow there's some speech errors but who cares um this is me talking really slow and slightly lower in intensity uh we're just saying some words and here's some more words words words words um and that should be all thanks 2 | -------------------------------------------------------------------------------- /tests/data/lab/cold_corpus.lab: -------------------------------------------------------------------------------- 1 | uh so this is the sick corpus uh i have a cold so i probably sound quite different than the uh acoustic corpus um the recording environment is also quite different and i'm saying a bunch of different words that i did not say in the original one uh and here's a long pause and i think this is probably good alright thanks 2 | -------------------------------------------------------------------------------- /tests/data/lab/cold_corpus3.lab: -------------------------------------------------------------------------------- 1 | alright so this is the sick corpus uh hopefully the recording levels are okay um i have a cold so this probably sounds a lot different than the acoustic corpus uh and i'm also saying [adif] bunch of different words um i 
think i'm probably gonna cough here yeah so that just happened uh and uh that should be good alright thanks 2 | -------------------------------------------------------------------------------- /tests/data/lab/cold_corpus3_extra.lab: -------------------------------------------------------------------------------- 1 | alright so this is the sick corpus uh hopefully the recording levels are okay um i have a cold so this probably sounds a lot different than the acoustic corpus uh and i'm also saying [adif] bunch of different words um i think i'm probably gonna cough here {CG} yeah so that just happened uh and uh that should be good alright thanks 2 | -------------------------------------------------------------------------------- /tests/data/lab/common_voice_en_22058264.lab: -------------------------------------------------------------------------------- 1 | no 2 | -------------------------------------------------------------------------------- /tests/data/lab/common_voice_en_22058266.lab: -------------------------------------------------------------------------------- 1 | Fire fox 2 | -------------------------------------------------------------------------------- /tests/data/lab/common_voice_en_22058267.lab: -------------------------------------------------------------------------------- 1 | six 2 | -------------------------------------------------------------------------------- /tests/data/lab/common_voice_ja_24511055.lab: -------------------------------------------------------------------------------- 1 | 真っ昼間なのにキャンプの外れの電柱に電球がともっていた 2 | -------------------------------------------------------------------------------- /tests/data/lab/devanagari.lab: -------------------------------------------------------------------------------- 1 | हैंः हूं हौंसला 2 | -------------------------------------------------------------------------------- /tests/data/lab/french_clitics.lab: -------------------------------------------------------------------------------- 1 | aujourd aujourd'hui m'appelle purple-people-eater vingt-six m'm'appelle c'est m'c'est m'appele m'ving-sic flying'purple-people-eater 2 | -------------------------------------------------------------------------------- /tests/data/lab/japanese.lab: -------------------------------------------------------------------------------- 1 | 「はい」、。! 
『何 でしょう』 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa.txt: -------------------------------------------------------------------------------- 1 | i can't think of an animal that's less chad like than a sloth 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_2.txt: -------------------------------------------------------------------------------- 1 | welcome to a series of plat chat videos where we're gonna tackle every single team in the overwatch league twenty twenty 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_3.txt: -------------------------------------------------------------------------------- 1 | and run you through 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_4.txt: -------------------------------------------------------------------------------- 1 | kinda our fears and also predictions for them 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_5.txt: -------------------------------------------------------------------------------- 1 | i'm sideshow joined by custer and reinforce we've got a special edition of plat chat 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_us.txt: -------------------------------------------------------------------------------- 1 | uh with only like four games to go 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_us_2.txt: -------------------------------------------------------------------------------- 1 | hey josh could have finished it he just decided to fail it instead 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_us_3.txt: -------------------------------------------------------------------------------- 1 | really good performances against top teams that have ended up going their way 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_us_4.txt: -------------------------------------------------------------------------------- 1 | uh i i still think it's a very good team though in n a i think this is uh 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_us_5.txt: -------------------------------------------------------------------------------- 1 | uh and this was the first time i think the justice really looked like an elite team 2 | -------------------------------------------------------------------------------- /tests/data/lab/punctuated.lab: -------------------------------------------------------------------------------- 1 | oh yes, they - they, you know, they love her' and so' 'something 'i mean... ‘you The village name is Anglo Saxon in origin, and means 'Myrsa's woodland'. 
2 | -------------------------------------------------------------------------------- /tests/data/lab/se10x016-08071999-1334_u0016001.lab: -------------------------------------------------------------------------------- 1 | tyst under denna inspelning 2 | -------------------------------------------------------------------------------- /tests/data/lab/se10x016-08071999-1334_u0016002.lab: -------------------------------------------------------------------------------- 1 | Testar en två tre fyra fem sex sju åtta. 2 | -------------------------------------------------------------------------------- /tests/data/lab/se10x016-08071999-1334_u0016003.lab: -------------------------------------------------------------------------------- 1 | Har du sett våra rara barnbarn som leker och busar ute i grannträdgården! 2 | -------------------------------------------------------------------------------- /tests/data/lab/se10x016-08071999-1334_u0016004.lab: -------------------------------------------------------------------------------- 1 | Vår husläkare börjar i och för sig bli gammal och skröplig, men han har ju ett sådant trevligt sätt! 2 | -------------------------------------------------------------------------------- /tests/data/lab/weird_words.lab: -------------------------------------------------------------------------------- 1 | i’m talking-ajfish me-really [me-really] [me'really] [me_??_really] asds-asda sdasd-me 2 | -------------------------------------------------------------------------------- /tests/data/lab/xsampa.lab: -------------------------------------------------------------------------------- 1 | @bUr\tOU {bstr\{kt {bSaIr\ Abr\utseIzi {br\@geItIN @bor\n {b3kr\Ambi {bI5s@`n Ar\g thr\Ip@5eI Ar\dvAr\k 2 | -------------------------------------------------------------------------------- /tests/data/lab/日本語.lab: -------------------------------------------------------------------------------- 1 | 「はい」、。! 
『何 でしょう』 2 | -------------------------------------------------------------------------------- /tests/data/lm/test_lm.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/lm/test_lm.zip -------------------------------------------------------------------------------- /tests/data/textgrid/61-70968-0000.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 4.905 6 | 7 | 1 8 | "IntervalTier" 9 | "61" 10 | 0 11 | 4.905 12 | 1 13 | 0 14 | 4.905 15 | "HE BEGAN A CONFUSED COMPLAINT AGAINST THE WIZARD WHO HAD VANISHED BEHIND THE CURTAIN ON THE LEFT" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/michaelandsickmichael_short_tg.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 52.44082780612245 6 | 7 | 2 8 | "IntervalTier" 9 | "michael" 10 | 0 11 | 52.44082780612245 12 | 7 13 | 0 14 | 1.059222833923831 15 | "" 16 | 1.059222833923831 17 | 7.541483952089169 18 | "this is the acoustic corpus i'm talking pretty fast here there's nothing going else going on we're just yknow there's some speech errors but who 19 | cares" 20 | 7.541483952089169 21 | 8.016163828116456 22 | "" 23 | 8.016163828116456 24 | 17.207369573609213 25 | "um this is me talking really slow and slightly lower in intensity uh we're just saying some words" 26 | 17.207369573609213 27 | 18.35980726400338 28 | "" 29 | 18.35980726400338 30 | 25.251655700977985 31 | "and here's some more words words words words um and that should be all thanks" 32 | 25.251655700977985 33 | 52.44082780612245 34 | "" 35 | "IntervalTier" 36 | "sickmichael" 37 | 0 38 | 52.44080102040816 39 | 9 40 | 0 41 | 26.72325 42 | "" 43 | 26.72325 44 | 39.52854922648294 45 | "uh so this is the sick corpus uh i have a cold so i probably sound quite different than the uh uh acoustic corpus um the recording environment is also quite different" 46 | 39.52854922648294 47 | 40.20409920265843 48 | "" 49 | 40.20409920265843 50 | 43.81379465384285 51 | "and i'm saying a bunch of different words that i did not say in the original one" 52 | 43.81379465384285 53 | 44.480184007206404 54 | "" 55 | 44.480184007206404 56 | 45.08451636541159 57 | "uh" 58 | 45.08451636541159 59 | 46.37863407952624 60 | "" 61 | 46.37863407952624 62 | 51.457439118982556 63 | "and here's a long pause and i think this is probably good alright thanks" 64 | 51.457439118982556 65 | 52.44080102040816 66 | "" 67 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 4.1195 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_one" 10 | 0 11 | 4.1195 12 | 1 13 | 0 14 | 4.1195 15 | "i can't think of an animal that's less chad-like than a sloth" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_2.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 6.2271 6 | 7 | 1 8 | "IntervalTier" 9 | 
"speaker_one" 10 | 0 11 | 6.2271 12 | 1 13 | 0 14 | 6.2271 15 | "welcome to a series of platchat videos where we're gonna tackle every single team in the overwatch league twenty twenty" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_3.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 1.3062999999999994 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_one" 10 | 0 11 | 1.3062999999999994 12 | 1 13 | 0 14 | 1.3062999999999994 15 | "and run you through" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_4.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 3.296199999999999 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_one" 10 | 0 11 | 3.296199999999999 12 | 1 13 | 0 14 | 3.296199999999999 15 | "kinda our fears and also predictions for them" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_5.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 4.304 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_one" 10 | 0 11 | 4.304 12 | 1 13 | 0 14 | 4.304 15 | "i'm sideshow joined by custa and reinforce we've got a special edition of platchat" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_us.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 2.9013125 6 | tiers? 
7 | size = 1 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "speaker_two" 12 | xmin = 0 13 | xmax = 2.9013125 14 | intervals: size = 1 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 2.9013125 18 | text = "uh with only like four games to go" 19 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_us_2.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 2.411162499999989 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_two" 10 | 0 11 | 2.411162499999989 12 | 1 13 | 0 14 | 2.411162499999989 15 | "hey josh could have finished it he just decided to fail it instead" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_us_3.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 3.350999999999999 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_two" 10 | 0 11 | 3.350999999999999 12 | 1 13 | 0 14 | 3.350999999999999 15 | "really good performances against top teams that have ended up going their way" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_us_4.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 3.5188874999998916 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_two" 10 | 0 11 | 3.5188874999998916 12 | 1 13 | 0 14 | 3.5188874999998916 15 | "uh i i still think it's a very good team though in n a i think this is uh" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_us_5.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 4.656600000000026 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_two" 10 | 0 11 | 4.656600000000026 12 | 1 13 | 0 14 | 4.656600000000026 15 | "uh and this was the first time i think the justice really looked like an elite team" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/short_segments.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 
7 | size = 1 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "talker" 12 | xmin = 0 13 | xmax = 1 14 | intervals: size = 7 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 0.16250605313552421 18 | text = "" 19 | intervals [2]: 20 | xmin = 0.16250605313552421 21 | xmax = 0.2837613633862341 22 | text = "blah" 23 | intervals [3]: 24 | xmin = 0.2837613633862341 25 | xmax = 0.43007610442209065 26 | text = "" 27 | intervals [4]: 28 | xmin = 0.43007610442209065 29 | xmax = 0.4389681605071427 30 | text = "ts" 31 | intervals [5]: 32 | xmin = 0.4389681605071427 33 | xmax = 0.6588444564284299 34 | text = "" 35 | intervals [6]: 36 | xmin = 0.6588444564284299 37 | xmax = 0.8480027404195374 38 | text = "blah2" 39 | intervals [7]: 40 | xmin = 0.8480027404195374 41 | xmax = 1 42 | text = "" 43 | -------------------------------------------------------------------------------- /tests/data/textgrid/vietnamese.TextGrid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/textgrid/vietnamese.TextGrid -------------------------------------------------------------------------------- /tests/data/tokenizer/test_tokenizer_model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/tokenizer/test_tokenizer_model.zip -------------------------------------------------------------------------------- /tests/data/tokenizer/test_tokenizer_model_phonetisaurus.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/tokenizer/test_tokenizer_model_phonetisaurus.zip -------------------------------------------------------------------------------- /tests/data/wav/13697_11991_000000.opus: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/13697_11991_000000.opus -------------------------------------------------------------------------------- /tests/data/wav/61-70968-0000.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/61-70968-0000.flac -------------------------------------------------------------------------------- /tests/data/wav/acoustic_corpus.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/acoustic_corpus.wav -------------------------------------------------------------------------------- /tests/data/wav/cold_corpus.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/cold_corpus.wav -------------------------------------------------------------------------------- /tests/data/wav/cold_corpus3.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/cold_corpus3.wav -------------------------------------------------------------------------------- /tests/data/wav/cold_corpus_24bit.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/cold_corpus_24bit.wav -------------------------------------------------------------------------------- /tests/data/wav/cold_corpus_32bit_float.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/cold_corpus_32bit_float.wav -------------------------------------------------------------------------------- /tests/data/wav/common_voice_en_22058264.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/common_voice_en_22058264.mp3 -------------------------------------------------------------------------------- /tests/data/wav/common_voice_en_22058266.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/common_voice_en_22058266.mp3 -------------------------------------------------------------------------------- /tests/data/wav/common_voice_en_22058267.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/common_voice_en_22058267.mp3 -------------------------------------------------------------------------------- /tests/data/wav/common_voice_ja_24511055.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/common_voice_ja_24511055.mp3 -------------------------------------------------------------------------------- /tests/data/wav/dummy.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/dummy.mp3 -------------------------------------------------------------------------------- /tests/data/wav/dummy.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/dummy.wav -------------------------------------------------------------------------------- /tests/data/wav/falsetto.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/falsetto.flac -------------------------------------------------------------------------------- /tests/data/wav/falsetto2.flac: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/falsetto2.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_a.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_a.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_affectation.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_affectation.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_apex.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_apex.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_bottle.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_bottle.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_breaths.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_breaths.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_breathy.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_breathy.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_buddy.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_buddy.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_creaky.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_creaky.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_crossword.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_crossword.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_cutoff.flac: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_cutoff.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_cutoffprogressive.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_cutoffprogressive.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_er.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_er.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_erpause.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_erpause.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_exaggerated.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_exaggerated.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_falsetto.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_falsetto.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_her.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_her.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_hes.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_hes.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_internalsil.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_internalsil.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_kmg.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_kmg.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_laughter.flac: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_laughter.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_long.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_long.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_longstop.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_longstop.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_michael.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_michael.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_patty.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_patty.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_poofy.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_poofy.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_pooty.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_pooty.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_puddy.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_puddy.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_putty.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_putty.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_puttynorm.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_puttynorm.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_reallylong.flac: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_reallylong.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_registershift.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_registershift.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_surround.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_surround.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_the.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_the.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_theapprox.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_theapprox.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_theinitialstop.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_theinitialstop.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_thenorm.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_thenorm.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_theother.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_theother.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_thestop.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_thestop.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_thez.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_thez.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_thoughts.flac: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_thoughts.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_uh.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_uh.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_uhuh.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_uhuh.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_uhum.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_uhum.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_um.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_um.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_unk.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_unk.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_whatscalled.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_whatscalled.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_whisper.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_whisper.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_words.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_words.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_youknow.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_youknow.flac -------------------------------------------------------------------------------- /tests/data/wav/michaelandsickmichael.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/michaelandsickmichael.wav
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_2.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_2.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_3.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_3.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_4.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_4.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_5.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_5.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_us.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_us.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_us_2.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_us_2.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_us_3.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_us_3.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_us_4.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_us_4.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_us_5.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_us_5.flac
--------------------------------------------------------------------------------
/tests/data/wav/se10x016-08071999-1334_u0016001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/se10x016-08071999-1334_u0016001.wav
--------------------------------------------------------------------------------
/tests/data/wav/se10x016-08071999-1334_u0016002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/se10x016-08071999-1334_u0016002.wav
--------------------------------------------------------------------------------
/tests/data/wav/se10x016-08071999-1334_u0016003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/se10x016-08071999-1334_u0016003.wav
--------------------------------------------------------------------------------
/tests/data/wav/se10x016-08071999-1334_u0016004.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/se10x016-08071999-1334_u0016004.wav
--------------------------------------------------------------------------------
/tests/data/wav/whisper.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/whisper.flac
--------------------------------------------------------------------------------
/tests/data/wav/whisper2.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/whisper2.flac
--------------------------------------------------------------------------------
/tests/test_abc.py:
--------------------------------------------------------------------------------
 1 | from montreal_forced_aligner.abc import MfaWorker, TrainerMixin
 2 | from montreal_forced_aligner.acoustic_modeling import SatTrainer, TrainableAligner
 3 | from montreal_forced_aligner.alignment import AlignMixin
 4 | 
 5 | 
 6 | def test_typing(basic_corpus_dir, basic_dict_path, temp_dir):
 7 |     am_trainer = TrainableAligner(
 8 |         corpus_directory=basic_corpus_dir,
 9 |         dictionary_path=basic_dict_path,
10 |     )
11 |     trainer = SatTrainer(identifier="sat", worker=am_trainer)
12 |     assert type(trainer).__name__ == "SatTrainer"
13 |     assert isinstance(trainer, TrainerMixin)
14 |     assert isinstance(trainer, AlignMixin)
15 |     assert isinstance(trainer, MfaWorker)
16 |     assert isinstance(am_trainer, MfaWorker)
17 | 
--------------------------------------------------------------------------------
/tests/test_commandline_adapt.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import click.testing
 4 | 
 5 | from montreal_forced_aligner.command_line.mfa import mfa_cli
 6 | 
 7 | 
 8 | def test_adapt_basic(
 9 |     basic_corpus_dir,
10 |     generated_dir,
11 |     english_dictionary,
12 |     temp_dir,
13 |     test_align_config,
14 |     english_acoustic_model,
15 |     db_setup,
16 | ):
17 |     adapted_model_path = generated_dir.joinpath("basic_adapted.zip")
18 |     command = [
19 |         "adapt",
20 |         basic_corpus_dir,
21 |         english_dictionary,
22 |         english_acoustic_model,
23 |         adapted_model_path,
24 |         "--beam",
25 |         "100",
26 |         "--clean",
27 |         "--no_debug",
28 |         "-p",
29 |         "test",
30 |     ]
31 |     command = [str(x) for x in command]
32 |     result = click.testing.CliRunner().invoke(
33 |         mfa_cli, command, catch_exceptions=True
34 |     )
35 |     print(result.stdout)
36 |     print(result.stderr)
37 |     if result.exception:
38 |         print(result.exc_info)
39 |         raise result.exception
40 |     assert os.path.exists(adapted_model_path)
41 | 
42 | 
43 | def test_adapt_multilingual(
44 |     multilingual_ipa_corpus_dir,
45 |     mfa_speaker_dict_path,
46 |     generated_dir,
47 |     temp_dir,
48 |     basic_align_config_path,
49 |     english_acoustic_model,
50 |     english_mfa_acoustic_model,
51 |     db_setup,
52 | ):
53 |     adapted_model_path = generated_dir.joinpath("multilingual_adapted.zip")
54 |     output_path = generated_dir.joinpath("multilingual_output")
55 |     command = [
56 |         "adapt",
57 |         multilingual_ipa_corpus_dir,
58 |         mfa_speaker_dict_path,
59 |         english_mfa_acoustic_model,
60 |         adapted_model_path,
61 |         output_path,
62 |         "--config_path",
63 |         basic_align_config_path,
64 |         "-q",
65 |         "--clean",
66 |         "--no_debug",
67 |         "-p",
68 |         "test",
69 |     ]
70 |     command = [str(x) for x in command]
71 |     result = click.testing.CliRunner().invoke(
72 |         mfa_cli, command, catch_exceptions=True
73 |     )
74 |     print(result.stdout)
75 |     print(result.stderr)
76 |     if result.exception:
77 |         print(result.exc_info)
78 |         raise result.exception
79 |     assert os.path.exists(adapted_model_path)
80 | 
--------------------------------------------------------------------------------
/tests/test_commandline_configure.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import click.testing
 4 | 
 5 | from montreal_forced_aligner import config
 6 | from montreal_forced_aligner.command_line.mfa import mfa_cli
 7 | 
 8 | 
 9 | def test_configure(
10 |     temp_dir,
11 |     basic_corpus_dir,
12 |     generated_dir,
13 |     english_dictionary,
14 |     basic_align_config_path,
15 |     english_acoustic_model,
16 |     global_config,
17 | ):
18 |     path = config.generate_config_path()
19 |     if os.path.exists(path):
20 |         os.remove(path)
21 |     command = [
22 |         "configure",
23 |         "--always_clean",
24 |         "-t",
25 |         temp_dir,
26 |         "-j",
27 |         "10",
28 |         "--disable_mp",
29 |         "--always_verbose",
30 |         "-p",
31 |         "test",
32 |     ]
33 |     command = [str(x) for x in command]
34 |     click.testing.CliRunner().invoke(mfa_cli, command, catch_exceptions=False)
35 |     assert os.path.exists(path)
36 |     config.load_configuration()
37 | 
38 |     assert config.CURRENT_PROFILE_NAME == "test"
39 |     assert config.NUM_JOBS == 10
40 |     assert not config.USE_MP
41 |     assert config.VERBOSE
42 |     assert config.CLEAN
43 | 
44 |     command = ["configure", "--never_clean", "--enable_mp", "--never_verbose", "-p", "test"]
45 |     click.testing.CliRunner().invoke(mfa_cli, command, catch_exceptions=False)
46 | 
47 |     assert os.path.exists(path)
48 |     config.load_configuration()
49 |     assert config.CURRENT_PROFILE_NAME == "test"
50 |     assert config.USE_MP
51 |     assert not config.VERBOSE
52 |     assert not config.CLEAN
53 | 
54 |     config.CLEAN = True
55 |     config.DEBUG = True
56 |     config.VERBOSE = True
57 |     config.USE_MP = False
58 |     config.TEMPORARY_DIRECTORY = temp_dir
59 | 
--------------------------------------------------------------------------------
/tests/test_commandline_history.py:
--------------------------------------------------------------------------------
 1 | import click.testing
 2 | 
 3 | from montreal_forced_aligner.command_line.mfa import mfa_cli
 4 | 
 5 | 
 6 | def test_mfa_history():
 7 | 
 8 |     command = ["history", "--depth", "60"]
 9 |     result = click.testing.CliRunner().invoke(
10 |         mfa_cli, command, catch_exceptions=True
11 |     )
12 |     print(result.stdout)
13 |     print(result.stderr)
14 |     if result.exception:
15 |         print(result.exc_info)
16 |         raise result.exception
17 |     assert not result.return_value
18 | 
19 |     command = ["history"]
20 |     result = click.testing.CliRunner().invoke(
21 |         mfa_cli, command, catch_exceptions=True
22 |     )
23 |     print(result.stdout)
24 |     print(result.stderr)
25 |     if result.exception:
26 |         print(result.exc_info)
27 |         raise result.exception
28 |     assert not result.return_value
29 | 
30 | 
31 | def test_mfa_history_verbose():
32 | 
33 |     command = ["history", "-v", "--depth", "60"]
34 |     result = click.testing.CliRunner().invoke(
35 |         mfa_cli, command, catch_exceptions=True
36 |     )
37 |     print(result.stdout)
38 |     print(result.stderr)
39 |     if result.exception:
40 |         print(result.exc_info)
41 |         raise result.exception
42 |     assert not result.return_value
43 | 
44 |     command = ["history", "-v"]
45 |     result = click.testing.CliRunner().invoke(
46 |         mfa_cli, command, catch_exceptions=True
47 |     )
48 |     print(result.stdout)
49 |     print(result.stderr)
50 |     if result.exception:
51 |         print(result.exc_info)
52 |         raise result.exception
53 |     assert not result.return_value
54 | 
--------------------------------------------------------------------------------
/tests/test_commandline_train_dict.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import click.testing
 4 | import sqlalchemy.orm
 5 | 
 6 | from montreal_forced_aligner.command_line.mfa import mfa_cli
 7 | 
 8 | 
 9 | def test_train_dict(
10 |     basic_corpus_dir,
11 |     english_dictionary,
12 |     english_acoustic_model,
13 |     generated_dir,
14 |     temp_dir,
15 |     basic_align_config_path,
16 |     db_setup,
17 | ):
18 |     output_path = generated_dir.joinpath("trained_dict")
19 |     command = [
20 |         "train_dictionary",
21 |         basic_corpus_dir,
22 |         english_dictionary,
23 |         english_acoustic_model,
24 |         output_path,
25 |         "-q",
26 |         "--clean",
27 |         "--debug",
28 |         "--silence_probabilities",
29 |         "--config_path",
30 |         basic_align_config_path,
31 |         "--use_mp",
32 |     ]
33 |     command = [str(x) for x in command]
34 |     result = click.testing.CliRunner().invoke(
35 |         mfa_cli, command, catch_exceptions=True
36 |     )
37 |     print(result.stdout)
38 |     print(result.stderr)
39 |     if result.exception:
40 |         print(result.exc_info)
41 |         raise result.exception
42 |     assert not result.return_value
43 | 
44 |     dict_path = os.path.join(output_path, "english_us_arpa.dict")
45 |     assert os.path.exists(output_path)
46 |     sqlalchemy.orm.close_all_sessions()
47 |     textgrid_output = generated_dir.joinpath("trained_dict_output")
48 |     command = [
49 |         "align",
50 |         basic_corpus_dir,
51 |         dict_path,
52 |         english_acoustic_model,
53 |         textgrid_output,
54 |         "-q",
55 |         "--clean",
56 |         "--debug",
57 |         "--config_path",
58 |         basic_align_config_path,
59 |     ]
60 |     command = [str(x) for x in command]
61 |     result = click.testing.CliRunner().invoke(
62 |         mfa_cli, command, catch_exceptions=True
63 |     )
64 |     print(result.stdout)
65 |     print(result.stderr)
66 |     if result.exception:
67 |         print(result.exc_info)
68 |         raise result.exception
69 |     assert not result.return_value
70 |     assert os.path.exists(textgrid_output)
71 | 
--------------------------------------------------------------------------------
/tests/test_commandline_train_ivector.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import click.testing
 4 | 
 5 | from montreal_forced_aligner.command_line.mfa import mfa_cli
 6 | 
 7 | 
 8 | def test_basic_ivector(
 9 |     basic_corpus_dir,
10 |     generated_dir,
11 |     temp_dir,
12 |     train_ivector_config_path,
13 |     ivector_output_model_path,
14 |     db_setup,
15 | ):
16 |     command = [
17 |         "train_ivector",
18 |         basic_corpus_dir,
19 |         ivector_output_model_path,
20 |         "--config_path",
21 |         train_ivector_config_path,
22 |         "-q",
23 |         "--clean",
24 |         "--debug",
25 |     ]
26 |     command = [str(x) for x in command]
27 |     result = click.testing.CliRunner().invoke(
28 |         mfa_cli, command, catch_exceptions=True
29 |     )
30 |     print(result.stdout)
31 |     print(result.stderr)
32 |     if result.exception:
33 |         print(result.exc_info)
34 |         raise result.exception
35 |     assert not result.return_value
36 |     assert os.path.exists(ivector_output_model_path)
37 | 
--------------------------------------------------------------------------------
/tests/test_gui.py:
--------------------------------------------------------------------------------
 1 | from montreal_forced_aligner import config
 2 | from montreal_forced_aligner.corpus.acoustic_corpus import AcousticCorpus
 3 | 
 4 | 
 5 | def test_save_text_lab(basic_corpus_dir, generated_dir, db_setup):
 6 |     output_directory = generated_dir.joinpath("gui_tests")
 7 |     config.TEMPORARY_DIRECTORY = output_directory
 8 |     corpus = AcousticCorpus(
 9 |         corpus_directory=basic_corpus_dir,
10 |     )
11 |     corpus._load_corpus()
12 |     corpus.get_file(name="acoustic_corpus").save(corpus.corpus_directory)
13 |     corpus.cleanup_connections()
14 | 
15 | 
16 | def test_file_properties(
17 |     stereo_corpus_dir,
18 |     generated_dir,
19 |     db_setup,
20 | ):
21 |     output_directory = generated_dir.joinpath("gui_tests")
22 |     config.TEMPORARY_DIRECTORY = output_directory
23 |     corpus = AcousticCorpus(
24 |         corpus_directory=stereo_corpus_dir,
25 |     )
26 |     corpus._load_corpus()
27 |     file = corpus.get_file(name="michaelandsickmichael")
28 |     assert file.sound_file.num_channels == 2
29 |     assert file.num_speakers == 2
30 |     assert file.num_utterances == 7
31 |     x, y = file.sound_file.normalized_waveform()
32 |     assert y.shape[0] == 2
33 | 
34 | 
35 | def test_flac_tg(flac_tg_corpus_dir, generated_dir, db_setup):
36 |     output_directory = generated_dir.joinpath("gui_tests")
37 |     config.TEMPORARY_DIRECTORY = output_directory
38 |     corpus = AcousticCorpus(
39 |         corpus_directory=flac_tg_corpus_dir,
40 |     )
41 |     corpus._load_corpus()
42 |     corpus.get_file(name="61-70968-0000").save(corpus.corpus_directory)
43 |     corpus.cleanup_connections()
44 | 
--------------------------------------------------------------------------------
/tests/test_helper.py:
--------------------------------------------------------------------------------
 1 | from montreal_forced_aligner.data import CtmInterval
 2 | from montreal_forced_aligner.helper import align_phones, load_evaluation_mapping
 3 | 
 4 | 
 5 | def test_align_phones(basic_corpus_dir, basic_dict_path, temp_dir, eval_mapping_path):
 6 |     mapping = load_evaluation_mapping(eval_mapping_path)
 7 |     reference_phoneset = set()
 8 |     for v in mapping.values():
 9 |         if isinstance(v, str):
10 |             reference_phoneset.add(v)
11 |         else:
12 |             reference_phoneset.update(v)
13 | 
14 |     reference_sequence = [
15 |         "HH",
16 |         "IY0",
17 |         "HH",
18 |         "AE1",
19 |         "D",
20 |         "Y",
21 |         "ER0",
22 |         "G",
23 |         "R",
24 |         "IY1",
25 |         "S",
26 |         "IY0",
27 |         "S",
28 |         "UW1",
29 |         "T",
30 |         "IH0",
31 |         "N",
32 |         "D",
33 |         "ER1",
34 |         "T",
35 |         "IY0",
36 |         "W",
37 |         "AA1",
38 |         "SH",
39 |         "W",
40 |         "AO1",
41 |         "T",
42 |         "ER0",
43 |         "AO1",
44 |         "L",
45 |         "sil",
46 |         "Y",
47 |         "IH1",
48 |         "R",
49 |     ]
50 |     reference_sequence = [CtmInterval(i, i + 1, x) for i, x in enumerate(reference_sequence)]
51 |     comparison_sequence = [
52 |         "ç",
53 |         "i",
54 |         "h",
55 |         "æ",
56 |         "d",
57 |         "j",
58 |         "ɚ",
59 |         "ɟ",
60 |         "ɹ",
61 |         "iː",
62 |         "s",
63 |         "i",
64 |         "s",
65 |         "ʉː",
66 |         "t",
67 |         "sil",
68 |         "ɪ",
69 |         "n",
70 |         "d",
71 |         "ɝ",
72 |         "ɾ",
73 |         "i",
74 |         "w",
75 |         "ɑː",
76 |         "ʃ",
77 |         "w",
78 |         "ɑː",
79 |         "ɾ",
80 |         "ɚ",
81 |         "ɑː",
82 |         "ɫ",
83 |         "sil",
84 |         "j",
85 |         "ɪ",
86 |         "ɹ",
87 |     ]
88 |     comparison_sequence = [CtmInterval(i, i + 1, x) for i, x in enumerate(comparison_sequence)]
89 |     score, phone_errors, error_counts = align_phones(
90 |         reference_sequence,
91 |         comparison_sequence,
92 |         silence_phone="sil",
93 |         custom_mapping=mapping,
94 |         debug=True,
95 |     )
96 | 
97 |     assert score < 1
98 |     assert phone_errors < 1
99 | 
--------------------------------------------------------------------------------
/tests/test_segmentation.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from montreal_forced_aligner.diarization.speaker_diarizer import FOUND_SPEECHBRAIN
 4 | from montreal_forced_aligner.vad.segmenter import TranscriptionSegmenter
 5 | 
 6 | 
 7 | def test_segment_transcript(
 8 |     basic_corpus_dir,
 9 |     english_mfa_acoustic_model,
10 |     english_us_mfa_reduced_dict,
11 |     generated_dir,
12 |     temp_dir,
13 |     basic_segment_config_path,
14 |     db_setup,
15 | ):
16 |     if not FOUND_SPEECHBRAIN:
17 |         pytest.skip("SpeechBrain not installed")
18 |     segmenter = TranscriptionSegmenter(
19 |         corpus_directory=basic_corpus_dir,
20 |         dictionary_path=english_us_mfa_reduced_dict,
21 |         acoustic_model_path=english_mfa_acoustic_model,
22 |         speechbrain=True,
23 |         en_activation_th=0.4,
24 |         en_deactivation_th=0.4,
25 |     )
26 |     segmenter.setup()
27 |     new_utterances = segmenter.segment_transcript(1)
28 |     assert len(new_utterances) > 0
29 |     segmenter.cleanup()
30 | 
--------------------------------------------------------------------------------