├── tests ├── __init__.py ├── data │ ├── configs │ │ ├── transcribe.yaml │ │ ├── mono_align.yaml │ │ ├── bad_align_config.yaml │ │ ├── acoustic │ │ │ ├── bad_topology.yaml │ │ │ ├── english_mfa_topology.yaml │ │ │ └── english_mfa_phone_groups.yaml │ │ ├── basic_align_config.yaml │ │ ├── basic_train_lm.yaml │ │ ├── test_rules.yaml │ │ ├── basic_segment_config.yaml │ │ ├── g2p_config.yaml │ │ ├── ivector_train.yaml │ │ ├── mono_train.yaml │ │ ├── xsampa_train.yaml │ │ ├── pron_train.yaml │ │ ├── different_punctuation_config.yaml │ │ ├── no_punctuation_config.yaml │ │ ├── train_g2p_config.yaml │ │ ├── tri_train.yaml │ │ ├── lda_train.yaml │ │ ├── out_of_order_config.yaml │ │ ├── pitch_tri_train.yaml │ │ ├── sat_train.yaml │ │ ├── english_arpa_remapping.yaml │ │ ├── test_groups.yaml │ │ ├── basic_train_config.yaml │ │ ├── lda_sat_train.yaml │ │ ├── basic_ipa_config.yaml │ │ ├── train_g2p_acoustic.yaml │ │ └── eval_mapping.yaml │ ├── lab │ │ ├── 日本語.lab │ │ ├── japanese.lab │ │ ├── common_voice_en_22058264.lab │ │ ├── common_voice_en_22058267.lab │ │ ├── devanagari.lab │ │ ├── common_voice_en_22058266.lab │ │ ├── multilingual_ipa_3.txt │ │ ├── common_voice_ja_24511055.lab │ │ ├── multilingual_ipa_us.txt │ │ ├── se10x016-08071999-1334_u0016001.lab │ │ ├── multilingual_ipa_4.txt │ │ ├── se10x016-08071999-1334_u0016002.lab │ │ ├── multilingual_ipa.txt │ │ ├── multilingual_ipa_us_2.txt │ │ ├── multilingual_ipa_us_4.txt │ │ ├── multilingual_ipa_us_3.txt │ │ ├── multilingual_ipa_5.txt │ │ ├── multilingual_ipa_us_5.txt │ │ ├── se10x016-08071999-1334_u0016003.lab │ │ ├── 61-70968-0000.lab │ │ ├── weird_words.lab │ │ ├── xsampa.lab │ │ ├── se10x016-08071999-1334_u0016004.lab │ │ ├── multilingual_ipa_2.txt │ │ ├── french_clitics.lab │ │ ├── punctuated.lab │ │ ├── 13697_11991_000000.lab │ │ ├── cold_corpus.lab │ │ ├── acoustic_corpus.lab │ │ ├── cold_corpus3.lab │ │ └── cold_corpus3_extra.lab │ ├── dictionaries │ │ ├── acoustic_g2p_dictionary.yaml │ │ ├── expected │ │ │ ├── 
graphemes.txt │ │ │ ├── phones │ │ │ │ ├── sets.int │ │ │ │ ├── roots.int │ │ │ │ ├── extra_questions.int │ │ │ │ ├── sets.txt │ │ │ │ ├── roots.txt │ │ │ │ ├── word_boundary.int │ │ │ │ ├── extra_questions.txt │ │ │ │ └── word_boundary.txt │ │ │ ├── words.txt │ │ │ ├── phone_map.txt │ │ │ ├── phones.txt │ │ │ ├── lexicon.text.fst │ │ │ └── topo │ │ ├── test_hindi.txt │ │ ├── test_abstract.txt │ │ ├── test_japanese.txt │ │ ├── test_extra_annotations.txt │ │ ├── test_frclitics.txt │ │ ├── test_acoustic.txt │ │ ├── test_basic.txt │ │ ├── test_mixed_format_dictionary.txt │ │ └── test_tabbed_dictionary.txt │ ├── wav │ │ ├── dummy.mp3 │ │ ├── dummy.wav │ │ ├── mfa_a.flac │ │ ├── mfa_er.flac │ │ ├── mfa_her.flac │ │ ├── mfa_hes.flac │ │ ├── mfa_kmg.flac │ │ ├── mfa_the.flac │ │ ├── mfa_uh.flac │ │ ├── mfa_um.flac │ │ ├── mfa_unk.flac │ │ ├── whisper.flac │ │ ├── falsetto.flac │ │ ├── falsetto2.flac │ │ ├── mfa_apex.flac │ │ ├── mfa_buddy.flac │ │ ├── mfa_long.flac │ │ ├── mfa_patty.flac │ │ ├── mfa_poofy.flac │ │ ├── mfa_pooty.flac │ │ ├── mfa_puddy.flac │ │ ├── mfa_putty.flac │ │ ├── mfa_thez.flac │ │ ├── mfa_uhuh.flac │ │ ├── mfa_uhum.flac │ │ ├── mfa_words.flac │ │ ├── whisper2.flac │ │ ├── cold_corpus.wav │ │ ├── cold_corpus3.wav │ │ ├── mfa_bottle.flac │ │ ├── mfa_breaths.flac │ │ ├── mfa_breathy.flac │ │ ├── mfa_creaky.flac │ │ ├── mfa_cutoff.flac │ │ ├── mfa_erpause.flac │ │ ├── mfa_falsetto.flac │ │ ├── mfa_laughter.flac │ │ ├── mfa_longstop.flac │ │ ├── mfa_michael.flac │ │ ├── mfa_surround.flac │ │ ├── mfa_thenorm.flac │ │ ├── mfa_theother.flac │ │ ├── mfa_thestop.flac │ │ ├── mfa_thoughts.flac │ │ ├── mfa_whisper.flac │ │ ├── mfa_youknow.flac │ │ ├── 61-70968-0000.flac │ │ ├── acoustic_corpus.wav │ │ ├── mfa_crossword.flac │ │ ├── mfa_puttynorm.flac │ │ ├── mfa_reallylong.flac │ │ ├── mfa_theapprox.flac │ │ ├── cold_corpus_24bit.wav │ │ ├── mfa_affectation.flac │ │ ├── mfa_exaggerated.flac │ │ ├── mfa_internalsil.flac │ │ ├── mfa_registershift.flac │ │ ├── 
mfa_whatscalled.flac │ │ ├── multilingual_ipa.flac │ │ ├── 13697_11991_000000.opus │ │ ├── mfa_theinitialstop.flac │ │ ├── multilingual_ipa_2.flac │ │ ├── multilingual_ipa_3.flac │ │ ├── multilingual_ipa_4.flac │ │ ├── multilingual_ipa_5.flac │ │ ├── multilingual_ipa_us.flac │ │ ├── cold_corpus_32bit_float.wav │ │ ├── mfa_cutoffprogressive.flac │ │ ├── michaelandsickmichael.wav │ │ ├── multilingual_ipa_us_2.flac │ │ ├── multilingual_ipa_us_3.flac │ │ ├── multilingual_ipa_us_4.flac │ │ ├── multilingual_ipa_us_5.flac │ │ ├── common_voice_en_22058264.mp3 │ │ ├── common_voice_en_22058266.mp3 │ │ ├── common_voice_en_22058267.mp3 │ │ ├── common_voice_ja_24511055.mp3 │ │ ├── se10x016-08071999-1334_u0016001.wav │ │ ├── se10x016-08071999-1334_u0016002.wav │ │ ├── se10x016-08071999-1334_u0016003.wav │ │ └── se10x016-08071999-1334_u0016004.wav │ ├── lm │ │ └── test_lm.zip │ ├── am │ │ ├── mono_model.zip │ │ └── acoustic_g2p_output_model.zip │ ├── textgrid │ │ ├── vietnamese.TextGrid │ │ ├── multilingual_ipa_3.TextGrid │ │ ├── multilingual_ipa.TextGrid │ │ ├── 61-70968-0000.TextGrid │ │ ├── multilingual_ipa_4.TextGrid │ │ ├── multilingual_ipa_5.TextGrid │ │ ├── multilingual_ipa_us_2.TextGrid │ │ ├── multilingual_ipa_us_3.TextGrid │ │ ├── multilingual_ipa_us_4.TextGrid │ │ ├── multilingual_ipa_2.TextGrid │ │ ├── multilingual_ipa_us_5.TextGrid │ │ ├── multilingual_ipa_us.TextGrid │ │ ├── short_segments.TextGrid │ │ └── michaelandsickmichael_short_tg.TextGrid │ └── tokenizer │ │ ├── test_tokenizer_model.zip │ │ └── test_tokenizer_model_phonetisaurus.zip ├── test_abc.py ├── test_segmentation.py ├── test_commandline_train_ivector.py ├── test_commandline_find_oovs.py ├── test_gui.py ├── test_commandline_history.py ├── test_commandline_configure.py ├── test_commandline_train_dict.py ├── test_commandline_remap.py └── test_commandline_adapt.py ├── montreal_forced_aligner ├── vad │ └── __init__.py ├── diarization │ └── __init__.py ├── online │ └── __init__.py ├── __main__.py ├── 
transcription │ └── __init__.py ├── tokenization │ ├── resources │ │ └── japanese │ │ │ ├── mfa_sudachi.dic │ │ │ ├── sudachi_config.json │ │ │ └── unk.def │ ├── __init__.py │ └── korean.py ├── ivector │ └── __init__.py ├── language_modeling │ └── __init__.py ├── validation │ └── __init__.py ├── dictionary │ └── __init__.py ├── g2p │ └── __init__.py ├── alignment │ └── __init__.py ├── corpus │ └── __init__.py ├── acoustic_modeling │ └── __init__.py ├── __init__.py └── command_line │ ├── history.py │ ├── anchor.py │ ├── server.py │ └── tokenize.py ├── setup.py ├── .dockerignore ├── docs └── source │ ├── _static │ ├── favicon.ico │ ├── MFA_poster_LSA2017.pdf │ ├── librispeech_textgrid.png │ ├── fonts │ │ ├── GentiumPlus-Bold.woff │ │ ├── GentiumPlus-Bold.woff2 │ │ ├── GentiumPlus-Italic.woff │ │ ├── GentiumPlus-Italic.woff2 │ │ ├── GentiumPlus-Regular.woff │ │ ├── GentiumPlus-Regular.woff2 │ │ ├── GentiumPlus-BoldItalic.woff │ │ └── GentiumPlus-BoldItalic.woff2 │ ├── sound_files │ │ ├── english_t.wav │ │ ├── english_fast.wav │ │ ├── english_slow.wav │ │ ├── japanese_fast.wav │ │ └── japanese_slow.wav │ ├── webfonts │ │ ├── fa-brands-400.eot │ │ ├── fa-brands-400.ttf │ │ ├── fa-brands-400.woff │ │ ├── fa-regular-400.eot │ │ ├── fa-regular-400.ttf │ │ ├── fa-solid-900.eot │ │ ├── fa-solid-900.ttf │ │ ├── fa-solid-900.woff │ │ ├── fa-solid-900.woff2 │ │ ├── fa-brands-400.woff2 │ │ ├── fa-regular-400.woff │ │ └── fa-regular-400.woff2 │ ├── MFA_paper_Interspeech2017.pdf │ ├── multiple_speakers_textgrid.png │ └── multiple_speakers_output_textgrid.png │ ├── _templates │ ├── autosummary │ │ ├── base.rst │ │ ├── method.rst │ │ ├── property.rst │ │ ├── attribute.rst │ │ ├── function.rst │ │ └── class.rst │ └── version.html │ ├── reference │ ├── alignment │ │ ├── index.rst │ │ ├── alignment.rst │ │ └── helper.rst │ ├── g2p │ │ ├── index.rst │ │ ├── generator.rst │ │ └── helper.rst │ ├── dictionary │ │ ├── index.rst │ │ ├── training.rst │ │ ├── main.rst │ │ └── helper.rst │ ├── 
transcription │ │ ├── main.rst │ │ ├── index.rst │ │ └── helper.rst │ ├── helper │ │ ├── index.rst │ │ ├── utils.rst │ │ ├── textgrid.rst │ │ ├── helper.rst │ │ ├── config.rst │ │ ├── data.rst │ │ ├── abc.rst │ │ └── exceptions.rst │ ├── segmentation │ │ ├── main.rst │ │ ├── helper.rst │ │ └── index.rst │ ├── validation │ │ ├── helper.rst │ │ ├── main.rst │ │ └── index.rst │ ├── diarization │ │ ├── main.rst │ │ ├── index.rst │ │ └── helper.rst │ ├── top_level_index.rst │ ├── ivector │ │ ├── index.rst │ │ ├── training.rst │ │ └── helper.rst │ ├── tokenization │ │ ├── training.rst │ │ ├── index.rst │ │ ├── tokenizer.rst │ │ └── helper.rst │ ├── server │ │ └── index.rst │ ├── language_modeling │ │ ├── index.rst │ │ ├── training.rst │ │ └── helper.rst │ ├── g2p_modeling │ │ ├── index.rst │ │ ├── helper.rst │ │ └── training.rst │ ├── core_index.rst │ ├── acoustic_modeling │ │ ├── training.rst │ │ └── index.rst │ └── database │ │ └── index.rst │ ├── user_guide │ ├── implementations │ │ ├── index.md │ │ ├── fine_tune.md │ │ ├── phone_models.md │ │ └── alignment_evaluation.md │ ├── concepts │ │ ├── features.md │ │ ├── index.rst │ │ └── fst.md │ ├── dictionary_validation.rst │ ├── corpus_creation │ │ ├── tokenize.rst │ │ ├── train_tokenizer.rst │ │ ├── train_ivector.rst │ │ ├── training_lm.rst │ │ ├── index.rst │ │ ├── anchor.rst │ │ └── create_segments.rst │ ├── configuration │ │ ├── acoustic_model_adapt.rst │ │ ├── segment.rst │ │ ├── diarization.rst │ │ ├── lm.rst │ │ └── transcription.rst │ ├── workflows │ │ ├── finding_oovs.rst │ │ ├── index.rst │ │ └── adapt_acoustic_model.rst │ ├── models │ │ └── index.rst │ └── data_validation.rst │ ├── changelog │ ├── changelog_3.2.rst │ ├── changelog_3.1.rst │ └── changelog_2.1.rst │ ├── getting_started.rst │ ├── first_steps │ └── tutorials.rst │ └── index.rst ├── .deepsource.toml ├── .readthedocs.yaml ├── requirements.txt ├── Dockerfile ├── .github ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ 
├── publish.yml │ ├── publish_docker.yml │ └── main.yml ├── rtd_environment.yml ├── ci └── docker_environment.yaml ├── LICENSE ├── environment.yml ├── github_environment.yml ├── .gitignore ├── .pre-commit-config.yaml ├── bin └── mfa_update └── pyproject.toml /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/data/configs/transcribe.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /montreal_forced_aligner/vad/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/data/lab/日本語.lab: -------------------------------------------------------------------------------- 1 | 「はい」、。! 『何 でしょう』 2 | -------------------------------------------------------------------------------- /montreal_forced_aligner/diarization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/data/lab/japanese.lab: -------------------------------------------------------------------------------- 1 | 「はい」、。! 
『何 でしょう』 2 | -------------------------------------------------------------------------------- /tests/data/lab/common_voice_en_22058264.lab: -------------------------------------------------------------------------------- 1 | no 2 | -------------------------------------------------------------------------------- /tests/data/lab/common_voice_en_22058267.lab: -------------------------------------------------------------------------------- 1 | six 2 | -------------------------------------------------------------------------------- /tests/data/lab/devanagari.lab: -------------------------------------------------------------------------------- 1 | हैंः हूं हौंसला 2 | -------------------------------------------------------------------------------- /tests/data/lab/common_voice_en_22058266.lab: -------------------------------------------------------------------------------- 1 | Fire fox 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_3.txt: -------------------------------------------------------------------------------- 1 | and run you through 2 | -------------------------------------------------------------------------------- /tests/data/configs/mono_align.yaml: -------------------------------------------------------------------------------- 1 | beam: 100 2 | retry_beam: 400 3 | -------------------------------------------------------------------------------- /tests/data/configs/bad_align_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 10 3 | -------------------------------------------------------------------------------- /tests/data/lab/common_voice_ja_24511055.lab: -------------------------------------------------------------------------------- 1 | 真っ昼間なのにキャンプの外れの電柱に電球がともっていた 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_us.txt: 
-------------------------------------------------------------------------------- 1 | uh with only like four games to go 2 | -------------------------------------------------------------------------------- /tests/data/dictionaries/acoustic_g2p_dictionary.yaml: -------------------------------------------------------------------------------- 1 | default: english_us_mfa 2 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/graphemes.txt: -------------------------------------------------------------------------------- 1 | a 2 | b 3 | d 4 | o 5 | r 6 | w 7 | -------------------------------------------------------------------------------- /tests/data/lab/se10x016-08071999-1334_u0016001.lab: -------------------------------------------------------------------------------- 1 | tyst under denna inspelning 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_4.txt: -------------------------------------------------------------------------------- 1 | kinda our fears and also predictions for them 2 | -------------------------------------------------------------------------------- /montreal_forced_aligner/online/__init__.py: -------------------------------------------------------------------------------- 1 | """Module for running MFA in online mode""" 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools_scm # noqa 2 | from setuptools import setup 3 | 4 | setup() 5 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_hindi.txt: -------------------------------------------------------------------------------- 1 | हैं ɦ ɛ̃ː 2 | हूं ɦ ũː 3 | हौंसला ɦ ɔ̃ː s̪ l̪ aː 4 | -------------------------------------------------------------------------------- 
/.dockerignore: -------------------------------------------------------------------------------- 1 | 2 | docs/* 3 | build/* 4 | .tox/* 5 | .github/* 6 | .pytest_cache/* 7 | tests/* 8 | -------------------------------------------------------------------------------- /tests/data/lab/se10x016-08071999-1334_u0016002.lab: -------------------------------------------------------------------------------- 1 | Testar en två tre fyra fem sex sju åtta. 2 | -------------------------------------------------------------------------------- /tests/data/configs/acoustic/bad_topology.yaml: -------------------------------------------------------------------------------- 1 | b: 2 | max_states: 3 3 | bʲ: 4 | max_states: 1 5 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa.txt: -------------------------------------------------------------------------------- 1 | i can't think of an animal that's less chad like than a sloth 2 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_abstract.txt: -------------------------------------------------------------------------------- 1 | worda phonea phoneb 2 | wordb phonea phonec 3 | wordc phonec 4 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_japanese.txt: -------------------------------------------------------------------------------- 1 | はい h a i 2 | はい h aː 3 | 何 n a ɴ 4 | 何 n a ɲ i 5 | でしょう d e ɕ oː 6 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_us_2.txt: -------------------------------------------------------------------------------- 1 | hey josh could have finished it he just decided to fail it instead 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_us_4.txt: 
-------------------------------------------------------------------------------- 1 | uh i i still think it's a very good team though in n a i think this is uh 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_us_3.txt: -------------------------------------------------------------------------------- 1 | really good performances against top teams that have ended up going their way 2 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/sets.int: -------------------------------------------------------------------------------- 1 | 1 2 3 4 5 2 | 6 7 8 9 10 3 | 11 12 13 14 4 | 15 16 17 18 5 | 19 20 21 22 6 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_5.txt: -------------------------------------------------------------------------------- 1 | i'm sideshow joined by custer and reinforce we've got a special edition of plat chat 2 | -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_us_5.txt: -------------------------------------------------------------------------------- 1 | uh and this was the first time i think the justice really looked like an elite team 2 | -------------------------------------------------------------------------------- /tests/data/lab/se10x016-08071999-1334_u0016003.lab: -------------------------------------------------------------------------------- 1 | Har du sett våra rara barnbarn som leker och busar ute i grannträdgården! 
2 | -------------------------------------------------------------------------------- /tests/data/wav/dummy.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/dummy.mp3 -------------------------------------------------------------------------------- /tests/data/wav/dummy.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/dummy.wav -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/words.txt: -------------------------------------------------------------------------------- 1 | 0 2 | !SIL 1 3 | 2 4 | worda 3 5 | wordb 4 6 | #0 5 7 | 6 8 | 7 9 | -------------------------------------------------------------------------------- /tests/data/lab/61-70968-0000.lab: -------------------------------------------------------------------------------- 1 | HE BEGAN A CONFUSED COMPLAINT AGAINST THE WIZARD WHO HAD VANISHED BEHIND THE CURTAIN ON THE LEFT 2 | -------------------------------------------------------------------------------- /tests/data/lab/weird_words.lab: -------------------------------------------------------------------------------- 1 | i’m talking-ajfish me-really [me-really] [me'really] [me_??_really] asds-asda sdasd-me 2 | -------------------------------------------------------------------------------- /tests/data/lab/xsampa.lab: -------------------------------------------------------------------------------- 1 | @bUr\tOU {bstr\{kt {bSaIr\ Abr\utseIzi {br\@geItIN @bor\n {b3kr\Ambi {bI5s@`n Ar\g thr\Ip@5eI Ar\dvAr\k 2 | -------------------------------------------------------------------------------- /tests/data/lm/test_lm.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/lm/test_lm.zip -------------------------------------------------------------------------------- /tests/data/wav/mfa_a.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_a.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_er.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_er.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_her.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_her.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_hes.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_hes.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_kmg.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_kmg.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_the.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_the.flac 
-------------------------------------------------------------------------------- /tests/data/wav/mfa_uh.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_uh.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_um.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_um.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_unk.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_unk.flac -------------------------------------------------------------------------------- /tests/data/wav/whisper.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/whisper.flac -------------------------------------------------------------------------------- /tests/data/am/mono_model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/am/mono_model.zip -------------------------------------------------------------------------------- /tests/data/configs/basic_align_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 100 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | frame_shift: 10 7 | -------------------------------------------------------------------------------- /tests/data/wav/falsetto.flac: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/falsetto.flac -------------------------------------------------------------------------------- /tests/data/wav/falsetto2.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/falsetto2.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_apex.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_apex.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_buddy.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_buddy.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_long.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_long.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_patty.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_patty.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_poofy.flac: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_poofy.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_pooty.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_pooty.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_puddy.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_puddy.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_putty.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_putty.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_thez.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_thez.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_uhuh.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_uhuh.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_uhum.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_uhum.flac 
-------------------------------------------------------------------------------- /tests/data/wav/mfa_words.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_words.flac -------------------------------------------------------------------------------- /tests/data/wav/whisper2.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/whisper2.flac -------------------------------------------------------------------------------- /docs/source/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/favicon.ico -------------------------------------------------------------------------------- /tests/data/configs/basic_train_lm.yaml: -------------------------------------------------------------------------------- 1 | order: 3 2 | method: kneser_ney 3 | prune_thresh_small: 0.0000003 4 | prune_thresh_medium: 0.0000001 5 | -------------------------------------------------------------------------------- /tests/data/wav/cold_corpus.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/cold_corpus.wav -------------------------------------------------------------------------------- /tests/data/wav/cold_corpus3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/cold_corpus3.wav -------------------------------------------------------------------------------- /tests/data/wav/mfa_bottle.flac: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_bottle.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_breaths.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_breaths.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_breathy.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_breathy.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_creaky.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_creaky.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_cutoff.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_cutoff.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_erpause.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_erpause.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_falsetto.flac: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_falsetto.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_laughter.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_laughter.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_longstop.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_longstop.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_michael.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_michael.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_surround.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_surround.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_thenorm.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_thenorm.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_theother.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_theother.flac 
-------------------------------------------------------------------------------- /tests/data/wav/mfa_thestop.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_thestop.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_thoughts.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_thoughts.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_whisper.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_whisper.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_youknow.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_youknow.flac -------------------------------------------------------------------------------- /tests/data/wav/61-70968-0000.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/61-70968-0000.flac -------------------------------------------------------------------------------- /tests/data/wav/acoustic_corpus.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/acoustic_corpus.wav -------------------------------------------------------------------------------- /tests/data/wav/mfa_crossword.flac: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_crossword.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_puttynorm.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_puttynorm.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_reallylong.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_reallylong.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_theapprox.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_theapprox.flac -------------------------------------------------------------------------------- /tests/data/configs/test_rules.yaml: -------------------------------------------------------------------------------- 1 | rules: 2 | - following_context: '' 3 | preceding_context: '' 4 | replacement: ih 5 | segment: iy 6 | -------------------------------------------------------------------------------- /tests/data/wav/cold_corpus_24bit.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/cold_corpus_24bit.wav -------------------------------------------------------------------------------- /tests/data/wav/mfa_affectation.flac: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_affectation.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_exaggerated.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_exaggerated.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_internalsil.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_internalsil.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_registershift.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_registershift.flac -------------------------------------------------------------------------------- /tests/data/wav/mfa_whatscalled.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_whatscalled.flac -------------------------------------------------------------------------------- /tests/data/wav/multilingual_ipa.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/multilingual_ipa.flac -------------------------------------------------------------------------------- /tests/data/configs/basic_segment_config.yaml: -------------------------------------------------------------------------------- 1 | 2 | energy_threshold: 9 3 | energy_mean_scale: 0.5 4 | 
max_segment_length: 5 5 | min_pause_duration: 0.25 6 | -------------------------------------------------------------------------------- /tests/data/lab/se10x016-08071999-1334_u0016004.lab: -------------------------------------------------------------------------------- 1 | Vår husläkare börjar i och för sig bli gammal och skröplig, men han har ju ett sådant trevligt sätt! 2 | -------------------------------------------------------------------------------- /tests/data/textgrid/vietnamese.TextGrid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/textgrid/vietnamese.TextGrid -------------------------------------------------------------------------------- /tests/data/wav/13697_11991_000000.opus: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/13697_11991_000000.opus -------------------------------------------------------------------------------- /tests/data/wav/mfa_theinitialstop.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_theinitialstop.flac -------------------------------------------------------------------------------- /tests/data/wav/multilingual_ipa_2.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/multilingual_ipa_2.flac -------------------------------------------------------------------------------- /tests/data/wav/multilingual_ipa_3.flac: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/multilingual_ipa_3.flac -------------------------------------------------------------------------------- /tests/data/wav/multilingual_ipa_4.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/multilingual_ipa_4.flac -------------------------------------------------------------------------------- /tests/data/wav/multilingual_ipa_5.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/multilingual_ipa_5.flac -------------------------------------------------------------------------------- /tests/data/wav/multilingual_ipa_us.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/multilingual_ipa_us.flac -------------------------------------------------------------------------------- /docs/source/_static/MFA_poster_LSA2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/MFA_poster_LSA2017.pdf -------------------------------------------------------------------------------- /tests/data/lab/multilingual_ipa_2.txt: -------------------------------------------------------------------------------- 1 | welcome to a series of plat chat videos where we're gonna tackle every single team in the overwatch league twenty twenty 2 | -------------------------------------------------------------------------------- /tests/data/wav/cold_corpus_32bit_float.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/cold_corpus_32bit_float.wav -------------------------------------------------------------------------------- /tests/data/wav/mfa_cutoffprogressive.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/mfa_cutoffprogressive.flac -------------------------------------------------------------------------------- /tests/data/wav/michaelandsickmichael.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/michaelandsickmichael.wav -------------------------------------------------------------------------------- /tests/data/wav/multilingual_ipa_us_2.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/multilingual_ipa_us_2.flac -------------------------------------------------------------------------------- /tests/data/wav/multilingual_ipa_us_3.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/multilingual_ipa_us_3.flac -------------------------------------------------------------------------------- /tests/data/wav/multilingual_ipa_us_4.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/multilingual_ipa_us_4.flac -------------------------------------------------------------------------------- /tests/data/wav/multilingual_ipa_us_5.flac: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/multilingual_ipa_us_5.flac -------------------------------------------------------------------------------- /docs/source/_static/librispeech_textgrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/librispeech_textgrid.png -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/base.rst: -------------------------------------------------------------------------------- 1 | {{ objname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. auto{{ objtype }}:: {{ objname }} 6 | -------------------------------------------------------------------------------- /tests/data/am/acoustic_g2p_output_model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/am/acoustic_g2p_output_model.zip -------------------------------------------------------------------------------- /tests/data/wav/common_voice_en_22058264.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/common_voice_en_22058264.mp3 -------------------------------------------------------------------------------- /tests/data/wav/common_voice_en_22058266.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/common_voice_en_22058266.mp3 -------------------------------------------------------------------------------- /tests/data/wav/common_voice_en_22058267.mp3: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/common_voice_en_22058267.mp3 -------------------------------------------------------------------------------- /tests/data/wav/common_voice_ja_24511055.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/common_voice_ja_24511055.mp3 -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/fonts/GentiumPlus-Bold.woff -------------------------------------------------------------------------------- /docs/source/_static/sound_files/english_t.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/sound_files/english_t.wav -------------------------------------------------------------------------------- /docs/source/_static/webfonts/fa-brands-400.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-brands-400.eot -------------------------------------------------------------------------------- /docs/source/_static/webfonts/fa-brands-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-brands-400.ttf -------------------------------------------------------------------------------- 
/docs/source/_static/webfonts/fa-brands-400.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-brands-400.woff -------------------------------------------------------------------------------- /docs/source/_static/webfonts/fa-regular-400.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-regular-400.eot -------------------------------------------------------------------------------- /docs/source/_static/webfonts/fa-regular-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-regular-400.ttf -------------------------------------------------------------------------------- /docs/source/_static/webfonts/fa-solid-900.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-solid-900.eot -------------------------------------------------------------------------------- /docs/source/_static/webfonts/fa-solid-900.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-solid-900.ttf -------------------------------------------------------------------------------- /docs/source/_static/webfonts/fa-solid-900.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-solid-900.woff 
-------------------------------------------------------------------------------- /docs/source/_static/webfonts/fa-solid-900.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-solid-900.woff2 -------------------------------------------------------------------------------- /docs/source/reference/alignment/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _alignment_api: 3 | 4 | Alignment 5 | ========= 6 | 7 | .. toctree:: 8 | 9 | alignment 10 | helper 11 | -------------------------------------------------------------------------------- /tests/data/lab/french_clitics.lab: -------------------------------------------------------------------------------- 1 | aujourd aujourd'hui m'appelle purple-people-eater vingt-six m'm'appelle c'est m'c'est m'appele m'ving-sic flying'purple-people-eater 2 | -------------------------------------------------------------------------------- /tests/data/tokenizer/test_tokenizer_model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/tokenizer/test_tokenizer_model.zip -------------------------------------------------------------------------------- /docs/source/_static/MFA_paper_Interspeech2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/MFA_paper_Interspeech2017.pdf -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Bold.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/fonts/GentiumPlus-Bold.woff2 -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/fonts/GentiumPlus-Italic.woff -------------------------------------------------------------------------------- /docs/source/_static/sound_files/english_fast.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/sound_files/english_fast.wav -------------------------------------------------------------------------------- /docs/source/_static/sound_files/english_slow.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/sound_files/english_slow.wav -------------------------------------------------------------------------------- /docs/source/_static/sound_files/japanese_fast.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/sound_files/japanese_fast.wav -------------------------------------------------------------------------------- /docs/source/_static/sound_files/japanese_slow.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/sound_files/japanese_slow.wav -------------------------------------------------------------------------------- /docs/source/_static/webfonts/fa-brands-400.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-brands-400.woff2 -------------------------------------------------------------------------------- /docs/source/_static/webfonts/fa-regular-400.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-regular-400.woff -------------------------------------------------------------------------------- /docs/source/_static/webfonts/fa-regular-400.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/webfonts/fa-regular-400.woff2 -------------------------------------------------------------------------------- /tests/data/dictionaries/test_extra_annotations.txt: -------------------------------------------------------------------------------- 1 | worda phonea phoneb 2 | wordb phonea phonec 3 | wordc phonec 4 | {LG} laugh 5 | {SL} sil 6 | sil 7 | {VN} vocnoise 8 | -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/fonts/GentiumPlus-Italic.woff2 -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/fonts/GentiumPlus-Regular.woff 
-------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/fonts/GentiumPlus-Regular.woff2 -------------------------------------------------------------------------------- /docs/source/_static/multiple_speakers_textgrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/multiple_speakers_textgrid.png -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/method.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | {{ fullname }} 4 | {{ underline }} 5 | 6 | .. currentmodule:: {{ module }} 7 | 8 | .. 
automethod:: {{ objname }} 9 | -------------------------------------------------------------------------------- /tests/data/configs/g2p_config.yaml: -------------------------------------------------------------------------------- 1 | punctuation: "、。।,@<>\"(),.:;¿?¡!\\&%#*~【】,…‥「」『』〝〟″⟨⟩♪・‹›«»~′$+=" 2 | clitic_markers: "'’" 3 | compound_markers: "-" 4 | num_pronunciations: 1 5 | -------------------------------------------------------------------------------- /tests/data/wav/se10x016-08071999-1334_u0016001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/se10x016-08071999-1334_u0016001.wav -------------------------------------------------------------------------------- /tests/data/wav/se10x016-08071999-1334_u0016002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/se10x016-08071999-1334_u0016002.wav -------------------------------------------------------------------------------- /tests/data/wav/se10x016-08071999-1334_u0016003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/se10x016-08071999-1334_u0016003.wav -------------------------------------------------------------------------------- /tests/data/wav/se10x016-08071999-1334_u0016004.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/wav/se10x016-08071999-1334_u0016004.wav -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-BoldItalic.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/fonts/GentiumPlus-BoldItalic.woff -------------------------------------------------------------------------------- /docs/source/_static/fonts/GentiumPlus-BoldItalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/fonts/GentiumPlus-BoldItalic.woff2 -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/property.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | {{ fullname }} 4 | {{ underline }} 5 | 6 | .. currentmodule:: {{ module }} 7 | 8 | .. autoproperty:: {{ objname }} 9 | -------------------------------------------------------------------------------- /tests/data/lab/punctuated.lab: -------------------------------------------------------------------------------- 1 | oh yes, they - they, you know, they love her' and so' 'something 'i mean... ‘you The village name is Anglo Saxon in origin, and means 'Myrsa's woodland'. 
2 | -------------------------------------------------------------------------------- /docs/source/_static/multiple_speakers_output_textgrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/docs/source/_static/multiple_speakers_output_textgrid.png -------------------------------------------------------------------------------- /montreal_forced_aligner/__main__.py: -------------------------------------------------------------------------------- 1 | from rich.traceback import install 2 | 3 | from montreal_forced_aligner.command_line.mfa import mfa_cli 4 | 5 | install(show_locals=True) 6 | mfa_cli() 7 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/roots.int: -------------------------------------------------------------------------------- 1 | shared split 1 2 3 4 5 2 | shared split 6 7 8 9 10 3 | shared split 11 12 13 14 4 | shared split 15 16 17 18 5 | shared split 19 20 21 22 6 | -------------------------------------------------------------------------------- /docs/source/reference/g2p/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _g2p_generate_api: 3 | 4 | Generating dictionaries 5 | ======================= 6 | 7 | .. 
toctree:: 8 | 9 | generator 10 | helper 11 | -------------------------------------------------------------------------------- /tests/data/tokenizer/test_tokenizer_model_phonetisaurus.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/tests/data/tokenizer/test_tokenizer_model_phonetisaurus.zip -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/attribute.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | {{ fullname }} 4 | {{ underline }} 5 | 6 | .. currentmodule:: {{ module }} 7 | 8 | .. autoattribute:: {{ objname }} 9 | :type: 10 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/function.rst: -------------------------------------------------------------------------------- 1 | :html_theme.sidebar_secondary.remove: 2 | 3 | {{ objname }} 4 | {{ underline }} 5 | 6 | .. currentmodule:: {{ module }} 7 | 8 | .. 
autofunction:: {{ objname }} 9 | -------------------------------------------------------------------------------- /montreal_forced_aligner/transcription/__init__.py: -------------------------------------------------------------------------------- 1 | """Transcription module for MFA""" 2 | from montreal_forced_aligner.transcription.transcriber import Transcriber 3 | 4 | __all__ = ["Transcriber", "transcriber"] 5 | -------------------------------------------------------------------------------- /montreal_forced_aligner/tokenization/resources/japanese/mfa_sudachi.dic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/HEAD/montreal_forced_aligner/tokenization/resources/japanese/mfa_sudachi.dic -------------------------------------------------------------------------------- /docs/source/reference/dictionary/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _dictionary_training_api: 3 | 4 | Pronunciation dictionaries 5 | ========================== 6 | 7 | .. toctree:: 8 | 9 | main 10 | helper 11 | training 12 | -------------------------------------------------------------------------------- /docs/source/reference/transcription/main.rst: -------------------------------------------------------------------------------- 1 | Transcriber 2 | =========== 3 | 4 | .. currentmodule:: montreal_forced_aligner.transcription 5 | 6 | .. 
autosummary:: 7 | :toctree: generated/ 8 | 9 | Transcriber 10 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_frclitics.txt: -------------------------------------------------------------------------------- 1 | aujourd'hui o zh u r d w i 2 | c'est s e 3 | est e 4 | c' s 5 | c s e 6 | m' m 7 | m 3 m 8 | appelle a p 3 l 9 | vingt-cinq v ae~ s ae~ k 10 | vingt v ae~ 11 | six s i s 12 | -------------------------------------------------------------------------------- /.deepsource.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | 3 | test_patterns = ["tests/**"] 4 | 5 | [[analyzers]] 6 | name = "python" 7 | enabled = true 8 | 9 | [analyzers.meta] 10 | runtime_version = "3.x.x" 11 | max_line_length = 120 12 | -------------------------------------------------------------------------------- /docs/source/reference/helper/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _helper_api: 3 | 4 | Helper 5 | ====== 6 | 7 | .. 
toctree:: 8 | 9 | abc 10 | config 11 | data 12 | exceptions 13 | helper 14 | textgrid 15 | utils 16 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/extra_questions.int: -------------------------------------------------------------------------------- 1 | 1 2 3 4 5 6 7 8 9 10 2 | 11 12 13 14 15 16 17 18 19 20 21 22 3 | 11 15 19 4 | 12 16 20 5 | 13 17 21 6 | 14 18 22 7 | 1 6 8 | 2 7 9 | 3 8 10 | 4 9 11 | 5 10 12 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/sets.txt: -------------------------------------------------------------------------------- 1 | sil sil_B sil_E sil_I sil_S 2 | spn spn_B spn_E spn_I spn_S 3 | phonea_B phonea_E phonea_I phonea_S 4 | phoneb_B phoneb_E phoneb_I phoneb_S 5 | phonec_B phonec_E phonec_I phonec_S 6 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-20.04" 5 | tools: 6 | python: "mambaforge-4.10" 7 | 8 | sphinx: 9 | configuration: docs/source/conf.py 10 | 11 | conda: 12 | environment: rtd_environment.yml 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | praatio>=6.0.0 2 | tqdm 3 | pyyaml 4 | librosa 5 | numpy 6 | scipy 7 | scikit-learn 8 | requests 9 | dataclassy 10 | sqlalchemy>=2.0 11 | click 12 | rich 13 | rich-click 14 | numpy 15 | pynini 16 | huggingface_hub 17 | -------------------------------------------------------------------------------- /docs/source/reference/segmentation/main.rst: -------------------------------------------------------------------------------- 1 | 2 | Segmenter 3 | ========= 4 | 5 | .. 
currentmodule:: montreal_forced_aligner.vad.segmenter 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | VadSegmenter 11 | TranscriptionSegmenter 12 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phone_map.txt: -------------------------------------------------------------------------------- 1 | sil sil sil_B sil_E sil_I sil_S 2 | spn spn spn_B spn_E spn_I spn_S 3 | phoneb phoneb_B phoneb_E phoneb_I phoneb_S 4 | phonea phonea_B phonea_E phonea_I phonea_S 5 | phonec phonec_B phonec_E phonec_I phonec_S 6 | -------------------------------------------------------------------------------- /docs/source/reference/validation/helper.rst: -------------------------------------------------------------------------------- 1 | Helper functions 2 | ================ 3 | 4 | Mixins 5 | ------ 6 | 7 | .. currentmodule:: montreal_forced_aligner.validation 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | ValidationMixin 13 | -------------------------------------------------------------------------------- /docs/source/reference/diarization/main.rst: -------------------------------------------------------------------------------- 1 | 2 | Speaker Diarization 3 | =================== 4 | 5 | .. currentmodule:: montreal_forced_aligner.diarization.speaker_diarizer 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | SpeakerDiarizer 11 | -------------------------------------------------------------------------------- /docs/source/reference/top_level_index.rst: -------------------------------------------------------------------------------- 1 | Workflows 2 | ========= 3 | 4 | .. 
toctree:: 5 | 6 | alignment/index 7 | validation/index 8 | g2p/index 9 | transcription/index 10 | segmentation/index 11 | diarization/index 12 | tokenization/index 13 | -------------------------------------------------------------------------------- /docs/source/reference/validation/main.rst: -------------------------------------------------------------------------------- 1 | Validators 2 | ========== 3 | 4 | .. currentmodule:: montreal_forced_aligner.validation 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | 9 | TrainingValidator 10 | PretrainedValidator 11 | DictionaryValidator 12 | -------------------------------------------------------------------------------- /tests/data/lab/13697_11991_000000.lab: -------------------------------------------------------------------------------- 1 | la sorpresa y el disgusto que produjo a wentworth la substitución de una cuñada por otra el gesto que se dibujó en su fisonomía el asombro que manifestó y las palabras a duras penas reprimidas que asomaron a sus labios mientras le hablaba carlos 2 | -------------------------------------------------------------------------------- /docs/source/reference/helper/utils.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.utils 2 | 3 | .. 
autosummary:: 4 | :toctree: generated/ 5 | 6 | Counter 7 | run_kaldi_function 8 | thirdparty_binary 9 | log_kaldi_errors 10 | parse_logs 11 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/roots.txt: -------------------------------------------------------------------------------- 1 | shared split sil sil_B sil_E sil_I sil_S 2 | shared split spn spn_B spn_E spn_I spn_S 3 | shared split phonea_B phonea_E phonea_I phonea_S 4 | shared split phoneb_B phoneb_E phoneb_I phoneb_S 5 | shared split phonec_B phonec_E phonec_I phonec_S 6 | -------------------------------------------------------------------------------- /docs/source/reference/helper/textgrid.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.textgrid 2 | 3 | .. autosummary:: 4 | :toctree: generated/ 5 | 6 | process_ctm_line 7 | export_textgrid 8 | construct_output_path 9 | output_textgrid_writing_errors 10 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_3.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 1.3062999999999994 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_one" 10 | 0 11 | 1.3062999999999994 12 | 1 13 | 0 14 | 1.3062999999999994 15 | "and run you through" 16 | -------------------------------------------------------------------------------- /tests/data/configs/ivector_train.yaml: -------------------------------------------------------------------------------- 1 | 2 | features: 3 | type: "mfcc" 4 | use_energy: true 5 | frame_shift: 10 6 | 7 | training: 8 | - dubm: 9 | num_iterations_init: 4 10 | num_iterations: 2 11 | - ivector: 12 | num_iterations: 2 13 | gaussian_min_count: 2 14 | -------------------------------------------------------------------------------- 
/tests/data/configs/mono_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: true 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 500 14 | subset: 1000 15 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 4.1195 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_one" 10 | 0 11 | 4.1195 12 | 1 13 | 0 14 | 4.1195 15 | "i can't think of an animal that's less chad-like than a sloth" 16 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | :html_theme.sidebar_secondary.remove: 2 | 3 | {{ objname }} 4 | {{ underline }} 5 | 6 | .. currentmodule:: {{ module }} 7 | 8 | .. 
autoclass:: {{ objname }} 9 | :members: 10 | :show-inheritance: 11 | :no-inherited-members: 12 | :no-special-members: 13 | -------------------------------------------------------------------------------- /tests/data/textgrid/61-70968-0000.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 4.905 6 | 7 | 1 8 | "IntervalTier" 9 | "61" 10 | 0 11 | 4.905 12 | 1 13 | 0 14 | 4.905 15 | "HE BEGAN A CONFUSED COMPLAINT AGAINST THE WIZARD WHO HAD VANISHED BEHIND THE CURTAIN ON THE LEFT" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_4.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 3.296199999999999 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_one" 10 | 0 11 | 3.296199999999999 12 | 1 13 | 0 14 | 3.296199999999999 15 | "kinda our fears and also predictions for them" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_5.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 4.304 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_one" 10 | 0 11 | 4.304 12 | 1 13 | 0 14 | 4.304 15 | "i'm sideshow joined by custa and reinforce we've got a special edition of platchat" 16 | -------------------------------------------------------------------------------- /docs/source/reference/ivector/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _ivector_api: 3 | 4 | Ivector extraction 5 | ================== 6 | 7 | .. currentmodule:: montreal_forced_aligner.models 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | IvectorExtractorModel 13 | 14 | .. 
toctree:: 15 | 16 | training 17 | helper 18 | -------------------------------------------------------------------------------- /montreal_forced_aligner/ivector/__init__.py: -------------------------------------------------------------------------------- 1 | """Module for ivector extractor training""" 2 | 3 | from montreal_forced_aligner.ivector.trainer import ( 4 | DubmTrainer, 5 | IvectorTrainer, 6 | TrainableIvectorExtractor, 7 | ) 8 | 9 | __all__ = ["trainer", "DubmTrainer", "IvectorTrainer", "TrainableIvectorExtractor"] 10 | -------------------------------------------------------------------------------- /montreal_forced_aligner/tokenization/__init__.py: -------------------------------------------------------------------------------- 1 | """Tokenization classes""" 2 | 3 | from montreal_forced_aligner.tokenization.tokenizer import CorpusTokenizer, TokenizerValidator 4 | from montreal_forced_aligner.tokenization.trainer import TokenizerTrainer 5 | 6 | __all__ = ["TokenizerTrainer", "TokenizerValidator", "CorpusTokenizer"] 7 | -------------------------------------------------------------------------------- /docs/source/reference/dictionary/training.rst: -------------------------------------------------------------------------------- 1 | 2 | Training pronunciation probabilities 3 | ==================================== 4 | 5 | .. currentmodule:: montreal_forced_aligner.alignment.pretrained 6 | 7 | .. 
autosummary:: 8 | :toctree: generated/ 9 | 10 | DictionaryTrainer -- Train pronunciation probabilities from alignments 11 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_us_2.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 2.411162499999989 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_two" 10 | 0 11 | 2.411162499999989 12 | 1 13 | 0 14 | 2.411162499999989 15 | "hey josh could have finished it he just decided to fail it instead" 16 | -------------------------------------------------------------------------------- /docs/source/reference/helper/helper.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.helper 2 | 3 | .. autosummary:: 4 | :toctree: generated/ 5 | 6 | comma_join 7 | make_safe 8 | make_scp_safe 9 | load_scp 10 | load_scp_safe 11 | score_wer 12 | edit_distance 13 | output_mapping 14 | -------------------------------------------------------------------------------- /docs/source/user_guide/implementations/index.md: -------------------------------------------------------------------------------- 1 | 2 | # In depth guides 3 | 4 | :::{warning} 5 | This section is under construction! 
6 | ::: 7 | 8 | ```{toctree} 9 | :hidden: 10 | 11 | phone_groups 12 | phonological_rules 13 | lexicon_probabilities 14 | alignment_analysis 15 | alignment_evaluation 16 | fine_tune 17 | phone_models 18 | ``` 19 | -------------------------------------------------------------------------------- /tests/data/configs/xsampa_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | ignore_case: false 4 | punctuation: .-'][ 5 | 6 | features: 7 | type: "mfcc" 8 | use_energy: true 9 | frame_shift: 10 10 | use_pitch: false 11 | 12 | training: 13 | - monophone: 14 | num_iterations: 10 15 | max_gaussians: 500 16 | subset: 1000 17 | -------------------------------------------------------------------------------- /tests/data/lab/cold_corpus.lab: -------------------------------------------------------------------------------- 1 | uh so this is the sick corpus uh i have a cold so i probably sound quite different than the uh acoustic corpus um the recording environment is also quite different and i'm saying a bunch of different words that i did not say in the original one uh and here's a long pause and i think this is probably good alright thanks 2 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_us_3.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 3.350999999999999 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_two" 10 | 0 11 | 3.350999999999999 12 | 1 13 | 0 14 | 3.350999999999999 15 | "really good performances against top teams that have ended up going their way" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_us_4.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = 
"TextGrid" 3 | 4 | 0 5 | 3.5188874999998916 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_two" 10 | 0 11 | 3.5188874999998916 12 | 1 13 | 0 14 | 3.5188874999998916 15 | "uh i i still think it's a very good team though in n a i think this is uh" 16 | -------------------------------------------------------------------------------- /tests/data/lab/acoustic_corpus.lab: -------------------------------------------------------------------------------- 1 | this is the acoustic corpus i'm talking pretty fast here there's nothing going else going on we're just yknow there's some speech errors but who cares um this is me talking really slow and slightly lower in intensity uh we're just saying some words and here's some more words words words words um and that should be all thanks 2 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_2.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 6.2271 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_one" 10 | 0 11 | 6.2271 12 | 1 13 | 0 14 | 6.2271 15 | "welcome to a series of platchat videos where we're gonna tackle every single team in the overwatch league twenty twenty" 16 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_us_5.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 4.656600000000026 6 | 7 | 1 8 | "IntervalTier" 9 | "speaker_two" 10 | 0 11 | 4.656600000000026 12 | 1 13 | 0 14 | 4.656600000000026 15 | "uh and this was the first time i think the justice really looked like an elite team" 16 | -------------------------------------------------------------------------------- /docs/source/reference/tokenization/training.rst: 
-------------------------------------------------------------------------------- 1 | 2 | .. _tokenizer_model_training_api: 3 | 4 | Training tokenizer models 5 | ========================= 6 | 7 | .. currentmodule:: montreal_forced_aligner.tokenization.trainer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | TokenizerTrainer -- Trainer for tokenizer models on text corpora 13 | -------------------------------------------------------------------------------- /tests/data/configs/pron_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: true 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 500 14 | subset: 1000 15 | 16 | - pronunciation_probabilities: 17 | subset: 1000 18 | -------------------------------------------------------------------------------- /tests/data/lab/cold_corpus3.lab: -------------------------------------------------------------------------------- 1 | alright so this is the sick corpus uh hopefully the recording levels are okay um i have a cold so this probably sounds a lot different than the acoustic corpus uh and i'm also saying [adif] bunch of different words um i think i'm probably gonna cough here yeah so that just happened uh and uh that should be good alright thanks 2 | -------------------------------------------------------------------------------- /tests/data/lab/cold_corpus3_extra.lab: -------------------------------------------------------------------------------- 1 | alright so this is the sick corpus uh hopefully the recording levels are okay um i have a cold so this probably sounds a lot different than the acoustic corpus uh and i'm also saying [adif] bunch of different words um i think i'm probably gonna cough here {CG} yeah so that just happened uh and uh that should be good alright thanks 2 | 
-------------------------------------------------------------------------------- /tests/data/configs/different_punctuation_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | word_break_markers: .-'][ 4 | punctuation: .-'][ 5 | 6 | features: 7 | type: "mfcc" 8 | use_energy: true 9 | frame_shift: 10 10 | use_pitch: false 11 | 12 | training: 13 | - monophone: 14 | num_iterations: 3 15 | max_gaussians: 500 16 | subset: 1000 17 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones.txt: -------------------------------------------------------------------------------- 1 | 0 2 | sil 1 3 | sil_B 2 4 | sil_E 3 5 | sil_I 4 6 | sil_S 5 7 | spn 6 8 | spn_B 7 9 | spn_E 8 10 | spn_I 9 11 | spn_S 10 12 | phonea_B 11 13 | phonea_E 12 14 | phonea_I 13 15 | phonea_S 14 16 | phoneb_B 15 17 | phoneb_E 16 18 | phoneb_I 17 19 | phoneb_S 18 20 | phonec_B 19 21 | phonec_E 20 22 | phonec_I 21 23 | phonec_S 22 24 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/word_boundary.int: -------------------------------------------------------------------------------- 1 | 1 nonword 2 | 2 begin 3 | 3 end 4 | 4 internal 5 | 5 singleton 6 | 6 nonword 7 | 7 begin 8 | 8 end 9 | 9 internal 10 | 10 singleton 11 | 11 begin 12 | 12 end 13 | 13 internal 14 | 14 singleton 15 | 15 begin 16 | 16 end 17 | 17 internal 18 | 18 singleton 19 | 19 begin 20 | 20 end 21 | 21 internal 22 | 22 singleton 23 | -------------------------------------------------------------------------------- /docs/source/reference/helper/config.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.config 2 | 3 | .. 
autosummary:: 4 | :toctree: generated/ 5 | 6 | MfaConfiguration 7 | MfaProfile 8 | get_temporary_directory 9 | generate_config_path 10 | generate_command_history_path 11 | load_command_history 12 | update_command_history 13 | -------------------------------------------------------------------------------- /docs/source/reference/diarization/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _diarization_api: 3 | 4 | Speaker diarization 5 | =================== 6 | 7 | Speaker diarization is the procedure to assign speaker labels to utterances. MFA can train and use ivector models (see :ref:`train_ivector`) or use :xref:`speechbrain`'s pretrained speaker classifier. 8 | 9 | .. toctree:: 10 | 11 | main 12 | helper 13 | -------------------------------------------------------------------------------- /montreal_forced_aligner/language_modeling/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Language modeling 3 | ================= 4 | 5 | 6 | """ 7 | 8 | from montreal_forced_aligner.language_modeling.trainer import ( 9 | MfaLmArpaTrainer, 10 | MfaLmCorpusTrainer, 11 | MfaLmDictionaryCorpusTrainer, 12 | ) 13 | 14 | __all__ = ["MfaLmCorpusTrainer", "MfaLmDictionaryCorpusTrainer", "MfaLmArpaTrainer"] 15 | -------------------------------------------------------------------------------- /docs/source/reference/alignment/alignment.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _aligners_api: 3 | 4 | Alignment classes 5 | ================= 6 | 7 | .. currentmodule:: montreal_forced_aligner.alignment 8 | 9 | .. 
autosummary:: 10 | :toctree: generated/ 11 | 12 | CorpusAligner -- Base aligner 13 | AdaptingAligner -- Adapting an acoustic model to new data 14 | PretrainedAligner -- Pretrained aligner 15 | -------------------------------------------------------------------------------- /docs/source/reference/g2p/generator.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _generating_dictionaries_api: 3 | 4 | Dictionary generation 5 | ===================== 6 | 7 | .. currentmodule:: montreal_forced_aligner.g2p.generator 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | PyniniCorpusGenerator -- Generator for Pynini G2P model 13 | PyniniWordListGenerator -- Generator for Pynini G2P model 14 | -------------------------------------------------------------------------------- /tests/data/configs/no_punctuation_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | punctuation: 4 | word_break_markers: 5 | compound_markers: 6 | quote_markers: 7 | clitic_markers: 8 | 9 | features: 10 | type: "mfcc" 11 | use_energy: true 12 | frame_shift: 10 13 | use_pitch: false 14 | 15 | training: 16 | - monophone: 17 | num_iterations: 3 18 | max_gaussians: 500 19 | subset: 1000 20 | -------------------------------------------------------------------------------- /docs/source/reference/server/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _server_api: 3 | 4 | Managing MFA servers 5 | ==================== 6 | 7 | Functions 8 | --------- 9 | 10 | .. currentmodule:: montreal_forced_aligner.command_line.utils 11 | 12 | .. 
autosummary:: 13 | :toctree: generated/ 14 | 15 | configure_pg 16 | initialize_server 17 | check_databases 18 | start_server 19 | stop_server 20 | delete_server 21 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/extra_questions.txt: -------------------------------------------------------------------------------- 1 | sil sil_B sil_E sil_I sil_S spn spn_B spn_E spn_I spn_S 2 | phonea_B phonea_E phonea_I phonea_S phoneb_B phoneb_E phoneb_I phoneb_S phonec_B phonec_E phonec_I phonec_S 3 | phonea_B phoneb_B phonec_B 4 | phonea_E phoneb_E phonec_E 5 | phonea_I phoneb_I phonec_I 6 | phonea_S phoneb_S phonec_S 7 | sil spn 8 | sil_B spn_B 9 | sil_E spn_E 10 | sil_I spn_I 11 | sil_S spn_S 12 | -------------------------------------------------------------------------------- /docs/source/reference/validation/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _validation_api: 3 | 4 | Validation 5 | ========== 6 | 7 | The validation utilities are used to evaluate a dataset for either training an acoustic model, or performing alignment. They will detect issues with sound files, transcription files, unalignable utterances, and can perform some simplistic evaluation of transcripts. 8 | 9 | .. 
toctree:: 10 | 11 | main 12 | helper 13 | -------------------------------------------------------------------------------- /montreal_forced_aligner/validation/__init__.py: -------------------------------------------------------------------------------- 1 | """Validation classes""" 2 | 3 | from montreal_forced_aligner.validation.corpus_validator import ( 4 | PretrainedValidator, 5 | TrainingValidator, 6 | ValidationMixin, 7 | ) 8 | from montreal_forced_aligner.validation.dictionary_validator import DictionaryValidator 9 | 10 | __all__ = ["PretrainedValidator", "TrainingValidator", "ValidationMixin", "DictionaryValidator"] 11 | -------------------------------------------------------------------------------- /docs/source/reference/helper/data.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.data 2 | 3 | .. autosummary:: 4 | :toctree: generated/ 5 | 6 | MfaArguments 7 | TextFileType 8 | SoundFileType 9 | SoundFileInformation 10 | PhoneSetType 11 | WordData 12 | WordType 13 | PhoneType 14 | WorkflowType 15 | DatabaseImportData 16 | PronunciationProbabilityCounter 17 | -------------------------------------------------------------------------------- /docs/source/reference/transcription/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _transcription_api: 3 | 4 | Transcription 5 | ============= 6 | 7 | MFA can use trained acoustic models (see :ref:`acoustic_model_training_api`), trained language models (see :ref:`language_model_training_api`), and pronunciation dictionaries (see :ref:`generating_dictionaries_api`) in order to generate transcripts for audio files. 8 | 9 | .. 
toctree:: 10 | 11 | main 12 | helper 13 | -------------------------------------------------------------------------------- /tests/data/configs/train_g2p_config.yaml: -------------------------------------------------------------------------------- 1 | punctuation: "、。।,@<>\"(),.:;¿?¡!\\&%#*~【】,…‥「」『』〝〟″⟨⟩♪・‹›«»~′$+=" 2 | clitic_markers: "'’" 3 | compound_markers: "-" 4 | num_pronunciations: 1 # Used if running in validation mode 5 | order: 7 6 | random_starts: 25 7 | seed: 1917 8 | delta: 0.0009765 9 | lr: 1.0 10 | batch_size: 200 11 | num_iterations: 10 12 | smoothing_method: "kneser_ney" 13 | pruning_method: "relative_entropy" 14 | model_size: 1000000 15 | -------------------------------------------------------------------------------- /docs/source/user_guide/concepts/features.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | (acoustic_features)= 5 | # Acoustic features 6 | 7 | 8 | ```{warning} 9 | 10 | Still under construction, I hope to fill these sections out as I have time. 11 | ``` 12 | 13 | 14 | (features_mfcc)= 15 | ## Mel-Frequency Cepstrum Coefficients (MFCCs) 16 | 17 | ```{seealso} 18 | * https://kaldi-asr.org/doc/feat.html#feat_mfcc 19 | ``` 20 | 21 | 22 | (features_pitch)= 23 | ## Pitch 24 | -------------------------------------------------------------------------------- /docs/source/reference/g2p/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper functionality 3 | ==================== 4 | 5 | Mixins 6 | ------ 7 | 8 | .. currentmodule:: montreal_forced_aligner.g2p.generator 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | PyniniGenerator 14 | 15 | Helper 16 | ------ 17 | 18 | .. currentmodule:: montreal_forced_aligner.g2p.generator 19 | 20 | .. 
autosummary:: 21 | :toctree: generated/ 22 | 23 | Rewriter 24 | RewriterWorker 25 | -------------------------------------------------------------------------------- /docs/source/reference/language_modeling/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _language_modeling_api: 3 | 4 | Language models 5 | =============== 6 | 7 | Language models allow for transcription via Speech-to-Text when used alongside acoustic models and pronunciation dictionaries. 8 | 9 | .. currentmodule:: montreal_forced_aligner.models 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | LanguageModel 15 | 16 | .. toctree:: 17 | 18 | training 19 | helper 20 | -------------------------------------------------------------------------------- /docs/source/reference/segmentation/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper functions 3 | ================ 4 | 5 | .. currentmodule:: montreal_forced_aligner.vad.multiprocessing 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | SegmentTranscriptFunction 11 | SegmentTranscriptArguments 12 | SegmentVadFunction 13 | SegmentVadArguments 14 | get_initial_segmentation 15 | merge_segments 16 | segment_utterance_transcript 17 | segment_utterance_vad 18 | -------------------------------------------------------------------------------- /docs/source/reference/ivector/training.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _training_ivector_api: 3 | 4 | Training ivector extractors 5 | =========================== 6 | 7 | .. currentmodule:: montreal_forced_aligner.ivector.trainer 8 | 9 | .. 
autosummary:: 10 | :toctree: generated/ 11 | 12 | IvectorTrainer -- Training ivector extractor models 13 | DubmTrainer -- Training block for DUBM 14 | TrainableIvectorExtractor -- Top level worker for running Ivector training pipelines 15 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/phones/word_boundary.txt: -------------------------------------------------------------------------------- 1 | sil nonword 2 | sil_B begin 3 | sil_E end 4 | sil_I internal 5 | sil_S singleton 6 | spn nonword 7 | spn_B begin 8 | spn_E end 9 | spn_I internal 10 | spn_S singleton 11 | phonea_B begin 12 | phonea_E end 13 | phonea_I internal 14 | phonea_S singleton 15 | phoneb_B begin 16 | phoneb_E end 17 | phoneb_I internal 18 | phoneb_S singleton 19 | phonec_B begin 20 | phonec_E end 21 | phonec_I internal 22 | phonec_S singleton 23 | -------------------------------------------------------------------------------- /docs/source/reference/diarization/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper functions 3 | ================ 4 | 5 | .. currentmodule:: montreal_forced_aligner.diarization.multiprocessing 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | PldaClassificationFunction 11 | PldaClassificationArguments 12 | ComputeEerFunction 13 | ComputeEerArguments 14 | SpeechbrainEmbeddingFunction 15 | SpeechbrainClassificationFunction 16 | SpeechbrainArguments 17 | cluster_matrix 18 | -------------------------------------------------------------------------------- /docs/source/reference/tokenization/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _tokenization_api: 3 | 4 | Tokenizers 5 | ========== 6 | 7 | Tokenizers allow for adding spaces as word boundaries for orthographic systems that don't normally use them (e.g., Japanese, Chinese, Thai). 8 | 9 | ..
currentmodule:: montreal_forced_aligner.models 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | TokenizerModel 15 | 16 | .. toctree:: 17 | 18 | training 19 | tokenizer 20 | helper 21 | -------------------------------------------------------------------------------- /tests/data/textgrid/multilingual_ipa_us.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 2.9013125 6 | tiers? 7 | size = 1 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "speaker_two" 12 | xmin = 0 13 | xmax = 2.9013125 14 | intervals: size = 1 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 2.9013125 18 | text = "uh with only like four games to go" 19 | -------------------------------------------------------------------------------- /docs/source/reference/segmentation/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _segmentation_api: 3 | 4 | Segmentation 5 | ============ 6 | 7 | Segmentation aims to break long audio files into chunks of speech. 8 | 9 | .. note:: 10 | 11 | The current implementation of segmentation uses only Voice Activity Detection (VAD) features. There's been some work towards getting a full speaker diarization set up going with :ref:`training_ivector_api` but that's largely planned for 2.1. 12 | 13 | .. toctree:: 14 | 15 | main 16 | helper 17 | -------------------------------------------------------------------------------- /docs/source/reference/tokenization/tokenizer.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _tokenizer_api: 3 | 4 | Corpus tokenizer 5 | ================= 6 | 7 | .. currentmodule:: montreal_forced_aligner.tokenization.tokenizer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | CorpusTokenizer 13 | TokenizerValidator 14 | 15 | Simple tokenizer 16 | ================ 17 | 18 | .. 
currentmodule:: montreal_forced_aligner.tokenization.simple 19 | 20 | .. autosummary:: 21 | :toctree: generated/ 22 | 23 | SimpleTokenizer 24 | -------------------------------------------------------------------------------- /tests/data/configs/tri_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 100 14 | subset: 1000 15 | 16 | - triphone: 17 | num_iterations: 3 18 | num_leaves: 250 19 | max_gaussians: 2000 20 | cluster_threshold: -1 21 | subset: 3000 22 | boost_silence: 1.25 23 | power: 0.25 24 | -------------------------------------------------------------------------------- /montreal_forced_aligner/dictionary/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pronunciation dictionaries 3 | ========================== 4 | 5 | """ 6 | 7 | from montreal_forced_aligner.dictionary.mixins import DictionaryMixin 8 | from montreal_forced_aligner.dictionary.multispeaker import ( 9 | MultispeakerDictionary, 10 | MultispeakerDictionaryMixin, 11 | ) 12 | 13 | __all__ = [ 14 | "multispeaker", 15 | "mixins", 16 | "DictionaryMixin", 17 | "MultispeakerDictionary", 18 | "MultispeakerDictionaryMixin", 19 | ] 20 | -------------------------------------------------------------------------------- /tests/data/configs/lda_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 4 13 | max_gaussians: 100 14 | subset: 1000 15 | 16 | - lda: 17 | num_iterations: 15 18 | num_leaves: 500 19 | max_gaussians: 4000 20 | subset: 1000 21 | features: 22 | 
splice_left_context: 3 23 | splice_right_context: 3 24 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/lexicon.text.fst: -------------------------------------------------------------------------------- 1 | 0 1 0.6931471805599453 2 | 0 1 sil 0.6931471805599453 3 | 2 1 sil 4 | 1 1 sil_S !SIL 0.6931471805599453 5 | 1 2 sil_S !SIL 0.6931471805599453 6 | 1 1 spn_S 0.6931471805599453 7 | 1 2 spn_S 0.6931471805599453 8 | 1 3 phonea_B worda 9 | 3 1 phoneb_E 0.6931471805599453 10 | 3 2 phoneb_E 0.6931471805599453 11 | 1 4 phonea_B wordb 12 | 4 1 phonec_E 0.6931471805599453 13 | 4 2 phonec_E 0.6931471805599453 14 | 1 0 15 | -------------------------------------------------------------------------------- /docs/source/reference/dictionary/main.rst: -------------------------------------------------------------------------------- 1 | 2 | Main classes 3 | ============ 4 | 5 | .. currentmodule:: montreal_forced_aligner.dictionary 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | MultispeakerDictionary -- Collection of pronunciation dictionaries that specify speaker-dictionary mappings 11 | 12 | .. currentmodule:: montreal_forced_aligner.dictionary.remapper 13 | 14 | .. 
autosummary:: 15 | :toctree: generated/ 16 | 17 | DictionaryRemapper -- Class to remap a dictionary to a new phone set 18 | -------------------------------------------------------------------------------- /tests/data/configs/out_of_order_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 40 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - triphone: 12 | num_iterations: 35 13 | num_leaves: 2500 14 | max_gaussians: 20000 15 | cluster_threshold: -1 16 | subset: 30000 17 | boost_silence: 1.25 18 | power: 0.25 19 | 20 | - monophone: 21 | num_iterations: 40 22 | max_gaussians: 1000 23 | subset: 10000 24 | -------------------------------------------------------------------------------- /docs/source/reference/language_modeling/training.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _language_model_training_api: 3 | 4 | Training language models 5 | ======================== 6 | 7 | .. currentmodule:: montreal_forced_aligner.language_modeling.trainer 8 | 9 | .. 
autosummary:: 10 | :toctree: generated/ 11 | 12 | MfaLmCorpusTrainer -- Trainer for language model on text corpora 13 | MfaLmDictionaryCorpusTrainer -- Trainer for language model on text corpora 14 | MfaLmArpaTrainer -- Trainer for MFA language model on arpa format language model 15 | -------------------------------------------------------------------------------- /tests/data/configs/pitch_tri_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: true 9 | use_voicing: true 10 | 11 | training: 12 | - monophone: 13 | num_iterations: 5 14 | max_gaussians: 100 15 | subset: 1000 16 | 17 | - triphone: 18 | num_iterations: 3 19 | num_leaves: 250 20 | max_gaussians: 2000 21 | cluster_threshold: -1 22 | subset: 3000 23 | boost_silence: 1.25 24 | power: 0.25 25 | -------------------------------------------------------------------------------- /docs/source/reference/g2p_modeling/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _g2p_modeling_api: 3 | 4 | Grapheme-to-Phoneme (G2P) models 5 | ================================ 6 | 7 | G2P models are used to generate pronunciations from orthographic spellings. The G2P models currently supported use Pynini weighted finite state transducers (wFST) built from a training lexicon. 8 | 9 | .. currentmodule:: montreal_forced_aligner.models 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | G2PModel 15 | 16 | .. toctree:: 17 | 18 | training 19 | helper 20 | -------------------------------------------------------------------------------- /docs/source/reference/tokenization/helper.rst: -------------------------------------------------------------------------------- 1 | Helper functionality 2 | ==================== 3 | 4 | Helper 5 | ------ 6 | 7 | ..
currentmodule:: montreal_forced_aligner.tokenization.tokenizer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | TokenizerRewriter 13 | TokenizerArguments 14 | TokenizerFunction 15 | 16 | 17 | Helper 18 | ------ 19 | 20 | .. currentmodule:: montreal_forced_aligner.tokenization.simple 21 | 22 | .. autosummary:: 23 | :toctree: generated/ 24 | 25 | SanitizeFunction 26 | SplitWordsFunction 27 | -------------------------------------------------------------------------------- /docs/source/reference/core_index.rst: -------------------------------------------------------------------------------- 1 | 2 | Core functionality 3 | ================== 4 | 5 | This section contains the core objects that are used as input to any top level worker: the corpora, pronunciation dictionaries, and various types of MFA models. Each model's section contains the classes and functionality used to train them. 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | corpus/index 11 | database/index 12 | dictionary/index 13 | acoustic_modeling/index 14 | g2p_modeling/index 15 | language_modeling/index 16 | ivector/index 17 | -------------------------------------------------------------------------------- /montreal_forced_aligner/g2p/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Grapheme to phoneme (G2P) 3 | ========================= 4 | """ 5 | 6 | from montreal_forced_aligner.g2p.generator import PyniniCorpusGenerator, PyniniWordListGenerator 7 | from montreal_forced_aligner.g2p.phonetisaurus_trainer import PhonetisaurusTrainer 8 | from montreal_forced_aligner.g2p.trainer import PyniniTrainer 9 | 10 | __all__ = [ 11 | "generator", 12 | "trainer", 13 | "PyniniTrainer", 14 | "PyniniCorpusGenerator", 15 | "PyniniWordListGenerator", 16 | "PhonetisaurusTrainer", 17 | ] 18 | -------------------------------------------------------------------------------- /docs/source/user_guide/dictionary_validation.rst:
-------------------------------------------------------------------------------- 1 | 2 | .. _validating_dictionaries: 3 | 4 | ************************************* 5 | Validating pronunciation dictionaries 6 | ************************************* 7 | 8 | 9 | .. _running_the_dictionary_validator: 10 | 11 | Running the dictionary validation utility 12 | ========================================= 13 | 14 | 15 | Command reference 16 | ----------------- 17 | 18 | .. click:: montreal_forced_aligner.command_line.validate:validate_dictionary_cli 19 | :prog: mfa validate_dictionary 20 | :nested: full 21 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/tokenize.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _tokenize_cli: 3 | 4 | Tokenize utterances ``(mfa tokenize)`` 5 | ========================================= 6 | 7 | Use a model trained from :ref:`train_tokenizer_cli` to tokenize a corpus (i.e. insert spaces as word boundaries for orthographic systems that do not require them). 8 | 9 | Command reference 10 | ----------------- 11 | 12 | .. click:: montreal_forced_aligner.command_line.tokenize:tokenize_cli 13 | :prog: mfa tokenize 14 | :nested: full 15 | 16 | 17 | API reference 18 | ------------- 19 | 20 | - :ref:`tokenization_api` 21 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM condaforge/mambaforge:22.11.1-4 AS build 2 | 3 | COPY ci/docker_environment.yaml . 4 | RUN mkdir -p /mfa 5 | RUN useradd -ms /bin/bash mfauser 6 | RUN chown -R mfauser /mfa 7 | COPY . 
/pkg 8 | RUN mamba env create -p /env -f docker_environment.yaml && conda clean -afy && \ 9 | chown -R mfauser /env 10 | RUN conda run -p /env python -m pip install speechbrain && \ 11 | conda run -p /env python -m pip install --no-deps /pkg 12 | USER mfauser 13 | ENV MFA_ROOT_DIR=/mfa 14 | RUN conda run -p /env mfa server init 15 | 16 | RUN echo "source activate /env && mfa server start" > ~/.bashrc 17 | ENV PATH=/env/bin:$PATH 18 | -------------------------------------------------------------------------------- /docs/source/user_guide/concepts/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _concepts: 3 | 4 | *************** 5 | Concepts in MFA 6 | *************** 7 | 8 | This section will attempt to provide a blend of technical and non-technical overviews of various components and concepts used in MFA. There are much more in-depth resources for learning about various components that will be linked if you are interested in learning more about them. 9 | 10 | .. warning:: 11 | 12 | Still under construction, I hope to fill these sections out as I have time. 13 | 14 | .. toctree:: 15 | :hidden: 16 | 17 | features 18 | speaker_adaptation 19 | fst 20 | hmm 21 | -------------------------------------------------------------------------------- /docs/source/reference/g2p_modeling/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper functionality 3 | ==================== 4 | 5 | 6 | Mixins 7 | ------ 8 | 9 | .. currentmodule:: montreal_forced_aligner.g2p.mixins 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | G2PMixin 15 | G2PTopLevelMixin 16 | 17 | .. currentmodule:: montreal_forced_aligner.g2p.trainer 18 | 19 | .. autosummary:: 20 | :toctree: generated/ 21 | 22 | G2PTrainer 23 | 24 | Helper 25 | ------ 26 | 27 | .. currentmodule:: montreal_forced_aligner.g2p.trainer 28 | 29 | .. 
autosummary:: 30 | :toctree: generated/ 31 | 32 | RandomStartWorker 33 | RandomStart 34 | -------------------------------------------------------------------------------- /docs/source/user_guide/configuration/acoustic_model_adapt.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _configuration_adapting: 3 | 4 | Acoustic model adaptation options 5 | ================================= 6 | 7 | For the Kaldi recipe that acoustic model adaptation is based on, see :kaldi_steps:`train_map`. 8 | 9 | 10 | .. csv-table:: 11 | :widths: 20, 20, 60 12 | :header: "Parameter", "Default value", "Notes" 13 | 14 | "mapping_tau", 20, "Smoothing constant used in MAP estimation, corresponds to the number of 'fake counts' that we add for the old model. Larger tau corresponds to less aggressive re-estimation, and more smoothing. You might also want to try 10 or 15." 15 | -------------------------------------------------------------------------------- /montreal_forced_aligner/alignment/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Aligners 3 | ======== 4 | 5 | """ 6 | from montreal_forced_aligner.alignment.adapting import AdaptingAligner 7 | from montreal_forced_aligner.alignment.base import CorpusAligner 8 | from montreal_forced_aligner.alignment.mixins import AlignMixin 9 | from montreal_forced_aligner.alignment.pretrained import DictionaryTrainer, PretrainedAligner 10 | 11 | __all__ = [ 12 | "AdaptingAligner", 13 | "PretrainedAligner", 14 | "CorpusAligner", 15 | "DictionaryTrainer", 16 | "adapting", 17 | "base", 18 | "pretrained", 19 | "mixins", 20 | "AlignMixin", 21 | "multiprocessing", 22 | ] 23 | -------------------------------------------------------------------------------- /docs/source/reference/ivector/helper.rst: -------------------------------------------------------------------------------- 1 | Training functionality 2 | ====================== 3 | 4 | Mixins 5 | ------ 6 | 7 |
.. currentmodule:: montreal_forced_aligner.ivector.trainer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | IvectorModelTrainingMixin 13 | 14 | Helper 15 | ------ 16 | 17 | .. currentmodule:: montreal_forced_aligner.ivector.trainer 18 | 19 | .. autosummary:: 20 | :toctree: generated/ 21 | 22 | GmmGselectFunction 23 | GmmGselectArguments 24 | GaussToPostFunction 25 | GaussToPostArguments 26 | AccGlobalStatsFunction 27 | AccGlobalStatsArguments 28 | AccIvectorStatsFunction 29 | AccIvectorStatsArguments 30 | -------------------------------------------------------------------------------- /docs/source/reference/acoustic_modeling/training.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _acoustic_model_training_api: 3 | 4 | Training acoustic models 5 | ======================== 6 | 7 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.trainer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | TrainableAligner 13 | 14 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling 15 | 16 | .. 
autosummary:: 17 | :toctree: generated/ 18 | 19 | MonophoneTrainer -- Monophone trainer 20 | TriphoneTrainer -- Triphone trainer 21 | LdaTrainer -- LDA trainer 22 | SatTrainer -- Speaker adapted trainer 23 | PronunciationProbabilityTrainer -- Pronunciation probability trainer 24 | -------------------------------------------------------------------------------- /tests/data/configs/sat_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 500 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 500 14 | subset: 1000 15 | 16 | - triphone: 17 | num_iterations: 3 18 | num_leaves: 1500 19 | max_gaussians: 2000 20 | cluster_threshold: -1 21 | subset: 3000 22 | boost_silence: 1.25 23 | power: 0.25 24 | 25 | - sat: 26 | num_iterations: 5 27 | num_leaves: 2000 28 | max_gaussians: 10000 29 | power: 0.2 30 | silence_weight: 0.0 31 | fmllr_update_type: "full" 32 | subset: 1000 33 | -------------------------------------------------------------------------------- /tests/data/configs/acoustic/english_mfa_topology.yaml: -------------------------------------------------------------------------------- 1 | ɾ: 2 | max_states: 1 3 | min_states: 1 4 | ɾʲ: 5 | max_states: 1 6 | min_states: 1 7 | ɾ̃: 8 | max_states: 1 9 | min_states: 1 10 | ʔ: 11 | max_states: 1 12 | min_states: 1 13 | ə: 14 | max_states: 3 15 | ɚ: 16 | max_states: 3 17 | ɪ: 18 | max_states: 3 19 | e: 20 | max_states: 3 21 | eː: 22 | max_states: 3 23 | ɛ: 24 | max_states: 3 25 | ɛː: 26 | max_states: 3 27 | ɐ: 28 | max_states: 3 29 | i: 30 | max_states: 3 31 | iː: 32 | max_states: 3 33 | o: 34 | max_states: 3 35 | oː: 36 | max_states: 3 37 | u: 38 | max_states: 3 39 | uː: 40 | max_states: 3 41 | ɝ: 42 | max_states: 3 43 | j: 44 | max_states: 3 45 | w: 46 | max_states: 3 47 | 
-------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/train_tokenizer.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _train_tokenizer_cli: 3 | 4 | Train a word tokenizer ``(mfa train_tokenizer)`` 5 | ================================================ 6 | 7 | Training a tokenizer uses a simplified sequence-to-sequence model like G2P, but with the following differences: 8 | 9 | * Both the input and output symbols are graphemes 10 | * Symbols can only output themselves 11 | * Only allow for inserting space characters 12 | 13 | Command reference 14 | ----------------- 15 | 16 | .. click:: montreal_forced_aligner.command_line.train_tokenizer:train_tokenizer_cli 17 | :prog: mfa train_tokenizer 18 | :nested: full 19 | 20 | 21 | API reference 22 | ------------- 23 | 24 | - :ref:`tokenization_api` 25 | -------------------------------------------------------------------------------- /tests/test_abc.py: -------------------------------------------------------------------------------- 1 | from montreal_forced_aligner.abc import MfaWorker, TrainerMixin 2 | from montreal_forced_aligner.acoustic_modeling import SatTrainer, TrainableAligner 3 | from montreal_forced_aligner.alignment import AlignMixin 4 | 5 | 6 | def test_typing(basic_corpus_dir, basic_dict_path, temp_dir): 7 | am_trainer = TrainableAligner( 8 | corpus_directory=basic_corpus_dir, 9 | dictionary_path=basic_dict_path, 10 | ) 11 | trainer = SatTrainer(identifier="sat", worker=am_trainer) 12 | assert type(trainer).__name__ == "SatTrainer" 13 | assert isinstance(trainer, TrainerMixin) 14 | assert isinstance(trainer, AlignMixin) 15 | assert isinstance(trainer, MfaWorker) 16 | assert isinstance(am_trainer, MfaWorker) 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest new functionality that would make MFA easier to use 4 | title: '' 5 | labels: enhancement 6 | assignees: mmcauliffe 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /docs/source/reference/language_modeling/helper.rst: -------------------------------------------------------------------------------- 1 | Helper functionality 2 | ==================== 3 | 4 | Mixins 5 | ------ 6 | 7 | .. currentmodule:: montreal_forced_aligner.language_modeling.trainer 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | LmTrainerMixin -- Mixin for language model training 13 | LmCorpusTrainerMixin -- Mixin for language model training on a corpus 14 | LmDictionaryCorpusTrainerMixin -- Mixin for language model training on a corpus with a pronunciation dictionary 15 | 16 | 17 | Helper 18 | ------ 19 | 20 | .. currentmodule:: montreal_forced_aligner.language_modeling.multiprocessing 21 | 22 | .. 
autosummary:: 23 | :toctree: generated/ 24 | 25 | TrainSpeakerLmFunction 26 | TrainSpeakerLmArguments 27 | -------------------------------------------------------------------------------- /montreal_forced_aligner/tokenization/resources/japanese/sudachi_config.json: -------------------------------------------------------------------------------- 1 | {"userDict": ["mfa_sudachi.dic"], "characterDefinitionFile": "char.def", "inputTextPlugin": [{"class": "com.worksap.nlp.sudachi.DefaultInputTextPlugin"}, {"class": "com.worksap.nlp.sudachi.ProlongedSoundMarkPlugin", "prolongedSoundMarks": ["ー", "-", "⁓", "〜", "〰"], "replacementSymbol": "ー"}, {"class": "com.worksap.nlp.sudachi.IgnoreYomiganaPlugin", "leftBrackets": ["(", "("], "rightBrackets": [")", ")"], "maxYomiganaLength": 4}], "oovProviderPlugin": [{"class": "com.worksap.nlp.sudachi.MeCabOovPlugin", "charDef": "char.def", "unkDef": "unk.def"}, {"class": "com.worksap.nlp.sudachi.SimpleOovPlugin", "oovPOS": ["補助記号", "一般", "*", "*", "*", "*"], "leftId": 5968, "rightId": 5968, "cost": 3857}]} 2 | -------------------------------------------------------------------------------- /docs/source/user_guide/workflows/finding_oovs.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | .. _g2p_find_oovs: 4 | 5 | Find OOVs in a corpus ``(mfa find_oovs)`` 6 | ========================================= 7 | 8 | The ``mfa find_oovs`` command is a utility for generating a list of OOVs for a given corpus and pronunciation dictionary, along with counts of their occurrences in the corpus and which utterances they appear in. 9 | 10 | .. note:: 11 | 12 | This command is functionally the same as :ref:`using the corpus validator <running_the_validator>`, but it outputs the OOV information more straightforwardly. 13 | 14 | 15 | Command reference 16 | ----------------- 17 | 18 | ..
click:: montreal_forced_aligner.command_line.find_oovs:find_oovs_cli 19 | :prog: mfa find_oovs 20 | :nested: full 21 | -------------------------------------------------------------------------------- /rtd_environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - defaults 4 | dependencies: 5 | - python>=3.9 6 | - numpy 7 | - librosa 8 | - tqdm 9 | - requests 10 | - pyyaml 11 | - praatio>=6.0.0 12 | - dataclassy 13 | - sqlalchemy>=2.0 14 | - pynini 15 | - pgvector 16 | - pgvector-python 17 | - postgresql 18 | - scikit-learn 19 | - hdbscan 20 | - psycopg2 21 | - click 22 | - setuptools_scm 23 | - importlib_metadata 24 | - sphinx 25 | - numpydoc 26 | - sphinx-design 27 | - sphinx-click 28 | - sphinx-intl 29 | - sphinx-immaterial 30 | - pydata-sphinx-theme 31 | - myst-parser 32 | - mock 33 | - setuptools-scm 34 | - kneed 35 | - matplotlib 36 | - seaborn 37 | - rich 38 | - rich-click 39 | - kaldi =*=cpu* 40 | - kalpy>=0.8 41 | - compilers 42 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/train_ivector.rst: -------------------------------------------------------------------------------- 1 | .. _train_ivector: 2 | 3 | Train an ivector extractor ``(mfa train_ivector)`` 4 | ================================================== 5 | 6 | The Montreal Forced Aligner can train :term:`ivector extractors` using an acoustic model for generating alignments. As part of this training process, a classifier is built in that can be used as part of :ref:`diarize_speakers`. 7 | 8 | 9 | Command reference 10 | ----------------- 11 | 12 | .. 
click:: montreal_forced_aligner.command_line.train_ivector_extractor:train_ivector_cli 13 | :prog: mfa train_ivector 14 | :nested: full 15 | 16 | Configuration reference 17 | ----------------------- 18 | 19 | - :ref:`configuration_ivector` 20 | 21 | API reference 22 | ------------- 23 | 24 | - :ref:`ivector_api` 25 | -------------------------------------------------------------------------------- /tests/data/configs/english_arpa_remapping.yaml: -------------------------------------------------------------------------------- 1 | aj: AY1 2 | aw: AW1 3 | b: B 4 | bʲ: B 5 | c: K 6 | cʰ: K 7 | cʷ: K 8 | d: D 9 | dʒ: JH 10 | dʲ: D 11 | ej: EY1 12 | f: F 13 | fʲ: F 14 | h: HH 15 | i: IY0 16 | iː: IY1 17 | j: Y 18 | k: K 19 | kʰ: K 20 | kʷ: K 21 | l: L 22 | m: M 23 | mʲ: M 24 | m̩: AH0 M 25 | n: N 26 | n̩: AH0 N 27 | ow: OW1 28 | p: P 29 | pʰ: P 30 | pʲ: P 31 | pʷ: P 32 | s: S 33 | t: T 34 | tʃ: CH 35 | tʰ: T 36 | tʲ: T 37 | tʷ: T 38 | v: V 39 | vʲ: V 40 | w: W 41 | z: Z 42 | æ: AE1 43 | ç: HH 44 | ð: DH 45 | ŋ: NG 46 | ɐ: AH1 47 | ɑ: AA0 48 | ɑː: AA1 49 | ɒ: AO0 50 | ɒː: AO1 51 | ɔj: OY1 52 | ə: AH0 53 | ɚ: ER0 54 | ɛ: EH1 55 | ɝ: ER1 56 | ɟ: G 57 | ɟʷ: G 58 | ɡ: G 59 | ɡʷ: G 60 | ɪ: IH1 61 | ɫ: L 62 | ɫ̩: AH0 L 63 | ɱ: M 64 | ɲ: N 65 | ɹ: R 66 | ʃ: SH 67 | ʉ: UW0 68 | ʉː: UW1 69 | ʊ: UH1 70 | ʎ: L 71 | ʒ: ZH 72 | θ: TH 73 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/training_lm.rst: -------------------------------------------------------------------------------- 1 | .. _training_lm: 2 | 3 | Train a new language model ``(mfa train_lm)`` 4 | ============================================== 5 | 6 | MFA has a utility function for training ARPA-format ngram :term:`language models`, as well as merging with a pre-existing model. 7 | 8 | 9 | .. note:: 10 | 11 | As of version 2.0.6, users on Windows can run this command natively without requiring :xref:`wsl`, see :ref:`installation` for more details. 
12 | 13 | Command reference 14 | ----------------- 15 | 16 | .. click:: montreal_forced_aligner.command_line.train_lm:train_lm_cli 17 | :prog: mfa train_lm 18 | :nested: full 19 | 20 | Configuration reference 21 | ----------------------- 22 | 23 | - :ref:`configuration_language_modeling` 24 | 25 | API reference 26 | ------------- 27 | 28 | - :ref:`language_modeling_api` 29 | -------------------------------------------------------------------------------- /docs/source/changelog/changelog_3.2.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _changelog_3.2: 3 | 4 | ************* 5 | 3.2 Changelog 6 | ************* 7 | 8 | 3.2.1 9 | ----- 10 | 11 | - Changed unicode normalization to default to composed forms unless overridden by :code:`--unicode_decomposition true` 12 | 13 | 3.2.0 14 | ----- 15 | 16 | - Added :code:`--subset_word_count` parameter to :ref:`train_acoustic_model` to add a minimum word count for an utterance to be included in training subsets 17 | - Added :code:`--minimum_utterance_length` parameter to :ref:`train_acoustic_model` to add a minimum word count for an utterance to be included in training at all 18 | - Improved memory usage in compiling training graphs for initial subsets 19 | - Add support for transcription via whisperx and speechbrain models 20 | - Update text normalization to normalize to decomposed forms 21 | - Compatibility with Kalpy 0.6.7 22 | -------------------------------------------------------------------------------- /tests/data/configs/test_groups.yaml: -------------------------------------------------------------------------------- 1 | bilabial_stops: 2 | - p 3 | - b 4 | labiodental_obstruents: 5 | - f 6 | - v 7 | dental_obstruents: 8 | - th 9 | - dh 10 | coronal_stops: 11 | - t 12 | - d 13 | coronal_affricates: 14 | - ch 15 | - jh 16 | coronal_fricatives: 17 | - sh 18 | - zh 19 | - s 20 | - z 21 | rhotics: 22 | - r 23 | nasals: 24 | - m 25 | - n 26 | - ng 27 | laterals: 28 | - l 29 
| dorsal_obstruents: 30 | - g 31 | - k 32 | voiceless_glottals: 33 | - hh 34 | central_vowels: 35 | - ah 36 | - er 37 | - uh 38 | - ih 39 | front_diphthongs: 40 | - ay 41 | - oy 42 | back_diphthongs: 43 | - ow 44 | - aw 45 | low_vowels: 46 | - aa 47 | - ao 48 | high_front_vowels: 49 | - iy 50 | front_glides: 51 | - y 52 | mid_front_vowels: 53 | - ae 54 | - eh 55 | - ey 56 | high_back_vowels: 57 | - uw 58 | back_glides: 59 | - w 60 | -------------------------------------------------------------------------------- /docs/source/user_guide/implementations/fine_tune.md: -------------------------------------------------------------------------------- 1 | 2 | (fine_tune_alignments)= 3 | 4 | # Fine-tuning alignments 5 | 6 | By default and standard in ASR, the frame step between feature frames is set to 10 ms, which limits the accuracy of MFA to a minimum of 0.01 seconds. When the `--fine_tune` flag is specified, the aligner does an extra fine-tuning step following alignment. The audio surrounding each interval's initial boundary is extracted with a frame step of 1 ms (0.001s) and is aligned using a simple phone dictionary combined with a transcript of the previous phone and the current phone. Extracting the phone alignment gives the possibility of higher degrees of accuracy (down to 1ms). 7 | 8 | :::{warning} 9 | 10 | The actual accuracy bound is not clear as each frame uses the surrounding 25ms to generate features, so each frame necessarily incorporates time-smeared acoustic information. 11 | ::: 12 | -------------------------------------------------------------------------------- /docs/source/reference/helper/abc.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.abc 2 | 3 | ..
autosummary:: 4 | :toctree: generated/ 5 | 6 | KaldiFunction 7 | MfaModel -- Base model type for MFA 8 | MfaWorker -- Base worker class for MFA 9 | TopLevelMfaWorker -- MFA workers that have acoustic models 10 | TrainerMixin -- Trainer type interface 11 | TemporaryDirectoryMixin -- Mixin for temporary directory functionality 12 | DatabaseMixin -- Mixin for database functionality 13 | AdapterMixin -- Adaptation type interface 14 | ExporterMixin -- Abstract exporter type interface 15 | FileExporterMixin -- File exporter type interface 16 | ModelExporterMixin -- Model exporter type interface 17 | 18 | .. automodule:: montreal_forced_aligner.models 19 | 20 | .. autosummary:: 21 | :toctree: generated/ 22 | 23 | Archive 24 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_acoustic.txt: -------------------------------------------------------------------------------- 1 | this dh ih s 2 | is ih z 3 | the dh ah 4 | acoustic ah k uw s t ih k 5 | corpus k ao r p us 6 | i'm ay m 7 | talking t aa k ih ng 8 | pretty p r eh t iy 9 | fast f ae s t 10 | here hh iy r 11 | there's dh eh r z 12 | nothing n ah th ih ng 13 | going g ow ih ng 14 | else eh l s 15 | on ah n 16 | we're w iy r 17 | just j ah s t 18 | yknow y ah n ow 19 | some s ah m 20 | speech s p iy ch 21 | errors eh r ao r z 22 | but b ah t 23 | who hh uw 24 | cares k ae r z 25 | me m iy 26 | really r iy l iy 27 | slow s l ow 28 | and ae n d 29 | slightly s l ay t l iy 30 | lower l ow w er 31 | in ih n 32 | intensity ih n t eh n s ih t iy 33 | saying s ey ih ng 34 | words w er d z 35 | here's h iy r z 36 | more m ao r 37 | um ah m 38 | that dh ae t 39 | should sh uh d 40 | be b iy 41 | all aa l 42 | thanks th ae ng k s 43 | just jh ah s t 44 | sound s aw n d 45 | environment eh n v ay r ah n m eh n t 46 | -------------------------------------------------------------------------------- /montreal_forced_aligner/corpus/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Corpora 3 | ======= 4 | 5 | 6 | """ 7 | from __future__ import annotations 8 | 9 | from montreal_forced_aligner.corpus.acoustic_corpus import ( 10 | AcousticCorpus, 11 | AcousticCorpusMixin, 12 | AcousticCorpusPronunciationMixin, 13 | ) 14 | from montreal_forced_aligner.corpus.base import CorpusMixin 15 | from montreal_forced_aligner.corpus.text_corpus import ( 16 | DictionaryTextCorpusMixin, 17 | TextCorpus, 18 | TextCorpusMixin, 19 | ) 20 | 21 | __all__ = [ 22 | "base", 23 | "helper", 24 | "classes", 25 | "features", 26 | "multiprocessing", 27 | "CorpusMixin", 28 | "ivector_corpus", 29 | "acoustic_corpus", 30 | "AcousticCorpus", 31 | "AcousticCorpusMixin", 32 | "AcousticCorpusPronunciationMixin", 33 | "text_corpus", 34 | "TextCorpus", 35 | "TextCorpusMixin", 36 | "DictionaryTextCorpusMixin", 37 | ] 38 | -------------------------------------------------------------------------------- /tests/data/configs/basic_train_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 100 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 1000 14 | subset: 100 15 | 16 | - triphone: 17 | num_iterations: 3 18 | num_leaves: 250 19 | max_gaussians: 2000 20 | cluster_threshold: -1 21 | subset: 1000 22 | 23 | - lda: 24 | num_iterations: 2 25 | num_leaves: 500 26 | max_gaussians: 4000 27 | subset: 1000 28 | features: 29 | splice_left_context: 3 30 | splice_right_context: 3 31 | 32 | - sat: 33 | num_iterations: 2 34 | num_leaves: 500 35 | max_gaussians: 5000 36 | power: 0.2 37 | silence_weight: 0.0 38 | fmllr_update_type: "full" 39 | subset: 1000 40 | features: 41 | lda: true 42 | -------------------------------------------------------------------------------- 
/docs/source/user_guide/configuration/segment.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _configuration_segmentation: 3 | 4 | ******************** 5 | Segmentation options 6 | ******************** 7 | 8 | 9 | .. csv-table:: 10 | :widths: 20, 20, 60 11 | :header: "Parameter", "Default value", "Notes" 12 | 13 | "energy_threshold", 5.5, "Energy threshold above which a frame will be counted as voiced" 14 | "energy_mean_scale", 0.5, "Proportion of the mean energy of the file that should be added to the energy_threshold" 15 | "max_segment_length", 30, "Maximum length of segments before they do not get merged" 16 | "min_pause_duration", 0.05, "Minimum unvoiced duration to split speech segments" 17 | 18 | .. _default_segment_config: 19 | 20 | Default segmentation config file 21 | -------------------------------- 22 | 23 | .. code-block:: yaml 24 | 25 | energy_threshold: 5.5 26 | energy_mean_scale: 0.5 27 | max_segment_length: 30 28 | min_pause_duration: 0.05 29 | -------------------------------------------------------------------------------- /docs/source/user_guide/configuration/diarization.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _configuration_diarization: 3 | 4 | Diarization options 5 | =================== 6 | 7 | .. csv-table:: 8 | :widths: 20, 20, 60 9 | :header: "Parameter", "Default value", "Notes" 10 | :stub-columns: 1 11 | 12 | "cluster_type", ``optics``, "Clustering algorithm in :xref:`scikit-learn` to use, one of ``optics``, ``dbscan``, ``affinity``, ``agglomerative``, ``spectral``, ``kmeans``" 13 | "expected_num_speakers", 0, "Number of speaker clusters to find, must be > 1 for ``agglomerative``, ``spectral``, and ``kmeans``" 14 | "sparse_threshold", 0.5, "Threshold on distance to limit precomputed sparse matrix" 15 | 16 | .. _default_diarization_config: 17 | 18 | Default diarization config file 19 | ------------------------------- 20 | 21 | ..
code-block:: yaml 22 | 23 | cluster_type: optics 24 | energy_mean_scale: 0.5 25 | max_segment_length: 30 26 | min_pause_duration: 0.05 27 | -------------------------------------------------------------------------------- /docs/source/user_guide/configuration/lm.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _configuration_language_modeling: 3 | 4 | ******************************* 5 | Language model training options 6 | ******************************* 7 | 8 | See also the :ref:`configuration_dictionary` for the options that control how text is normalized and parsed. 9 | 10 | 11 | .. csv-table:: 12 | :widths: 20, 20, 60 13 | :header: "Parameter", "Default value", "Notes" 14 | 15 | "order", 3, "Order of language model" 16 | "method", kneser_ney, "Method for smoothing" 17 | "prune_thresh_small", 0.0000003, "Threshold for pruning a small model, only used if ``prune`` is true" 18 | "prune_thresh_medium", 0.0000001, "Threshold for pruning a medium model, only used if ``prune`` is true" 19 | 20 | Default language model config 21 | ----------------------------- 22 | 23 | .. 
code-block:: yaml 24 | 25 | order: 3 26 | method: kneser_ney 27 | prune_thresh_small: 0.0000003 28 | prune_thresh_medium: 0.0000001 29 | -------------------------------------------------------------------------------- /ci/docker_environment.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - pytorch 4 | - nvidia 5 | - anaconda 6 | dependencies: 7 | - python>=3.11 8 | - numpy 9 | - librosa 10 | - tqdm 11 | - requests 12 | - pyyaml 13 | - dataclassy 14 | - kaldi=*=*cpu* 15 | - pynini 16 | - openfst 17 | - scikit-learn 18 | - hdbscan 19 | - baumwelch 20 | - ngram 21 | - praatio>=6.0.0 22 | - sqlalchemy>=2.0 23 | - git 24 | - cmake 25 | - make 26 | - compilers 27 | - pkg-config 28 | - pgvector 29 | - pgvector-python 30 | - postgresql 31 | - psycopg2 32 | - click 33 | - pytorch 34 | - torchaudio 35 | - setuptools_scm 36 | - kneed 37 | - matplotlib 38 | - seaborn 39 | - sqlite 40 | - rich 41 | - rich-click 42 | - kalpy>=0.8 43 | - spacy 44 | - sudachipy 45 | - sudachidict-core 46 | - spacy-pkuseg 47 | - sentencepiece 48 | - pip 49 | - pip: 50 | - speechbrain 51 | - jamo 52 | - pythainlp 53 | - dragonmapper 54 | -------------------------------------------------------------------------------- /tests/data/configs/lda_sat_train.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 400 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 4 13 | max_gaussians: 1000 14 | subset: 1000 15 | 16 | - triphone: 17 | num_iterations: 2 18 | num_leaves: 1500 19 | max_gaussians: 2000 20 | cluster_threshold: -1 21 | subset: 3000 22 | boost_silence: 1.25 23 | power: 0.25 24 | 25 | - lda: 26 | num_iterations: 3 27 | num_leaves: 1500 28 | max_gaussians: 4000 29 | subset: 5000 30 | features: 31 | splice_left_context: 3 32 | splice_right_context: 3 33 | 34 | - 
sat: 35 | num_iterations: 2 36 | num_leaves: 1500 37 | max_gaussians: 8000 38 | power: 0.2 39 | silence_weight: 0.0 40 | fmllr_update_type: "full" 41 | subset: 5000 42 | features: 43 | lda: true 44 | -------------------------------------------------------------------------------- /docs/source/reference/g2p_modeling/training.rst: -------------------------------------------------------------------------------- 1 | Training G2P models 2 | =================== 3 | 4 | Pynini Pair Ngram 5 | ----------------- 6 | 7 | 8 | .. currentmodule:: montreal_forced_aligner.g2p.trainer 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | PyniniTrainer -- Trainer for Pynini G2P model 14 | PyniniValidator -- Validator for Pynini G2P model 15 | 16 | Phonetisaurus-style models 17 | -------------------------- 18 | 19 | .. currentmodule:: montreal_forced_aligner.g2p.phonetisaurus_trainer 20 | 21 | .. autosummary:: 22 | :toctree: generated/ 23 | 24 | PhonetisaurusTrainer -- Trainer for Phonetisaurus G2P model 25 | 26 | Mixins 27 | ------ 28 | 29 | .. currentmodule:: montreal_forced_aligner.g2p.trainer 30 | 31 | .. autosummary:: 32 | :toctree: generated/ 33 | 34 | PyniniTrainerMixin 35 | 36 | .. currentmodule:: montreal_forced_aligner.g2p.phonetisaurus_trainer 37 | 38 | .. 
autosummary:: 39 | :toctree: generated/ 40 | 41 | PhonetisaurusTrainerMixin 42 | -------------------------------------------------------------------------------- /tests/test_segmentation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from montreal_forced_aligner.diarization.speaker_diarizer import FOUND_SPEECHBRAIN 4 | from montreal_forced_aligner.vad.segmenter import TranscriptionSegmenter 5 | 6 | 7 | def test_segment_transcript( 8 | basic_corpus_dir, 9 | english_mfa_acoustic_model, 10 | english_us_mfa_reduced_dict, 11 | generated_dir, 12 | temp_dir, 13 | basic_segment_config_path, 14 | db_setup, 15 | ): 16 | if not FOUND_SPEECHBRAIN: 17 | pytest.skip("SpeechBrain not installed") 18 | segmenter = TranscriptionSegmenter( 19 | corpus_directory=basic_corpus_dir, 20 | dictionary_path=english_us_mfa_reduced_dict, 21 | acoustic_model_path=english_mfa_acoustic_model, 22 | speechbrain=True, 23 | en_activation_th=0.4, 24 | en_deactivation_th=0.4, 25 | ) 26 | segmenter.setup() 27 | new_utterances = segmenter.segment_transcript(1) 28 | assert len(new_utterances) > 0 29 | segmenter.cleanup() 30 | -------------------------------------------------------------------------------- /docs/source/reference/dictionary/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper classes and functions 3 | ============================ 4 | 5 | Model 6 | ----- 7 | 8 | .. currentmodule:: montreal_forced_aligner.models 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | DictionaryModel 14 | 15 | Mixins 16 | ------ 17 | 18 | .. currentmodule:: montreal_forced_aligner.dictionary.mixins 19 | 20 | .. autosummary:: 21 | :toctree: generated/ 22 | 23 | DictionaryMixin 24 | TemporaryDictionaryMixin 25 | 26 | .. currentmodule:: montreal_forced_aligner.dictionary.multispeaker 27 | 28 | .. 
autosummary:: 29 | :toctree: generated/ 30 | 31 | MultispeakerDictionaryMixin 32 | 33 | 34 | Pronunciation probability functionality 35 | ======================================= 36 | 37 | Helper 38 | ------ 39 | 40 | .. currentmodule:: montreal_forced_aligner.alignment.multiprocessing 41 | 42 | .. autosummary:: 43 | :toctree: generated/ 44 | 45 | GeneratePronunciationsFunction 46 | GeneratePronunciationsArguments 47 | -------------------------------------------------------------------------------- /docs/source/user_guide/concepts/fst.md: -------------------------------------------------------------------------------- 1 | 2 | (fst)= 3 | # Finite State Transducers 4 | 5 | ```{warning} 6 | 7 | Still under construction, I hope to fill these sections out as I have time. 8 | ``` 9 | 10 | ```{seealso} 11 | 12 | * [OpenFst Quick Tour](https://www.openfst.org/twiki/bin/view/FST/FstQuickTour) 13 | ``` 14 | 15 | (acceptor)= 16 | ## Acceptors 17 | 18 | (wfst)= 19 | 20 | ## Weighted Finite State Transducers 21 | 22 | 23 | (lexicon_fst)= 24 | # Lexicon FSTs 25 | 26 | MFA compiles input pronunciation dictionaries to a Weighted Finite State Transducer ({term}`WFST`), with phones as input symbols and words as output symbols. During alignment, the {term}`lexicon FST` is composed with a linear acceptor created from the 27 | 28 | 29 | (grammar_fst)= 30 | 31 | # Grammar FSTs 32 | 33 | 34 | (g2p_fst)= 35 | # G2P FSTs 36 | 37 | ```{seealso} 38 | 39 | * [Pynini documentation](https://www.openfst.org/twiki/bin/view/GRM/Pynini) 40 | * [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) 41 | ``` 42 | -------------------------------------------------------------------------------- /docs/source/_templates/version.html: -------------------------------------------------------------------------------- 1 | {# This will display the version of the docs as a badge 2 | 3 | Colors from: 4 | 5 | Wong, B. Points of view: Color blindness. 6 | Nat Methods 8, 441 (2011). 
https://doi.org/10.1038/nmeth.1618 7 | 8 | #} 9 | 10 | 11 | {% if "dev" in version %} 12 | {# orange for dev #E69F00 #} 13 | 15 | {% elif versionwarning %} 16 | {# red for old #980F0F #} 17 | 19 | {% else %} 20 | {# green for stable #009E73 #} 21 | 23 | {% endif %} 24 | 25 | -------------------------------------------------------------------------------- /docs/source/reference/helper/exceptions.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: montreal_forced_aligner.exceptions 2 | 3 | .. autosummary:: 4 | :toctree: generated/ 5 | 6 | MFAError 7 | SoxError 8 | G2PError 9 | ConfigError 10 | LMError 11 | LanguageModelNotFoundError 12 | ModelExtensionError 13 | ThirdpartyError 14 | TrainerError 15 | ModelError 16 | CorpusError 17 | ModelLoadError 18 | CorpusReadError 19 | ArgumentError 20 | AlignmentExportError 21 | NoSuccessfulAlignments 22 | KaldiProcessingError 23 | TextParseError 24 | TextGridParseError 25 | DictionaryError 26 | NoDefaultSpeakerDictionaryError 27 | DictionaryPathError 28 | DictionaryFileError 29 | FileArgumentNotFoundError 30 | PretrainedModelNotFoundError 31 | MultipleModelTypesFoundError 32 | ModelTypeNotSupportedError 33 | PronunciationAcousticMismatchError 34 | RootDirectoryError 35 | -------------------------------------------------------------------------------- /tests/test_commandline_train_ivector.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import click.testing 4 | 5 | from montreal_forced_aligner.command_line.mfa import mfa_cli 6 | 7 | 8 | def test_basic_ivector( 9 | basic_corpus_dir, 10 | generated_dir, 11 | temp_dir, 12 | train_ivector_config_path, 13 | ivector_output_model_path, 14 | db_setup, 15 | ): 16 | command = [ 17 | "train_ivector", 18 | basic_corpus_dir, 19 | ivector_output_model_path, 20 | "--config_path", 21 | train_ivector_config_path, 22 | "-q", 23 | "--clean", 24 | "--debug", 25 | ] 26 | command = [str(x) 
for x in command] 27 | result = click.testing.CliRunner().invoke( 28 | mfa_cli, command, catch_exceptions=True 29 | ) 30 | print(result.stdout) 31 | print(result.stderr) 32 | if result.exception: 33 | print(result.exc_info) 34 | raise result.exception 35 | assert not result.return_value 36 | assert os.path.exists(ivector_output_model_path) 37 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python distributions to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | build-n-publish: 9 | name: Build and publish to PyPI 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@main 13 | with: 14 | fetch-depth: 0 15 | - name: Set up Python 3.10 16 | uses: actions/setup-python@v3 17 | with: 18 | python-version: "3.10" 19 | 20 | - name: Install pypa/build 21 | run: >- 22 | python -m 23 | pip install 24 | build 25 | --user 26 | 27 | - name: Build a binary wheel and a source tarball 28 | run: >- 29 | python -m 30 | build 31 | --sdist 32 | --wheel 33 | --outdir dist/ 34 | . 35 | 36 | - name: Publish to PyPI 37 | uses: pypa/gh-action-pypi-publish@release/v1 38 | with: 39 | user: __token__ 40 | password: ${{ secrets.PYPI_API_TOKEN }} 41 | -------------------------------------------------------------------------------- /docs/source/reference/acoustic_modeling/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _acoustic_modeling_api: 3 | 4 | Acoustic models 5 | =============== 6 | 7 | :term:`Acoustic models` contain information about how phones are pronounced, trained over large (and not-so-large) corpora of speech. Currently only GMM-HMM style acoustic models are supported, which are generally good enough for alignment, but nowhere near state of the art for transcription. 8 | 9 | .. 
note:: 10 | 11 | As part of the training procedure, alignments are generated, and so can be exported at the end (the same as training an acoustic model and then using it with the :class:`~montreal_forced_aligner.alignment.pretrained.PretrainedAligner`). See :meth:`~montreal_forced_aligner.alignment.CorpusAligner.export_files` for the method and :ref:`train_acoustic_model` for the command line function. 12 | 13 | .. currentmodule:: montreal_forced_aligner.models 14 | 15 | .. autosummary:: 16 | :toctree: generated/ 17 | 18 | AcousticModel 19 | 20 | .. toctree:: 21 | :hidden: 22 | 23 | training 24 | helper 25 | -------------------------------------------------------------------------------- /tests/data/dictionaries/expected/topo: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 12 13 14 15 16 17 18 19 20 21 22 5 | 6 | 0 0 0 0.75 1 0.25 7 | 1 1 1 0.75 2 0.25 8 | 2 2 2 0.75 3 0.25 9 | 3 10 | 11 | 12 | 13 | 1 2 3 4 5 6 7 8 9 10 14 | 15 | 0 0 0 0.25 1 0.25 2 0.25 3 0.25 16 | 1 1 1 0.25 2 0.25 3 0.25 4 0.25 17 | 2 2 1 0.25 2 0.25 3 0.25 4 0.25 18 | 3 3 1 0.25 2 0.25 3 0.25 4 0.25 19 | 4 4 4 0.75 5 0.25 20 | 5 21 | 22 | 23 | -------------------------------------------------------------------------------- /docs/source/user_guide/implementations/phone_models.md: -------------------------------------------------------------------------------- 1 | 2 | (phone_models)= 3 | # Phone model alignments 4 | 5 | ```{warning} 6 | This functionality is deprecated and will be removed in MFA 4.0. 7 | ``` 8 | 9 | With the `--use_phone_model` flag, an ngram language model for phones will be constructed and used to generate phone transcripts with alignments. The phone language model uses bigrams and higher orders (up to 4), with no unigrams included to speed up transcription (and because the phonotactics of languages highly constrain the possible sequences of phones).
The phone language model is trained on phone transcriptions extracted from alignments and includes silence and OOV phones. 10 | 11 | The phone transcription additionally uses speaker-adaptation transforms from the regular alignment as well to speed up transcription. From the phone transcription lattices, we extract phone-level alignments along with confidence score using {kaldi_src}`lattice-to-ctm-conf`. 12 | 13 | The alignments extracted from phone transcriptions are compared to the baseline alignments using the procedure outlined in {ref}`alignment_evaluation` above. 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Montreal Corpus Tools 2 | 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a 5 | copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 13 | in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 18 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 19 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 20 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - anaconda 4 | dependencies: 5 | - python>=3.8 6 | - numpy 7 | - librosa 8 | - pysoundfile 9 | - tqdm 10 | - requests 11 | - pyyaml 12 | - dataclassy 13 | - kaldi=*=*cpu* 14 | - scipy 15 | - pynini 16 | - openfst 17 | - scikit-learn 18 | - hdbscan 19 | - baumwelch 20 | - ngram 21 | - praatio>=6.0.0 22 | - sqlalchemy>=2.0 23 | - pgvector 24 | - pgvector-python 25 | - sqlite 26 | - postgresql 27 | - psycopg2 28 | - click 29 | - setuptools_scm 30 | - pytest 31 | - pytest-mypy 32 | - pytest-cov 33 | - pytest-timeout 34 | - mock 35 | - coverage 36 | - coveralls 37 | - interrogate 38 | - kneed 39 | - matplotlib 40 | - seaborn 41 | - pip 42 | - rich 43 | - rich-click 44 | - kalpy>=0.8 45 | - huggingface_hub 46 | # Tokenization dependencies 47 | - spacy 48 | - sudachipy 49 | - sudachidict-core 50 | - spacy-pkuseg 51 | - pip: 52 | - build 53 | - twine 54 | # Tokenization dependencies 55 | - python-mecab-ko 56 | - jamo 57 | - pythainlp 58 | - hanziconv 59 | - dragonmapper 60 | -------------------------------------------------------------------------------- /docs/source/reference/alignment/helper.rst: -------------------------------------------------------------------------------- 1 | 2 | Helper functionality 3 | ==================== 4 | 5 | Mixins 6 | ------ 7 | 8 | .. currentmodule:: montreal_forced_aligner.alignment.mixins 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | AlignMixin -- Alignment mixin 14 | 15 | Multiprocessing workers and functions 16 | ------------------------------------- 17 | 18 | .. currentmodule:: montreal_forced_aligner.alignment.multiprocessing 19 | 20 | .. 
autosummary:: 21 | :toctree: generated/ 22 | 23 | AlignFunction 24 | FineTuneFunction 25 | CompileTrainGraphsFunction 26 | AccStatsFunction 27 | AlignmentExtractionFunction 28 | ExportTextGridProcessWorker 29 | PhoneConfidenceFunction 30 | 31 | 32 | Multiprocessing argument classes 33 | -------------------------------- 34 | 35 | .. currentmodule:: montreal_forced_aligner.alignment.multiprocessing 36 | 37 | .. autosummary:: 38 | :toctree: generated/ 39 | 40 | AlignArguments 41 | AccStatsArguments 42 | CompileTrainGraphsArguments 43 | AlignmentExtractionArguments 44 | ExportTextGridArguments 45 | FineTuneArguments 46 | PhoneConfidenceArguments 47 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/index.rst: -------------------------------------------------------------------------------- 1 | .. _corpus_creation: 2 | 3 | ************************* 4 | Corpus creation utilities 5 | ************************* 6 | 7 | MFA now contains several command line utilities for helping to create corpora from scratch. The main workflow is as follows: 8 | 9 | 1. If the corpus is made up of long sound files that need segmenting, :ref:`segment the audio files using VAD ` 10 | 2. If the corpus does not contain transcriptions, :ref:`transcribe utterances using existing acoustic models, 11 | language models, and dictionaries ` 12 | 3. Use the :ref:`Anchor annotator tool ` to manually correct errors in transcription 13 | 4. As necessary, bootstrap better transcriptions: 14 | 15 | 1. :ref:`Train language model ` with updated transcriptions 16 | 2. :ref:`Add pronunciation and silence probabilities to the dictionary ` 17 | 18 | ..
toctree:: 19 | :hidden: 20 | 21 | create_segments 22 | train_ivector 23 | diarize_speakers 24 | transcribing 25 | training_lm 26 | training_dictionary 27 | tokenize 28 | train_tokenizer 29 | anchor 30 | -------------------------------------------------------------------------------- /montreal_forced_aligner/acoustic_modeling/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Training acoustic models 3 | ======================== 4 | 5 | 6 | """ 7 | from montreal_forced_aligner.acoustic_modeling.base import AcousticModelTrainingMixin # noqa 8 | from montreal_forced_aligner.acoustic_modeling.lda import LdaTrainer # noqa 9 | from montreal_forced_aligner.acoustic_modeling.monophone import MonophoneTrainer # noqa 10 | from montreal_forced_aligner.acoustic_modeling.pronunciation_probabilities import ( # noqa 11 | PronunciationProbabilityTrainer, 12 | ) 13 | from montreal_forced_aligner.acoustic_modeling.sat import SatTrainer # noqa 14 | from montreal_forced_aligner.acoustic_modeling.trainer import TrainableAligner # noqa 15 | from montreal_forced_aligner.acoustic_modeling.triphone import TriphoneTrainer # noqa 16 | 17 | __all__ = [ 18 | "AcousticModelTrainingMixin", 19 | "LdaTrainer", 20 | "MonophoneTrainer", 21 | "SatTrainer", 22 | "TriphoneTrainer", 23 | "PronunciationProbabilityTrainer", 24 | "TrainableAligner", 25 | "base", 26 | "lda", 27 | "monophone", 28 | "sat", 29 | "triphone", 30 | "pronunciation_probabilities", 31 | "trainer", 32 | ] 33 | -------------------------------------------------------------------------------- /tests/data/configs/basic_ipa_config.yaml: -------------------------------------------------------------------------------- 1 | beam: 10 2 | retry_beam: 40 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | multilingual_ipa: true 11 | 12 | digraphs: 13 | - "[dt][szʒʃʐʑʂɕç]" 14 | - "[a][job_name][u]" 15 | 16 | strip_diacritics: 17 | - 'ː' 
18 | - 'ˑ' 19 | - '̩' 20 | - '̆' 21 | - '̑' 22 | - '̯' 23 | - '͡' 24 | - '‿' 25 | - '͜' 26 | 27 | training: 28 | - monophone: 29 | num_iterations: 5 30 | max_gaussians: 1000 31 | subset: 100 32 | 33 | - triphone: 34 | num_iterations: 3 35 | num_leaves: 250 36 | max_gaussians: 2000 37 | cluster_threshold: -1 38 | subset: 1000 39 | 40 | - lda: 41 | num_iterations: 2 42 | num_leaves: 500 43 | max_gaussians: 4000 44 | subset: 1000 45 | features: 46 | splice_left_context: 3 47 | splice_right_context: 3 48 | 49 | - sat: 50 | num_iterations: 2 51 | num_leaves: 500 52 | max_gaussians: 5000 53 | power: 0.2 54 | silence_weight: 0.0 55 | fmllr_update_type: "full" 56 | subset: 1000 57 | features: 58 | lda: true 59 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/anchor.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _`Anchor Annotator documentation`: https://anchor-annotator.readthedocs.io/en/latest/ 3 | 4 | .. _anchor: 5 | 6 | Anchor annotator ``(mfa anchor)`` 7 | ================================= 8 | 9 | The Anchor Annotator is a GUI utility for MFA that allows users to modify transcripts and add/change entries in the pronunciation dictionary to interactively fix out-of-vocabulary issues. 10 | 11 | .. attention:: 12 | 13 | Anchor is under development and is currently pre-alpha. Use at your own risk and please use version control or back up any critical data. 14 | 15 | 16 | To use the annotator, first install the anchor subpackage: 17 | 18 | .. code-block:: 19 | 20 | conda install montreal-forced-aligner[anchor] 21 | 22 | This will install MFA, if it hasn't been installed already, along with all the packages that Anchor requires. Once installed, Anchor can be started with the following MFA subcommand: ``mfa anchor``. 23 | 24 | See the `Anchor Annotator documentation`_ for more information. 25 | 26 | Command reference 27 | ================= 28 | 29 | 30 | ..
click:: montreal_forced_aligner.command_line.anchor:anchor_cli 31 | :prog: mfa anchor 32 | :nested: full 33 | -------------------------------------------------------------------------------- /docs/source/reference/database/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _database_api: 3 | 4 | Database 5 | ======== 6 | 7 | MFA uses a SQLite database to cache information during training/alignment runs. An issue with training larger corpora was running into memory bottlenecks as all the information in the corpus was stored in memory, and fMLLR estimations in later stages would crash. Additionally, there was always a trade off between storing results for use in other applications like :xref:`anchor` or providing diagnostic information to users, and ensuring that the core MFA workflows were as memory/time efficient as possible. Offloading to a database frees up some memory, and makes some computations more efficient, and should be optimized enough to not slow down regular processing. 8 | 9 | .. currentmodule:: montreal_forced_aligner.db 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | Dictionary 15 | Dialect 16 | Word 17 | Pronunciation 18 | Phone 19 | Grapheme 20 | File 21 | TextFile 22 | SoundFile 23 | Speaker 24 | Utterance 25 | WordInterval 26 | PhoneInterval 27 | CorpusWorkflow 28 | PhonologicalRule 29 | RuleApplication 30 | Job 31 | M2MSymbol 32 | M2M2Job 33 | Word2Job 34 | -------------------------------------------------------------------------------- /docs/source/user_guide/workflows/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _workflows_index: 3 | 4 | Workflows available 5 | =================== 6 | 7 | The primary workflow in MFA is forced alignment, where text is aligned to speech along with phones derived from a pronunciation dictionary and an acoustic model. 
There are, however, other workflows for transcribing speech using speech-to-text functionality in Kaldi, pronunciation dictionary creation using Pynini, and some basic corpus creation utilities like VAD-based segmentation. Additionally, acoustic models, G2P models, and language models can be trained from your own data (and then used in alignment and other workflows). 8 | 9 | .. warning:: 10 | 11 | Speech-to-text functionality is pretty basic, and the model architecture used in MFA is older GMM-HMM and NGram models, so using something like :xref:`speechbrain` or :xref:`whisperx` will likely yield better quality transcriptions. 12 | 13 | .. hint:: 14 | 15 | See :ref:`pretrained_models` for details about commands to inspect, download, and save various pretrained MFA models. 16 | 17 | .. toctree:: 18 | :hidden: 19 | 20 | alignment 21 | adapt_acoustic_model 22 | train_acoustic_model 23 | finding_oovs 24 | dictionary_generating 25 | g2p_train 26 | remap 27 | -------------------------------------------------------------------------------- /tests/data/textgrid/short_segments.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1 6 | tiers? 
def test_validate_corpus(
    multilingual_ipa_tg_corpus_dir,
    english_mfa_acoustic_model,
    english_us_mfa_dictionary,
    temp_dir,
    generated_dir,
    db_setup,
):
    """Run ``mfa find_oovs`` on the multilingual IPA TextGrid corpus and check its output files.

    The command is invoked in-process through click's test runner. Any exception
    captured by the runner is re-raised so that pytest reports the real traceback.
    """
    output_path = generated_dir.joinpath("find_oovs_output")
    command = [
        "find_oovs",
        multilingual_ipa_tg_corpus_dir,
        english_us_mfa_dictionary,
        output_path,
        "-q",
        "-s",
        "4",
        "--oov_count_threshold",
        "0",
        "--clean",
        "--no_use_mp",
    ]
    # click expects string arguments; the fixtures may be Path objects
    command = [str(x) for x in command]
    result = click.testing.CliRunner().invoke(mfa_cli, command, catch_exceptions=True)
    print(result.stdout)
    print(result.stderr)
    if result.exception:
        print(result.exc_info)
        raise result.exception
    assert not result.return_value
    # A Path object is always truthy, so the files must be checked with ``exists()``
    # for these assertions to be able to fail.
    # NOTE(review): the file names assume ``english_us_mfa_dictionary`` is the
    # dictionary's name rather than a filesystem path -- confirm against the fixture.
    assert output_path.joinpath(f"oovs_found_{english_us_mfa_dictionary}.txt").exists()
    assert output_path.joinpath(f"oov_counts_{english_us_mfa_dictionary}.txt").exists()
    assert output_path.joinpath("utterance_oovs.txt").exists()
autosummary:: 47 | :toctree: generated/ 48 | 49 | InitialFmllrFunction 50 | InitialFmllrArguments 51 | FmllrRescoreFunction 52 | FmllrRescoreArguments 53 | FinalFmllrFunction 54 | FinalFmllrArguments 55 | -------------------------------------------------------------------------------- /tests/data/configs/train_g2p_acoustic.yaml: -------------------------------------------------------------------------------- 1 | beam: 100 2 | retry_beam: 800 3 | 4 | features: 5 | type: "mfcc" 6 | use_energy: false 7 | frame_shift: 10 8 | use_pitch: false 9 | 10 | training: 11 | - monophone: 12 | num_iterations: 5 13 | max_gaussians: 1000 14 | subset: 100 15 | 16 | - triphone: 17 | num_iterations: 3 18 | num_leaves: 250 19 | max_gaussians: 2000 20 | cluster_threshold: -1 21 | subset: 1000 22 | 23 | - lda: 24 | num_iterations: 2 25 | num_leaves: 500 26 | max_gaussians: 4000 27 | subset: 1000 28 | features: 29 | splice_left_context: 3 30 | splice_right_context: 3 31 | 32 | - sat: 33 | num_iterations: 2 34 | num_leaves: 500 35 | max_gaussians: 5000 36 | power: 0.2 37 | silence_weight: 0.0 38 | fmllr_update_type: "full" 39 | subset: 1000 40 | features: 41 | lda: true 42 | 43 | - pronunciation_probabilities: 44 | train_g2p: true 45 | num_iterations: 5 46 | 47 | - sat: 48 | num_iterations: 2 49 | num_leaves: 500 50 | max_gaussians: 5000 51 | power: 0.2 52 | silence_weight: 0.0 53 | fmllr_update_type: "full" 54 | subset: 1000 55 | features: 56 | lda: true 57 | -------------------------------------------------------------------------------- /tests/data/configs/eval_mapping.yaml: -------------------------------------------------------------------------------- 1 | ʔ: T 2 | h: HH 3 | ç: HH 4 | i: [IY0, IY2, IY1] 5 | iː: [IY0, IY2, IY1] 6 | ɚ: [ER0, ER2, ER1] 7 | ɝ: [ER0, ER2, ER1] 8 | ɝː: [ER0, ER2, ER1] 9 | dʒ: JH 10 | tʃ: CH 11 | ɑ: [AA0, AA2, AA1] 12 | ɑː: [AA0, AA2, AA1] 13 | ʊ: [UH0, UH2, UH1] 14 | ɛ: [EH0, EH2, EH1] 15 | oʊ: [OW0, OW2, OW1] 16 | ow: [OW0, OW2, OW1] 17 | aʊ: [AW0, AW2, 
AW1] 18 | aw: [AW0, AW2, AW1] 19 | aɪ: [AY0, AY2, AY1] 20 | aj: [AY0, AY2, AY1] 21 | ɔ: [AO0, AO2, AO1] 22 | ɒ: [AO0, AO2, AO1] 23 | ɔː: [AO0, AO2, AO1] 24 | ɒː: [AO0, AO2, AO1] 25 | ɔɪ: [OY0, OY2, OY1] 26 | ɔj: [OY0, OY2, OY1] 27 | u: [UW0, UW2, UW1] 28 | ʉ: [UW0, UW2, UW1] 29 | uː: [UW0, UW2, UW1] 30 | ʉː: [UW0, UW2, UW1] 31 | æ: [AE0, AE2, AE1] 32 | æː: [AE0, AE2, AE1] 33 | eɪ: [EY0, EY2, EY1] 34 | ej: [EY0, EY2, EY1] 35 | ɪ: [IH0, IH2, IH1] 36 | ð: DH 37 | ʃ: SH 38 | ʒ: ZH 39 | ɹ: R 40 | j: Y 41 | θ: TH 42 | ə: [AH0, AH2, AH1] 43 | ʌ: [AH0, AH2, AH1] 44 | ɐ: [AH0, AH2, AH1] 45 | n̩: N 46 | n: N 47 | m̩: M 48 | m: M 49 | mʲ: M 50 | ɱ: M 51 | v: V 52 | vʲ: V 53 | fʲ: F 54 | f: F 55 | l̩: L 56 | l: L 57 | ɫ̩: L 58 | ɫ: L 59 | ʎ: L 60 | ɾ: [D, T] 61 | pʰ: P 62 | pʲ: P 63 | p̚: P 64 | bʲ: B 65 | b̚: B 66 | tʰ: T 67 | t: T 68 | d: D 69 | d̚: D 70 | dʲ: D 71 | t̚: T 72 | tʲ: T 73 | kʰ: K 74 | k̚: K 75 | cʰ: K 76 | c̚: K 77 | ɡ: G 78 | ɡ̚: G 79 | ɟ̚: G 80 | ɟ: G 81 | ŋ: NG 82 | ɲ: [NG, N] 83 | -------------------------------------------------------------------------------- /github_environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12 5 | - numpy<2.1.0 6 | - librosa 7 | - pysoundfile 8 | - tqdm 9 | - requests 10 | - pyyaml 11 | - dataclassy 12 | - kaldi=*=*cpu* 13 | - scipy 14 | - pynini 15 | - openfst 16 | - scikit-learn 17 | - hdbscan 18 | - baumwelch 19 | - ngram 20 | - praatio>=6.0.0 21 | - sqlalchemy>=2.0 22 | - pgvector 23 | - pgvector-python 24 | - sqlite 25 | - postgresql 26 | - psycopg2 27 | - click 28 | - setuptools_scm 29 | - pytest 30 | - pytest-mypy 31 | - pytest-cov 32 | - pytest-timeout 33 | - mock 34 | - coverage 35 | - coveralls 36 | - interrogate 37 | - kneed 38 | - matplotlib 39 | - seaborn 40 | - rich 41 | - rich-click 42 | - kalpy>=0.8 43 | - pip 44 | - huggingface_hub 45 | - spacy 46 | - sudachipy 47 | - sudachidict-core 48 | - jamo 49 | 
# Names exported by ``from montreal_forced_aligner import *``.
# "abc", "data" and "config" are not bound by the import statements in this file;
# for a package, a star-import will attempt to import any name listed here that
# is not already an attribute as a submodule.
# NOTE(review): presumably all three exist as submodules of this package -- only
# ``config`` is visibly imported elsewhere; confirm ``abc`` and ``data``.
__all__ = [
    "abc",
    "data",
    "acoustic_modeling",
    "alignment",
    "command_line",
    "config",
    "corpus",
    "dictionary",
    "exceptions",
    "g2p",
    "ivector",
    "language_modeling",
    "helper",
    "models",
    "transcription",
    "textgrid",
    "utils",
]
-------------------------------------------------------------------------------- /docs/source/user_guide/configuration/transcription.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _transcribe_config: 3 | 4 | ********************* 5 | Transcription options 6 | ********************* 7 | 8 | .. csv-table:: 9 | :widths: 20, 20, 60 10 | :header: "Parameter", "Default value", "Notes" 11 | 12 | "beam", 13, "Beam for decoding" 13 | "max_active", 7000, "Max active for decoding" 14 | "lattice_beam", 6, "Beam width for decoding lattices" 15 | "acoustic_scale", 0.083333, "Multiplier to scale acoustic costs" 16 | "silence_weight", 0.01, "Weight on silence in fMLLR estimation" 17 | "uses_speaker_adaptation", true, "Flag for whether to perform speaker adaptation" 18 | "first_beam", 10.0, "Beam for decoding in initial speaker-independent pass, only used if ``uses_speaker_adaptation`` is true" 19 | "first_max_active", 2000, "Max active for decoding in initial speaker-independent pass, only used if ``uses_speaker_adaptation`` is true" 20 | "fmllr_update_type", "full", "Type of fMLLR estimation" 21 | 22 | Default transcriber config 23 | -------------------------- 24 | 25 | .. code-block:: yaml 26 | 27 | beam: 13 28 | max_active: 7000 29 | lattice_beam: 6 30 | acoustic_scale: 0.083333 31 | silence_weight: 0.01 32 | uses_speaker_adaptation: true 33 | first_beam: 10.0 # Beam used in initial, speaker-indep. pass 34 | first_max_active: 2000 # max-active used in initial pass.
@click.command(
    "history",
    help="Show previously run mfa commands",
)
@click.option("--depth", help="Number of commands to list, defaults to 10", type=int, default=10)
@click.option(
    "--verbose/--no_verbose",
    "-v/-nv",
    "verbose",
    help=f"Output debug messages, default is {config.VERBOSE}",
    default=config.VERBOSE,
)
@click.help_option("-h", "--help")
def history_cli(depth: int, verbose: bool) -> None:
    """
    List previous MFA commands

    Parameters
    ----------
    depth: int
        Maximum number of most recent commands to list; values of zero or less list nothing
    verbose: bool
        Flag for logging a tab-separated table (command, date, execution time, version,
        exit code, exception) instead of only the command strings
    """
    history = config.load_command_history()
    # ``history[-0:]`` is the entire list and a negative depth would slice from the
    # front, so only a positive depth is used as a tail slice.
    history = history[-depth:] if depth > 0 else []
    if not verbose:
        for h in history:
            logger.info(h["command"])
        return
    logger.info("command\tDate\tExecution time\tVersion\tExit code\tException")
    for h in history:
        # Execution time is read as a number of seconds and rendered as HH:MM:SS
        execution_time = time.strftime("%H:%M:%S", time.gmtime(h["execution_time"]))
        d = h["date"].isoformat()
        logger.info(
            f"{h['command']}\t{d}\t{execution_time}\t{h.get('version', 'unknown')}\t{h['exit_code']}\t{h['exception']}"
        )
def _release_components(version_string: str) -> tuple:
    """Return the leading numeric release components of ``version_string`` as a tuple of ints."""
    components = []
    for piece in version_string.strip().lstrip("vV").split("."):
        digits = ""
        for char in piece:
            if char not in "0123456789":
                break
            digits += char
        if not digits:
            break
        components.append(int(digits))
        if len(digits) != len(piece):
            # A suffix such as "rc1" or "dev3+g1234" ends the numeric release segment
            break
    return tuple(components)


@click.command(name="anchor", short_help="Launch Anchor")
@click.help_option("-h", "--help")
@click.pass_context
def anchor_cli(context, **kwargs) -> None:  # pragma: no cover
    """
    Launch Anchor Annotator (if installed)

    When ``config.VERBOSE`` is set, the installed Anchor version is first compared
    against the latest GitHub release and a notice is printed if it is older.
    The check is best effort: any failure to reach or parse the release
    information is logged at debug level and Anchor is launched regardless.

    Raises
    ------
    ImportError
        If the ``anchor`` package is not installed
    """
    initialize_configuration(context)
    from anchor.command_line import main  # noqa

    if config.VERBOSE:
        try:
            from anchor._version import version
        except ImportError:
            version = None
        if version is not None:
            try:
                # A timeout keeps an unreachable network from hanging the launch
                response = requests.get(
                    "https://api.github.com/repos/MontrealCorpusTools/Anchor-annotator/releases/latest",
                    timeout=10,
                )
                latest_version = response.json()["tag_name"].lstrip("v")
            except (
                requests.RequestException,
                ValueError,
                KeyError,
                TypeError,
                AttributeError,
            ) as e:
                logger.debug(f"Could not check for the latest Anchor version: {e}")
            else:
                # Compare numerically: plain string comparison orders "0.10.0" before "0.9.0"
                if _release_components(str(version)) < _release_components(latest_version):
                    click.echo(
                        f"You are currently running an older version of Anchor annotator ({version}) than the latest available ({latest_version}). "
                        f"To update, please run mfa_update."
                    )
    main()
| thirdparty/bin 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage.xml 69 | *,cover 70 | 71 | # Translations 72 | *.mo 73 | *.pot 74 | 75 | # Django stuff: 76 | *.log 77 | 78 | # Sphinx documentation 79 | docs/build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | 85 | *.dll 86 | 87 | .pytest_cache/ 88 | 89 | docs/source/api/ 90 | 91 | 92 | *.lprof 93 | 94 | *.pclprof 95 | -------------------------------------------------------------------------------- /.github/workflows/publish_docker.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docker image 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | push_to_registries: 9 | name: Push Docker image to multiple registries 10 | runs-on: ubuntu-latest 11 | permissions: 12 | packages: write 13 | contents: read 14 | steps: 15 | - name: Check out the repo 16 | uses: actions/checkout@v3 17 | 18 | - name: Log in to Docker Hub 19 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 20 | with: 21 | username: ${{ secrets.DOCKER_USERNAME }} 22 | password: ${{ secrets.DOCKER_PASSWORD }} 23 | 24 | - name: Log in to the Container registry 25 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 26 | with: 27 | registry: ghcr.io 28 | username: ${{ github.actor }} 29 | password: ${{ secrets.GITHUB_TOKEN }} 30 | 31 | - name: Extract metadata (tags, labels) for Docker 32 | id: meta 33 | uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 34 | with: 35 | images: | 36 | mmcauliffe/montreal-forced-aligner 37 | ghcr.io/${{ github.repository }} 38 | 39 | 
- name: Build and push Docker images 40 | uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc 41 | with: 42 | context: . 43 | push: true 44 | tags: ${{ steps.meta.outputs.tags }} 45 | labels: ${{ steps.meta.outputs.labels }} 46 | -------------------------------------------------------------------------------- /montreal_forced_aligner/tokenization/resources/japanese/unk.def: -------------------------------------------------------------------------------- 1 | DEFAULT,5968,5968,3857,補助記号,一般,*,*,*,* 2 | SPACE,5966,5966,6056,空白,*,*,*,*,* 3 | KANJI,5139,5139,14657,名詞,普通名詞,一般,*,*,* 4 | KANJI,5129,5129,17308,名詞,普通名詞,サ変可能,*,*,* 5 | KANJI,4785,4785,18181,名詞,固有名詞,一般,*,*,* 6 | KANJI,4787,4787,18086,名詞,固有名詞,人名,一般,*,* 7 | KANJI,4791,4791,19198,名詞,固有名詞,地名,一般,*,* 8 | SYMBOL,5129,5129,17094,名詞,普通名詞,サ変可能,*,*,* 9 | NUMERIC,4794,4794,12450,名詞,数詞,*,*,*,* 10 | ALPHA,5139,5139,11633,名詞,普通名詞,一般,*,*,* 11 | ALPHA,4785,4785,13620,名詞,固有名詞,一般,*,*,* 12 | ALPHA,4787,4787,14228,名詞,固有名詞,人名,一般,*,* 13 | ALPHA,4791,4791,15793,名詞,固有名詞,地名,一般,*,* 14 | ALPHA,5687,5687,15246,感動詞,一般,*,*,*,* 15 | HIRAGANA,5139,5139,16012,名詞,普通名詞,一般,*,*,* 16 | HIRAGANA,5129,5129,20012,名詞,普通名詞,サ変可能,*,*,* 17 | HIRAGANA,4785,4785,18282,名詞,固有名詞,一般,*,*,* 18 | HIRAGANA,4787,4787,18269,名詞,固有名詞,人名,一般,*,* 19 | HIRAGANA,4791,4791,20474,名詞,固有名詞,地名,一般,*,* 20 | HIRAGANA,5687,5687,17786,感動詞,一般,*,*,*,* 21 | KATAKANA,5139,5139,10980,名詞,普通名詞,一般,*,*,* 22 | KATAKANA,5129,5129,14802,名詞,普通名詞,サ変可能,*,*,* 23 | KATAKANA,4785,4785,13451,名詞,固有名詞,一般,*,*,* 24 | KATAKANA,4787,4787,13759,名詞,固有名詞,人名,一般,*,* 25 | KATAKANA,4791,4791,14554,名詞,固有名詞,地名,一般,*,* 26 | KATAKANA,5687,5687,15272,感動詞,一般,*,*,*,* 27 | KANJINUMERIC,4794,4794,14170,名詞,数詞,*,*,*,* 28 | GREEK,5139,5139,11051,名詞,普通名詞,一般,*,*,* 29 | GREEK,4785,4785,13353,名詞,固有名詞,一般,*,*,* 30 | GREEK,4787,4787,13671,名詞,固有名詞,人名,一般,*,* 31 | GREEK,4791,4791,14862,名詞,固有名詞,地名,一般,*,* 32 | CYRILLIC,5139,5139,11140,名詞,普通名詞,一般,*,*,* 33 | CYRILLIC,4785,4785,13174,名詞,固有名詞,一般,*,*,* 34 | 
CYRILLIC,4787,4787,13495,名詞,固有名詞,人名,一般,*,* 35 | CYRILLIC,4791,4791,14700,名詞,固有名詞,地名,一般,*,* 36 | -------------------------------------------------------------------------------- /docs/source/user_guide/workflows/adapt_acoustic_model.rst: -------------------------------------------------------------------------------- 1 | .. _adapt_acoustic_model: 2 | 3 | Adapt acoustic model to new data ``(mfa adapt)`` 4 | ================================================ 5 | 6 | A recent 2.0 functionality for MFA is to adapt pretrained :term:`acoustic models` to a new dataset. MFA will first align the dataset using the pretrained model, and then update the acoustic model's GMM means with those generated by the data. See :kaldi_steps:`train_map` for the Kaldi script this functionality corresponds to. As part of the adaptation process, MFA can generate final alignments and export these files if an output directory is specified in the command. 7 | 8 | 9 | .. note:: 10 | 11 | You can use manual or verified reference alignments in adaptation to bypass the initial round of alignment for some or 12 | all files and these alignments will be used for adjusting the acoustic model parameters. See :ref:`reference_alignment_format` 13 | for more information on how to include these alignments. 14 | 15 | 16 | .. seealso:: 17 | 18 | See :xref:`mfa_adaptation_scripts` for reference files and CLI commands that have been used for evaluating performance of adaptation. 19 | 20 | Command reference 21 | ----------------- 22 | 23 | ..
def test_save_text_lab(basic_corpus_dir, generated_dir, db_setup):
    """Load the basic corpus and save its ``acoustic_corpus`` file back to the corpus directory."""
    output_directory = generated_dir.joinpath("gui_tests")
    config.TEMPORARY_DIRECTORY = output_directory
    corpus = AcousticCorpus(
        corpus_directory=basic_corpus_dir,
    )
    corpus._load_corpus()
    corpus.get_file(name="acoustic_corpus").save(corpus.corpus_directory)
    corpus.cleanup_connections()


def test_file_properties(
    stereo_corpus_dir,
    generated_dir,
    db_setup,
):
    """Check channel, speaker and utterance counts of the stereo ``michaelandsickmichael`` file."""
    output_directory = generated_dir.joinpath("gui_tests")
    config.TEMPORARY_DIRECTORY = output_directory
    corpus = AcousticCorpus(
        corpus_directory=stereo_corpus_dir,
    )
    corpus._load_corpus()
    try:
        file = corpus.get_file(name="michaelandsickmichael")
        assert file.sound_file.num_channels == 2
        assert file.num_speakers == 2
        assert file.num_utterances == 7
        x, y = file.sound_file.normalized_waveform()
        assert y.shape[0] == 2
    finally:
        # Release the corpus connections like the sibling tests do, even when an
        # assertion above fails.
        corpus.cleanup_connections()
def test_mfa_history():
    """Run ``mfa history`` with an explicit depth and then with the default depth."""
    for arguments in (["history", "--depth", "60"], ["history"]):
        outcome = click.testing.CliRunner().invoke(
            mfa_cli, arguments, catch_exceptions=True
        )
        print(outcome.stdout)
        print(outcome.stderr)
        if outcome.exception:
            # Re-raise so pytest reports the original error from the command
            print(outcome.exc_info)
            raise outcome.exception
        assert not outcome.return_value


def test_mfa_history_verbose():
    """Run ``mfa history -v`` with an explicit depth and then with the default depth."""
    for arguments in (["history", "-v", "--depth", "60"], ["history", "-v"]):
        outcome = click.testing.CliRunner().invoke(
            mfa_cli, arguments, catch_exceptions=True
        )
        print(outcome.stdout)
        print(outcome.stderr)
        if outcome.exception:
            # Re-raise so pytest reports the original error from the command
            print(outcome.exc_info)
            raise outcome.exception
        assert not outcome.return_value
no profiling 6 | entry: '@profile' 7 | language: pygrep 8 | types: [ python ] 9 | - id: print-check 10 | name: no print statements 11 | entry: '\bprint\(' 12 | language: pygrep 13 | types: [ python ] 14 | files: ^montreal_forced_aligner/ 15 | exclude: ^montreal_forced_aligner/command_line/transcribe.py 16 | - repo: https://github.com/psf/black 17 | rev: 23.9.1 18 | hooks: 19 | - id: black 20 | - repo: https://github.com/pycqa/flake8 21 | rev: 7.0.0 22 | hooks: 23 | - id: flake8 24 | entry: pflake8 25 | additional_dependencies: 26 | - pyproject-flake8 27 | - repo: https://github.com/pre-commit/mirrors-isort 28 | rev: v5.10.1 29 | hooks: 30 | - id: isort 31 | additional_dependencies: [toml] 32 | - repo: https://github.com/asottile/setup-cfg-fmt 33 | rev: v2.2.0 34 | hooks: 35 | - id: setup-cfg-fmt 36 | args: 37 | - --min-py3-version 38 | - "3.8" 39 | - repo: https://github.com/pre-commit/pre-commit-hooks 40 | rev: v4.0.1 41 | hooks: 42 | - id: check-ast 43 | - id: check-builtin-literals 44 | - id: check-docstring-first 45 | - id: check-merge-conflict 46 | - id: check-yaml 47 | - id: check-toml 48 | - id: debug-statements 49 | - id: end-of-file-fixer 50 | - id: trailing-whitespace 51 | - id: check-added-large-files 52 | args: ['--maxkb=2000'] 53 | - id: mixed-line-ending 54 | -------------------------------------------------------------------------------- /tests/data/textgrid/michaelandsickmichael_short_tg.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | 0 5 | 52.44082780612245 6 | 7 | 2 8 | "IntervalTier" 9 | "michael" 10 | 0 11 | 52.44082780612245 12 | 7 13 | 0 14 | 1.059222833923831 15 | "" 16 | 1.059222833923831 17 | 7.541483952089169 18 | "this is the acoustic corpus i'm talking pretty fast here there's nothing going else going on we're just yknow there's some speech errors but who 19 | cares" 20 | 7.541483952089169 21 | 8.016163828116456 22 | "" 23 | 
8.016163828116456 24 | 17.207369573609213 25 | "um this is me talking really slow and slightly lower in intensity uh we're just saying some words" 26 | 17.207369573609213 27 | 18.35980726400338 28 | "" 29 | 18.35980726400338 30 | 25.251655700977985 31 | "and here's some more words words words words um and that should be all thanks" 32 | 25.251655700977985 33 | 52.44082780612245 34 | "" 35 | "IntervalTier" 36 | "sickmichael" 37 | 0 38 | 52.44080102040816 39 | 9 40 | 0 41 | 26.72325 42 | "" 43 | 26.72325 44 | 39.52854922648294 45 | "uh so this is the sick corpus uh i have a cold so i probably sound quite different than the uh uh acoustic corpus um the recording environment is also quite different" 46 | 39.52854922648294 47 | 40.20409920265843 48 | "" 49 | 40.20409920265843 50 | 43.81379465384285 51 | "and i'm saying a bunch of different words that i did not say in the original one" 52 | 43.81379465384285 53 | 44.480184007206404 54 | "" 55 | 44.480184007206404 56 | 45.08451636541159 57 | "uh" 58 | 45.08451636541159 59 | 46.37863407952624 60 | "" 61 | 46.37863407952624 62 | 51.457439118982556 63 | "and here's a long pause and i think this is probably good alright thanks" 64 | 51.457439118982556 65 | 52.44080102040816 66 | "" 67 | -------------------------------------------------------------------------------- /docs/source/getting_started.rst: -------------------------------------------------------------------------------- 1 | 2 | *************** 3 | Getting started 4 | *************** 5 | 6 | 7 | Installation 8 | ------------ 9 | 10 | .. grid:: 2 11 | 12 | .. grid-item-card:: Installing with conda 13 | :text-align: center 14 | :columns: 12 15 | 16 | MFA is now on :xref:`conda_forge` and can be installed with Anaconda or Miniconda: 17 | 18 | .. code-block:: bash 19 | 20 | conda config --add channels conda-forge 21 | conda create -n aligner montreal-forced-aligner 22 | conda activate aligner 23 | mfa --help 24 | 25 | +++ 26 | 27 | .. 
def test_configure(
    temp_dir,
    basic_corpus_dir,
    generated_dir,
    english_dictionary,
    basic_align_config_path,
    english_acoustic_model,
    global_config,
):
    """Check that ``mfa configure`` writes a config file and that its flags round-trip.

    The command is run twice on the ``test`` profile with opposite settings, and the
    module-level ``config`` values are checked after each reload. The global
    configuration is reset afterwards, including when an assertion fails, so
    modified settings do not leak into later tests.
    """
    path = config.generate_config_path()
    # Start from a clean slate so the existence check below proves the command wrote the file
    if os.path.exists(path):
        os.remove(path)
    try:
        command = [
            "configure",
            "--always_clean",
            "-t",
            temp_dir,
            "-j",
            "10",
            "--disable_mp",
            "--always_verbose",
            "-p",
            "test",
        ]
        command = [str(x) for x in command]
        click.testing.CliRunner().invoke(mfa_cli, command, catch_exceptions=False)
        assert os.path.exists(path)
        config.load_configuration()

        assert config.CURRENT_PROFILE_NAME == "test"
        assert config.NUM_JOBS == 10
        assert not config.USE_MP
        assert config.VERBOSE
        assert config.CLEAN

        command = ["configure", "--never_clean", "--enable_mp", "--never_verbose", "-p", "test"]
        click.testing.CliRunner().invoke(mfa_cli, command, catch_exceptions=False)

        assert os.path.exists(path)
        config.load_configuration()
        assert config.CURRENT_PROFILE_NAME == "test"
        assert config.USE_MP
        assert not config.VERBOSE
        assert not config.CLEAN
    finally:
        # Reset the global configuration regardless of the test outcome
        config.CLEAN = True
        config.DEBUG = True
        config.VERBOSE = True
        config.USE_MP = False
        config.TEMPORARY_DIRECTORY = temp_dir
m eh n t 60 | also aa l s ow 61 | bunch b ah n ch 62 | did d ih d 63 | not n aa t 64 | original ao r ih g ih n ah l 65 | one w ah n 66 | long l aa ng 67 | pause p aa z 68 | think th ih ng k 69 | good g uh d 70 | alright aa l r ay t 71 | much m ah ch 72 | since s ih n s 73 | quality k w aa l ih t iy 74 | of ah v 75 | gonna g ah n ah 76 | cough k aa f 77 | for f ao r 78 | little l ih t ah l 79 | bit b ih t 80 | just jh ah s t 81 | to t uw 82 | yup j ah p 83 | happened hh ae p ah n d 84 | that's dh ae t s 85 | hopefully hh ow p f uh l iy 86 | levels l eh v ah l z 87 | okay ow k ay 88 | lot l aa t 89 | yeah j ae 90 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: mmcauliffe 7 | 8 | --- 9 | 10 | **Debugging checklist** 11 | 12 | [ ] Have you read the troubleshooting page (https://montreal-forced-aligner.readthedocs.io/en/latest/user_guide/troubleshooting.html) and searched the documentation to ensure that your issue is not addressed there? 13 | [ ] Have you updated to latest MFA version (check https://montreal-forced-aligner.readthedocs.io/en/latest/changelog/changelog_3.0.html)? What is the output of `mfa version`? 14 | [ ] Have you tried rerunning the command with the `--clean` flag? 15 | 16 | **Describe the issue** 17 | A clear and concise description of what the bug is. 18 | 19 | **For Reproducing your issue** 20 | Please fill out the following: 21 | 22 | 1. Corpus structure 23 | * What language is the corpus in? 24 | * How many files/speakers? 25 | * Are you using lab files or TextGrid files for input? 26 | 2. Dictionary 27 | * Are you using a dictionary from MFA? If so, which one? 28 | * If it's a custom dictionary, what is the phoneset? 29 | 3. 
Acoustic model 30 | * If you're using an acoustic model, is it one downloaded through MFA? If so, which one? 31 | * If it's a model you've trained, what data was it trained on? 32 | 33 | **Log file** 34 | Please attach the log file for the run that encountered an error (by default these will be stored in `~/Documents/MFA`). 35 | 36 | **Desktop (please complete the following information):** 37 | - OS: [e.g. Windows, OSX, Linux] 38 | - Version [e.g. MacOSX 10.15, Ubuntu 20.04, Windows 10, etc] 39 | - Any other details about the setup (Cloud, Docker, etc) 40 | 41 | **Additional context** 42 | Add any other context about the problem here. 43 | -------------------------------------------------------------------------------- /tests/data/configs/acoustic/english_mfa_phone_groups.yaml: -------------------------------------------------------------------------------- 1 | - 2 | - p 3 | - pʷ 4 | - pʰ 5 | - pʲ 6 | - 7 | - kp 8 | - 9 | - b 10 | - bʲ 11 | - 12 | - ɡb 13 | - 14 | - f 15 | - fʷ 16 | - fʲ 17 | - 18 | - v 19 | - vʷ 20 | - vʲ 21 | - 22 | - θ 23 | - 24 | - t̪ 25 | - 26 | - ð 27 | - 28 | - d̪ 29 | - 30 | - t 31 | - tʷ 32 | - tʰ 33 | - tʲ 34 | - 35 | - ʈ 36 | - ʈʲ 37 | - ʈʷ 38 | - 39 | - ʔ 40 | - 41 | - d 42 | - dʲ 43 | - 44 | - ɖ 45 | - ɖʲ 46 | - 47 | - ɾ 48 | - ɾʲ 49 | - 50 | - tʃ 51 | - 52 | - dʒ 53 | - 54 | - ʃ 55 | - 56 | - ʒ 57 | - 58 | - s 59 | - 60 | - z 61 | - 62 | - ɹ 63 | - 64 | - m 65 | - 66 | - mʲ 67 | - 68 | - m̩ 69 | - 70 | - ɱ 71 | - 72 | - n 73 | - 74 | - n̩ 75 | - 76 | - ɲ 77 | - 78 | - ɾ̃ 79 | - 80 | - ŋ 81 | - 82 | - l 83 | - 84 | - ɫ 85 | - 86 | - ɫ̩ 87 | - 88 | - ʎ 89 | - 90 | - ɟ 91 | - ɟʷ 92 | - 93 | - ɡ 94 | - ɡʷ 95 | - 96 | - c 97 | - cʷ 98 | - cʰ 99 | - 100 | - k 101 | - kʷ 102 | - kʰ 103 | - 104 | - ç 105 | - 106 | - h 107 | - 108 | - ɐ 109 | - 110 | - ə 111 | - 112 | - ɜː 113 | - ɜ 114 | - 115 | - ɝ 116 | - 117 | - ɚ 118 | - 119 | - ʊ 120 | - 121 | - ɪ 122 | - 123 | - ɑ 124 | - ɑː 125 | - 126 | - ɒ 127 | - ɒː 128 | - 129 | - ɔ 130 | - 131 | -
aː 132 | - a 133 | - 134 | - æ 135 | - 136 | - aj 137 | - 138 | - aw 139 | - 140 | - i 141 | - iː 142 | - 143 | - j 144 | - 145 | - ɛː 146 | - ɛ 147 | - 148 | - e 149 | - eː 150 | - 151 | - ej 152 | - 153 | - ʉ 154 | - ʉː 155 | - 156 | - uː 157 | - u 158 | - 159 | - w 160 | - 161 | - ʋ 162 | - 163 | - ɔj 164 | - 165 | - ow 166 | - 167 | - əw 168 | - 169 | - o 170 | - oː 171 | -------------------------------------------------------------------------------- /docs/source/user_guide/models/index.rst: -------------------------------------------------------------------------------- 1 | .. _pretrained_models: 2 | 3 | ***************** 4 | Pretrained models 5 | ***************** 6 | 7 | The command for interacting with MFA models is :code:`mfa model`. The subcommands allow for inspecting currently saved pretrained models, downloading ones from MFA's model repo, and saving models you have trained to be used with a simple name rather than the full path each time. 8 | 9 | Following installation of MFA, :code:`mfa model list acoustic` will not list any models. If you want to download the default English model trained on LibriSpeech, you can run :code:`mfa model download acoustic english_us_arpa`. After that, the previous ``list`` command will output "english_us_arpa" as an option. When referring to an acoustic model in another MFA command, rather than the full path to the acoustic model, you can now supply just ``english_us_arpa`` and MFA will resolve it to the saved path. 10 | 11 | Similarly, if you train a new model, you can run :code:`mfa model save acoustic /path/where/the/model/was/saved.zip`; this model will then be available via ``saved`` in the future. The name defaults to whatever the archive is called without the directory or extension. You can modify this name with the ``--name NEWNAME`` option. 12 | 13 | There are a number of pretrained models for aligning and generating pronunciation dictionaries.
The command 14 | for downloading these is :code:`mfa model download <model_type>` where ``model_type`` is one of ``acoustic``, ``g2p``, or 15 | ``dictionary``. 16 | 17 | .. note:: 18 | 19 | Please see the :xref:`mfa_models` site for information and statistics about various models. 20 | 21 | 22 | Command reference 23 | ----------------- 24 | 25 | .. click:: montreal_forced_aligner.command_line.model:model_cli 26 | :prog: mfa model 27 | :nested: full 28 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_mixed_format_dictionary.txt: -------------------------------------------------------------------------------- 1 | 'm 1.0 m 2 | ’m m 3 | i’m 0.01 ay m ih 4 | this 1.0 0.43 1.23 0.85 dh ih s 5 | is 1.0 0.5 1.0 1.0 ih z 6 | the 1.0 0.5 1.0 1.0 dh ah 7 | acoustic ah k uw s t ih k 8 | corpus k ao r p ah s 9 | i'm ay m 10 | talking t aa k ih ng 11 | pretty p r eh t iy 12 | fast f ae s t 13 | here hh iy r 14 | there's dh eh r z 15 | nothing n ah th ih ng 16 | going g ow ih ng 17 | else eh l s 18 | on ah n 19 | we're w iy r 20 | just jh ah s t 21 | yknow y ah n ow 22 | some s ah m 23 | speech s p iy ch 24 | errors eh r ao r z 25 | but b ah t 26 | who hh uw 27 | cares k ae r z 28 | me m iy 29 | really r iy l iy 30 | slow s l ow 31 | and ae n d 32 | slightly s l ay t l iy 33 | lower l ow w er 34 | in ih n 35 | intensity ih n t eh n s ih t iy 36 | saying s ey ih ng 37 | words w er d z 38 | here's hh iy r z 39 | more m ao r 40 | um ah m 41 | that dh ae t 42 | should sh uh d 43 | be b iy 44 | all aa l 45 | thanks th ae ng k s 46 | uh ah 47 | so s ow 48 | sick s ih k 49 | i ay 50 | have hh ae v 51 | a ah 52 | cold k ow l d 53 | probably p r aa b ah b l iy 54 | sound s aw n d 55 | quite k w ay t 56 | different d ih f er ah n t 57 | than dh ae n 58 | recording r iy k ao r d ih ng 59 | environment eh n v ay r ah n m eh n t 60 | also aa l s ow 61 | bunch b ah n ch 62 | did d ih d 63 | not n aa t 64 | original ao r ih g ih n ah l 65 |
one w ah n 66 | long l aa n g 67 | pause p aa z 68 | think th ih ng k 69 | good g uh d 70 | alright aa l r ay t 71 | much m ah ch 72 | since s ih n s 73 | quality k w aa l ih t iy 74 | of ah v 75 | gonna g ah n ah 76 | cough k aa f 77 | for f ao r 78 | little l ih t ah l 79 | bit b ih t 80 | to t uw 81 | yup y ah p 82 | happened hh ae p ah n d 83 | that's dh ae t s 84 | hopefully hh ow p f uh l iy 85 | levels l eh v ah l z 86 | okay ow k ay 87 | lot l aa t 88 | yeah y ae 89 | -------------------------------------------------------------------------------- /tests/data/dictionaries/test_tabbed_dictionary.txt: -------------------------------------------------------------------------------- 1 | 'm 1.0 m 2 | ’m m 3 | i’m 0.01 ay m ih 4 | this 1.0 0.43 1.23 0.85 dh ih s 5 | is 1.0 0.5 1.0 1.0 ih z 6 | the 1.0 0.5 1.0 1.0 dh ah 7 | acoustic ah k uw s t ih k 8 | corpus k ao r p us 9 | i'm ay m 10 | talking t aa k ih ng 11 | pretty p r eh t iy 12 | fast f ae s t 13 | here hh iy r 14 | there's dh eh r z 15 | nothing n ah th ih ng 16 | going g ow ih ng 17 | else eh l s 18 | on ah n 19 | we're w iy r 20 | just j ah s t 21 | yknow y ah n ow 22 | some s ah m 23 | speech s p iy ch 24 | errors eh r ao r z 25 | but b ah t 26 | who hh uw 27 | cares k ae r z 28 | me m iy 29 | really r iy l iy 30 | slow s l ow 31 | and ae n d 32 | slightly s l ay t l iy 33 | lower l ow w er 34 | in ih n 35 | intensity ih n t eh n s ih t iy 36 | saying s ey ih ng 37 | words w er d z 38 | here's hh iy r z 39 | more m ao r 40 | um ah m 41 | that dh ae t 42 | should sh uh d 43 | be b iy 44 | all aa l 45 | thanks th ae ng k s 46 | uh ah 47 | so s ow 48 | sick s ih k 49 | i ay 50 | have hh ae v 51 | a ah 52 | cold k ow l d 53 | probably p r aa b ah b l iy 54 | sound s aw n d 55 | quite k w ay t 56 | different d ih f er ah n t 57 | than dh ae n 58 | recording r iy k ao r d ih ng 59 | environment eh n v ay r ah n m eh n t 60 | also aa l s ow 61 | bunch b ah n ch 62 | did d ih d 63 | not n aa t 64 | original 
ao r ih g ih n ah l 65 | one w ah n 66 | long l aa n g 67 | pause p aa z 68 | think th ih ng k 69 | good g uh d 70 | alright aa l r ay t 71 | much m ah ch 72 | since s ih n s 73 | quality k w aa l ih t iy 74 | of ah v 75 | gonna g ah n ah 76 | cough k aa f 77 | for f ao r 78 | little l ih t ah l 79 | bit b ih t 80 | just j ah s t 81 | to t uw 82 | yup y ah p 83 | happened hh ae p ah n d 84 | that's dh ae t s 85 | hopefully hh ow p f uh l iy 86 | levels l eh v ah l z 87 | okay ow k ay 88 | lot l aa t 89 | yeah y ae 90 | -------------------------------------------------------------------------------- /montreal_forced_aligner/command_line/server.py: -------------------------------------------------------------------------------- 1 | """Command line functionality for managing servers""" 2 | import rich_click as click 3 | 4 | from montreal_forced_aligner.command_line.utils import ( 5 | common_options, 6 | delete_server, 7 | initialize_configuration, 8 | initialize_server, 9 | start_server, 10 | stop_server, 11 | ) 12 | 13 | 14 | @click.group(name="server", short_help="Start, stop, and delete MFA database servers") 15 | @click.help_option("-h", "--help") 16 | def server_cli(): 17 | pass 18 | 19 | 20 | @server_cli.command(name="init", short_help="Initialize the MFA database server") 21 | @click.help_option("-h", "--help") 22 | @common_options 23 | @click.pass_context 24 | def init_cli(context, **kwargs): 25 | initialize_configuration(context) 26 | initialize_server() 27 | 28 | 29 | @server_cli.command(name="start", short_help="Start the MFA database server") 30 | @click.help_option("-h", "--help") 31 | @common_options 32 | @click.pass_context 33 | def start_cli(context, **kwargs): 34 | initialize_configuration(context) 35 | start_server() 36 | 37 | 38 | @server_cli.command(name="stop", short_help="Stop the MFA database server") 39 | @click.option( 40 | "-m", 41 | "--mode", 42 | help="Mode flag to be passed to pg_ctl", 43 | type=click.Choice(["fast", "immediate", "smart"], 
case_sensitive=False), 44 | default="fast", 45 | ) 46 | @click.help_option("-h", "--help") 47 | @common_options 48 | @click.pass_context 49 | def stop_cli(context, **kwargs): 50 | initialize_configuration(context) 51 | stop_server(mode=kwargs.get("mode", "fast")) 52 | 53 | 54 | @server_cli.command(name="delete", short_help="Delete the MFA database server") 55 | @click.help_option("-h", "--help") 56 | @common_options 57 | @click.pass_context 58 | def delete_cli(context, **kwargs): 59 | initialize_configuration(context) 60 | delete_server() 61 | -------------------------------------------------------------------------------- /bin/mfa_update: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import os 5 | import shutil 6 | import subprocess 7 | import sys 8 | from importlib.util import find_spec 9 | 10 | if __name__ == "__main__": 11 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 12 | parser.add_argument( 13 | "--install_3p", 14 | action="store_true", 15 | help="Install/update third party dependencies (Speechbrain and WhisperX)", 16 | ) 17 | args = parser.parse_args() 18 | anchor_found = find_spec("anchor") is not None 19 | speechbrain_found = find_spec("speechbrain") is not None 20 | whisperx_found = find_spec("whisperx") is not None 21 | 22 | conda_path = shutil.which("conda") 23 | if conda_path is None: 24 | print("Please install conda before running this command.") 25 | sys.exit(1) 26 | mamba_path = shutil.which("mamba") 27 | if mamba_path is None: 28 | print("No mamba found, installing first...") 29 | subprocess.call( 30 | [conda_path, "install", "-c", "conda-forge", "-y", "mamba"], env=os.environ 31 | ) 32 | package_list = ["montreal-forced-aligner", "kalpy", "kaldi=*=cpu*"] 33 | if anchor_found: 34 | package_list.append("anchor-annotator") 35 | subprocess.call( 36 | [mamba_path, "update", "-c", "conda-forge", "-y"] + package_list, 
env=os.environ 37 | ) 38 | if args.install_3p: 39 | channels = ["conda-forge", "pytorch", "nvidia", "anaconda"] 40 | package_list = ["pytorch", "torchaudio"] 41 | if not whisperx_found: 42 | package_list.extend(["cudnn=8", "transformers"]) 43 | command = [mamba_path, "install", "-y"] 44 | for c in channels: 45 | command.extend(["-c", c]) 46 | command += package_list 47 | subprocess.call(command, env=os.environ) 48 | command = ["pip", "install", "-U"] 49 | package_list = ["whisperx", "speechbrain", "pygtrie"] 50 | subprocess.call(command, env=os.environ) 51 | -------------------------------------------------------------------------------- /docs/source/first_steps/tutorials.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _`filing an issue`: https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/issues 3 | 4 | .. _`Montreal Forced Aligner v2 Corpus Phonetics Tutorial`: https://eleanorchodroff.com/tutorial/montreal-forced-aligner.html 5 | 6 | .. _`Phonetic forced alignment with the Montreal Forced Aligner`: https://www.youtube.com/watch?v=Zhj-ccMDj_w 7 | 8 | .. _`How I used Montreal Forced Aligner for a New Language (Sinhalese)`: https://medium.com/breaktheloop/how-i-used-montreal-forced-aligner-for-a-new-language-sinhalese-8f2c22a65a22 9 | 10 | .. _`Bootstrapping an IPA dictionary for English using Montreal Forced Aligner 2.0`: https://mmcauliffe.medium.com/creating-english-ipa-dictionary-using-montreal-forced-aligner-2-0-242415dfee32 11 | 12 | .. _`Update on Montreal Forced Aligner performance`: https://memcauliffe.com/update-on-montreal-forced-aligner-performance.html 13 | .. _`Speaker dictionaries and multilingual IPA`: https://memcauliffe.com/speaker-dictionaries-and-multilingual-ipa.html 14 | 15 | .. _tutorials: 16 | 17 | External tutorials 18 | ================== 19 | 20 | I will try to keep this updated with a list of in-depth tutorials for using MFA. 
If you write up anything that could be included here, please let me know by `filing an issue`_ and I will add it. 21 | 22 | * `A Gentle Guide to Montreal Forced Aligner `_ (Updated for MFA 3.0) 23 | * Courtesy of :xref:`chenzixu` 24 | * `Montreal Forced Aligner v2 Corpus Phonetics Tutorial`_ (Based on MFA 2.0) 25 | * Courtesy of :xref:`chodroff` 26 | * `Phonetic forced alignment with the Montreal Forced Aligner`_ (YouTube recording) 27 | * Courtesy of :xref:`chodroff` and :xref:`rutgers_spanish_portuguese` 28 | * `How I used Montreal Forced Aligner for a New Language (Sinhalese)`_ 29 | * Courtesy of :xref:`dias` 30 | * `Bootstrapping an IPA dictionary for English using Montreal Forced Aligner 2.0`_ 31 | * `Update on Montreal Forced Aligner performance`_ 32 | * `Speaker dictionaries and multilingual IPA`_ 33 | -------------------------------------------------------------------------------- /docs/source/changelog/changelog_3.1.rst: -------------------------------------------------------------------------------- 1 | 2 | .. 
_changelog_3.1: 3 | 4 | ************* 5 | 3.1 Changelog 6 | ************* 7 | 8 | 3.1.4 9 | ----- 10 | 11 | - Optimized :code:`mfa g2p` to better use multiple processes 12 | - Added :code:`--export_scores` to :code:`mfa g2p` for adding a column representing the final weights of the generated pronunciations 13 | - Added :code:`--output_directory` to :code:`mfa validate` to save generated validation files rather than the temporary directory 14 | - Fixed a bug in cutoff modeling that was preventing them from being properly parsed 15 | 16 | 3.1.3 17 | ----- 18 | 19 | - Fixed an issue where silence probability being zero was not correctly removing silence 20 | - Compatibility with kalpy v0.6.5 21 | - Added API functionality for verifying transcripts with interjection words in alignment 22 | - Fixed an error in fine tuning that generated nonsensical boundaries 23 | 24 | 3.1.2 25 | ----- 26 | 27 | - Fixed a bug where hidden files and folders would be parsed as corpus data 28 | - Fixed a bug where validation would not respect :code:`--no_final_clean` 29 | - Fixed a rare crash in training when a job would not have utterances assigned to it 30 | - Fixed a bug where MFA would mistakenly report a dictionary and acoustic model phones did not match for older versions 31 | 32 | 3.1.1 33 | ----- 34 | 35 | - Fixed an issue with TextGrids missing intervals 36 | 37 | 3.1.0 38 | ----- 39 | 40 | - Fixed a bug where cutoffs were not properly modelled 41 | - Added additional filter on create subset to not include utterances with cutoffs in smaller subsets 42 | - Added the ability to specify HMM topologies for phones 43 | - Fixed issues caused by validators not cleaning up temporary files and databases 44 | - Added support for default and nonnative dictionaries generated from other dictionaries 45 | - Restricted initial training rounds to exclude default and nonnative dictionaries 46 | - Changed clustering of phones to not mix silence and non-silence phones 47 | - Optimized textgrid 
export 48 | - Added better memory management for collecting alignments 49 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Montreal Forced Aligner documentation 3 | ===================================== 4 | 5 | .. grid:: 2 6 | 7 | .. grid-item-card:: Getting started 8 | :text-align: center 9 | 10 | :fas:`running;fa-6x i-navigation` 11 | 12 | ^^^ 13 | 14 | Install the Montreal Forced Aligner and get started with examples and tutorials. 15 | 16 | +++ 17 | 18 | .. button-ref:: getting_started 19 | :expand: 20 | :color: primary 21 | 22 | Install MFA 23 | 24 | .. grid-item-card:: First steps 25 | :text-align: center 26 | 27 | :fas:`terminal;fa-6x i-navigation` 28 | 29 | ^^^ 30 | 31 | Have a particular use case for MFA? 32 | 33 | Check out the first steps tutorials. 34 | 35 | +++ 36 | 37 | .. button-ref:: first_steps 38 | :expand: 39 | :color: primary 40 | 41 | First steps 42 | 43 | .. grid-item-card:: User guide 44 | :text-align: center 45 | 46 | :fas:`book-open;fa-6x i-navigation` 47 | 48 | ^^^ 49 | 50 | The User Guide gives more details on input formats, available commands, and details on the various workflows available. 51 | 52 | +++ 53 | 54 | .. button-ref:: user_guide 55 | :expand: 56 | :color: primary 57 | 58 | User guide 59 | 60 | .. grid-item-card:: API reference 61 | :text-align: center 62 | 63 | :fas:`file-code;fa-6x i-navigation` 64 | 65 | ^^^ 66 | 67 | The API guide lists all the inner workings of MFA, the modules and classes that you can import and use in your own scripts and projects, along with details about the Kaldi functionality used. 68 | 69 | +++ 70 | 71 | .. button-ref:: mfa_api 72 | :expand: 73 | :color: primary 74 | 75 | Reference guide 76 | 77 | .. 
toctree:: 78 | :hidden: 79 | 80 | Getting started 81 | User guide 82 | API reference 83 | Changelog 84 | -------------------------------------------------------------------------------- /tests/test_commandline_train_dict.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import click.testing 4 | import sqlalchemy.orm 5 | 6 | from montreal_forced_aligner.command_line.mfa import mfa_cli 7 | 8 | 9 | def test_train_dict( 10 | basic_corpus_dir, 11 | english_dictionary, 12 | english_acoustic_model, 13 | generated_dir, 14 | temp_dir, 15 | basic_align_config_path, 16 | db_setup, 17 | ): 18 | output_path = generated_dir.joinpath("trained_dict") 19 | command = [ 20 | "train_dictionary", 21 | basic_corpus_dir, 22 | english_dictionary, 23 | english_acoustic_model, 24 | output_path, 25 | "-q", 26 | "--clean", 27 | "--debug", 28 | "--silence_probabilities", 29 | "--config_path", 30 | basic_align_config_path, 31 | "--use_mp", 32 | ] 33 | command = [str(x) for x in command] 34 | result = click.testing.CliRunner().invoke( 35 | mfa_cli, command, catch_exceptions=True 36 | ) 37 | print(result.stdout) 38 | print(result.stderr) 39 | if result.exception: 40 | print(result.exc_info) 41 | raise result.exception 42 | assert not result.return_value 43 | 44 | dict_path = os.path.join(output_path, "english_us_arpa.dict") 45 | assert os.path.exists(output_path) 46 | sqlalchemy.orm.close_all_sessions() 47 | textgrid_output = generated_dir.joinpath("trained_dict_output") 48 | command = [ 49 | "align", 50 | basic_corpus_dir, 51 | dict_path, 52 | english_acoustic_model, 53 | textgrid_output, 54 | "-q", 55 | "--clean", 56 | "--debug", 57 | "--config_path", 58 | basic_align_config_path, 59 | ] 60 | command = [str(x) for x in command] 61 | result = click.testing.CliRunner().invoke( 62 | mfa_cli, command, catch_exceptions=True 63 | ) 64 | print(result.stdout) 65 | print(result.stderr) 66 | if result.exception: 67 | print(result.exc_info) 68 | 
raise result.exception 69 | assert not result.return_value 70 | assert os.path.exists(textgrid_output) 71 | -------------------------------------------------------------------------------- /tests/test_commandline_remap.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import click.testing 4 | 5 | from montreal_forced_aligner.command_line.mfa import mfa_cli 6 | 7 | 8 | def test_remap_dictionary( 9 | english_us_mfa_dictionary, 10 | english_arpa_remapping_path, 11 | english_acoustic_model, 12 | generated_dir, 13 | temp_dir, 14 | db_setup, 15 | ): 16 | output_path = generated_dir.joinpath("remapped_dictionary.txt") 17 | command = [ 18 | "remap_dictionary", 19 | english_us_mfa_dictionary, 20 | english_acoustic_model, 21 | english_arpa_remapping_path, 22 | output_path, 23 | "-q", 24 | "--clean", 25 | "--debug", 26 | "--no_use_mp", 27 | "--no_use_postgres", 28 | "-v", 29 | ] 30 | command = [str(x) for x in command] 31 | result = click.testing.CliRunner().invoke(mfa_cli, command, catch_exceptions=True) 32 | print(result.stdout) 33 | print(result.stderr) 34 | if result.exception: 35 | print(result.exc_info) 36 | raise result.exception 37 | assert not result.return_value 38 | 39 | assert os.path.exists(output_path) 40 | 41 | 42 | def test_remap_alignments( 43 | mfa_example_aligned_dir, 44 | english_arpa_remapping_path, 45 | generated_dir, 46 | temp_dir, 47 | db_setup, 48 | ): 49 | output_path = generated_dir.joinpath("remapped_alignments") 50 | command = [ 51 | "remap", 52 | "alignments", 53 | mfa_example_aligned_dir, 54 | english_arpa_remapping_path, 55 | output_path, 56 | "-q", 57 | "--clean", 58 | "--debug", 59 | "--no_use_mp", 60 | "--no_use_postgres", 61 | "-v", 62 | ] 63 | command = [str(x) for x in command] 64 | result = click.testing.CliRunner().invoke(mfa_cli, command, catch_exceptions=True) 65 | print(result.stdout) 66 | print(result.stderr) 67 | if result.exception: 68 | print(result.exc_info) 69 | raise 
result.exception 70 | assert not result.return_value 71 | 72 | assert os.path.exists(os.path.join(output_path, "michael", "mfa_michael.TextGrid")) 73 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=45", "wheel", "setuptools_scm>=6.2" 4 | ] 5 | build-backend = "setuptools.build_meta" 6 | 7 | [tool.setuptools_scm] 8 | write_to = "montreal_forced_aligner/_version.py" 9 | 10 | [tool.black] 11 | line-length = 99 12 | 13 | [tool.flake8] 14 | max-line-length = 99 15 | extend-ignore = ["D203", "E203", "E251", "E266", "E302", "E305", "E401", "E402", "E501", "F401", "F403", "W503"] 16 | exclude = [".git", "__pycache__", "dist", "build"] 17 | 18 | [tool.isort] 19 | line_length = 99 20 | profile = "black" 21 | known_first_party = [ 22 | "montreal_forced_aligner" 23 | ] 24 | 25 | [tool.interrogate] 26 | ignore-init-method = true 27 | ignore-init-module = false 28 | ignore-magic = false 29 | ignore-semiprivate = false 30 | ignore-private = false 31 | ignore-module = false 32 | ignore-property-decorators = false 33 | fail-under = 95 34 | exclude = [ 35 | "tests", 36 | "build", 37 | "dist", 38 | "setup.py", 39 | "docs" 40 | ] 41 | verbose = 100 42 | omit-covered-files = false 43 | quiet = false 44 | generate-badge = "docs/source/_static" 45 | badge-format = "svg" 46 | whitelist-regex = [] 47 | ignore-regex = [] 48 | color = true 49 | 50 | 51 | [tool.check-manifest] 52 | ignore = [ 53 | ".deepsource.toml", 54 | ".readthedocs.yaml", 55 | ] 56 | 57 | [tool.coverage.run] 58 | source = ["montreal_forced_aligner"] 59 | concurrency = ["multiprocessing"] 60 | branch = true 61 | parallel = true 62 | omit = [ 63 | ".tox/*" 64 | ] 65 | 66 | 67 | [tool.coverage.report] 68 | show_missing = true 69 | exclude_lines = [ 70 | "pragma: no cover", 71 | "if __name__ == .__main__.:", 72 | "raise AssertionError", 73 | 
"raise NotImplementedError", 74 | "pass", 75 | "if sys.platform", 76 | "except ImportError:", 77 | "except KeyboardInterrupt:", 78 | "except Exception as e:", 79 | "except Exception:", 80 | "if call_back", 81 | "if is_set", 82 | "if TYPE_CHECKING:", 83 | "def history_save_handler() -> None:", 84 | "class ExitHooks(object):", 85 | "def main() -> None:", 86 | "if os.path.exists", 87 | "@abstractmethod", 88 | 'if "MFA_ERROR"', 89 | ] 90 | fail_under = 50 91 | -------------------------------------------------------------------------------- /docs/source/user_guide/corpus_creation/create_segments.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _create_segments: 3 | 4 | Segment transcribed files ``(mfa segment)`` 5 | =========================================== 6 | 7 | The Montreal Forced Aligner can use Voice Activity Detection (VAD) capabilities from :xref:`speechbrain` to generate segments from 8 | a longer sound file, while attempting to segment transcripts as well. If you do not have transcripts, see :ref:`create_segments_vad`. 9 | 10 | .. note:: 11 | 12 | On Windows, if you get an ``OSError/WinError 1314`` during the run, follow `these instructions `_ to enable symbolic link creation permissions. 13 | 14 | Command reference 15 | ----------------- 16 | 17 | .. click:: montreal_forced_aligner.command_line.create_segments:create_segments_cli 18 | :prog: mfa segment 19 | :nested: full 20 | 21 | 22 | Configuration reference 23 | ----------------------- 24 | 25 | - :ref:`configuration_segmentation` 26 | 27 | API reference 28 | ------------- 29 | 30 | - :ref:`segmentation_api` 31 | 32 | .. _create_segments_vad: 33 | 34 | Segment untranscribed files ``(mfa segment_vad)`` 35 | ================================================= 36 | 37 | The Montreal Forced Aligner can use Voice Activity Detection (VAD) capabilities from :xref:`speechbrain` or energy based VAD to generate segments from 38 | a longer sound file. 
This command does not split transcripts, instead assigning a default label of "speech" to all identified speech segments. If you would like to preserve transcripts for each segment, see :ref:`create_segments`. 39 | 40 | .. note:: 41 | 42 | On Windows, if you get an ``OSError/WinError 1314`` during the run, follow `these instructions `_ to enable symbolic link creation permissions. 43 | 44 | Command reference 45 | ----------------- 46 | 47 | .. click:: montreal_forced_aligner.command_line.create_segments:create_segments_vad_cli 48 | :prog: mfa segment_vad 49 | :nested: full 50 | 51 | 52 | Configuration reference 53 | ----------------------- 54 | 55 | - :ref:`configuration_segmentation` 56 | 57 | API reference 58 | ------------- 59 | 60 | - :ref:`segmentation_api` 61 | -------------------------------------------------------------------------------- /docs/source/user_guide/implementations/alignment_evaluation.md: -------------------------------------------------------------------------------- 1 | 2 | (alignment_evaluation)= 3 | # Evaluating alignments 4 | 5 | Alignments can be compared to a gold-standard reference set by specifying the `--reference_directory` below. MFA will load all TextGrids and parse them as if they were exported by MFA (i.e., phone and speaker tiers per speaker). The phone intervals will be aligned using the {mod}`Bio.pairwise2` alignment algorithm. If the reference TextGrids use a different phone set, then a custom mapping yaml file can be specified via the `--custom_mapping_path`. As an example, the Buckeye reference alignments used in [Update on Montreal Forced Aligner performance](https://memcauliffe.com/update-on-montreal-forced-aligner-performance.html) use its own ARPA-based phone set that removes stress integers, is lower case, and has syllabic sonorants. 
To map alignments generated with the `english` model and dictionary that use standard ARPA, a yaml file like the following allows for a better alignment of reference phones to aligned phones. 6 | 7 | :::yaml 8 | N: [en, n] 9 | M: [em, m] 10 | L: [el, l] 11 | AA0: aa 12 | AE0: ae 13 | AH0: ah 14 | AO0: ao 15 | AW0: aw 16 | ::: 17 | 18 | Using the above file, both {ipa_inline}`en` and {ipa_inline}`n` phones in the Buckeye corpus will not be penalized when matched with {ipa_inline}`N` phones output by MFA. 19 | 20 | In addition to any custom mapping, phone boundaries are used in the cost function for the {mod}`Bio.pairwise2` alignment algorithm as follows: 21 | 22 | :::{math} 23 | Overlap \: cost = -1 * \biggl(\lvert begin_{aligned} - begin_{ref} \rvert + \lvert end_{aligned} - end_{ref} \rvert + \begin{cases} 24 | 0, & label_{1} = label_{2} \\ 25 | 2, & otherwise 26 | \end{cases}\biggr) 27 | ::: 28 | 29 | The two metrics calculated for each utterance are overlap score and phone error rate. 
def _invoke_mfa(arguments):
    """
    Run the MFA command line interface with the given arguments

    Each argument is converted to a string before invocation.  The captured
    stdout and stderr are printed, and an exception recorded on the result is
    printed and re-raised so that the calling test fails with it.
    """
    argv = [str(argument) for argument in arguments]
    runner = click.testing.CliRunner()
    result = runner.invoke(mfa_cli, argv, catch_exceptions=True)
    print(result.stdout)
    print(result.stderr)
    if result.exception:
        print(result.exc_info)
        raise result.exception
    return result


def test_adapt_basic(
    basic_corpus_dir,
    generated_dir,
    english_dictionary,
    temp_dir,
    test_align_config,
    english_acoustic_model,
    db_setup,
):
    """Run ``mfa adapt`` on the basic corpus and check that the adapted model file exists"""
    adapted_model_path = generated_dir.joinpath("basic_adapted.zip")
    positional = [
        basic_corpus_dir,
        english_dictionary,
        english_acoustic_model,
        adapted_model_path,
    ]
    options = ["--beam", "100", "--clean", "--no_debug", "-p", "test"]
    _invoke_mfa(["adapt", *positional, *options])
    assert os.path.exists(adapted_model_path)


def test_adapt_multilingual(
    multilingual_ipa_corpus_dir,
    mfa_speaker_dict_path,
    generated_dir,
    temp_dir,
    basic_align_config_path,
    english_acoustic_model,
    english_mfa_acoustic_model,
    db_setup,
):
    """Run ``mfa adapt`` on the multilingual IPA corpus and check that the adapted model file exists"""
    adapted_model_path = generated_dir.joinpath("multilingual_adapted.zip")
    output_path = generated_dir.joinpath("multilingual_output")
    positional = [
        multilingual_ipa_corpus_dir,
        mfa_speaker_dict_path,
        english_mfa_acoustic_model,
        adapted_model_path,
        output_path,
    ]
    options = [
        "--config_path",
        basic_align_config_path,
        "-q",
        "--clean",
        "--no_debug",
        "-p",
        "test",
    ]
    _invoke_mfa(["adapt", *positional, *options])
    assert os.path.exists(adapted_model_path)
16 | - Any issues reading sound files 17 | - Any issues generating features, skipped if ``--ignore_acoustics`` is flagged 18 | - Mismatches between sound files and transcriptions 19 | - Any issues reading transcription files 20 | - Any unaligned files from the trial alignment run, skipped if ``--ignore_acoustics`` is flagged 21 | - If no acoustic model is specified, a monophone model is trained for testing alignment 22 | 23 | - Any files whose decoded transcriptions (generated using a simple language model) deviate from their original transcriptions, when ``--test_transcriptions`` is supplied 24 | - Ngram language models for each speaker are generated and merged with models for each utterance for use in decoding utterances, which may help you find transcription or data inconsistency issues in the corpus 25 | 26 | .. _phone_confidence: 27 | 28 | Phone confidence 29 | ================ 30 | 31 | .. warning:: 32 | 33 | This functionality is deprecated and will be removed in MFA 4.0. 34 | 35 | The phone confidence functionality of the validation utility is similar to :ref:`phone_models` in that both are trying to represent the "goodness" of the phone label for the given interval. Where phone models use the acoustic model in combination with a phone language model, phone confidence simply calculates the likelihoods of each phone for each frame. 36 | 37 | .. _running_the_validator: 38 | 39 | Running the corpus validation utility 40 | ===================================== 41 | 42 | 43 | Command reference 44 | ----------------- 45 | 46 | .. 
@click.command(
    name="tokenize",
    context_settings=dict(
        ignore_unknown_options=True,
        allow_extra_args=True,
        allow_interspersed_args=True,
    ),
    short_help="Tokenize utterances",
)
@click.argument(
    "input_path", type=click.Path(exists=True, file_okay=True, dir_okay=True, path_type=Path)
)
@click.argument("tokenizer_model_path", type=click.UNPROCESSED, callback=validate_tokenizer_model)
@click.argument(
    "output_directory", type=click.Path(file_okay=False, dir_okay=True, path_type=Path)
)
@click.option(
    "--config_path",
    "-c",
    # This command tokenizes with an existing model; it does not train anything.
    help="Path to config file to use for tokenization.",
    type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path),
)
@common_options
@click.help_option("-h", "--help")
@click.pass_context
def tokenize_cli(context, **kwargs) -> None:
    """
    Tokenize utterances with a trained tokenizer model
    """
    # NOTE: the docstring above is rendered by click as the command's help text,
    # so implementation notes are kept in comments instead.
    initialize_configuration(context)

    # ``config_path`` is an optional option, so it may be None.
    config_path = kwargs.get("config_path")
    input_path = kwargs["input_path"]
    tokenizer_model_path = kwargs["tokenizer_model_path"]
    output_directory = kwargs["output_directory"]

    # Unknown options are allowed by the context settings; they arrive in
    # ``context.args`` and are handed to ``parse_parameters`` together with the
    # declared parameters and the optional config file.
    tokenizer = CorpusTokenizer(
        corpus_directory=input_path,
        tokenizer_model_path=tokenizer_model_path,
        **CorpusTokenizer.parse_parameters(config_path, context.params, context.args),
    )

    try:
        tokenizer.setup()
        tokenizer.tokenize_utterances()
        tokenizer.export_files(output_directory)
    except Exception:
        # Flag the failed run before re-raising; presumably ``cleanup`` consults
        # ``dirty`` -- confirm against CorpusTokenizer.
        tokenizer.dirty = True
        raise
    finally:
        # Always runs, whether the steps above succeeded or raised.
        tokenizer.cleanup()
name: Run tests 56 | env: 57 | GITHUB_TOKEN: ${{ secrets.MFA_GITHUB_TOKEN }} 58 | shell: bash -l {0} 59 | run: pytest -x ./tests 60 | 61 | - name: Stop database 62 | shell: bash -l {0} 63 | run: python -m montreal_forced_aligner server stop -p test 64 | 65 | - name: Check init database log 66 | shell: bash -l {0} 67 | run: cat ~/Documents/MFA/pg_init_log_test.txt 68 | 69 | - name: Check database start log 70 | shell: bash -l {0} 71 | run: cat ~/Documents/MFA/pg_log_test.txt 72 | 73 | - name: "Upload coverage to Codecov" 74 | uses: "codecov/codecov-action@v3" 75 | with: 76 | file: ./coverage.xml 77 | fail_ci_if_error: false 78 | -------------------------------------------------------------------------------- /docs/source/changelog/changelog_2.1.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _changelog_2.1: 3 | 4 | ************* 5 | 2.1 Changelog 6 | ************* 7 | 8 | 2.1.6 9 | ===== 10 | 11 | - Fix for issue with ignore_case flag not being respected 12 | - Fixed a hang in speaker diarization 13 | - Fixed an error related to paths ending in trailing slashes which caused MFA to try to connect to a database named after the local user 14 | - Partial migration to using :class:`pathlib.Path` instead of :mod:`os.path` 15 | 16 | 2.1.5 17 | ===== 18 | 19 | - Fix for improperly reset databases 20 | 21 | 2.1.4 22 | ===== 23 | 24 | - Change how database connections are made to remove pooling 25 | 26 | 2.1.3 27 | ===== 28 | 29 | - Fixed a bug with intervals after the end of the sound file having negative duration (they are now not parsed) 30 | - Fixed an issue where utterances were not properly assigned to the correct channels 31 | - Modified the logic for connections to attempt to solve error with too many clients 32 | 33 | 2.1.2 34 | ===== 35 | 36 | - Fixed a crash in training when the debug flag was not set 37 | - Set default postgresql port to 5433 to avoid conflicts with any system installations 38 | - Fixed a crash in 
try:
    import jamo
    from mecab import MeCab

    KO_AVAILABLE = True
except ImportError:  # also covers ModuleNotFoundError, which subclasses ImportError
    KO_AVAILABLE = False
    MeCab = None
    jamo = None


class KoreanTokenizer:
    """
    Callable that splits Korean text into tokens and jamo "pronunciations"

    The text is parsed with :class:`mecab.MeCab`; each resulting morpheme
    surface becomes a token (or is merged into the previous token, see
    :meth:`__call__`) and is passed through :func:`jamo.h2j` to build a parallel
    pronunciation token.

    Parameters
    ----------
    ignore_case: bool
        Flag for lower-casing both returned strings, defaults to True

    Raises
    ------
    ImportError
        If the optional ``mecab``/``jamo`` packages could not be imported
    """

    _INSTALL_MESSAGE = "Please install Korean support via `pip install python-mecab-ko jamo`"

    # Compiled once here rather than on every morpheme in the loop.
    # A closing bracket directly followed by an opening bracket, e.g. ")(" or "][".
    _CLOSE_THEN_OPEN = re.compile(r"[]})>][<({[]")
    _STARTS_WITH_OPENING = re.compile(r"^[<({\[].*")
    _ENDS_WITH_CONNECTOR = re.compile(r".*[-_~]$")
    _ENDS_WITH_CLOSING = re.compile(r".*[>)}\]]$")
    _STARTS_WITH_CONNECTOR = re.compile(r"^[-_~].*")

    _CLOSING_BRACKETS = (">", ")", "}", "]")
    _OPENING_BRACKETS = frozenset({"<", "(", "{", "["})
    # Part-of-speech tags whose morphemes are dropped from the output
    # (presumably MeCab-ko punctuation/symbol tags -- confirm against the tag set).
    _SKIPPED_POS = frozenset({"SF", "SY", "SC"})

    def __init__(self, ignore_case: bool = True):
        # Fail with an actionable message instead of
        # "TypeError: 'NoneType' object is not callable" from ``MeCab()``.
        if not KO_AVAILABLE:
            raise ImportError(self._INSTALL_MESSAGE)
        self.ignore_case = ignore_case
        self.tokenizer = MeCab()

    def __call__(self, text):
        """
        Tokenize a string of Korean text

        Parameters
        ----------
        text: str
            Text to tokenize

        Returns
        -------
        str
            Space-separated tokens
        str
            Space-separated jamo decompositions, parallel to the tokens
        """
        new_text = []
        pronunciations = []
        for morph in self.tokenizer.parse(text):
            normalized = morph.surface
            join = False
            if new_text:
                previous = new_text[-1]
                boundary = self._CLOSE_THEN_OPEN.search(normalized)
                if boundary:
                    # The morpheme straddles two bracketed spans: the part up to
                    # and including the closing bracket belongs to the previous
                    # token, the rest starts a new one.
                    closing_part = normalized[: boundary.start() + 1]
                    new_text[-1] += closing_part
                    # Keep the pronunciation token in step with the text token;
                    # previously only the text token received the closing part.
                    pronunciations[-1] += jamo.h2j(closing_part)
                    normalized = normalized[boundary.end() - 1 :]
                elif (
                    self._STARTS_WITH_OPENING.match(previous)
                    or self._ENDS_WITH_CONNECTOR.match(previous)
                    or self._ENDS_WITH_CLOSING.match(normalized)
                    or self._STARTS_WITH_CONNECTOR.match(normalized)
                ):
                    join = True
                # A previous token that is already closed never absorbs more text.
                if new_text[-1].endswith(self._CLOSING_BRACKETS):
                    join = False
            if join:
                new_text[-1] += normalized
                pronunciations[-1] += jamo.h2j(normalized)
                continue
            if morph.pos in self._SKIPPED_POS and normalized not in self._OPENING_BRACKETS:
                continue
            new_text.append(normalized)
            pronunciations.append(jamo.h2j(normalized))
        new_text = " ".join(new_text)
        pronunciations = " ".join(pronunciations)
        if self.ignore_case:
            new_text = new_text.lower()
            pronunciations = pronunciations.lower()
        return new_text, pronunciations


def ko_spacy(ignore_case: bool = True):
    """
    Create a Korean tokenizer

    Parameters
    ----------
    ignore_case: bool
        Flag for lower-casing the tokenizer output, defaults to True

    Returns
    -------
    KoreanTokenizer
        Tokenizer callable

    Raises
    ------
    ImportError
        If the optional ``mecab``/``jamo`` packages could not be imported
    """
    if not KO_AVAILABLE:
        raise ImportError("Please install Korean support via `pip install python-mecab-ko jamo`")
    return KoreanTokenizer(ignore_case)