├── .deepsource.toml
├── .dockerignore
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
└── workflows
│ ├── main.yml
│ ├── publish.yml
│ └── publish_docker.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── Dockerfile
├── LICENSE
├── README.md
├── bin
└── mfa_update
├── ci
└── docker_environment.yaml
├── docs
├── Makefile
├── make.bat
└── source
│ ├── _static
│ ├── MFA_default.svg
│ ├── MFA_dnn.svg
│ ├── MFA_dnn_ivectors.svg
│ ├── MFA_paper_Interspeech2017.pdf
│ ├── MFA_poster_LSA2017.pdf
│ ├── because.svg
│ ├── css
│ │ └── mfa.css
│ ├── favicon.ico
│ ├── fonts
│ │ ├── GentiumPlus-Bold.woff
│ │ ├── GentiumPlus-Bold.woff2
│ │ ├── GentiumPlus-BoldItalic.woff
│ │ ├── GentiumPlus-BoldItalic.woff2
│ │ ├── GentiumPlus-Italic.woff
│ │ ├── GentiumPlus-Italic.woff2
│ │ ├── GentiumPlus-Regular.woff
│ │ └── GentiumPlus-Regular.woff2
│ ├── interrogate_badge.svg
│ ├── librispeech_textgrid.png
│ ├── logo.svg
│ ├── logo_dark.svg
│ ├── logo_light.svg
│ ├── logo_long.svg
│ ├── logo_long_dark.svg
│ ├── logo_long_light.svg
│ ├── logo_stacked.svg
│ ├── logo_stacked_dark.svg
│ ├── logo_stacked_light.svg
│ ├── lot.svg
│ ├── multiple_speakers_output_textgrid.png
│ ├── multiple_speakers_textgrid.png
│ ├── sound_files
│ │ ├── english_fast.svg
│ │ ├── english_fast.wav
│ │ ├── english_slow.svg
│ │ ├── english_slow.wav
│ │ ├── english_t.svg
│ │ ├── english_t.wav
│ │ ├── english_t_it's.svg
│ │ ├── english_t_it.svg
│ │ ├── english_t_itself.svg
│ │ ├── english_t_just.svg
│ │ ├── english_t_onto.svg
│ │ ├── english_t_righted.svg
│ │ ├── english_t_stop.svg
│ │ ├── english_t_tipped.svg
│ │ ├── english_t_to.svg
│ │ ├── english_t_top.svg
│ │ ├── english_t_truck.svg
│ │ ├── japanese_fast.svg
│ │ ├── japanese_fast.wav
│ │ ├── japanese_slow.svg
│ │ └── japanese_slow.wav
│ └── the.svg
│ ├── _templates
│ ├── autosummary
│ │ ├── attribute.rst
│ │ ├── base.rst
│ │ ├── class.rst
│ │ ├── function.rst
│ │ ├── method.rst
│ │ └── property.rst
│ └── version.html
│ ├── changelog
│ ├── changelog_1.0.rst
│ ├── changelog_2.0.rst
│ ├── changelog_2.0_pre_release.rst
│ ├── changelog_2.1.rst
│ ├── changelog_2.2.rst
│ ├── changelog_3.0.rst
│ ├── changelog_3.1.rst
│ ├── changelog_3.2.rst
│ ├── index.md
│ ├── news_1.1.rst
│ ├── news_2.0.rst
│ ├── news_2.1.rst
│ └── news_3.0.rst
│ ├── conf.py
│ ├── external_links.py
│ ├── first_steps
│ ├── example.rst
│ ├── index.rst
│ └── tutorials.rst
│ ├── getting_started.rst
│ ├── index.rst
│ ├── installation.rst
│ ├── reference
│ ├── acoustic_modeling
│ │ ├── helper.rst
│ │ ├── index.rst
│ │ └── training.rst
│ ├── alignment
│ │ ├── alignment.rst
│ │ ├── helper.rst
│ │ └── index.rst
│ ├── core_index.rst
│ ├── corpus
│ │ └── index.rst
│ ├── database
│ │ └── index.rst
│ ├── diarization
│ │ ├── helper.rst
│ │ ├── index.rst
│ │ └── main.rst
│ ├── dictionary
│ │ ├── helper.rst
│ │ ├── index.rst
│ │ ├── main.rst
│ │ └── training.rst
│ ├── g2p
│ │ ├── generator.rst
│ │ ├── helper.rst
│ │ └── index.rst
│ ├── g2p_modeling
│ │ ├── helper.rst
│ │ ├── index.rst
│ │ └── training.rst
│ ├── helper
│ │ ├── abc.rst
│ │ ├── config.rst
│ │ ├── data.rst
│ │ ├── exceptions.rst
│ │ ├── helper.rst
│ │ ├── index.rst
│ │ ├── textgrid.rst
│ │ └── utils.rst
│ ├── index.rst
│ ├── ivector
│ │ ├── helper.rst
│ │ ├── index.rst
│ │ └── training.rst
│ ├── language_modeling
│ │ ├── helper.rst
│ │ ├── index.rst
│ │ └── training.rst
│ ├── segmentation
│ │ ├── helper.rst
│ │ ├── index.rst
│ │ └── main.rst
│ ├── server
│ │ └── index.rst
│ ├── tokenization
│ │ ├── helper.rst
│ │ ├── index.rst
│ │ ├── tokenizer.rst
│ │ └── training.rst
│ ├── top_level_index.rst
│ ├── transcription
│ │ ├── helper.rst
│ │ ├── index.rst
│ │ └── main.rst
│ └── validation
│ │ ├── helper.rst
│ │ ├── index.rst
│ │ └── main.rst
│ └── user_guide
│ ├── commands.rst
│ ├── concepts
│ ├── features.md
│ ├── fst.md
│ ├── hmm.md
│ ├── index.rst
│ └── speaker_adaptation.md
│ ├── configuration
│ ├── acoustic_model_adapt.rst
│ ├── acoustic_modeling.rst
│ ├── diarization.rst
│ ├── g2p.rst
│ ├── global.rst
│ ├── index.rst
│ ├── ivector.rst
│ ├── lm.rst
│ ├── segment.rst
│ └── transcription.rst
│ ├── corpus_creation
│ ├── anchor.rst
│ ├── create_segments.rst
│ ├── diarize_speakers.rst
│ ├── index.rst
│ ├── tokenize.rst
│ ├── train_ivector.rst
│ ├── train_tokenizer.rst
│ ├── training_dictionary.rst
│ ├── training_lm.rst
│ └── transcribing.rst
│ ├── corpus_structure.rst
│ ├── data_validation.rst
│ ├── dictionary.rst
│ ├── dictionary_validation.rst
│ ├── glossary.rst
│ ├── implementations
│ ├── alignment_analysis.md
│ ├── alignment_evaluation.md
│ ├── fine_tune.md
│ ├── index.md
│ ├── lexicon_probabilities.md
│ ├── phone_groups.md
│ ├── phone_models.md
│ └── phonological_rules.md
│ ├── index.rst
│ ├── models
│ └── index.rst
│ ├── server
│ └── index.rst
│ ├── troubleshooting.rst
│ └── workflows
│ ├── adapt_acoustic_model.rst
│ ├── alignment.rst
│ ├── dictionary_generating.rst
│ ├── g2p_train.rst
│ ├── index.rst
│ └── train_acoustic_model.rst
├── environment.yml
├── github_environment.yml
├── montreal_forced_aligner
├── __init__.py
├── __main__.py
├── abc.py
├── acoustic_modeling
│ ├── __init__.py
│ ├── base.py
│ ├── lda.py
│ ├── monophone.py
│ ├── pronunciation_probabilities.py
│ ├── sat.py
│ ├── trainer.py
│ └── triphone.py
├── alignment
│ ├── __init__.py
│ ├── adapting.py
│ ├── base.py
│ ├── mixins.py
│ ├── multiprocessing.py
│ └── pretrained.py
├── command_line
│ ├── __init__.py
│ ├── adapt.py
│ ├── align.py
│ ├── align_one.py
│ ├── anchor.py
│ ├── configure.py
│ ├── create_segments.py
│ ├── diarize_speakers.py
│ ├── g2p.py
│ ├── history.py
│ ├── mfa.py
│ ├── model.py
│ ├── server.py
│ ├── tokenize.py
│ ├── train_acoustic_model.py
│ ├── train_dictionary.py
│ ├── train_g2p.py
│ ├── train_ivector_extractor.py
│ ├── train_lm.py
│ ├── train_tokenizer.py
│ ├── transcribe.py
│ ├── utils.py
│ └── validate.py
├── config.py
├── corpus
│ ├── __init__.py
│ ├── acoustic_corpus.py
│ ├── base.py
│ ├── classes.py
│ ├── features.py
│ ├── helper.py
│ ├── ivector_corpus.py
│ ├── multiprocessing.py
│ └── text_corpus.py
├── data.py
├── db.py
├── diarization
│ ├── __init__.py
│ ├── multiprocessing.py
│ └── speaker_diarizer.py
├── dictionary
│ ├── __init__.py
│ ├── mixins.py
│ └── multispeaker.py
├── exceptions.py
├── g2p
│ ├── __init__.py
│ ├── generator.py
│ ├── mixins.py
│ ├── phonetisaurus_trainer.py
│ └── trainer.py
├── helper.py
├── ivector
│ ├── __init__.py
│ ├── multiprocessing.py
│ └── trainer.py
├── language_modeling
│ ├── __init__.py
│ ├── multiprocessing.py
│ └── trainer.py
├── models.py
├── online
│ ├── __init__.py
│ ├── alignment.py
│ └── transcription.py
├── textgrid.py
├── tokenization
│ ├── __init__.py
│ ├── chinese.py
│ ├── english.py
│ ├── japanese.py
│ ├── korean.py
│ ├── resources
│ │ └── japanese
│ │ │ ├── char.def
│ │ │ ├── mfa_sudachi.dic
│ │ │ ├── rewrite.def
│ │ │ ├── sudachi_config.json
│ │ │ └── unk.def
│ ├── simple.py
│ ├── spacy.py
│ ├── thai.py
│ ├── tokenizer.py
│ └── trainer.py
├── transcription
│ ├── __init__.py
│ ├── models.py
│ ├── multiprocessing.py
│ └── transcriber.py
├── utils.py
├── vad
│ ├── __init__.py
│ ├── models.py
│ ├── multiprocessing.py
│ └── segmenter.py
└── validation
│ ├── __init__.py
│ ├── corpus_validator.py
│ └── dictionary_validator.py
├── pyproject.toml
├── requirements.txt
├── rtd_environment.yml
├── setup.cfg
├── setup.py
├── tests
├── __init__.py
├── conftest.py
├── data
│ ├── am
│ │ ├── acoustic_g2p_output_model.zip
│ │ └── mono_model.zip
│ ├── configs
│ │ ├── acoustic
│ │ │ ├── bad_topology.yaml
│ │ │ ├── english_mfa_phone_groups.yaml
│ │ │ ├── english_mfa_rules.yaml
│ │ │ └── english_mfa_topology.yaml
│ │ ├── bad_align_config.yaml
│ │ ├── basic_align_config.yaml
│ │ ├── basic_ipa_config.yaml
│ │ ├── basic_segment_config.yaml
│ │ ├── basic_train_config.yaml
│ │ ├── basic_train_lm.yaml
│ │ ├── different_punctuation_config.yaml
│ │ ├── eval_mapping.yaml
│ │ ├── g2p_config.yaml
│ │ ├── ivector_train.yaml
│ │ ├── lda_sat_train.yaml
│ │ ├── lda_train.yaml
│ │ ├── mono_align.yaml
│ │ ├── mono_train.yaml
│ │ ├── no_punctuation_config.yaml
│ │ ├── out_of_order_config.yaml
│ │ ├── pitch_tri_train.yaml
│ │ ├── pron_train.yaml
│ │ ├── sat_train.yaml
│ │ ├── test_groups.yaml
│ │ ├── test_rules.yaml
│ │ ├── train_g2p_acoustic.yaml
│ │ ├── train_g2p_config.yaml
│ │ ├── transcribe.yaml
│ │ ├── tri_train.yaml
│ │ └── xsampa_train.yaml
│ ├── dictionaries
│ │ ├── acoustic_g2p_dictionary.yaml
│ │ ├── english_us_mfa_reduced.dict
│ │ ├── expected
│ │ │ ├── graphemes.txt
│ │ │ ├── lexicon.text.fst
│ │ │ ├── phone_map.txt
│ │ │ ├── phones.txt
│ │ │ ├── phones
│ │ │ │ ├── extra_questions.int
│ │ │ │ ├── extra_questions.txt
│ │ │ │ ├── roots.int
│ │ │ │ ├── roots.txt
│ │ │ │ ├── sets.int
│ │ │ │ ├── sets.txt
│ │ │ │ ├── word_boundary.int
│ │ │ │ └── word_boundary.txt
│ │ │ ├── topo
│ │ │ └── words.txt
│ │ ├── test_abstract.txt
│ │ ├── test_acoustic.txt
│ │ ├── test_basic.txt
│ │ ├── test_chinese_dict.txt
│ │ ├── test_extra_annotations.txt
│ │ ├── test_frclitics.txt
│ │ ├── test_hindi.txt
│ │ ├── test_japanese.txt
│ │ ├── test_mixed_format_dictionary.txt
│ │ ├── test_tabbed_dictionary.txt
│ │ ├── test_vietnamese_ipa.txt
│ │ └── test_xsampa.txt
│ ├── lab
│ │ ├── 13697_11991_000000.lab
│ │ ├── 61-70968-0000.lab
│ │ ├── acoustic_corpus.lab
│ │ ├── cold_corpus.lab
│ │ ├── cold_corpus3.lab
│ │ ├── cold_corpus3_extra.lab
│ │ ├── common_voice_en_22058264.lab
│ │ ├── common_voice_en_22058266.lab
│ │ ├── common_voice_en_22058267.lab
│ │ ├── common_voice_ja_24511055.lab
│ │ ├── devanagari.lab
│ │ ├── french_clitics.lab
│ │ ├── japanese.lab
│ │ ├── multilingual_ipa.txt
│ │ ├── multilingual_ipa_2.txt
│ │ ├── multilingual_ipa_3.txt
│ │ ├── multilingual_ipa_4.txt
│ │ ├── multilingual_ipa_5.txt
│ │ ├── multilingual_ipa_us.txt
│ │ ├── multilingual_ipa_us_2.txt
│ │ ├── multilingual_ipa_us_3.txt
│ │ ├── multilingual_ipa_us_4.txt
│ │ ├── multilingual_ipa_us_5.txt
│ │ ├── punctuated.lab
│ │ ├── se10x016-08071999-1334_u0016001.lab
│ │ ├── se10x016-08071999-1334_u0016002.lab
│ │ ├── se10x016-08071999-1334_u0016003.lab
│ │ ├── se10x016-08071999-1334_u0016004.lab
│ │ ├── weird_words.lab
│ │ ├── xsampa.lab
│ │ └── 日本語.lab
│ ├── lm
│ │ ├── test_lm.arpa
│ │ └── test_lm.zip
│ ├── textgrid
│ │ ├── 61-70968-0000.TextGrid
│ │ ├── acoustic_corpus.TextGrid
│ │ ├── cold_corpus.TextGrid
│ │ ├── cold_corpus3.TextGrid
│ │ ├── michaelandsickmichael.TextGrid
│ │ ├── michaelandsickmichael_short_tg.TextGrid
│ │ ├── multilingual_ipa.TextGrid
│ │ ├── multilingual_ipa_2.TextGrid
│ │ ├── multilingual_ipa_3.TextGrid
│ │ ├── multilingual_ipa_4.TextGrid
│ │ ├── multilingual_ipa_5.TextGrid
│ │ ├── multilingual_ipa_us.TextGrid
│ │ ├── multilingual_ipa_us_2.TextGrid
│ │ ├── multilingual_ipa_us_3.TextGrid
│ │ ├── multilingual_ipa_us_4.TextGrid
│ │ ├── multilingual_ipa_us_5.TextGrid
│ │ ├── short_segments.TextGrid
│ │ └── vietnamese.TextGrid
│ ├── tokenizer
│ │ ├── test_tokenizer_model.zip
│ │ └── test_tokenizer_model_phonetisaurus.zip
│ └── wav
│ │ ├── 13697_11991_000000.opus
│ │ ├── 61-70968-0000.flac
│ │ ├── acoustic_corpus.wav
│ │ ├── cold_corpus.wav
│ │ ├── cold_corpus3.wav
│ │ ├── cold_corpus_24bit.wav
│ │ ├── cold_corpus_32bit_float.wav
│ │ ├── common_voice_en_22058264.mp3
│ │ ├── common_voice_en_22058266.mp3
│ │ ├── common_voice_en_22058267.mp3
│ │ ├── common_voice_ja_24511055.mp3
│ │ ├── dummy.mp3
│ │ ├── dummy.wav
│ │ ├── falsetto.flac
│ │ ├── falsetto2.flac
│ │ ├── mfa_a.flac
│ │ ├── mfa_affectation.flac
│ │ ├── mfa_apex.flac
│ │ ├── mfa_bottle.flac
│ │ ├── mfa_breaths.flac
│ │ ├── mfa_breathy.flac
│ │ ├── mfa_buddy.flac
│ │ ├── mfa_creaky.flac
│ │ ├── mfa_crossword.flac
│ │ ├── mfa_cutoff.flac
│ │ ├── mfa_cutoffprogressive.flac
│ │ ├── mfa_er.flac
│ │ ├── mfa_erpause.flac
│ │ ├── mfa_exaggerated.flac
│ │ ├── mfa_falsetto.flac
│ │ ├── mfa_her.flac
│ │ ├── mfa_hes.flac
│ │ ├── mfa_internalsil.flac
│ │ ├── mfa_kmg.flac
│ │ ├── mfa_laughter.flac
│ │ ├── mfa_long.flac
│ │ ├── mfa_longstop.flac
│ │ ├── mfa_michael.flac
│ │ ├── mfa_patty.flac
│ │ ├── mfa_poofy.flac
│ │ ├── mfa_pooty.flac
│ │ ├── mfa_puddy.flac
│ │ ├── mfa_putty.flac
│ │ ├── mfa_puttynorm.flac
│ │ ├── mfa_reallylong.flac
│ │ ├── mfa_registershift.flac
│ │ ├── mfa_surround.flac
│ │ ├── mfa_the.flac
│ │ ├── mfa_theapprox.flac
│ │ ├── mfa_theinitialstop.flac
│ │ ├── mfa_thenorm.flac
│ │ ├── mfa_theother.flac
│ │ ├── mfa_thestop.flac
│ │ ├── mfa_thez.flac
│ │ ├── mfa_thoughts.flac
│ │ ├── mfa_uh.flac
│ │ ├── mfa_uhuh.flac
│ │ ├── mfa_uhum.flac
│ │ ├── mfa_um.flac
│ │ ├── mfa_unk.flac
│ │ ├── mfa_whatscalled.flac
│ │ ├── mfa_whisper.flac
│ │ ├── mfa_words.flac
│ │ ├── mfa_youknow.flac
│ │ ├── michaelandsickmichael.wav
│ │ ├── multilingual_ipa.flac
│ │ ├── multilingual_ipa_2.flac
│ │ ├── multilingual_ipa_3.flac
│ │ ├── multilingual_ipa_4.flac
│ │ ├── multilingual_ipa_5.flac
│ │ ├── multilingual_ipa_us.flac
│ │ ├── multilingual_ipa_us_2.flac
│ │ ├── multilingual_ipa_us_3.flac
│ │ ├── multilingual_ipa_us_4.flac
│ │ ├── multilingual_ipa_us_5.flac
│ │ ├── se10x016-08071999-1334_u0016001.wav
│ │ ├── se10x016-08071999-1334_u0016002.wav
│ │ ├── se10x016-08071999-1334_u0016003.wav
│ │ ├── se10x016-08071999-1334_u0016004.wav
│ │ ├── whisper.flac
│ │ └── whisper2.flac
├── test_abc.py
├── test_acoustic_modeling.py
├── test_alignment_pretrained.py
├── test_commandline_adapt.py
├── test_commandline_align.py
├── test_commandline_configure.py
├── test_commandline_create_segments.py
├── test_commandline_diarize_speakers.py
├── test_commandline_g2p.py
├── test_commandline_history.py
├── test_commandline_lm.py
├── test_commandline_model.py
├── test_commandline_tokenize.py
├── test_commandline_train.py
├── test_commandline_train_dict.py
├── test_commandline_train_ivector.py
├── test_commandline_transcribe.py
├── test_commandline_validate.py
├── test_config.py
├── test_corpus.py
├── test_dict.py
├── test_g2p.py
├── test_gui.py
├── test_helper.py
├── test_segmentation.py
└── test_validate.py
└── tox.ini
/.deepsource.toml:
--------------------------------------------------------------------------------
1 | version = 1
2 |
3 | test_patterns = ["tests/**"]
4 |
5 | [[analyzers]]
6 | name = "python"
7 | enabled = true
8 |
9 | [analyzers.meta]
10 | runtime_version = "3.x.x"
11 | max_line_length = 120
12 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 |
2 | docs/*
3 | build/*
4 | .tox/*
5 | .github/*
6 | .pytest_cache/*
7 | tests/*
8 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: "[BUG]"
5 | labels: bug
6 | assignees: mmcauliffe
7 |
8 | ---
9 |
10 | **Debugging checklist**
11 |
12 | - [ ] Have you read the troubleshooting page (https://montreal-forced-aligner.readthedocs.io/en/latest/user_guide/troubleshooting.html) and searched the documentation to ensure that your issue is not addressed there?
13 | - [ ] Have you updated to the latest MFA version (check https://montreal-forced-aligner.readthedocs.io/en/latest/changelog/changelog_3.0.html)? What is the output of `mfa version`?
14 | - [ ] Have you tried rerunning the command with the `--clean` flag?
15 |
16 | **Describe the issue**
17 | A clear and concise description of what the bug is.
18 |
19 | **To reproduce your issue**
20 | Please fill out the following:
21 |
22 | 1. Corpus structure
23 | * What language is the corpus in?
24 | * How many files/speakers?
25 | * Are you using lab files or TextGrid files for input?
26 | 2. Dictionary
27 | * Are you using a dictionary from MFA? If so, which one?
28 | * If it's a custom dictionary, what is the phoneset?
29 | 3. Acoustic model
30 | * If you're using an acoustic model, is it one downloaded through MFA? If so, which one?
31 | * If it's a model you've trained, what data was it trained on?
32 |
33 | **Log file**
34 | Please attach the log file for the run that encountered an error (by default these will be stored in `~/Documents/MFA`).
35 |
36 | **Desktop (please complete the following information):**
37 | - OS: [e.g. Windows, OSX, Linux]
38 | - Version [e.g. MacOSX 10.15, Ubuntu 20.04, Windows 10, etc]
39 | - Any other details about the setup (Cloud, Docker, etc)
40 |
41 | **Additional context**
42 | Add any other context about the problem here.
43 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest new functionality that would make MFA easier to use
4 | title: ''
5 | labels: enhancement
6 | assignees: mmcauliffe
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish Python distributions to PyPI
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | build-n-publish:
9 | name: Build and publish to PyPI
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@main
13 | with:
14 | fetch-depth: 0
15 | - name: Set up Python 3.10
16 | uses: actions/setup-python@v3
17 | with:
18 | python-version: "3.10"
19 |
20 | - name: Install pypa/build
21 | run: >-
22 | python -m
23 | pip install
24 | build
25 | --user
26 |
27 | - name: Build a binary wheel and a source tarball
28 | run: >-
29 | python -m
30 | build
31 | --sdist
32 | --wheel
33 | --outdir dist/
34 | .
35 |
36 | - name: Publish to PyPI
37 | uses: pypa/gh-action-pypi-publish@release/v1
38 | with:
39 | user: __token__
40 | password: ${{ secrets.PYPI_API_TOKEN }}
41 |
--------------------------------------------------------------------------------
/.github/workflows/publish_docker.yml:
--------------------------------------------------------------------------------
1 | name: Publish Docker image
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | push_to_registries:
9 | name: Push Docker image to multiple registries
10 | runs-on: ubuntu-latest
11 | permissions:
12 | packages: write
13 | contents: read
14 | steps:
15 | - name: Check out the repo
16 | uses: actions/checkout@v3
17 |
18 | - name: Log in to Docker Hub
19 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
20 | with:
21 | username: ${{ secrets.DOCKER_USERNAME }}
22 | password: ${{ secrets.DOCKER_PASSWORD }}
23 |
24 | - name: Log in to the Container registry
25 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
26 | with:
27 | registry: ghcr.io
28 | username: ${{ github.actor }}
29 | password: ${{ secrets.GITHUB_TOKEN }}
30 |
31 | - name: Extract metadata (tags, labels) for Docker
32 | id: meta
33 | uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
34 | with:
35 | images: |
36 | mmcauliffe/montreal-forced-aligner
37 | ghcr.io/${{ github.repository }}
38 |
39 | - name: Build and push Docker images
40 | uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
41 | with:
42 | context: .
43 | push: true
44 | tags: ${{ steps.meta.outputs.tags }}
45 | labels: ${{ steps.meta.outputs.labels }}
46 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *.exe
5 | *.zip
6 | *.txt
7 | *.lab
8 | *.dict
9 | !tests/data/dictionaries/*.dict
10 | !tests/data/dictionaries/*.txt
11 | !tests/data/lab/*.txt
12 | !tests/data/lab/*.lab
13 | !tests/data/am/*.zip
14 | !tests/data/lm/*.zip
15 | report.txt
16 |
17 | .idea/
18 | .pytest-cache/
19 | # Mac BS
20 | .DS_Store
21 |
22 |
23 | generated/
24 |
25 | pretrained_models/
26 |
27 | # C extensions
28 | *.so
29 |
30 | # Distribution / packaging
31 | montreal_forced_aligner/version.py
32 | montreal_forced_aligner/_version.py
33 | .Python
34 | env/
35 | build/
36 | develop-eggs/
37 | dist/
38 | downloads/
39 | eggs/
40 | .eggs/
41 | lib/
42 | lib64/
43 | parts/
44 | sdist/
45 | var/
46 | *.egg-info/
47 | .installed.cfg
48 | *.egg
49 | thirdparty/bin
50 |
51 | # PyInstaller
52 | # Usually these files are written by a python script from a template
53 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
54 | *.manifest
55 | *.spec
56 |
57 | # Installer logs
58 | pip-log.txt
59 | pip-delete-this-directory.txt
60 |
61 | # Unit test / coverage reports
62 | htmlcov/
63 | .tox/
64 | .coverage
65 | .coverage.*
66 | .cache
67 | nosetests.xml
68 | coverage.xml
69 | *,cover
70 |
71 | # Translations
72 | *.mo
73 | *.pot
74 |
75 | # Django stuff:
76 | *.log
77 |
78 | # Sphinx documentation
79 | docs/build/
80 |
81 | # PyBuilder
82 | target/
83 |
84 |
85 | *.dll
86 |
87 | .pytest_cache/
88 |
89 | docs/source/api/
90 |
91 |
92 | *.lprof
93 |
94 | *.pclprof
95 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: local
3 | hooks:
4 | - id: profile-check
5 | name: no profiling
6 | entry: '@profile'
7 | language: pygrep
8 | types: [ python ]
9 | - id: print-check
10 | name: no print statements
11 | entry: '\bprint\('
12 | language: pygrep
13 | types: [ python ]
14 | files: ^montreal_forced_aligner/
15 | exclude: ^montreal_forced_aligner/command_line/transcribe.py
16 | - repo: https://github.com/psf/black
17 | rev: 23.9.1
18 | hooks:
19 | - id: black
20 | - repo: https://github.com/pycqa/flake8
21 | rev: 7.0.0
22 | hooks:
23 | - id: flake8
24 | additional_dependencies:
25 | - pyproject-flake8
26 | - repo: https://github.com/pre-commit/mirrors-isort
27 | rev: v5.10.1
28 | hooks:
29 | - id: isort
30 | additional_dependencies: [toml]
31 | - repo: https://github.com/asottile/setup-cfg-fmt
32 | rev: v2.2.0
33 | hooks:
34 | - id: setup-cfg-fmt
35 | args:
36 | - --min-py3-version
37 | - "3.8"
38 | - repo: https://github.com/pre-commit/pre-commit-hooks
39 | rev: v4.0.1
40 | hooks:
41 | - id: check-ast
42 | - id: check-builtin-literals
43 | - id: check-docstring-first
44 | - id: check-merge-conflict
45 | - id: check-yaml
46 | - id: check-toml
47 | - id: debug-statements
48 | - id: end-of-file-fixer
49 | - id: trailing-whitespace
50 | - id: check-added-large-files
51 | args: ['--maxkb=2000']
52 | - id: mixed-line-ending
53 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: "ubuntu-20.04"
5 | tools:
6 | python: "mambaforge-4.10"
7 |
8 | sphinx:
9 | configuration: docs/source/conf.py
10 |
11 | conda:
12 | environment: rtd_environment.yml
13 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM condaforge/mambaforge:22.11.1-4 as build
2 |
3 | COPY ci/docker_environment.yaml .
4 | RUN mkdir -p /mfa
5 | RUN useradd -ms /bin/bash mfauser
6 | RUN chown -R mfauser /mfa
7 | COPY . /pkg
8 | RUN mamba env create -p /env -f docker_environment.yaml && conda clean -afy && \
9 | chown -R mfauser /env
10 | RUN conda run -p /env python -m pip install speechbrain && \
11 | conda run -p /env python -m pip install --no-deps /pkg
12 | USER mfauser
13 | ENV MFA_ROOT_DIR=/mfa
14 | RUN conda run -p /env mfa server init
15 |
16 | RUN echo "source activate /env && mfa server start" > ~/.bashrc
17 | ENV PATH /env/bin:$PATH
18 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016 Montreal Corpus Tools
2 |
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a
5 | copy of this software and associated documentation files (the
6 | "Software"), to deal in the Software without restriction, including
7 | without limitation the rights to use, copy, modify, merge, publish,
8 | distribute, sublicense, and/or sell copies of the Software, and to
9 | permit persons to whom the Software is furnished to do so, subject to
10 | the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included
13 | in all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/bin/mfa_update:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import argparse
4 | import os
5 | import shutil
6 | import subprocess
7 | import sys
8 | from importlib.util import find_spec
9 |
10 | if __name__ == "__main__":
11 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
12 | parser.add_argument(
13 | "--install_3p",
14 | action="store_true",
15 | help="Install/update third party dependencies (Speechbrain and WhisperX)",
16 | )
17 | args = parser.parse_args()
18 | anchor_found = find_spec("anchor") is not None
19 | speechbrain_found = find_spec("speechbrain") is not None
20 | whisperx_found = find_spec("whisperx") is not None
21 |
22 | conda_path = shutil.which("conda")
23 | if conda_path is None:
24 | print("Please install conda before running this command.")
25 | sys.exit(1)
26 | mamba_path = shutil.which("mamba")
27 | if mamba_path is None:
28 |         print("No mamba found, installing first...")
29 |         subprocess.call([conda_path, "install", "-c", "conda-forge", "-y", "mamba"], env=os.environ)
30 |         # Look up mamba again now that it has been installed, so the update calls below can find it
31 |         mamba_path = shutil.which("mamba")
32 | package_list = ["montreal-forced-aligner", "kalpy", "kaldi=*=cpu*"]
33 | if anchor_found:
34 | package_list.append("anchor-annotator")
35 | subprocess.call(
36 | [mamba_path, "update", "-c", "conda-forge", "-y"] + package_list, env=os.environ
37 | )
38 | if args.install_3p:
39 | channels = ["conda-forge", "pytorch", "nvidia", "anaconda"]
40 | package_list = ["pytorch", "torchaudio"]
41 | if not whisperx_found:
42 | package_list.extend(["cudnn=8", "transformers"])
43 | command = [mamba_path, "install", "-y"]
44 | for c in channels:
45 | command.extend(["-c", c])
46 | command += package_list
47 | subprocess.call(command, env=os.environ)
48 |         command = ["pip", "install", "-U"]
49 |         package_list = ["whisperx", "speechbrain", "pygtrie"]
50 |         subprocess.call(command + package_list, env=os.environ)
51 |
--------------------------------------------------------------------------------
/ci/docker_environment.yaml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | - pytorch
4 | - nvidia
5 | - anaconda
6 | dependencies:
7 | - python>=3.11
8 | - numpy
9 | - librosa
10 | - tqdm
11 | - requests
12 | - pyyaml
13 | - dataclassy
14 | - kaldi=*=*cpu*
15 | - pynini
16 | - openfst=1.8.3
17 | - scikit-learn
18 | - hdbscan
19 | - baumwelch
20 | - ngram
21 | - praatio=6.0.0
22 | - biopython
23 | - sqlalchemy>=2.0
24 | - git
25 | - pgvector
26 | - pgvector-python
27 | - postgresql
28 | - psycopg2
29 | - click
30 | - pytorch
31 | - torchaudio
32 | - setuptools_scm
33 | - kneed
34 | - matplotlib
35 | - seaborn
36 | - sqlite
37 | - rich
38 | - rich-click
39 | - kalpy
40 | - spacy
41 | - sudachipy
42 | - sudachidict-core
43 | - spacy-pkuseg
44 | - pip
45 | - pip:
46 | - speechbrain
47 | - python-mecab-ko
48 | - jamo
49 | - pythainlp
50 | - dragonmapper
51 |
--------------------------------------------------------------------------------
/docs/source/_static/MFA_paper_Interspeech2017.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/MFA_paper_Interspeech2017.pdf
--------------------------------------------------------------------------------
/docs/source/_static/MFA_poster_LSA2017.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/MFA_poster_LSA2017.pdf
--------------------------------------------------------------------------------
/docs/source/_static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/favicon.ico
--------------------------------------------------------------------------------
/docs/source/_static/fonts/GentiumPlus-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Bold.woff
--------------------------------------------------------------------------------
/docs/source/_static/fonts/GentiumPlus-Bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Bold.woff2
--------------------------------------------------------------------------------
/docs/source/_static/fonts/GentiumPlus-BoldItalic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-BoldItalic.woff
--------------------------------------------------------------------------------
/docs/source/_static/fonts/GentiumPlus-BoldItalic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-BoldItalic.woff2
--------------------------------------------------------------------------------
/docs/source/_static/fonts/GentiumPlus-Italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Italic.woff
--------------------------------------------------------------------------------
/docs/source/_static/fonts/GentiumPlus-Italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Italic.woff2
--------------------------------------------------------------------------------
/docs/source/_static/fonts/GentiumPlus-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Regular.woff
--------------------------------------------------------------------------------
/docs/source/_static/fonts/GentiumPlus-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/fonts/GentiumPlus-Regular.woff2
--------------------------------------------------------------------------------
/docs/source/_static/librispeech_textgrid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/librispeech_textgrid.png
--------------------------------------------------------------------------------
/docs/source/_static/multiple_speakers_output_textgrid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/multiple_speakers_output_textgrid.png
--------------------------------------------------------------------------------
/docs/source/_static/multiple_speakers_textgrid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/multiple_speakers_textgrid.png
--------------------------------------------------------------------------------
/docs/source/_static/sound_files/english_fast.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/sound_files/english_fast.wav
--------------------------------------------------------------------------------
/docs/source/_static/sound_files/english_slow.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/sound_files/english_slow.wav
--------------------------------------------------------------------------------
/docs/source/_static/sound_files/english_t.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/sound_files/english_t.wav
--------------------------------------------------------------------------------
/docs/source/_static/sound_files/japanese_fast.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/sound_files/japanese_fast.wav
--------------------------------------------------------------------------------
/docs/source/_static/sound_files/japanese_slow.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/docs/source/_static/sound_files/japanese_slow.wav
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/attribute.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | {{ fullname }}
4 | {{ underline }}
5 |
6 | .. currentmodule:: {{ module }}
7 |
8 | .. autoattribute:: {{ objname }}
9 | :type:
10 |
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/base.rst:
--------------------------------------------------------------------------------
1 | {{ objname | escape | underline}}
2 |
3 | .. currentmodule:: {{ module }}
4 |
5 | .. auto{{ objtype }}:: {{ objname }}
6 |
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/class.rst:
--------------------------------------------------------------------------------
1 | :html_theme.sidebar_secondary.remove:
2 |
3 | {{ objname }}
4 | {{ underline }}
5 |
6 | .. currentmodule:: {{ module }}
7 |
8 | .. autoclass:: {{ objname }}
9 | :members:
10 | :show-inheritance:
11 | :no-inherited-members:
12 | :no-special-members:
13 |
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/function.rst:
--------------------------------------------------------------------------------
1 | :html_theme.sidebar_secondary.remove:
2 |
3 | {{ objname }}
4 | {{ underline }}
5 |
6 | .. currentmodule:: {{ module }}
7 |
8 | .. autofunction:: {{ objname }}
9 |
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/method.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | {{ fullname }}
4 | {{ underline }}
5 |
6 | .. currentmodule:: {{ module }}
7 |
8 | .. automethod:: {{ objname }}
9 |
--------------------------------------------------------------------------------
/docs/source/_templates/autosummary/property.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | {{ fullname }}
4 | {{ underline }}
5 |
6 | .. currentmodule:: {{ module }}
7 |
8 | .. autoproperty:: {{ objname }}
9 |
--------------------------------------------------------------------------------
/docs/source/_templates/version.html:
--------------------------------------------------------------------------------
1 | {# This will display the version of the docs as a badge
2 |
3 | Colors from:
4 |
5 | Wong, B. Points of view: Color blindness.
6 | Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618
7 |
8 | #}
9 |
10 |
11 | {% if "dev" in version %}
12 | {# orange for dev #E69F00 #}
13 |
15 | {% elif versionwarning %}
16 | {# red for old #980F0F #}
17 |
19 | {% else %}
20 | {# green for stable #009E73 #}
21 |
23 | {% endif %}
24 |
25 |
--------------------------------------------------------------------------------
/docs/source/changelog/changelog_2.1.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _changelog_2.1:
3 |
4 | *************
5 | 2.1 Changelog
6 | *************
7 |
8 | 2.1.6
9 | =====
10 |
11 | - Fix for issue with ignore_case flag not being respected
12 | - Fixed a hang in speaker diarization
13 | - Fixed an error related to paths ending in trailing slashes which caused MFA to try to connect to a database named after the local user
14 | - Partial migration to using :class:`pathlib.Path` instead of :mod:`os.path`
15 |
16 | 2.1.5
17 | =====
18 |
19 | - Fix for improperly reset databases
20 |
21 | 2.1.4
22 | =====
23 |
24 | - Change how database connections are made to remove pooling
25 |
26 | 2.1.3
27 | =====
28 |
29 | - Fixed a bug with intervals after the end of the sound file having negative duration (they are now not parsed)
30 | - Fixed an issue where utterances were not properly assigned to the correct channels
31 | - Modified the logic for connections to attempt to solve error with too many clients
32 |
33 | 2.1.2
34 | =====
35 |
36 | - Fixed a crash in training when the debug flag was not set
37 | - Set default postgresql port to 5433 to avoid conflicts with any system installations
38 | - Fixed a crash in textgrid export
39 |
40 | 2.1.1
41 | =====
42 |
43 | - Fixed a bug with `mfa` command not working from the command line
44 | - Updated to be compatible with PraatIO 6.0
45 |
46 | 2.1.0
47 | =====
48 |
49 | - Drop support for SQLite as a database backend
50 | - Fixed a bug where TextGrid parsing errors would cause MFA to crash rather than ignore those files
51 | - Updated CLI to use :xref:`click` rather than argparse
52 | - Added :code:`--use_phone_model` flag for :code:`mfa align` and :code:`mfa validate` commands. See :ref:`phone_models` for more details.
53 | - Added :code:`--phone_confidence` flag for :code:`mfa validate` commands. See :ref:`phone_models` for more details.
54 | - Added modeling of :code:`cutoff` phones via :code:`--use_cutoff_model` which adds progressive truncations of the next word, if it's not unknown or a non-speech word (silence, laughter, etc). See :ref:`cutoff_modeling` for more details.
55 | - Added support for using :xref:`speechbrain`'s VAD model in :ref:`create_segments`
56 | - Overhaul and update :ref:`train_ivector`
57 | - Overhaul and update :ref:`diarize_speakers`
58 | - Added support for using :xref:`speechbrain`'s SpeakerRecognition model in :ref:`diarize_speakers`
59 |
--------------------------------------------------------------------------------
/docs/source/changelog/changelog_3.1.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _changelog_3.1:
3 |
4 | *************
5 | 3.1 Changelog
6 | *************
7 |
8 | 3.1.4
9 | -----
10 |
11 | - Optimized :code:`mfa g2p` to better use multiple processes
12 | - Added :code:`--export_scores` to :code:`mfa g2p` for adding a column representing the final weights of the generated pronunciations
13 | - Added :code:`--output_directory` to :code:`mfa validate` to save generated validation files rather than the temporary directory
14 | - Fixed a bug in cutoff modeling that was preventing cutoffs from being properly parsed
15 |
16 | 3.1.3
17 | -----
18 |
19 | - Fixed an issue where silence probability being zero was not correctly removing silence
20 | - Compatibility with kalpy v0.6.5
21 | - Added API functionality for verifying transcripts with interjection words in alignment
22 | - Fixed an error in fine tuning that generated nonsensical boundaries
23 |
24 | 3.1.2
25 | -----
26 |
27 | - Fixed a bug where hidden files and folders would be parsed as corpus data
28 | - Fixed a bug where validation would not respect :code:`--no_final_clean`
29 | - Fixed a rare crash in training when a job would not have utterances assigned to it
30 | - Fixed a bug where MFA would mistakenly report a dictionary and acoustic model phones did not match for older versions
31 |
32 | 3.1.1
33 | -----
34 |
35 | - Fixed an issue with TextGrids missing intervals
36 |
37 | 3.1.0
38 | -----
39 |
40 | - Fixed a bug where cutoffs were not properly modelled
41 | - Added additional filter on create subset to not include utterances with cutoffs in smaller subsets
42 | - Added the ability to specify HMM topologies for phones
43 | - Fixed issues caused by validators not cleaning up temporary files and databases
44 | - Added support for default and nonnative dictionaries generated from other dictionaries
45 | - Restricted initial training rounds to exclude default and nonnative dictionaries
46 | - Changed clustering of phones to not mix silence and non-silence phones
47 | - Optimized textgrid export
48 | - Added better memory management for collecting alignments
49 |
--------------------------------------------------------------------------------
/docs/source/changelog/changelog_3.2.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _changelog_3.2:
3 |
4 | *************
5 | 3.2 Changelog
6 | *************
7 |
8 | 3.2.1
9 | -----
10 |
11 | - Changed unicode normalization to default to composed forms unless overridden by :code:`--unicode_decomposition true`
12 |
13 | 3.2.0
14 | -----
15 |
16 | - Added :code:`--subset_word_count` parameter to :ref:`train_acoustic_model` to add a minimum word count for an utterance to be included in training subsets
17 | - Added :code:`--minimum_utterance_length` parameter to :ref:`train_acoustic_model` to add a minimum word count for an utterance to be included in training at all
18 | - Improved memory usage in compiling training graphs for initial subsets
19 | - Added support for transcription via whisperx and speechbrain models
20 | - Updated text normalization to normalize to decomposed forms
21 | - Compatibility with Kalpy 0.6.7
22 |
--------------------------------------------------------------------------------
/docs/source/first_steps/tutorials.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _`filing an issue`: https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/issues
3 |
4 | .. _`Montreal Forced Aligner v2 Corpus Phonetics Tutorial`: https://eleanorchodroff.com/tutorial/montreal-forced-aligner.html
5 |
6 | .. _`Phonetic forced alignment with the Montreal Forced Aligner`: https://www.youtube.com/watch?v=Zhj-ccMDj_w
7 |
8 | .. _`How I used Montreal Forced Aligner for a New Language (Sinhalese)`: https://medium.com/breaktheloop/how-i-used-montreal-forced-aligner-for-a-new-language-sinhalese-8f2c22a65a22
9 |
10 | .. _`Bootstrapping an IPA dictionary for English using Montreal Forced Aligner 2.0`: https://mmcauliffe.medium.com/creating-english-ipa-dictionary-using-montreal-forced-aligner-2-0-242415dfee32
11 |
12 | .. _`Update on Montreal Forced Aligner performance`: https://memcauliffe.com/update-on-montreal-forced-aligner-performance.html
13 | .. _`Speaker dictionaries and multilingual IPA`: https://memcauliffe.com/speaker-dictionaries-and-multilingual-ipa.html
14 |
15 | .. _tutorials:
16 |
17 | External tutorials
18 | ==================
19 |
20 | I will try to keep this updated with a list of in-depth tutorials for using MFA. If you write up anything that could be included here, please let me know by `filing an issue`_ and I will add it.
21 |
22 | * `Montreal Forced Aligner v2 Corpus Phonetics Tutorial`_ (Now updated for 2.0!)
23 | * Courtesy of :xref:`chodroff`
24 | * `Phonetic forced alignment with the Montreal Forced Aligner`_ (YouTube recording)
25 | * Courtesy of :xref:`chodroff` and :xref:`rutgers_spanish_portuguese`
26 | * `How I used Montreal Forced Aligner for a New Language (Sinhalese)`_
27 | * Courtesy of :xref:`dias`
28 | * `Bootstrapping an IPA dictionary for English using Montreal Forced Aligner 2.0`_
29 | * `Update on Montreal Forced Aligner performance`_
30 | * `Speaker dictionaries and multilingual IPA`_
31 |
--------------------------------------------------------------------------------
/docs/source/getting_started.rst:
--------------------------------------------------------------------------------
1 |
2 | ***************
3 | Getting started
4 | ***************
5 |
6 |
7 | Installation
8 | ------------
9 |
10 | .. grid:: 2
11 |
12 | .. grid-item-card:: Installing with conda
13 | :text-align: center
14 | :columns: 12
15 |
16 | MFA is now on :xref:`conda_forge` and can be installed with Anaconda or Miniconda:
17 |
18 | .. code-block:: bash
19 |
20 | conda config --add channels conda-forge
21 | conda install montreal-forced-aligner
22 |
23 | +++
24 |
25 | .. button-link:: https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html
26 | :color: primary
27 | :expand:
28 |
29 | Install Conda
30 |
31 |
32 | .. grid-item-card:: In-depth instructions
33 | :text-align: center
34 |
35 | Using :ref:`Docker `? Want to :ref:`install via source `?
36 |
37 | +++
38 |
39 | .. button-ref:: installation
40 | :expand:
41 | :color: primary
42 | :ref-type: doc
43 |
44 | To the installation guide
45 |
46 |
47 | .. grid-item-card:: First steps
48 | :text-align: center
49 |
50 | First time using MFA? Want a walk-through of a specific use case?
51 |
52 | +++
53 |
54 | .. button-ref:: first_steps
55 | :expand:
56 | :color: primary
57 |
58 | First steps
59 |
60 |
61 | .. toctree::
62 | :maxdepth: 1
63 | :hidden:
64 |
65 | installation
66 | first_steps/index
67 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 |
2 | Montreal Forced Aligner documentation
3 | =====================================
4 |
5 | .. grid:: 2
6 |
7 | .. grid-item-card:: Getting started
8 | :text-align: center
9 |
10 | :fas:`running;fa-6x i-navigation`
11 |
12 | ^^^
13 |
14 | Install the Montreal Forced Aligner and get started with examples and tutorials.
15 |
16 | +++
17 |
18 | .. button-ref:: getting_started
19 | :expand:
20 | :color: primary
21 |
22 | Install MFA
23 |
24 | .. grid-item-card:: First steps
25 | :text-align: center
26 |
27 | :fas:`terminal;fa-6x i-navigation`
28 |
29 | ^^^
30 |
31 | Have a particular use case for MFA?
32 |
33 | Check out the first steps tutorials.
34 |
35 | +++
36 |
37 | .. button-ref:: first_steps
38 | :expand:
39 | :color: primary
40 |
41 | First steps
42 |
43 | .. grid-item-card:: User guide
44 | :text-align: center
45 |
46 | :fas:`book-open;fa-6x i-navigation`
47 |
48 | ^^^
49 |
50 |       The User Guide gives more detail on input formats, the available commands, and the various workflows.
51 |
52 | +++
53 |
54 | .. button-ref:: user_guide
55 | :expand:
56 | :color: primary
57 |
58 | User guide
59 |
60 | .. grid-item-card:: API reference
61 | :text-align: center
62 |
63 | :fas:`file-code;fa-6x i-navigation`
64 |
65 | ^^^
66 |
67 | The API guide lists all the inner workings of MFA, the modules and classes that you can import and use in your own scripts and projects, along with details about the Kaldi functionality used.
68 |
69 | +++
70 |
71 | .. button-ref:: mfa_api
72 | :expand:
73 | :color: primary
74 |
75 | Reference guide
76 |
77 | .. toctree::
78 | :hidden:
79 |
80 | Getting started
81 | User guide
82 | API reference
83 | Changelog
84 |
--------------------------------------------------------------------------------
/docs/source/reference/acoustic_modeling/helper.rst:
--------------------------------------------------------------------------------
1 |
2 | Helper functionality
3 | ====================
4 |
5 | Mixins
6 | ------
7 |
8 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.base
9 |
10 | .. autosummary::
11 | :toctree: generated/
12 |
13 | AcousticModelTrainingMixin -- Basic mixin
14 |
15 |
16 | Multiprocessing workers and functions
17 | -------------------------------------
18 |
19 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.monophone
20 |
21 | .. autosummary::
22 | :toctree: generated/
23 |
24 | MonoAlignEqualFunction
25 |
26 |
27 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.triphone
28 |
29 | .. autosummary::
30 | :toctree: generated/
31 |
32 | TreeStatsFunction
33 | ConvertAlignmentsFunction
34 |
35 |
36 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.lda
37 |
38 | .. autosummary::
39 | :toctree: generated/
40 |
41 | LdaAccStatsFunction
42 | CalcLdaMlltFunction
43 |
44 |
45 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.sat
46 |
47 | .. autosummary::
48 | :toctree: generated/
49 |
50 | AccStatsTwoFeatsFunction
51 |
52 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.trainer
53 |
54 | .. autosummary::
55 | :toctree: generated/
56 |
57 | TransitionAccFunction
58 |
59 | Multiprocessing argument classes
60 | --------------------------------
61 |
62 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.monophone
63 |
64 | .. autosummary::
65 | :toctree: generated/
66 |
67 | MonoAlignEqualArguments
68 |
69 |
70 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.triphone
71 |
72 | .. autosummary::
73 | :toctree: generated/
74 |
75 | TreeStatsArguments
76 | ConvertAlignmentsArguments
77 |
78 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.lda
79 |
80 | .. autosummary::
81 | :toctree: generated/
82 |
83 | LdaAccStatsArguments
84 | CalcLdaMlltArguments
85 |
86 |
87 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.sat
88 |
89 | .. autosummary::
90 | :toctree: generated/
91 |
92 | AccStatsTwoFeatsArguments
93 |
94 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.trainer
95 |
96 | .. autosummary::
97 | :toctree: generated/
98 |
99 | TransitionAccArguments
100 |
--------------------------------------------------------------------------------
/docs/source/reference/acoustic_modeling/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _acoustic_modeling_api:
3 |
4 | Acoustic models
5 | ===============
6 |
7 | :term:`Acoustic models` contain information about how phones are pronounced, trained over large (and not-so-large) corpora of speech. Currently only GMM-HMM style acoustic models are supported, which are generally good enough for alignment, but nowhere near state of the art for transcription.
8 |
9 | .. note::
10 |
11 |    As part of the training procedure, alignments are generated, and so can be exported at the end (the same as training an acoustic model and then using it with the :class:`~montreal_forced_aligner.alignment.pretrained.PretrainedAligner`). See :meth:`~montreal_forced_aligner.alignment.CorpusAligner.export_files` for the method and :ref:`train_acoustic_model` for the command line function.
12 |
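As a rough sketch of what the note above describes through the Python API (rather than the command line), the snippet below trains a model and then exports both the model and its alignments. :class:`~montreal_forced_aligner.acoustic_modeling.trainer.TrainableAligner` and its training/export methods are documented in this reference section; the constructor keyword arguments and file paths shown here are illustrative assumptions, so check the class documentation for the exact signatures.

.. code-block:: python

   from montreal_forced_aligner.acoustic_modeling.trainer import TrainableAligner

   # Hypothetical corpus and dictionary paths
   trainer = TrainableAligner(
       corpus_directory="/data/my_corpus",
       dictionary_path="/data/my_dictionary.dict",
   )
   trainer.train()  # runs the configured training blocks and generates alignments
   trainer.export_model("/data/my_acoustic_model.zip")  # save the trained acoustic model
   trainer.export_files("/data/my_corpus_aligned")  # export the alignments as TextGrids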
13 | .. currentmodule:: montreal_forced_aligner.models
14 |
15 | .. autosummary::
16 | :toctree: generated/
17 |
18 | AcousticModel
19 |
20 | .. toctree::
21 | :hidden:
22 |
23 | training
24 | helper
25 |
--------------------------------------------------------------------------------
/docs/source/reference/acoustic_modeling/training.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _acoustic_model_training_api:
3 |
4 | Training acoustic models
5 | ========================
6 |
7 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling.trainer
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | TrainableAligner
13 |
14 | .. currentmodule:: montreal_forced_aligner.acoustic_modeling
15 |
16 | .. autosummary::
17 | :toctree: generated/
18 |
19 | MonophoneTrainer -- Monophone trainer
20 | TriphoneTrainer -- Triphone trainer
21 | LdaTrainer -- LDA trainer
22 | SatTrainer -- Speaker adapted trainer
23 | PronunciationProbabilityTrainer -- Pronunciation probability trainer
24 |
--------------------------------------------------------------------------------
/docs/source/reference/alignment/alignment.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _aligners_api:
3 |
4 | Alignment classes
5 | =================
6 |
7 | .. currentmodule:: montreal_forced_aligner.alignment
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | CorpusAligner -- Base aligner
13 | AdaptingAligner -- Adapting an acoustic model to new data
14 | PretrainedAligner -- Pretrained aligner
15 |
--------------------------------------------------------------------------------
/docs/source/reference/alignment/helper.rst:
--------------------------------------------------------------------------------
1 |
2 | Helper functionality
3 | ====================
4 |
5 | Mixins
6 | ------
7 |
8 | .. currentmodule:: montreal_forced_aligner.alignment.mixins
9 |
10 | .. autosummary::
11 | :toctree: generated/
12 |
13 | AlignMixin -- Alignment mixin
14 |
15 | Multiprocessing workers and functions
16 | -------------------------------------
17 |
18 | .. currentmodule:: montreal_forced_aligner.alignment.multiprocessing
19 |
20 | .. autosummary::
21 | :toctree: generated/
22 |
23 | AlignFunction
24 | FineTuneFunction
25 | CompileTrainGraphsFunction
26 | AccStatsFunction
27 | AlignmentExtractionFunction
28 | ExportTextGridProcessWorker
29 | PhoneConfidenceFunction
30 |
31 |
32 | Multiprocessing argument classes
33 | --------------------------------
34 |
35 | .. currentmodule:: montreal_forced_aligner.alignment.multiprocessing
36 |
37 | .. autosummary::
38 | :toctree: generated/
39 |
40 | AlignArguments
41 | AccStatsArguments
42 | CompileTrainGraphsArguments
43 | AlignmentExtractionArguments
44 | ExportTextGridArguments
45 | FineTuneArguments
46 | PhoneConfidenceArguments
47 |
--------------------------------------------------------------------------------
/docs/source/reference/alignment/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _alignment_api:
3 |
4 | Alignment
5 | =========
6 |
7 | .. toctree::
8 |
9 | alignment
10 | helper
11 |
--------------------------------------------------------------------------------
/docs/source/reference/core_index.rst:
--------------------------------------------------------------------------------
1 |
2 | Core functionality
3 | ==================
4 |
5 | This section contains the core objects that are used as input to any top-level worker: the corpora, pronunciation dictionaries, and various types of MFA models. Each model's section contains the classes and functionality used to train them.
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 |
10 | corpus/index
11 | database/index
12 | dictionary/index
13 | acoustic_modeling/index
14 | g2p_modeling/index
15 | language_modeling/index
16 | ivector/index
17 |
--------------------------------------------------------------------------------
/docs/source/reference/corpus/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _corpus_api:
3 |
4 | Corpora
5 | =======
6 |
7 | .. currentmodule:: montreal_forced_aligner.corpus.acoustic_corpus
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | AcousticCorpus
13 |
14 | .. currentmodule:: montreal_forced_aligner.corpus.text_corpus
15 |
16 | .. autosummary::
17 | :toctree: generated/
18 |
19 | TextCorpus
20 |
21 | .. currentmodule:: montreal_forced_aligner.corpus.classes
22 |
23 | .. autosummary::
24 | :toctree: generated/
25 |
26 | FileData -- Class for representing sound file/transcription file pairs in corpora
27 | UtteranceData -- Class for collecting information about utterances
28 |
29 |
30 |
31 | Helper classes and functions
32 | ============================
33 |
34 |
35 | Multiprocessing
36 | ---------------
37 |
38 | .. currentmodule:: montreal_forced_aligner.corpus.multiprocessing
39 |
40 | .. autosummary::
41 | :toctree: generated/
42 |
43 | Job
44 | CorpusProcessWorker
45 |
46 | Mixins
47 | ------
48 |
49 | .. currentmodule:: montreal_forced_aligner.corpus.base
50 |
51 | .. autosummary::
52 | :toctree: generated/
53 |
54 | CorpusMixin
55 |
56 | .. currentmodule:: montreal_forced_aligner.corpus.acoustic_corpus
57 |
58 | .. autosummary::
59 | :toctree: generated/
60 |
61 | AcousticCorpusMixin
62 | AcousticCorpusPronunciationMixin
63 |
64 | .. currentmodule:: montreal_forced_aligner.corpus.ivector_corpus
65 |
66 | .. autosummary::
67 | :toctree: generated/
68 |
69 | IvectorCorpusMixin
70 |
71 | .. currentmodule:: montreal_forced_aligner.corpus.text_corpus
72 |
73 | .. autosummary::
74 | :toctree: generated/
75 |
76 | TextCorpusMixin
77 | DictionaryTextCorpusMixin
78 |
79 | Features
80 | --------
81 |
82 | .. currentmodule:: montreal_forced_aligner.corpus.features
83 |
84 | .. autosummary::
85 | :toctree: generated/
86 |
87 | FeatureConfigMixin
88 | MfccFunction
89 | MfccArguments
90 | CalcFmllrFunction
91 | CalcFmllrArguments
92 | IvectorConfigMixin
93 | VadConfigMixin
94 | ComputeVadFunction
95 | VadArguments
96 |
97 | Ivector
98 | -------
99 |
100 | .. currentmodule:: montreal_forced_aligner.corpus.features
101 |
102 | .. autosummary::
103 | :toctree: generated/
104 |
105 | ExtractIvectorsFunction
106 | ExtractIvectorsArguments
107 |
--------------------------------------------------------------------------------
/docs/source/reference/database/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _database_api:
3 |
4 | Database
5 | ========
6 |
7 | MFA uses a SQLite database to cache information during training/alignment runs. Training larger corpora previously ran into memory bottlenecks, since all corpus information was held in memory and fMLLR estimation in later stages could crash. Additionally, there was always a trade-off between storing results for use in other applications like :xref:`anchor` or providing diagnostic information to users, and keeping the core MFA workflows as memory- and time-efficient as possible. Offloading to a database frees up memory, makes some computations more efficient, and should be optimized enough not to slow down regular processing.
8 |
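For diagnostics, these tables can be queried like ordinary SQLAlchemy models. The snippet below is a minimal sketch under assumptions: the connection URL is a placeholder (MFA manages its own database connections internally), and the ``Speaker.name``/``Utterance.speaker_id`` column names are assumed purely for illustration.

.. code-block:: python

    import sqlalchemy
    from sqlalchemy.orm import Session

    from montreal_forced_aligner.db import Speaker, Utterance

    # Placeholder connection URL; MFA normally creates its own engine/sessions
    engine = sqlalchemy.create_engine("sqlite:///my_corpus.db")

    with Session(engine) as session:
        # Count utterances per speaker (column names assumed for illustration)
        query = (
            session.query(Speaker.name, sqlalchemy.func.count(Utterance.id))
            .join(Utterance, Utterance.speaker_id == Speaker.id)
            .group_by(Speaker.name)
        )
        for name, utterance_count in query:
            print(name, utterance_count)
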
9 | .. currentmodule:: montreal_forced_aligner.db
10 |
11 | .. autosummary::
12 | :toctree: generated/
13 |
14 | Dictionary
15 | Dialect
16 | Word
17 | Pronunciation
18 | Phone
19 | Grapheme
20 | File
21 | TextFile
22 | SoundFile
23 | Speaker
24 | Utterance
25 | WordInterval
26 | PhoneInterval
27 | CorpusWorkflow
28 | PhonologicalRule
29 | RuleApplication
30 | Job
31 | M2MSymbol
32 | M2M2Job
33 | Word2Job
34 |
--------------------------------------------------------------------------------
/docs/source/reference/diarization/helper.rst:
--------------------------------------------------------------------------------
1 |
2 | Helper functions
3 | ================
4 |
5 | .. currentmodule:: montreal_forced_aligner.diarization.multiprocessing
6 |
7 | .. autosummary::
8 | :toctree: generated/
9 |
10 | PldaClassificationFunction
11 | PldaClassificationArguments
12 | ComputeEerFunction
13 | ComputeEerArguments
14 | SpeechbrainEmbeddingFunction
15 | SpeechbrainClassificationFunction
16 | SpeechbrainArguments
17 | cluster_matrix
18 |
--------------------------------------------------------------------------------
/docs/source/reference/diarization/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _diarization_api:
3 |
4 | Speaker diarization
5 | ===================
6 |
7 | Speaker diarization is the procedure of assigning speaker labels to utterances. MFA can train and use ivector models (see :ref:`train_ivector`) or use :xref:`speechbrain`'s pretrained speaker classifier.
8 |
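Conceptually, diarization here comes down to clustering per-utterance speaker representations (ivectors or neural embeddings). The sketch below is not MFA's implementation; it only illustrates that clustering step with :xref:`scikit-learn`, using a random matrix as a stand-in for real embeddings.

.. code-block:: python

    import numpy as np
    from sklearn.cluster import AgglomerativeClustering

    # Random stand-in for per-utterance ivectors/embeddings (one row per utterance)
    rng = np.random.default_rng(0)
    embeddings = rng.normal(size=(200, 128))

    # Group utterances into a fixed number of speaker clusters; MFA exposes
    # several clustering algorithms via its cluster_type configuration option
    clusterer = AgglomerativeClustering(n_clusters=4)
    speaker_labels = clusterer.fit_predict(embeddings)
    print(speaker_labels[:10])
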
9 | .. toctree::
10 |
11 | main
12 | helper
13 |
--------------------------------------------------------------------------------
/docs/source/reference/diarization/main.rst:
--------------------------------------------------------------------------------
1 |
2 | Speaker Diarization
3 | ===================
4 |
5 | .. currentmodule:: montreal_forced_aligner.diarization.speaker_diarizer
6 |
7 | .. autosummary::
8 | :toctree: generated/
9 |
10 | SpeakerDiarizer
11 |
--------------------------------------------------------------------------------
/docs/source/reference/dictionary/helper.rst:
--------------------------------------------------------------------------------
1 |
2 | Helper classes and functions
3 | ============================
4 |
5 | Model
6 | -----
7 |
8 | .. currentmodule:: montreal_forced_aligner.models
9 |
10 | .. autosummary::
11 | :toctree: generated/
12 |
13 | DictionaryModel
14 |
15 | Mixins
16 | ------
17 |
18 | .. currentmodule:: montreal_forced_aligner.dictionary.mixins
19 |
20 | .. autosummary::
21 | :toctree: generated/
22 |
23 | DictionaryMixin
24 | TemporaryDictionaryMixin
25 |
26 | .. currentmodule:: montreal_forced_aligner.dictionary.multispeaker
27 |
28 | .. autosummary::
29 | :toctree: generated/
30 |
31 | MultispeakerDictionaryMixin
32 |
33 |
34 | Pronunciation probability functionality
35 | =======================================
36 |
37 | Helper
38 | ------
39 |
40 | .. currentmodule:: montreal_forced_aligner.alignment.multiprocessing
41 |
42 | .. autosummary::
43 | :toctree: generated/
44 |
45 | GeneratePronunciationsFunction
46 | GeneratePronunciationsArguments
47 |
--------------------------------------------------------------------------------
/docs/source/reference/dictionary/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _dictionary_training_api:
3 |
4 | Pronunciation dictionaries
5 | ==========================
6 |
7 | .. toctree::
8 |
9 | main
10 | helper
11 | training
12 |
--------------------------------------------------------------------------------
/docs/source/reference/dictionary/main.rst:
--------------------------------------------------------------------------------
1 |
2 | Main classes
3 | ============
4 |
5 | .. currentmodule:: montreal_forced_aligner.dictionary
6 |
7 | .. autosummary::
8 | :toctree: generated/
9 |
10 | MultispeakerDictionary -- Collection of pronunciation dictionaries that specify speaker-dictionary mappings
11 |
--------------------------------------------------------------------------------
/docs/source/reference/dictionary/training.rst:
--------------------------------------------------------------------------------
1 |
2 | Training pronunciation probabilities
3 | ====================================
4 |
5 | .. currentmodule:: montreal_forced_aligner.alignment.pretrained
6 |
7 | .. autosummary::
8 | :toctree: generated/
9 |
10 | DictionaryTrainer -- Train pronunciation probabilities from alignments
11 |
--------------------------------------------------------------------------------
/docs/source/reference/g2p/generator.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _generating_dictionaries_api:
3 |
4 | Dictionary generation
5 | =====================
6 |
7 | .. currentmodule:: montreal_forced_aligner.g2p.generator
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | PyniniCorpusGenerator -- Generator for Pynini G2P model
13 | PyniniWordListGenerator -- Generator for Pynini G2P model from a word list
14 |
--------------------------------------------------------------------------------
/docs/source/reference/g2p/helper.rst:
--------------------------------------------------------------------------------
1 |
2 | Helper functionality
3 | ====================
4 |
5 | Mixins
6 | ------
7 |
8 | .. currentmodule:: montreal_forced_aligner.g2p.generator
9 |
10 | .. autosummary::
11 | :toctree: generated/
12 |
13 | PyniniGenerator
14 |
15 | Helper
16 | ------
17 |
18 | .. currentmodule:: montreal_forced_aligner.g2p.generator
19 |
20 | .. autosummary::
21 | :toctree: generated/
22 |
23 | Rewriter
24 | RewriterWorker
25 |
--------------------------------------------------------------------------------
/docs/source/reference/g2p/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _g2p_generate_api:
3 |
4 | Generating dictionaries
5 | =======================
6 |
7 | .. toctree::
8 |
9 | generator
10 | helper
11 |
--------------------------------------------------------------------------------
/docs/source/reference/g2p_modeling/helper.rst:
--------------------------------------------------------------------------------
1 |
2 | Helper functionality
3 | ====================
4 |
5 |
6 | Mixins
7 | ------
8 |
9 | .. currentmodule:: montreal_forced_aligner.g2p.mixins
10 |
11 | .. autosummary::
12 | :toctree: generated/
13 |
14 | G2PMixin
15 | G2PTopLevelMixin
16 |
17 | .. currentmodule:: montreal_forced_aligner.g2p.trainer
18 |
19 | .. autosummary::
20 | :toctree: generated/
21 |
22 | G2PTrainer
23 |
24 | Helper
25 | ------
26 |
27 | .. currentmodule:: montreal_forced_aligner.g2p.trainer
28 |
29 | .. autosummary::
30 | :toctree: generated/
31 |
32 | RandomStartWorker
33 | RandomStart
34 |
--------------------------------------------------------------------------------
/docs/source/reference/g2p_modeling/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _g2p_modeling_api:
3 |
4 | Grapheme-to-Phoneme (G2P) models
5 | ================================
6 |
7 | G2P models are used to generate pronunciations from orthographic spellings. The G2P models currently supported use Pynini weighted finite-state transducers (wFSTs) trained from a pronunciation lexicon.
8 |
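As a toy illustration of the wFST idea (not the pair-ngram training that MFA actually performs), a hand-written Pynini transducer can map spellings to pronunciations. The snippet assumes a recent Pynini (2.1+) API and exists purely to show graphemes on the input side and phones on the output side.

.. code-block:: python

    import pynini

    # Tiny hand-written spelling-to-pronunciation mapping (illustration only;
    # real G2P models are trained from a full lexicon)
    g2p = pynini.string_map([("cat", "K AE1 T"), ("cab", "K AE1 B")])

    def apply_g2p(word: str) -> str:
        lattice = pynini.accep(word) @ g2p   # restrict the mapping to this word
        best = pynini.shortestpath(lattice)  # keep the single best path
        return best.project("output").rmepsilon().string()  # read the phone side

    print(apply_g2p("cat"))  # K AE1 T
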
9 | .. currentmodule:: montreal_forced_aligner.models
10 |
11 | .. autosummary::
12 | :toctree: generated/
13 |
14 | G2PModel
15 |
16 | .. toctree::
17 |
18 | training
19 | helper
20 |
--------------------------------------------------------------------------------
/docs/source/reference/g2p_modeling/training.rst:
--------------------------------------------------------------------------------
1 | Training G2P models
2 | ===================
3 |
4 | Pynini Pair Ngram
5 | -----------------
6 |
7 |
8 | .. currentmodule:: montreal_forced_aligner.g2p.trainer
9 |
10 | .. autosummary::
11 | :toctree: generated/
12 |
13 | PyniniTrainer -- Trainer for Pynini G2P model
14 | PyniniValidator -- Validator for Pynini G2P model
15 |
16 | Phonetisaurus-style models
17 | --------------------------
18 |
19 | .. currentmodule:: montreal_forced_aligner.g2p.phonetisaurus_trainer
20 |
21 | .. autosummary::
22 | :toctree: generated/
23 |
24 | PhonetisaurusTrainer -- Trainer for Phonetisaurus G2P model
25 |
26 | Mixins
27 | ------
28 |
29 | .. currentmodule:: montreal_forced_aligner.g2p.trainer
30 |
31 | .. autosummary::
32 | :toctree: generated/
33 |
34 | PyniniTrainerMixin
35 |
36 | .. currentmodule:: montreal_forced_aligner.g2p.phonetisaurus_trainer
37 |
38 | .. autosummary::
39 | :toctree: generated/
40 |
41 | PhonetisaurusTrainerMixin
42 |
--------------------------------------------------------------------------------
/docs/source/reference/helper/abc.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: montreal_forced_aligner.abc
2 |
3 | .. autosummary::
4 | :toctree: generated/
5 |
6 | KaldiFunction
7 | MfaModel -- Base model type for MFA
8 | MfaWorker -- Base worker class for MFA
9 | TopLevelMfaWorker -- MFA workers that have acoustic models
10 | TrainerMixin -- Trainer type interface
11 | TemporaryDirectoryMixin -- Mixin for temporary directory functionality
12 | DatabaseMixin -- Mixin for database functionality
13 | AdapterMixin -- Adaptation type interface
14 | ExporterMixin -- Abstract exporter type interface
15 | FileExporterMixin -- File exporter type interface
16 | ModelExporterMixin -- Model exporter type interface
17 |
18 | .. automodule:: montreal_forced_aligner.models
19 |
20 | .. autosummary::
21 | :toctree: generated/
22 |
23 | Archive
24 |
--------------------------------------------------------------------------------
/docs/source/reference/helper/config.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: montreal_forced_aligner.config
2 |
3 | .. autosummary::
4 | :toctree: generated/
5 |
6 | MfaConfiguration
7 | MfaProfile
8 | get_temporary_directory
9 | generate_config_path
10 | generate_command_history_path
11 | load_command_history
12 | update_command_history
13 |
--------------------------------------------------------------------------------
/docs/source/reference/helper/data.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: montreal_forced_aligner.data
2 |
3 | .. autosummary::
4 | :toctree: generated/
5 |
6 | MfaArguments
7 | TextFileType
8 | SoundFileType
9 | SoundFileInformation
10 | PhoneSetType
11 | WordData
12 | WordType
13 | PhoneType
14 | WorkflowType
15 | DatabaseImportData
16 | PronunciationProbabilityCounter
17 | CtmInterval
18 |
--------------------------------------------------------------------------------
/docs/source/reference/helper/exceptions.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: montreal_forced_aligner.exceptions
2 |
3 | .. autosummary::
4 | :toctree: generated/
5 |
6 | MFAError
7 | SoxError
8 | G2PError
9 | ConfigError
10 | LMError
11 | LanguageModelNotFoundError
12 | ModelExtensionError
13 | ThirdpartyError
14 | TrainerError
15 | ModelError
16 | CorpusError
17 | ModelLoadError
18 | CorpusReadError
19 | ArgumentError
20 | AlignmentExportError
21 | NoSuccessfulAlignments
22 | KaldiProcessingError
23 | TextParseError
24 | TextGridParseError
25 | DictionaryError
26 | NoDefaultSpeakerDictionaryError
27 | DictionaryPathError
28 | DictionaryFileError
29 | FileArgumentNotFoundError
30 | PretrainedModelNotFoundError
31 | MultipleModelTypesFoundError
32 | ModelTypeNotSupportedError
33 | PronunciationAcousticMismatchError
34 | RootDirectoryError
35 |
--------------------------------------------------------------------------------
/docs/source/reference/helper/helper.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: montreal_forced_aligner.helper
2 |
3 | .. autosummary::
4 | :toctree: generated/
5 |
6 | comma_join
7 | make_safe
8 | make_scp_safe
9 | load_scp
10 | load_scp_safe
11 | score_wer
12 | edit_distance
13 | output_mapping
14 | compare_labels
15 | overlap_scoring
16 | align_phones
17 |
--------------------------------------------------------------------------------
/docs/source/reference/helper/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _helper_api:
3 |
4 | Helper
5 | ======
6 |
7 | .. toctree::
8 |
9 | abc
10 | config
11 | data
12 | exceptions
13 | helper
14 | textgrid
15 | utils
16 |
--------------------------------------------------------------------------------
/docs/source/reference/helper/textgrid.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: montreal_forced_aligner.textgrid
2 |
3 | .. autosummary::
4 | :toctree: generated/
5 |
6 | process_ctm_line
7 | export_textgrid
8 | construct_output_tiers
9 | construct_output_path
10 | output_textgrid_writing_errors
11 |
--------------------------------------------------------------------------------
/docs/source/reference/helper/utils.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: montreal_forced_aligner.utils
2 |
3 | .. autosummary::
4 | :toctree: generated/
5 |
6 | Counter
7 | run_kaldi_function
8 | thirdparty_binary
9 | log_kaldi_errors
10 | parse_logs
11 |
--------------------------------------------------------------------------------
/docs/source/reference/ivector/helper.rst:
--------------------------------------------------------------------------------
1 | Training functionality
2 | ======================
3 |
4 | Mixins
5 | ------
6 |
7 | .. currentmodule:: montreal_forced_aligner.ivector.trainer
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | IvectorModelTrainingMixin
13 |
14 | Helper
15 | ------
16 |
17 | .. currentmodule:: montreal_forced_aligner.ivector.trainer
18 |
19 | .. autosummary::
20 | :toctree: generated/
21 |
22 | GmmGselectFunction
23 | GmmGselectArguments
24 | GaussToPostFunction
25 | GaussToPostArguments
26 | AccGlobalStatsFunction
27 | AccGlobalStatsArguments
28 | AccIvectorStatsFunction
29 | AccIvectorStatsArguments
30 |
--------------------------------------------------------------------------------
/docs/source/reference/ivector/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _ivector_api:
3 |
4 | Ivector extraction
5 | ==================
6 |
7 | .. currentmodule:: montreal_forced_aligner.models
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | IvectorExtractorModel
13 |
14 | .. toctree::
15 |
16 | training
17 | helper
18 |
--------------------------------------------------------------------------------
/docs/source/reference/ivector/training.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _training_ivector_api:
3 |
4 | Training ivector extractors
5 | ===========================
6 |
7 | .. currentmodule:: montreal_forced_aligner.ivector.trainer
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | IvectorTrainer -- Training ivector extractor models
13 | DubmTrainer -- Training block for DUBM
14 | TrainableIvectorExtractor -- Top level worker for running Ivector training pipelines
15 |
--------------------------------------------------------------------------------
/docs/source/reference/language_modeling/helper.rst:
--------------------------------------------------------------------------------
1 | Helper functionality
2 | ====================
3 |
4 | Mixins
5 | ------
6 |
7 | .. currentmodule:: montreal_forced_aligner.language_modeling.trainer
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | LmTrainerMixin -- Mixin for language model training
13 | LmCorpusTrainerMixin -- Mixin for language model training on a corpus
14 | LmDictionaryCorpusTrainerMixin -- Mixin for language model training on a corpus with a pronunciation dictionary
15 |
16 |
17 | Helper
18 | ------
19 |
20 | .. currentmodule:: montreal_forced_aligner.language_modeling.multiprocessing
21 |
22 | .. autosummary::
23 | :toctree: generated/
24 |
25 | TrainSpeakerLmFunction
26 | TrainSpeakerLmArguments
27 |
--------------------------------------------------------------------------------
/docs/source/reference/language_modeling/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _language_modeling_api:
3 |
4 | Language models
5 | ===============
6 |
7 | Language models allow for transcription via Speech-to-Text when used alongside acoustic models and pronunciation dictionaries.
8 |
9 | .. currentmodule:: montreal_forced_aligner.models
10 |
11 | .. autosummary::
12 | :toctree: generated/
13 |
14 | LanguageModel
15 |
16 | .. toctree::
17 |
18 | training
19 | helper
20 |
--------------------------------------------------------------------------------
/docs/source/reference/language_modeling/training.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _language_model_training_api:
3 |
4 | Training language models
5 | ========================
6 |
7 | .. currentmodule:: montreal_forced_aligner.language_modeling.trainer
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | MfaLmCorpusTrainer -- Trainer for language model on text corpora
13 | MfaLmDictionaryCorpusTrainer -- Trainer for language model on text corpora with a pronunciation dictionary
14 | MfaLmArpaTrainer -- Trainer for MFA language model from an existing ARPA-format language model
15 |
--------------------------------------------------------------------------------
/docs/source/reference/segmentation/helper.rst:
--------------------------------------------------------------------------------
1 |
2 | Helper functions
3 | ================
4 |
5 | .. currentmodule:: montreal_forced_aligner.vad.multiprocessing
6 |
7 | .. autosummary::
8 | :toctree: generated/
9 |
10 | SegmentVadFunction
11 | SegmentVadArguments
14 | get_initial_segmentation
15 | merge_segments
16 | segment_utterance_transcript
17 | segment_utterance_vad
18 | segment_utterance_vad_speech_brain
19 |
--------------------------------------------------------------------------------
/docs/source/reference/segmentation/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _segmentation_api:
3 |
4 | Segmentation
5 | ============
6 |
7 | Segmentation aims to break long audio files into chunks of speech.
8 |
9 | .. note::
10 |
11 | The current implementation of segmentation uses only Voice Activity Detection (VAD) features. There has been some work towards setting up full speaker diarization with :ref:`training_ivector_api`, but that is largely planned for 2.1.
12 |
13 | .. toctree::
14 |
15 | main
16 | helper
17 |
--------------------------------------------------------------------------------
/docs/source/reference/segmentation/main.rst:
--------------------------------------------------------------------------------
1 |
2 | Segmenter
3 | =========
4 |
5 | .. currentmodule:: montreal_forced_aligner.vad.segmenter
6 |
7 | .. autosummary::
8 | :toctree: generated/
9 |
10 | VadSegmenter
11 | TranscriptionSegmenter
12 |
--------------------------------------------------------------------------------
/docs/source/reference/server/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _server_api:
3 |
4 | Managing MFA servers
5 | ====================
6 |
7 | Functions
8 | ---------
9 |
10 | .. currentmodule:: montreal_forced_aligner.command_line.utils
11 |
12 | .. autosummary::
13 | :toctree: generated/
14 |
15 | configure_pg
16 | initialize_server
17 | check_databases
18 | start_server
19 | stop_server
20 | delete_server
21 |
--------------------------------------------------------------------------------
/docs/source/reference/tokenization/helper.rst:
--------------------------------------------------------------------------------
1 | Helper functionality
2 | ====================
3 |
4 | Helper
5 | ------
6 |
7 | .. currentmodule:: montreal_forced_aligner.tokenization.tokenizer
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | TokenizerRewriter
13 | TokenizerArguments
14 | TokenizerFunction
15 |
16 |
17 | Simple tokenization
18 | --------------------
19 |
20 | .. currentmodule:: montreal_forced_aligner.tokenization.simple
21 |
22 | .. autosummary::
23 | :toctree: generated/
24 |
25 | SanitizeFunction
26 | SplitWordsFunction
27 |
--------------------------------------------------------------------------------
/docs/source/reference/tokenization/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _tokenization_api:
3 |
4 | Tokenizers
5 | ==========
6 |
7 | Tokenizers allow for adding spaces as word boundaries for orthographic systems that don't normally use them (e.g., Japanese, Chinese, Thai).
8 |
9 | .. currentmodule:: montreal_forced_aligner.models
10 |
11 | .. autosummary::
12 | :toctree: generated/
13 |
14 | TokenizerModel
15 |
16 | .. toctree::
17 |
18 | training
19 | tokenizer
20 | helper
21 |
--------------------------------------------------------------------------------
/docs/source/reference/tokenization/tokenizer.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _tokenizer_api:
3 |
4 | Corpus tokenizer
5 | =================
6 |
7 | .. currentmodule:: montreal_forced_aligner.tokenization.tokenizer
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | CorpusTokenizer
13 | TokenizerValidator
14 |
15 | Simple tokenizer
16 | ================
17 |
18 | .. currentmodule:: montreal_forced_aligner.tokenization.simple
19 |
20 | .. autosummary::
21 | :toctree: generated/
22 |
23 | SimpleTokenizer
24 |
--------------------------------------------------------------------------------
/docs/source/reference/tokenization/training.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _tokenizer_model_training_api:
3 |
4 | Training tokenizer models
5 | =========================
6 |
7 | .. currentmodule:: montreal_forced_aligner.tokenization.trainer
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | TokenizerTrainer -- Trainer for tokenizer models
13 |
--------------------------------------------------------------------------------
/docs/source/reference/top_level_index.rst:
--------------------------------------------------------------------------------
1 | Workflows
2 | =========
3 |
4 | .. toctree::
5 |
6 | alignment/index
7 | validation/index
8 | g2p/index
9 | transcription/index
10 | segmentation/index
11 | diarization/index
12 | tokenization/index
13 |
--------------------------------------------------------------------------------
/docs/source/reference/transcription/helper.rst:
--------------------------------------------------------------------------------
1 | Helper functions
2 | ================
3 |
4 | Mixins
5 | ------
6 |
7 | .. currentmodule:: montreal_forced_aligner.transcription.transcriber
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | TranscriberMixin
13 |
14 | Decoding graph
15 | --------------
16 |
17 | .. currentmodule:: montreal_forced_aligner.transcription.multiprocessing
18 |
19 | .. autosummary::
20 | :toctree: generated/
21 |
22 | CreateHclgFunction
23 | CreateHclgArguments
24 |
25 |
26 | Speaker-independent transcription
27 | ---------------------------------
28 |
29 | .. currentmodule:: montreal_forced_aligner.transcription.multiprocessing
30 |
31 | .. autosummary::
32 | :toctree: generated/
33 |
34 | DecodeFunction
35 | DecodeArguments
36 | LmRescoreFunction
37 | LmRescoreArguments
38 | CarpaLmRescoreFunction
39 | CarpaLmRescoreArguments
40 |
41 | Speaker-adapted transcription
42 | -----------------------------
43 |
44 | .. currentmodule:: montreal_forced_aligner.transcription.multiprocessing
45 |
46 | .. autosummary::
47 | :toctree: generated/
48 |
49 | InitialFmllrFunction
50 | InitialFmllrArguments
51 | FmllrRescoreFunction
52 | FmllrRescoreArguments
53 | FinalFmllrFunction
54 | FinalFmllrArguments
55 |
--------------------------------------------------------------------------------
/docs/source/reference/transcription/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _transcription_api:
3 |
4 | Transcription
5 | =============
6 |
7 | MFA can use trained acoustic models (see :ref:`acoustic_model_training_api`), trained language models (see :ref:`language_model_training_api`), and pronunciation dictionaries (see :ref:`generating_dictionaries_api`) in order to generate transcripts for audio files.
8 |
9 | .. toctree::
10 |
11 | main
12 | helper
13 |
--------------------------------------------------------------------------------
/docs/source/reference/transcription/main.rst:
--------------------------------------------------------------------------------
1 | Transcriber
2 | ===========
3 |
4 | .. currentmodule:: montreal_forced_aligner.transcription
5 |
6 | .. autosummary::
7 | :toctree: generated/
8 |
9 | Transcriber
10 |
--------------------------------------------------------------------------------
/docs/source/reference/validation/helper.rst:
--------------------------------------------------------------------------------
1 | Helper functions
2 | ================
3 |
4 | Mixins
5 | ------
6 |
7 | .. currentmodule:: montreal_forced_aligner.validation
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | ValidationMixin
13 |
--------------------------------------------------------------------------------
/docs/source/reference/validation/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _validation_api:
3 |
4 | Validation
5 | ==========
6 |
7 | The validation utilities are used to evaluate a dataset for either training an acoustic model or performing alignment. They will detect issues with sound files, transcription files, and unalignable utterances, and can perform some simple evaluation of transcripts.
8 |
9 | .. toctree::
10 |
11 | main
12 | helper
13 |
--------------------------------------------------------------------------------
/docs/source/reference/validation/main.rst:
--------------------------------------------------------------------------------
1 | Validators
2 | ==========
3 |
4 | .. currentmodule:: montreal_forced_aligner.validation
5 |
6 | .. autosummary::
7 | :toctree: generated/
8 |
9 | TrainingValidator
10 | PretrainedValidator
11 | DictionaryValidator
12 |
--------------------------------------------------------------------------------
/docs/source/user_guide/concepts/features.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | (acoustic_features)=
5 | # Acoustic features
7 |
8 | ```{warning}
9 |
10 | Still under construction, I hope to fill these sections out as I have time.
11 | ```
12 |
13 |
14 | (features_mfcc)=
15 | ## Mel-Frequency Cepstrum Coefficients (MFCCs)
16 |
17 | ```{seealso}
18 | * https://kaldi-asr.org/doc/feat.html#feat_mfcc
19 | ```
20 |
21 |
22 | (features_pitch)=
23 | ## Pitch
24 |
--------------------------------------------------------------------------------
/docs/source/user_guide/concepts/fst.md:
--------------------------------------------------------------------------------
1 |
2 | (fst)=
3 | # Finite State Transducers
4 |
5 | ```{warning}
6 |
7 | Still under construction, I hope to fill these sections out as I have time.
8 | ```
9 |
10 | ```{seealso}
11 |
12 | * [OpenFst Quick Tour](https://www.openfst.org/twiki/bin/view/FST/FstQuickTour)
13 | ```
14 |
15 | (acceptor)=
16 | ## Acceptors
17 |
18 | (wfst)=
19 |
20 | ## Weighted Finite State Transducers
21 |
22 |
23 | (lexicon_fst)=
24 | ## Lexicon FSTs
25 |
26 | MFA compiles input pronunciation dictionaries to a Weighted Finite State Transducer ({term}`WFST`), with phones as input symbols and words as output symbols. During alignment, the {term}`lexicon FST` is composed with a linear acceptor created from the word sequence of the utterance's transcript.
27 |
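As a toy sketch of that composition (byte-level symbols rather than MFA's real phone/word symbol tables, and assuming a recent Pynini API):

```python
import pynini

# Toy lexicon FST: phone strings on the input side, words on the output side
lexicon = pynini.string_map([("k ae t", "cat"), ("s ae t", "sat")])

# Linear acceptor built from the (here, one-word) transcript
transcript = pynini.accep("cat")

# Composition restricts the lexicon to pronunciations of the transcript words;
# projecting onto the input side reads off the phone sequence
best = pynini.shortestpath(lexicon @ transcript)
print(best.project("input").rmepsilon().string())  # k ae t
```
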
28 |
29 | (grammar_fst)=
30 |
31 | ## Grammar FSTs
32 |
33 |
34 | (g2p_fst)=
35 | ## G2P FSTs
36 |
37 | ```{seealso}
38 |
39 | * [Pynini documentation](https://www.openfst.org/twiki/bin/view/GRM/Pynini)
40 | * [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus)
41 | ```
42 |
--------------------------------------------------------------------------------
/docs/source/user_guide/concepts/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _concepts:
3 |
4 | ***************
5 | Concepts in MFA
6 | ***************
7 |
8 | This section will attempt to provide a blend of technical and non-technical overviews of various components and concepts used in MFA. There are much more in-depth resources for learning about various components that will be linked if you are interested in learning more about them.
9 |
10 | .. warning::
11 |
12 | Still under construction, I hope to fill these sections out as I have time.
13 |
14 | .. toctree::
15 | :hidden:
16 |
17 | features
18 | speaker_adaptation
19 | fst
20 | hmm
21 |
--------------------------------------------------------------------------------
/docs/source/user_guide/configuration/acoustic_model_adapt.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _configuration_adapting:
3 |
4 | Acoustic model adaptation options
5 | =================================
6 |
7 | For the Kaldi recipe that acoustic model adaptation is based on, see :kaldi_steps:`train_map`.
8 |
9 |
10 | .. csv-table::
11 | :widths: 20, 20, 60
12 | :header: "Parameter", "Default value", "Notes"
13 |
14 | "mapping_tau", 20, "Smoothing constant used in MAP estimation, corresponds to the number of 'fake counts' that we add for the old model. Larger tau corresponds to less aggressive re-estimation, and more smoothing. You might also want to try 10 or 15."
15 |
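As a rough sketch of where ``mapping_tau`` enters, the MAP update of a Gaussian mean interpolates between the old model mean :math:`\mu_{old}` and the statistics accumulated from the adaptation data, with :math:`\tau` acting as a pseudo-count for the old model:

.. math::

   \hat{\mu} = \frac{\tau \mu_{old} + \sum_{t} \gamma_t x_t}{\tau + \sum_{t} \gamma_t}

where :math:`\gamma_t` is the posterior occupancy of the Gaussian for frame :math:`x_t`. A larger :math:`\tau` therefore keeps :math:`\hat{\mu}` closer to the old mean, i.e., less aggressive re-estimation and more smoothing.
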
--------------------------------------------------------------------------------
/docs/source/user_guide/configuration/diarization.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _configuration_diarization:
3 |
4 | Diarization options
5 | ===================
6 |
7 | .. csv-table::
8 | :widths: 20, 20, 60
9 | :header: "Parameter", "Default value", "Notes"
10 | :stub-columns: 1
11 |
12 | "cluster_type", ``optics``, "Clustering algorithm in :xref:`scikit-learn` to use, one of ``optics``, ``dbscan``, ``affinity``, ``agglomerative``, ``spectral, ``kmeans``"
13 | "expected_num_speakers", 0, "Number of speaker clusters to find, must be > 1 for ``agglomerative``, ``spectral``, and ``kmeans``"
14 | "sparse_threshold", 0.5, "Threshold on distance to limit precomputed sparse matrix"
15 |
16 | .. _default_diarization_config:
17 |
18 | Default diarization config file
19 | -------------------------------
20 |
21 | .. code-block:: yaml
22 |
23 | cluster_type: optics
24 | expected_num_speakers: 0
25 | sparse_threshold: 0.5
27 |
--------------------------------------------------------------------------------
/docs/source/user_guide/configuration/lm.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _configuration_language_modeling:
3 |
4 | *******************************
5 | Language model training options
6 | *******************************
7 |
8 | See also the :ref:`configuration_dictionary` for the options that control how text is normalized and parsed.
9 |
10 |
11 | .. csv-table::
12 | :widths: 20, 20, 60
13 | :header: "Parameter", "Default value", "Notes"
14 |
15 | "order", 3, "Order of language model"
16 | "method", kneser_ney, "Method for smoothing"
17 | "prune_thresh_small", 0.0000003, "Threshold for pruning a small model, only used if ``prune`` is true"
18 | "prune_thresh_medium", 0.0000001, "Threshold for pruning a medium model, only used if ``prune`` is true"
19 |
20 | Default language model config
21 | -----------------------------
22 |
23 | .. code-block:: yaml
24 |
25 | order: 3
26 | method: kneser_ney
27 | prune_thresh_small: 0.0000003
28 | prune_thresh_medium: 0.0000001
29 |
--------------------------------------------------------------------------------
/docs/source/user_guide/configuration/segment.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _configuration_segmentation:
3 |
4 | ********************
5 | Segmentation options
6 | ********************
7 |
8 |
9 | .. csv-table::
10 | :widths: 20, 20, 60
11 | :header: "Parameter", "Default value", "Notes"
12 |
13 | "energy_threshold", 5.5, "Energy threshold above which a frame will be counted as voiced"
14 | "energy_mean_scale", 0.5, "Proportion of the mean energy of the file that should be added to the energy_threshold"
15 | "max_segment_length", 30, "Maximum length of segments before they do not get merged"
16 | "min_pause_duration", 0.05, "Minimum unvoiced duration to split speech segments"
17 |
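To make the interaction of these parameters concrete, the snippet below is a simplified sketch of energy-based VAD and segment merging. It is not MFA's implementation; the function names and logic are illustrative only.

.. code-block:: python

    import numpy as np

    def detect_voiced_frames(frame_energies, energy_threshold=5.5, energy_mean_scale=0.5):
        """Mark frames whose energy exceeds the file-adjusted threshold."""
        threshold = energy_threshold + energy_mean_scale * float(np.mean(frame_energies))
        return frame_energies > threshold

    def merge_segments(segments, min_pause_duration=0.05, max_segment_length=30.0):
        """Merge adjacent (begin, end) segments separated by short pauses."""
        merged = []
        for begin, end in segments:
            if (
                merged
                and begin - merged[-1][1] < min_pause_duration
                and end - merged[-1][0] <= max_segment_length
            ):
                merged[-1] = (merged[-1][0], end)
            else:
                merged.append((begin, end))
        return merged

    print(merge_segments([(0.0, 1.2), (1.23, 2.5), (4.0, 5.0)]))
    # [(0.0, 2.5), (4.0, 5.0)]
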
18 | .. _default_segment_config:
19 |
20 | Default segmentation config file
21 | --------------------------------
22 |
23 | .. code-block:: yaml
24 |
25 | energy_threshold: 5.5
26 | energy_mean_scale: 0.5
27 | max_segment_length: 30
28 | min_pause_duration: 0.05
29 |
--------------------------------------------------------------------------------
/docs/source/user_guide/configuration/transcription.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _transcribe_config:
3 |
4 | *********************
5 | Transcription options
6 | *********************
7 |
8 | .. csv-table::
9 | :widths: 20, 20, 60
10 | :header: "Parameter", "Default value", "Notes"
11 |
12 | "beam", 13, "Beam for decoding"
13 | "max_active", 7000, "Max active for decoding"
14 | "lattice_beam", 6, "Beam width for decoding lattices"
15 | "acoustic_scale", 0.083333, "Multiplier to scale acoustic costs"
16 | "silence_weight", 0.01, "Weight on silence in fMLLR estimation"
17 | "uses_speaker_adaptation", true, "Flag for whether to perform speaker adaptation"
18 | "first_beam", 10.0, "Beam for decoding in initial speaker-independent pass, only used if ``uses_speaker_adaptation`` is true"
19 | "first_max_active", 2000, "Max active for decoding in initial speaker-independent pass, only used if ``uses_speaker_adaptation`` is true"
20 | "fmllr_update_type", "full", "Type of fMLLR estimation"
21 |
22 | Default transcriber config
23 | --------------------------
24 |
25 | .. code-block:: yaml
26 |
27 | beam: 13
28 | max_active: 7000
29 | lattice_beam: 6
30 | acoustic_scale: 0.083333
31 | silence_weight: 0.01
32 | fmllr: true
33 | first_beam: 10.0 # Beam used in initial, speaker-indep. pass
34 | first_max_active: 2000 # max-active used in initial pass.
35 | fmllr_update_type: full
36 |
--------------------------------------------------------------------------------
/docs/source/user_guide/corpus_creation/anchor.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _`Anchor Annotator documentation`: https://anchor-annotator.readthedocs.io/en/latest/
3 |
4 | .. _anchor:
5 |
6 | Anchor annotator ``(mfa anchor)``
7 | =================================
8 |
9 | The Anchor Annotator is a GUI utility for MFA that allows users to modify transcripts and add or change entries in the pronunciation dictionary to interactively fix out-of-vocabulary issues.
10 |
11 | .. attention::
12 |
13 | Anchor is under development and is currently pre-alpha. Use at your own risk and please use version control or back up any critical data.
14 |
15 |
16 | To use the annotator, first install the anchor subpackage:
17 |
18 | .. code-block::
19 |
20 | conda install montreal-forced-aligner[anchor]
21 |
22 | This will install MFA (if it is not already installed) along with all the packages that Anchor requires. Once installed, Anchor can be started with the ``mfa anchor`` subcommand.
23 |
24 | See the `Anchor Annotator documentation`_ for more information.
25 |
26 | Command reference
27 | -----------------
28 |
29 |
30 | .. click:: montreal_forced_aligner.command_line.anchor:anchor_cli
31 | :prog: mfa anchor
32 | :nested: full
33 |
--------------------------------------------------------------------------------
/docs/source/user_guide/corpus_creation/create_segments.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _create_segments:
3 |
4 | Segment transcribed files ``(mfa segment)``
5 | ===========================================
6 |
7 | The Montreal Forced Aligner can use Voice Activity Detection (VAD) capabilities from :xref:`speechbrain` to generate segments from
8 | a longer sound file, while attempting to segment transcripts as well. If you do not have transcripts, see :ref:`create_segments_vad`.
9 |
10 | .. note::
11 |
12 | On Windows, if you get an ``OSError/WinError 1314`` during the run, follow `these instructions `_ to enable symbolic link creation permissions.
13 |
14 | Command reference
15 | -----------------
16 |
17 | .. click:: montreal_forced_aligner.command_line.create_segments:create_segments_cli
18 | :prog: mfa segment
19 | :nested: full
20 |
21 |
22 | Configuration reference
23 | -----------------------
24 |
25 | - :ref:`configuration_segmentation`
26 |
27 | API reference
28 | -------------
29 |
30 | - :ref:`segmentation_api`
31 |
32 | .. _create_segments_vad:
33 |
34 | Segment untranscribed files ``(mfa segment_vad)``
35 | =================================================
36 |
37 | The Montreal Forced Aligner can use Voice Activity Detection (VAD) capabilities from :xref:`speechbrain` or energy based VAD to generate segments from
38 | a longer sound file. This command does not split transcripts, instead assigning a default label of "speech" to all identified speech segments. If you would like to preserve transcripts for each segment, see :ref:`create_segments`.
39 |
40 | .. note::
41 |
42 | On Windows, if you get an ``OSError/WinError 1314`` during the run, follow `these instructions `_ to enable symbolic link creation permissions.
43 |
44 | Command reference
45 | -----------------
46 |
47 | .. click:: montreal_forced_aligner.command_line.create_segments:create_segments_vad_cli
48 | :prog: mfa segment_vad
49 | :nested: full
50 |
51 |
52 | Configuration reference
53 | -----------------------
54 |
55 | - :ref:`configuration_segmentation`
56 |
57 | API reference
58 | -------------
59 |
60 | - :ref:`segmentation_api`
61 |
--------------------------------------------------------------------------------
/docs/source/user_guide/corpus_creation/index.rst:
--------------------------------------------------------------------------------
1 | .. _corpus_creation:
2 |
3 | *************************
4 | Corpus creation utilities
5 | *************************
6 |
7 | MFA now contains several command line utilities for helping to create corpora from scratch. The main workflow is as follows:
8 |
9 | 1. If the corpus is made up of long sound files that need segmenting, :ref:`segment the audio files using VAD `
10 | 2. If the corpus does not contain transcriptions, :ref:`transcribe utterances using existing acoustic models,
11 | language models, and dictionaries `
12 | 3. Use the :ref:`Anchor annotator tool ` to manually correct errors in transcription
13 | 4. As necessary, bootstrap better transcriptions:
14 |
15 | 1. :ref:`Train language model ` with updated transcriptions
16 | 2. :ref:`Add pronunciation and silence probabilities to the dictionary `
17 |
18 | .. toctree::
19 | :hidden:
20 |
21 | create_segments
22 | train_ivector
23 | diarize_speakers
24 | transcribing
25 | training_lm
26 | training_dictionary
27 | tokenize
28 | train_tokenizer
29 | anchor
30 |
--------------------------------------------------------------------------------
/docs/source/user_guide/corpus_creation/tokenize.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _tokenize_cli:
3 |
4 | Tokenize utterances ``(mfa tokenize)``
5 | =========================================
6 |
7 | Use a model trained from :ref:`train_tokenizer_cli` to tokenize a corpus (i.e. insert spaces as word boundaries for orthographic systems that do not require them).
8 |
9 | Command reference
10 | -----------------
11 |
12 | .. click:: montreal_forced_aligner.command_line.tokenize:tokenize_cli
13 | :prog: mfa tokenize
14 | :nested: full
15 |
16 |
17 | API reference
18 | -------------
19 |
20 | - :ref:`tokenization_api`
21 |
--------------------------------------------------------------------------------
/docs/source/user_guide/corpus_creation/train_ivector.rst:
--------------------------------------------------------------------------------
1 | .. _train_ivector:
2 |
3 | Train an ivector extractor ``(mfa train_ivector)``
4 | ==================================================
5 |
6 | The Montreal Forced Aligner can train :term:`ivector extractors` using an acoustic model to generate alignments. As part of this training process, a classifier is built that can be used as part of :ref:`diarize_speakers`.
7 |
8 |
9 | Command reference
10 | -----------------
11 |
12 | .. click:: montreal_forced_aligner.command_line.train_ivector_extractor:train_ivector_cli
13 | :prog: mfa train_ivector
14 | :nested: full
15 |
16 | Configuration reference
17 | -----------------------
18 |
19 | - :ref:`configuration_ivector`
20 |
21 | API reference
22 | -------------
23 |
24 | - :ref:`ivector_api`
25 |
--------------------------------------------------------------------------------
/docs/source/user_guide/corpus_creation/train_tokenizer.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _train_tokenizer_cli:
3 |
4 | Train a word tokenizer ``(mfa train_tokenizer)``
5 | ================================================
6 |
7 | Training a tokenizer uses a simplified sequence-to-sequence model like G2P, but with the following differences:
8 |
9 | * Both the input and output symbols are graphemes
10 | * Symbols can only output themselves
11 | * Only space characters can be inserted
12 |
13 | Command reference
14 | -----------------
15 |
16 | .. click:: montreal_forced_aligner.command_line.train_tokenizer:train_tokenizer_cli
17 | :prog: mfa train_tokenizer
18 | :nested: full
19 |
20 |
21 | API reference
22 | -------------
23 |
24 | - :ref:`tokenization_api`
25 |
--------------------------------------------------------------------------------
/docs/source/user_guide/corpus_creation/training_lm.rst:
--------------------------------------------------------------------------------
1 | .. _training_lm:
2 |
3 | Train a new language model ``(mfa train_lm)``
4 | ==============================================
5 |
6 | MFA has a utility function for training ARPA-format ngram :term:`language models`, as well as for merging a newly trained model with a pre-existing one.
7 |
8 |
9 | .. note::
10 |
11 | As of version 2.0.6, users on Windows can run this command natively without requiring :xref:`wsl`; see :ref:`installation` for more details.
12 |
13 | Command reference
14 | -----------------
15 |
16 | .. click:: montreal_forced_aligner.command_line.train_lm:train_lm_cli
17 | :prog: mfa train_lm
18 | :nested: full
19 |
20 | Configuration reference
21 | -----------------------
22 |
23 | - :ref:`configuration_language_modeling`
24 |
25 | API reference
26 | -------------
27 |
28 | - :ref:`language_modeling_api`
29 |
--------------------------------------------------------------------------------
/docs/source/user_guide/corpus_creation/transcribing.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _transcribing:
3 |
4 | Transcribe audio files ``(mfa transcribe)``
5 | ===========================================
6 |
7 | MFA has some limited ability to use its acoustic and language models for performing transcription. The intent of this functionality is largely to aid in offline corpus construction, not to serve as an online capability like most ASR systems.
8 |
9 | .. seealso::
10 |
11 | See :ref:`train_acoustic_model` and :ref:`training_lm` for details on training MFA models to use in transcription.
12 |
13 | Unlike alignment, transcription does not require transcribed audio files (except when running in :ref:`transcription_evaluation`), but instead uses the combination of acoustic model, language model, and pronunciation dictionary to create a decoding lattice and find the best path through it. When training a language model for transcription, it is recommended to train it on text/speech transcripts in the same domain as the target speech to minimize errors.
14 |
15 | .. warning::
16 |
17 | The technology that MFA uses is several years out of date, and as such, if you have other options available such as :xref:`coqui` or other production systems for :abbr:`STT (Speech to Text)`, we recommend using those. The transcription capabilities are provided here more for completeness.
18 |
19 | .. _transcription_evaluation:
20 |
21 | Evaluation mode
22 | ---------------
23 |
24 | Transcriptions can be compared to gold-standard references by transcribing a corpus in the same format as for alignment (i.e., each sound file has a corresponding TextGrid or lab file). Transcription will proceed as above, and then the resulting transcripts will be aligned with the gold transcriptions using the :mod:`Bio.pairwise2` alignment algorithm. From the aligned transcripts, Word Error Rate and Character Error Rate will be calculated for each utterance as follows:
25 |
26 | .. math::
27 |
28 | Error \: rate = \frac{insertions + deletions + (2 * substitutions)} {length_{ref}}
29 |
30 |
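For reference, the snippet below is an illustrative reimplementation of this formula (not MFA's internal code): it aligns a reference and a hypothesis token sequence with a standard Levenshtein dynamic program, counts the edit operations, and applies the formula above.

.. code-block:: python

    def error_rate(ref, hyp):
        """Compute the error rate above for two token sequences (words or characters)."""
        # dp[i][j] = (total_edits, insertions, deletions, substitutions)
        # for aligning ref[:i] with hyp[:j]
        dp = [[(0, 0, 0, 0)] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
        for i in range(1, len(ref) + 1):
            dp[i][0] = (i, 0, i, 0)  # delete i reference tokens
        for j in range(1, len(hyp) + 1):
            dp[0][j] = (j, j, 0, 0)  # insert j hypothesis tokens
        for i in range(1, len(ref) + 1):
            for j in range(1, len(hyp) + 1):
                if ref[i - 1] == hyp[j - 1]:
                    dp[i][j] = dp[i - 1][j - 1]
                    continue
                delete = dp[i - 1][j]
                insert = dp[i][j - 1]
                substitute = dp[i - 1][j - 1]
                dp[i][j] = min(
                    (delete[0] + 1, delete[1], delete[2] + 1, delete[3]),
                    (insert[0] + 1, insert[1] + 1, insert[2], insert[3]),
                    (substitute[0] + 1, substitute[1], substitute[2], substitute[3] + 1),
                )
        _, insertions, deletions, substitutions = dp[-1][-1]
        return (insertions + deletions + 2 * substitutions) / len(ref)

    # One inserted word against a three-word reference -> 1/3
    print(error_rate("the cat sat".split(), "the cat sad sat".split()))
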
31 | Command reference
32 | -----------------
33 |
34 | .. click:: montreal_forced_aligner.command_line.transcribe:transcribe_corpus_cli
35 | :prog: mfa transcribe
36 | :nested: full
37 |
38 | Configuration reference
39 | -----------------------
40 |
41 | - :ref:`transcribe_config`
42 |
43 | API reference
44 | -------------
45 |
46 | - :ref:`transcription_api`
47 |
--------------------------------------------------------------------------------
/docs/source/user_guide/data_validation.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _validating_data:
3 |
4 | ***************
5 | Validating data
6 | ***************
7 |
8 | The validation utility will perform the basic setup that alignment would perform, but analyzes and reports any issues
9 | that the user may want to fix.
10 |
11 | First, the utility parses the corpus and dictionary, prints out summary information about the corpus,
12 | and logs any of the following issues:
13 |
14 | - If there are any words in transcriptions that are not in the dictionary, these are logged as out-of-vocabulary items (OOVs).
15 | A list of these OOVs and which utterances they appear in are saved to text files.
16 | - Any issues reading sound files
17 | - Any issues generating features, skipped if ``--ignore_acoustics`` is flagged
18 | - Mismatches in sound files and transcriptions
19 | - Any issues reading transcription files
20 | - Any unaligned files from trial alignment run, skipped if ``--ignore_acoustics`` is flagged
21 | - If no acoustic model is specified, a monophone model is trained for testing alignment
22 |
23 | - Any files that have deviations from their original transcription to decoded transcriptions using a simple language model when ``--test_transcriptions`` is supplied
24 | - Ngram language models for each speaker are generated and merged with models for each utterance for use in decoding utterances, which may help you find transcription or data inconsistency issues in the corpus
25 |
26 | .. _phone_confidence:
27 |
28 | Phone confidence
29 | ================
30 |
31 | The phone confidence functionality of the validation utility is similar to :ref:`phone_models` in that both try to represent the "goodness" of the phone label for a given interval. Where phone models use the acoustic model in combination with a phone language model, phone confidence simply calculates the likelihood of each phone for each frame.
32 |
33 | .. _running_the_validator:
34 |
35 | Running the corpus validation utility
36 | =====================================
37 |
38 |
39 | Command reference
40 | -----------------
41 |
42 | .. click:: montreal_forced_aligner.command_line.validate:validate_corpus_cli
43 | :prog: mfa validate
44 | :nested: full
45 |
--------------------------------------------------------------------------------
/docs/source/user_guide/dictionary_validation.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _validating_dictionaries:
3 |
4 | *************************************
5 | Validating pronunciation dictionaries
6 | *************************************
7 |
8 |
9 | .. _running_the_dictionary_validator:
10 |
11 | Running the dictionary validation utility
12 | =========================================
13 |
14 |
15 | Command reference
16 | -----------------
17 |
18 | .. click:: montreal_forced_aligner.command_line.validate:validate_dictionary_cli
19 | :prog: mfa validate_dictionary
20 | :nested: full
21 |
--------------------------------------------------------------------------------
/docs/source/user_guide/implementations/alignment_evaluation.md:
--------------------------------------------------------------------------------
1 |
2 | (alignment_evaluation)=
3 | # Evaluating alignments
4 |
5 | Alignments can be compared to a gold-standard reference set by specifying the `--reference_directory` below. MFA will load all TextGrids and parse them as if they were exported by MFA (i.e., phone and speaker tiers per speaker). The phone intervals will be aligned using the {mod}`Bio.pairwise2` alignment algorithm. If the reference TextGrids use a different phone set, then a custom mapping yaml file can be specified via the `--custom_mapping_path` flag. As an example, the Buckeye reference alignments used in [Update on Montreal Forced Aligner performance](https://memcauliffe.com/update-on-montreal-forced-aligner-performance.html) use their own ARPA-based phone set that removes stress integers, is lower case, and has syllabic sonorants. To map alignments generated with the `english` model and dictionary that use standard ARPA, a yaml file like the following allows for a better alignment of reference phones to aligned phones.
6 |
7 | :::{code-block} yaml
8 | N: [en, n]
9 | M: [em, m]
10 | L: [el, l]
11 | AA0: aa
12 | AE0: ae
13 | AH0: ah
14 | AO0: ao
15 | AW0: aw
16 | :::
17 |
18 | Using the above file, both {ipa_inline}`en` and {ipa_inline}`n` phones in the Buckeye corpus will not be penalized when matched with {ipa_inline}`N` phones output by MFA.
19 |
20 | In addition to any custom mapping, phone boundaries are used in the cost function for the {mod}`Bio.pairwise2` alignment algorithm as follows:
21 |
22 | :::{math}
23 | Overlap \: cost = -1 * \biggl(\lvert begin_{aligned} - begin_{ref} \rvert + \lvert end_{aligned} - end_{ref} \rvert + \begin{cases}
24 | 0, & label_{1} = label_{2} \\
25 | 2, & otherwise
26 | \end{cases}\biggr)
27 | :::
28 |
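As a small, illustrative reimplementation of that cost (not the internal MFA code):

```python
def overlap_cost(ref_begin, ref_end, ref_label, aligned_begin, aligned_end, aligned_label):
    """Overlap cost between a reference interval and an aligned interval."""
    boundary_error = abs(aligned_begin - ref_begin) + abs(aligned_end - ref_end)
    label_penalty = 0 if aligned_label == ref_label else 2
    return -1 * (boundary_error + label_penalty)

# Identical labels with slightly shifted boundaries give a small negative cost
print(overlap_cost(0.10, 0.25, "AE1", 0.11, 0.27, "AE1"))  # approximately -0.03
```
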
29 | The two metrics calculated for each utterance are overlap score and phone error rate. Overlap score is calculated similarly to the above cost function for each phone (excluding phones that are aligned to silence or were inserted/deleted) and averaged over the utterance:
30 |
31 | :::{math}
32 | Alignment \: score = \frac{Overlap \: cost}{2}
33 | :::
34 |
35 | Phone error rate is calculated as:
36 |
37 | :::{math}
38 | Phone \: error \: rate = \frac{insertions + deletions + (2 * substitutions)} {length_{ref}}
39 | :::
40 |
--------------------------------------------------------------------------------
/docs/source/user_guide/implementations/fine_tune.md:
--------------------------------------------------------------------------------
1 |
2 | (fine_tune_alignments)=
3 |
4 | # Fine-tuning alignments
5 |
6 | By default, and as is standard in ASR, the frame step between feature frames is 10 ms, which limits the accuracy of MFA to a minimum of 0.01 seconds. When the `--fine_tune` flag is specified, the aligner does an extra fine-tuning step following alignment. The audio surrounding each interval's initial boundary is extracted with a frame step of 1 ms (0.001 s) and is aligned using a simple phone dictionary combined with a transcript of the previous phone and the current phone. Extracting the phone alignment from this pass allows for higher accuracy (down to 1 ms).
7 |
8 | :::{warning}
9 |
10 | The actual accuracy bound is not clear, as each frame uses the surrounding 25 ms to generate features, so each frame necessarily incorporates time-smeared acoustic information.
11 | :::
12 |
--------------------------------------------------------------------------------
/docs/source/user_guide/implementations/index.md:
--------------------------------------------------------------------------------
1 |
2 | # In depth guides
3 |
4 | :::{warning}
5 | This section is under construction!
6 | :::
7 |
8 | ```{toctree}
9 | :hidden:
10 |
11 | phone_groups
12 | phonological_rules
13 | lexicon_probabilities
14 | alignment_analysis
15 | alignment_evaluation
16 | fine_tune
17 | phone_models
18 | ```
19 |
--------------------------------------------------------------------------------
/docs/source/user_guide/implementations/phone_models.md:
--------------------------------------------------------------------------------
1 |
2 | (phone_models)=
3 | # Phone model alignments
4 |
5 | With the `--use_phone_model` flag, an ngram language model for phones will be constructed and used to generate phone transcripts with alignments. The phone language model uses bigrams and higher orders (up to 4), with no unigrams included to speed up transcription (and because the phonotactics of languages highly constrain the possible sequences of phones). The phone language model is trained on phone transcriptions extracted from alignments and includes silence and OOV phones.
6 |
7 | Phone transcription also reuses the speaker-adaptation transforms from the regular alignment to speed up transcription. From the phone transcription lattices, we extract phone-level alignments along with confidence scores using {kaldi_src}`lattice-to-ctm-conf`.
8 |
9 | The alignments extracted from phone transcriptions are compared to the baseline alignments using the procedure outlined in {ref}`alignment_evaluation` above.
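
As an illustrative sketch (the corpus path, model names, and output path are placeholders), phone model alignments are generated by adding the flag to an alignment command:

:::bash
mfa align /path/to/corpus english_us_arpa english_us_arpa /path/to/output --use_phone_model
:::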
10 |
--------------------------------------------------------------------------------
/docs/source/user_guide/models/index.rst:
--------------------------------------------------------------------------------
1 | .. _pretrained_models:
2 |
3 | *****************
4 | Pretrained models
5 | *****************
6 |
7 | The command for interacting with MFA models is :code:`mfa model`. The subcommands allow for inspecting currently saved pretrained models, downloading ones from MFA's model repo, and saving models you have trained to be used with a simple name rather than the full path each time.
8 |
9 | Following installation of MFA, :code:`mfa model list acoustic` will not list any models. If you want to download the default English model trained on LibriSpeech, you can run :code:`mfa model download acoustic english_us_arpa`. Once the download finishes, the ``list`` command will include ``english_us_arpa`` as an option. When referring to an acoustic model in another MFA command, you can now supply just ``english_us_arpa`` rather than the full path, and MFA will resolve it to the saved path.
10 |
11 | Similarly, if you train a new model, you can run :code:`mfa model save acoustic /path/where/the/model/was/saved.zip`; this model will then be available under the name ``saved`` in the future. The name defaults to the archive's file name without the directory or extension, and you can override it with the ``--name NEWNAME`` option.
12 |
13 | There are a number of pretrained models for aligning and generating pronunciation dictionaries. The command
14 | for downloading these is :code:`mfa model download model_type` where ``model_type`` is one of ``acoustic``, ``g2p``, or
15 | ``dictionary``.
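
As an illustrative session chaining the commands above (the trained model path and new name are placeholders):

.. code-block:: bash

   mfa model list acoustic
   mfa model download acoustic english_us_arpa
   mfa model list acoustic
   mfa model save acoustic /path/where/the/model/was/saved.zip --name my_new_model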
16 |
17 | .. note::
18 |
19 | Please see the :xref:`mfa_models` site for information and statistics about various models.
20 |
21 |
22 | Command reference
23 | -----------------
24 |
25 | .. click:: montreal_forced_aligner.command_line.model:model_cli
26 | :prog: mfa model
27 | :nested: full
28 |
--------------------------------------------------------------------------------
/docs/source/user_guide/workflows/adapt_acoustic_model.rst:
--------------------------------------------------------------------------------
1 | .. _adapt_acoustic_model:
2 |
3 | Adapt acoustic model to new data ``(mfa adapt)``
4 | ================================================
5 |
6 | MFA 2.0 introduced the ability to adapt pretrained :term:`acoustic models` to a new dataset. MFA will first align the dataset using the pretrained model, and then update the acoustic model's GMM means with those estimated from the data. See :kaldi_steps:`train_map` for the corresponding Kaldi script. As part of the adaptation process, MFA can also generate final alignments and export them if an output directory is specified in the command.
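
A minimal sketch of an adaptation run might look like the following (the paths and model name are placeholders; see the command reference below for the full signature):

.. code-block:: bash

   mfa adapt /path/to/new/corpus /path/to/dictionary.dict english_us_arpa /path/to/adapted_model.zip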
7 |
8 |
9 | Command reference
10 | -----------------
11 |
12 | .. click:: montreal_forced_aligner.command_line.adapt:adapt_model_cli
13 | :prog: mfa adapt
14 | :nested: full
15 |
16 | Configuration reference
17 | -----------------------
18 |
19 | - :ref:`configuration_global`
20 | - :ref:`configuration_adapting`
21 |
22 | API reference
23 | -------------
24 |
25 | - :class:`~montreal_forced_aligner.alignment.AdaptingAligner`
26 |
--------------------------------------------------------------------------------
/docs/source/user_guide/workflows/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _workflows_index:
3 |
4 | Workflows available
5 | ===================
6 |
7 | The primary workflow in MFA is forced alignment, where an orthographic transcript is time-aligned to speech at the word and phone level using a pronunciation dictionary and an acoustic model. There are, however, other workflows for transcribing speech using speech-to-text functionality in Kaldi, pronunciation dictionary creation using Pynini, and some basic corpus creation utilities like VAD-based segmentation. Additionally, acoustic models, G2P models, and language models can be trained on your own data (and then used in alignment and other workflows).
8 |
9 | .. warning::
10 |
11 | Speech-to-text functionality is fairly basic, and the models used in MFA are older GMM-HMM acoustic models with n-gram language models, so using something like :xref:`coqui` or Kaldi's ``nnet`` functionality will likely yield better quality transcriptions.
12 |
13 | .. hint::
14 |
15 | See :ref:`pretrained_models` for details about commands to inspect, download, and save various pretrained MFA models.
16 |
17 | .. toctree::
18 | :hidden:
19 |
20 | alignment
21 | adapt_acoustic_model
22 | train_acoustic_model
23 | dictionary_generating
24 | g2p_train
25 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | - pytorch
4 | - nvidia
5 | - anaconda
6 | dependencies:
7 | - python>=3.8
8 | - numpy
9 | - librosa
10 | - pysoundfile
11 | - tqdm
12 | - requests
13 | - pyyaml
14 | - dataclassy
15 | - kaldi=*=*cpu*
16 | - scipy
17 | - pynini
18 | - openfst=1.8.3
19 | - scikit-learn
20 | - hdbscan
21 | - baumwelch
22 | - ngram
23 | - praatio=6.0.0
24 | - biopython
25 | - sqlalchemy>=2.0
26 | - pgvector
27 | - pgvector-python
28 | - sqlite
29 | - postgresql
30 | - psycopg2
31 | - click
32 | - setuptools_scm
33 | - pytest
34 | - pytest-mypy
35 | - pytest-cov
36 | - pytest-timeout
37 | - mock
38 | - coverage
39 | - coveralls
40 | - interrogate
41 | - kneed
42 | - matplotlib
43 | - seaborn
44 | - pip
45 | - rich
46 | - rich-click
47 | - kalpy
48 | # Tokenization dependencies
49 | - spacy
50 | - sudachipy
51 | - sudachidict-core
52 | - spacy-pkuseg
53 | - pip:
54 | - build
55 | - twine
56 | # Tokenization dependencies
57 | - python-mecab-ko
58 | - jamo
59 | - pythainlp
60 | - hanziconv
61 | - dragonmapper
62 |
--------------------------------------------------------------------------------
/github_environment.yml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | dependencies:
4 | - python>=3.8
5 | - numpy
6 | - librosa
7 | - pysoundfile
8 | - tqdm
9 | - requests
10 | - pyyaml
11 | - dataclassy
12 | - kaldi=*=*cpu*
13 | - scipy
14 | - pynini
15 | - openfst=1.8.3
16 | - scikit-learn
17 | - hdbscan
18 | - baumwelch
19 | - ngram
20 | - praatio=6.0.0
21 | - biopython
22 | - sqlalchemy>=2.0
23 | - pgvector
24 | - pgvector-python
25 | - sqlite
26 | - postgresql
27 | - psycopg2
28 | - click
29 | - setuptools_scm
30 | - pytest
31 | - pytest-mypy
32 | - pytest-cov
33 | - pytest-timeout
34 | - mock
35 | - coverage
36 | - coveralls
37 | - interrogate
38 | - kneed
39 | - matplotlib
40 | - seaborn
41 | - rich
42 | - rich-click
43 | - kalpy
44 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/__init__.py:
--------------------------------------------------------------------------------
1 | """Montreal Forced Aligner is a package for aligning speech corpora through the use of acoustic models and
2 | dictionaries using Kaldi functionality."""
3 |
4 | import montreal_forced_aligner.acoustic_modeling as acoustic_modeling
5 | import montreal_forced_aligner.alignment as alignment
6 | import montreal_forced_aligner.command_line as command_line
7 | import montreal_forced_aligner.corpus as corpus
8 | import montreal_forced_aligner.dictionary as dictionary
9 | import montreal_forced_aligner.exceptions as exceptions
10 | import montreal_forced_aligner.g2p as g2p
11 | import montreal_forced_aligner.helper as helper
12 | import montreal_forced_aligner.ivector as ivector
13 | import montreal_forced_aligner.language_modeling as language_modeling
14 | import montreal_forced_aligner.models as models
15 | import montreal_forced_aligner.textgrid as textgrid
16 | import montreal_forced_aligner.transcription as transcription
17 | import montreal_forced_aligner.utils as utils
18 |
19 | __all__ = [
20 | "abc",
21 | "data",
22 | "acoustic_modeling",
23 | "alignment",
24 | "command_line",
25 | "config",
26 | "corpus",
27 | "dictionary",
28 | "exceptions",
29 | "g2p",
30 | "ivector",
31 | "language_modeling",
32 | "helper",
33 | "models",
34 | "transcription",
35 | "textgrid",
36 | "utils",
37 | ]
38 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/__main__.py:
--------------------------------------------------------------------------------
1 | from rich.traceback import install
2 |
3 | from montreal_forced_aligner.command_line.mfa import mfa_cli
4 |
5 | install(show_locals=True)
6 | mfa_cli()
7 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/acoustic_modeling/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Training acoustic models
3 | ========================
4 |
5 |
6 | """
7 | from montreal_forced_aligner.acoustic_modeling.base import AcousticModelTrainingMixin # noqa
8 | from montreal_forced_aligner.acoustic_modeling.lda import LdaTrainer # noqa
9 | from montreal_forced_aligner.acoustic_modeling.monophone import MonophoneTrainer # noqa
10 | from montreal_forced_aligner.acoustic_modeling.pronunciation_probabilities import ( # noqa
11 | PronunciationProbabilityTrainer,
12 | )
13 | from montreal_forced_aligner.acoustic_modeling.sat import SatTrainer # noqa
14 | from montreal_forced_aligner.acoustic_modeling.trainer import TrainableAligner # noqa
15 | from montreal_forced_aligner.acoustic_modeling.triphone import TriphoneTrainer # noqa
16 |
17 | __all__ = [
18 | "AcousticModelTrainingMixin",
19 | "LdaTrainer",
20 | "MonophoneTrainer",
21 | "SatTrainer",
22 | "TriphoneTrainer",
23 | "PronunciationProbabilityTrainer",
24 | "TrainableAligner",
25 | "base",
26 | "lda",
27 | "monophone",
28 | "sat",
29 | "triphone",
30 | "pronunciation_probabilities",
31 | "trainer",
32 | ]
33 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/alignment/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Aligners
3 | ========
4 |
5 | """
6 | from montreal_forced_aligner.alignment.adapting import AdaptingAligner
7 | from montreal_forced_aligner.alignment.base import CorpusAligner
8 | from montreal_forced_aligner.alignment.mixins import AlignMixin
9 | from montreal_forced_aligner.alignment.pretrained import DictionaryTrainer, PretrainedAligner
10 |
11 | __all__ = [
12 | "AdaptingAligner",
13 | "PretrainedAligner",
14 | "CorpusAligner",
15 | "DictionaryTrainer",
16 | "adapting",
17 | "base",
18 | "pretrained",
19 | "mixins",
20 | "AlignMixin",
21 | "multiprocessing",
22 | ]
23 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/command_line/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Command line functionality
3 | ==========================
4 |
5 | """
6 |
7 | from montreal_forced_aligner.command_line.adapt import adapt_model_cli
8 | from montreal_forced_aligner.command_line.align import align_corpus_cli
9 | from montreal_forced_aligner.command_line.anchor import anchor_cli
10 | from montreal_forced_aligner.command_line.configure import configure_cli
11 | from montreal_forced_aligner.command_line.create_segments import create_segments_cli
12 | from montreal_forced_aligner.command_line.diarize_speakers import diarize_speakers_cli
13 | from montreal_forced_aligner.command_line.g2p import g2p_cli
14 | from montreal_forced_aligner.command_line.history import history_cli
15 | from montreal_forced_aligner.command_line.mfa import mfa_cli
16 | from montreal_forced_aligner.command_line.model import model_cli
17 | from montreal_forced_aligner.command_line.train_acoustic_model import train_acoustic_model_cli
18 | from montreal_forced_aligner.command_line.train_dictionary import train_dictionary_cli
19 | from montreal_forced_aligner.command_line.train_g2p import train_g2p_cli
20 | from montreal_forced_aligner.command_line.train_ivector_extractor import train_ivector_cli
21 | from montreal_forced_aligner.command_line.train_lm import train_lm_cli
22 | from montreal_forced_aligner.command_line.transcribe import transcribe_corpus_cli
23 | from montreal_forced_aligner.command_line.validate import (
24 | validate_corpus_cli,
25 | validate_dictionary_cli,
26 | )
27 |
28 | __all__ = [
29 | "adapt",
30 | "align",
31 | "anchor",
32 | "diarize_speakers",
33 | "create_segments",
34 | "g2p",
35 | "mfa",
36 | "model",
37 | "configure",
38 | "history",
39 | "train_acoustic_model",
40 | "train_dictionary",
41 | "train_g2p",
42 | "train_ivector_extractor",
43 | "train_lm",
44 | "transcribe",
45 | "utils",
46 | "validate",
47 | "adapt_model_cli",
48 | "align_corpus_cli",
49 | "diarize_speakers_cli",
50 | "create_segments_cli",
51 | "g2p_cli",
52 | "mfa_cli",
53 | "configure_cli",
54 | "history_cli",
55 | "anchor_cli",
56 | "model_cli",
57 | "train_acoustic_model_cli",
58 | "train_dictionary_cli",
59 | "train_g2p_cli",
60 | "train_ivector_cli",
61 | "train_lm_cli",
62 | "transcribe_corpus_cli",
63 | "validate_dictionary_cli",
64 | "validate_corpus_cli",
65 | ]
66 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/command_line/anchor.py:
--------------------------------------------------------------------------------
1 | """Command line functions for launching anchor annotation"""
2 | from __future__ import annotations
3 |
4 | import logging
5 |
6 | import requests
7 | import rich_click as click
8 |
9 | from montreal_forced_aligner import config
10 |
11 | __all__ = ["anchor_cli"]
12 |
13 | logger = logging.getLogger("mfa")
14 |
15 |
16 | @click.command(name="anchor", short_help="Launch Anchor")
17 | @click.help_option("-h", "--help")
18 | def anchor_cli(*args, **kwargs) -> None: # pragma: no cover
19 | """
20 | Launch Anchor Annotator (if installed)
21 | """
22 | from anchor.command_line import main # noqa
23 |
24 | if config.VERBOSE:
25 | try:
26 | from anchor._version import version
27 |
28 | response = requests.get(
29 | "https://api.github.com/repos/MontrealCorpusTools/Anchor-annotator/releases/latest"
30 | )
31 | latest_version = response.json()["tag_name"].replace("v", "")
32 | if version < latest_version:
33 | click.echo(
34 | f"You are currently running an older version of Anchor annotator ({version}) than the latest available ({latest_version}). "
35 | f"To update, please run mfa_update."
36 | )
37 | except ImportError:
38 | pass
39 | main()
40 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/command_line/history.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import time
3 |
4 | import rich_click as click
5 |
6 | from montreal_forced_aligner import config
7 |
8 | __all__ = ["history_cli"]
9 |
10 | logger = logging.getLogger("mfa")
11 |
12 |
13 | @click.command(
14 | "history",
15 | help="Show previously run mfa commands",
16 | )
17 | @click.option("--depth", help="Number of commands to list, defaults to 10", type=int, default=10)
18 | @click.option(
19 | "--verbose/--no_verbose",
20 | "-v/-nv",
21 | "verbose",
22 | help=f"Output debug messages, default is {config.VERBOSE}",
23 | default=config.VERBOSE,
24 | )
25 | @click.help_option("-h", "--help")
26 | def history_cli(depth: int, verbose: bool) -> None:
27 | """
28 | List previous MFA commands
29 | """
30 | history = config.load_command_history()[-depth:]
31 | if verbose:
32 | logger.info("command\tDate\tExecution time\tVersion\tExit code\tException")
33 | for h in history:
34 | execution_time = time.strftime("%H:%M:%S", time.gmtime(h["execution_time"]))
35 | d = h["date"].isoformat()
36 | logger.info(
37 | f"{h['command']}\t{d}\t{execution_time}\t{h.get('version', 'unknown')}\t{h['exit_code']}\t{h['exception']}"
38 | )
39 | pass
40 | else:
41 | for h in history:
42 | logger.info(h["command"])
43 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/command_line/tokenize.py:
--------------------------------------------------------------------------------
1 | """Command line functions for generating pronunciations using G2P models"""
2 | from __future__ import annotations
3 |
4 | from pathlib import Path
5 |
6 | import rich_click as click
7 |
8 | from montreal_forced_aligner import config
9 | from montreal_forced_aligner.command_line.utils import common_options, validate_tokenizer_model
10 | from montreal_forced_aligner.tokenization.tokenizer import CorpusTokenizer
11 |
12 | __all__ = ["tokenize_cli"]
13 |
14 |
15 | @click.command(
16 | name="tokenize",
17 | context_settings=dict(
18 | ignore_unknown_options=True,
19 | allow_extra_args=True,
20 | allow_interspersed_args=True,
21 | ),
22 | short_help="Tokenize utterances",
23 | )
24 | @click.argument(
25 | "input_path", type=click.Path(exists=True, file_okay=True, dir_okay=True, path_type=Path)
26 | )
27 | @click.argument("tokenizer_model_path", type=click.UNPROCESSED, callback=validate_tokenizer_model)
28 | @click.argument(
29 | "output_directory", type=click.Path(file_okay=False, dir_okay=True, path_type=Path)
30 | )
31 | @click.option(
32 | "--config_path",
33 | "-c",
34 | help="Path to config file to use for training.",
35 | type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path),
36 | )
37 | @common_options
38 | @click.help_option("-h", "--help")
39 | @click.pass_context
40 | def tokenize_cli(context, **kwargs) -> None:
41 | """
42 | Tokenize utterances with a trained tokenizer model
43 | """
44 | if kwargs.get("profile", None) is not None:
45 | config.profile = kwargs.pop("profile")
46 | config.update_configuration(kwargs)
47 |
48 | config_path = kwargs.get("config_path", None)
49 | input_path = kwargs["input_path"]
50 | tokenizer_model_path = kwargs["tokenizer_model_path"]
51 | output_directory = kwargs["output_directory"]
52 |
53 | tokenizer = CorpusTokenizer(
54 | corpus_directory=input_path,
55 | tokenizer_model_path=tokenizer_model_path,
56 | **CorpusTokenizer.parse_parameters(config_path, context.params, context.args),
57 | )
58 |
59 | try:
60 | tokenizer.setup()
61 | tokenizer.tokenize_utterances()
62 | tokenizer.export_files(output_directory)
63 | except Exception:
64 | tokenizer.dirty = True
65 | raise
66 | finally:
67 | tokenizer.cleanup()
68 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/corpus/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Corpora
3 | =======
4 |
5 |
6 | """
7 | from __future__ import annotations
8 |
9 | from montreal_forced_aligner.corpus.acoustic_corpus import (
10 | AcousticCorpus,
11 | AcousticCorpusMixin,
12 | AcousticCorpusPronunciationMixin,
13 | )
14 | from montreal_forced_aligner.corpus.base import CorpusMixin
15 | from montreal_forced_aligner.corpus.text_corpus import (
16 | DictionaryTextCorpusMixin,
17 | TextCorpus,
18 | TextCorpusMixin,
19 | )
20 |
21 | __all__ = [
22 | "base",
23 | "helper",
24 | "classes",
25 | "features",
26 | "multiprocessing",
27 | "CorpusMixin",
28 | "ivector_corpus",
29 | "acoustic_corpus",
30 | "AcousticCorpus",
31 | "AcousticCorpusMixin",
32 | "AcousticCorpusPronunciationMixin",
33 | "text_corpus",
34 | "TextCorpus",
35 | "TextCorpusMixin",
36 | "DictionaryTextCorpusMixin",
37 | ]
38 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/diarization/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/montreal_forced_aligner/diarization/__init__.py
--------------------------------------------------------------------------------
/montreal_forced_aligner/dictionary/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Pronunciation dictionaries
3 | ==========================
4 |
5 | """
6 |
7 | from montreal_forced_aligner.dictionary.mixins import DictionaryMixin
8 | from montreal_forced_aligner.dictionary.multispeaker import (
9 | MultispeakerDictionary,
10 | MultispeakerDictionaryMixin,
11 | )
12 |
13 | __all__ = [
14 | "multispeaker",
15 | "mixins",
16 | "DictionaryMixin",
17 | "MultispeakerDictionary",
18 | "MultispeakerDictionaryMixin",
19 | ]
20 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/g2p/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Grapheme to phoneme (G2P)
3 | =========================
4 | """
5 |
6 | from montreal_forced_aligner.g2p.generator import PyniniCorpusGenerator, PyniniWordListGenerator
7 | from montreal_forced_aligner.g2p.phonetisaurus_trainer import PhonetisaurusTrainer
8 | from montreal_forced_aligner.g2p.trainer import PyniniTrainer
9 |
10 | __all__ = [
11 | "generator",
12 | "trainer",
13 | "PyniniTrainer",
14 | "PyniniCorpusGenerator",
15 | "PyniniWordListGenerator",
16 | "PhonetisaurusTrainer",
17 | ]
18 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/ivector/__init__.py:
--------------------------------------------------------------------------------
1 | """Module for ivector extractor training"""
2 |
3 | from montreal_forced_aligner.ivector.trainer import (
4 | DubmTrainer,
5 | IvectorTrainer,
6 | TrainableIvectorExtractor,
7 | )
8 |
9 | __all__ = ["trainer", "DubmTrainer", "IvectorTrainer", "TrainableIvectorExtractor"]
10 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/language_modeling/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Language modeling
3 | =================
4 |
5 |
6 | """
7 |
8 | from montreal_forced_aligner.language_modeling.trainer import (
9 | MfaLmArpaTrainer,
10 | MfaLmCorpusTrainer,
11 | MfaLmDictionaryCorpusTrainer,
12 | )
13 |
14 | __all__ = ["MfaLmCorpusTrainer", "MfaLmDictionaryCorpusTrainer", "MfaLmArpaTrainer"]
15 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/online/__init__.py:
--------------------------------------------------------------------------------
1 | """Module for running MFA in online mode"""
2 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/tokenization/__init__.py:
--------------------------------------------------------------------------------
1 | """Tokenization classes"""
2 |
3 | from montreal_forced_aligner.tokenization.tokenizer import CorpusTokenizer, TokenizerValidator
4 | from montreal_forced_aligner.tokenization.trainer import TokenizerTrainer
5 |
6 | __all__ = ["TokenizerTrainer", "TokenizerValidator", "CorpusTokenizer"]
7 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/tokenization/korean.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 |
5 | try:
6 | import jamo
7 | from mecab import MeCab
8 |
9 | KO_AVAILABLE = True
10 | except ImportError:
11 | KO_AVAILABLE = False
12 | MeCab = None
13 | jamo = None
14 |
15 |
16 | class KoreanTokenizer:
17 | def __init__(self, ignore_case: bool = True):
18 | self.ignore_case = ignore_case
19 | self.tokenizer = MeCab()
20 |
21 | def __call__(self, text):
22 | new_text = []
23 | morphs = self.tokenizer.parse(text)
24 | pronunciations = []
25 | for morph in morphs:
26 | normalized = morph.surface
27 | join = False
28 | m = re.search(r"[]})>][<({[]", normalized)
29 | if new_text and m:
30 | new_text[-1] += normalized[: m.start() + 1]
31 | normalized = normalized[m.end() - 1 :]
32 | elif new_text and re.match(r"^[<({\[].*", new_text[-1]):
33 | join = True
34 | elif new_text and re.match(r".*[-_~]$", new_text[-1]):
35 | join = True
36 | elif new_text and re.match(r".*[>)}\]]$", normalized):
37 | join = True
38 | elif new_text and re.match(r"^[-_~].*", normalized):
39 | join = True
40 | if new_text and any(new_text[-1].endswith(x) for x in {">", ")", "}", "]"}):
41 | join = False
42 | if join:
43 | new_text[-1] += normalized
44 | pronunciations[-1] += jamo.h2j(normalized)
45 | continue
46 | if morph.pos in {"SF", "SY", "SC"} and normalized not in {"<", "(", "{", "["}:
47 | continue
48 | new_text.append(normalized)
49 | pronunciations.append(jamo.h2j(normalized))
50 | new_text = " ".join(new_text)
51 | pronunciations = " ".join(pronunciations)
52 | if self.ignore_case:
53 | new_text = new_text.lower()
54 | pronunciations = pronunciations.lower()
55 | return new_text, pronunciations
56 |
57 |
58 | def ko_spacy(ignore_case: bool = True):
59 | if not KO_AVAILABLE:
60 | raise ImportError("Please install Korean support via `pip install python-mecab-ko jamo`")
61 | return KoreanTokenizer(ignore_case)
62 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/tokenization/resources/japanese/mfa_sudachi.dic:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/montreal_forced_aligner/tokenization/resources/japanese/mfa_sudachi.dic
--------------------------------------------------------------------------------
/montreal_forced_aligner/tokenization/resources/japanese/sudachi_config.json:
--------------------------------------------------------------------------------
1 | {"userDict": ["mfa_sudachi.dic"], "characterDefinitionFile": "char.def", "inputTextPlugin": [{"class": "com.worksap.nlp.sudachi.DefaultInputTextPlugin"}, {"class": "com.worksap.nlp.sudachi.ProlongedSoundMarkPlugin", "prolongedSoundMarks": ["ー", "-", "⁓", "〜", "〰"], "replacementSymbol": "ー"}, {"class": "com.worksap.nlp.sudachi.IgnoreYomiganaPlugin", "leftBrackets": ["(", "("], "rightBrackets": [")", ")"], "maxYomiganaLength": 4}], "oovProviderPlugin": [{"class": "com.worksap.nlp.sudachi.MeCabOovPlugin", "charDef": "char.def", "unkDef": "unk.def"}, {"class": "com.worksap.nlp.sudachi.SimpleOovPlugin", "oovPOS": ["補助記号", "一般", "*", "*", "*", "*"], "leftId": 5968, "rightId": 5968, "cost": 3857}]}
2 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/tokenization/resources/japanese/unk.def:
--------------------------------------------------------------------------------
1 | DEFAULT,5968,5968,3857,補助記号,一般,*,*,*,*
2 | SPACE,5966,5966,6056,空白,*,*,*,*,*
3 | KANJI,5139,5139,14657,名詞,普通名詞,一般,*,*,*
4 | KANJI,5129,5129,17308,名詞,普通名詞,サ変可能,*,*,*
5 | KANJI,4785,4785,18181,名詞,固有名詞,一般,*,*,*
6 | KANJI,4787,4787,18086,名詞,固有名詞,人名,一般,*,*
7 | KANJI,4791,4791,19198,名詞,固有名詞,地名,一般,*,*
8 | SYMBOL,5129,5129,17094,名詞,普通名詞,サ変可能,*,*,*
9 | NUMERIC,4794,4794,12450,名詞,数詞,*,*,*,*
10 | ALPHA,5139,5139,11633,名詞,普通名詞,一般,*,*,*
11 | ALPHA,4785,4785,13620,名詞,固有名詞,一般,*,*,*
12 | ALPHA,4787,4787,14228,名詞,固有名詞,人名,一般,*,*
13 | ALPHA,4791,4791,15793,名詞,固有名詞,地名,一般,*,*
14 | ALPHA,5687,5687,15246,感動詞,一般,*,*,*,*
15 | HIRAGANA,5139,5139,16012,名詞,普通名詞,一般,*,*,*
16 | HIRAGANA,5129,5129,20012,名詞,普通名詞,サ変可能,*,*,*
17 | HIRAGANA,4785,4785,18282,名詞,固有名詞,一般,*,*,*
18 | HIRAGANA,4787,4787,18269,名詞,固有名詞,人名,一般,*,*
19 | HIRAGANA,4791,4791,20474,名詞,固有名詞,地名,一般,*,*
20 | HIRAGANA,5687,5687,17786,感動詞,一般,*,*,*,*
21 | KATAKANA,5139,5139,10980,名詞,普通名詞,一般,*,*,*
22 | KATAKANA,5129,5129,14802,名詞,普通名詞,サ変可能,*,*,*
23 | KATAKANA,4785,4785,13451,名詞,固有名詞,一般,*,*,*
24 | KATAKANA,4787,4787,13759,名詞,固有名詞,人名,一般,*,*
25 | KATAKANA,4791,4791,14554,名詞,固有名詞,地名,一般,*,*
26 | KATAKANA,5687,5687,15272,感動詞,一般,*,*,*,*
27 | KANJINUMERIC,4794,4794,14170,名詞,数詞,*,*,*,*
28 | GREEK,5139,5139,11051,名詞,普通名詞,一般,*,*,*
29 | GREEK,4785,4785,13353,名詞,固有名詞,一般,*,*,*
30 | GREEK,4787,4787,13671,名詞,固有名詞,人名,一般,*,*
31 | GREEK,4791,4791,14862,名詞,固有名詞,地名,一般,*,*
32 | CYRILLIC,5139,5139,11140,名詞,普通名詞,一般,*,*,*
33 | CYRILLIC,4785,4785,13174,名詞,固有名詞,一般,*,*,*
34 | CYRILLIC,4787,4787,13495,名詞,固有名詞,人名,一般,*,*
35 | CYRILLIC,4791,4791,14700,名詞,固有名詞,地名,一般,*,*
36 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/transcription/__init__.py:
--------------------------------------------------------------------------------
1 | """Transcription module for MFA"""
2 | from montreal_forced_aligner.transcription.transcriber import Transcriber
3 |
4 | __all__ = ["Transcriber", "transcriber"]
5 |
--------------------------------------------------------------------------------
/montreal_forced_aligner/vad/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/montreal_forced_aligner/vad/__init__.py
--------------------------------------------------------------------------------
/montreal_forced_aligner/validation/__init__.py:
--------------------------------------------------------------------------------
1 | """Validation classes"""
2 |
3 | from montreal_forced_aligner.validation.corpus_validator import (
4 | PretrainedValidator,
5 | TrainingValidator,
6 | ValidationMixin,
7 | )
8 | from montreal_forced_aligner.validation.dictionary_validator import DictionaryValidator
9 |
10 | __all__ = ["PretrainedValidator", "TrainingValidator", "ValidationMixin", "DictionaryValidator"]
11 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "setuptools>=45", "wheel", "setuptools_scm>=6.2"
4 | ]
5 | build-backend = "setuptools.build_meta"
6 |
7 | [tool.setuptools_scm]
8 | write_to = "montreal_forced_aligner/_version.py"
9 |
10 | [tool.black]
11 | line-length = 99
12 |
13 | [tool.flake8]
14 | max-line-length = 99
15 | extend-ignore = ["D203", "E203", "E251", "E266", "E302", "E305", "E401", "E402", "E501", "F401", "F403", "W503"]
16 | exclude = [".git", "__pycache__", "dist", "build"]
17 |
18 | [tool.isort]
19 | line_length = 99
20 | profile = "black"
21 | known_first_party = [
22 | "montreal_forced_aligner"
23 | ]
24 |
25 | [tool.interrogate]
26 | ignore-init-method = true
27 | ignore-init-module = false
28 | ignore-magic = false
29 | ignore-semiprivate = false
30 | ignore-private = false
31 | ignore-module = false
32 | ignore-property-decorators = false
33 | fail-under = 95
34 | exclude = [
35 | "tests",
36 | "build",
37 | "dist",
38 | "setup.py",
39 | "docs"
40 | ]
41 | verbose = 100
42 | omit-covered-files = false
43 | quiet = false
44 | generate-badge = "docs/source/_static"
45 | badge-format = "svg"
46 | whitelist-regex = []
47 | ignore-regex = []
48 | color = true
49 |
50 |
51 | [tool.check-manifest]
52 | ignore = [
53 | ".deepsource.toml",
54 | ".readthedocs.yaml",
55 | ]
56 |
57 | [tool.coverage.run]
58 | source = ["montreal_forced_aligner"]
59 | concurrency = ["multiprocessing"]
60 | branch = true
61 | parallel = true
62 | omit = [
63 | ".tox/*"
64 | ]
65 |
66 |
67 | [tool.coverage.report]
68 | show_missing = true
69 | exclude_lines = [
70 | "pragma: no cover",
71 | "if __name__ == .__main__.:",
72 | "raise AssertionError",
73 | "raise NotImplementedError",
74 | "pass",
75 | "if sys.platform",
76 | "except ImportError:",
77 | "except KeyboardInterrupt:",
78 | "except Exception as e:",
79 | "except Exception:",
80 | "if call_back",
81 | "if is_set",
82 | "if TYPE_CHECKING:",
83 | "def history_save_handler() -> None:",
84 | "class ExitHooks(object):",
85 | "def main() -> None:",
86 | "if os.path.exists",
87 | "@abstractmethod",
88 | 'if "MFA_ERROR"',
89 | ]
90 | fail_under = 50
91 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | praatio>=6.0.0
2 | tqdm
3 | pyyaml
4 | librosa
5 | numpy
6 | scipy
7 | scikit-learn
8 | requests
9 | biopython
10 | dataclassy
11 | sqlalchemy>=2.0
12 | click
13 | rich
14 | rich-click
15 | numpy
16 | pynini
17 |
--------------------------------------------------------------------------------
/rtd_environment.yml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | dependencies:
4 | - python>=3.9
5 | - numpy
6 | - librosa
7 | - tqdm
8 | - requests
9 | - pyyaml
10 | - praatio=6.0.0
11 | - dataclassy
12 | - sqlalchemy>=2.0
13 | - pynini
14 | - pgvector
15 | - pgvector-python
16 | - postgresql
17 | - scikit-learn
18 | - hdbscan
19 | - psycopg2
20 | - biopython
21 | - click
22 | - setuptools_scm
23 | - importlib_metadata
24 | - sphinx
25 | - numpydoc
26 | - sphinx-design
27 | - sphinx-click
28 | - sphinx-intl
29 | - pydata-sphinx-theme
30 | - myst-parser
31 | - mock
32 | - setuptools-scm
33 | - kneed
34 | - matplotlib
35 | - seaborn
36 | - rich
37 | - rich-click
38 | - kaldi =*=cpu*
39 | - kalpy
40 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools_scm # noqa
2 | from setuptools import setup
3 |
4 | setup()
5 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/__init__.py
--------------------------------------------------------------------------------
/tests/data/am/acoustic_g2p_output_model.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/am/acoustic_g2p_output_model.zip
--------------------------------------------------------------------------------
/tests/data/am/mono_model.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/am/mono_model.zip
--------------------------------------------------------------------------------
/tests/data/configs/acoustic/bad_topology.yaml:
--------------------------------------------------------------------------------
1 | b:
2 | max_states: 3
3 | bʲ:
4 | max_states: 1
5 |
--------------------------------------------------------------------------------
/tests/data/configs/acoustic/english_mfa_phone_groups.yaml:
--------------------------------------------------------------------------------
1 | -
2 | - p
3 | - pʷ
4 | - pʰ
5 | - pʲ
6 | -
7 | - kp
8 | -
9 | - b
10 | - bʲ
11 | -
12 | - ɡb
13 | -
14 | - f
15 | - fʷ
16 | - fʲ
17 | -
18 | - v
19 | - vʷ
20 | - vʲ
21 | -
22 | - θ
23 | -
24 | - t̪
25 | -
26 | - ð
27 | -
28 | - d̪
29 | -
30 | - t
31 | - tʷ
32 | - tʰ
33 | - tʲ
34 | -
35 | - ʈ
36 | - ʈʲ
37 | - ʈʷ
38 | -
39 | - ʔ
40 | -
41 | - d
42 | - dʲ
43 | -
44 | - ɖ
45 | - ɖʲ
46 | -
47 | - ɾ
48 | - ɾʲ
49 | -
50 | - tʃ
51 | -
52 | - dʒ
53 | -
54 | - ʃ
55 | -
56 | - ʒ
57 | -
58 | - s
59 | -
60 | - z
61 | -
62 | - ɹ
63 | -
64 | - m
65 | -
66 | - mʲ
67 | -
68 | - m̩
69 | -
70 | - ɱ
71 | -
72 | - n
73 | -
74 | - n̩
75 | -
76 | - ɲ
77 | -
78 | - ɾ̃
79 | -
80 | - ŋ
81 | -
82 | - l
83 | -
84 | - ɫ
85 | -
86 | - ɫ̩
87 | -
88 | - ʎ
89 | -
90 | - ɟ
91 | - ɟʷ
92 | -
93 | - ɡ
94 | - ɡʷ
95 | -
96 | - c
97 | - cʷ
98 | - cʰ
99 | -
100 | - k
101 | - kʷ
102 | - kʰ
103 | -
104 | - ç
105 | -
106 | - h
107 | -
108 | - ɐ
109 | -
110 | - ə
111 | -
112 | - ɜː
113 | - ɜ
114 | -
115 | - ɝ
116 | -
117 | - ɚ
118 | -
119 | - ʊ
120 | -
121 | - ɪ
122 | -
123 | - ɑ
124 | - ɑː
125 | -
126 | - ɒ
127 | - ɒː
128 | -
129 | - ɔ
130 | -
131 | - aː
132 | - a
133 | -
134 | - æ
135 | -
136 | - aj
137 | -
138 | - aw
139 | -
140 | - i
141 | - iː
142 | -
143 | - j
144 | -
145 | - ɛː
146 | - ɛ
147 | -
148 | - e
149 | - eː
150 | -
151 | - ej
152 | -
153 | - ʉ
154 | - ʉː
155 | -
156 | - uː
157 | - u
158 | -
159 | - w
160 | -
161 | - ʋ
162 | -
163 | - ɔj
164 | -
165 | - ow
166 | -
167 | - əw
168 | -
169 | - o
170 | - oː
171 |
--------------------------------------------------------------------------------
/tests/data/configs/acoustic/english_mfa_topology.yaml:
--------------------------------------------------------------------------------
1 | ɾ:
2 | max_states: 1
3 | min_states: 1
4 | ɾʲ:
5 | max_states: 1
6 | min_states: 1
7 | ɾ̃:
8 | max_states: 1
9 | min_states: 1
10 | ʔ:
11 | max_states: 1
12 | min_states: 1
13 | ə:
14 | max_states: 3
15 | ɚ:
16 | max_states: 3
17 | ɪ:
18 | max_states: 3
19 | e:
20 | max_states: 3
21 | eː:
22 | max_states: 3
23 | ɛ:
24 | max_states: 3
25 | ɛː:
26 | max_states: 3
27 | ɐ:
28 | max_states: 3
29 | i:
30 | max_states: 3
31 | iː:
32 | max_states: 3
33 | o:
34 | max_states: 3
35 | oː:
36 | max_states: 3
37 | u:
38 | max_states: 3
39 | uː:
40 | max_states: 3
41 | ɝ:
42 | max_states: 3
43 | j:
44 | max_states: 3
45 | w:
46 | max_states: 3
47 |
--------------------------------------------------------------------------------
/tests/data/configs/bad_align_config.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 10
3 |
--------------------------------------------------------------------------------
/tests/data/configs/basic_align_config.yaml:
--------------------------------------------------------------------------------
1 | beam: 100
2 | retry_beam: 400
3 |
4 | features:
5 | type: "mfcc"
6 | frame_shift: 10
7 |
--------------------------------------------------------------------------------
/tests/data/configs/basic_ipa_config.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 40
3 |
4 | features:
5 | type: "mfcc"
6 | use_energy: false
7 | frame_shift: 10
8 | use_pitch: false
9 |
10 | multilingual_ipa: true
11 |
12 | digraphs:
13 | - "[dt][szʒʃʐʑʂɕç]"
14 | - "[a][job_name][u]"
15 |
16 | strip_diacritics:
17 | - 'ː'
18 | - 'ˑ'
19 | - '̩'
20 | - '̆'
21 | - '̑'
22 | - '̯'
23 | - '͡'
24 | - '‿'
25 | - '͜'
26 |
27 | training:
28 | - monophone:
29 | num_iterations: 5
30 | max_gaussians: 1000
31 | subset: 100
32 |
33 | - triphone:
34 | num_iterations: 3
35 | num_leaves: 250
36 | max_gaussians: 2000
37 | cluster_threshold: -1
38 | subset: 1000
39 |
40 | - lda:
41 | num_iterations: 2
42 | num_leaves: 500
43 | max_gaussians: 4000
44 | subset: 1000
45 | features:
46 | splice_left_context: 3
47 | splice_right_context: 3
48 |
49 | - sat:
50 | num_iterations: 2
51 | num_leaves: 500
52 | max_gaussians: 5000
53 | power: 0.2
54 | silence_weight: 0.0
55 | fmllr_update_type: "full"
56 | subset: 1000
57 | features:
58 | lda: true
59 |
--------------------------------------------------------------------------------
/tests/data/configs/basic_segment_config.yaml:
--------------------------------------------------------------------------------
1 |
2 | energy_threshold: 9
3 | energy_mean_scale: 0.5
4 | max_segment_length: 5
5 | min_pause_duration: 0.25
6 |
--------------------------------------------------------------------------------
/tests/data/configs/basic_train_config.yaml:
--------------------------------------------------------------------------------
1 | beam: 100
2 | retry_beam: 400
3 |
4 | features:
5 | type: "mfcc"
6 | use_energy: false
7 | frame_shift: 10
8 | use_pitch: false
9 |
10 | training:
11 | - monophone:
12 | num_iterations: 5
13 | max_gaussians: 1000
14 | subset: 100
15 |
16 | - triphone:
17 | num_iterations: 3
18 | num_leaves: 250
19 | max_gaussians: 2000
20 | cluster_threshold: -1
21 | subset: 1000
22 |
23 | - lda:
24 | num_iterations: 2
25 | num_leaves: 500
26 | max_gaussians: 4000
27 | subset: 1000
28 | features:
29 | splice_left_context: 3
30 | splice_right_context: 3
31 |
32 | - sat:
33 | num_iterations: 2
34 | num_leaves: 500
35 | max_gaussians: 5000
36 | power: 0.2
37 | silence_weight: 0.0
38 | fmllr_update_type: "full"
39 | subset: 1000
40 | features:
41 | lda: true
42 |
--------------------------------------------------------------------------------
/tests/data/configs/basic_train_lm.yaml:
--------------------------------------------------------------------------------
1 | order: 3
2 | method: kneser_ney
3 | prune_thresh_small: 0.0000003
4 | prune_thresh_medium: 0.0000001
5 |
--------------------------------------------------------------------------------
/tests/data/configs/different_punctuation_config.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 400
3 | word_break_markers: .-'][
4 | punctuation: .-'][
5 |
6 | features:
7 | type: "mfcc"
8 | use_energy: true
9 | frame_shift: 10
10 | use_pitch: false
11 |
12 | training:
13 | - monophone:
14 | num_iterations: 3
15 | max_gaussians: 500
16 | subset: 1000
17 |
--------------------------------------------------------------------------------
/tests/data/configs/eval_mapping.yaml:
--------------------------------------------------------------------------------
1 | ʔ: T
2 | h: HH
3 | ç: HH
4 | i: [IY0, IY2, IY1]
5 | iː: [IY0, IY2, IY1]
6 | ɚ: [ER0, ER2, ER1]
7 | ɝ: [ER0, ER2, ER1]
8 | ɝː: [ER0, ER2, ER1]
9 | dʒ: JH
10 | tʃ: CH
11 | ɑ: [AA0, AA2, AA1]
12 | ɑː: [AA0, AA2, AA1]
13 | ʊ: [UH0, UH2, UH1]
14 | ɛ: [EH0, EH2, EH1]
15 | oʊ: [OW0, OW2, OW1]
16 | ow: [OW0, OW2, OW1]
17 | aʊ: [AW0, AW2, AW1]
18 | aw: [AW0, AW2, AW1]
19 | aɪ: [AY0, AY2, AY1]
20 | aj: [AY0, AY2, AY1]
21 | ɔ: [AO0, AO2, AO1]
22 | ɒ: [AO0, AO2, AO1]
23 | ɔː: [AO0, AO2, AO1]
24 | ɒː: [AO0, AO2, AO1]
25 | ɔɪ: [OY0, OY2, OY1]
26 | ɔj: [OY0, OY2, OY1]
27 | u: [UW0, UW2, UW1]
28 | ʉ: [UW0, UW2, UW1]
29 | uː: [UW0, UW2, UW1]
30 | ʉː: [UW0, UW2, UW1]
31 | æ: [AE0, AE2, AE1]
32 | æː: [AE0, AE2, AE1]
33 | eɪ: [EY0, EY2, EY1]
34 | ej: [EY0, EY2, EY1]
35 | ɪ: [IH0, IH2, IH1]
36 | ð: DH
37 | ʃ: SH
38 | ʒ: ZH
39 | ɹ: R
40 | j: Y
41 | θ: TH
42 | ə: [AH0, AH2, AH1]
43 | ʌ: [AH0, AH2, AH1]
44 | ɐ: [AH0, AH2, AH1]
45 | n̩: N
46 | n: N
47 | m̩: M
48 | m: M
49 | mʲ: M
50 | ɱ: M
51 | v: V
52 | vʲ: V
53 | fʲ: F
54 | f: F
55 | l̩: L
56 | l: L
57 | ɫ̩: L
58 | ɫ: L
59 | ʎ: L
60 | ɾ: [D, T]
61 | pʰ: P
62 | pʲ: P
63 | p̚: P
64 | bʲ: B
65 | b̚: B
66 | tʰ: T
67 | t: T
68 | d: D
69 | d̚: D
70 | dʲ: D
71 | t̚: T
72 | tʲ: T
73 | kʰ: K
74 | k̚: K
75 | cʰ: K
76 | c̚: K
77 | ɡ: G
78 | ɡ̚: G
79 | ɟ̚: G
80 | ɟ: G
81 | ŋ: NG
82 | ɲ: [NG, N]
83 |
--------------------------------------------------------------------------------
/tests/data/configs/g2p_config.yaml:
--------------------------------------------------------------------------------
1 | punctuation: "、。।,@<>\"(),.:;¿?¡!\\&%#*~【】,…‥「」『』〝〟″⟨⟩♪・‹›«»~′$+="
2 | clitic_markers: "'’"
3 | compound_markers: "-"
4 | num_pronunciations: 1
5 |
--------------------------------------------------------------------------------
/tests/data/configs/ivector_train.yaml:
--------------------------------------------------------------------------------
1 |
2 | features:
3 | type: "mfcc"
4 | use_energy: true
5 | frame_shift: 10
6 |
7 | training:
8 | - dubm:
9 | num_iterations_init: 4
10 | num_iterations: 2
11 | - ivector:
12 | num_iterations: 2
13 | gaussian_min_count: 2
14 |
--------------------------------------------------------------------------------
/tests/data/configs/lda_sat_train.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 400
3 |
4 | features:
5 | type: "mfcc"
6 | use_energy: false
7 | frame_shift: 10
8 | use_pitch: false
9 |
10 | training:
11 | - monophone:
12 | num_iterations: 4
13 | max_gaussians: 1000
14 | subset: 1000
15 |
16 | - triphone:
17 | num_iterations: 2
18 | num_leaves: 1500
19 | max_gaussians: 2000
20 | cluster_threshold: -1
21 | subset: 3000
22 | boost_silence: 1.25
23 | power: 0.25
24 |
25 | - lda:
26 | num_iterations: 3
27 | num_leaves: 1500
28 | max_gaussians: 4000
29 | subset: 5000
30 | features:
31 | splice_left_context: 3
32 | splice_right_context: 3
33 |
34 | - sat:
35 | num_iterations: 2
36 | num_leaves: 1500
37 | max_gaussians: 8000
38 | power: 0.2
39 | silence_weight: 0.0
40 | fmllr_update_type: "full"
41 | subset: 5000
42 | features:
43 | lda: true
44 |
--------------------------------------------------------------------------------
/tests/data/configs/lda_train.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 400
3 |
4 | features:
5 | type: "mfcc"
6 | use_energy: false
7 | frame_shift: 10
8 | use_pitch: false
9 |
10 | training:
11 | - monophone:
12 | num_iterations: 4
13 | max_gaussians: 100
14 | subset: 1000
15 |
16 | - lda:
17 | num_iterations: 15
18 | num_leaves: 500
19 | max_gaussians: 4000
20 | subset: 1000
21 | features:
22 | splice_left_context: 3
23 | splice_right_context: 3
24 |
--------------------------------------------------------------------------------
/tests/data/configs/mono_align.yaml:
--------------------------------------------------------------------------------
1 | beam: 100
2 | retry_beam: 400
3 |
--------------------------------------------------------------------------------
/tests/data/configs/mono_train.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 400
3 |
4 | features:
5 | type: "mfcc"
6 | use_energy: true
7 | frame_shift: 10
8 | use_pitch: false
9 |
10 | training:
11 | - monophone:
12 | num_iterations: 5
13 | max_gaussians: 500
14 | subset: 1000
15 |
--------------------------------------------------------------------------------
/tests/data/configs/no_punctuation_config.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 400
3 | punctuation:
4 | word_break_markers:
5 | compound_markers:
6 | quote_markers:
7 | clitic_markers:
8 |
9 | features:
10 | type: "mfcc"
11 | use_energy: true
12 | frame_shift: 10
13 | use_pitch: false
14 |
15 | training:
16 | - monophone:
17 | num_iterations: 3
18 | max_gaussians: 500
19 | subset: 1000
20 |
--------------------------------------------------------------------------------
/tests/data/configs/out_of_order_config.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 40
3 |
4 | features:
5 | type: "mfcc"
6 | use_energy: false
7 | frame_shift: 10
8 | use_pitch: false
9 |
10 | training:
11 | - triphone:
12 | num_iterations: 35
13 | num_leaves: 2500
14 | max_gaussians: 20000
15 | cluster_threshold: -1
16 | subset: 30000
17 | boost_silence: 1.25
18 | power: 0.25
19 |
20 | - monophone:
21 | num_iterations: 40
22 | max_gaussians: 1000
23 | subset: 10000
24 |
--------------------------------------------------------------------------------
/tests/data/configs/pitch_tri_train.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 400
3 |
4 | features:
5 | type: "mfcc"
6 | use_energy: false
7 | frame_shift: 10
8 | use_pitch: true
9 | use_voicing: true
10 |
11 | training:
12 | - monophone:
13 | num_iterations: 5
14 | max_gaussians: 100
15 | subset: 1000
16 |
17 | - triphone:
18 | num_iterations: 3
19 | num_leaves: 250
20 | max_gaussians: 2000
21 | cluster_threshold: -1
22 | subset: 3000
23 | boost_silence: 1.25
24 | power: 0.25
25 |
--------------------------------------------------------------------------------
/tests/data/configs/pron_train.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 400
3 |
4 | features:
5 | type: "mfcc"
6 | use_energy: true
7 | frame_shift: 10
8 | use_pitch: false
9 |
10 | training:
11 | - monophone:
12 | num_iterations: 5
13 | max_gaussians: 500
14 | subset: 1000
15 |
16 | - pronunciation_probabilities:
17 | subset: 1000
18 |
--------------------------------------------------------------------------------
/tests/data/configs/sat_train.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 500
3 |
4 | features:
5 | type: "mfcc"
6 | use_energy: false
7 | frame_shift: 10
8 | use_pitch: false
9 |
10 | training:
11 | - monophone:
12 | num_iterations: 5
13 | max_gaussians: 500
14 | subset: 1000
15 |
16 | - triphone:
17 | num_iterations: 3
18 | num_leaves: 1500
19 | max_gaussians: 2000
20 | cluster_threshold: -1
21 | subset: 3000
22 | boost_silence: 1.25
23 | power: 0.25
24 |
25 | - sat:
26 | num_iterations: 5
27 | num_leaves: 2000
28 | max_gaussians: 10000
29 | power: 0.2
30 | silence_weight: 0.0
31 | fmllr_update_type: "full"
32 | subset: 1000
33 |
--------------------------------------------------------------------------------
/tests/data/configs/test_groups.yaml:
--------------------------------------------------------------------------------
1 | bilabial_stops:
2 | - p
3 | - b
4 | labiodental_obstruents:
5 | - f
6 | - v
7 | dental_obstruents:
8 | - th
9 | - dh
10 | coronal_stops:
11 | - t
12 | - d
13 | coronal_affricates:
14 | - ch
15 | - jh
16 | coronal_fricatives:
17 | - sh
18 | - zh
19 | - s
20 | - z
21 | rhotics:
22 | - r
23 | nasals:
24 | - m
25 | - n
26 | - ng
27 | laterals:
28 | - l
29 | dorsal_obstruents:
30 | - g
31 | - k
32 | voiceless_glottals:
33 | - hh
34 | central_vowels:
35 | - ah
36 | - er
37 | - uh
38 | - ih
39 | front_diphthongs:
40 | - ay
41 | - oy
42 | back_diphthongs:
43 | - ow
44 | - aw
45 | low_vowels:
46 | - aa
47 | - ao
48 | high_front_vowels:
49 | - iy
50 | front_glides:
51 | - y
52 | mid_front_vowels:
53 | - ae
54 | - eh
55 | - ey
56 | high_back_vowels:
57 | - uw
58 | back_glides:
59 | - w
60 |
--------------------------------------------------------------------------------
/tests/data/configs/test_rules.yaml:
--------------------------------------------------------------------------------
1 | rules:
2 | - following_context: ''
3 | preceding_context: ''
4 | replacement: ih
5 | segment: iy
6 |
--------------------------------------------------------------------------------
/tests/data/configs/train_g2p_acoustic.yaml:
--------------------------------------------------------------------------------
1 | beam: 100
2 | retry_beam: 800
3 |
4 | features:
5 | type: "mfcc"
6 | use_energy: false
7 | frame_shift: 10
8 | use_pitch: false
9 |
10 | training:
11 | - monophone:
12 | num_iterations: 5
13 | max_gaussians: 1000
14 | subset: 100
15 |
16 | - triphone:
17 | num_iterations: 3
18 | num_leaves: 250
19 | max_gaussians: 2000
20 | cluster_threshold: -1
21 | subset: 1000
22 |
23 | - lda:
24 | num_iterations: 2
25 | num_leaves: 500
26 | max_gaussians: 4000
27 | subset: 1000
28 | features:
29 | splice_left_context: 3
30 | splice_right_context: 3
31 |
32 | - sat:
33 | num_iterations: 2
34 | num_leaves: 500
35 | max_gaussians: 5000
36 | power: 0.2
37 | silence_weight: 0.0
38 | fmllr_update_type: "full"
39 | subset: 1000
40 | features:
41 | lda: true
42 |
43 | - pronunciation_probabilities:
44 | train_g2p: true
45 | num_iterations: 5
46 |
47 | - sat:
48 | num_iterations: 2
49 | num_leaves: 500
50 | max_gaussians: 5000
51 | power: 0.2
52 | silence_weight: 0.0
53 | fmllr_update_type: "full"
54 | subset: 1000
55 | features:
56 | lda: true
57 |
--------------------------------------------------------------------------------
/tests/data/configs/train_g2p_config.yaml:
--------------------------------------------------------------------------------
1 | punctuation: "、。।,@<>\"(),.:;¿?¡!\\&%#*~【】,…‥「」『』〝〟″⟨⟩♪・‹›«»~′$+="
2 | clitic_markers: "'’"
3 | compound_markers: "-"
4 | num_pronunciations: 1 # Used if running in validation mode
5 | order: 7
6 | random_starts: 25
7 | seed: 1917
8 | delta: 0.0009765
9 | lr: 1.0
10 | batch_size: 200
11 | num_iterations: 10
12 | smoothing_method: "kneser_ney"
13 | pruning_method: "relative_entropy"
14 | model_size: 1000000
15 |
--------------------------------------------------------------------------------
/tests/data/configs/transcribe.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/configs/transcribe.yaml
--------------------------------------------------------------------------------
/tests/data/configs/tri_train.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 400
3 |
4 | features:
5 | type: "mfcc"
6 | use_energy: false
7 | frame_shift: 10
8 | use_pitch: false
9 |
10 | training:
11 | - monophone:
12 | num_iterations: 5
13 | max_gaussians: 100
14 | subset: 1000
15 |
16 | - triphone:
17 | num_iterations: 3
18 | num_leaves: 250
19 | max_gaussians: 2000
20 | cluster_threshold: -1
21 | subset: 3000
22 | boost_silence: 1.25
23 | power: 0.25
24 |
--------------------------------------------------------------------------------
/tests/data/configs/xsampa_train.yaml:
--------------------------------------------------------------------------------
1 | beam: 10
2 | retry_beam: 400
3 | ignore_case: false
4 | punctuation: .-'][
5 |
6 | features:
7 | type: "mfcc"
8 | use_energy: true
9 | frame_shift: 10
10 | use_pitch: false
11 |
12 | training:
13 | - monophone:
14 | num_iterations: 10
15 | max_gaussians: 500
16 | subset: 1000
17 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/acoustic_g2p_dictionary.yaml:
--------------------------------------------------------------------------------
1 | default: english_us_mfa
2 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/graphemes.txt:
--------------------------------------------------------------------------------
1 | a
2 | b
3 | d
4 | o
5 | r
6 | w
7 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/lexicon.text.fst:
--------------------------------------------------------------------------------
1 | 0 1 0.6931471805599453
2 | 0 1 sil 0.6931471805599453
3 | 2 1 sil
4 | 1 1 sil_S !SIL 0.6931471805599453
5 | 1 2 sil_S !SIL 0.6931471805599453
6 | 1 1 spn_S 0.6931471805599453
7 | 1 2 spn_S 0.6931471805599453
8 | 1 3 phonea_B worda
9 | 3 1 phoneb_E 0.6931471805599453
10 | 3 2 phoneb_E 0.6931471805599453
11 | 1 4 phonea_B wordb
12 | 4 1 phonec_E 0.6931471805599453
13 | 4 2 phonec_E 0.6931471805599453
14 | 1 0
15 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/phone_map.txt:
--------------------------------------------------------------------------------
1 | sil sil sil_B sil_E sil_I sil_S
2 | spn spn spn_B spn_E spn_I spn_S
3 | phoneb phoneb_B phoneb_E phoneb_I phoneb_S
4 | phonea phonea_B phonea_E phonea_I phonea_S
5 | phonec phonec_B phonec_E phonec_I phonec_S
6 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/phones.txt:
--------------------------------------------------------------------------------
1 | <eps> 0
2 | sil 1
3 | sil_B 2
4 | sil_E 3
5 | sil_I 4
6 | sil_S 5
7 | spn 6
8 | spn_B 7
9 | spn_E 8
10 | spn_I 9
11 | spn_S 10
12 | phonea_B 11
13 | phonea_E 12
14 | phonea_I 13
15 | phonea_S 14
16 | phoneb_B 15
17 | phoneb_E 16
18 | phoneb_I 17
19 | phoneb_S 18
20 | phonec_B 19
21 | phonec_E 20
22 | phonec_I 21
23 | phonec_S 22
24 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/phones/extra_questions.int:
--------------------------------------------------------------------------------
1 | 1 2 3 4 5 6 7 8 9 10
2 | 11 12 13 14 15 16 17 18 19 20 21 22
3 | 11 15 19
4 | 12 16 20
5 | 13 17 21
6 | 14 18 22
7 | 1 6
8 | 2 7
9 | 3 8
10 | 4 9
11 | 5 10
12 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/phones/extra_questions.txt:
--------------------------------------------------------------------------------
1 | sil sil_B sil_E sil_I sil_S spn spn_B spn_E spn_I spn_S
2 | phonea_B phonea_E phonea_I phonea_S phoneb_B phoneb_E phoneb_I phoneb_S phonec_B phonec_E phonec_I phonec_S
3 | phonea_B phoneb_B phonec_B
4 | phonea_E phoneb_E phonec_E
5 | phonea_I phoneb_I phonec_I
6 | phonea_S phoneb_S phonec_S
7 | sil spn
8 | sil_B spn_B
9 | sil_E spn_E
10 | sil_I spn_I
11 | sil_S spn_S
12 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/phones/roots.int:
--------------------------------------------------------------------------------
1 | shared split 1 2 3 4 5
2 | shared split 6 7 8 9 10
3 | shared split 11 12 13 14
4 | shared split 15 16 17 18
5 | shared split 19 20 21 22
6 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/phones/roots.txt:
--------------------------------------------------------------------------------
1 | shared split sil sil_B sil_E sil_I sil_S
2 | shared split spn spn_B spn_E spn_I spn_S
3 | shared split phonea_B phonea_E phonea_I phonea_S
4 | shared split phoneb_B phoneb_E phoneb_I phoneb_S
5 | shared split phonec_B phonec_E phonec_I phonec_S
6 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/phones/sets.int:
--------------------------------------------------------------------------------
1 | 1 2 3 4 5
2 | 6 7 8 9 10
3 | 11 12 13 14
4 | 15 16 17 18
5 | 19 20 21 22
6 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/phones/sets.txt:
--------------------------------------------------------------------------------
1 | sil sil_B sil_E sil_I sil_S
2 | spn spn_B spn_E spn_I spn_S
3 | phonea_B phonea_E phonea_I phonea_S
4 | phoneb_B phoneb_E phoneb_I phoneb_S
5 | phonec_B phonec_E phonec_I phonec_S
6 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/phones/word_boundary.int:
--------------------------------------------------------------------------------
1 | 1 nonword
2 | 2 begin
3 | 3 end
4 | 4 internal
5 | 5 singleton
6 | 6 nonword
7 | 7 begin
8 | 8 end
9 | 9 internal
10 | 10 singleton
11 | 11 begin
12 | 12 end
13 | 13 internal
14 | 14 singleton
15 | 15 begin
16 | 16 end
17 | 17 internal
18 | 18 singleton
19 | 19 begin
20 | 20 end
21 | 21 internal
22 | 22 singleton
23 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/phones/word_boundary.txt:
--------------------------------------------------------------------------------
1 | sil nonword
2 | sil_B begin
3 | sil_E end
4 | sil_I internal
5 | sil_S singleton
6 | spn nonword
7 | spn_B begin
8 | spn_E end
9 | spn_I internal
10 | spn_S singleton
11 | phonea_B begin
12 | phonea_E end
13 | phonea_I internal
14 | phonea_S singleton
15 | phoneb_B begin
16 | phoneb_E end
17 | phoneb_I internal
18 | phoneb_S singleton
19 | phonec_B begin
20 | phonec_E end
21 | phonec_I internal
22 | phonec_S singleton
23 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/topo:
--------------------------------------------------------------------------------
1 | <Topology>
2 | <TopologyEntry>
3 | <ForPhones>
4 | 11 12 13 14 15 16 17 18 19 20 21 22
5 | </ForPhones>
6 | <State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
7 | <State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
8 | <State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
9 | <State> 3 </State>
10 | </TopologyEntry>
11 | <TopologyEntry>
12 | <ForPhones>
13 | 1 2 3 4 5 6 7 8 9 10
14 | </ForPhones>
15 | <State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State>
16 | <State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
17 | <State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
18 | <State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
19 | <State> 4 <PdfClass> 4 <Transition> 4 0.75 <Transition> 5 0.25 </State>
20 | <State> 5 </State>
21 | </TopologyEntry>
22 | </Topology>
23 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/expected/words.txt:
--------------------------------------------------------------------------------
1 | <eps> 0
2 | !SIL 1
3 | <unk> 2
4 | worda 3
5 | wordb 4
6 | #0 5
7 | <s> 6
8 | </s> 7
9 |
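phones.txt and words.txt above are Kaldi-style symbol tables, one "symbol integer-id" pair per line (<eps> is always id 0; #0, <s>, and </s> are the disambiguation and sentence-boundary symbols). A small loader sketch, illustrative rather than MFA's own code:

def read_symbol_table(path):
    """Map each symbol to its integer id, e.g. from words.txt or phones.txt."""
    table = {}
    with open(path, encoding="utf8") as f:
        for line in f:
            if not line.strip():
                continue
            symbol, index = line.split()
            table[symbol] = int(index)
    return table

# read_symbol_table("tests/data/dictionaries/expected/words.txt")["worda"] == 3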
--------------------------------------------------------------------------------
/tests/data/dictionaries/test_abstract.txt:
--------------------------------------------------------------------------------
1 | worda phonea phoneb
2 | wordb phonea phonec
3 | wordc phonec
4 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/test_acoustic.txt:
--------------------------------------------------------------------------------
1 | this dh ih s
2 | is ih z
3 | the dh ah
4 | acoustic ah k uw s t ih k
5 | corpus k ao r p us
6 | i'm ay m
7 | talking t aa k ih ng
8 | pretty p r eh t iy
9 | fast f ae s t
10 | here hh iy r
11 | there's dh eh r z
12 | nothing n ah th ih ng
13 | going g ow ih ng
14 | else eh l s
15 | on ah n
16 | we're w iy r
17 | just j ah s t
18 | yknow y ah n ow
19 | some s ah m
20 | speech s p iy ch
21 | errors eh r ao r z
22 | but b ah t
23 | who hh uw
24 | cares k ae r z
25 | me m iy
26 | really r iy l iy
27 | slow s l ow
28 | and ae n d
29 | slightly s l ay t l iy
30 | lower l ow w er
31 | in ih n
32 | intensity ih n t eh n s ih t iy
33 | saying s ey ih ng
34 | words w er d z
35 | here's h iy r z
36 | more m ao r
37 | um ah m
38 | that dh ae t
39 | should sh uh d
40 | be b iy
41 | all aa l
42 | thanks th ae ng k s
43 | just jh ah s t
44 | sound s aw n d
45 | environment eh n v ay r ah n m eh n t
46 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/test_basic.txt:
--------------------------------------------------------------------------------
1 | 'm m
2 | ’m m
3 | i’m ay m ih
4 | this dh ih s
5 | is ih z
6 | the dh ah
7 | acoustic ah k uw s t ih k
8 | corpus k ao r p ah s
9 | i'm ay m
10 | talking t aa k ih ng
11 | pretty p r eh t iy
12 | fast f ae s t
13 | here hh iy r
14 | there's dh eh r z
15 | nothing n ah th ih ng
16 | going g ow ih ng
17 | else eh l s
18 | on ah n
19 | we're w iy r
20 | just jh ah s t
21 | yknow j ah n ow
22 | some s ah m
23 | speech s p iy ch
24 | errors eh r ao r z
25 | but b ah t
26 | who hh uw
27 | cares k ae r z
28 | me m iy
29 | really r iy l iy
30 | slow s l ow
31 | and ae n d
32 | slightly s l ay t l iy
33 | lower l ow w er
34 | in ih n
35 | intensity ih n t eh n s ih t iy
36 | saying s ey ih ng
37 | words w er d z
38 | here's hh iy r z
39 | more m ao r
40 | um ah m
41 | that dh ae t
42 | should sh uh d
43 | be b iy
44 | all aa l
45 | thanks th ae ng k s
46 | uh ah
47 | so s ow
48 | sick s ih k
49 | i ay
50 | have hh ae v
51 | a ah
52 | cold k ow l d
53 | probably p r aa b ah b l iy
54 | sound s aw n d
55 | quite k w ay t
56 | different d ih f er ah n t
57 | than dh ae n
58 | recording r iy k ao r d ih ng
59 | environment eh n v ay r ah n m eh n t
60 | also aa l s ow
61 | bunch b ah n ch
62 | did d ih d
63 | not n aa t
64 | original ao r ih g ih n ah l
65 | one w ah n
66 | long l aa n g
67 | pause p aa z
68 | think th ih ng k
69 | good g uh d
70 | alright aa l r ay t
71 | much m ah ch
72 | since s ih n s
73 | quality k w aa l ih t iy
74 | of ah v
75 | gonna g ah n ah
76 | cough k aa f
77 | for f ao r
78 | little l ih t ah l
79 | bit b ih t
80 | just jh ah s t
81 | to t uw
82 | yup j ah p
83 | happened hh ae p ah n d
84 | that's dh ae t s
85 | hopefully hh ow p f uh l iy
86 | levels l eh v ah l z
87 | okay ow k ay
88 | lot l aa t
89 | yeah j ae
90 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/test_extra_annotations.txt:
--------------------------------------------------------------------------------
1 | worda phonea phoneb
2 | wordb phonea phonec
3 | wordc phonec
4 | {LG} laugh
5 | {SL} sil
6 | sil
7 | {VN} vocnoise
8 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/test_frclitics.txt:
--------------------------------------------------------------------------------
1 | aujourd'hui o zh u r d w i
2 | c'est s e
3 | est e
4 | c' s
5 | c s e
6 | m' m
7 | m 3 m
8 | appelle a p 3 l
9 | vingt-cinq v ae~ s ae~ k
10 | vingt v ae~
11 | six s i s
12 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/test_hindi.txt:
--------------------------------------------------------------------------------
1 | हैं ɦ ɛ̃ː
2 | हूं ɦ ũː
3 | हौंसला ɦ ɔ̃ː s̪ l̪ aː
4 |
--------------------------------------------------------------------------------
/tests/data/dictionaries/test_japanese.txt:
--------------------------------------------------------------------------------
1 | はい h a i
2 | はい h aː
3 | 何 n a ɴ
4 | 何 n a ɲ i
5 | でしょう d e ɕ oː
6 |
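Dictionary fixtures such as test_japanese.txt can list the same word more than once (はい and 何 each have two pronunciations), so a loader has to accumulate a list of variants per word rather than overwrite earlier entries. A minimal sketch, not MFA's parser:

from collections import defaultdict

def load_pronunciations(path):
    """Collect every pronunciation variant listed for each word."""
    lexicon = defaultdict(list)
    with open(path, encoding="utf8") as f:
        for line in f:
            if not line.strip():
                continue
            word, *phones = line.split()
            lexicon[word].append(phones)
    return lexicon

# load_pronunciations("tests/data/dictionaries/test_japanese.txt")["はい"]
# -> [['h', 'a', 'i'], ['h', 'aː']]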
--------------------------------------------------------------------------------
/tests/data/dictionaries/test_mixed_format_dictionary.txt:
--------------------------------------------------------------------------------
1 | 'm 1.0 m
2 | ’m m
3 | i’m 0.01 ay m ih
4 | this 1.0 0.43 1.23 0.85 dh ih s
5 | is 1.0 0.5 1.0 1.0 ih z
6 | the 1.0 0.5 1.0 1.0 dh ah
7 | acoustic ah k uw s t ih k
8 | corpus k ao r p ah s
9 | i'm ay m
10 | talking t aa k ih ng
11 | pretty p r eh t iy
12 | fast f ae s t
13 | here hh iy r
14 | there's dh eh r z
15 | nothing n ah th ih ng
16 | going g ow ih ng
17 | else eh l s
18 | on ah n
19 | we're w iy r
20 | just jh ah s t
21 | yknow y ah n ow
22 | some s ah m
23 | speech s p iy ch
24 | errors eh r ao r z
25 | but b ah t
26 | who hh uw
27 | cares k ae r z
28 | me m iy
29 | really r iy l iy
30 | slow s l ow
31 | and ae n d
32 | slightly s l ay t l iy
33 | lower l ow w er
34 | in ih n
35 | intensity ih n t eh n s ih t iy
36 | saying s ey ih ng
37 | words w er d z
38 | here's hh iy r z
39 | more m ao r
40 | um ah m
41 | that dh ae t
42 | should sh uh d
43 | be b iy
44 | all aa l
45 | thanks th ae ng k s
46 | uh ah
47 | so s ow
48 | sick s ih k
49 | i ay
50 | have hh ae v
51 | a ah
52 | cold k ow l d
53 | probably p r aa b ah b l iy
54 | sound s aw n d
55 | quite k w ay t
56 | different d ih f er ah n t
57 | than dh ae n
58 | recording r iy k ao r d ih ng
59 | environment eh n v ay r ah n m eh n t
60 | also aa l s ow
61 | bunch b ah n ch
62 | did d ih d
63 | not n aa t
64 | original ao r ih g ih n ah l
65 | one w ah n
66 | long l aa n g
67 | pause p aa z
68 | think th ih ng k
69 | good g uh d
70 | alright aa l r ay t
71 | much m ah ch
72 | since s ih n s
73 | quality k w aa l ih t iy
74 | of ah v
75 | gonna g ah n ah
76 | cough k aa f
77 | for f ao r
78 | little l ih t ah l
79 | bit b ih t
80 | to t uw
81 | yup y ah p
82 | happened hh ae p ah n d
83 | that's dh ae t s
84 | hopefully hh ow p f uh l iy
85 | levels l eh v ah l z
86 | okay ow k ay
87 | lot l aa t
88 | yeah y ae
89 |
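The mixed-format fixture interleaves plain entries with entries that carry up to four numeric columns (what appear to be pronunciation and silence probabilities) between the word and its phones, e.g. "this 1.0 0.43 1.23 0.85 dh ih s". A hedged sketch of splitting such a line, assuming phone symbols are never bare numbers (true here, though not of every fixture, e.g. the "3" phone in test_frclitics.txt); this is not MFA's actual parser:

def parse_dictionary_line(line):
    """Split a line into (word, leading numeric columns, phones)."""
    word, *rest = line.split()
    numbers = []
    while rest and len(numbers) < 4:
        try:
            numbers.append(float(rest[0]))
        except ValueError:
            break
        rest.pop(0)
    return word, numbers, rest

# parse_dictionary_line("this 1.0 0.43 1.23 0.85 dh ih s")
# -> ("this", [1.0, 0.43, 1.23, 0.85], ["dh", "ih", "s"])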
--------------------------------------------------------------------------------
/tests/data/dictionaries/test_tabbed_dictionary.txt:
--------------------------------------------------------------------------------
1 | 'm 1.0 m
2 | ’m m
3 | i’m 0.01 ay m ih
4 | this 1.0 0.43 1.23 0.85 dh ih s
5 | is 1.0 0.5 1.0 1.0 ih z
6 | the 1.0 0.5 1.0 1.0 dh ah
7 | acoustic ah k uw s t ih k
8 | corpus k ao r p us
9 | i'm ay m
10 | talking t aa k ih ng
11 | pretty p r eh t iy
12 | fast f ae s t
13 | here hh iy r
14 | there's dh eh r z
15 | nothing n ah th ih ng
16 | going g ow ih ng
17 | else eh l s
18 | on ah n
19 | we're w iy r
20 | just j ah s t
21 | yknow y ah n ow
22 | some s ah m
23 | speech s p iy ch
24 | errors eh r ao r z
25 | but b ah t
26 | who hh uw
27 | cares k ae r z
28 | me m iy
29 | really r iy l iy
30 | slow s l ow
31 | and ae n d
32 | slightly s l ay t l iy
33 | lower l ow w er
34 | in ih n
35 | intensity ih n t eh n s ih t iy
36 | saying s ey ih ng
37 | words w er d z
38 | here's hh iy r z
39 | more m ao r
40 | um ah m
41 | that dh ae t
42 | should sh uh d
43 | be b iy
44 | all aa l
45 | thanks th ae ng k s
46 | uh ah
47 | so s ow
48 | sick s ih k
49 | i ay
50 | have hh ae v
51 | a ah
52 | cold k ow l d
53 | probably p r aa b ah b l iy
54 | sound s aw n d
55 | quite k w ay t
56 | different d ih f er ah n t
57 | than dh ae n
58 | recording r iy k ao r d ih ng
59 | environment eh n v ay r ah n m eh n t
60 | also aa l s ow
61 | bunch b ah n ch
62 | did d ih d
63 | not n aa t
64 | original ao r ih g ih n ah l
65 | one w ah n
66 | long l aa n g
67 | pause p aa z
68 | think th ih ng k
69 | good g uh d
70 | alright aa l r ay t
71 | much m ah ch
72 | since s ih n s
73 | quality k w aa l ih t iy
74 | of ah v
75 | gonna g ah n ah
76 | cough k aa f
77 | for f ao r
78 | little l ih t ah l
79 | bit b ih t
80 | just j ah s t
81 | to t uw
82 | yup y ah p
83 | happened hh ae p ah n d
84 | that's dh ae t s
85 | hopefully hh ow p f uh l iy
86 | levels l eh v ah l z
87 | okay ow k ay
88 | lot l aa t
89 | yeah y ae
90 |
--------------------------------------------------------------------------------
/tests/data/lab/13697_11991_000000.lab:
--------------------------------------------------------------------------------
1 | la sorpresa y el disgusto que produjo a wentworth la substitución de una cuñada por otra el gesto que se dibujó en su fisonomía el asombro que manifestó y las palabras a duras penas reprimidas que asomaron a sus labios mientras le hablaba carlos
2 |
--------------------------------------------------------------------------------
/tests/data/lab/61-70968-0000.lab:
--------------------------------------------------------------------------------
1 | HE BEGAN A CONFUSED COMPLAINT AGAINST THE WIZARD WHO HAD VANISHED BEHIND THE CURTAIN ON THE LEFT
2 |
--------------------------------------------------------------------------------
/tests/data/lab/acoustic_corpus.lab:
--------------------------------------------------------------------------------
1 | this is the acoustic corpus i'm talking pretty fast here there's nothing going else going on we're just yknow there's some speech errors but who cares um this is me talking really slow and slightly lower in intensity uh we're just saying some words and here's some more words words words words um and that should be all thanks
2 |
--------------------------------------------------------------------------------
/tests/data/lab/cold_corpus.lab:
--------------------------------------------------------------------------------
1 | uh so this is the sick corpus uh i have a cold so i probably sound quite different than the uh acoustic corpus um the recording environment is also quite different and i'm saying a bunch of different words that i did not say in the original one uh and here's a long pause and i think this is probably good alright thanks
2 |
--------------------------------------------------------------------------------
/tests/data/lab/cold_corpus3.lab:
--------------------------------------------------------------------------------
1 | alright so this is the sick corpus uh hopefully the recording levels are okay um i have a cold so this probably sounds a lot different than the acoustic corpus uh and i'm also saying [adif] bunch of different words um i think i'm probably gonna cough here yeah so that just happened uh and uh that should be good alright thanks
2 |
--------------------------------------------------------------------------------
/tests/data/lab/cold_corpus3_extra.lab:
--------------------------------------------------------------------------------
1 | alright so this is the sick corpus uh hopefully the recording levels are okay um i have a cold so this probably sounds a lot different than the acoustic corpus uh and i'm also saying [adif] bunch of different words um i think i'm probably gonna cough here {CG} yeah so that just happened uh and uh that should be good alright thanks
2 |
--------------------------------------------------------------------------------
/tests/data/lab/common_voice_en_22058264.lab:
--------------------------------------------------------------------------------
1 | no
2 |
--------------------------------------------------------------------------------
/tests/data/lab/common_voice_en_22058266.lab:
--------------------------------------------------------------------------------
1 | Fire fox
2 |
--------------------------------------------------------------------------------
/tests/data/lab/common_voice_en_22058267.lab:
--------------------------------------------------------------------------------
1 | six
2 |
--------------------------------------------------------------------------------
/tests/data/lab/common_voice_ja_24511055.lab:
--------------------------------------------------------------------------------
1 | 真っ昼間なのにキャンプの外れの電柱に電球がともっていた
2 |
--------------------------------------------------------------------------------
/tests/data/lab/devanagari.lab:
--------------------------------------------------------------------------------
1 | हैंः हूं हौंसला
2 |
--------------------------------------------------------------------------------
/tests/data/lab/french_clitics.lab:
--------------------------------------------------------------------------------
1 | aujourd aujourd'hui m'appelle purple-people-eater vingt-six m'm'appelle c'est m'c'est m'appele m'ving-sic flying'purple-people-eater
2 |
--------------------------------------------------------------------------------
/tests/data/lab/japanese.lab:
--------------------------------------------------------------------------------
1 | 「はい」、。! 『何 でしょう』
2 |
--------------------------------------------------------------------------------
/tests/data/lab/multilingual_ipa.txt:
--------------------------------------------------------------------------------
1 | i can't think of an animal that's less chad like than a sloth
2 |
--------------------------------------------------------------------------------
/tests/data/lab/multilingual_ipa_2.txt:
--------------------------------------------------------------------------------
1 | welcome to a series of plat chat videos where we're gonna tackle every single team in the overwatch league twenty twenty
2 |
--------------------------------------------------------------------------------
/tests/data/lab/multilingual_ipa_3.txt:
--------------------------------------------------------------------------------
1 | and run you through
2 |
--------------------------------------------------------------------------------
/tests/data/lab/multilingual_ipa_4.txt:
--------------------------------------------------------------------------------
1 | kinda our fears and also predictions for them
2 |
--------------------------------------------------------------------------------
/tests/data/lab/multilingual_ipa_5.txt:
--------------------------------------------------------------------------------
1 | i'm sideshow joined by custer and reinforce we've got a special edition of plat chat
2 |
--------------------------------------------------------------------------------
/tests/data/lab/multilingual_ipa_us.txt:
--------------------------------------------------------------------------------
1 | uh with only like four games to go
2 |
--------------------------------------------------------------------------------
/tests/data/lab/multilingual_ipa_us_2.txt:
--------------------------------------------------------------------------------
1 | hey josh could have finished it he just decided to fail it instead
2 |
--------------------------------------------------------------------------------
/tests/data/lab/multilingual_ipa_us_3.txt:
--------------------------------------------------------------------------------
1 | really good performances against top teams that have ended up going their way
2 |
--------------------------------------------------------------------------------
/tests/data/lab/multilingual_ipa_us_4.txt:
--------------------------------------------------------------------------------
1 | uh i i still think it's a very good team though in n a i think this is uh
2 |
--------------------------------------------------------------------------------
/tests/data/lab/multilingual_ipa_us_5.txt:
--------------------------------------------------------------------------------
1 | uh and this was the first time i think the justice really looked like an elite team
2 |
--------------------------------------------------------------------------------
/tests/data/lab/punctuated.lab:
--------------------------------------------------------------------------------
1 | oh yes, they - they, you know, they love her' and so' 'something 'i mean... ‘you The village name is Anglo Saxon in origin, and means 'Myrsa's woodland'.
2 |
--------------------------------------------------------------------------------
/tests/data/lab/se10x016-08071999-1334_u0016001.lab:
--------------------------------------------------------------------------------
1 | tyst under denna inspelning
2 |
--------------------------------------------------------------------------------
/tests/data/lab/se10x016-08071999-1334_u0016002.lab:
--------------------------------------------------------------------------------
1 | Testar en två tre fyra fem sex sju åtta.
2 |
--------------------------------------------------------------------------------
/tests/data/lab/se10x016-08071999-1334_u0016003.lab:
--------------------------------------------------------------------------------
1 | Har du sett våra rara barnbarn som leker och busar ute i grannträdgården!
2 |
--------------------------------------------------------------------------------
/tests/data/lab/se10x016-08071999-1334_u0016004.lab:
--------------------------------------------------------------------------------
1 | Vår husläkare börjar i och för sig bli gammal och skröplig, men han har ju ett sådant trevligt sätt!
2 |
--------------------------------------------------------------------------------
/tests/data/lab/weird_words.lab:
--------------------------------------------------------------------------------
1 | i’m talking-ajfish me-really [me-really] [me'really] [me_??_really] asds-asda sdasd-me
2 |
--------------------------------------------------------------------------------
/tests/data/lab/xsampa.lab:
--------------------------------------------------------------------------------
1 | @bUr\tOU {bstr\{kt {bSaIr\ Abr\utseIzi {br\@geItIN @bor\n {b3kr\Ambi {bI5s@`n Ar\g thr\Ip@5eI Ar\dvAr\k
2 |
--------------------------------------------------------------------------------
/tests/data/lab/日本語.lab:
--------------------------------------------------------------------------------
1 | 「はい」、。! 『何 でしょう』
2 |
--------------------------------------------------------------------------------
/tests/data/lm/test_lm.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/lm/test_lm.zip
--------------------------------------------------------------------------------
/tests/data/textgrid/61-70968-0000.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | 0
5 | 4.905
6 | <exists>
7 | 1
8 | "IntervalTier"
9 | "61"
10 | 0
11 | 4.905
12 | 1
13 | 0
14 | 4.905
15 | "HE BEGAN A CONFUSED COMPLAINT AGAINST THE WIZARD WHO HAD VANISHED BEHIND THE CURTAIN ON THE LEFT"
16 |
--------------------------------------------------------------------------------
/tests/data/textgrid/michaelandsickmichael_short_tg.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | 0
5 | 52.44082780612245
6 | <exists>
7 | 2
8 | "IntervalTier"
9 | "michael"
10 | 0
11 | 52.44082780612245
12 | 7
13 | 0
14 | 1.059222833923831
15 | ""
16 | 1.059222833923831
17 | 7.541483952089169
18 | "this is the acoustic corpus i'm talking pretty fast here there's nothing going else going on we're just yknow there's some speech errors but who
19 | cares"
20 | 7.541483952089169
21 | 8.016163828116456
22 | ""
23 | 8.016163828116456
24 | 17.207369573609213
25 | "um this is me talking really slow and slightly lower in intensity uh we're just saying some words"
26 | 17.207369573609213
27 | 18.35980726400338
28 | ""
29 | 18.35980726400338
30 | 25.251655700977985
31 | "and here's some more words words words words um and that should be all thanks"
32 | 25.251655700977985
33 | 52.44082780612245
34 | ""
35 | "IntervalTier"
36 | "sickmichael"
37 | 0
38 | 52.44080102040816
39 | 9
40 | 0
41 | 26.72325
42 | ""
43 | 26.72325
44 | 39.52854922648294
45 | "uh so this is the sick corpus uh i have a cold so i probably sound quite different than the uh uh acoustic corpus um the recording environment is also quite different"
46 | 39.52854922648294
47 | 40.20409920265843
48 | ""
49 | 40.20409920265843
50 | 43.81379465384285
51 | "and i'm saying a bunch of different words that i did not say in the original one"
52 | 43.81379465384285
53 | 44.480184007206404
54 | ""
55 | 44.480184007206404
56 | 45.08451636541159
57 | "uh"
58 | 45.08451636541159
59 | 46.37863407952624
60 | ""
61 | 46.37863407952624
62 | 51.457439118982556
63 | "and here's a long pause and i think this is probably good alright thanks"
64 | 51.457439118982556
65 | 52.44080102040816
66 | ""
67 |
--------------------------------------------------------------------------------
/tests/data/textgrid/multilingual_ipa.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | 0
5 | 4.1195
6 | <exists>
7 | 1
8 | "IntervalTier"
9 | "speaker_one"
10 | 0
11 | 4.1195
12 | 1
13 | 0
14 | 4.1195
15 | "i can't think of an animal that's less chad-like than a sloth"
16 |
--------------------------------------------------------------------------------
/tests/data/textgrid/multilingual_ipa_2.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | 0
5 | 6.2271
6 | <exists>
7 | 1
8 | "IntervalTier"
9 | "speaker_one"
10 | 0
11 | 6.2271
12 | 1
13 | 0
14 | 6.2271
15 | "welcome to a series of platchat videos where we're gonna tackle every single team in the overwatch league twenty twenty"
16 |
--------------------------------------------------------------------------------
/tests/data/textgrid/multilingual_ipa_3.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | 0
5 | 1.3062999999999994
6 | <exists>
7 | 1
8 | "IntervalTier"
9 | "speaker_one"
10 | 0
11 | 1.3062999999999994
12 | 1
13 | 0
14 | 1.3062999999999994
15 | "and run you through"
16 |
--------------------------------------------------------------------------------
/tests/data/textgrid/multilingual_ipa_4.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | 0
5 | 3.296199999999999
6 | <exists>
7 | 1
8 | "IntervalTier"
9 | "speaker_one"
10 | 0
11 | 3.296199999999999
12 | 1
13 | 0
14 | 3.296199999999999
15 | "kinda our fears and also predictions for them"
16 |
--------------------------------------------------------------------------------
/tests/data/textgrid/multilingual_ipa_5.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | 0
5 | 4.304
6 | <exists>
7 | 1
8 | "IntervalTier"
9 | "speaker_one"
10 | 0
11 | 4.304
12 | 1
13 | 0
14 | 4.304
15 | "i'm sideshow joined by custa and reinforce we've got a special edition of platchat"
16 |
--------------------------------------------------------------------------------
/tests/data/textgrid/multilingual_ipa_us.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | xmin = 0
5 | xmax = 2.9013125
6 | tiers? <exists>
7 | size = 1
8 | item []:
9 | item [1]:
10 | class = "IntervalTier"
11 | name = "speaker_two"
12 | xmin = 0
13 | xmax = 2.9013125
14 | intervals: size = 1
15 | intervals [1]:
16 | xmin = 0
17 | xmax = 2.9013125
18 | text = "uh with only like four games to go"
19 |
--------------------------------------------------------------------------------
/tests/data/textgrid/multilingual_ipa_us_2.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | 0
5 | 2.411162499999989
6 | <exists>
7 | 1
8 | "IntervalTier"
9 | "speaker_two"
10 | 0
11 | 2.411162499999989
12 | 1
13 | 0
14 | 2.411162499999989
15 | "hey josh could have finished it he just decided to fail it instead"
16 |
--------------------------------------------------------------------------------
/tests/data/textgrid/multilingual_ipa_us_3.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | 0
5 | 3.350999999999999
6 | <exists>
7 | 1
8 | "IntervalTier"
9 | "speaker_two"
10 | 0
11 | 3.350999999999999
12 | 1
13 | 0
14 | 3.350999999999999
15 | "really good performances against top teams that have ended up going their way"
16 |
--------------------------------------------------------------------------------
/tests/data/textgrid/multilingual_ipa_us_4.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | 0
5 | 3.5188874999998916
6 | <exists>
7 | 1
8 | "IntervalTier"
9 | "speaker_two"
10 | 0
11 | 3.5188874999998916
12 | 1
13 | 0
14 | 3.5188874999998916
15 | "uh i i still think it's a very good team though in n a i think this is uh"
16 |
--------------------------------------------------------------------------------
/tests/data/textgrid/multilingual_ipa_us_5.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | 0
5 | 4.656600000000026
6 | <exists>
7 | 1
8 | "IntervalTier"
9 | "speaker_two"
10 | 0
11 | 4.656600000000026
12 | 1
13 | 0
14 | 4.656600000000026
15 | "uh and this was the first time i think the justice really looked like an elite team"
16 |
--------------------------------------------------------------------------------
/tests/data/textgrid/short_segments.TextGrid:
--------------------------------------------------------------------------------
1 | File type = "ooTextFile"
2 | Object class = "TextGrid"
3 |
4 | xmin = 0
5 | xmax = 1
6 | tiers? <exists>
7 | size = 1
8 | item []:
9 | item [1]:
10 | class = "IntervalTier"
11 | name = "talker"
12 | xmin = 0
13 | xmax = 1
14 | intervals: size = 7
15 | intervals [1]:
16 | xmin = 0
17 | xmax = 0.16250605313552421
18 | text = ""
19 | intervals [2]:
20 | xmin = 0.16250605313552421
21 | xmax = 0.2837613633862341
22 | text = "blah"
23 | intervals [3]:
24 | xmin = 0.2837613633862341
25 | xmax = 0.43007610442209065
26 | text = ""
27 | intervals [4]:
28 | xmin = 0.43007610442209065
29 | xmax = 0.4389681605071427
30 | text = "ts"
31 | intervals [5]:
32 | xmin = 0.4389681605071427
33 | xmax = 0.6588444564284299
34 | text = ""
35 | intervals [6]:
36 | xmin = 0.6588444564284299
37 | xmax = 0.8480027404195374
38 | text = "blah2"
39 | intervals [7]:
40 | xmin = 0.8480027404195374
41 | xmax = 1
42 | text = ""
43 |
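short_segments.TextGrid is in Praat's long text format, where each interval is an xmin/xmax/text triple nested under its tier (most of the TextGrids above use the short format, which drops the field names). A small illustrative extraction sketch using a regular expression; a real pipeline would normally use a dedicated TextGrid parser:

import re

# Match the xmin/xmax/text triple of each interval in a long-format TextGrid.
INTERVAL = re.compile(r'xmin = ([\d.]+)\s*\n\s*xmax = ([\d.]+)\s*\n\s*text = "(.*)"')

def read_intervals(path):
    with open(path, encoding="utf8") as f:
        content = f.read()
    return [(float(a), float(b), text) for a, b, text in INTERVAL.findall(content)]

# read_intervals("tests/data/textgrid/short_segments.TextGrid")[1]
# -> (0.16250605313552421, 0.2837613633862341, 'blah')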
--------------------------------------------------------------------------------
/tests/data/textgrid/vietnamese.TextGrid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/textgrid/vietnamese.TextGrid
--------------------------------------------------------------------------------
/tests/data/tokenizer/test_tokenizer_model.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/tokenizer/test_tokenizer_model.zip
--------------------------------------------------------------------------------
/tests/data/tokenizer/test_tokenizer_model_phonetisaurus.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/tokenizer/test_tokenizer_model_phonetisaurus.zip
--------------------------------------------------------------------------------
/tests/data/wav/13697_11991_000000.opus:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/13697_11991_000000.opus
--------------------------------------------------------------------------------
/tests/data/wav/61-70968-0000.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/61-70968-0000.flac
--------------------------------------------------------------------------------
/tests/data/wav/acoustic_corpus.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/acoustic_corpus.wav
--------------------------------------------------------------------------------
/tests/data/wav/cold_corpus.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/cold_corpus.wav
--------------------------------------------------------------------------------
/tests/data/wav/cold_corpus3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/cold_corpus3.wav
--------------------------------------------------------------------------------
/tests/data/wav/cold_corpus_24bit.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/cold_corpus_24bit.wav
--------------------------------------------------------------------------------
/tests/data/wav/cold_corpus_32bit_float.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/cold_corpus_32bit_float.wav
--------------------------------------------------------------------------------
/tests/data/wav/common_voice_en_22058264.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/common_voice_en_22058264.mp3
--------------------------------------------------------------------------------
/tests/data/wav/common_voice_en_22058266.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/common_voice_en_22058266.mp3
--------------------------------------------------------------------------------
/tests/data/wav/common_voice_en_22058267.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/common_voice_en_22058267.mp3
--------------------------------------------------------------------------------
/tests/data/wav/common_voice_ja_24511055.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/common_voice_ja_24511055.mp3
--------------------------------------------------------------------------------
/tests/data/wav/dummy.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/dummy.mp3
--------------------------------------------------------------------------------
/tests/data/wav/dummy.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/dummy.wav
--------------------------------------------------------------------------------
/tests/data/wav/falsetto.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/falsetto.flac
--------------------------------------------------------------------------------
/tests/data/wav/falsetto2.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/falsetto2.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_a.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_a.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_affectation.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_affectation.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_apex.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_apex.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_bottle.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_bottle.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_breaths.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_breaths.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_breathy.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_breathy.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_buddy.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_buddy.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_creaky.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_creaky.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_crossword.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_crossword.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_cutoff.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_cutoff.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_cutoffprogressive.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_cutoffprogressive.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_er.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_er.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_erpause.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_erpause.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_exaggerated.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_exaggerated.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_falsetto.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_falsetto.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_her.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_her.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_hes.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_hes.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_internalsil.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_internalsil.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_kmg.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_kmg.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_laughter.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_laughter.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_long.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_long.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_longstop.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_longstop.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_michael.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_michael.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_patty.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_patty.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_poofy.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_poofy.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_pooty.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_pooty.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_puddy.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_puddy.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_putty.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_putty.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_puttynorm.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_puttynorm.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_reallylong.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_reallylong.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_registershift.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_registershift.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_surround.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_surround.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_the.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_the.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_theapprox.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_theapprox.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_theinitialstop.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_theinitialstop.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_thenorm.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_thenorm.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_theother.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_theother.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_thestop.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_thestop.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_thez.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_thez.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_thoughts.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_thoughts.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_uh.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_uh.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_uhuh.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_uhuh.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_uhum.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_uhum.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_um.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_um.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_unk.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_unk.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_whatscalled.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_whatscalled.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_whisper.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_whisper.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_words.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_words.flac
--------------------------------------------------------------------------------
/tests/data/wav/mfa_youknow.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/mfa_youknow.flac
--------------------------------------------------------------------------------
/tests/data/wav/michaelandsickmichael.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/michaelandsickmichael.wav
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_2.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_2.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_3.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_3.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_4.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_4.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_5.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_5.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_us.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_us.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_us_2.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_us_2.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_us_3.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_us_3.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_us_4.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_us_4.flac
--------------------------------------------------------------------------------
/tests/data/wav/multilingual_ipa_us_5.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/multilingual_ipa_us_5.flac
--------------------------------------------------------------------------------
/tests/data/wav/se10x016-08071999-1334_u0016001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/se10x016-08071999-1334_u0016001.wav
--------------------------------------------------------------------------------
/tests/data/wav/se10x016-08071999-1334_u0016002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/se10x016-08071999-1334_u0016002.wav
--------------------------------------------------------------------------------
/tests/data/wav/se10x016-08071999-1334_u0016003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/se10x016-08071999-1334_u0016003.wav
--------------------------------------------------------------------------------
/tests/data/wav/se10x016-08071999-1334_u0016004.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/se10x016-08071999-1334_u0016004.wav
--------------------------------------------------------------------------------
/tests/data/wav/whisper.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/whisper.flac
--------------------------------------------------------------------------------
/tests/data/wav/whisper2.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MontrealCorpusTools/Montreal-Forced-Aligner/bb52e81abe87b18002874e5304877b8b2be58eb5/tests/data/wav/whisper2.flac
--------------------------------------------------------------------------------
/tests/test_abc.py:
--------------------------------------------------------------------------------
1 | from montreal_forced_aligner.abc import MfaWorker, TrainerMixin
2 | from montreal_forced_aligner.acoustic_modeling import SatTrainer, TrainableAligner
3 | from montreal_forced_aligner.alignment import AlignMixin
4 |
5 |
6 | def test_typing(basic_corpus_dir, basic_dict_path, temp_dir):
7 | am_trainer = TrainableAligner(
8 | corpus_directory=basic_corpus_dir,
9 | dictionary_path=basic_dict_path,
10 | )
11 | trainer = SatTrainer(identifier="sat", worker=am_trainer)
12 | assert type(trainer).__name__ == "SatTrainer"
13 | assert isinstance(trainer, TrainerMixin)
14 | assert isinstance(trainer, AlignMixin)
15 | assert isinstance(trainer, MfaWorker)
16 | assert isinstance(am_trainer, MfaWorker)
17 |
--------------------------------------------------------------------------------
/tests/test_commandline_adapt.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import click.testing
4 |
5 | from montreal_forced_aligner.command_line.mfa import mfa_cli
6 |
7 |
8 | def test_adapt_basic(
9 | basic_corpus_dir,
10 | generated_dir,
11 | english_dictionary,
12 | temp_dir,
13 | test_align_config,
14 | english_acoustic_model,
15 | db_setup,
16 | ):
17 | adapted_model_path = generated_dir.joinpath("basic_adapted.zip")
18 | command = [
19 | "adapt",
20 | basic_corpus_dir,
21 | english_dictionary,
22 | english_acoustic_model,
23 | adapted_model_path,
24 | "--beam",
25 | "100",
26 | "--clean",
27 | "--no_debug",
28 | "-p",
29 | "test",
30 | ]
31 | command = [str(x) for x in command]
32 | result = click.testing.CliRunner().invoke(
33 | mfa_cli, command, catch_exceptions=True
34 | )
35 | print(result.stdout)
36 | print(result.stderr)
37 | if result.exception:
38 | print(result.exc_info)
39 | raise result.exception
40 | assert os.path.exists(adapted_model_path)
41 |
42 |
43 | def test_adapt_multilingual(
44 | multilingual_ipa_corpus_dir,
45 | mfa_speaker_dict_path,
46 | generated_dir,
47 | temp_dir,
48 | basic_align_config_path,
49 | english_acoustic_model,
50 | english_mfa_acoustic_model,
51 | db_setup,
52 | ):
53 | adapted_model_path = generated_dir.joinpath("multilingual_adapted.zip")
54 | output_path = generated_dir.joinpath("multilingual_output")
55 | command = [
56 | "adapt",
57 | multilingual_ipa_corpus_dir,
58 | mfa_speaker_dict_path,
59 | english_mfa_acoustic_model,
60 | adapted_model_path,
61 | output_path,
62 | "--config_path",
63 | basic_align_config_path,
64 | "-q",
65 | "--clean",
66 | "--no_debug",
67 | "-p",
68 | "test",
69 | ]
70 | command = [str(x) for x in command]
71 | result = click.testing.CliRunner().invoke(
72 | mfa_cli, command, catch_exceptions=True
73 | )
74 | print(result.stdout)
75 | print(result.stderr)
76 | if result.exception:
77 | print(result.exc_info)
78 | raise result.exception
79 | assert os.path.exists(adapted_model_path)
80 |
--------------------------------------------------------------------------------
/tests/test_commandline_configure.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import click.testing
4 |
5 | from montreal_forced_aligner import config
6 | from montreal_forced_aligner.command_line.mfa import mfa_cli
7 |
8 |
9 | def test_configure(
10 | temp_dir,
11 | basic_corpus_dir,
12 | generated_dir,
13 | english_dictionary,
14 | basic_align_config_path,
15 | english_acoustic_model,
16 | global_config,
17 | ):
18 | path = config.generate_config_path()
19 | if os.path.exists(path):
20 | os.remove(path)
21 | command = [
22 | "configure",
23 | "--always_clean",
24 | "-t",
25 | temp_dir,
26 | "-j",
27 | "10",
28 | "--disable_mp",
29 | "--always_verbose",
30 | "-p",
31 | "test",
32 | ]
33 | command = [str(x) for x in command]
34 | click.testing.CliRunner().invoke(mfa_cli, command, catch_exceptions=False)
35 | assert os.path.exists(path)
36 | config.load_configuration()
37 |
38 | assert config.CURRENT_PROFILE_NAME == "test"
39 | assert config.NUM_JOBS == 10
40 | assert not config.USE_MP
41 | assert config.VERBOSE
42 | assert config.CLEAN
43 |
44 | command = ["configure", "--never_clean", "--enable_mp", "--never_verbose", "-p", "test"]
45 | click.testing.CliRunner().invoke(mfa_cli, command, catch_exceptions=False)
46 |
47 | assert os.path.exists(path)
48 | config.load_configuration()
49 | assert config.CURRENT_PROFILE_NAME == "test"
50 | assert config.USE_MP
51 | assert not config.VERBOSE
52 | assert not config.CLEAN
53 |
54 | config.CLEAN = True
55 | config.DEBUG = True
56 | config.VERBOSE = True
57 | config.USE_MP = False
58 | config.TEMPORARY_DIRECTORY = temp_dir
59 |
--------------------------------------------------------------------------------
/tests/test_commandline_history.py:
--------------------------------------------------------------------------------
1 | import click.testing
2 |
3 | from montreal_forced_aligner.command_line.mfa import mfa_cli
4 |
5 |
6 | def test_mfa_history():
7 |
8 | command = ["history", "--depth", "60"]
9 | result = click.testing.CliRunner().invoke(
10 | mfa_cli, command, catch_exceptions=True
11 | )
12 | print(result.stdout)
13 | print(result.stderr)
14 | if result.exception:
15 | print(result.exc_info)
16 | raise result.exception
17 | assert not result.return_value
18 |
19 | command = ["history"]
20 | result = click.testing.CliRunner().invoke(
21 | mfa_cli, command, catch_exceptions=True
22 | )
23 | print(result.stdout)
24 | print(result.stderr)
25 | if result.exception:
26 | print(result.exc_info)
27 | raise result.exception
28 | assert not result.return_value
29 |
30 |
31 | def test_mfa_history_verbose():
32 |
33 | command = ["history", "-v", "--depth", "60"]
34 | result = click.testing.CliRunner().invoke(
35 | mfa_cli, command, catch_exceptions=True
36 | )
37 | print(result.stdout)
38 | print(result.stderr)
39 | if result.exception:
40 | print(result.exc_info)
41 | raise result.exception
42 | assert not result.return_value
43 |
44 | command = ["history", "-v"]
45 | result = click.testing.CliRunner().invoke(
46 | mfa_cli, command, catch_exceptions=True
47 | )
48 | print(result.stdout)
49 | print(result.stderr)
50 | if result.exception:
51 | print(result.exc_info)
52 | raise result.exception
53 | assert not result.return_value
54 |
--------------------------------------------------------------------------------
/tests/test_commandline_train_dict.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import click.testing
4 | import sqlalchemy.orm
5 |
6 | from montreal_forced_aligner.command_line.mfa import mfa_cli
7 |
8 |
9 | def test_train_dict(
10 | basic_corpus_dir,
11 | english_dictionary,
12 | english_acoustic_model,
13 | generated_dir,
14 | temp_dir,
15 | basic_align_config_path,
16 | db_setup,
17 | ):
18 | output_path = generated_dir.joinpath("trained_dict")
19 | command = [
20 | "train_dictionary",
21 | basic_corpus_dir,
22 | english_dictionary,
23 | english_acoustic_model,
24 | output_path,
25 | "-q",
26 | "--clean",
27 | "--debug",
28 | "--silence_probabilities",
29 | "--config_path",
30 | basic_align_config_path,
31 | "--use_mp",
32 | ]
33 | command = [str(x) for x in command]
34 | result = click.testing.CliRunner().invoke(
35 | mfa_cli, command, catch_exceptions=True
36 | )
37 | print(result.stdout)
38 | print(result.stderr)
39 | if result.exception:
40 | print(result.exc_info)
41 | raise result.exception
42 | assert not result.return_value
43 |
44 | dict_path = os.path.join(output_path, "english_us_arpa.dict")
45 | assert os.path.exists(output_path)
46 | sqlalchemy.orm.close_all_sessions()
47 | textgrid_output = generated_dir.joinpath("trained_dict_output")
48 | command = [
49 | "align",
50 | basic_corpus_dir,
51 | dict_path,
52 | english_acoustic_model,
53 | textgrid_output,
54 | "-q",
55 | "--clean",
56 | "--debug",
57 | "--config_path",
58 | basic_align_config_path,
59 | ]
60 | command = [str(x) for x in command]
61 | result = click.testing.CliRunner().invoke(
62 | mfa_cli, command, catch_exceptions=True
63 | )
64 | print(result.stdout)
65 | print(result.stderr)
66 | if result.exception:
67 | print(result.exc_info)
68 | raise result.exception
69 | assert not result.return_value
70 | assert os.path.exists(textgrid_output)
71 |
--------------------------------------------------------------------------------
/tests/test_commandline_train_ivector.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import click.testing
4 |
5 | from montreal_forced_aligner.command_line.mfa import mfa_cli
6 |
7 |
8 | def test_basic_ivector(
9 | basic_corpus_dir,
10 | generated_dir,
11 | temp_dir,
12 | train_ivector_config_path,
13 | ivector_output_model_path,
14 | db_setup,
15 | ):
16 | command = [
17 | "train_ivector",
18 | basic_corpus_dir,
19 | ivector_output_model_path,
20 | "--config_path",
21 | train_ivector_config_path,
22 | "-q",
23 | "--clean",
24 | "--debug",
25 | ]
26 | command = [str(x) for x in command]
27 | result = click.testing.CliRunner().invoke(
28 | mfa_cli, command, catch_exceptions=True
29 | )
30 | print(result.stdout)
31 | print(result.stderr)
32 | if result.exception:
33 | print(result.exc_info)
34 | raise result.exception
35 | assert not result.return_value
36 | assert os.path.exists(ivector_output_model_path)
37 |
--------------------------------------------------------------------------------
/tests/test_gui.py:
--------------------------------------------------------------------------------
1 | from montreal_forced_aligner import config
2 | from montreal_forced_aligner.corpus.acoustic_corpus import AcousticCorpus
3 |
4 |
5 | def test_save_text_lab(basic_corpus_dir, generated_dir, db_setup):
6 | output_directory = generated_dir.joinpath("gui_tests")
7 | config.TEMPORARY_DIRECTORY = output_directory
8 | corpus = AcousticCorpus(
9 | corpus_directory=basic_corpus_dir,
10 | )
11 | corpus._load_corpus()
12 | corpus.get_file(name="acoustic_corpus").save(corpus.corpus_directory)
13 | corpus.cleanup_connections()
14 |
15 |
16 | def test_file_properties(
17 | stereo_corpus_dir,
18 | generated_dir,
19 | db_setup,
20 | ):
21 | output_directory = generated_dir.joinpath("gui_tests")
22 | config.TEMPORARY_DIRECTORY = output_directory
23 | corpus = AcousticCorpus(
24 | corpus_directory=stereo_corpus_dir,
25 | )
26 | corpus._load_corpus()
27 | file = corpus.get_file(name="michaelandsickmichael")
28 | assert file.sound_file.num_channels == 2
29 | assert file.num_speakers == 2
30 | assert file.num_utterances == 7
31 | x, y = file.sound_file.normalized_waveform()
32 | assert y.shape[0] == 2
33 |
34 |
35 | def test_flac_tg(flac_tg_corpus_dir, generated_dir, db_setup):
36 | output_directory = generated_dir.joinpath("gui_tests")
37 | config.TEMPORARY_DIRECTORY = output_directory
38 | corpus = AcousticCorpus(
39 | corpus_directory=flac_tg_corpus_dir,
40 | )
41 | corpus._load_corpus()
42 | corpus.get_file(name="61-70968-0000").save(corpus.corpus_directory)
43 | corpus.cleanup_connections()
44 |
--------------------------------------------------------------------------------
/tests/test_helper.py:
--------------------------------------------------------------------------------
1 | from montreal_forced_aligner.data import CtmInterval
2 | from montreal_forced_aligner.helper import align_phones, load_evaluation_mapping
3 |
4 |
5 | def test_align_phones(basic_corpus_dir, basic_dict_path, temp_dir, eval_mapping_path):
6 | mapping = load_evaluation_mapping(eval_mapping_path)
7 | reference_phoneset = set()
8 | for v in mapping.values():
9 | if isinstance(v, str):
10 | reference_phoneset.add(v)
11 | else:
12 | reference_phoneset.update(v)
13 |
14 | reference_sequence = [
15 | "HH",
16 | "IY0",
17 | "HH",
18 | "AE1",
19 | "D",
20 | "Y",
21 | "ER0",
22 | "G",
23 | "R",
24 | "IY1",
25 | "S",
26 | "IY0",
27 | "S",
28 | "UW1",
29 | "T",
30 | "IH0",
31 | "N",
32 | "D",
33 | "ER1",
34 | "T",
35 | "IY0",
36 | "W",
37 | "AA1",
38 | "SH",
39 | "W",
40 | "AO1",
41 | "T",
42 | "ER0",
43 | "AO1",
44 | "L",
45 | "sil",
46 | "Y",
47 | "IH1",
48 | "R",
49 | ]
50 | reference_sequence = [CtmInterval(i, i + 1, x) for i, x in enumerate(reference_sequence)]
51 | comparison_sequence = [
52 | "ç",
53 | "i",
54 | "h",
55 | "æ",
56 | "d",
57 | "j",
58 | "ɚ",
59 | "ɟ",
60 | "ɹ",
61 | "iː",
62 | "s",
63 | "i",
64 | "s",
65 | "ʉː",
66 | "t",
67 | "sil",
68 | "ɪ",
69 | "n",
70 | "d",
71 | "ɝ",
72 | "ɾ",
73 | "i",
74 | "w",
75 | "ɑː",
76 | "ʃ",
77 | "w",
78 | "ɑː",
79 | "ɾ",
80 | "ɚ",
81 | "ɑː",
82 | "ɫ",
83 | "sil",
84 | "j",
85 | "ɪ",
86 | "ɹ",
87 | ]
88 | comparison_sequence = [CtmInterval(i, i + 1, x) for i, x in enumerate(comparison_sequence)]
89 | score, phone_errors, error_counts = align_phones(
90 | reference_sequence,
91 | comparison_sequence,
92 | silence_phone="sil",
93 | custom_mapping=mapping,
94 | debug=True,
95 | )
96 |
97 | assert score < 1
98 | assert phone_errors < 1
99 |
--------------------------------------------------------------------------------
/tests/test_segmentation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from montreal_forced_aligner.diarization.speaker_diarizer import FOUND_SPEECHBRAIN
4 | from montreal_forced_aligner.vad.segmenter import TranscriptionSegmenter
5 |
6 |
7 | def test_segment_transcript(
8 | basic_corpus_dir,
9 | english_mfa_acoustic_model,
10 | english_us_mfa_reduced_dict,
11 | generated_dir,
12 | temp_dir,
13 | basic_segment_config_path,
14 | db_setup,
15 | ):
16 | if not FOUND_SPEECHBRAIN:
17 | pytest.skip("SpeechBrain not installed")
18 | segmenter = TranscriptionSegmenter(
19 | corpus_directory=basic_corpus_dir,
20 | dictionary_path=english_us_mfa_reduced_dict,
21 | acoustic_model_path=english_mfa_acoustic_model,
22 | speechbrain=True,
23 | en_activation_th=0.4,
24 | en_deactivation_th=0.4,
25 | )
26 | segmenter.setup()
27 | new_utterances = segmenter.segment_transcript(1)
28 | assert len(new_utterances) > 0
29 | segmenter.cleanup()
30 |
--------------------------------------------------------------------------------