├── .SETUPTOOLS_SCM_PRETEND_VERSION ├── .flake8 ├── .git-blame-ignore-revs ├── .github ├── pull_request_template.md ├── pull_request_template │ └── software_pr.md └── workflows │ ├── codeql.yml │ ├── docs.yml │ ├── matrix-tests.yml │ ├── pythonpublish.yml │ └── tests.yml ├── .gitignore ├── .gitlint ├── .pre-commit-config.yaml ├── CITATION.cff ├── Contributing.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Procfile ├── README.md ├── bin └── post_compile ├── docs ├── _static │ ├── abbs.png │ ├── basic.gif │ ├── creator.png │ └── rules.png ├── assets │ └── g2p_update.jpeg ├── cli.md ├── contributing.md ├── index.md ├── installation.md ├── migration-2.md ├── overrides │ └── partials │ │ └── comments.html ├── package.md ├── start.md └── studio.md ├── g2p ├── __init__.py ├── api.py ├── api_v2.py ├── app.py ├── cli.py ├── constants.py ├── exceptions.py ├── log.py ├── mappings │ ├── .schema │ │ ├── g2p-config-schema-2.0.json │ │ ├── g2p-config-schema-2.1.json │ │ └── g2p-config-schema-2.2.json │ ├── __init__.py │ ├── create_fallback_mapping.py │ ├── create_ipa_mapping.py │ ├── langs │ │ ├── __init__.py │ │ ├── alq │ │ │ ├── alq_to_ipa.csv │ │ │ └── config-g2p.yaml │ │ ├── atj │ │ │ ├── README.md │ │ │ ├── atj_ipa_to_eng_ipa.json │ │ │ ├── atj_to_ipa.json │ │ │ └── config-g2p.yaml │ │ ├── ckt │ │ │ ├── README.md │ │ │ ├── ckt_ipa_to_eng_ipa.json │ │ │ ├── ckt_to_ipa.json │ │ │ └── config-g2p.yaml │ │ ├── clc │ │ │ ├── config-g2p.yaml │ │ │ └── doulos.csv │ │ ├── clm │ │ │ ├── clm_equiv.csv │ │ │ ├── clm_to_ipa.csv │ │ │ └── config-g2p.yaml │ │ ├── crg │ │ │ ├── abbreviations.csv │ │ │ ├── config-g2p.yaml │ │ │ ├── crg-dv-to-crg-ipa.csv │ │ │ └── crg-tmd-to-crg-ipa.csv │ │ ├── crj │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── crj_equiv.json │ │ │ ├── crj_ipa_to_eng_ipa.json │ │ │ └── crj_to_ipa.json │ │ ├── crk │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── crk-no-symbols_to_ipa.json │ │ │ └── crk_to_crk-no-symbols.json │ │ ├── crl │ │ │ ├── README.md │ 
│ │ ├── config-g2p.yaml │ │ │ ├── crl_equiv.json │ │ │ ├── crl_ipa_to_eng_ipa.json │ │ │ └── crl_to_ipa.json │ │ ├── crm │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── crm_equiv.json │ │ │ ├── crm_ipa_to_eng_ipa.json │ │ │ └── crm_to_ipa.json │ │ ├── crx │ │ │ ├── config-g2p.yaml │ │ │ ├── stella_orth_to_syllabics.csv │ │ │ └── stella_syllabics_to_orth.csv │ │ ├── csw │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── csw_equiv.json │ │ │ ├── csw_ipa_to_eng_ipa.json │ │ │ └── csw_to_ipa.json │ │ ├── ctp │ │ │ ├── config-g2p.yaml │ │ │ ├── ctp_ipa_to_eng_ipa.json │ │ │ └── ctp_to_ipa.json │ │ ├── dan │ │ │ ├── config-g2p.yaml │ │ │ ├── dan_abbs.csv │ │ │ ├── dan_to_dummy.json │ │ │ └── dan_to_ipa.csv │ │ ├── eng │ │ │ ├── README.md │ │ │ ├── cmudict.ipa.aligned.txt │ │ │ ├── config-g2p.yaml │ │ │ ├── dummy_to_arpabet.json │ │ │ ├── eng_arpabet_to_ipa.json │ │ │ ├── eng_inventory.json │ │ │ ├── eng_ipa_to_arpabet.json │ │ │ ├── make_alignments.sh │ │ │ ├── make_ipa_cmudict.py │ │ │ └── reverse_json.py │ │ ├── fin │ │ │ ├── config-g2p.yaml │ │ │ └── fin_to_ipa.csv │ │ ├── font-encodings │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── fn_unicode.csv │ │ │ ├── hei_doulos.csv │ │ │ ├── hei_times.csv │ │ │ └── nav_times.csv │ │ ├── fra │ │ │ ├── README.txt │ │ │ ├── config-g2p.yaml │ │ │ ├── fra_abbs.csv │ │ │ └── fra_to_ipa.csv │ │ ├── generated │ │ │ ├── alq-ipa_to_eng-ipa.json │ │ │ ├── atj-ipa_to_eng-ipa.json │ │ │ ├── clm-ipa_to_eng-ipa.json │ │ │ ├── config-g2p.yaml │ │ │ ├── crg-ipa_to_eng-ipa.json │ │ │ ├── crk-ipa_to_eng-ipa.json │ │ │ ├── dan-ipa_to_eng-ipa.json │ │ │ ├── fin-ipa_to_eng-ipa.json │ │ │ ├── fra-ipa_to_eng-ipa.json │ │ │ ├── gla-ipa_to_eng-ipa.json │ │ │ ├── gwi-ipa_to_eng-ipa.json │ │ │ ├── haa-ipa_to_eng-ipa.json │ │ │ ├── ikt-ipa_to_eng-ipa.json │ │ │ ├── ikt-ipa_to_hamming-eng-ipa.json │ │ │ ├── iku-ipa_to_eng-ipa.json │ │ │ ├── iku-ipa_to_hamming-eng-ipa.json │ │ │ ├── iku-sro-ipa_to_eng-ipa.json │ │ │ ├── 
kwk-ipa_to_eng-ipa.json │ │ │ ├── lml-ipa_to_eng-ipa.json │ │ │ ├── mic-ipa_to_eng-ipa.json │ │ │ ├── moe-ipa_to_eng-ipa.json │ │ │ ├── moh-equiv_to_dummy.json │ │ │ ├── moh-equiv_to_hamming-dummy.json │ │ │ ├── moh-ipa_to_eng-ipa.json │ │ │ ├── moh-ipa_to_hamming-eng-ipa.json │ │ │ ├── oji-ipa_to_eng-ipa.json │ │ │ ├── oka-ipa_to_eng-ipa.json │ │ │ ├── sal-ipa_to_eng-ipa.json │ │ │ ├── see-ipa_to_eng-ipa.json │ │ │ ├── str-equiv_to_dummy.json │ │ │ ├── str-equiv_to_hamming-dummy.json │ │ │ ├── str-ipa_to_eng-ipa.json │ │ │ ├── str-ipa_to_hamming-eng-ipa.json │ │ │ ├── tau-ipa_to_eng-ipa.json │ │ │ ├── tce-ipa_to_eng-ipa.json │ │ │ ├── tli-ipa_to_eng-ipa.json │ │ │ ├── ttm-ipa_to_eng-ipa.json │ │ │ ├── und-ascii_to_dummy.json │ │ │ ├── und-ascii_to_hamming-dummy.json │ │ │ ├── und-ipa_to_hamming-eng-ipa.json │ │ │ └── win-ipa_to_eng-ipa.json │ │ ├── git │ │ │ ├── APA.csv │ │ │ ├── Ortho_variables.csv │ │ │ ├── Orthography.csv │ │ │ ├── Orthography_Deterministic.csv │ │ │ ├── RAPA.csv │ │ │ ├── RAPA_Deterministic.csv │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── equiv.csv │ │ │ ├── git_ipa_to_eng_ipa.json │ │ │ └── git_to_ipa.json │ │ ├── gla │ │ │ ├── README.txt │ │ │ ├── config-g2p.yaml │ │ │ └── gla_to_ipa.json │ │ ├── gwi │ │ │ ├── config-g2p.yaml │ │ │ ├── gwi_equiv.json │ │ │ └── gwi_to_ipa.json │ │ ├── haa │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── haa_abbs.csv │ │ │ ├── haa_equiv.csv │ │ │ └── haa_to_ipa.csv │ │ ├── hur │ │ │ ├── config-g2p.yaml │ │ │ ├── hur_apa_to_hur_orthog.json │ │ │ └── hur_orthog_to_hur_apa.json │ │ ├── ikt │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ └── ikt_to_ipa.json │ │ ├── iku │ │ │ ├── config-g2p.yaml │ │ │ ├── iku_equiv_to_ipa.json │ │ │ ├── iku_sro_to_ipa.json │ │ │ └── iku_to_iku_equiv.json │ │ ├── kkz │ │ │ ├── config-g2p.yaml │ │ │ ├── kkz_ipa_to_eng_ipa.json │ │ │ └── kkz_to_ipa.json │ │ ├── kwk │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── kwk_boas_to_umista.csv │ │ │ ├── 
kwk_ipa_to_phonemic_ipa.json │ │ │ ├── kwk_napa_to_ipa.csv │ │ │ ├── kwk_napa_to_xsampa.json │ │ │ ├── kwk_umista_to_ipa.json │ │ │ ├── kwk_xsampa_to_eng_ipa.json │ │ │ ├── napa_equiv_ubc.csv │ │ │ ├── napa_equiv_uvic.csv │ │ │ └── umista_equiv.csv │ │ ├── langs.json.gz │ │ ├── lml │ │ │ ├── abbreviations.csv │ │ │ ├── config-g2p.yaml │ │ │ └── lml_to_ipa.csv │ │ ├── mic │ │ │ ├── abbreviations.csv │ │ │ ├── config-g2p.yaml │ │ │ └── mic_to_ipa.json │ │ ├── moe │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── moe_abbs.csv │ │ │ └── moe_to_ipa.json │ │ ├── moh │ │ │ ├── README.md │ │ │ ├── abbreviations.csv │ │ │ ├── config-g2p.yaml │ │ │ ├── moh_equiv.json │ │ │ └── moh_to_ipa.json │ │ ├── network.json.gz │ │ ├── network_lite.py │ │ ├── norm │ │ │ ├── config-g2p.yaml │ │ │ ├── panphon_preprocessor.csv │ │ │ └── tone-map.txt │ │ ├── oji │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── oji_syllabics_to_orth.csv │ │ │ └── oji_to_ipa.csv │ │ ├── oka │ │ │ ├── config-g2p.yaml │ │ │ ├── oka_equiv.csv │ │ │ └── oka_to_ipa.csv │ │ ├── sal │ │ │ ├── config-g2p.yaml │ │ │ ├── sal_apa_to_ipa.csv │ │ │ └── sal_equiv.csv │ │ ├── see │ │ │ ├── config-g2p.yaml │ │ │ └── see_to_ipa.csv │ │ ├── srs │ │ │ ├── config-g2p.yaml │ │ │ ├── srs_ipa_to_eng_ipa.json │ │ │ └── srs_to_ipa.json │ │ ├── str │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── str_equiv.json │ │ │ └── str_to_ipa.json │ │ ├── tau │ │ │ ├── config-g2p.yaml │ │ │ ├── tau_equiv.json │ │ │ └── tau_to_ipa.json │ │ ├── tce │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── tce_equiv.csv │ │ │ └── tce_to_ipa.csv │ │ ├── tgx │ │ │ ├── config-g2p.yaml │ │ │ ├── tgx_ipa_to_eng_ipa.json │ │ │ └── tgx_to_ipa.json │ │ ├── tli │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── tli_equiv.csv │ │ │ └── tli_to_ipa.csv │ │ ├── ttm │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── ttm_equiv.csv │ │ │ └── ttm_to_ipa.csv │ │ ├── und │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── 
und_ipa_to_eng_ipa.json │ │ │ └── und_to_ipa.json │ │ ├── utils.py │ │ └── win │ │ │ ├── README.md │ │ │ ├── config-g2p.yaml │ │ │ ├── hoocak_alphabet.csv │ │ │ └── win_to_ipa.json │ ├── tokenizer.py │ └── utils.py ├── shared_types.py ├── static │ ├── __init__.py │ ├── assets │ │ └── bonjour.png │ ├── blockly_main.js │ ├── blocks.js │ ├── custom.css │ ├── custom.js │ ├── echart_custom.js │ ├── languages-network.json │ ├── normalize.css │ ├── skeleton.css │ └── swagger.json ├── templates │ └── index.html ├── tests │ ├── .coveragerc │ ├── __init__.py │ ├── public │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── atj.psv │ │ │ ├── clm.csv │ │ │ ├── crg.psv │ │ │ ├── crj.psv │ │ │ ├── crk.psv │ │ │ ├── crl.psv │ │ │ ├── crm.psv │ │ │ ├── csw.psv │ │ │ ├── ctp.csv │ │ │ ├── eng.csv │ │ │ ├── fin.psv │ │ │ ├── fn_unicode.psv │ │ │ ├── fra.psv │ │ │ ├── fra_panagrams.txt │ │ │ ├── fra_panagrams_NFD.txt │ │ │ ├── fra_simple.txt │ │ │ ├── git.psv │ │ │ ├── gwi.psv │ │ │ ├── haa.csv │ │ │ ├── hur.psv │ │ │ ├── ikt.psv │ │ │ ├── iku-sro.psv │ │ │ ├── iku.psv │ │ │ ├── kwk.psv │ │ │ ├── lml.psv │ │ │ ├── mic.psv │ │ │ ├── moe.psv │ │ │ ├── moh.psv │ │ │ ├── oji-syl.psv │ │ │ ├── oji.tsv │ │ │ ├── oka.csv │ │ │ ├── sal-arpabet.tsv │ │ │ ├── sal.tsv │ │ │ ├── srs.psv │ │ │ ├── str.tsv │ │ │ ├── str_un_human_rights.txt │ │ │ ├── tau.psv │ │ │ ├── tce.csv │ │ │ ├── tli.csv │ │ │ ├── ttm.csv │ │ │ └── win.csv │ │ ├── git_to_ipa.json │ │ ├── mappings │ │ │ ├── .gitignore │ │ │ ├── abbreviation_config-g2p.yaml │ │ │ ├── abbreviation_mapping.csv │ │ │ ├── abbreviations.csv │ │ │ ├── abbreviations.json │ │ │ ├── abbreviations.psv │ │ │ ├── abbreviations.substring.csv │ │ │ ├── abbreviations.tsv │ │ │ ├── bad_langs │ │ │ │ └── lang1 │ │ │ │ │ ├── config-g2p.yaml │ │ │ │ │ └── minimal.csv │ │ │ ├── bad_langs2 │ │ │ │ └── lang1 │ │ │ │ │ ├── config-g2p.yaml │ │ │ │ │ └── minimal.csv │ │ │ ├── bad_lexicon_config-g2p.yaml │ │ │ ├── case-feed │ │ │ │ ├── README.md │ │ │ │ ├── 
cf-in-lc-to-cf-out-uc.csv │ │ │ │ ├── config-g2p.yaml │ │ │ │ └── empty.csv │ │ │ ├── compose.yaml │ │ │ ├── compose1-2.csv │ │ │ ├── compose2-3.csv │ │ │ ├── deletion.csv │ │ │ ├── deletion.json │ │ │ ├── deletion_config_csv.yaml │ │ │ ├── deletion_config_json.yaml │ │ │ ├── g2p_studio.csv │ │ │ ├── g2p_studio2.csv │ │ │ ├── gen-map-1.csv │ │ │ ├── gen-map-2.csv │ │ │ ├── gen-map-3a.csv │ │ │ ├── gen-map-3b.csv │ │ │ ├── gen-map_config-g2p.yaml │ │ │ ├── gm1-ipa_to_gm2-ipa.json │ │ │ ├── gm2-ipa_to_gm3-ipa.json │ │ │ ├── gm3-ipa_to_gm2-ipa.json │ │ │ ├── hello.aligned.txt │ │ │ ├── lexicon_config-g2p.yaml │ │ │ ├── malformed_config-g2p.yaml │ │ │ ├── minimal.csv │ │ │ ├── minimal.json │ │ │ ├── minimal.psv │ │ │ ├── minimal.tsv │ │ │ ├── minimal.xlsx │ │ │ ├── minimal_config-g2p.yaml │ │ │ ├── minimal_configs.yaml │ │ │ ├── no_escape.csv │ │ │ ├── no_mappings_key.yaml │ │ │ ├── nofeed-indices.csv │ │ │ ├── nofeed-indices.yaml │ │ │ ├── null.csv │ │ │ ├── null_config-g2p.yaml │ │ │ ├── rule-ordering.yaml │ │ │ ├── test.yaml │ │ │ ├── test_to_ipa.csv │ │ │ ├── tokenize_punct.csv │ │ │ └── tokenize_punct_config-g2p.yaml │ │ └── sample_response.json │ ├── run.py │ ├── test_api_resources.py │ ├── test_api_v2.py │ ├── test_check_ipa_arpabet.py │ ├── test_cli.py │ ├── test_create_mapping.py │ ├── test_doctor.py │ ├── test_doctor_expensive.py │ ├── test_fallback.py │ ├── test_indices.py │ ├── test_langs.py │ ├── test_lexicon_transducer.py │ ├── test_mappings.py │ ├── test_network.py │ ├── test_studio.py │ ├── test_tokenize_and_map.py │ ├── test_tokenizer.py │ ├── test_transducer.py │ ├── test_unidecode_transducer.py │ ├── test_utils.py │ ├── test_z_local_config.py │ └── time_panphon.py └── transducer │ └── __init__.py ├── mkdocs.yml ├── pyproject.toml ├── readme-heroku.md ├── requirements.txt ├── run_studio.py ├── run_tests.py └── runtime.txt /.SETUPTOOLS_SCM_PRETEND_VERSION: -------------------------------------------------------------------------------- 1 | 2.2 2 | 
-------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | extend-ignore = E203,E501,E704 4 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | 9c26df474a70543588e4f8b3ce7d56a36f475da4 2 | cd3165733bbb5cd21b28aa2a3115cc13887dfa90 3 | 9b156b87b63f6ff3c337c3e82ec1ecc45a2af03a 4 | d6ae834863d309cd05096d32c7237eea35e21615 5 | ce0a4b1b2aca9c3e3dcb09dc473c44e9014cc103 6 | 1fa3d9d34b4087c44047df64fb0f936db73cb09f 7 | 4a982e6155dc51d17c80fb54b1f66c9f1d5affb1 8 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Pull request template for adding a new language 2 | ----------------------------------------------- 3 | 4 | 9 | 10 | * **Please check if the PR fulfills these requirements** 11 | - [ ] Mapping files are added in `g2p/mappings/langs` 12 | - [ ] Mapping is either added to an existing folder or a new folder has been added 13 | - [ ] Language folder and files use appropriate [ISO 639-3 codes](https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes) 14 | - [ ] `config-g2p.yaml` file includes all author names, and settings necessary 15 | - [ ] Please add some test data in `g2p/tests/public/data`. The added file should be a csv/tsv/psv file and each row should have the format `[input_mapping_code,output_mapping_code,input_string,output_string]` 16 | - [ ] As the last step, G2P has been updated by running `g2p update` locally and committing the change 17 | - [ ] You agree to license your contribution under the same license as this project (see [LICENSE](https://github.com/roedoejet/g2p/blob/main/LICENSE) file). 
18 | 19 | * **Other information**: 20 | -------------------------------------------------------------------------------- /.github/pull_request_template/software_pr.md: -------------------------------------------------------------------------------- 1 | 3 | 4 | ### PR Goal? 5 | 6 | 7 | 8 | ### Fixes? 9 | 10 | 11 | 12 | ### Feedback sought? 13 | 14 | 15 | 16 | ### Priority? 17 | 18 | 19 | 20 | ### Tests added? 21 | 22 | 23 | 24 | ### How to test? 25 | 26 | 27 | 28 | ### Confidence? 29 | 30 | 31 | 32 | ### Version change? 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Deploy docs 2 | on: 3 | push: 4 | branches: 5 | - main 6 | jobs: 7 | docs: 8 | # Create latest docs 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | with: 13 | fetch-depth: 0 # fetch all commits/branches 14 | - name: Set up Python 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: "3.8" 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install -e .[docs] 22 | - name: Setup doc deploy 23 | run: | 24 | git config user.name 'github-actions[bot]' 25 | git config user.email 'github-actions[bot]@users.noreply.github.com' 26 | - name: Deploy docs with mike 🚀 27 | run: | 28 | mike deploy --push --update-aliases dev latest 29 | -------------------------------------------------------------------------------- /.github/workflows/matrix-tests.yml: -------------------------------------------------------------------------------- 1 | name: Run full matrix Tests before Releases 2 | on: 3 | workflow_call: 4 | workflow_dispatch: 5 | push: 6 | branches: main 7 | env: 8 | SETUPTOOLS_SCM_PRETEND_VERSION: "2.1" 9 | jobs: 10 | pre-release-matrix-test: 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | os: [ubuntu-22.04, windows-latest, macos-latest] 15 | 
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] 16 | exclude: 17 | - os: macos-latest 18 | python-version: "3.7" 19 | runs-on: ${{ matrix.os }} 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | shell: bash 28 | run: | 29 | python -m pip install --upgrade pip 30 | SETUPTOOLS_SCM_PRETEND_VERSION=`cat .SETUPTOOLS_SCM_PRETEND_VERSION` pip install -e .[test] 31 | - name: Run tests 32 | run: python run_tests.py dev 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled python modules. 2 | *.pyc 3 | 4 | # Setuptools distribution folder. 5 | /dist/ 6 | build 7 | venv 8 | 9 | # Ignore private test data 10 | g2p/tests/private 11 | 12 | # Python egg metadata, regenerated from source files by setuptools. 
13 | /*.egg-info 14 | 15 | .DS_Store 16 | .vscode 17 | .python-version 18 | 19 | *.log 20 | log.txt 21 | 22 | .coverage 23 | htmlcov 24 | 25 | flask_session 26 | 27 | # Sphinx documentation 28 | docs/_build/ 29 | 30 | # coverage annotation output 31 | *,cover 32 | 33 | # vim temp files 34 | *~ 35 | 36 | # mkdocs build 37 | site 38 | g2p/_version.py 39 | -------------------------------------------------------------------------------- /.gitlint: -------------------------------------------------------------------------------- 1 | [general] 2 | # Enable conventional commit linting 3 | contrib=contrib-title-conventional-commits 4 | 5 | # Ignore any data sent to gitlint via stdin (helpful on Windows) 6 | ignore-stdin=true 7 | 8 | # We don't require a body, just a title, even though a body is also a good idea 9 | ignore=body-is-missing,body-min-length 10 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: check-yaml 6 | - id: check-json 7 | - id: end-of-file-fixer 8 | - id: trailing-whitespace 9 | exclude: \.svg$ 10 | - repo: https://github.com/PyCQA/isort 11 | rev: 5.13.2 12 | hooks: 13 | - id: isort 14 | args: [--profile=black] 15 | - repo: https://github.com/psf/black 16 | rev: 24.8.0 17 | hooks: 18 | - id: black 19 | - repo: https://github.com/pycqa/flake8 20 | rev: 7.1.1 21 | hooks: 22 | - id: flake8 23 | - repo: https://github.com/pre-commit/mirrors-mypy 24 | rev: v1.13.0 25 | hooks: 26 | - id: mypy 27 | additional_dependencies: 28 | [pydantic, types-requests, types-python-slugify, types-PyYAML] 29 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Base Image 2 | FROM debian:latest 3 | 4 | ENV 
DEBIAN_FRONTEND=noninteractive 5 | 6 | # Dependencies that don't change with g2p updates and can be cached, and make it lean 7 | RUN apt-get update -y \ 8 | && apt-get install -y \ 9 | apt-transport-https \ 10 | libffi-dev \ 11 | openssl \ 12 | libssl-dev \ 13 | python3 \ 14 | python3-pip \ 15 | python3-dev \ 16 | python3-venv \ 17 | build-essential \ 18 | nano \ 19 | git \ 20 | && apt-get clean \ 21 | && apt-get autoremove \ 22 | && rm -fr /var/lib/apt/lists/* 23 | 24 | # Create a venv to install packages locally 25 | RUN python3 -m venv --system-site-packages /g2p/venv 26 | 27 | # Get g2p-specific dependencies that can also often be cached 28 | RUN mkdir -p /g2p/g2p 29 | COPY requirements.txt /g2p 30 | COPY pyproject.toml /g2p 31 | RUN . /g2p/venv/bin/activate \ 32 | && python3 -m pip install --upgrade pip \ 33 | && MAKEFLAGS="-j$(nproc)" pip3 install -r /g2p/requirements.txt 34 | 35 | # Install g2p itself, last 36 | COPY . /g2p/ 37 | COPY README.md /g2p 38 | COPY Dockerfile /g2p 39 | RUN . /g2p/venv/bin/activate \ 40 | && pip3 install -e /g2p 41 | 42 | # Comment this out if you just want to install g2p in the container without running the studio. 43 | SHELL ["/bin/sh", "-c"] 44 | CMD gunicorn --worker-class uvicorn.workers.UvicornWorker -w 1 g2p.app:APP --bind 0.0.0.0:8000 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019-2021 Aidan Pine, David Huggins-Daines, National Research Council Canada (NRC) 4 | Individual mappings in g2p/mappings/langs contributed by others are Copyright 5 | their contributors. 
6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include g2p/mappings/langs * 2 | recursive-include g2p/tests/public * 3 | include g2p/templates/* 4 | include g2p/static/*.css 5 | include g2p/static/*.js 6 | include g2p/static/*.json 7 | include g2p/static/swagger-ui/* 8 | include MANIFEST.in 9 | exclude .gitignore 10 | recursive-exclude * *.py[co] 11 | recursive-exclude * *~ 12 | recursive-exclude * *.orig 13 | recursive-exclude * *.DS_Store 14 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn --worker-class uvicorn.workers.UvicornWorker -w 1 g2p.app:APP 2 | -------------------------------------------------------------------------------- /bin/post_compile: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Heroku has no understanding of dynamic versioning so we have to hack this to make things work 4 | export SETUPTOOLS_SCM_PRETEND_VERSION=`cat .SETUPTOOLS_SCM_PRETEND_VERSION` 5 | # Also it expects to run your app in place, but doesn't actually do this for you because it's old 6 | pip install -e . 
7 | -------------------------------------------------------------------------------- /docs/_static/abbs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/docs/_static/abbs.png -------------------------------------------------------------------------------- /docs/_static/basic.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/docs/_static/basic.gif -------------------------------------------------------------------------------- /docs/_static/creator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/docs/_static/creator.png -------------------------------------------------------------------------------- /docs/_static/rules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/docs/_static/rules.png -------------------------------------------------------------------------------- /docs/assets/g2p_update.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/docs/assets/g2p_update.jpeg -------------------------------------------------------------------------------- /docs/cli.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | --- 4 | 5 | # Command line interface 6 | 7 | There is a command line interface bundled with g2p that allows both basic and advanced functionality to be accessed from your command line. 
After [installing g2p](./installation.md), you can get information about how to use the command line by running `g2p --help`. 8 | 9 | 10 | ::: mkdocs-click 11 | :module: g2p.cli 12 | :command: cli 13 | :prog_name: g2p 14 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | --- 4 | 5 | # Contributing 6 | 7 | Feel free to dive in! [Open an issue](https://github.com/roedoejet/g2p/issues/new) or submit PRs. 8 | 9 | This repo follows the [Contributor Covenant](http://contributor-covenant.org/version/1/3/0/) Code of Conduct. 10 | 11 | ## Adding a new language/mapping 12 | 13 | In order to add a new mapping or language to be supported, please fill out a pull request with the [pull request template](https://github.com/roedoejet/g2p/blob/main/.github/pull_request_template.md) provided. 14 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to the G2P documentation! 2 | 3 | G2P is a tool for doing rule-based conversions for text. 4 | 5 | This website has the technical documentation for G2P, but we've also written a [7-part blog series](https://blog.mothertongues.org/g2p-background/) if you want a more thorough introduction to why G2P exists and what you can use it for. 6 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | --- 4 | 5 | # Installation 6 | 7 | You can either install `g2p` with pip from PyPI: 8 | 9 | ```bash 10 | pip install g2p 11 | ``` 12 | 13 | Or by cloning and installing from source: 14 | 15 | ```bash 16 | git clone https://github.com/roedoejet/g2p.git 17 | 18 | cd g2p 19 | 20 | pip install -e . 
21 | ``` 22 | 23 | To install in an isolated environment (recommended for development) 24 | you may use [hatch](https://hatch.pypa.io/latest/): 25 | 26 | ``` 27 | hatch shell 28 | ``` 29 | -------------------------------------------------------------------------------- /docs/overrides/partials/comments.html: -------------------------------------------------------------------------------- 1 | {% if page.meta.comments %} 2 |

{{ lang.t("meta.comments") }}

3 | 4 | 7 | 8 | 41 | {% endif %} 42 | -------------------------------------------------------------------------------- /docs/package.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | --- 4 | 5 | # Python package 6 | 7 | ## `make_g2p` 8 | 9 | The easiest way to create a transducer programmatically is to use the `g2p.make_g2p` function. 10 | 11 | To use it, first import the function: 12 | 13 | ```python 14 | from g2p import make_g2p 15 | ``` 16 | 17 | Then, call it with an argument for `in_lang` and `out_lang`. Both must be strings equal to the name of a particular mapping. 18 | 19 | ```python 20 | >>> transducer = make_g2p("dan", "eng-arpabet") 21 | >>> transducer("hej").output_string 22 | 'HH EH Y' 23 | ``` 24 | 25 | There must be a valid path between the `in_lang` and `out_lang` in order for this to work. If you've edited a mapping or added a custom mapping, you must update g2p to include it: `g2p update` 26 | 27 | ## `make_tokenizer` 28 | 29 | Basic usage for the language-aware tokenizer: 30 | 31 | ```python 32 | from g2p import make_tokenizer 33 | tokenizer = make_tokenizer("dan") 34 | for token in tokenizer.tokenize_text("Åh, hvordan har du det, Åbenrå?"): 35 | if token.is_word: 36 | word = token.text 37 | else: 38 | interword_punctuation_and_spaces = token.text 39 | ``` 40 | 41 | Note that selecting the tokenizer language is important to make sure punctuation-like letters are handled correctly. 
For example `:` and `'` are punctuation in English but they will be part of the word tokens in Kanien'kéha (moh): 42 | 43 | ```python 44 | >>> list(make_tokenizer("moh").tokenize_text("Kanien'kéha")) 45 | [{'text': "Kanien'kéha", 'is_word': True}] 46 | >>> list(make_tokenizer("eng").tokenize_text("Kanien'kéha")) 47 | [{'text': 'Kanien', 'is_word': True}, {'text': "'", 'is_word': False}, {'text': 'kéha', 'is_word': True}] 48 | ``` 49 | 50 | ## A look under the hood 51 | 52 | A Mapping object is a list of defined rules. A `Rule` has the following permitted fields: 53 | 54 | ::: g2p.mappings.Rule 55 | options: 56 | show_root_heading: true 57 | show_source: false 58 | heading_level: 3 59 | members_order: source 60 | -------------------------------------------------------------------------------- /docs/start.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | --- 4 | 5 | # Getting Started 6 | 7 | ## Overview 8 | 9 | ### What is G2P? 10 | 11 | The initial version of this package was developed by [Patrick Littell](https://github.com/littell) in order to allow for g2p from community orthographies to IPA and back again in [ReadAlong-Studio](https://github.com/ReadAlongs/Studio). We then decided to pull out the g2p mechanism from [Convertextract](https://github.com/roedoejet/convertextract), which allows transducer relations to be declared in CSV files, and some g2p functionality from ReadAlong-Studio, and merge them into a stand-alone g2p library - here it is! 12 | 13 | This website has the technical documentation for G2P, but we've also written a [7-part blog series](https://blog.mothertongues.org/g2p-background/) if you want a more thorough introduction to why G2P exists and what you can use it for. 
14 | -------------------------------------------------------------------------------- /g2p/constants.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is for constants that can be initialized without any (expensive) dependencies. 3 | """ 4 | 5 | import os 6 | 7 | DISTANCE_METRICS = [ 8 | "weighted_feature_edit_distance", 9 | "hamming_feature_edit_distance", 10 | "feature_edit_distance", 11 | "dolgo_prime_distance", 12 | "fast_levenshtein_distance", 13 | "levenshtein_distance", 14 | ] 15 | 16 | LANGS_DIR = os.path.join(os.path.dirname(__file__), "mappings", "langs") 17 | LANGS_FILE_NAME = "langs.json.gz" 18 | NETWORK_FILE_NAME = "network.json.gz" 19 | -------------------------------------------------------------------------------- /g2p/log.py: -------------------------------------------------------------------------------- 1 | """ 2 | Error Log 3 | 4 | """ 5 | 6 | import logging 7 | import os 8 | 9 | import coloredlogs # type: ignore 10 | 11 | FIELD_STYLES = dict( 12 | levelname=dict(color="green"), 13 | ) 14 | 15 | 16 | def setup_logger(name): 17 | """Create logger and configure with cool colors!""" 18 | 19 | logger = logging.getLogger(name) 20 | coloredlogs.install( 21 | level=os.environ.get("G2P_LOGLEVEL", "INFO").upper(), 22 | fmt="%(levelname)s - %(message)s", 23 | logger=logger, 24 | field_styles=FIELD_STYLES, 25 | ) 26 | return logger 27 | 28 | 29 | LOGGER = setup_logger("root") 30 | -------------------------------------------------------------------------------- /g2p/mappings/create_fallback_mapping.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | 3 | from text_unidecode import unidecode # type: ignore 4 | 5 | from g2p import make_g2p 6 | from g2p.log import LOGGER 7 | from g2p.mappings import Mapping 8 | from g2p.mappings.create_ipa_mapping import align_inventories 9 | from g2p.mappings.utils import is_ipa, unicode_escape 10 | 11 | 
# Minimal phone inventory that every fallback rule is mapped onto.
DUMMY_INVENTORY = ["ɑ", "i", "u", "t", "s", "n"]


def align_to_dummy_fallback(
    mapping: Mapping,
    io: str = "in",
    distance: str = "weighted_feature_edit_distance",
    quiet=False,
):
    """Build a fallback Mapping from one side of *mapping* to DUMMY_INVENTORY.

    Args:
        mapping: the mapping whose inventory is aligned.
        io: which side of *mapping* to use; it selects both the
            ``f"{io}_lang"`` entry of the dumped config and the inventory
            returned by ``mapping.inventory(io)`` (presumably ``"in"`` or
            ``"out"`` -- confirm against ``Mapping.inventory``).
        distance: name of the distance metric forwarded to
            ``align_inventories``.
        quiet: forwarded unchanged to ``align_inventories``.

    Returns:
        A new ``Mapping`` whose ``in_lang`` is the selected language of
        *mapping*, whose ``out_lang`` is ``"dummy"``, and whose rules send
        each inventory symbol to a member of ``DUMMY_INVENTORY``.
    """
    source_lang = mapping.model_dump()[f"{io}_lang"]
    authors = [f"Generated {dt.datetime.now()}"]
    fallback_char = "t"

    if is_ipa(source_lang):
        # IPA symbols can be aligned against the dummy inventory directly.
        rules = align_inventories(
            mapping.inventory(io), DUMMY_INVENTORY, distance=distance, quiet=quiet
        )
    else:
        # Non-IPA symbols first get a rough IPA guess: strip to ASCII with
        # unidecode, lower-case, then run the "und" -> "und-ipa" converter.
        to_und_ipa = make_g2p("und", "und-ipa", tokenize=False)
        rules = []
        for symbol in mapping.inventory(io):
            rules.append(
                {
                    "in": unicode_escape(symbol),
                    "out": to_und_ipa(unidecode(symbol).lower()).output_string,
                }
            )

        # Align the guessed IPA strings with the dummy inventory; pairs with
        # an empty input side are ignored.
        aligned_pairs = align_inventories(
            [rule["out"] for rule in rules],
            DUMMY_INVENTORY,
            distance=distance,
            quiet=quiet,
        )
        ipa_to_dummy = {
            pair["in"]: pair["out"] for pair in aligned_pairs if pair["in"]
        }

        # Swap each guess for its dummy counterpart, or for the fallback
        # character (with a warning) when no alignment was produced for it.
        for rule in rules:
            guess = rule["out"]
            if guess in ipa_to_dummy:
                rule["out"] = ipa_to_dummy[guess]
                continue
            LOGGER.warning(
                f"We couldn't guess at what {rule['in']} means, so it's being "
                f"replaced with '{fallback_char}' instead."
            )
            rule["out"] = fallback_char

    return Mapping(
        in_lang=source_lang,
        out_lang="dummy",
        authors=authors,
        rules=rules,
    )
43 | "in": "n", 44 | "out": "n" 45 | }, 46 | { 47 | "in": "r", 48 | "out": "ɾ" 49 | }, 50 | { 51 | "in": "s", 52 | "out": "s" 53 | }, 54 | { 55 | "in": "u", 56 | "out": "u" 57 | }, 58 | { 59 | "in": "uː", 60 | "out": "u" 61 | }, 62 | { 63 | "in": "w", 64 | "out": "w" 65 | }, 66 | { 67 | "in": "ɡ", 68 | "out": "ɡ" 69 | }, 70 | { 71 | "in": "ʃ", 72 | "out": "ʃ" 73 | } 74 | ] 75 | -------------------------------------------------------------------------------- /g2p/mappings/langs/atj/atj_to_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "in": "p", "out": "b" }, 3 | { "in": "t", "out": "d" }, 4 | { "in": "k", "out": "ɡ" }, 5 | { "in": "s", "out": "s" }, 6 | { "in": "c", "out": "ʃ" }, 7 | { "in": "tc", "out": "d͡ʒ" }, 8 | { "in": "m", "out": "m" }, 9 | { "in": "n", "out": "n" }, 10 | { "in": "r", "out": "r" }, 11 | { "in": "h", "out": "h" }, 12 | { "in": "w", "out": "w" }, 13 | { "in": "a", "out": "a" }, 14 | { "in": "e", "out": "eː" }, 15 | { "in": "i", "out": "i" }, 16 | { "in": "o", "out": "u" }, 17 | { "in": "â", "out": "aː" }, 18 | { "in": "ê", "out": "eː" }, 19 | { "in": "î", "out": "iː" }, 20 | { "in": "ô", "out": "uː" } 21 | ] 22 | -------------------------------------------------------------------------------- /g2p/mappings/langs/atj/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Atikamekw 3 | mappings: 4 | - display_name: Atikamekw to IPA 5 | in_lang: atj 6 | out_lang: atj-ipa 7 | type: mapping 8 | case_sensitive: false 9 | authors: 10 | - David Huggins-Daines 11 | - Patrick Littell 12 | rules_path: atj_to_ipa.json 13 | rule_ordering: apply-longest-first 14 | <<: *shared 15 | - display_name: Atikamekw IPA to English IPA 16 | in_lang: atj-ipa 17 | out_lang: eng-ipa 18 | type: mapping 19 | rules_path: atj_ipa_to_eng_ipa.json 20 | case_sensitive: false 21 | <<: *shared 22 | 
-------------------------------------------------------------------------------- /g2p/mappings/langs/ckt/README.md: -------------------------------------------------------------------------------- 1 | Language-specific files for Chukchi 2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ckt/ckt_ipa_to_eng_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "out": "ɑ", 4 | "in": "a" 5 | }, 6 | { 7 | "out": "eː", 8 | "in": "e" 9 | }, 10 | { 11 | "out": "i", 12 | "in": "i" 13 | }, 14 | { 15 | "out": "j", 16 | "in": "j" 17 | }, 18 | { 19 | "out": "k", 20 | "in": "k" 21 | }, 22 | { 23 | "out": "m", 24 | "in": "m" 25 | }, 26 | { 27 | "out": "n", 28 | "in": "n" 29 | }, 30 | { 31 | "out": "oː", 32 | "in": "o" 33 | }, 34 | { 35 | "out": "p", 36 | "in": "p" 37 | }, 38 | { 39 | "out": "k", 40 | "in": "q" 41 | }, 42 | { 43 | "out": "s", 44 | "in": "s" 45 | }, 46 | { 47 | "out": "t", 48 | "in": "t" 49 | }, 50 | { 51 | "out": "u", 52 | "in": "u" 53 | }, 54 | { 55 | "out": "w", 56 | "in": "w" 57 | }, 58 | { 59 | "out": "ŋ", 60 | "in": "ŋ" 61 | }, 62 | { 63 | "out": "ə", 64 | "in": "ə" 65 | }, 66 | { 67 | "out": "ɡ", 68 | "in": "ɣ" 69 | }, 70 | { 71 | "out": "s", 72 | "in": "ɬ" 73 | }, 74 | { 75 | "out": "ɾ", 76 | "in": "ɾ" 77 | }, 78 | { 79 | "out": "ʔ", 80 | "in": "ʔ" 81 | } 82 | ] 83 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ckt/ckt_to_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "in": "ʼ", 4 | "out": "ʔ" 5 | }, 6 | { 7 | "in": "а", 8 | "out": "a" 9 | }, 10 | { 11 | "in": "в", 12 | "out": "w" 13 | }, 14 | { 15 | "in": "г", 16 | "out": "ɣ" 17 | }, 18 | { 19 | "in": "е", 20 | "out": "e" 21 | }, 22 | { 23 | "in": "и", 24 | "out": "i" 25 | }, 26 | { 27 | "in": "й", 28 | "out": "j" 29 | }, 30 | { 31 | "in": "к", 32 | "out": "k" 33 | }, 34 | { 35 | "in": 
"м", 36 | "out": "m" 37 | }, 38 | { 39 | "in": "н", 40 | "out": "n" 41 | }, 42 | { 43 | "in": "о", 44 | "out": "o" 45 | }, 46 | { 47 | "in": "п", 48 | "out": "p" 49 | }, 50 | { 51 | "in": "р", 52 | "out": "ɾ" 53 | }, 54 | { 55 | "in": "с", 56 | "out": "s" 57 | }, 58 | { 59 | "in": "т", 60 | "out": "t" 61 | }, 62 | { 63 | "in": "у", 64 | "out": "u" 65 | }, 66 | { 67 | "in": "ъ", 68 | "out": "ʔ" 69 | }, 70 | { 71 | "in": "ы", 72 | "out": "ə" 73 | }, 74 | { 75 | "in": "ь", 76 | "out": "ʔ" 77 | }, 78 | { 79 | "in": "э", 80 | "out": "e" 81 | }, 82 | { 83 | "in": "ю", 84 | "out": "u" 85 | }, 86 | { 87 | "in": "я", 88 | "out": "a" 89 | }, 90 | { 91 | "in": "ё", 92 | "out": "o" 93 | }, 94 | { 95 | "in": "ӄ", 96 | "out": "q" 97 | }, 98 | { 99 | "in": "ӈ", 100 | "out": "ŋ" 101 | }, 102 | { 103 | "in": "ԓ", 104 | "out": "ɬ" 105 | } 106 | ] 107 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ckt/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Chukchi 3 | mappings: 4 | - display_name: Chukchi to IPA 5 | in_lang: ckt 6 | out_lang: ckt-ipa 7 | type: mapping 8 | authors: 9 | - Vasilisa Andrianets 10 | - Patrick Littell 11 | rules_path: ckt_to_ipa.json 12 | <<: *shared 13 | - display_name: Chukchi IPA to English IPA 14 | in_lang: ckt-ipa 15 | out_lang: eng-ipa 16 | type: mapping 17 | rules_path: ckt_ipa_to_eng_ipa.json 18 | <<: *shared 19 | -------------------------------------------------------------------------------- /g2p/mappings/langs/clc/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Tsilhqot'in 3 | mappings: 4 | - display_name: Doulos 5 | rules_path: doulos.csv 6 | in_lang: clc-doulos 7 | out_lang: clc 8 | <<: *shared 9 | -------------------------------------------------------------------------------- /g2p/mappings/langs/clc/doulos.csv: 
-------------------------------------------------------------------------------- 1 | @,ŝ 2 | #,ŵ 3 | \^,ẑ 4 | &,ɨ 5 | /,ʔ 6 | -------------------------------------------------------------------------------- /g2p/mappings/langs/clm/clm_equiv.csv: -------------------------------------------------------------------------------- 1 | ’,̕ 2 | ‘,̕ 3 | ˊ,̕ 4 | `,̕ 5 | ́,̕ 6 | ̒,̕ 7 | ̓,̕ 8 | ̔,̕ 9 | ̕,̕ 10 | ̛,̕ 11 | ʻ,̕ 12 | ʼ,̕ 13 | ʽ,̕ 14 | ʹ,̕ 15 | ',̕ 16 | -------------------------------------------------------------------------------- /g2p/mappings/langs/clm/clm_to_ipa.csv: -------------------------------------------------------------------------------- 1 | u̕,ˈɔ,,ʔ|h|y̕ 2 | u,ɔ,,ʔ|h|y̕ 3 | ə̕,ˈʌ 4 | ə,ɨ,č|š|č̕,č|š 5 | ə,ʊ,,[kqx]\u0323?\u0315?\u030C?ʷ 6 | ə,ə 7 | ʔ,ʔ 8 | a̕,ˈa 9 | a,a 10 | č̕,tʃʼ 11 | c̕,tsʼ 12 | č,tʃ 13 | c,ts 14 | e̕,ˈɛ 15 | e,ɛ 16 | h,h 17 | i̕,ˈi 18 | i,i 19 | k̕ʷ,kʼʷ 20 | kʷ,kʷ 21 | k,k 22 | l,l 23 | ɬ,ɬ 24 | ƛ̕,tɬʼ 25 | m̕,m̰ 26 | m,m 27 | n̕,n̰ 28 | n,n 29 | ŋ̕,ɴ̰ 30 | ŋ,ɴ 31 | p̕,pʼ 32 | p,p 33 | q̕ʷ,qʼʷ 34 | qʷ,qʷ 35 | q̕,qʼ 36 | q,q 37 | š,ʃ 38 | s,s 39 | t̕,tʼ 40 | t,t 41 | u̕,ˈu 42 | u,u 43 | w̕,w̰ 44 | w,w 45 | x̌ʷ,χʷ 46 | x̌,χ 47 | x̣ʷ,χʷ 48 | x̣,χ 49 | xʷ,xʷ 50 | y̕,j̰ 51 | y,j 52 | -------------------------------------------------------------------------------- /g2p/mappings/langs/clm/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Klallam 3 | mappings: 4 | - display_name: Klallam to IPA 5 | rules_path: clm_to_ipa.csv 6 | in_lang: clm-equiv 7 | out_lang: clm-ipa 8 | authors: 9 | - Eric Joanis (coding) 10 | - Timothy Montler (linguistic data) 11 | type: mapping 12 | prevent_feeding: true 13 | rule_ordering: as-written 14 | norm_form: NFD 15 | <<: *shared 16 | - display_name: Unicode Equivalencies 17 | in_lang: clm 18 | out_lang: clm-equiv 19 | authors: 20 | - Eric Joanis 21 | rules_path: clm_equiv.csv 22 | norm_form: NFD 23 | <<: *shared 24 | 
-------------------------------------------------------------------------------- /g2p/mappings/langs/crg/abbreviations.csv: -------------------------------------------------------------------------------- 1 | VOWEL,i,a,e,u,o,ɑː,æ,ɛ,eː,ɪ,iː,oː,uː 2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crg/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Michif 3 | mappings: 4 | - display_name: Michif Turtle Mountain Dictionary (TMD) to Michif IPA 5 | rules_path: crg-tmd-to-crg-ipa.csv 6 | in_lang: crg-tmd 7 | out_lang: crg-ipa 8 | case_sensitive: false 9 | rule_ordering: as-written 10 | prevent_feeding: true 11 | abbreviations_path: abbreviations.csv 12 | authors: 13 | - Fineen Davis 14 | - Olivia Sammons 15 | - Heather Souter 16 | - Christopher Cox 17 | <<: *shared 18 | - display_name: Michif Double Vowel (DV) to Michif IPA 19 | rules_path: crg-dv-to-crg-ipa.csv 20 | in_lang: crg-dv 21 | out_lang: crg-ipa 22 | rule_ordering: as-written 23 | prevent_feeding: true 24 | case_sensitive: false 25 | authors: 26 | - Fineen Davis 27 | - Olivia Sammons 28 | - Heather Souter 29 | - Christopher Cox 30 | <<: *shared 31 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crg/crg-dv-to-crg-ipa.csv: -------------------------------------------------------------------------------- 1 | g,ŋ,ñ 2 | aeñ,ɛ̃ː 3 | ooñ,ɔ̃ː 4 | oñ,ɔ̃ː 5 | hp,ʰp 6 | ht,ʰt 7 | hk,ʰk 8 | sh,ʃ 9 | zh,ʒ 10 | hch,ʰtʃ 11 | ch,tʃ 12 | uu,uː 13 | aañ,ɑ̃ː 14 | añ,ɑ̃ː 15 | iiñ,ĩː 16 | aw,aw 17 | ay,aj 18 | aa,ɑː 19 | ae,æ 20 | ee,eː 21 | oo,oː 22 | ii,iː 23 | oe,ʌː 24 | a,ʌ 25 | i,ɪ 26 | o,o 27 | u,ʊ 28 | y,j 29 | j,dʒ 30 | e,ɛ 31 | r,ɹ 32 | g,ɡ 33 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crg/crg-tmd-to-crg-ipa.csv: 
-------------------------------------------------------------------------------- 1 | g,ŋ,n 2 | n,,in|an|en|un|on|ɑːn|æn|ɛn|eːn|ɪn|iːn|oːn|uːn 3 | aen,ɛ̃ː 4 | awn,ɑ̃ː 5 | een,ĩː 6 | oun,ɔ̃ 7 | oow,oaw 8 | ow,aw 9 | uy,aj 10 | aw,ɑː 11 | wy,waj 12 | ae,æ 13 | ee,iː 14 | ay,eː 15 | oo,uː 16 | in,ĩ 17 | hp,ʰp 18 | ht,ʰt 19 | hk,ʰk 20 | sh,ʃ 21 | zh,ʒ 22 | hch,ʰtʃ 23 | ch,tʃ 24 | e,ɛ 25 | i,ɪ 26 | j,dʒ 27 | y,j 28 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crj/README.md: -------------------------------------------------------------------------------- 1 | Language-specific files for Southern East Cree 2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crj/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: East Cree, Southern 3 | mappings: 4 | - display_name: Northern East Cree Equivalencies 5 | in_lang: crj 6 | out_lang: crj-equiv 7 | type: mapping 8 | authors: 9 | - Delasie Torkornoo 10 | - Aidan Pine 11 | - Eric Joanis 12 | rules_path: crj_equiv.json 13 | rule_ordering: as-written 14 | case_sensitive: false 15 | <<: *shared 16 | - display_name: Southern East Cree to IPA 17 | in_lang: crj-equiv 18 | out_lang: crj-ipa 19 | type: mapping 20 | authors: 21 | - David Huggins-Daines 22 | - Patrick Littell 23 | rules_path: crj_to_ipa.json 24 | rule_ordering: apply-longest-first 25 | case_sensitive: false 26 | <<: *shared 27 | - display_name: Southern East Cree IPA to English IPA 28 | in_lang: crj-ipa 29 | out_lang: eng-ipa 30 | type: mapping 31 | rules_path: crj_ipa_to_eng_ipa.json 32 | rule_ordering: apply-longest-first 33 | case_sensitive: false 34 | <<: *shared 35 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crk/README.md: -------------------------------------------------------------------------------- 1 | Language-specific 
files for Plains Cree (nêhiyawêwin) 2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crk/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Plains Cree 3 | mappings: 4 | - display_name: Plains Cree (SRO) to IPA 5 | in_lang: crk-no-symbols 6 | out_lang: crk-ipa 7 | type: mapping 8 | norm_form: NFD 9 | rule_ordering: as-written 10 | prevent_feeding: true 11 | authors: 12 | - Eddie Antonio Santos 13 | rules_path: crk-no-symbols_to_ipa.json 14 | <<: *shared 15 | - display_name: Plains Cree Symbols to SRO 16 | in_lang: crk 17 | out_lang: crk-no-symbols 18 | type: mapping 19 | norm_form: NFD 20 | rule_ordering: as-written 21 | prevent_feeding: true 22 | escape_special: true 23 | authors: 24 | - Aidan Pine 25 | rules_path: crk_to_crk-no-symbols.json 26 | <<: *shared 27 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crk/crk-no-symbols_to_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "in": "ê", 4 | "out": "eː" 5 | }, 6 | { 7 | "in": "î", 8 | "out": "iː" 9 | }, 10 | { 11 | "in": "ô", 12 | "out": "oː" 13 | }, 14 | { 15 | "in": "â", 16 | "out": "aː" 17 | }, 18 | { 19 | "in": "i", 20 | "out": "ɪ" 21 | }, 22 | { 23 | "in": "o", 24 | "out": "o" 25 | }, 26 | { 27 | "in": "a", 28 | "out": "ʌ" 29 | }, 30 | { 31 | "in": "p", 32 | "out": "p" 33 | }, 34 | { 35 | "in": "t", 36 | "out": "t" 37 | }, 38 | { 39 | "in": "k", 40 | "out": "k" 41 | }, 42 | { 43 | "in": "m", 44 | "out": "m" 45 | }, 46 | { 47 | "in": "n", 48 | "out": "n" 49 | }, 50 | { 51 | "in": "c", 52 | "out": "t͡s" 53 | }, 54 | { 55 | "in": "s", 56 | "out": "s" 57 | }, 58 | { 59 | "in": "h", 60 | "out": "h" 61 | }, 62 | { 63 | "in": "y", 64 | "out": "j" 65 | }, 66 | { 67 | "in": "w", 68 | "out": "w" 69 | } 70 | ] 71 | 
-------------------------------------------------------------------------------- /g2p/mappings/langs/crk/crk_to_crk-no-symbols.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "in": "&", 4 | "out": "êkwa" 5 | } 6 | ] 7 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crl/README.md: -------------------------------------------------------------------------------- 1 | Language-specific files for Northern East Cree 2 | 3 | AP: There seems to be a problem here with normalization. Most of the rules for long vowels are declared with \u1427 "canadian syllabics final middle dot", so ᐧᐋ is a sequence of \u1427\u140B, but there also appears to be a specific code point for waa: \u1419. I've added a crl_norm.json that normalizes the sequence to the single codepoint for that character and changed the crl_to_ipa.json mapping to use \u1419 instead of \u1427\u140B, but I'm not sure if this was the right choice. Either way, there needs to be some sort of normalization step here to handle real world input. 4 | 5 | 6 | DT: I have fixed the mappings so that all the w syllables use the single Unicode character instead of the two-character Unicode sequence (\u1427 plus the base character). 
7 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crl/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: East Cree, Northern 3 | mappings: 4 | - display_name: Northern East Cree Equivalencies 5 | in_lang: crl 6 | out_lang: crl-equiv 7 | type: mapping 8 | authors: 9 | - Delasie Torkornoo 10 | - Aidan Pine 11 | - Eric Joanis 12 | rules_path: crl_equiv.json 13 | rule_ordering: as-written 14 | case_sensitive: false 15 | <<: *shared 16 | - display_name: Northern East Cree to IPA 17 | in_lang: crl-equiv 18 | out_lang: crl-ipa 19 | type: mapping 20 | authors: 21 | - David Huggins-Daines 22 | - Patrick Littell 23 | - Delasie Torkornoo 24 | rules_path: crl_to_ipa.json 25 | rule_ordering: apply-longest-first 26 | case_sensitive: false 27 | <<: *shared 28 | - display_name: Northern East Cree IPA to English IPA 29 | in_lang: crl-ipa 30 | out_lang: eng-ipa 31 | type: mapping 32 | rules_path: crl_ipa_to_eng_ipa.json 33 | rule_ordering: apply-longest-first 34 | case_sensitive: false 35 | <<: *shared 36 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crm/README.md: -------------------------------------------------------------------------------- 1 | Language-specific files for Moose Cree 2 | 3 | Authors: 4 | Delasie Torkornoo, 5 | Bradley Ellert 6 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crm/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Moose Cree 3 | mappings: 4 | - display_name: Moose Cree Equivalencies 5 | in_lang: crm 6 | out_lang: crm-equiv 7 | type: mapping 8 | authors: 9 | - Delasie Torkornoo 10 | - Bradley Ellert 11 | - Aidan Pine 12 | rules_path: crm_equiv.json 13 | case_sensitive: false 14 | <<: *shared 15 | - display_name: Moose Cree to 
IPA 16 | in_lang: crm-equiv 17 | out_lang: crm-ipa 18 | type: mapping 19 | authors: 20 | - David Huggins-Daines 21 | - Patrick Littell 22 | - Delasie Torkornoo 23 | - Bradley Ellert 24 | rules_path: crm_to_ipa.json 25 | case_sensitive: false 26 | <<: *shared 27 | - display_name: Moose Cree IPA to English IPA 28 | in_lang: crm-ipa 29 | out_lang: eng-ipa 30 | type: mapping 31 | rules_path: crm_ipa_to_eng_ipa.json 32 | rule_ordering: as-written 33 | case_sensitive: false 34 | <<: *shared 35 | -------------------------------------------------------------------------------- /g2p/mappings/langs/crx/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Stella Nadleh 3 | mappings: 4 | - display_name: Roman to Syllabics 5 | rules_path: stella_orth_to_syllabics.csv 6 | in_lang: crx-sro 7 | out_lang: crx-syl 8 | case_sensitive: false 9 | authors: 10 | - Aidan Pine 11 | <<: *shared 12 | - display_name: Syllabics to Roman 13 | rules_path: stella_orth_to_syllabics.csv 14 | in_lang: crx-syl 15 | out_lang: crx-sro 16 | case_sensitive: false 17 | reverse: true 18 | authors: 19 | - Aidan Pine 20 | <<: *shared 21 | -------------------------------------------------------------------------------- /g2p/mappings/langs/csw/README.md: -------------------------------------------------------------------------------- 1 | Language-specific files for Swampy Cree 2 | 3 | Authors: 4 | Delasie Torkornoo, 5 | Bradley Ellert 6 | -------------------------------------------------------------------------------- /g2p/mappings/langs/csw/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Swampy Cree 3 | mappings: 4 | - display_name: Swampy Cree Equivalencies 5 | in_lang: csw 6 | out_lang: csw-equiv 7 | type: mapping 8 | authors: 9 | - Delasie Torkornoo 10 | - Bradley Ellert 11 | - Aidan Pine 12 | rules_path: csw_equiv.json 13 | <<: *shared 14 | - 
display_name: Swampy Cree to IPA 15 | in_lang: csw-equiv 16 | out_lang: csw-ipa 17 | type: mapping 18 | authors: 19 | - David Huggins-Daines 20 | - Patrick Littell 21 | - Delasie Torkornoo 22 | - Bradley Ellert 23 | rules_path: csw_to_ipa.json 24 | <<: *shared 25 | - display_name: Swampy Cree IPA to English IPA 26 | in_lang: csw-ipa 27 | out_lang: eng-ipa 28 | type: mapping 29 | rules_path: csw_ipa_to_eng_ipa.json 30 | rule_ordering: as-written 31 | <<: *shared 32 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ctp/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Western Highland Chatino 3 | mappings: 4 | - display_name: Western Highland Chatino to IPA 5 | in_lang: ctp 6 | out_lang: ctp-ipa 7 | type: mapping 8 | authors: 9 | - Patrick Littell 10 | rules_path: ctp_to_ipa.json 11 | case_sensitive: false 12 | rule_ordering: as-written 13 | <<: *shared 14 | - display_name: Western Highland Chatino IPA to English IPA 15 | in_lang: ctp-ipa 16 | out_lang: eng-ipa 17 | type: mapping 18 | rules_path: ctp_ipa_to_eng_ipa.json 19 | rule_ordering: as-written 20 | <<: *shared 21 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ctp/ctp_ipa_to_eng_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "in": "a", 4 | "out": "ɑ" 5 | }, 6 | { 7 | "in": "ã", 8 | "out": "ɑ̃" 9 | }, 10 | { 11 | "in": "d̻", 12 | "out": "d" 13 | }, 14 | { 15 | "in": "d͡z", 16 | "out": "dz" 17 | }, 18 | { 19 | "in": "ẽ", 20 | "out": "1" 21 | }, 22 | { 23 | "in": "e", 24 | "out": "eː" 25 | }, 26 | { 27 | "in": "1", 28 | "out": "ẽ" 29 | }, 30 | { 31 | "in": "kʲ", 32 | "out": "kj" 33 | }, 34 | { 35 | "in": "kʷ", 36 | "out": "kw" 37 | }, 38 | { 39 | "in": "l̻", 40 | "out": "l" 41 | }, 42 | { 43 | "in": "n̻", 44 | "out": "n" 45 | }, 46 | { 47 | "in": "õ", 48 | "out": 
"õː", 49 | "prevent_feeding": true 50 | }, 51 | { 52 | "in": "o", 53 | "out": "oː" 54 | }, 55 | { 56 | "in": "r", 57 | "out": "ɾ" 58 | }, 59 | { 60 | "in": "t̻", 61 | "out": "t" 62 | }, 63 | { 64 | "in": "t͡s", 65 | "out": "ts" 66 | }, 67 | { 68 | "in": "t͡ʃ", 69 | "out": "tʃ" 70 | } 71 | ] 72 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ctp/ctp_to_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "in": "ty", 4 | "out": "t̻" 5 | }, 6 | { 7 | "in": "dy", 8 | "out": "d̻" 9 | }, 10 | { 11 | "in": "kw", 12 | "out": "kʷ", 13 | "prevent_feeding": true 14 | }, 15 | { 16 | "in": "k", 17 | "out": "kʲ" 18 | }, 19 | { 20 | "in": "q", 21 | "out": "ʔ" 22 | }, 23 | { 24 | "in": "ts", 25 | "out": "t͡s" 26 | }, 27 | { 28 | "in": "dz", 29 | "out": "d͡z" 30 | }, 31 | { 32 | "in": "ch", 33 | "out": "t͡ʃ" 34 | }, 35 | { 36 | "in": "x", 37 | "out": "ʃ" 38 | }, 39 | { 40 | "in": "j", 41 | "out": "h" 42 | }, 43 | { 44 | "in": "ny", 45 | "out": "n̻" 46 | }, 47 | { 48 | "in": "l", 49 | "out": "l̻" 50 | }, 51 | { 52 | "in": "y", 53 | "out": "j" 54 | }, 55 | { 56 | "in": "an", 57 | "out": "ã" 58 | }, 59 | { 60 | "in": "en", 61 | "out": "ẽ" 62 | }, 63 | { 64 | "in": "in", 65 | "out": "ĩ" 66 | }, 67 | { 68 | "in": "on", 69 | "out": "õ" 70 | }, 71 | { 72 | "in": "un", 73 | "out": "ũ" 74 | }, 75 | { 76 | "in": "ᴬ", 77 | "out": "" 78 | }, 79 | { 80 | "in": "ᴮ", 81 | "out": "" 82 | }, 83 | { 84 | "in": "ᶜ", 85 | "out": "" 86 | }, 87 | { 88 | "in": "ᴰ", 89 | "out": "" 90 | }, 91 | { 92 | "in": "ᴱ", 93 | "out": "" 94 | }, 95 | { 96 | "in": "ᶠ", 97 | "out": "" 98 | }, 99 | { 100 | "in": "ᴳ", 101 | "out": "" 102 | }, 103 | { 104 | "in": "ᴴ", 105 | "out": "" 106 | }, 107 | { 108 | "in": "ᴵ", 109 | "out": "" 110 | }, 111 | { 112 | "in": "ᴶ", 113 | "out": "" 114 | } 115 | ] 116 | -------------------------------------------------------------------------------- 
/g2p/mappings/langs/dan/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Danish 3 | mappings: 4 | - display_name: Danish to IPA 5 | in_lang: dan 6 | out_lang: dan-ipa 7 | type: mapping 8 | authors: 9 | - Aidan Pine 10 | rules_path: dan_to_ipa.csv 11 | abbreviations_path: dan_abbs.csv 12 | rule_ordering: as-written 13 | case_sensitive: false 14 | norm_form: 'none' 15 | <<: *shared 16 | -------------------------------------------------------------------------------- /g2p/mappings/langs/dan/dan_abbs.csv: -------------------------------------------------------------------------------- 1 | VOWEL,a,e,i,o,u,æ,å,ø 2 | CONSONANT,p,b,t,d,k,g,f,s,h,v,j,r,l,m,n 3 | FRONT,i,e,œ,ø,y 4 | BACK,u,o,a 5 | -------------------------------------------------------------------------------- /g2p/mappings/langs/dan/dan_to_dummy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "in": "o", 4 | "out": "u" 5 | }, 6 | { 7 | "in": "\u0061\u030a", 8 | "out": "u" 9 | }, 10 | { 11 | "in": "\u00e5", 12 | "out": "u" 13 | }, 14 | { 15 | "in": "d", 16 | "out": "t" 17 | }, 18 | { 19 | "in": "h", 20 | "out": "s" 21 | }, 22 | { 23 | "in": "f", 24 | "out": "s" 25 | }, 26 | { 27 | "in": "b", 28 | "out": "t" 29 | }, 30 | { 31 | "in": "k", 32 | "out": "t" 33 | }, 34 | { 35 | "in": "l", 36 | "out": "n" 37 | }, 38 | { 39 | "in": "v", 40 | "out": "s" 41 | }, 42 | { 43 | "in": "m", 44 | "out": "n" 45 | }, 46 | { 47 | "in": "j", 48 | "out": "t" 49 | }, 50 | { 51 | "in": "t", 52 | "out": "t" 53 | }, 54 | { 55 | "in": "g", 56 | "out": "t" 57 | }, 58 | { 59 | "in": "g", 60 | "out": "t" 61 | }, 62 | { 63 | "in": "g", 64 | "out": "t" 65 | }, 66 | { 67 | "in": "r", 68 | "out": "n" 69 | }, 70 | { 71 | "in": "r", 72 | "out": "n" 73 | }, 74 | { 75 | "in": "c", 76 | "out": "t" 77 | }, 78 | { 79 | "in": "e", 80 | "out": "i" 81 | }, 82 | { 83 | "in": "a", 84 | "out": "\u0251" 85 | }, 86 | { 
87 | "in": "a", 88 | "out": "\u0251" 89 | }, 90 | { 91 | "in": "a", 92 | "out": "\u0251" 93 | }, 94 | { 95 | "in": "\u00f8", 96 | "out": "u" 97 | }, 98 | { 99 | "in": "y", 100 | "out": "i" 101 | }, 102 | { 103 | "in": "p", 104 | "out": "t" 105 | } 106 | ] 107 | -------------------------------------------------------------------------------- /g2p/mappings/langs/dan/dan_to_ipa.csv: -------------------------------------------------------------------------------- 1 | dd,ð,VOWEL,VOWEL 2 | tt,d,VOWEL,VOWEL 3 | ng,ŋ 4 | nk,ŋ 5 | sc,s 6 | r,ʁ,,VOWEL 7 | r,ɐ̯,VOWEL 8 | o,ɔ 9 | \u0061\u030A,oː 10 | \u00E5,oː 11 | d,ð̠˕ˠ,VOWEL,\b|CONSONANT|FRONT 12 | t,ð,VOWEL,\b|CONSONANT 13 | g,ɪ̯,FRONT 14 | g,ʊ̯,BACK 15 | af,a,\b,\b 16 | g,,,\b 17 | g,ɡ 18 | r,ɐ̯,,\b 19 | c,s 20 | e,ɛ 21 | a,æ 22 | a,ɑ,r 23 | a,ɑ,,r 24 | r,ʁ 25 | ø,œ 26 | y,u 27 | p,p 28 | (œ|ɑ|æ|ɛ|ʊ̯|ɪ̯|ɔ|VOWEL),,,ɐ̯ 29 | -------------------------------------------------------------------------------- /g2p/mappings/langs/eng/README.md: -------------------------------------------------------------------------------- 1 | Aligned CMUDict for G2P 2 | ----------------------- 3 | 4 | The alignments in `cmudict.ipa.aligned.txt` were generated by 5 | [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus), 6 | as seen in [`make_alignments.sh`](./make_alignments.sh) 7 | 8 | CMUDict was obtained from https://github.com/cmusphinx/cmudict and has 9 | this license (2-clause BSD, compatible with G2P): 10 | 11 | Copyright (C) 1993-2015 Carnegie Mellon University. All rights reserved. 12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions 15 | are met: 16 | 17 | 1. Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimer. 19 | The contents of this file are deemed to be source code. 20 | 21 | 2. 
Redistributions in binary form must reproduce the above copyright 22 | notice, this list of conditions and the following disclaimer in 23 | the documentation and/or other materials provided with the 24 | distribution. 25 | 26 | This work was supported in part by funding from the Defense Advanced 27 | Research Projects Agency, the Office of Naval Research and the National 28 | Science Foundation of the United States of America, and by member 29 | companies of the Carnegie Mellon Sphinx Speech Consortium. We acknowledge 30 | the contributions of many volunteers to the expansion and improvement of 31 | this dictionary. 32 | 33 | THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 34 | ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 35 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 36 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 37 | NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 38 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 39 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 40 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 41 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 42 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 43 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
44 | -------------------------------------------------------------------------------- /g2p/mappings/langs/eng/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: English 3 | mappings: 4 | - display_name: Dummy to Arpabet 5 | in_lang: dummy 6 | out_lang: dummy-eng-arpabet 7 | out_delimiter: " " 8 | type: mapping 9 | norm_form: "NFC" 10 | authors: 11 | - Aidan Pine 12 | rules_path: dummy_to_arpabet.json 13 | rule_ordering: apply-longest-first 14 | <<: *shared 15 | - display_name: Hamming Dummy to Arpabet 16 | in_lang: hamming-dummy 17 | out_lang: hamming-dummy-eng-arpabet 18 | out_delimiter: " " 19 | type: mapping 20 | norm_form: "NFC" 21 | authors: 22 | - Aidan Pine 23 | rules_path: dummy_to_arpabet.json 24 | rule_ordering: apply-longest-first 25 | <<: *shared 26 | - display_name: English IPA to Arpabet 27 | in_lang: eng-ipa 28 | out_lang: eng-arpabet 29 | out_delimiter: " " 30 | type: mapping 31 | norm_form: "NFC" 32 | authors: 33 | - Patrick Littell 34 | rules_path: eng_ipa_to_arpabet.json 35 | rule_ordering: apply-longest-first 36 | <<: *shared 37 | - display_name: English IPA to Arpabet 38 | in_lang: hamming-eng-ipa 39 | out_lang: hamming-eng-arpabet 40 | out_delimiter: " " 41 | type: mapping 42 | norm_form: "NFC" 43 | authors: 44 | - Patrick Littell 45 | rules_path: eng_ipa_to_arpabet.json 46 | rule_ordering: apply-longest-first 47 | <<: *shared 48 | - display_name: English to IPA 49 | type: lexicon 50 | alignments_path: cmudict.ipa.aligned.txt 51 | in_lang: eng 52 | out_lang: eng-ipa 53 | case_sensitive: false 54 | norm_form: "NFC" 55 | authors: 56 | - David Huggins-Daines 57 | <<: *shared 58 | -------------------------------------------------------------------------------- /g2p/mappings/langs/eng/dummy_to_arpabet.json: -------------------------------------------------------------------------------- 1 | [{ 2 | "in": "ɑ", 3 | "out": "AA" 4 | }, 5 | { 6 | "in": "i", 7 | "out": "IY" 
8 | }, 9 | { 10 | "in": "u", 11 | "out": "UW" 12 | }, 13 | { 14 | "in": "s", 15 | "out": "S" 16 | }, 17 | { 18 | "in": "t", 19 | "out": "T" 20 | }, 21 | { 22 | "in": "n", 23 | "out": "N" 24 | } 25 | ] 26 | -------------------------------------------------------------------------------- /g2p/mappings/langs/eng/eng_arpabet_to_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "in": "AA", "out": "ɑ" }, 3 | { "in": "AE", "out": "æ" }, 4 | { "in": "AH", "out": "ʌ" }, 5 | { "in": "AO", "out": "ɔ" }, 6 | { "in": "AW", "out": "aʊ" }, 7 | { "in": "AY", "out": "aɪ" }, 8 | { "in": "EH", "out": "ɛ" }, 9 | { "in": "ER", "out": "ɜ˞" }, 10 | { "in": "EY", "out": "eɪ" }, 11 | { "in": "IH", "out": "ɪ" }, 12 | { "in": "IX", "out": "ɨ" }, 13 | { "in": "IY", "out": "i" }, 14 | { "in": "OW", "out": "oʊ" }, 15 | { "in": "OY", "out": "ɔɪ" }, 16 | { "in": "UH", "out": "ʊ" }, 17 | { "in": "UW", "out": "u" }, 18 | { "in": "B", "out": "b" }, 19 | { "in": "CH", "out": "tʃ" }, 20 | { "in": "D", "out": "d" }, 21 | { "in": "DH", "out": "ð" }, 22 | { "in": "F", "out": "f" }, 23 | { "in": "G", "out": "ɡ" }, 24 | { "in": "HH", "out": "h" }, 25 | { "in": "JH", "out": "dʒ" }, 26 | { "in": "K", "out": "k" }, 27 | { "in": "L", "out": "l" }, 28 | { "in": "M", "out": "m" }, 29 | { "in": "N", "out": "n" }, 30 | { "in": "NG", "out": "ŋ" }, 31 | { "in": "P", "out": "p" }, 32 | { "in": "R", "out": "ɹ" }, 33 | { "in": "S", "out": "s" }, 34 | { "in": "SH", "out": "ʃ" }, 35 | { "in": "T", "out": "t" }, 36 | { "in": "TH", "out": "θ" }, 37 | { "in": "V", "out": "v" }, 38 | { "in": "W", "out": "w" }, 39 | { "in": "Y", "out": "j" }, 40 | { "in": "Z", "out": "z" }, 41 | { "in": "ZH", "out": "ʒ" } 42 | ] 43 | -------------------------------------------------------------------------------- /g2p/mappings/langs/eng/eng_inventory.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "inventory", 3 | "authors": ["CMU 
Sphinx team"], 4 | "created": "2019-04-11", 5 | "last_modified": "2019-04-11", 6 | "metadata": { 7 | "display_name": "English", 8 | "display": true, 9 | "lang": "eng", 10 | "format": "custom", 11 | "delimiter": "" 12 | }, 13 | "inventory": [ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", 14 | "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" ] 15 | } 16 | -------------------------------------------------------------------------------- /g2p/mappings/langs/eng/make_alignments.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Replace with actual path! 4 | # cmudict.dict is retrievable from https://github.com/cmusphinx/cmudict/blob/master/cmudict.dict 5 | CMUDICT=../../../../../cmudict/cmudict.dict 6 | # Install Phonetisaurus with `pip install phonetisaurus` 7 | export PATH=$(python -c 'import phonetisaurus as p; print(p.guess_environment()["PATH"])') 8 | export LD_LIBRARY_PATH=$(python -c 'import phonetisaurus as p; print(p.guess_environment()["LD_LIBRARY_PATH"])') 9 | 10 | python make_ipa_cmudict.py < $CMUDICT > tmp.txt 11 | phonetisaurus-align --s1_char_delim="" --s2_char_delim="" \ 12 | --seq1_del=true --seq2_del=true --seq1_max=2 --seq2_max=2 \ 13 | --iter=5 --input=tmp.txt --ofile=cmudict.ipa.aligned.txt 14 | rm tmp.txt 15 | -------------------------------------------------------------------------------- /g2p/mappings/langs/eng/make_ipa_cmudict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Convert CMUDict to IPA, removing stress markers. 
We can only handle 4 | one pronunciation at a time so just take the first one.""" 5 | 6 | import fileinput 7 | import json 8 | import re 9 | 10 | with open("eng_arpabet_to_ipa.json") as f: 11 | mappings = json.load(f) 12 | ipa_map = dict((e["in"], e["out"]) for e in mappings) 13 | 14 | comment_re = re.compile(r"#.*$") 15 | entry_re = re.compile(r"^(\S+?)(\(\d+\))?\s+(.*)$") 16 | stress_re = re.compile(r"\d+$") 17 | for spam in fileinput.input(): 18 | m = entry_re.match(comment_re.sub("", spam.strip())) 19 | if m is None: 20 | continue 21 | word, alt, phones = m.groups() 22 | if alt is not None: # skip alternates 23 | continue 24 | phones = "".join( 25 | ipa_map[np] for np in (stress_re.sub("", p) for p in phones.split()) 26 | ) 27 | print("\t".join((word, phones))) 28 | -------------------------------------------------------------------------------- /g2p/mappings/langs/eng/reverse_json.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import json 4 | 5 | with open("eng_ipa_to_arpabet.json") as f: 6 | mappings = json.load(f) 7 | print("[") 8 | seen = set() 9 | for i, m in enumerate(mappings): 10 | if m["out"] in seen: 11 | continue 12 | seen.add(m["out"]) 13 | if " " in m["out"]: 14 | continue 15 | print( 16 | ' { "in": "%s", "out": "%s" }%s' 17 | % (m["out"], m["in"], "," if i != len(mappings) - 1 else "") 18 | ) 19 | print("]") 20 | -------------------------------------------------------------------------------- /g2p/mappings/langs/fin/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | - language_name: Finnish 3 | mappings: 4 | - display_name: Finnish to IPA 5 | in_lang: fin 6 | out_lang: fin-ipa 7 | authors: 8 | - David Huggins-Daines 9 | type: mapping 10 | rules_path: fin_to_ipa.csv 11 | rule_ordering: apply-longest-first 12 | case_sensitive: false 13 | norm_form: 'NFC' 14 | <<: *shared 15 | 
-------------------------------------------------------------------------------- /g2p/mappings/langs/fin/fin_to_ipa.csv: -------------------------------------------------------------------------------- 1 | aa,ɑː,, 2 | ai,ɑɪ,, 3 | au,ɑʊ,, 4 | a,ɑ,, 5 | b,b,, 6 | c,s,, 7 | d,d,, 8 | ee,eː,, 9 | ei,eɪ,, 10 | eu,eʊ,, 11 | e,e,, 12 | f,f,, 13 | g,ɡ,, 14 | h,h,, 15 | ii,iː,, 16 | ie,iɛ,, 17 | i,i,, 18 | j,j,, 19 | k,k,, 20 | l,l,, 21 | m,m,, 22 | n,n,, 23 | oo,oː,, 24 | ou,oʊ,, 25 | oi,ɔɪ,, 26 | o,o,, 27 | p,p,, 28 | q,k,, 29 | r,r,, 30 | s,s,, 31 | t,t,, 32 | uu,uː,, 33 | uo,ʊɔ,, 34 | ui,ʊɪ,, 35 | u,u,, 36 | v,ʋ,, 37 | w,ʋ,, 38 | x,ks,, 39 | yy,yː,, 40 | y,y,, 41 | z,t͡s,, 42 | å,o,, 43 | ää,æː,, 44 | äy,æɥ,, 45 | ä,æ,, 46 | öö,øː,, 47 | öy,øɥ,, 48 | ö,ø,, 49 | n,ŋ,,k 50 | n,ŋ,,g 51 | n,m,,p 52 | n,m,,b 53 | -------------------------------------------------------------------------------- /g2p/mappings/langs/font-encodings/README.md: -------------------------------------------------------------------------------- 1 | # Font encodings 2 | 3 | Before broad utf8 support, communities often resorted to encoding or 'hacking' their characters into a font, and abusing a separate Unicode codepoint to render the character in their writing system as needed. This folder should be where these types of mappings are handled. 
4 | 5 | Some style guidelines: 6 | * The `in_lang` key should end with `-font` 7 | * If the mapping is general, please use `Undetermined` as the language name 8 | 9 | Currently the following are supported: 10 | * SIL Fonts 11 | - Heiltsuk Doulos 12 | - Heiltsuk Times 13 | - Navajo Times 14 | * [UBC First Nations Unicode Font](https://fnel.arts.ubc.ca/resources/font/) 15 | -------------------------------------------------------------------------------- /g2p/mappings/langs/font-encodings/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - display_name: Doulos 3 | in_lang: hei-doulos 4 | out_lang: hei 5 | authors: 6 | - Aidan Pine 7 | rules_path: hei_doulos.csv 8 | language_name: Heiltsuk 9 | - display_name: Times 10 | in_lang: hei-times-font 11 | out_lang: hei 12 | authors: 13 | - Aidan Pine 14 | rules_path: hei_times.csv 15 | language_name: Heiltsuk 16 | - display_name: Times 17 | rules_path: nav_times.csv 18 | in_lang: nav-times-font 19 | out_lang: nav 20 | language_name: Navajo 21 | authors: 22 | - Aidan Pine 23 | - display_name: First Nations Unicode 24 | rules_path: fn_unicode.csv 25 | in_lang: fn-unicode-font 26 | out_lang: fn-unicode 27 | norm_form: NFD 28 | escape_special: false 29 | case_sensitive: false 30 | language_name: Undetermined 31 | authors: 32 | - Aidan Pine 33 | -------------------------------------------------------------------------------- /g2p/mappings/langs/font-encodings/fn_unicode.csv: -------------------------------------------------------------------------------- 1 | \uF000,x\u030c 2 | \uF101,c\u0313 3 | \uF102,c\u030c\u0313 4 | \uF103,g\u0313 5 | \uF104,j\u030c\u0313 6 | \uF105,k\u0313 7 | \uF106,l\u0315 8 | \uF107,m\u0313 9 | \uF108,n\u0313 10 | \uF109,ŋ\u0313 11 | \uF10A,p\u0313 12 | \uF10B,q\u0313 13 | \uF10C,r\u0313 14 | \uF10D,t\u0315 15 | \uF10E,w\u0313 16 | \uF10F,y\u0313 17 | \uF110,z\u0313 18 | \uF111,ƛ\u0313 19 | \uF112,ɣ\u0313 20 | \uF113,ʕ\u0315 21 | 
\uF114,s\u0313 22 | \uF121,w\u0325 23 | \uF131,ᶻ 24 | \uF132,ᶿ 25 | \uF133,ˡ 26 | \uF141,ə\u0300 27 | \uF142,ə\u0301 28 | \uF181,l\u0329 29 | \uF182,m\u0329 30 | \uF183,n\u0329 31 | \uF184,r\u0329 32 | \uF191,a\u0332 33 | \uF197,g\u0332 34 | \uF19B,k\u0332 35 | \uF1A8,x\u0332 36 | \uF1CB,k\u0332\u0313 37 | \uF204,√ 38 | ล,h\u0323 39 | ɤ,ɣ 40 | ∛,· 41 | ∙,· 42 | -------------------------------------------------------------------------------- /g2p/mappings/langs/font-encodings/hei_doulos.csv: -------------------------------------------------------------------------------- 1 | ¹,p̓ 2 | ¡,ṃ́ 3 | ™,ṃ 4 | µ,m̓ 5 | ²,ṃ̓ 6 | †,t̓ 7 | ¢,ṇ́ 8 | Þ,ṇ 9 | ñ,n̓ 10 | ¦,ṇ̓ 11 | ç,c̓ 12 | ß,λ 13 | Ò,ƛ 14 | ƒ,ƛ̓ 15 | æ,ɫ 16 | Æ,Ɫ 17 | ø,ḷ́ 18 | ª,ḷ 19 | ¬,l̓ 20 | …,ḷ̓ 21 | ð,k̓ 22 | í,í 23 | ¥,y̓ 24 | î,i̓ 25 | ðv,k̓v 26 | ú,ú 27 | „,w̓ 28 | ü,u̓ 29 | ©v,ǧv 30 | œv,q̓v 31 | þv,x̌v 32 | ©,ǧ 33 | œ,q̓ 34 | þ,x̌ 35 | á,á 36 | Ó,h̓ 37 | å,a̓ 38 | Ô,ħ 39 | ‰,ʔ 40 | ÿx,x̌ 41 | ÿX,X̌ 42 | Í,⅄ 43 | Î,⅄ 44 | Ï,⅄̓ 45 | -------------------------------------------------------------------------------- /g2p/mappings/langs/font-encodings/hei_times.csv: -------------------------------------------------------------------------------- 1 | b,b 2 | p,p 3 | π,p̓ 4 | m,m 5 | ¡,ṃ́ 6 | ™,ṃ 7 | µ,m̓ 8 | ≤,ṃ̓ 9 | d,d 10 | †,t̓ 11 | n,n 12 | ¢,ṇ́ 13 | ∞,ṇ 14 | ñ,n̓ 15 | ∫,ṇ̓ 16 | z,z 17 | c,c 18 | ç,c̓ 19 | s,s 20 | ß,λ 21 | ∂,ƛ 22 | ƒ,ƛ̓ 23 | æ,ɫ 24 | l,l 25 | ø,ḷ́ 26 | ª,ḷ 27 | ¬,l̓ 28 | …,ḷ̓ 29 | gv,g 30 | kv,k 31 | ˚,k̓ 32 | x,x 33 | y,y 34 | í,í 35 | ¥,y̓ 36 | î,i̓ 37 | gv,gv 38 | kv,kv 39 | ˚v,k̓v 40 | xv,xv 41 | w,w 42 | ú,ú 43 | u,u 44 | ∑,w̓ 45 | ü,u̓ 46 | ©v,ǧv 47 | qv,qv 48 | œv,q̓v 49 | ≈v,x̌v 50 | ©,ǧ 51 | q,q 52 | œ,q̓ 53 | ≈,x̌ 54 | h,h 55 | á,á 56 | a,a 57 | ˙,h̓ 58 | å,a̓ 59 | ≈,x̌ 60 | Í,⅄ 61 | Î,⅄ 62 | Ï,⅄̓ 63 | -------------------------------------------------------------------------------- /g2p/mappings/langs/font-encodings/nav_times.csv: 
-------------------------------------------------------------------------------- 1 | 1,á 2 | 2,ą 3 | 3,ą́ 4 | 4,é 5 | 5,ę 6 | 6,ę́ 7 | 7,í 8 | 8,į 9 | 9,į́ 10 | 0,ó 11 | -,ǫ 12 | =,ǫ́ 13 | \[,ł 14 | -------------------------------------------------------------------------------- /g2p/mappings/langs/fra/README.txt: -------------------------------------------------------------------------------- 1 | Notes on French g2p by Eric Joanis 2 | 3 | French vowels are quite complicated, and I had a hard time figuring out how to 4 | catch even the common cases. I think I managed OK, but we should not consider 5 | my g2p map definitive. 6 | 7 | Just one tricky example: 8 | y -> /i/ 9 | u -> /y/ 10 | ou -> /u/ 11 | To get this working without having a cyclical graph undoing my work, I mapped 12 | u->y first, and then oy->u, making sure that these rules occur *after* the 13 | mapping of oy->/wa/ earlier in the list. 14 | 15 | There were a bunch more challenging cases, solved with a best effort here but 16 | not thoroughly tested. Some other temporary changes are done and reset a few 17 | lines lower, e.g., with nasals, so don't be too surprised if you analyze my 18 | rules to find some that don't seem to make sense, at least in isolation. 
19 | -------------------------------------------------------------------------------- /g2p/mappings/langs/fra/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: French 3 | mappings: 4 | - display_name: French to IPA 5 | in_lang: fra 6 | out_lang: fra-ipa 7 | authors: 8 | - Eric Joanis 9 | type: mapping 10 | rules_path: fra_to_ipa.csv 11 | abbreviations_path: fra_abbs.csv 12 | rule_ordering: as-written 13 | case_sensitive: false 14 | norm_form: 'NFC' 15 | <<: *shared 16 | -------------------------------------------------------------------------------- /g2p/mappings/langs/fra/fra_abbs.csv: -------------------------------------------------------------------------------- 1 | VOWEL,a,e,i,o,u,à,â,æ,è,é,ê,ë,î,ï,ô,œ,ù,û,ü,ÿ,ɛ,ɛː 2 | EI_VOW,e,i,è,é,ê,ë,î,ï,ÿ 3 | AOU_VOW,a,o,u,à,â,æ,ô,œ,ù,û,ü 4 | -------------------------------------------------------------------------------- /g2p/mappings/langs/fra/fra_to_ipa.csv: -------------------------------------------------------------------------------- 1 | ge,ʒ,,AOU_VOW 2 | g,ʒ,,EI_VOW 3 | s,z,VOWEL,VOWEL 4 | c,s,,EI_VOW 5 | &,et,, 6 | ais,ɛ,,\b 7 | aî,ɛː,, 8 | est,ɛ,,\b 9 | s,,\S,\b 10 | x,,\S,\b 11 | ent,e,,\b 12 | nt,n,,\b 13 | ez,é,,\b 14 | er,é,,\b 15 | è,ɛ,, 16 | b,b,, 17 | ch,ʃ,,VOWEL 18 | ch,k,, 19 | sh,ʃ,, 20 | ss,s,, 21 | s,s,, 22 | ç,s,, 23 | c,k,, 24 | d,d,, 25 | f,f,, 26 | ph,f,, 27 | gn,ɲ,, 28 | ù,u,, 29 | gu,ɡ,, 30 | ng,ŋ,,\b 31 | g,ɡ,, 32 | j,ʒ,, 33 | k,k,, 34 | ill,j,VOWEL, 35 | ill,ij,, 36 | ail,aj,,\b 37 | eil,ɛj,,\b 38 | ll,l,, 39 | l,l,, 40 | mm,m,, 41 | m,m,, 42 | nn,n,, 43 | n,n,, 44 | p,p,, 45 | qu,k,, 46 | q,k,, 47 | rh,ʁ,, 48 | rr,ʁ,, 49 | r,ʁ,, 50 | th,t,, 51 | t,t,, 52 | ti,si,,[aeou][nm] 53 | v,v,, 54 | w,w,, 55 | x,ks,, 56 | z,z,, 57 | en,ɛn,,VOWEL 58 | en,ɑ̃,, 59 | ein,ɛn,,VOWEL 60 | ein,ɛ̃,, 61 | em,ɑ̃,,[pb] 62 | an,ɑn,,VOWEL 63 | ant,ɑ̃,,\b 64 | an,ɑ̃,, 65 | am,ɑ̃,,[pb] 66 | ain,ɛn,,VOWEL 67 | in,ɪn,,VOWEL 68 | ain,ɛ̃,, 
69 | in,ɛ̃,, 70 | aim,ɛ̃,,[pb] 71 | im,ɛ̃,,[pb] 72 | ɪ,i,, 73 | on,ɔn,,VOWEL 74 | om,ɔm,,VOWEL 75 | on,ɔ̃,, 76 | om,ɔ̃,,[pb] 77 | un,yyyn,,VOWEL 78 | un,œ̃,, 79 | um,œ̃,,[pb] 80 | um,œ̃,,\b 81 | yyy,u,, 82 | œu,œ,, 83 | eu,ø,, 84 | eau,o,, 85 | au,o,, 86 | o,ɔ,,[rnml] 87 | é,ɜ,, 88 | ae,ɜ,, 89 | æ,ɜ,, 90 | ai,ɜ,, 91 | ê,ɛː,, 92 | e,,\S,\b 93 | e,ʌ,, 94 | ɜ,e,, 95 | à,a,, 96 | â,ɑ,, 97 | a,a,, 98 | oin,wɛ,, 99 | oi,wa,, 100 | ay,ɛj,,VOWEL 101 | ay,e,, 102 | oy,waj,,VOWEL 103 | oy,wa,, 104 | y,i,, 105 | u,y,, 106 | oy,u,, 107 | ë,ɛ,, 108 | ï,i,, 109 | î,i,, 110 | ô,o,, 111 | û,y,, 112 | ü,y,, 113 | ÿ,i,, 114 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/alq-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "e:", "out": "eː"}, 3 | {"in": "i:", "out": "i"}, 4 | {"in": "j", "out": "j"}, 5 | {"in": "o", "out": "oː"}, 6 | {"in": "o:", "out": "oː"}, 7 | {"in": "u", "out": "u"}, 8 | {"in": "ŋ", "out": "ŋ"}, 9 | {"in": "ɑ", "out": "ɑ"}, 10 | {"in": "ɛ", "out": "ɛ"}, 11 | {"in": "ɡ", "out": "ɡ"}, 12 | {"in": "ʃ", "out": "ʃ"}, 13 | {"in": "ʌ", "out": "ʌ"}, 14 | {"in": "ʒ", "out": "ʒ"} 15 | ] 16 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/atj-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "a", "out": "ɑ"}, 3 | {"in": "aː", "out": "ɑ"}, 4 | {"in": "b", "out": "b"}, 5 | {"in": "d", "out": "d"}, 6 | {"in": "d͡ʒ", "out": "dʒ"}, 7 | {"in": "eː", "out": "eː"}, 8 | {"in": "h", "out": "h"}, 9 | {"in": "i", "out": "i"}, 10 | {"in": "iː", "out": "i"}, 11 | {"in": "m", "out": "m"}, 12 | {"in": "n", "out": "n"}, 13 | {"in": "r", "out": "ɾ"}, 14 | {"in": "s", "out": "s"}, 15 | {"in": "u", "out": "u"}, 16 | {"in": "uː", "out": "u"}, 17 | {"in": "w", "out": "w"}, 18 | {"in": "ɡ", "out": "ɡ"}, 19 | {"in": "ʃ", "out": "ʃ"} 
20 | ] 21 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/clm-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "tʃʼ", "out": "tʃ"}, 3 | {"in": "tsʼ", "out": "ts"}, 4 | {"in": "kʼʷ", "out": "kw"}, 5 | {"in": "tɬʼ", "out": "ts"}, 6 | {"in": "qʼʷ", "out": "kw"}, 7 | {"in": "ˈɔ", "out": "ɔ"}, 8 | {"in": "ˈʌ", "out": "ʌ"}, 9 | {"in": "ˈa", "out": "æ"}, 10 | {"in": "tʃ", "out": "t͡ʃ"}, 11 | {"in": "ts", "out": "ts"}, 12 | {"in": "ˈɛ", "out": "ɛ"}, 13 | {"in": "ˈi", "out": "i"}, 14 | {"in": "kʷ", "out": "kw"}, 15 | {"in": "m̰", "out": "m"}, 16 | {"in": "n̰", "out": "n"}, 17 | {"in": "ɴ̰", "out": "ŋ"}, 18 | {"in": "pʼ", "out": "p"}, 19 | {"in": "qʷ", "out": "kw"}, 20 | {"in": "qʼ", "out": "k"}, 21 | {"in": "tʼ", "out": "t"}, 22 | {"in": "ˈu", "out": "u"}, 23 | {"in": "w̰", "out": "w"}, 24 | {"in": "χʷ", "out": "ʃw"}, 25 | {"in": "xʷ", "out": "kw"}, 26 | {"in": "j̰", "out": "j"}, 27 | {"in": "ɔ", "out": "ɔ"}, 28 | {"in": "ɨ", "out": "ɨ"}, 29 | {"in": "ʊ", "out": "ʊ"}, 30 | {"in": "ə", "out": "ə"}, 31 | {"in": "ʔ", "out": "ʔ"}, 32 | {"in": "a", "out": "æ"}, 33 | {"in": "ɛ", "out": "ɛ"}, 34 | {"in": "h", "out": "h"}, 35 | {"in": "i", "out": "i"}, 36 | {"in": "k", "out": "k"}, 37 | {"in": "l", "out": "l"}, 38 | {"in": "ɬ", "out": "s"}, 39 | {"in": "m", "out": "m"}, 40 | {"in": "n", "out": "n"}, 41 | {"in": "ɴ", "out": "ŋ"}, 42 | {"in": "p", "out": "p"}, 43 | {"in": "q", "out": "k"}, 44 | {"in": "ʃ", "out": "ʃ"}, 45 | {"in": "s", "out": "s"}, 46 | {"in": "t", "out": "t"}, 47 | {"in": "u", "out": "u"}, 48 | {"in": "w", "out": "w"}, 49 | {"in": "χ", "out": "ʃ"}, 50 | {"in": "j", "out": "j"} 51 | ] 52 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/crg-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "ɛ̃ː", "out": 
"ẽː"}, 3 | {"in": "ɑ̃ː", "out": "ɑ̃"}, 4 | {"in": "ĩː", "out": "ẽː"}, 5 | {"in": "oaw", "out": "ɔæw"}, 6 | {"in": "waj", "out": "wæj"}, 7 | {"in": "ʰtʃ", "out": "t͡ʃ"}, 8 | {"in": "ɔ̃ː", "out": "ɔ̃ː"}, 9 | {"in": "ɔ̃", "out": "ɔ̃"}, 10 | {"in": "aw", "out": "æw"}, 11 | {"in": "aj", "out": "æj"}, 12 | {"in": "ɑː", "out": "ɑ"}, 13 | {"in": "iː", "out": "eː"}, 14 | {"in": "eː", "out": "eː"}, 15 | {"in": "uː", "out": "u"}, 16 | {"in": "ĩ", "out": "ĩ"}, 17 | {"in": "ʰp", "out": "p"}, 18 | {"in": "ʰt", "out": "t"}, 19 | {"in": "ʰk", "out": "k"}, 20 | {"in": "tʃ", "out": "t͡ʃ"}, 21 | {"in": "dʒ", "out": "dʒ"}, 22 | {"in": "oː", "out": "oː"}, 23 | {"in": "ʌː", "out": "eː"}, 24 | {"in": "ŋ", "out": "ŋ"}, 25 | {"in": "æ", "out": "æ"}, 26 | {"in": "ʃ", "out": "ʃ"}, 27 | {"in": "ʒ", "out": "ʒ"}, 28 | {"in": "ɛ", "out": "ɛ"}, 29 | {"in": "ɪ", "out": "ɪ"}, 30 | {"in": "j", "out": "j"}, 31 | {"in": "ʌ", "out": "ʌ"}, 32 | {"in": "o", "out": "ɔ"}, 33 | {"in": "ʊ", "out": "ʊ"}, 34 | {"in": "ɹ", "out": "ɹ"} 35 | ] 36 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/crk-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "eː", "out": "eː", "context_before": "", "context_after": ""}, 3 | {"in": "iː", "out": "eː", "context_before": "", "context_after": ""}, 4 | {"in": "oː", "out": "oː", "context_before": "", "context_after": "", "prevent_feeding": true}, 5 | {"in": "aː", "out": "eː", "context_before": "", "context_after": ""}, 6 | {"in": "ɪ", "out": "ɪ", "context_before": "", "context_after": ""}, 7 | {"in": "o", "out": "ɔ", "context_before": "", "context_after": ""}, 8 | {"in": "ʌ", "out": "ʌ", "context_before": "", "context_after": ""}, 9 | {"in": "p", "out": "p", "context_before": "", "context_after": ""}, 10 | {"in": "t", "out": "t", "context_before": "", "context_after": ""}, 11 | {"in": "k", "out": "k", "context_before": "", "context_after": ""}, 12 | 
{"in": "m", "out": "m", "context_before": "", "context_after": ""}, 13 | {"in": "n", "out": "n", "context_before": "", "context_after": ""}, 14 | {"in": "t͡s", "out": "ts", "context_before": "", "context_after": ""}, 15 | {"in": "s", "out": "s", "context_before": "", "context_after": ""}, 16 | {"in": "h", "out": "h", "context_before": "", "context_after": ""}, 17 | {"in": "j", "out": "j", "context_before": "", "context_after": ""}, 18 | {"in": "w", "out": "w", "context_before": "", "context_after": ""} 19 | ] 20 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/dan-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "ð", "out": "ð", "context_before": "", "context_after": ""}, 3 | {"in": "d", "out": "d", "context_before": "", "context_after": ""}, 4 | {"in": "ŋ", "out": "ŋ", "context_before": "", "context_after": ""}, 5 | {"in": "ŋ", "out": "ŋ", "context_before": "", "context_after": ""}, 6 | {"in": "s", "out": "s", "context_before": "", "context_after": ""}, 7 | {"in": "ʁ", "out": "ʒ", "context_before": "", "context_after": ""}, 8 | {"in": "ɐ̯", "out": "j", "context_before": "", "context_after": ""}, 9 | {"in": "ɔ", "out": "ɔ", "context_before": "", "context_after": ""}, 10 | {"in": "oː", "out": "oː", "context_before": "", "context_after": ""}, 11 | {"in": "oː", "out": "oː", "context_before": "", "context_after": ""}, 12 | {"in": "ð̠˕ˠ", "out": "ð", "context_before": "", "context_after": ""}, 13 | {"in": "ð", "out": "ð", "context_before": "", "context_after": ""}, 14 | {"in": "ɪ̯", "out": "j", "context_before": "", "context_after": ""}, 15 | {"in": "ʊ̯", "out": "w", "context_before": "", "context_after": ""}, 16 | {"in": "a", "out": "ɑ", "context_before": "", "context_after": ""}, 17 | {"in": "ɡ", "out": "ɡ", "context_before": "", "context_after": ""}, 18 | {"in": "ɐ̯", "out": "j", "context_before": "", "context_after": ""}, 19 | {"in": 
"s", "out": "s", "context_before": "", "context_after": ""}, 20 | {"in": "ɛ", "out": "ɛ", "context_before": "", "context_after": ""}, 21 | {"in": "æ", "out": "æ", "context_before": "", "context_after": ""}, 22 | {"in": "ɑ", "out": "ɑ", "context_before": "", "context_after": ""}, 23 | {"in": "ɑ", "out": "ɑ", "context_before": "", "context_after": ""}, 24 | {"in": "ʁ", "out": "ʒ", "context_before": "", "context_after": ""}, 25 | {"in": "œ", "out": "ɔ", "context_before": "", "context_after": ""}, 26 | {"in": "u", "out": "u", "context_before": "", "context_after": ""}, 27 | {"in": "p", "out": "p", "context_before": "", "context_after": ""} 28 | ] 29 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/fin-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "t͡s", "out": "ts"}, 3 | {"in": "ɑː", "out": "ɑ"}, 4 | {"in": "ɑɪ", "out": "ɑɪ"}, 5 | {"in": "ɑʊ", "out": "ɑʊ"}, 6 | {"in": "eː", "out": "eː"}, 7 | {"in": "eɪ", "out": "eɪ"}, 8 | {"in": "eʊ", "out": "ɛʊ"}, 9 | {"in": "iː", "out": "i"}, 10 | {"in": "iɛ", "out": "iɛ"}, 11 | {"in": "oː", "out": "oː"}, 12 | {"in": "oʊ", "out": "oʊ"}, 13 | {"in": "ɔɪ", "out": "ɔɪ"}, 14 | {"in": "uː", "out": "u"}, 15 | {"in": "ʊɔ", "out": "ʊɔ"}, 16 | {"in": "ʊɪ", "out": "ʊj"}, 17 | {"in": "yː", "out": "u"}, 18 | {"in": "æː", "out": "æ"}, 19 | {"in": "æɥ", "out": "aʊ"}, 20 | {"in": "øː", "out": "ə"}, 21 | {"in": "øɥ", "out": "əj"}, 22 | {"in": "ks", "out": "ks"}, 23 | {"in": "ɑ", "out": "ɑ"}, 24 | {"in": "b", "out": "b"}, 25 | {"in": "s", "out": "s"}, 26 | {"in": "d", "out": "d"}, 27 | {"in": "e", "out": "ɛ"}, 28 | {"in": "f", "out": "f"}, 29 | {"in": "ɡ", "out": "ɡ"}, 30 | {"in": "h", "out": "h"}, 31 | {"in": "i", "out": "i"}, 32 | {"in": "j", "out": "j"}, 33 | {"in": "k", "out": "k"}, 34 | {"in": "l", "out": "l"}, 35 | {"in": "m", "out": "m"}, 36 | {"in": "n", "out": "n"}, 37 | {"in": "o", "out": "ɔ"}, 38 | {"in": 
"p", "out": "p"}, 39 | {"in": "k", "out": "k"}, 40 | {"in": "r", "out": "ɾ"}, 41 | {"in": "s", "out": "s"}, 42 | {"in": "t", "out": "t"}, 43 | {"in": "u", "out": "u"}, 44 | {"in": "ʋ", "out": "w"}, 45 | {"in": "ʋ", "out": "w"}, 46 | {"in": "y", "out": "u"}, 47 | {"in": "o", "out": "ɔ"}, 48 | {"in": "æ", "out": "æ"}, 49 | {"in": "ø", "out": "ə"}, 50 | {"in": "ŋ", "out": "ŋ"}, 51 | {"in": "ŋ", "out": "ŋ"}, 52 | {"in": "m", "out": "m"}, 53 | {"in": "m", "out": "m"} 54 | ] 55 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/fra-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "a", "out": "ɑ"}, 3 | {"in": "b", "out": "b"}, 4 | {"in": "d", "out": "d"}, 5 | {"in": "e", "out": "eː"}, 6 | {"in": "f", "out": "f"}, 7 | {"in": "i", "out": "i"}, 8 | {"in": "j", "out": "j"}, 9 | {"in": "k", "out": "k"}, 10 | {"in": "l", "out": "l"}, 11 | {"in": "m", "out": "m"}, 12 | {"in": "n", "out": "n"}, 13 | {"in": "o", "out": "oː"}, 14 | {"in": "p", "out": "p"}, 15 | {"in": "s", "out": "s"}, 16 | {"in": "t", "out": "t"}, 17 | {"in": "u", "out": "u"}, 18 | {"in": "v", "out": "v"}, 19 | {"in": "w", "out": "w"}, 20 | {"in": "y", "out": "u"}, 21 | {"in": "z", "out": "z"}, 22 | {"in": "ø", "out": "oː"}, 23 | {"in": "ŋ", "out": "ŋ"}, 24 | {"in": "œ", "out": "ɔ"}, 25 | {"in": "œ̃", "out": "ɔ̃"}, 26 | {"in": "ɑ", "out": "ɑ"}, 27 | {"in": "ɑ̃", "out": "ɑ̃"}, 28 | {"in": "ɔ", "out": "ɔ"}, 29 | {"in": "ɔ̃", "out": "ɔ̃"}, 30 | {"in": "ɛ", "out": "ɛ"}, 31 | {"in": "ɛː", "out": "ɛ"}, 32 | {"in": "ɛ̃", "out": "ɛ̃"}, 33 | {"in": "ɜ", "out": "ʌ"}, 34 | {"in": "ɡ", "out": "ɡ"}, 35 | {"in": "ɪn", "out": "ɪn"}, 36 | {"in": "ɲ", "out": "ŋ"}, 37 | {"in": "ʁ", "out": "ʒ"}, 38 | {"in": "ʃ", "out": "ʃ"}, 39 | {"in": "ʌ", "out": "ʌ"}, 40 | {"in": "ʒ", "out": "ʒ"} 41 | ] 42 | -------------------------------------------------------------------------------- 
/g2p/mappings/langs/generated/kwk-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "tʼ͡s", "out": "ts"}, 3 | {"in": "tʼ͡ɬ", "out": "ts"}, 4 | {"in": "tʼs", "out": "ts"}, 5 | {"in": "tʼɬ", "out": "ts"}, 6 | {"in": "qʼʷ", "out": "kw"}, 7 | {"in": "kʼʷ", "out": "kw"}, 8 | {"in": "t͡s", "out": "ts"}, 9 | {"in": "t͡ɬ", "out": "ts"}, 10 | {"in": "kʼʲ", "out": "kj"}, 11 | {"in": "d͡z", "out": "dz"}, 12 | {"in": "d͡l", "out": "dl"}, 13 | {"in": "d͡ɬ", "out": "ds"}, 14 | {"in": "pʼ", "out": "p"}, 15 | {"in": "tʼ", "out": "t"}, 16 | {"in": "kʼ", "out": "k"}, 17 | {"in": "qʼ", "out": "k"}, 18 | {"in": "ʔm", "out": "ʔm"}, 19 | {"in": "ʔn", "out": "ʔn"}, 20 | {"in": "ʔw", "out": "ʔw"}, 21 | {"in": "ʔy", "out": "ʔu"}, 22 | {"in": "lʼ", "out": "l"}, 23 | {"in": "dz", "out": "dz"}, 24 | {"in": "ts", "out": "ts"}, 25 | {"in": "tɬ", "out": "ts"}, 26 | {"in": "dɬ", "out": "ds"}, 27 | {"in": "qʷ", "out": "kw"}, 28 | {"in": "χʷ", "out": "ʃw"}, 29 | {"in": "ɢʷ", "out": "ɡw"}, 30 | {"in": "kʷ", "out": "kw"}, 31 | {"in": "xʷ", "out": "kw"}, 32 | {"in": "ɡʷ", "out": "ɡw"}, 33 | {"in": "ʔl", "out": "ʔl"}, 34 | {"in": "ʔj", "out": "ʔj"}, 35 | {"in": "kʲ", "out": "kj"}, 36 | {"in": "xʲ", "out": "kj"}, 37 | {"in": "ɡʲ", "out": "ɡj"}, 38 | {"in": "ej", "out": "ej"}, 39 | {"in": "ow", "out": "ow"}, 40 | {"in": "ɢ", "out": "ɡ"}, 41 | {"in": "χ", "out": "ʃ"}, 42 | {"in": "ɡ", "out": "ɡ"}, 43 | {"in": "q", "out": "k"}, 44 | {"in": "ə", "out": "ə"}, 45 | {"in": "p", "out": "p"}, 46 | {"in": "t", "out": "t"}, 47 | {"in": "k", "out": "k"}, 48 | {"in": "ɬ", "out": "s"}, 49 | {"in": "s", "out": "s"}, 50 | {"in": "x", "out": "k"}, 51 | {"in": "ʔ", "out": "ʔ"}, 52 | {"in": "b", "out": "b"}, 53 | {"in": "d", "out": "d"}, 54 | {"in": "h", "out": "h"}, 55 | {"in": "m", "out": "m"}, 56 | {"in": "n", "out": "n"}, 57 | {"in": "l", "out": "l"}, 58 | {"in": "w", "out": "w"}, 59 | {"in": "j", "out": "j"}, 60 | {"in": "a", "out": "ɑ"}, 61 | 
{"in": "e", "out": "ɛ"}, 62 | {"in": "i", "out": "i"}, 63 | {"in": "o", "out": "ɔ"}, 64 | {"in": "u", "out": "u"} 65 | ] 66 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/lml-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "ŋ", "out": "ŋ", "context_before": "", "context_after": ""}, 3 | {"in": "ŋɡ", "out": "ŋ", "context_before": "", "context_after": ""}, 4 | {"in": "ɑ", "out": "ɑ", "context_before": "", "context_after": ""}, 5 | {"in": "ɡ", "out": "ɡ", "context_before": "", "context_after": ""}, 6 | {"in": "ɣ", "out": "ɡ", "context_before": "", "context_after": ""}, 7 | {"in": "ɹ", "out": "ɹ", "context_before": "", "context_after": ""} 8 | ] 9 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/mic-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "ətʃ", "out": "ət͡ʃ"}, 3 | {"in": "iː", "out": "eː"}, 4 | {"in": "ɑː", "out": "ɑ"}, 5 | {"in": "eː", "out": "eː"}, 6 | {"in": "oː", "out": "oː"}, 7 | {"in": "uː", "out": "u"}, 8 | {"in": "tʃ", "out": "t͡ʃ"}, 9 | {"in": "dʒ", "out": "dʒ"}, 10 | {"in": "kə", "out": "kə"}, 11 | {"in": "pə", "out": "pə"}, 12 | {"in": "xə", "out": "kə"}, 13 | {"in": "sə", "out": "sə"}, 14 | {"in": "tə", "out": "tə"}, 15 | {"in": "əj", "out": "əj"}, 16 | {"in": "ək", "out": "ək"}, 17 | {"in": "əl", "out": "əl"}, 18 | {"in": "əm", "out": "əm"}, 19 | {"in": "ən", "out": "ən"}, 20 | {"in": "əp", "out": "əp"}, 21 | {"in": "əx", "out": "ək"}, 22 | {"in": "əs", "out": "əs"}, 23 | {"in": "ət", "out": "ət"}, 24 | {"in": "əw", "out": "əw"}, 25 | {"in": "əy", "out": "əu"}, 26 | {"in": "'", "out": ""}, 27 | {"in": "ɑ", "out": "ɑ"}, 28 | {"in": "o", "out": "ɔ"}, 29 | {"in": "x", "out": "k"}, 30 | {"in": "j", "out": "j"}, 31 | {"in": "b", "out": "b"}, 32 | {"in": "d", "out": "d"}, 33 | {"in": 
"ɡ", "out": "ɡ"}, 34 | {"in": "z", "out": "z"}, 35 | {"in": "ɣ", "out": "ɡ"} 36 | ] 37 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/oji-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "ŋ", "out": "ŋ", "context_before": "", "context_after": ""}, 3 | {"in": "ɑː", "out": "ɑ", "context_before": "", "context_after": ""}, 4 | {"in": "iː", "out": "i", "context_before": "", "context_after": ""}, 5 | {"in": "oː", "out": "oː", "context_before": "", "context_after": ""}, 6 | {"in": "eː", "out": "eː", "context_before": "", "context_after": ""}, 7 | {"in": "ʌ", "out": "ʌ", "context_before": "", "context_after": ""}, 8 | {"in": "i", "out": "i", "context_before": "", "context_after": ""}, 9 | {"in": "o", "out": "oː", "context_before": "", "context_after": ""}, 10 | {"in": "b", "out": "b", "context_before": "", "context_after": ""}, 11 | {"in": "tʃ", "out": "t͡ʃ", "context_before": "", "context_after": ""}, 12 | {"in": "d", "out": "d", "context_before": "", "context_after": ""}, 13 | {"in": "ɡ", "out": "ɡ", "context_before": "", "context_after": ""}, 14 | {"in": "h", "out": "h", "context_before": "", "context_after": ""}, 15 | {"in": "ʔ", "out": "ʔ", "context_before": "", "context_after": ""}, 16 | {"in": "ʔ", "out": "ʔ", "context_before": "", "context_after": ""}, 17 | {"in": "dʒ", "out": "dʒ", "context_before": "", "context_after": ""}, 18 | {"in": "k", "out": "k", "context_before": "", "context_after": ""}, 19 | {"in": "m", "out": "m", "context_before": "", "context_after": ""}, 20 | {"in": "n", "out": "n", "context_before": "", "context_after": ""}, 21 | {"in": "p", "out": "p", "context_before": "", "context_after": ""}, 22 | {"in": "ʃ", "out": "ʃ", "context_before": "", "context_after": ""}, 23 | {"in": "s", "out": "s", "context_before": "", "context_after": ""}, 24 | {"in": "t", "out": "t", "context_before": "", "context_after": ""}, 25 | 
{"in": "w", "out": "w", "context_before": "", "context_after": ""}, 26 | {"in": "j", "out": "j", "context_before": "", "context_after": ""}, 27 | {"in": "ʒ", "out": "ʒ", "context_before": "", "context_after": ""}, 28 | {"in": "z", "out": "z", "context_before": "", "context_after": ""} 29 | ] 30 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/oka-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "kʼʷ", "out": "kw"}, 3 | {"in": "qʼʷ", "out": "kw"}, 4 | {"in": "ʕˀʷ", "out": "ʒw"}, 5 | {"in": "tsʼ", "out": "ts"}, 6 | {"in": "tɬʼ", "out": "ts"}, 7 | {"in": "χʷ", "out": "ʃw"}, 8 | {"in": "ˈa", "out": "æ"}, 9 | {"in": "ˈi", "out": "i"}, 10 | {"in": "ɣˀ", "out": "ɡ"}, 11 | {"in": "kʼ", "out": "k"}, 12 | {"in": "kʷ", "out": "kw"}, 13 | {"in": "lˀ", "out": "l"}, 14 | {"in": "mˀ", "out": "m"}, 15 | {"in": "nˀ", "out": "n"}, 16 | {"in": "ˈo", "out": "ɔ"}, 17 | {"in": "pʼ", "out": "p"}, 18 | {"in": "qʼ", "out": "k"}, 19 | {"in": "qʷ", "out": "kw"}, 20 | {"in": "ɾˀ", "out": "ɾ"}, 21 | {"in": "tʼ", "out": "t"}, 22 | {"in": "ʕˀ", "out": "ʒ"}, 23 | {"in": "ˈu", "out": "u"}, 24 | {"in": "ʕʷ", "out": "ʒw"}, 25 | {"in": "xʷ", "out": "kw"}, 26 | {"in": "wˀ", "out": "w"}, 27 | {"in": "yˀ", "out": "u"}, 28 | {"in": "ts", "out": "ts"}, 29 | {"in": "h", "out": "h"}, 30 | {"in": "χ", "out": "ʃ"}, 31 | {"in": "a", "out": "æ"}, 32 | {"in": "ə", "out": "ə"}, 33 | {"in": "i", "out": "i"}, 34 | {"in": "ɣ", "out": "ɡ"}, 35 | {"in": "k", "out": "k"}, 36 | {"in": "l", "out": "l"}, 37 | {"in": "ɬ", "out": "s"}, 38 | {"in": "m", "out": "m"}, 39 | {"in": "n", "out": "n"}, 40 | {"in": "o", "out": "ɔ"}, 41 | {"in": "p", "out": "p"}, 42 | {"in": "q", "out": "k"}, 43 | {"in": "ɾ", "out": "ɾ"}, 44 | {"in": "s", "out": "s"}, 45 | {"in": "t", "out": "t"}, 46 | {"in": "ʕ", "out": "ʒ"}, 47 | {"in": "u", "out": "u"}, 48 | {"in": "x", "out": "k"}, 49 | {"in": "w", "out": "w"}, 50 
| {"in": "y", "out": "u"}, 51 | {"in": "ʷ", "out": "w"}, 52 | {"in": "ˀ", "out": ""} 53 | ] 54 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/see-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "d͡ʒ", "out": "dʒ"}, 3 | {"in": "t͡ʃ", "out": "t͡ʃ"}, 4 | {"in": "ẽ", "out": ""}, 5 | {"in": "e", "out": "eː"}, 6 | {"in": "", "out": "ẽː"}, 7 | {"in": "õ", "out": ""}, 8 | {"in": "o", "out": "oː"}, 9 | {"in": "", "out": "õː"} 10 | ] 11 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/str-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "aɪ", "out": "aɪ"}, 3 | {"in": "d͡z", "out": "dz"}, 4 | {"in": "eː", "out": "e"}, 5 | {"in": "e", "out": "eː"}, 6 | {"in": "h", "out": "h"}, 7 | {"in": "i", "out": "i"}, 8 | {"in": "j", "out": "j"}, 9 | {"in": "kʷʼ", "out": "kw"}, 10 | {"in": "k̟", "out": "k"}, 11 | {"in": "k̟ʷ", "out": "kw"}, 12 | {"in": "l", "out": "l"}, 13 | {"in": "m", "out": "m"}, 14 | {"in": "n", "out": "n"}, 15 | {"in": "p", "out": "p"}, 16 | {"in": "pʼ", "out": "p"}, 17 | {"in": "q", "out": "k"}, 18 | {"in": "qʷ", "out": "kw"}, 19 | {"in": "qʷʼ", "out": "kw"}, 20 | {"in": "qʼ", "out": "k"}, 21 | {"in": "s", "out": "s"}, 22 | {"in": "t", "out": "t"}, 23 | {"in": "tʼ", "out": "t"}, 24 | {"in": "t͡s̪", "out": "tθ"}, 25 | {"in": "t͡ɬʼ", "out": "ts"}, 26 | {"in": "t͡ʃ", "out": "t͡ʃ"}, 27 | {"in": "t͡ʃʼ", "out": "tʃ"}, 28 | {"in": "u", "out": "u"}, 29 | {"in": "w", "out": "w"}, 30 | {"in": "xʷ", "out": "kw"}, 31 | {"in": "æ", "out": "æ"}, 32 | {"in": "ŋ", "out": "ŋ"}, 33 | {"in": "ɑ", "out": "ɑ"}, 34 | {"in": "ɬ", "out": "s"}, 35 | {"in": "ʃ", "out": "ʃ"}, 36 | {"in": "ʌ", "out": "ʌ"}, 37 | {"in": "ʔ", "out": "ʔ"}, 38 | {"in": "θ", "out": "θ"}, 39 | {"in": "χ", "out": "ʃ"}, 40 | {"in": "χʷ", "out": "ʃw"} 
41 | ] 42 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/und-ascii_to_dummy.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "a", "out": "ɑ", "context_before": "", "context_after": ""}, 3 | {"in": "b", "out": "t", "context_before": "", "context_after": ""}, 4 | {"in": "c", "out": "ts", "context_before": "", "context_after": ""}, 5 | {"in": "d", "out": "t", "context_before": "", "context_after": ""}, 6 | {"in": "e", "out": "i", "context_before": "", "context_after": ""}, 7 | {"in": "f", "out": "s", "context_before": "", "context_after": ""}, 8 | {"in": "g", "out": "t", "context_before": "", "context_after": ""}, 9 | {"in": "h", "out": "n", "context_before": "", "context_after": ""}, 10 | {"in": "i", "out": "i", "context_before": "", "context_after": ""}, 11 | {"in": "j", "out": "s", "context_before": "", "context_after": ""}, 12 | {"in": "k", "out": "t", "context_before": "", "context_after": ""}, 13 | {"in": "l", "out": "n", "context_before": "", "context_after": ""}, 14 | {"in": "m", "out": "n", "context_before": "", "context_after": ""}, 15 | {"in": "n", "out": "n", "context_before": "", "context_after": ""}, 16 | {"in": "o", "out": "u", "context_before": "", "context_after": ""}, 17 | {"in": "p", "out": "t", "context_before": "", "context_after": ""}, 18 | {"in": "q", "out": "t", "context_before": "", "context_after": ""}, 19 | {"in": "r", "out": "n", "context_before": "", "context_after": ""}, 20 | {"in": "s", "out": "s", "context_before": "", "context_after": ""}, 21 | {"in": "t", "out": "t", "context_before": "", "context_after": ""}, 22 | {"in": "u", "out": "u", "context_before": "", "context_after": ""}, 23 | {"in": "v", "out": "s", "context_before": "", "context_after": ""}, 24 | {"in": "w", "out": "u", "context_before": "", "context_after": ""}, 25 | {"in": "x", "out": "s", "context_before": "", "context_after": ""}, 26 | {"in": "y", 
"out": "i", "context_before": "", "context_after": ""}, 27 | {"in": "z", "out": "s", "context_before": "", "context_after": ""}, 28 | {"in": "@", "out": "ɑ", "context_before": "", "context_after": ""}, 29 | {"in": "\\?", "out": "n", "context_before": "", "context_after": ""}, 30 | {"in": "'", "out": "n", "context_before": "", "context_after": ""}, 31 | {"in": ",", "out": "n", "context_before": "", "context_after": ""} 32 | ] 33 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/und-ascii_to_hamming-dummy.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "a", "out": "ɑ", "context_before": "", "context_after": ""}, 3 | {"in": "b", "out": "t", "context_before": "", "context_after": ""}, 4 | {"in": "c", "out": "ts", "context_before": "", "context_after": ""}, 5 | {"in": "d", "out": "t", "context_before": "", "context_after": ""}, 6 | {"in": "e", "out": "i", "context_before": "", "context_after": ""}, 7 | {"in": "f", "out": "s", "context_before": "", "context_after": ""}, 8 | {"in": "g", "out": "t", "context_before": "", "context_after": ""}, 9 | {"in": "h", "out": "s", "context_before": "", "context_after": ""}, 10 | {"in": "i", "out": "i", "context_before": "", "context_after": ""}, 11 | {"in": "j", "out": "s", "context_before": "", "context_after": ""}, 12 | {"in": "k", "out": "t", "context_before": "", "context_after": ""}, 13 | {"in": "l", "out": "s", "context_before": "", "context_after": ""}, 14 | {"in": "m", "out": "n", "context_before": "", "context_after": ""}, 15 | {"in": "n", "out": "n", "context_before": "", "context_after": ""}, 16 | {"in": "o", "out": "u", "context_before": "", "context_after": ""}, 17 | {"in": "p", "out": "t", "context_before": "", "context_after": ""}, 18 | {"in": "q", "out": "t", "context_before": "", "context_after": ""}, 19 | {"in": "r", "out": "s", "context_before": "", "context_after": ""}, 20 | {"in": "s", "out": 
"s", "context_before": "", "context_after": ""}, 21 | {"in": "t", "out": "t", "context_before": "", "context_after": ""}, 22 | {"in": "u", "out": "u", "context_before": "", "context_after": ""}, 23 | {"in": "v", "out": "s", "context_before": "", "context_after": ""}, 24 | {"in": "w", "out": "u", "context_before": "", "context_after": ""}, 25 | {"in": "x", "out": "s", "context_before": "", "context_after": ""}, 26 | {"in": "y", "out": "i", "context_before": "", "context_after": ""}, 27 | {"in": "z", "out": "s", "context_before": "", "context_after": ""}, 28 | {"in": "@", "out": "ɑ", "context_before": "", "context_after": ""}, 29 | {"in": "\\?", "out": "t", "context_before": "", "context_after": ""}, 30 | {"in": "'", "out": "t", "context_before": "", "context_after": ""}, 31 | {"in": ",", "out": "t", "context_before": "", "context_after": ""} 32 | ] 33 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/und-ipa_to_hamming-eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "t͡ʃ", "out": "t͡ʃ", "context_before": "", "context_after": ""}, 3 | {"in": "a", "out": "æ", "context_before": "", "context_after": ""}, 4 | {"in": "b", "out": "b", "context_before": "", "context_after": ""}, 5 | {"in": "d", "out": "d", "context_before": "", "context_after": ""}, 6 | {"in": "e", "out": "eː", "context_before": "", "context_after": ""}, 7 | {"in": "f", "out": "f", "context_before": "", "context_after": ""}, 8 | {"in": "ɡ", "out": "ɡ", "context_before": "", "context_after": ""}, 9 | {"in": "h", "out": "h", "context_before": "", "context_after": ""}, 10 | {"in": "i", "out": "i", "context_before": "", "context_after": ""}, 11 | {"in": "ʒ", "out": "ʒ", "context_before": "", "context_after": ""}, 12 | {"in": "k", "out": "k", "context_before": "", "context_after": ""}, 13 | {"in": "l", "out": "l", "context_before": "", "context_after": ""}, 14 | {"in": "m", "out": "m", 
"context_before": "", "context_after": ""}, 15 | {"in": "n", "out": "n", "context_before": "", "context_after": ""}, 16 | {"in": "o", "out": "oː", "context_before": "", "context_after": ""}, 17 | {"in": "p", "out": "p", "context_before": "", "context_after": ""}, 18 | {"in": "q", "out": "k", "context_before": "", "context_after": ""}, 19 | {"in": "r", "out": "ɾ", "context_before": "", "context_after": ""}, 20 | {"in": "s", "out": "s", "context_before": "", "context_after": ""}, 21 | {"in": "t", "out": "t", "context_before": "", "context_after": ""}, 22 | {"in": "u", "out": "u", "context_before": "", "context_after": ""}, 23 | {"in": "v", "out": "v", "context_before": "", "context_after": ""}, 24 | {"in": "w", "out": "w", "context_before": "", "context_after": ""}, 25 | {"in": "x", "out": "k", "context_before": "", "context_after": ""}, 26 | {"in": "j", "out": "j", "context_before": "", "context_after": ""}, 27 | {"in": "z", "out": "z", "context_before": "", "context_after": ""}, 28 | {"in": "ə", "out": "ə", "context_before": "", "context_after": ""}, 29 | {"in": "ʔ", "out": "ʔ", "context_before": "", "context_after": ""} 30 | ] 31 | -------------------------------------------------------------------------------- /g2p/mappings/langs/generated/win-ipa_to_eng-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "aː", "out": "eː"}, 3 | {"in": "ãː", "out": "æ̃"}, 4 | {"in": "tʃ", "out": "t͡ʃ"}, 5 | {"in": "eː", "out": "eː"}, 6 | {"in": "iː", "out": "eː"}, 7 | {"in": "ĩː", "out": "ẽː"}, 8 | {"in": "dʒ", "out": "dʒ"}, 9 | {"in": "kʼ", "out": "k"}, 10 | {"in": "oː", "out": "oː"}, 11 | {"in": "pʼ", "out": "p"}, 12 | {"in": "sʼ", "out": "s"}, 13 | {"in": "ʃʼ", "out": "ʃ"}, 14 | {"in": "tʼ", "out": "t"}, 15 | {"in": "uː", "out": "u"}, 16 | {"in": "ũː", "out": "ũ"}, 17 | {"in": "xʼ", "out": "k"}, 18 | {"in": "a", "out": "æ"}, 19 | {"in": "ã", "out": "æ̃"}, 20 | {"in": "b", "out": "b"}, 21 | {"in": "e", "out": "ɛ"}, 22 | 
{"in": "ɡ", "out": "ɡ"}, 23 | {"in": "ɣ", "out": "ɡ"}, 24 | {"in": "h", "out": "h"}, 25 | {"in": "i", "out": "i"}, 26 | {"in": "ĩ", "out": "ĩ"}, 27 | {"in": "k", "out": "k"}, 28 | {"in": "m", "out": "m"}, 29 | {"in": "n", "out": "n"}, 30 | {"in": "o", "out": "ɔ"}, 31 | {"in": "p", "out": "p"}, 32 | {"in": "r", "out": "ɾ"}, 33 | {"in": "s", "out": "s"}, 34 | {"in": "ʃ", "out": "ʃ"}, 35 | {"in": "t", "out": "t"}, 36 | {"in": "u", "out": "u"}, 37 | {"in": "ũ", "out": "ũ"}, 38 | {"in": "w", "out": "w"}, 39 | {"in": "x", "out": "k"}, 40 | {"in": "j", "out": "j"}, 41 | {"in": "z", "out": "z"}, 42 | {"in": "ʒ", "out": "ʒ"}, 43 | {"in": "ʔ", "out": "ʔ"} 44 | ] 45 | -------------------------------------------------------------------------------- /g2p/mappings/langs/git/APA.csv: -------------------------------------------------------------------------------- 1 | l{1}\u0313{2},l{1}\u02C0{2} 2 | l{1}\u0313{2},ʔ{2}l{1} 3 | m{1}\u0313{2},m{1}\u02C0{2} 4 | m{1}\u0313{2},ʔ{2}m{1} 5 | n{1}\u0313{2},n{1}\u02C0{2} 6 | n{1}\u0313{2},ʔ{2}n{1} 7 | w{1}\u0313{2},w{1}\u02C0{2} 8 | w{1}\u0313{2},ʔ{2}w{1} 9 | ʔ{1}y{2},ʔ{1}j{2} 10 | y{1}\u0313{2},j{1}\u02C0{2} 11 | \?{1}\u2071{2},ʔ{1}\u2071{2} 12 | ʒ{1},ʣ{1} 13 | k{1}\u0313{2},k{1}\u02C0{2} 14 | k{1}\u0313{2},ʔ{2}k{1} 15 | q{1}\u0313{2},q{1}\u02C0{2} 16 | q{1}\u0313{2},ʔ{2}q{1} 17 | k{1}\u0313{2}ʷ{3},k{1}ʷ{3}\u02C0{2} 18 | p{1}\u0313{2},p{1}\u02C0{2} 19 | p{1}\u0313{2},ʔ{2}p{1} 20 | t{1}\u0313{2},t{1}\u02C0{2} 21 | t{1}\u0313{2},ʔ{2}t{1} 22 | ʔ{1}ƛ{2},ʔ{1}t{2}\u0361{4}ɬ{3} 23 | ƛ{1}\u0313{2},t{1}\u0361{4}ɬ{3}\u02C0{2} 24 | c{1}ʰ{2},ʦ{1}ʰ{2} 25 | c{1},ʦ{1} 26 | ʔ{1}c{2},ʔ{1}ʦ{2} 27 | c{1}\u0313{2},ʦ{1}\u02C0{2} 28 | ʔ{1}c{2}ʰ{3},ʔ{1}ʦ{2}ʰ{3} 29 | x{1}\u0323{2},χ{1} 30 | y{1},j{1} 31 | ɣ{1},ʁ{1} 32 | a,æ 33 | a{1}ː{2},æ{1}ː{2} 34 | g{1}\u0323{2},ɢ{1} 35 | g{1}\u0307{2},ɢ{1} 36 | g{1}ʸ{2},ɟ{1} 37 | g{1}\u0302{3},ɟ{1} 38 | ʔ{1}ɡ{2},ʔ{1}ɢ{2} 39 | ʔ{1}g{2}\u0307{3},ʔ{1}ɢ{2} 40 | ʔ{1}ɡ{2}ʸ{3},ʔ{1}ɟ{2} 41 | ʔ{1}g{2}\u0302{3},ʔ{1}ɟ{2} 42 |
-------------------------------------------------------------------------------- /g2p/mappings/langs/git/Ortho_variables.csv: -------------------------------------------------------------------------------- 1 | VOWEL,a,ʌ,æ,e,ɛ,ɪ,ɨ,i,ɔ,o,ʊ,u,ʊ\u031E, 2 | CONSONANT,ʔ,b,d,g,ʁ,ɢ,ɟ,h,ɬ,ʣ,k,q,l,m,n,p,s,t,ʔt\u0361ɬ,t\u0361s,w,x,χ,j 3 | RESONANT,j,w,l,m,n 4 | UVULAR,q,ʁ,ɢ,χ 5 | VCLS_STOP,p,t,k,q 6 | VCD_STOP,b,d,g,ɢ,ɟ 7 | -------------------------------------------------------------------------------- /g2p/mappings/langs/git/RAPA.csv: -------------------------------------------------------------------------------- 1 | \?{1},ʔ{1} 2 | '{1}l{2},l{2}\u02C0{1} 3 | '{1}m{2},m{2}\u02C0{1} 4 | \?{1}m{2},ʔ{1}m{2} 5 | '{1}n{2},n{2}\u02C0{1} 6 | \?{1}n{2},ʔ{1}n{2} 7 | \?{1}w{2},ʔ{1}w{2} 8 | '{1}w{2},w{2}\u02C0{1} 9 | \?{1}y{2},ʔ{1}j{2} 10 | '{1}y{2},j{2}\u02C0{1} 11 | \?{1}i{2},ʔ{1}i{2} 12 | \?{1}\u2071{2},ʔ{1}\u2071{2} 13 | a{1},æ{1} 14 | a{1}a{2},æ{1}ː{2} 15 | E{1},ɛ{1} 16 | e{1}e{2},e{1}ː{2} 17 | ɡ{1}\u0332{2}\u0323{3},ʁ{1} 18 | g{1}\u0323{2},ɢ{1} 19 | ɡ{1}ʸ{2},ɟ{1} 20 | ɫ{1},ɬ{1} 21 | i{1}i{2},i{1}ː{2} 22 | j{1},ʣ{1} 23 | k{1}'{2},k{1}\u02C0{2} 24 | \?{1}k{2},ʔ{1}k{2} 25 | \?{1}g{2}\u0323{3},ʔ{1}ɢ{2} 26 | \?{1}q{2},ʔ{1}q{2} 27 | \?{1}q{2}ʰ{3},ʔ{1}q{2}ʰ{3} 28 | q{1}'{2},q{1}\u02C0{2} 29 | \?{1}g{2}ʸ{3},ʔ{1}ɟ{2} 30 | \?{1}k{2}ʷ{3},ʔ{1}k{2}ʷ{3} 31 | \?{1}k{2}ʷ{3}ʰ{4},ʔ{1}k{2}ʷ{3}ʰ{4} 32 | k{1}ʷ{2}'{3},k{1}ʷ{2}\u02C0{3} 33 | \?{1}ɡ{2}ʷ{3},ʔ{1}ɡ{2}ʷ{3} 34 | o{1}o{2},o{1}ː{2} 35 | \?{1}p{2}ʰ{3},ʔ{1}p{2}ʰ{3} 36 | \?{1}p{2},ʔ{1}p{2} 37 | p{1}'{2},p{1}\u02C0{2} 38 | \?{1}b{2},ʔ{1}b{2} 39 | \?{1}t{2},ʔ{1}t{2} 40 | \?{1}t{2}ʰ{3},ʔ{1}t{2}ʰ{3} 41 | t{1}'{2},t{1}\u02C0{2} 42 | \?{1}d{2},ʔ{1}d{2} 43 | \?{1}t{2}\u0361{4}ɬ{3},ʔ{1}t{2}\u0361{4}ɬ{3} 44 | Ƚ{1}'{2},t{1}\u0361{4}ɬ{3}\u02C0{2} 45 | c{1}ʰ{2},ʦ{1}ʰ{2} 46 | c{1},ʦ{1} 47 | \?{1}c{2},ʔ{1}ʦ{2} 48 | c{1}'{2},ʦ{1}\u02C0{2} 49 | \?{1}c{2}ʰ{3},ʔ{1}ʦ{2}ʰ{3} 50 | o{1}\u0323{2},ʊ{1}\u031E{2} 51 | u{1}u{2},u{1}ː{2} 52 | x{1}\u0323{2},χ{1} 53 | 
y{1},j{1} 54 | A{1},a{1}|ʌ{1} 55 | A{1}A{2},a{1}ː{2}|ʌ{1}ː{2} 56 | I{1},i{1}|ɨ{1} 57 | -------------------------------------------------------------------------------- /g2p/mappings/langs/git/RAPA_Deterministic.csv: -------------------------------------------------------------------------------- 1 | \?{1},ʔ{1} 2 | '{1}l{2},l{2}\u02C0{1} 3 | '{1}m{2},m{2}\u02C0{1} 4 | \?{1}m{2},ʔ{1}m{2} 5 | '{1}n{2},n{2}\u02C0{1} 6 | \?{1}n{2},ʔ{1}n{2} 7 | \?{1}w{2},ʔ{1}w{2} 8 | '{1}w{2},w{2}\u02C0{1} 9 | \?{1}y{2},ʔ{1}j{2} 10 | '{1}y{2},j{2}\u02C0{1} 11 | \?{1}i{2},ʔ{1}i{2} 12 | \?{1}\u2071{2},ʔ{1}\u2071{2} 13 | a{1},æ{1} 14 | a{1}a{2},æ{1}ː{2} 15 | E{1},ɛ{1} 16 | e{1}e{2},e{1}ː{2} 17 | ɡ{1}\u0332{2}\u0323{3},ʁ{1} 18 | g{1}\u0323{2},ɢ{1} 19 | ɡ{1}ʸ{2},ɟ{1} 20 | ɫ{1},ɬ{1} 21 | i{1}i{2},i{1}ː{2} 22 | j{1},ʣ{1} 23 | k{1}'{2},k{1}\u02C0{2} 24 | \?{1}k{2},ʔ{1}k{2} 25 | \?{1}g{2}\u0323{3},ʔ{1}ɢ{2} 26 | \?{1}q{2},ʔ{1}q{2} 27 | \?{1}q{2}ʰ{3},ʔ{1}q{2}ʰ{3} 28 | q{1}'{2},q{1}\u02C0{2} 29 | \?{1}g{2}ʸ{3},ʔ{1}ɟ{2} 30 | \?{1}k{2}ʷ{3},ʔ{1}k{2}ʷ{3} 31 | \?{1}k{2}ʷ{3}ʰ{4},ʔ{1}k{2}ʷ{3}ʰ{4} 32 | k{1}ʷ{2}'{3},k{1}ʷ{2}\u02C0{3} 33 | \?{1}ɡ{2}ʷ{3},ʔ{1}ɡ{2}ʷ{3} 34 | o{1}o{2},o{1}ː{2} 35 | \?{1}p{2}ʰ{3},ʔ{1}p{2}ʰ{3} 36 | \?{1}p{2},ʔ{1}p{2} 37 | p{1}'{2},p{1}\u02C0{2} 38 | \?{1}b{2},ʔ{1}b{2} 39 | \?{1}t{2},ʔ{1}t{2} 40 | \?{1}t{2}ʰ{3},ʔ{1}t{2}ʰ{3} 41 | t{1}'{2},t{1}\u02C0{2} 42 | \?{1}d{2},ʔ{1}d{2} 43 | \?{1}t{2}\u0361{4}ɬ{3},ʔ{1}t{2}\u0361{4}ɬ{3} 44 | Ƚ{1}'{2},t{1}\u0361{4}ɬ{3}\u02C0{2} 45 | c{1}ʰ{2},ʦ{1}ʰ{2} 46 | c{1},ʦ{1} 47 | \?{1}c{2},ʔ{1}ʦ{2} 48 | c{1}'{2},ʦ{1}\u02C0{2} 49 | \?{1}c{2}ʰ{3},ʔ{1}ʦ{2}ʰ{3} 50 | o{1}\u0323{2},ʊ{1}\u031E{2} 51 | u{1}u{2},u{1}ː{2} 52 | x{1}\u0323{2},χ{1} 53 | y{1},j{1} 54 | A{1},a{1} 55 | A{1}A{2},a{1}ː{2} 56 | I{1},i{1} 57 | -------------------------------------------------------------------------------- /g2p/mappings/langs/git/README.md: -------------------------------------------------------------------------------- 1 | Includes fallback for c -> k because the word 'Jacob' 
occurs in a story but this should be dealt with some other way. 2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/git/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Gitksan 3 | mappings: 4 | - display_name: Orthography 5 | in_lang: git 6 | out_lang: git-ipa 7 | rule_ordering: as-written 8 | case_sensitive: false 9 | authors: 10 | - Fineen Davis 11 | rules_path: git_to_ipa.json 12 | <<: *shared 13 | - display_name: Rigsby APA 14 | in_lang: git 15 | out_lang: git-apa 16 | authors: 17 | - Fineen Davis 18 | rules_path: RAPA_Deterministic.csv 19 | <<: *shared 20 | - display_name: Unicode Equivalencies 21 | in_lang: git 22 | out_lang: git-equiv 23 | authors: 24 | - Aidan Pine 25 | rules_path: equiv.csv 26 | <<: *shared 27 | - display_name: Gitksan IPA to English IPA 28 | in_lang: git-ipa 29 | out_lang: eng-ipa 30 | rule_ordering: apply-longest-first 31 | authors: 32 | - Aidan Pine 33 | rules_path: git_ipa_to_eng_ipa.json 34 | <<: *shared 35 | -------------------------------------------------------------------------------- /g2p/mappings/langs/git/equiv.csv: -------------------------------------------------------------------------------- 1 | \u1E35,k\u0332 2 | \u0331,\u0332 3 | \u201C,\u0022 4 | \u201D,\u0022 5 | \u201E,\u0022 6 | \u2013,\u002D 7 | \u2014,\u002D 8 | ’,\u0027 9 | ‘,\u0027 10 | ˊ,\u0027 11 | `,\u0027 12 | ̒,\u0027 13 | ̔,\u0027 14 | ̕,\u0027 15 | ̛,\u0027 16 | ʻ,\u0027 17 | ʼ,\u0027 18 | ʽ,\u0027 19 | ʹ,\u0027 20 | ː,: 21 | -------------------------------------------------------------------------------- /g2p/mappings/langs/gla/README.txt: -------------------------------------------------------------------------------- 1 | There is no support here for 'slender' consonants, or any context-sensitive rules. These need to be added, although simple ReadAlongs support seems to work already. 
2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/gla/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Scottish Gaelic 3 | mappings: 4 | - display_name: Scottish Gaelic to IPA 5 | in_lang: gla 6 | out_lang: gla-ipa 7 | type: mapping 8 | case_sensitive: false 9 | rule_ordering: apply-longest-first 10 | authors: 11 | - Aidan Pine 12 | rules_path: gla_to_ipa.json 13 | <<: *shared 14 | -------------------------------------------------------------------------------- /g2p/mappings/langs/gwi/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Gwich'in 3 | mappings: 4 | - display_name: Gwich'in Equivalencies 5 | in_lang: gwi 6 | out_lang: gwi-equiv 7 | authors: 8 | - Sabrina Yu 9 | type: mapping 10 | rules_path: gwi_equiv.json 11 | prevent_feeding: false 12 | rule_ordering: as-written 13 | case_sensitive: false 14 | norm_form: NFD 15 | <<: *shared 16 | - display_name: Gwich'in to IPA 17 | in_lang: gwi-equiv 18 | out_lang: gwi-ipa 19 | authors: 20 | - Sabrina Yu 21 | type: mapping 22 | rules_path: gwi_to_ipa.json 23 | prevent_feeding: true 24 | rule_ordering: as-written 25 | case_sensitive: false 26 | norm_form: NFD 27 | <<: *shared 28 | -------------------------------------------------------------------------------- /g2p/mappings/langs/gwi/gwi_equiv.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "in":"o\u0328o\u0328", 4 | "out": "o\u0328" 5 | }, 6 | { 7 | "in":"oo", 8 | "out": "o" 9 | }, 10 | { 11 | "in":"\u0300", 12 | "out": "" 13 | }, 14 | { 15 | "in": "\u2019", 16 | "out": "\u02bc" 17 | }, 18 | { 19 | "in": "\u0027", 20 | "out": "\u02bc" 21 | } 22 | ] 23 | -------------------------------------------------------------------------------- /g2p/mappings/langs/haa/README.md: 
-------------------------------------------------------------------------------- 1 | IPA mappings for Hän with resources from the Yukon Native Language Centre 2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/haa/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Hän 3 | mappings: 4 | - display_name: Hän equivalencies 5 | in_lang: haa 6 | out_lang: haa-equiv 7 | authors: 8 | - Shankhalika Srikanth 9 | type: mapping 10 | rules_path: haa_equiv.csv 11 | prevent_feeding: false 12 | rule_ordering: as-written 13 | case_sensitive: false 14 | norm_form: NFD 15 | <<: *shared 16 | - display_name: Hän to IPA 17 | in_lang: haa-equiv 18 | out_lang: haa-ipa 19 | authors: 20 | - Shankhalika Srikanth 21 | type: mapping 22 | rules_path: haa_to_ipa.csv 23 | abbreviations_path: haa_abbs.csv 24 | prevent_feeding: true 25 | rule_ordering: as-written 26 | case_sensitive: false 27 | norm_form: NFD 28 | <<: *shared 29 | -------------------------------------------------------------------------------- /g2p/mappings/langs/haa/haa_abbs.csv: -------------------------------------------------------------------------------- 1 | PLAIN_VOWEL,i,u,äw,aw,ay,ä,a,ew,ey,ë,oy,iw,o,e 2 | GRAVE_VOWEL,ì,ù,ä̀w,àw,ày,ä̀,à,èw,èy,ë̀,òy,ìw,ò,è 3 | HAT_VOWEL,î,û,ä̂w,âw,ây,ä̂,â,êw,êy,ë̂,ôy,îw,ô,ê 4 | VEE_VOWEL,ǐ,ǔ,ä̌w,ǎw,ǎy,ä̌,ǎ,ěw,ěy,ë̌,ǒy,ǐw,ǒ,ě 5 | NASAL_VOWEL,į,ų,ą̈w,ąw,ąy,ą̈,ą,ęw,ęy,ę̈,ǫy,įw,ǫ,ę 6 | NASAL_GRAVE_VOWEL,į̀,ų̀,ą̈̀w,ą̀w,ą̀y,ą̈̀,ą̀,ę̀w,ę̀y,ę̈̀,ǫ̀y,į̀w,ǫ̀,ę̀ 7 | NASAL_HAT_VOWEL,į̂,ų̂,ą̈̂w,ą̂w,ą̂y,ą̈̂,ą̂,ę̂w,ę̂y,ę̈̂,ǫ̂y,į̂w,ǫ̂,ę̂ 8 | NASAL_VEE_VOWEL,į̌,ų̌,ą̈̌w,ą̌w,ą̌y,ą̈̌,ą̌,ę̌w,ę̌y,ę̈̌,ǫ̌y,į̌w,ǫ̌,ę̌ 9 | VOWELS,PLAIN_VOWEL,GRAVE_VOWEL,HAT_VOWEL,VEE_VOWEL,NASAL_VOWEL,NASAL_GRAVE_VOWEL,NASAL_HAT_VOWEL,NASAL_VEE_VOWEL 10 | VELAR,kh,k',k,gh,g,nj 11 | CONSONANTS,p,m,b,w,t,d,ḏ,n,r,z,s,ł,c,j̱,j,y,k,g,',l,h 12 | 
-------------------------------------------------------------------------------- /g2p/mappings/langs/haa/haa_equiv.csv: -------------------------------------------------------------------------------- 1 | \u0149,\u0027 2 | \u02BC,\u0027 3 | \u055A,\u0027 4 | \uFF07,\u0027 5 | ’,' 6 | ‘,' 7 | ˊ,' 8 | `,' 9 | ʻ,' 10 | ʼ,' 11 | ʽ,' 12 | ʹ,' 13 | ѐ,è 14 | À,À 15 | È,È 16 | Ì,Ì 17 | Ò,Ò 18 | Ù,Ù 19 | à,à 20 | è,è 21 | ì,ì 22 | ò,ò 23 | ù,ù 24 | Â, 25 | Ê,Ê 26 | Î,Î 27 | Ô,Ô 28 | Û,Û 29 | â,â 30 | ê,ê 31 | î,î 32 | ô,ô 33 | û,û 34 | Ě,Ě 35 | ě,ě 36 | Ǎ,Ǎ 37 | ǎ,ǎ 38 | Ǐ,Ǐ 39 | ǐ,ǐ 40 | Ǒ,Ǒ 41 | ǒ,ǒ 42 | Ǔ,Ǔ 43 | ǔ,ǔ 44 | Ḏ,ḏ 45 | ḏ,ḏ 46 | d\u0332,ḏ 47 | j\u0332,j̱ 48 | Ä,Ä 49 | Ë,Ë 50 | ä,ä 51 | ë,ë 52 | ӓ,ä 53 | -------------------------------------------------------------------------------- /g2p/mappings/langs/haa/haa_to_ipa.csv: -------------------------------------------------------------------------------- 1 | nj,ⁿk,,CONSONANTS 2 | p,pʰ 3 | mb,ᵐp 4 | b,p 5 | wh,w̥ 6 | tth',tθʼ 7 | tth,tθʰ 8 | th,θ 9 | ts',tsʼ 10 | ts,tsʰ 11 | tr',ʈʂʼ 12 | tr,ʈʂʰ 13 | tl',tɬʼ 14 | tl,tɬʰ 15 | t',tʼ 16 | t,tʰ 17 | ddh,tθ 18 | dh,ð 19 | dz,ts 20 | dr,ʈʂ 21 | dl,tɬ 22 | ḏ,d 23 | d,t 24 | nh,n̥ 25 | nd,ⁿt 26 | nj,ⁿk,,VELAR 27 | nj,ŋ 28 | rh,ɻ̊ 29 | zr,ʐ 30 | sr,ʂ 31 | r,ɻ 32 | zh,ʒ 33 | ł,ɬ 34 | ch',tʃʼ 35 | ch,tʃʰ 36 | j̱,dʒ 37 | j,tʃ 38 | sh,ʃ 39 | yh,j̊ 40 | kh,x 41 | k',kʼ 42 | k,kʁ,,VOWELS 43 | k,kʰ 44 | gh,ɣ 45 | g,ɡʁ,,VOWELS 46 | g,k 47 | ',ʔ 48 | l,ɬɮ,VOWELS,VOWELS 49 | ą̈̀w,ã̀o 50 | ą̈̌w,ã̌o 51 | ą̈̂w,ã̂o 52 | a\u0328\u0308w,ão 53 | ą̀w,æ̃̀o 54 | ą̌w,æ̃̌o 55 | ą̂w,æ̃̂o 56 | a\u0328w,æ̃o 57 | ą̀y,æ̃̀i 58 | ą̌y,æ̃̌i 59 | ą̂y,æ̃̂i 60 | a\u0328y,æ̃i 61 | ä\u0328,ɑ̃ 62 | a\u0328,æ̃ 63 | ę̀w,ẽ̀o 64 | ę̌w,ẽ̌o 65 | ę̂w,ẽ̂o 66 | e\u0328w,ẽo 67 | ę̀y,ẽ̀i 68 | ę̌y,ẽ̌i 69 | ę̂y,ẽ̂i 70 | e\u0328y,ẽi 71 | ë\u0328,ə̃ 72 | ǫ̀y,õ̀i 73 | ǫ̌y,õ̌i 74 | ǫ̂y,õ̂i 75 | o\u0328y,õi 76 | į̀w,ĩ̀u 77 | į̌w,ĩ̌u 78 | į̂w,ĩ̂u 79 | i\u0328w,ĩu 80 | ë\u0328,ə\u0303 81 | ë,ə 82 | ä̀w,ào 83 | ä̂w,âo 84 | ä̌w,ǎo 
85 | äw,ao 86 | àw,æ̀o 87 | ǎw,æ̌o 88 | âw,æ̂o 89 | aw,æo 90 | ày,æ̀i 91 | ǎy,æ̌i 92 | ây,æ̂i 93 | ay,æi 94 | ä\u0328,ɑ\u0303 95 | ä,ɑ 96 | a\u0328,æ\u0303 97 | a,æ 98 | èw,èo 99 | ěw,ěo 100 | êw,êo 101 | ew,eo 102 | èy,èi 103 | ěy,ěi 104 | êy,êi 105 | ey,ei 106 | òy,òi 107 | ǒy,ǒi 108 | ôy,ôi 109 | oy,oi 110 | ìw,ìu 111 | ǐw,ǐu 112 | îw,îu 113 | iw,iu 114 | y,j 115 | o\u0328,o\u0303 116 | o,o 117 | e\u0328,e\u0303 118 | e,e 119 | \u0328,\u0303 120 | -------------------------------------------------------------------------------- /g2p/mappings/langs/hur/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Halkomelem 3 | mappings: 4 | - display_name: Halkomelem APA to Hul’q’umi’num’ (Island) practical orthography 5 | in_lang: hur-apa 6 | out_lang: hur 7 | authors: 8 | - Zack Gilkison 9 | type: mapping 10 | rules_path: hur_apa_to_hur_orthog.json 11 | prevent_feeding: false 12 | rule_ordering: apply-longest-first 13 | case_sensitive: false 14 | norm_form: NFD 15 | # <<: &shared 16 | - display_name: Hul’q’umi’num’ (Island) practical orthography to Halkomelem APA 17 | in_lang: hur 18 | out_lang: hur-apa 19 | authors: 20 | - Zack Gilkison 21 | type: mapping 22 | rules_path: hur_orthog_to_hur_apa.json 23 | prevent_feeding: true 24 | rule_ordering: apply-longest-first 25 | case_sensitive: false 26 | norm_form: NFD 27 | language_name: Halkomelem 28 | -------------------------------------------------------------------------------- /g2p/mappings/langs/hur/hur_orthog_to_hur_apa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "tth’", "out": "t̓ᶿ"}, 3 | {"in": "kw’", "out": "k̓ʷ"}, 4 | {"in": "qw’", "out": "q̓ʷ"}, 5 | {"in": "tth", "out": "tᶿ"}, 6 | {"in": "t-s", "out": "ts"}, 7 | {"in": "ts’", "out": "c̓"}, 8 | {"in": "ch’", "out": "č̓"}, 9 | {"in": "tl’", "out": "ƛ̓"}, 10 | {"in": "s-h", "out": "sh"}, 11 | {"in": "ú", "out": "ə́"}, 12 | {"in": 
"ù", "out": "ə̀"}, 13 | {"in": "à", "out": "à"}, 14 | {"in": "é", "out": "é"}, 15 | {"in": "lh", "out": "ł"}, 16 | {"in": "aa", "out": "a:"}, 17 | {"in": "ee", "out": "e:"}, 18 | {"in": "ii", "out": "i:"}, 19 | {"in": "ou", "out": "u"}, 20 | {"in": "oo", "out": "u:"}, 21 | {"in": "p’", "out": "p̓"}, 22 | {"in": "t’", "out": "t̓"}, 23 | {"in": "kw", "out": "kʷ"}, 24 | {"in": "q’", "out": "q̓"}, 25 | {"in": "qw", "out": "qʷ"}, 26 | {"in": "ts", "out": "c"}, 27 | {"in": "ch", "out": "č"}, 28 | {"in": "th", "out": "θ"}, 29 | {"in": "sh", "out": "š"}, 30 | {"in": "hw", "out": "xʷ"}, 31 | {"in": "xw", "out": "x̌ʷ"}, 32 | {"in": "’y", "out": "y̓"}, 33 | {"in": "y’", "out": "y̓"}, 34 | {"in": "’w", "out": "w̓"}, 35 | {"in": "w’", "out": "w̓"}, 36 | {"in": "’m", "out": "m̓"}, 37 | {"in": "m’", "out": "m̓"}, 38 | {"in": "’l", "out": "l̓"}, 39 | {"in": "l’", "out": "l̓"}, 40 | {"in": "’l", "out": "l̕"}, 41 | {"in": "l’", "out": "l̕"}, 42 | {"in": "’n", "out": "n̓"}, 43 | {"in": "n’", "out": "n̓"}, 44 | {"in": "ɛ", "out": "ɛ"}, 45 | {"in": "‘", "out": "="}, 46 | {"in": "·", "out": "·"}, 47 | {"in": "’", "out": "ʔ"}, 48 | {"in": "q", "out": "q"}, 49 | {"in": "u", "out": "ə"}, 50 | {"in": "h", "out": "h"}, 51 | {"in": "x", "out": "x̌"} 52 | ] 53 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ikt/README.md: -------------------------------------------------------------------------------- 1 | Language-specific files for Inuit languages spoken in Western Canada and written in Roman orthography, including Inuinnaqtun and Uummarmiutun. 
2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ikt/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Inuktut, Western 3 | mappings: 4 | - display_name: Western Inuktut to IPA 5 | in_lang: ikt 6 | out_lang: ikt-ipa 7 | type: mapping 8 | case_sensitive: false 9 | norm_form: NFD 10 | rule_ordering: apply-longest-first 11 | authors: 12 | - Patrick Littell 13 | rules_path: ikt_to_ipa.json 14 | <<: *shared 15 | -------------------------------------------------------------------------------- /g2p/mappings/langs/iku/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Inuktitut 3 | mappings: 4 | - display_name: Inuktitut to Inuktitut (equiv) 5 | in_lang: iku 6 | out_lang: iku-equiv 7 | type: mapping 8 | rule_ordering: apply-longest-first 9 | authors: 10 | - Patrick Littell 11 | rules_path: iku_to_iku_equiv.json 12 | language_name: Inuktitut Syllabics 13 | - display_name: Inuktitut to IPA 14 | in_lang: iku-equiv 15 | out_lang: iku-ipa 16 | type: mapping 17 | rule_ordering: apply-longest-first 18 | authors: 19 | - Patrick Littell 20 | rules_path: iku_equiv_to_ipa.json 21 | <<: *shared 22 | - display_name: Inuktitut (SRO) to IPA 23 | in_lang: iku-sro 24 | out_lang: iku-sro-ipa 25 | type: mapping 26 | case_sensitive: false 27 | norm_form: NFD 28 | rule_ordering: apply-longest-first 29 | authors: 30 | - Patrick Littell 31 | rules_path: iku_sro_to_ipa.json 32 | language_name: Inuktitut Romanized 33 | -------------------------------------------------------------------------------- /g2p/mappings/langs/kkz/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Kaska 3 | mappings: 4 | - display_name: Kaska to IPA 5 | rules_path: kkz_to_ipa.json 6 | in_lang: kkz 7 | out_lang: kkz-ipa 8 | 
rule_ordering: apply-longest-first 9 | norm_form: NFD 10 | case_sensitive: false 11 | authors: 12 | - Christopher Cox 13 | <<: *shared 14 | - display_name: Kaska IPA to English IPA 15 | rules_path: kkz_ipa_to_eng_ipa.json 16 | in_lang: kkz-ipa 17 | out_lang: eng-ipa 18 | norm_form: NFD 19 | rule_ordering: apply-longest-first 20 | case_sensitive: false 21 | authors: 22 | - Christopher Cox 23 | <<: *shared 24 | -------------------------------------------------------------------------------- /g2p/mappings/langs/kwk/README.md: -------------------------------------------------------------------------------- 1 | Language-specific files for Kwak'wala (NAPA orthography) 2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/kwk/kwk_ipa_to_phonemic_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "kʲ", "out": "k"}, 3 | {"in": "xʲ", "out": "x"}, 4 | {"in": "kʼʲ", "out": "kʼ"}, 5 | {"in": "ɡʲ", "out": "ɡ"}, 6 | {"in": "ej", "out": "e"}, 7 | {"in": "ow", "out": "o"} 8 | ] 9 | -------------------------------------------------------------------------------- /g2p/mappings/langs/kwk/kwk_napa_to_ipa.csv: -------------------------------------------------------------------------------- 1 | p̓,pʼ 2 | t̓,tʼ 3 | k̓,kʼ 4 | q̓,qʼ 5 | c,ts 6 | c̓,tʼs 7 | ƛ,tɬ 8 | ƛ̓,tʼɬ 9 | λ,dɬ 10 | g,ɡ 11 | m̓,ʔm 12 | n̓,ʔn 13 | w̓,ʔw 14 | y̓,ʔy 15 | l',lʼ 16 | dᶻ,dz 17 | ǧ,ɢ 18 | x̌,χ 19 | -------------------------------------------------------------------------------- /g2p/mappings/langs/kwk/kwk_umista_to_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "p", "out": "p"}, 3 | {"in": "t", "out": "t"}, 4 | {"in": "ts", "out": "t͡s"}, 5 | {"in": "tɬ", "out": "t͡ɬ"}, 6 | {"in": "tl", "out": "t͡ɬ"}, 7 | {"in": "k", "out": "kʲ"}, 8 | {"in": "kw", "out": "kʷ"}, 9 | {"in": "ḵ", "out": "q"}, 10 | {"in": "ḵw", "out": "qʷ"}, 11 | {"in": "ɬ", "out": 
"ɬ"}, 12 | {"in": "ł", "out": "ɬ"}, 13 | {"in": "s", "out": "s"}, 14 | {"in": "x", "out": "xʲ"}, 15 | {"in": "xw", "out": "xʷ"}, 16 | {"in": "x̱", "out": "χ"}, 17 | {"in": "x̱w", "out": "χʷ"}, 18 | {"in": "p̓", "out": "pʼ"}, 19 | {"in": "t̓", "out": "tʼ"}, 20 | {"in": "t̕s", "out": "tʼ͡s"}, 21 | {"in": "t̕ł", "out": "tʼ͡ɬ"}, 22 | {"in": "t̕l", "out": "tʼ͡ɬ"}, 23 | {"in": "k̓", "out": "kʼʲ"}, 24 | {"in": "k̕w", "out": "kʼʷ"}, 25 | {"in": "ḵ̓", "out": "qʼ"}, 26 | {"in": "ḵ̕w", "out": "qʼʷ"}, 27 | {"in": "'", "out": "ʔ"}, 28 | {"in": "b", "out": "b"}, 29 | {"in": "d", "out": "d"}, 30 | {"in": "dz", "out": "d͡z"}, 31 | {"in": "dɬ", "out": "d͡l"}, 32 | {"in": "dł", "out": "d͡l"}, 33 | {"in": "dl", "out": "d͡ɬ"}, 34 | {"in": "g", "out": "ɡʲ"}, 35 | {"in": "gw", "out": "ɡʷ"}, 36 | {"in": "g̱", "out": "ɢ"}, 37 | {"in": "g̱w", "out": "ɢʷ"}, 38 | {"in": "'m", "out": "ʔm"}, 39 | {"in": "'n", "out": "ʔn"}, 40 | {"in": "'l", "out": "ʔl"}, 41 | {"in": "'w", "out": "ʔw"}, 42 | {"in": "'y", "out": "ʔj"}, 43 | {"in": "h", "out": "h"}, 44 | {"in": "m", "out": "m"}, 45 | {"in": "n", "out": "n"}, 46 | {"in": "l", "out": "l"}, 47 | {"in": "w", "out": "w"}, 48 | {"in": "y", "out": "j"}, 49 | {"in": "a", "out": "a"}, 50 | {"in": "e", "out": "ej"}, 51 | {"in": "i", "out": "i"}, 52 | {"in": "o", "out": "ow"}, 53 | {"in": "u", "out": "u"}, 54 | {"in": "a̱", "out": "ə"} 55 | ] 56 | -------------------------------------------------------------------------------- /g2p/mappings/langs/kwk/napa_equiv_ubc.csv: -------------------------------------------------------------------------------- 1 | p/,p̓ 2 | p̕,p̓ 3 | p’,p̓ 4 | t/,t̓ 5 | t̕,t̓ 6 | t’,t̓ 7 | k/,k̓ 8 | k̕,k̓ 9 | k’,k̓ 10 | c/,c̓ 11 | c̕,c̓ 12 | c’,c̓ 13 | ƛ/,ƛ̓ 14 | ƛ̕,ƛ̓ 15 | ƛ’,ƛ̓ 16 | k'ʷ,k̓ʷ 17 | k]ʷ,k̓ʷ 18 | k̕ʷ,k̓ʷ 19 | k’ʷ,k̓ʷ 20 | k{1}ʷ{3}\u0313{2},k{1}\u0313{2}ʷ{3} 21 | k{1}ʷ{3}'{2},k{1}\u0313{2}ʷ{3} 22 | k{1}ʷ{3}’{2},k{1}\u0313{2}ʷ{3} 23 | k'w,k̓ʷ 24 | k]w,k̓ʷ 25 | k̕w,k̓ʷ 26 | k’w,k̓ʷ 27 | k{1}w{3}\u0313{2},k{1}\u0313{2}ʷ{3} 
28 | k{1}w{3}'{2},k{1}\u0313{2}ʷ{3} 29 | k{1}w{3}’{2},k{1}\u0313{2}ʷ{3} 30 | q'ʷ,q̓ʷ 31 | q]ʷ,q̓ʷ 32 | q̕ʷ,q̓ʷ 33 | q’ʷ,q̓ʷ 34 | q{1}ʷ{3}\u0313{2},q{1}\u0313{2}ʷ{3} 35 | q{1}ʷ{3}'{2},q{1}\u0313{2}ʷ{3} 36 | q{1}ʷ{3}’{2},q{1}\u0313{2}ʷ{3} 37 | q'w,q̓ʷ 38 | q]w,q̓ʷ 39 | q̕w,q̓ʷ 40 | q’w,q̓ʷ 41 | q{1}w{3}\u0313{2},q{1}\u0313{2}ʷ{3} 42 | q{1}w{3}'{2},q{1}\u0313{2}ʷ{3} 43 | q{1}w{3}’{2},q{1}\u0313{2}ʷ{3} 44 | kw,kʷ 45 | gw,gʷ 46 | qw,qʷ 47 | ɢw,ɢʷ 48 | xw,xʷ 49 | χw,χʷ 50 | ̕{1}m{2},m{2}\u0313{1} 51 | ’{1}m{2},m{2}\u0313{1} 52 | '{1}m{2},m{2}\u0313{1} 53 | m',m̓ 54 | ̕{1}n{2},n{2}\u0313{1} 55 | ’{1}n{2},n{2}\u0313{1} 56 | '{1}n{2},n{2}\u0313{1} 57 | n',n̓ 58 | ̕{1}w{2},w{2}\u0313{1} 59 | ’{1}w{2},w{2}\u0313{1} 60 | '{1}w{2},w{2}\u0313{1} 61 | w',w̓ 62 | ̕{1}y{2},y{2}\u0313{1} 63 | ’{1}y{2},y{2}\u0313{1} 64 | '{1}y{2},y{2}\u0313{1} 65 | y',y̓ 66 | ̕{1}l{2},l{2}'{1} 67 | l̓,l' 68 | ’{1}l{2},l{2}'{1} 69 | '{1}l{2},l{2}'{1} 70 | dz,dᶻ 71 | -------------------------------------------------------------------------------- /g2p/mappings/langs/kwk/napa_equiv_uvic.csv: -------------------------------------------------------------------------------- 1 | p/,p̓ 2 | p̕,p̓ 3 | p’,p̓ 4 | t/,t̓ 5 | t̕,t̓ 6 | t’,t̓ 7 | k/,k̓ 8 | k̕,k̓ 9 | k’,k̓ 10 | c/,c̓ 11 | c̕,c̓ 12 | c’,c̓ 13 | ƛ/,ƛ̓ 14 | ƛ̕,ƛ̓ 15 | ƛ’,ƛ̓ 16 | k'ʷ,k̓ʷ 17 | k]ʷ,k̓ʷ 18 | k̕ʷ,k̓ʷ 19 | k’ʷ,k̓ʷ 20 | kʷ̓,k̓ʷ 21 | kʷ',k̓ʷ 22 | kʷ’,k̓ʷ 23 | k'w,k̓ʷ 24 | k]w,k̓ʷ 25 | k̕w,k̓ʷ 26 | k’w,k̓ʷ 27 | kw̓,k̓ʷ 28 | kw',k̓ʷ 29 | kw’,k̓ʷ 30 | q'ʷ,q̓ʷ 31 | q]ʷ,q̓ʷ 32 | q̕ʷ,q̓ʷ 33 | q’ʷ,q̓ʷ 34 | qʷ̓,q̓ʷ 35 | qʷ',q̓ʷ 36 | qʷ’,q̓ʷ 37 | q'w,q̓ʷ 38 | q]w,q̓ʷ 39 | q̕w,q̓ʷ 40 | q’w,q̓ʷ 41 | qw̓,q̓ʷ 42 | qw',q̓ʷ 43 | qw’,q̓ʷ 44 | ǧw,ǧʷ 45 | x̌w,x̌ʷ 46 | kw,kʷ 47 | gw,gʷ 48 | qw,qʷ 49 | xw,xʷ 50 | χw,χʷ 51 | ̕m,m̓ 52 | ’m,m̓ 53 | 'm,m̓ 54 | ̕n,n̓ 55 | ’n,n̓ 56 | 'n,n̓ 57 | ̕w,w̓ 58 | ’w,w̓ 59 | 'w,w̓ 60 | ̕y,y̓ 61 | ’y,y̓ 62 | 'y,y̓ 63 | l̕,l' 64 | l̓,l' 65 | l’,l' 66 | dz,dᶻ 67 | 
-------------------------------------------------------------------------------- /g2p/mappings/langs/kwk/umista_equiv.csv: -------------------------------------------------------------------------------- 1 | p/,p̓ 2 | p̕,p̓ 3 | p’,p̓ 4 | t/,t̓ 5 | t̕,t̓ 6 | t’,t̓ 7 | t's,t̕s 8 | t]s,t̕s 9 | t̓s,t̕s 10 | t̕s,t̕s 11 | t’s,t̕s 12 | t{1}s{3}\u0313{2},t{1}\u0315{2}s{3} 13 | t{1}s{3}\u0315{2},t{1}\u0315{2}s{3} 14 | t{1}s{3}'{2},t{1}\u0315{2}s{3} 15 | t{1}s{3}’{2},t{1}\u0315{2}s{3} 16 | t'ɬ,t̕ɬ 17 | t]ɬ,t̕ɬ 18 | t̓ɬ,t̕ɬ 19 | t̕ɬ,t̕ɬ 20 | t’ɬ,t̕ɬ 21 | t{1}ɬ{3}\u0313{2},t{1}\u0315{2}ɬ{3} 22 | t{1}ɬ{3}\u0315{2},t{1}\u0315{2}ɬ{3} 23 | t{1}ɬ{3}'{2},t{1}\u0315{2}ɬ{3} 24 | t{1}ɬ{3}’{2},t{1}\u0315{2}ɬ{3} 25 | k/,k̓ 26 | k̕,k̓ 27 | k’,k̓ 28 | k'w,k̕w 29 | k]w,k̕w 30 | k̓w,k̕w 31 | k̕w,k̕w 32 | k’w,k̕w 33 | k{1}w{3}\u0313{2},k{1}\u0315{2}w{3} 34 | k{1}w{3}\u0315{2},k{1}\u0315{2}w{3} 35 | k{1}w{3}'{2},k{1}\u0315{2}w{3} 36 | k{1}w{3}’{2},k{1}\u0315{2}w{3} 37 | k;,ḵ 38 | ḵ',ḵ̓ 39 | ḵ̕,ḵ̓ 40 | g;,g̱ 41 | k;w,ḵw 42 | ḵ'w,ḵ̕w 43 | k;]w,ḵ̕w 44 | k{1}]{3};{2}w{4},k{1}\u0331{2}\u0315{3}w{4} 45 | ḵ̓w,ḵ̕w 46 | ḵ̕w,ḵ̕w 47 | k{1}\u0315{3}\u0331{2}w{4},k{1}\u0331{2}\u0315{3}w{4} 48 | k{1}\u0313{3}\u0331{2}w{4},k{1}\u0331{2}\u0315{3}w{4} 49 | g;w,g̱w 50 | ’,' 51 | ̕,' 52 | x;,x̱ 53 | x;w,x̱w 54 | ̕m,'m 55 | m{2}\u0313{1},'{1}m{2} 56 | ’m,'m 57 | ̕n,'n 58 | n{2}\u0313{1},'{1}n{2} 59 | ’n,'n 60 | ̕w,'w 61 | w{2}\u0313{1},'{1}w{2} 62 | ’w,'w 63 | ̕y,'y 64 | y{2}\u0313{1},'{1}y{2} 65 | ’y,'y 66 | ̕l,'l 67 | l{2}\u0313{1},'{1}l{2} 68 | ’l,'l 69 | -------------------------------------------------------------------------------- /g2p/mappings/langs/langs.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/mappings/langs/langs.json.gz -------------------------------------------------------------------------------- /g2p/mappings/langs/lml/abbreviations.csv: 
-------------------------------------------------------------------------------- 1 | CONSONANT,t,d,b,bw,p,k,g,g\u0304,n\u0304,n,m,mw,v,vw,l,r,h,s,w 2 | VOWEL,i,a,e,u,o 3 | -------------------------------------------------------------------------------- /g2p/mappings/langs/lml/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Raga 3 | mappings: 4 | - display_name: Raga to IPA 5 | rules_path: lml_to_ipa.csv 6 | in_lang: lml 7 | out_lang: lml-ipa 8 | case_sensitive: false 9 | norm_form: NFD 10 | rule_ordering: as-written 11 | prevent_feeding: true 12 | abbreviations_path: abbreviations.csv 13 | authors: 14 | - Fineen Davis 15 | - Codrington Hinge 16 | <<: *shared 17 | -------------------------------------------------------------------------------- /g2p/mappings/langs/lml/lml_to_ipa.csv: -------------------------------------------------------------------------------- 1 | g,ɣ,VOWEL,VOWEL 2 | g,ɣ,\b,VOWEL 3 | ḡ,ŋɡ,, 4 | g,ɡ 5 | n̄,ŋ,, 6 | r,ɹ,, 7 | ai,ɑj,, 8 | ei,ej,, 9 | a,ɑ,, 10 | e,eː,, 11 | o,oː,, 12 | -------------------------------------------------------------------------------- /g2p/mappings/langs/mic/abbreviations.csv: -------------------------------------------------------------------------------- 1 | SONORANT,i,a,e,u,o,iː,aː,eː,oː,uː,y,m,n,l 2 | CONSONANT,j,k,l,m,n,p,q,s,t,w,y,tʃ,x 3 | OBSTRUENT,k,p,q,s,t,x,tʃ 4 | -------------------------------------------------------------------------------- /g2p/mappings/langs/mic/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Mi'kmaq 3 | mappings: 4 | - display_name: Mi'kmaq to IPA 5 | rules_path: mic_to_ipa.json 6 | in_lang: mic 7 | out_lang: mic-ipa 8 | case_sensitive: false 9 | norm_form: NFD 10 | rule_ordering: as-written 11 | abbreviations_path: abbreviations.csv 12 | authors: 13 | - Aidan Pine 14 | notes: 15 | - Based on the Francis-Smith orthography 
16 | <<: *shared 17 | -------------------------------------------------------------------------------- /g2p/mappings/langs/moe/README.md: -------------------------------------------------------------------------------- 1 | Authors: Delasie Torkornoo, Bradley Ellert, Laura Russo 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /g2p/mappings/langs/moe/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Innu-aimun 3 | mappings: 4 | - display_name: Innu-aimun to IPA 5 | in_lang: moe 6 | out_lang: moe-ipa 7 | type: mapping 8 | authors: 9 | - Delasie Torkornoo 10 | - Bradley Ellert 11 | rules_path: moe_to_ipa.json 12 | abbreviations_path: moe_abbs.csv 13 | case_sensitive: false 14 | <<: *shared 15 | -------------------------------------------------------------------------------- /g2p/mappings/langs/moe/moe_abbs.csv: -------------------------------------------------------------------------------- 1 | VOWEL,a,e,i,o,u 2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/moe/moe_to_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "tshish", "out": "tʃʃ"}, 3 | {"in": "nish", "out": "nəʃ"}, 4 | {"in": "auk", "out": "awk"}, 5 | {"in": "shp", "out": "ʃp"}, 6 | {"in": "sht", "out": "st"}, 7 | {"in": "shk", "out": "ʃk"}, 8 | {"in": "tsh", "out": "tʃ"}, 9 | {"in": "ikw", "out": "ukw"}, 10 | {"in": "akw ", "out": "ukw"}, 11 | {"in": "aa", "out": "aː"}, 12 | {"in": "ai", "out": "ej"}, 13 | {"in": "ii", "out": "iː"}, 14 | {"in": "uu", "out": "uː"}, 15 | {"in": "sh", "out": "ʃ"}, 16 | {"in": "a", "out": "ə"}, 17 | {"in": "i", "out": "u", "context_before": "VOWEL", "context_after": "VOWEL"}, 18 | {"in": "i", "out": "i"}, 19 | {"in": "e", "out": "eː"}, 20 | {"in": "u", "out": "u"}, 21 | {"in": "h", "out": "h"}, 22 | {"in": "ǹ", "out": "l"}, 23 | 
{"in": "ᵘ", "out": "w"}, 24 | {"in": "w", "out": "w"}, 25 | {"in": "k", "out": "k"}, 26 | {"in": "m", "out": "m"}, 27 | {"in": "n", "out": "n"}, 28 | {"in": "p", "out": "p"}, 29 | {"in": "s", "out": "s"}, 30 | {"in": "t", "out": "t"} 31 | ] 32 | -------------------------------------------------------------------------------- /g2p/mappings/langs/moh/README.md: -------------------------------------------------------------------------------- 1 | Language-specific files for Mohawk 2 | 3 | IPA Phoneset: 4 | 5 | ɑ, ˈɑ, ɑː, ɑ́ː, ɑ̀ː, ʌ̃, ˈʌ̃, ʌ̃ː, ʌ̃̀ː, ʌ̃́ː, e, ˈe, èː, éː, i, ˈi, iː, íː, ìː, k, ɡ, kʷ, ɡʷ, kʰʷ, n, n̥, ũ, ˈũ, ṹː, ũ̀ː, o, ˈo, òː, óː, ɽ, ɽ̥, t, d, d͡z, d͡ʒ, t͡s, t͡ʃ, ʃ, s, ʒ, z, w, f, j, ʔ 6 | -------------------------------------------------------------------------------- /g2p/mappings/langs/moh/abbreviations.csv: -------------------------------------------------------------------------------- 1 | CONSONANT,h,k,n,n̥,r,ɽ,ɽ̥,t,s,w,',t͡s,d͡ʒ,d͡z,t͡ʃ,ʃ,ʒ,d,ɡ,ɡʷ,kʷ,kʰʷ,f,j,z,ʔ 2 | VOWEL,a,á:,à:,a:,à,á,ɑ,ɑ́,ɑ̀,ɑ̀ː,ɑ́ː,ɑː,ʌ̃,ʌ̃́ː,ʌ̃ː,ʌ̃̀ː,ʌ̃̀,ʌ̃́,én:,en,en:,èn:,én,èn,e,è:,é,é:,eː,éː,èː,i,i:,í:,í,ì:,ì,iː,íː,ìː,ón:,òn:,on:,òn,on,ón,ṹː,ũ̀ː,ũ̀,ũ,ṹ,ũː,o,ó,ó:,o:,ò:,ò,óː,oː,òː 3 | -------------------------------------------------------------------------------- /g2p/mappings/langs/moh/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Kanien'kéha 3 | mappings: 4 | - display_name: Kanien'kéha to IPA 5 | rules_path: moh_to_ipa.json 6 | in_lang: moh-equiv 7 | out_lang: moh-ipa 8 | case_sensitive: false 9 | norm_form: NFC 10 | rule_ordering: as-written 11 | abbreviations_path: abbreviations.csv 12 | authors: 13 | - Aidan Pine 14 | - Akwiratékha' Martin 15 | <<: *shared 16 | - display_name: IPA to Kanien'kéha 17 | rules_path: moh_to_ipa.json 18 | in_lang: moh-ipa 19 | reverse: true 20 | out_lang: moh 21 | case_sensitive: false 22 | norm_form: NFC 23 | rule_ordering: as-written 24 | 
abbreviations_path: abbreviations.csv 25 | authors: 26 | - Aidan Pine 27 | - Akwiratékha' Martin 28 | <<: *shared 29 | - display_name: Kanien'kéha Equivalencies 30 | rules_path: moh_equiv.json 31 | in_lang: moh 32 | out_lang: moh-equiv 33 | case_sensitive: false 34 | norm_form: NFC 35 | rule_ordering: as-written 36 | abbreviations_path: abbreviations.csv 37 | authors: 38 | - Aidan Pine 39 | <<: *shared 40 | -------------------------------------------------------------------------------- /g2p/mappings/langs/moh/moh_equiv.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "꞉", "out": ":"}, 3 | {"in": "–", "out": "-"}, 4 | {"in": "׃", "out": ":"}, 5 | {"in": "’", "out": "'"}, 6 | {"in": "‘", "out": "'"}, 7 | {"in": "òn", "out": "òn:", "context_after": "(CONSONANT)"}, 8 | {"in": "èn", "out": "èn:", "context_after": "(CONSONANT)"}, 9 | {"in": "on:", "out": "ón:"}, 10 | {"in": "en:", "out": "én:"}, 11 | {"in": "à", "out": "à:", "context_after": "[^:]"}, 12 | {"in": "è", "out": "è:", "context_after": "([^:n]|n(VOWEL))"}, 13 | {"in": "ì", "out": "ì:", "context_after": "[^:]"}, 14 | {"in": "ò", "out": "ò:", "context_after": "([^:n]|n(VOWEL))"}, 15 | {"in": "i:", "out": "í:"}, 16 | {"in": "e:", "out": "é:"}, 17 | {"in": "a:", "out": "á:"}, 18 | {"in": "o:", "out": "ó:"}, 19 | {"in": "ό", "out": "ó"}, 20 | {"in": ":", "out": ":", "comment": "force the tokenizer to recognize colon as a letter"} 21 | ] 22 | -------------------------------------------------------------------------------- /g2p/mappings/langs/network.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/mappings/langs/network.json.gz -------------------------------------------------------------------------------- /g2p/mappings/langs/norm/config-g2p.yaml: -------------------------------------------------------------------------------- 1 
| <<: &shared 2 | language_name: Normalization 3 | mappings: 4 | - display_name: Panphon Normalization 5 | rules_path: panphon_preprocessor.csv 6 | id: panphon_preprocessor 7 | in_lang: ipa 8 | out_lang: ipa 9 | rule_ordering: as-written 10 | authors: 11 | - Patrick Littell 12 | - Eric Joanis 13 | <<: *shared 14 | -------------------------------------------------------------------------------- /g2p/mappings/langs/norm/panphon_preprocessor.csv: -------------------------------------------------------------------------------- 1 | ʷ,w 2 | ᵐ,m 3 | ⁿ,n 4 | ᶯ,ɳ 5 | ᶮ,ɲ 6 | ᵑ,ŋ 7 | ʲ,j 8 | ͡, 9 | ˈ, 10 | \u030a,\u0325 11 | \u0300, 12 | \u0301, 13 | \u0302, 14 | \u0304, 15 | \u030b, 16 | \u030c, 17 | \u030f, 18 | \u1dc4, 19 | \u1dc5, 20 | \u1dc6, 21 | \u1dc7, 22 | \u1dc8, 23 | ˨, 24 | ˦, 25 | ˧, 26 | ˥, 27 | ˩, 28 | -------------------------------------------------------------------------------- /g2p/mappings/langs/oji/README.md: -------------------------------------------------------------------------------- 1 | IPA mappings for Ojibwe (Anishinaabemowin) double vowel system. 
2 | 3 | Reference: https://ojibwe.lib.umn.edu/about-ojibwe-language 4 | More fun reference: https://www.youtube.com/watch?v=GW0pGtmHJHU 5 | -------------------------------------------------------------------------------- /g2p/mappings/langs/oji/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Ojibwe 3 | mappings: 4 | - display_name: Ojibwe to IPA 5 | in_lang: oji 6 | out_lang: oji-ipa 7 | authors: 8 | - David Huggins-Daines 9 | type: mapping 10 | rules_path: oji_to_ipa.csv 11 | prevent_feeding: true 12 | rule_ordering: as-written 13 | case_sensitive: false 14 | norm_form: NFC 15 | <<: *shared 16 | - display_name: Anishinaabemowin Syllabics to Romanized 17 | in_lang: oji-syl 18 | out_lang: oji 19 | authors: 20 | - Shankhalika Srikanth 21 | type: mapping 22 | rules_path: oji_syllabics_to_orth.csv 23 | prevent_feeding: true 24 | rule_ordering: as-written 25 | case_sensitive: false 26 | norm_form: NFC 27 | language_name: Ojibwe Syllabics 28 | -------------------------------------------------------------------------------- /g2p/mappings/langs/oji/oji_syllabics_to_orth.csv: -------------------------------------------------------------------------------- 1 | ᐊ,a 2 | ᐅ,o 3 | ᐁ,e 4 | ᐃ,i 5 | ᐘ,wa 6 | ᐏ,wi 7 | ᐍ,we 8 | ᐓ,wo 9 | ᐤ,w 10 | ᐸ,pa 11 | ᐳ,po 12 | ᐱ,pi 13 | ᐯ,pe 14 | ᑅ,pwa 15 | ᑁ,pwo 16 | ᐽ,pwi 17 | ᐻ,pwe 18 | ᑊ,p 19 | ᐦ,h 20 | ᑕ,ta 21 | ᑐ,to 22 | ᑎ,ti 23 | ᑌ,te 24 | ᑢ,twa 25 | ᑞ,two 26 | ᑚ,twi 27 | ᑘ,twe 28 | ᐟ,t 29 | ᒋ,chi 30 | ᒍ,cho 31 | ᘃ,che 32 | ᘂ,cha 33 | ᒝ,chwa 34 | ᒙ,chwo 35 | ᒕ,chwi 36 | ᒓ,chwe 37 | ᐨ,ch 38 | ᒪ,ma 39 | ᒥ,mi 40 | ᒧ,mo 41 | ᒣ,me 42 | ᒷ,mwa 43 | ᒳ,mwo 44 | ᒯ,mwi 45 | ᒭ,mwe 46 | ᒼ,m 47 | ᓇ,na 48 | ᓂ,ni 49 | ᓄ,no 50 | ᓀ,ne 51 | ᓌ,nwa 52 | ᓄᐧ,nwo 53 | ᓂᐧ,nwi 54 | ᓊ,nwe 55 | ᐣ,n 56 | ᑲ,ka 57 | ᑭ,ki 58 | ᑫ,ke 59 | ᑯ,ko 60 | ᑿ,kwa 61 | ᑻ,kwo 62 | ᑷ,kwi 63 | ᑵ,kwe 64 | ᐠ,k 65 | ᓴ,sa 66 | ᓯ,si 67 | ᓱ,so 68 | ᓭ,se 69 | ᔁ,swa 70 | ᓽ,swo 71 | ᓹ,swi 72 | ᓷ,swe 73 | ᐢ,s 74 
| ᔕ,sha 75 | ᔑ,shi 76 | ᔓ,sho 77 | ᔐ,she 78 | ᔢ,shwa 79 | ᔞ,shwo 80 | ᔚ,shwi 81 | ᔘ,shwe 82 | ᐡ,sh 83 | ᔭ,ya 84 | ᔨ,yi 85 | ᔪ,yo 86 | ᔦ,ye 87 | ᔺ,ywa 88 | ᔶ,ywo 89 | ᔲ,ywi 90 | ᔰ,ywe 91 | ᔾ,y 92 | ᓬ,l 93 | ᕒ,r 94 | ᙮,. 95 | -------------------------------------------------------------------------------- /g2p/mappings/langs/oji/oji_to_ipa.csv: -------------------------------------------------------------------------------- 1 | ng,ŋ,,\b 2 | aa,ɑː,, 3 | ii,iː,, 4 | oo,oː,, 5 | e,eː,, 6 | a,ʌ,, 7 | i,i,, 8 | o,o,, 9 | b,b,, 10 | ch,tʃ,, 11 | d,d,, 12 | g,ɡ,, 13 | h,h,, 14 | ',ʔ,, 15 | ’,ʔ,, 16 | j,dʒ,, 17 | k,k,, 18 | m,m,, 19 | n,n,, 20 | p,p,, 21 | sh,ʃ,, 22 | s,s,, 23 | t,t,, 24 | w,w,, 25 | y,j,, 26 | zh,ʒ,, 27 | z,z,, 28 | -------------------------------------------------------------------------------- /g2p/mappings/langs/oka/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: nsyilxcən 3 | mappings: 4 | - display_name: nsyilxcən to IPA 5 | rules_path: oka_to_ipa.csv 6 | in_lang: oka-equiv 7 | out_lang: oka-ipa 8 | authors: 9 | - Craig Carpenter 10 | type: mapping 11 | prevent_feeding: true 12 | rule_ordering: apply-longest-first 13 | norm_form: NFC 14 | <<: *shared 15 | - display_name: Unicode Equivalencies 16 | in_lang: oka 17 | out_lang: oka-equiv 18 | authors: 19 | - Eric Joanis 20 | rules_path: oka_equiv.csv 21 | norm_form: NFD 22 | <<: *shared 23 | -------------------------------------------------------------------------------- /g2p/mappings/langs/oka/oka_equiv.csv: -------------------------------------------------------------------------------- 1 | ’,' 2 | ‘,' 3 | ˊ,' 4 | `,' 5 | ́,' 6 | ̒,' 7 | ̓,' 8 | ̔,' 9 | ̕,' 10 | ̛,' 11 | ʻ,' 12 | ʼ,' 13 | ʽ,' 14 | ʹ,' 15 | ',' 16 | -------------------------------------------------------------------------------- /g2p/mappings/langs/oka/oka_to_ipa.csv: -------------------------------------------------------------------------------- 1 | 
a,a,, 2 | a',ˈa,, 3 | c,ts,, 4 | c',tsʼ,, 5 | ə,ə,, 6 | e,ə,, 7 | h,h,, 8 | ḥ,h,, 9 | i,i,, 10 | i',ˈi,, 11 | ɣ,ɣ,, 12 | ɣ',ɣˀ,, 13 | k,k,, 14 | k',kʼ,, 15 | kʷ,kʷ,, 16 | k'ʷ,kʼʷ,, 17 | l,l,, 18 | l',lˀ,, 19 | ɬ,ɬ,, 20 | ł,ɬ,, 21 | ƛ',tɬʼ,, 22 | m,m,, 23 | m',mˀ,, 24 | n,n,, 25 | n',nˀ,, 26 | o,o,, 27 | o',ˈo,, 28 | p,p,, 29 | p',pʼ,, 30 | q,q,, 31 | q',qʼ,, 32 | qʷ,qʷ,, 33 | q'ʷ,qʼʷ,, 34 | r,ɾ,, 35 | r',ɾˀ,, 36 | s,s,, 37 | t,t,, 38 | t',tʼ,, 39 | ť,tʼ,, 40 | ʕ,ʕ,, 41 | ʕ',ʕˀ,, 42 | u,u,, 43 | u',ˈu,, 44 | ʕʷ,ʕʷ,, 45 | ʕ'ʷ,ʕˀʷ,, 46 | x̌,χ,, 47 | x,x,, 48 | w,w,, 49 | y,j,, 50 | x̌ʷ,χʷ,, 51 | xʷ,xʷ,, 52 | w',wˀ,, 53 | w,w,, 54 | y',jˀ,, 55 | ʷ,ʷ,, 56 | ˀ,ˀ,, 57 | -------------------------------------------------------------------------------- /g2p/mappings/langs/sal/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Salishan languages with APA-based writing systems 3 | mappings: 4 | - display_name: Salishan APA to IPA 5 | rules_path: sal_apa_to_ipa.csv 6 | in_lang: sal-apa-equiv 7 | out_lang: sal-ipa 8 | authors: 9 | - Tony Mattina 10 | - Eric Joanis 11 | type: mapping 12 | prevent_feeding: true 13 | rule_ordering: apply-longest-first 14 | norm_form: NFD 15 | <<: *shared 16 | - display_name: Unicode Equivalencies 17 | in_lang: sal-apa 18 | out_lang: sal-apa-equiv 19 | authors: 20 | - Eric Joanis 21 | rules_path: sal_equiv.csv 22 | norm_form: NFD 23 | <<: *shared 24 | -------------------------------------------------------------------------------- /g2p/mappings/langs/sal/sal_apa_to_ipa.csv: -------------------------------------------------------------------------------- 1 | a,a,, 2 | a',ˈa,, 3 | á,ˈa,, 4 | ạ,a̠,, 5 | ạ́,ˈa̠,, 6 | æ,æ,, 7 | æ',ˈæ,, 8 | c,ts,, 9 | c̣,c̠,, 10 | θ,θ,, 11 | ᶿ,ᶿ,, 12 | c',tsʼ,, 13 | c̓,tsʼ,, 14 | č,ʧ,, 15 | č̓,ʧʼ,, 16 | ǰ,ʤ,, 17 | ɛ,ɛ,, 18 | ɛ́,ˈɛ,, 19 | ʌ,ʌ,, 20 | e,ə,, 21 | é,ˈə,, 22 | ẹ́,ˈə̠,, 23 | ə,ə,, 24 | ə́,ˈə,, 25 | ə̣́,ˈə̠,, 26 | ʌ́,ˈʌ,, 27 | g,ɡ,, 
28 | gʷ,ɡʷ,, 29 | h,h,, 30 | ḥ,h,, 31 | ḥʷ,h̠ʷ,, 32 | i,i,, 33 | i',ˈi,, 34 | ɣ,ɣ,, 35 | ɣ',ɣˀ,, 36 | k,k,, 37 | k',kʼ,, 38 | kʷ,kʷ,, 39 | k'ʷ,kʼʷ,, 40 | l,l,, 41 | l',lˀ,, 42 | ḷ,l̠,, 43 | ɬ,ɬ,, 44 | ł,ɬ,, 45 | ƛ',tɬʼ,, 46 | m,m,, 47 | m',mˀ,, 48 | n,n,, 49 | n',nˀ,, 50 | ṇ,n̠,, 51 | ŋ,ŋ,, 52 | o,o,, 53 | o',ˈo,, 54 | ó,ˈo,, 55 | ɔ,ɔ,, 56 | ɔ́,ˈɔ,, 57 | p,p,, 58 | p',pʼ,, 59 | q,q,, 60 | q',qʼ,, 61 | qʷ,qʷ,, 62 | q'ʷ,qʼʷ,, 63 | r,ɾ,, 64 | r',ɾˀ,, 65 | s,s,, 66 | ṣ,s̠,, 67 | š,ʃ,, 68 | t,t,, 69 | t',tʼ,, 70 | ť,tʼ,, 71 | ʕ,ʕ,, 72 | ʕ',ʕˀ,, 73 | u,u,, 74 | u',ˈu,, 75 | ú,ˈu,, 76 | ụ,u̠,, 77 | ụ́,ˈu̠,, 78 | ʕʷ,ʕʷ,, 79 | ʕ'ʷ,ʕˀʷ,, 80 | x̌,χ,, 81 | x,x,, 82 | x̣,x̠,, 83 | x̣ʷ,x̠ʷ,, 84 | w,w,, 85 | y,j,, 86 | x̌ʷ,χʷ,, 87 | xʷ,xʷ,, 88 | w',wˀ,, 89 | w,w,, 90 | y',jˀ,, 91 | ʷ,ʷ,, 92 | ʡ,ʡ,, 93 | ʔ,ʔ,, 94 | ˀ,ˀ,, 95 | z',zˀ,, 96 | z̓,zˀ,, 97 | -------------------------------------------------------------------------------- /g2p/mappings/langs/sal/sal_equiv.csv: -------------------------------------------------------------------------------- 1 | ’,' 2 | ‘,' 3 | \u0315,' 4 | \u0301{1}\u0323{2},\u0323{2}\u0301{1} 5 | -------------------------------------------------------------------------------- /g2p/mappings/langs/see/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Seneca 3 | mappings: 4 | - display_name: Seneca to IPA 5 | rules_path: see_to_ipa.csv 6 | in_lang: see 7 | out_lang: see-ipa 8 | case_sensitive: false 9 | norm_form: NFD 10 | rule_ordering: as-written 11 | authors: 12 | - Aidan Pine 13 | <<: *shared 14 | -------------------------------------------------------------------------------- /g2p/mappings/langs/see/see_to_ipa.csv: -------------------------------------------------------------------------------- 1 | ö:,o\u0303 2 | ë:,e\u0303 3 | ä:,ɑ\u0303 4 | ö,o\u0303 5 | ë,e\u0303 6 | ä,ɑ\u0303 7 | a:,ɑ 8 | o:,o 9 | u:,u 10 | e:,e 11 | i:,i 12 | a,ɑ 13 | e,e 14 | i,i 15 | o,o 16 | u,u 
17 | j,d͡ʒ 18 | z,z 19 | t,t 20 | d,d 21 | g,ɡ 22 | k,k 23 | tš,t͡ʃ 24 | s,s 25 | w,w 26 | ',ʔ 27 | ’,ʔ 28 | š,ʃ 29 | y,j 30 | -------------------------------------------------------------------------------- /g2p/mappings/langs/srs/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Tsuut'ina 3 | mappings: 4 | - display_name: Tsuut'ina to IPA 5 | rules_path: srs_to_ipa.json 6 | in_lang: srs 7 | out_lang: srs-ipa 8 | rule_ordering: as-written 9 | norm_form: NFD 10 | case_sensitive: false 11 | authors: 12 | - Christopher Cox 13 | <<: *shared 14 | - display_name: Tsuut'ina IPA to English IPA 15 | rules_path: srs_ipa_to_eng_ipa.json 16 | in_lang: srs-ipa 17 | out_lang: eng-ipa 18 | norm_form: NFD 19 | rule_ordering: as-written 20 | case_sensitive: false 21 | authors: 22 | - Christopher Cox 23 | <<: *shared 24 | -------------------------------------------------------------------------------- /g2p/mappings/langs/srs/srs_ipa_to_eng_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "in": "pʰ", "out": "p" }, 3 | { "in": "p", "out": "b" }, 4 | { "in": "m", "out": "m" }, 5 | { "in": "n", "out": "n" }, 6 | { "in": "t͡ɬʼ", "out": "ᵗs" }, 7 | { "in": "t͡ɬʰ", "out": "ᵗs" }, 8 | { "in": "t͡ɬ", "out": "dz" }, 9 | { "in": "ɬ", "out": "s" }, 10 | { "in": "l", "out": "l" }, 11 | { "in": "t͡sʼ", "out": "ᵗs" }, 12 | { "in": "t͡sʰ", "out": "ᵗs" }, 13 | { "in": "t͡s", "out": "dz" }, 14 | { "in": "t͡ʃʼ", "out": "ᵗʃ" }, 15 | { "in": "t͡ʃʰ", "out": "ᵗʃ" }, 16 | { "in": "t͡ʃ", "out": "dʒ" }, 17 | { "in": "ʃ", "out": "ʃ" }, 18 | { "in": "ʒ", "out": "ʒ" }, 19 | { "in": "s", "out": "s" }, 20 | { "in": "z", "out": "z" }, 21 | { "in": "tʼ", "out": "ᵗ" }, 22 | { "in": "tʰ", "out": "ᵗ" }, 23 | { "in": "t", "out": "d" }, 24 | { "in": "kʼʷ", "out": "ᵏ" }, 25 | { "in": "kʼ", "out": "ᵏ" }, 26 | { "in": "kʰ", "out": "ᵏ" }, 27 | { "in": "kʷ", "out": "ɡ" }, 28 | { "in": "k", 
"out": "ɡ" }, 29 | { "in": "ɰ", "out": "ɹ" }, 30 | { "in": "x", "out": "h" }, 31 | { "in": "h", "out": "h" }, 32 | { "in": "ʔ", "out": "ʔ" }, 33 | { "in": "ᵗ", "out": "t" }, 34 | { "in": "ᵏ", "out": "k" }, 35 | { "in": "\u0300", "out": "" }, 36 | { "in": "\u0301", "out": "" }, 37 | { "in": "\u0302", "out": "" }, 38 | { "in": "\u0304", "out": "" }, 39 | { "in": "\u030c", "out": "" }, 40 | { "in": "\u1dc4", "out": "" }, 41 | { "in": "\u1dc5", "out": "" }, 42 | { "in": "\u1dc6", "out": "" }, 43 | { "in": "\u1dc7", "out": "" }, 44 | { "in": "aj", "out": "aɪ" }, 45 | { "in": "ɛj", "out": "eɪ" }, 46 | { "in": "ɔj", "out": "ɔɪ" }, 47 | { "in": "ʊj", "out": "ɔɪ" }, 48 | { "in": "j", "out": "j" }, 49 | { "in": "ɑw", "out": "aʊ" }, 50 | { "in": "ʊw", "out": "oʊ" }, 51 | { "in": "w", "out": "w" }, 52 | { "in": "ɑː", "out": "ɑ" }, 53 | { "in": "ɪː", "out": "i" }, 54 | { "in": "ɔː", "out": "oː" }, 55 | { "in": "ʊː", "out": "u" }, 56 | { "in": "ʌ", "out": "ʌ" }, 57 | { "in": "ɪ", "out": "ɪ" }, 58 | { "in": "ɔ", "out": "ɔ" }, 59 | { "in": "ʊ", "out": "ʊ" } 60 | ] 61 | -------------------------------------------------------------------------------- /g2p/mappings/langs/str/README.md: -------------------------------------------------------------------------------- 1 | Language-specific files for SENĆOŦEN 2 | 3 | Note: SENĆOŦEN does not seem to have a specific ISO639-3 code, but 4 | 'str' is the general code for Straits Salish. 
See: 5 | 6 | https://iso639-3.sil.org/ 7 | https://norrisresearch.com/lang_rept/NRI_Rept_Mar2016_Appendices.pdf 8 | -------------------------------------------------------------------------------- /g2p/mappings/langs/str/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: SENĆOŦEN 3 | mappings: 4 | - display_name: SENĆOŦEN equivalency 5 | rules_path: str_equiv.json 6 | in_lang: str 7 | out_lang: str-equiv 8 | rule_ordering: as-written 9 | norm_form: NFD 10 | case_sensitive: true 11 | authors: 12 | - Shankhalika Srikanth 13 | <<: *shared 14 | - display_name: SENĆOŦEN to IPA 15 | rules_path: str_to_ipa.json 16 | in_lang: str-equiv 17 | out_lang: str-ipa 18 | rule_ordering: as-written 19 | norm_form: NFD 20 | case_sensitive: true 21 | authors: 22 | - Aidan Pine 23 | <<: *shared 24 | -------------------------------------------------------------------------------- /g2p/mappings/langs/str/str_equiv.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "A\u0301", "out": "Á"}, 3 | {"in": "C\u0301", "out": "Ć"}, 4 | {"in": "I\u0301", "out": "Í"}, 5 | {"in": "\u0335K", "out": "₭"}, 6 | {"in": "Ḱ", "out": "K\u0301"}, 7 | {"in": "Ḵ", "out": "K\u0331"}, 8 | {"in": "K\u0332", "out": "K\u0331"}, 9 | {"in": "\u0335L", "out": "Ƚ"}, 10 | {"in": "Ṉ", "out": "N\u0331"}, 11 | {"in": "N\u0332", "out": "N\u0331"}, 12 | {"in": "S\u0301", "out": "Ś"}, 13 | {"in": "T\u0331", "out": "Ṯ"}, 14 | {"in": "T\u0332", "out": "Ṯ"}, 15 | {"in": "\u0335T", "out": "Ŧ"}, 16 | {"in": "W\u0332", "out": "W\u0331"}, 17 | {"in": "X\u0332", "out": "X\u0331"}, 18 | {"in": " \u0326", "out": "¸"}, 19 | {"in": " \u0327", "out": "¸"} 20 | ] 21 | -------------------------------------------------------------------------------- /g2p/mappings/langs/str/str_to_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "Á", "out": "e"}, 3 | 
{"in": "Ⱥ", "out": "eː"}, 4 | {"in": "A", "out": "æ"}, 5 | {"in": "B", "out": "pʼ"}, 6 | {"in": "Ć", "out": "t͡ʃ"}, 7 | {"in": "Ȼ", "out": "k̟ʷ"}, 8 | {"in": "C", "out": "k̟"}, 9 | {"in": "D", "out": "tʼ"}, 10 | {"in": "E", "out": "ʌ"}, 11 | {"in": "H", "out": "h"}, 12 | {"in": "Í", "out": "aɪ"}, 13 | {"in": "I", "out": "i"}, 14 | {"in": "J", "out": "t͡ʃʼ"}, 15 | {"in": "Ꝁ", "out": "qʷʼ"}, 16 | {"in": "₭", "out": "qʷʼ"}, 17 | {"in": "K\u0301", "out": "qʷ"}, 18 | {"in": "K\u0331", "out": "q"}, 19 | {"in": "K", "out": "qʼ"}, 20 | {"in": "L", "out": "l"}, 21 | {"in": "Ƚ", "out": "ɬ"}, 22 | {"in": "M", "out": "m"}, 23 | {"in": "N\u0331", "out": "ŋ"}, 24 | {"in": "N", "out": "n"}, 25 | {"in": "O", "out": "ɑ"}, 26 | {"in": "P", "out": "p"}, 27 | {"in": "Q", "out": "kʷʼ"}, 28 | {"in": "Ś", "out": "ʃ"}, 29 | {"in": "S", "out": "s"}, 30 | {"in": "Ⱦ", "out": "t͡s̪"}, 31 | {"in": "Ṯ", "out": "t͡ɬʼ"}, 32 | {"in": "Ŧ", "out": "θ"}, 33 | {"in": "T", "out": "t"}, 34 | {"in": "U", "out": "u"}, 35 | {"in": "W\u0331", "out": "xʷ"}, 36 | {"in": "W", "out": "w"}, 37 | {"in": "X\u0331", "out": "χʷ"}, 38 | {"in": "X", "out": "χ"}, 39 | {"in": "Y", "out": "j"}, 40 | {"in": "Z", "out": "d͡z"}, 41 | {"in": "¸", "out": "ʔ", "comment": "cedilla is now the community standard for glottal stop"}, 42 | {"in": ",", "out": "ʔ", "comment": "comma still occurs, though. 
TODO: disambiguate punctuation vs glottal stop"}, 43 | {"in": "s", "out": "s"} 44 | ] 45 | -------------------------------------------------------------------------------- /g2p/mappings/langs/tau/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Upper Tanana 3 | mappings: 4 | - display_name: Upper Tanana Equivalencies 5 | in_lang: tau 6 | out_lang: tau-equiv 7 | authors: 8 | - Sabrina Yu 9 | type: mapping 10 | rules_path: tau_equiv.json 11 | prevent_feeding: false 12 | rule_ordering: as-written 13 | case_sensitive: false 14 | norm_form: NFD 15 | <<: *shared 16 | - display_name: Upper Tanana to IPA 17 | in_lang: tau-equiv 18 | out_lang: tau-ipa 19 | authors: 20 | - Sabrina Yu 21 | type: mapping 22 | rules_path: tau_to_ipa.json 23 | prevent_feeding: true 24 | rule_ordering: as-written 25 | case_sensitive: false 26 | norm_form: NFD 27 | <<: *shared 28 | -------------------------------------------------------------------------------- /g2p/mappings/langs/tau/tau_equiv.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "in": "\u2019", 4 | "out": "\u02bc" 5 | }, 6 | { 7 | "in": "\u0027", 8 | "out": "\u02bc" 9 | }, 10 | { 11 | "in": "\u0332", 12 | "out": "" 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /g2p/mappings/langs/tce/README.md: -------------------------------------------------------------------------------- 1 | IPA mappings for Southern Tutchone with resources from the Yukon Native Language Centre 2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/tce/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Southern Tutchone 3 | mappings: 4 | - display_name: Southern Tutchone equivalencies 5 | in_lang: tce 6 | out_lang: tce-equiv 7 | authors: 8 | - 
Shankhalika Srikanth 9 | type: mapping 10 | rules_path: tce_equiv.csv 11 | prevent_feeding: false 12 | rule_ordering: as-written 13 | case_sensitive: false 14 | norm_form: NFD 15 | <<: *shared 16 | - display_name: Southern Tutchone to IPA 17 | in_lang: tce-equiv 18 | out_lang: tce-ipa 19 | authors: 20 | - Shankhalika Srikanth 21 | type: mapping 22 | rules_path: tce_to_ipa.csv 23 | prevent_feeding: true 24 | rule_ordering: as-written 25 | case_sensitive: false 26 | norm_form: NFD 27 | <<: *shared 28 | -------------------------------------------------------------------------------- /g2p/mappings/langs/tce/tce_equiv.csv: -------------------------------------------------------------------------------- 1 | \u0331,\u0332 2 | \u0149,\u0027 3 | \u02BC,\u0027 4 | \u055A,\u0027 5 | \uFF07,\u0027 6 | ’,' 7 | ‘,' 8 | ˊ,' 9 | `,' 10 | ʻ,' 11 | ʼ,' 12 | ʽ,' 13 | ʹ,' 14 | \u02CA,\u0301 15 | -------------------------------------------------------------------------------- /g2p/mappings/langs/tce/tce_to_ipa.csv: -------------------------------------------------------------------------------- 1 | b,p 2 | tth',tθʼ 3 | tth,tθʰ 4 | th,θ 5 | tl',tɬʼ 6 | tl,tɬʰ 7 | ts',tsʼ 8 | ts,tsʰ 9 | t',tʼ 10 | t,tʰ 11 | ddh,tθ 12 | dh,ð 13 | dz,ts 14 | dl,tɬ 15 | d,t 16 | zh,ʒ 17 | ch',tʃʼ 18 | ch,tʃʰ 19 | j,tʃ 20 | sh,ʃ 21 | ghw,ɣʷ 22 | gh,ɣ 23 | gw,kʷ 24 | khw,xʷ 25 | k'w,kʼʷ 26 | kh,x 27 | kw,kʰʷ 28 | k',kʼ 29 | k,kʰ 30 | r,ɹ 31 | ',ʔ 32 | ł,ɬ 33 | mb,ᵐp 34 | nd,ⁿt 35 | nj,ⁿtʃ 36 | \u0301\u0328,\u0303 37 | \u0300\u0328,\u0303 38 | \u0304\u0328,\u0303 39 | \u030C\u0328,\u0303 40 | \u0302\u0328,\u0303 41 | \u0301, 42 | \u0300, 43 | \u0304, 44 | \u030C, 45 | \u0302, 46 | e\u0328,ɛ̃ 47 | ü\u0328,ʉ̃ 48 | ä\u0328w,ə̃w 49 | a\u0328y,ãi 50 | ä\u0328,ʌ̃ 51 | a\u0328,ɑ̃ 52 | \u0328,\u0303 53 | e,ɛ 54 | ü,ʉ 55 | äw,əw 56 | ay,ai 57 | ä,ʌ 58 | a,ɑ 59 | o,o 60 | y,j 61 | g,\u0261 62 | -------------------------------------------------------------------------------- /g2p/mappings/langs/tgx/config-g2p.yaml: 
-------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Tagish 3 | mappings: 4 | - display_name: Tagish to IPA 5 | rules_path: tgx_to_ipa.json 6 | in_lang: tgx 7 | out_lang: tgx-ipa 8 | rule_ordering: apply-longest-first 9 | norm_form: NFD 10 | case_sensitive: false 11 | authors: 12 | - Christopher Cox 13 | <<: *shared 14 | - display_name: Tagish IPA to English IPA 15 | rules_path: tgx_ipa_to_eng_ipa.json 16 | in_lang: tgx-ipa 17 | out_lang: eng-ipa 18 | norm_form: NFD 19 | rule_ordering: apply-longest-first 20 | case_sensitive: false 21 | authors: 22 | - Christopher Cox 23 | <<: *shared 24 | -------------------------------------------------------------------------------- /g2p/mappings/langs/tli/README.md: -------------------------------------------------------------------------------- 1 | IPA mappings for Tlingit 2 | with resources from the Yukon Native Language Centre 3 | -------------------------------------------------------------------------------- /g2p/mappings/langs/tli/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Tlingit 3 | mappings: 4 | - display_name: Tlingit Equivalencies 5 | in_lang: tli 6 | out_lang: tli-equiv 7 | authors: 8 | - Shankhalika Srikanth 9 | type: mapping 10 | rules_path: tli_equiv.csv 11 | prevent_feeding: false 12 | rule_ordering: as-written 13 | case_sensitive: false 14 | norm_form: NFD 15 | <<: *shared 16 | - display_name: Tlingit to IPA 17 | in_lang: tli-equiv 18 | out_lang: tli-ipa 19 | authors: 20 | - Shankhalika Srikanth 21 | type: mapping 22 | rules_path: tli_to_ipa.csv 23 | prevent_feeding: true 24 | rule_ordering: as-written 25 | case_sensitive: false 26 | norm_form: NFD 27 | <<: *shared 28 | -------------------------------------------------------------------------------- /g2p/mappings/langs/tli/tli_equiv.csv: 
-------------------------------------------------------------------------------- 1 | \u0331,\u0332,, 2 | \u0149,\u0027,, 3 | \u02BC,\u0027,, 4 | \u055A,\u0027,, 5 | \uFF07,\u0027,, 6 | ’,' 7 | ‘,' 8 | ˊ,' 9 | `,' 10 | ʻ,' 11 | ʼ,' 12 | ʽ,' 13 | ʹ,' 14 | ',' 15 | -------------------------------------------------------------------------------- /g2p/mappings/langs/tli/tli_to_ipa.csv: -------------------------------------------------------------------------------- 1 | p,pʰ,, 2 | b,p,, 3 | d,t,, 4 | t,tʰ,,[^'sl] 5 | l,ɬʰ,t,[^'] 6 | l,ɬ,, 7 | ł,ɬ,, 8 | sh,ʃ,, 9 | z,s,, 10 | s,sʰ,t,[^'] 11 | j,tʃ,, 12 | ch,tʃʰ,,[^'] 13 | ch,tʃ,, 14 | c,tʃʰ,,[^'] 15 | c,tʃ,, 16 | w,ʷ,[x'kgh\.𝚐̲𝚔̲𝚡̲̲], 17 | k,kʰ,,[^'] 18 | x,x,, 19 | g,k,, 20 | ÿ,ɰ,, 21 | y,j,, 22 | é,e,, 23 | á,a,, 24 | ó,o,, 25 | ú,u,, 26 | í,i,, 27 | è,e,, 28 | à,a,, 29 | ì,i,, 30 | ò,o,, 31 | ù,u,, 32 | ê,e,, 33 | â,a,, 34 | î,i,, 35 | ô,o,, 36 | û,u,, 37 | \u0061\u0328,ã,, 38 | \u0065\u0328,ẽ,, 39 | \u0069\u0328,ĩ,, 40 | \u006F\u0328,õ,, 41 | \u0075\u0328,ũ,, 42 | ee,iː,, 43 | ei,eː,, 44 | aa,aː,, 45 | oo,uː,, 46 | 𝚘̲o,oː,, 47 | 𝚘̲,o,, 48 | 𝚡̲,χ,, 49 | \.,ʔ,,\S 50 | 𝚐̲,q,, 51 | 𝚔̲,q,,' 52 | 𝚔̲,qʰ,,[^'] 53 | 𝚕̲,l,, 54 | ',ʼ 55 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ttm/README.md: -------------------------------------------------------------------------------- 1 | IPA mapping for Northern Tutchone with files from the Yukon Native Language Centre 2 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ttm/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Northern Tutchone 3 | mappings: 4 | - display_name: Northern Tutchone equivalencies 5 | in_lang: ttm 6 | out_lang: ttm-equiv 7 | authors: 8 | - Shankhalika Srikanth 9 | type: mapping 10 | rules_path: ttm_equiv.csv 11 | prevent_feeding: false 12 | rule_ordering: as-written 13 | 
case_sensitive: false 14 | norm_form: NFD 15 | <<: *shared 16 | - display_name: Northern Tutchone to IPA 17 | in_lang: ttm-equiv 18 | out_lang: ttm-ipa 19 | authors: 20 | - Shankhalika Srikanth 21 | type: mapping 22 | rules_path: ttm_to_ipa.csv 23 | prevent_feeding: true 24 | rule_ordering: as-written 25 | case_sensitive: false 26 | norm_form: NFD 27 | <<: *shared 28 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ttm/ttm_equiv.csv: -------------------------------------------------------------------------------- 1 | \u0331,\u0332 2 | \u0149,\u0027 3 | \u02BC,\u0027 4 | \u055A,\u0027 5 | \uFF07,\u0027 6 | ’,' 7 | ‘,' 8 | ˊ,' 9 | `,' 10 | ʻ,' 11 | ʼ,' 12 | ʽ,' 13 | ʹ,' 14 | \u02CA,\u0301 15 | -------------------------------------------------------------------------------- /g2p/mappings/langs/ttm/ttm_to_ipa.csv: -------------------------------------------------------------------------------- 1 | b,p 2 | tth',tθʼ 3 | tth,tθʰ 4 | th,θ 5 | tl',tɬʼ 6 | tl,tɬʰ 7 | ts',tsʼ 8 | ts,tsʰ 9 | t',tʼ 10 | t,tʰ 11 | ddh,tθ 12 | dh,ð 13 | dz,ts 14 | dl,tɬ 15 | d,t 16 | zh,ʒ 17 | nj,ⁿtʃ 18 | ch',tʃʼ 19 | ch,tʃʰ 20 | j,tʃ 21 | sh,ʃ 22 | ghw,ɣʷ 23 | gh,ɣ 24 | gw,kʷ 25 | khw,xʷ 26 | k'w,kʼʷ 27 | kh,x 28 | kw,kʰʷ 29 | k',kʼ 30 | k,kʰ 31 | g,k 32 | r,ɹ 33 | ',ʔ 34 | ł,ɬ 35 | mb,ᵐp 36 | nd,ⁿt 37 | n,ŋ,i 38 | n,ŋ,u: 39 | \u0301\u0328,\u0303 40 | \u0304\u0328,\u0303 41 | \u030C\u0328,\u0303 42 | \u0301, 43 | \u0304, 44 | \u030C, 45 | a\u0328i,ʌ̃ɪ 46 | a\u0328u,ʌ̃ʊ 47 | a\u0328e,æ̃ 48 | ä\u0328,ʌ̃ 49 | a\u0328,ɑ̃ 50 | \u0328,\u0303 51 | ai,ʌɪ 52 | au,ʌʊ 53 | ae,æ 54 | ä,ʌ 55 | a,ɑ 56 | y,j 57 | o,o 58 | e,e 59 | g,\u0261 60 | -------------------------------------------------------------------------------- /g2p/mappings/langs/und/README.md: -------------------------------------------------------------------------------- 1 | Fallback resources for an unknown language or for cases where other G2P solutions 2 | have failed. 
("und" is a special ISO 639-3 code for an undetermined language.) 3 | -------------------------------------------------------------------------------- /g2p/mappings/langs/und/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Undetermined 3 | mappings: 4 | - display_name: Undetermined ASCII to IPA 5 | rules_path: und_to_ipa.json 6 | in_lang: und-ascii 7 | out_lang: und-ipa 8 | norm: NFD 9 | case_sensitive: false 10 | escape_special: false 11 | authors: 12 | - Patrick Littell 13 | <<: *shared 14 | - display_name: Undetermined IPA to English IPA 15 | rules_path: und_ipa_to_eng_ipa.json 16 | in_lang: und-ipa 17 | out_lang: eng-ipa 18 | rule_ordering: apply-longest-first 19 | authors: 20 | - Patrick Littell 21 | <<: *shared 22 | - display_name: Undetermined IPA to English IPA 23 | rules_path: und_ipa_to_eng_ipa.json 24 | in_lang: und-ipa 25 | out_lang: hamming-eng-ipa 26 | rule_ordering: apply-longest-first 27 | authors: 28 | - Patrick Littell 29 | <<: *shared 30 | - display_name: Undetermined Unicode to ASCII 31 | type: unidecode 32 | norm: NFD 33 | in_lang: und 34 | out_lang: und-ascii 35 | authors: 36 | - Eric Joanis 37 | <<: *shared 38 | -------------------------------------------------------------------------------- /g2p/mappings/langs/und/und_ipa_to_eng_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "out": "ɑ", 4 | "in": "a" 5 | }, 6 | { 7 | "out": "b", 8 | "in": "b" 9 | }, 10 | { 11 | "out": "d", 12 | "in": "d" 13 | }, 14 | { 15 | "out": "eː", 16 | "in": "e" 17 | }, 18 | { 19 | "out": "f", 20 | "in": "f" 21 | }, 22 | { 23 | "out": "h", 24 | "in": "h" 25 | }, 26 | { 27 | "out": "i", 28 | "in": "i" 29 | }, 30 | { 31 | "out": "j", 32 | "in": "j" 33 | }, 34 | { 35 | "out": "k", 36 | "in": "k" 37 | }, 38 | { 39 | "out": "l", 40 | "in": "l" 41 | }, 42 | { 43 | "out": "m", 44 | "in": "m" 45 | }, 46 | { 47 | "out": "n", 48 | "in": 
"n" 49 | }, 50 | { 51 | "out": "oː", 52 | "in": "o" 53 | }, 54 | { 55 | "out": "p", 56 | "in": "p" 57 | }, 58 | { 59 | "out": "k", 60 | "in": "q" 61 | }, 62 | { 63 | "out": "ɾ", 64 | "in": "r" 65 | }, 66 | { 67 | "out": "s", 68 | "in": "s" 69 | }, 70 | { 71 | "out": "t", 72 | "in": "t" 73 | }, 74 | { 75 | "out": "ts", 76 | "in": "t͡s" 77 | }, 78 | { 79 | "out": "u", 80 | "in": "u" 81 | }, 82 | { 83 | "out": "v", 84 | "in": "v" 85 | }, 86 | { 87 | "out": "w", 88 | "in": "w" 89 | }, 90 | { 91 | "out": "k", 92 | "in": "x" 93 | }, 94 | { 95 | "out": "z", 96 | "in": "z" 97 | }, 98 | { 99 | "out": "ə", 100 | "in": "ə" 101 | }, 102 | { 103 | "out": "ɡ", 104 | "in": "ɡ" 105 | }, 106 | { 107 | "out": "ʒ", 108 | "in": "ʒ" 109 | }, 110 | { 111 | "out": "ʔ", 112 | "in": "ʔ" 113 | } 114 | ] 115 | -------------------------------------------------------------------------------- /g2p/mappings/langs/und/und_to_ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "in": "a", "out": "a" }, 3 | { "in": "b", "out": "b" }, 4 | { "in": "c", "out": "t͡ʃ" }, 5 | { "in": "d", "out": "d" }, 6 | { "in": "e", "out": "e" }, 7 | { "in": "f", "out": "f" }, 8 | { "in": "g", "out": "ɡ" }, 9 | { "in": "h", "out": "h" }, 10 | { "in": "i", "out": "i" }, 11 | { "in": "j", "out": "ʒ" }, 12 | { "in": "k", "out": "k" }, 13 | { "in": "l", "out": "l" }, 14 | { "in": "m", "out": "m" }, 15 | { "in": "n", "out": "n" }, 16 | { "in": "o", "out": "o" }, 17 | { "in": "p", "out": "p" }, 18 | { "in": "q", "out": "q" }, 19 | { "in": "r", "out": "r" }, 20 | { "in": "s", "out": "s" }, 21 | { "in": "t", "out": "t" }, 22 | { "in": "u", "out": "u" }, 23 | { "in": "v", "out": "v" }, 24 | { "in": "w", "out": "w" }, 25 | { "in": "x", "out": "x" }, 26 | { "in": "y", "out": "j" }, 27 | { "in": "z", "out": "z" }, 28 | { "in": "@", "out": "ə" }, 29 | { "in": "\\?", "out": "ʔ" }, 30 | { "in": "'", "out": "ʔ" }, 31 | { "in": ",", "out": "ʔ" }, 32 | { "in": ":", "out": "" } 33 | ] 34 
| -------------------------------------------------------------------------------- /g2p/mappings/langs/win/README.md: -------------------------------------------------------------------------------- 1 | Language-specific files for Hoocąk (Winnebago / Ho-Chunk) 2 | 3 | Alphabet table taken from (and slightly modified): https://en.wikipedia.org/wiki/Winnebago_language#The_sounds_of_Ho-Chunk_with_example_words[6] 4 | -------------------------------------------------------------------------------- /g2p/mappings/langs/win/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Hoocąk 3 | mappings: 4 | - display_name: Hoocąk to IPA 5 | rules_path: win_to_ipa.json 6 | in_lang: win 7 | out_lang: win-ipa 8 | case_sensitive: false 9 | norm_form: NFC 10 | rule_ordering: apply-longest-first 11 | authors: 12 | - David Huggins-Daines 13 | <<: *shared 14 | -------------------------------------------------------------------------------- /g2p/mappings/langs/win/hoocak_alphabet.csv: -------------------------------------------------------------------------------- 1 | a,a,"a, ʌ" 2 | aa,ā,aː 3 | ą,ą,"ã, ʌ̃" 4 | ąą,ą̄,ã: 5 | b,b,b 6 | c,c,tʃ 7 | e,e,"e, ɛ" 8 | ee,ē,"eː, ɛː" 9 | g,g,g 10 | ǧ,ǧ,ɣ 11 | h,h,h 12 | i,i,i 13 | ii,ī,iː 14 | į,į,ĩ 15 | įį,į̄,ĩ: 16 | j,j,dʒ 17 | k,k,k 18 | k',k',k' 19 | m,m,m 20 | n,n,n 21 | o,o,o 22 | oo,ō,o: 23 | p,p,p 24 | p',p',p' 25 | r,r,r 26 | s,s,s 27 | s',s',s' 28 | š,š,ʃ 29 | š',š',ʃ' 30 | t,t,t 31 | t',t',t' 32 | u,u,u 33 | uu,ū,u: 34 | ų,ų,ũ 35 | ųų,ų̄,ũ: 36 | w,w,w 37 | x,x,x 38 | x',x',x' 39 | y,y,j 40 | z,z,z 41 | ž,ž,ʒ 42 | '','',ʔ 43 | -------------------------------------------------------------------------------- /g2p/static/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/static/__init__.py 
-------------------------------------------------------------------------------- /g2p/static/assets/bonjour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/static/assets/bonjour.png -------------------------------------------------------------------------------- /g2p/static/blockly_main.js: -------------------------------------------------------------------------------- 1 | (function () { 2 | let currentButton; 3 | function exportJS() { 4 | let ws = Blockly.getMainWorkspace() 5 | Blockly.JavaScript.addReservedWords('code'); 6 | let code = Blockly.JavaScript.workspaceToCode(ws) 7 | alert(code) 8 | return code 9 | } 10 | function exportPY(codeType) { 11 | let ws = Blockly.getMainWorkspace() 12 | Blockly.Python.addReservedWords('code'); 13 | let code = Blockly.Python.workspaceToCode(ws) 14 | alert(code) 15 | return code 16 | } 17 | 18 | function add() { 19 | let ws = Blockly.getMainWorkspace() 20 | Blockly.JavaScript.addReservedWords('code'); 21 | var code = Blockly.JavaScript.workspaceToCode( 22 | ws 23 | ); 24 | try { 25 | console.log(code) 26 | eval(code) 27 | } catch (error) { 28 | console.log(error) 29 | } 30 | } 31 | 32 | function clear() { 33 | let ws = Blockly.getMainWorkspace() 34 | ws.clear() 35 | } 36 | 37 | function handleAdd() { 38 | add(); 39 | // clear(); 40 | } 41 | 42 | document.querySelector('#clear').addEventListener('click', clear); 43 | document.querySelector('#add').addEventListener('click', handleAdd); 44 | document.querySelector('#exportJS').addEventListener('click', exportJS) 45 | document.querySelector('#exportPY').addEventListener('click', exportPY) 46 | 47 | Blockly.inject('blockly-div', { 48 | toolbox: document.getElementById('toolbox'), 49 | toolboxPosition: 'end', 50 | horizontalLayout: true, 51 | scrollbars: false 52 | }); 53 | 54 | 55 | })(); 56 | 
-------------------------------------------------------------------------------- /g2p/static/custom.css: -------------------------------------------------------------------------------- 1 | .center-text { 2 | text-align: center; 3 | } 4 | 5 | label { 6 | display: -webkit-inline-box; 7 | font-weight: 300; 8 | } 9 | 10 | #input, 11 | #output { 12 | width: 100%; 13 | height: 25%; 14 | } 15 | 16 | .export-buttons { 17 | display: inline; 18 | margin: 1rem; 19 | } 20 | 21 | #title { 22 | margin-top: 10px; 23 | margin-left: 5vw; 24 | } 25 | 26 | .ht_master .wtHolder { 27 | overflow-x: hidden; 28 | } 29 | 30 | .mg-top, 31 | h4 { 32 | margin-top: 40px; 33 | } 34 | 35 | .mg-bot { 36 | margin-bottom: 40px; 37 | } 38 | 39 | .hot-container, 40 | .settings, 41 | .abbs-container { 42 | display: none; 43 | } 44 | 45 | .hot-container.active, 46 | .settings.active, 47 | .abbs-container.active { 48 | display: inline; 49 | } 50 | 51 | table td { 52 | text-align: center; 53 | padding: 3px !important; 54 | } 55 | 56 | table th { 57 | padding: 3px !important; 58 | } 59 | -------------------------------------------------------------------------------- /g2p/tests/.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source_pkgs = g2p 4 | omit = *g2p/tests/* 5 | 6 | [report] 7 | precision = 2 8 | -------------------------------------------------------------------------------- /g2p/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/tests/__init__.py -------------------------------------------------------------------------------- /g2p/tests/public/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | PUBLIC_DIR = os.path.dirname(__file__) 4 | -------------------------------------------------------------------------------- 
/g2p/tests/public/data/atj.psv: -------------------------------------------------------------------------------- 1 | atj|atj-ipa|matcaci|mad͡ʒaʃi 2 | atj|atj-ipa|amiskw|amisɡw 3 | atj|atj-ipa|awesisak|aweːsisaɡ 4 | -------------------------------------------------------------------------------- /g2p/tests/public/data/clm.csv: -------------------------------------------------------------------------------- 1 | # Ten real words 2 | clm,clm-ipa,x̣áyk̕ʷs,χˈajkʼʷs 3 | clm,clm-ipa,sx̣cáʔəy̕,sχtsˈaʔəj̰ 4 | clm,clm-ipa,čúxʷəŋ̕,tʃˈuxʷəɴ̰ 5 | clm,clm-ipa,ɬə́č̕šən,ɬˈʌtʃʼʃən 6 | clm,clm-ipa,ʔəsyác̕ɬ,ʔəsjˈatsʼɬ 7 | clm,clm-ipa,q̕ʷəyíyəš,qʼʷəjˈijəʃ 8 | clm,clm-ipa,sqʷə́m̕x̣ʷ,sqʷˈʌm̰χʷ 9 | clm,clm-ipa,sq̕ʷə́ŋəs,sqʼʷˈʌɴəs 10 | clm,clm-ipa,sƛ̕ə́mk̕ʷ,stɬʼˈʌmkʼʷ 11 | clm,clm-ipa,sx̣ʷáx̣ʷc̕,sχʷˈaχʷtsʼ 12 | 13 | clm,eng-ipa,x̣áyk̕ʷs,ʃæjkws 14 | clm,eng-ipa,sx̣cáʔəy̕,sʃtsæʔəj 15 | clm,eng-ipa,čúxʷəŋ̕,t͡ʃukwəŋ 16 | clm,eng-ipa,ɬə́č̕šən,sʌtʃʃən 17 | clm,eng-ipa,ʔəsyác̕ɬ,ʔəsjætss 18 | clm,eng-ipa,q̕ʷəyíyəš,kwəjijəʃ 19 | clm,eng-ipa,sqʷə́m̕x̣ʷ,skwʌmʃw 20 | clm,eng-ipa,sq̕ʷə́ŋəs,skwʌŋəs 21 | clm,eng-ipa,sƛ̕ə́mk̕ʷ,stsʌmkw 22 | clm,eng-ipa,sx̣ʷáx̣ʷc̕,sʃwæʃwts 23 | 24 | clm,eng-arpabet,x̣áyk̕ʷs,SH AE Y K W S 25 | clm,eng-arpabet,sx̣cáʔəy̕,S SH T S AE HH AH Y 26 | clm,eng-arpabet,čúxʷəŋ̕,CH UW K W AH NG 27 | clm,eng-arpabet,ɬə́č̕šən,S AH CH SH AH N 28 | clm,eng-arpabet,ʔəsyác̕ɬ,HH AH S Y AE T S S 29 | clm,eng-arpabet,q̕ʷəyíyəš,K W AH Y IY Y AH SH 30 | clm,eng-arpabet,sqʷə́m̕x̣ʷ,S K W AH M SH W 31 | clm,eng-arpabet,sq̕ʷə́ŋəs,S K W AH NG AH S 32 | clm,eng-arpabet,sƛ̕ə́mk̕ʷ,S T S AH M K W 33 | clm,eng-arpabet,sx̣ʷáx̣ʷc̕,S SH W AE SH W T S 34 | 35 | # Artificial test data 36 | clm,clm-ipa,uʔúhuy̕,ɔʔˈɔhɔj̰ 37 | clm,clm-ipa,utúluk,utˈuluk 38 | clm,clm-ipa,ə,ə 39 | clm,clm-ipa,ə́,ˈʌ 40 | clm,clm-ipa,šəč̕əš,ʃɨtʃʼɨʃ 41 | clm,clm-ipa,čəšəč,tʃɨʃɨtʃ 42 | clm,clm-ipa,ə́x̣ʷəx̣ʷ,ˈʌχʷʊχʷ 43 | clm,clm-ipa,ə́x̌əx̌ʷ,ˈʌχʊχʷ 44 | -------------------------------------------------------------------------------- 
/g2p/tests/public/data/crg.psv: -------------------------------------------------------------------------------- 1 | crg-tmd|crg-ipa|kishchaymikawshoow|kɪʃtʃeːmɪkɑːʃoaw 2 | crg-tmd|crg-ipa|smenn|smɛn 3 | crg-tmd|crg-ipa|dayistaen|deːɪstɛ̃ː 4 | crg-tmd|crg-ipa|baenn|bɛ̃ː 5 | crg-dv|crg-ipa|kishcheemikaashoaw|kɪʃtʃeːmɪkɑːʃoaw 6 | crg-dv|crg-ipa|deeistaeñ|deːɪstɛ̃ː 7 | crg-dv|crg-ipa|lañg|lɑ̃ːŋ 8 | crg-dv|crg-ipa|eede|eːdɛ 9 | crg-dv|crg-ipa|Booñ|bɔ̃ː 10 | crg-dv|crg-ipa|Not|not 11 | crg-dv|crg-ipa|mooñd|mɔ̃ːd 12 | crg-dv|crg-ipa|maañzhii|mɑ̃ːʒiː 13 | crg-dv|crg-ipa|Aeñ|ɛ̃ː 14 | crg-dv|crg-ipa|Kreatoer|kɹɛʌtʌːɹ 15 | crg-dv|eng-arpabet|Booñ|B AO N 16 | crg-dv|eng-arpabet|Not|N AO T 17 | crg-dv|eng-arpabet|maañzhii|M AA N ZH EY 18 | crg-dv|eng-arpabet|ooma|OW M AH 19 | crg-dv|eng-arpabet|grel|G R EH L 20 | -------------------------------------------------------------------------------- /g2p/tests/public/data/crj.psv: -------------------------------------------------------------------------------- 1 | crj|crj-equiv|chisichiiyeshiishtamaatin|ᒋᓯᒌᔦᔒᔥᑕᒫᑎᓐ 2 | crj|eng-ipa|ᒋᓯᒌᔦᔒᔥᑕᒫᑎᓐ|tʃisitʃijeːʃiʃtɑmɑtin 3 | crj|eng-ipa|chisichiiyeshiishtamaatin|tʃisitʃijeːʃiʃtɑmɑtin 4 | crj|crj-equiv|âhkuhîkuyan|ᐋᐦᑯᐦᐄᑯᔭᓐ 5 | crj|crj-equiv|chimûshihtân|ᒋᒨᔑᐦᑖᓐ 6 | crj|crj-equiv|êchêshê|ᐁᒉᔐ 7 | crj|eng-arpabet|âhkuhîkuyan|AA HH K UW HH IY K UW Y AA N 8 | crj|eng-arpabet|chimûshihtân|CH IY M UW SH IY HH T AA N 9 | crj|eng-arpabet|êchêshê|EY CH EY SH EY 10 | -------------------------------------------------------------------------------- /g2p/tests/public/data/crk.psv: -------------------------------------------------------------------------------- 1 | crk|crk-ipa|kikiskamohkan|kɪkɪskʌmohkʌn 2 | crk|crk-ipa|âkohkwênikanêkin|aːkohkweːnɪkʌneːkɪn 3 | crk|crk-ipa|kihcêyim|kɪht͡seːjɪm 4 | crk|crk-ipa|kiwîci-ayisiyiniw|kɪwiːt͡sɪ-ʌjɪsɪjɪnɪw 5 | crk|crk-ipa|kâ-itohtêyan|kaː-ɪtohteːjʌn 6 | crk|crk-ipa|ayisiyinînahk|ʌjɪsɪjɪniːnʌhk 7 | crk|crk-ipa|ô-masinahikêw|oː-mʌsɪnʌhɪkeːw 8 | crk|crk-no-symbols|&|êkwa 
9 | crk|eng-arpabet|ômasinahikêw|OW M AH S IH N AH HH IH K EY W 10 | -------------------------------------------------------------------------------- /g2p/tests/public/data/crl.psv: -------------------------------------------------------------------------------- 1 | crl|crl-equiv|chichischaayihtaanaawaachichaa|ᒋᒋᔅᒑᔨᐦᑖᓈᐙᒋᒑ 2 | crl|eng-ipa|ᒋᒋᔅᒑᔨᐦᑖᓈᐙᒋᒑ|tʃitʃistʃɑjihtɑnɑwɑtʃitʃɑ 3 | crl|eng-ipa|chichischaayihtaanaawaachichaa|tʃitʃistʃɑjihtɑnɑwɑtʃitʃɑ 4 | crl|crl-equiv|âhkuhîkuyan|ᐋᐦᑯᐦᐄᑯᔭᓐ 5 | crl|crl-equiv|chimûshihtân|ᒋᒨᔑᐦᑖᓐ 6 | crl|crl-equiv|êchêshê|ᐁᒉᔐ 7 | crl|eng-arpabet|âhkuhîkuyan|AA HH K UW HH IY K UW Y AA N 8 | crl|eng-arpabet|chimûshihtân|CH IY M UW SH IY HH T AA N 9 | crl|eng-arpabet|êchêshê|EY CH EY SH EY 10 | -------------------------------------------------------------------------------- /g2p/tests/public/data/crm.psv: -------------------------------------------------------------------------------- 1 | crm|eng-ipa|êkotê|eːkuteː 2 | crm|eng-ipa|ᐁᑯᑌ|eːkuteː 3 | -------------------------------------------------------------------------------- /g2p/tests/public/data/csw.psv: -------------------------------------------------------------------------------- 1 | csw|eng-ipa|êkotê|eːkuteː 2 | csw|eng-ipa|ᐁᑯᑌ|eːkuteː 3 | -------------------------------------------------------------------------------- /g2p/tests/public/data/ctp.csv: -------------------------------------------------------------------------------- 1 | ctp,eng-arpabet,Qneᴬ,HH N EY 2 | ctp,eng-arpabet,ntkwaᴶ,N T K W AA 3 | -------------------------------------------------------------------------------- /g2p/tests/public/data/eng.csv: -------------------------------------------------------------------------------- 1 | # Make sure the same input in NFD and NFC both get handled correctly 2 | eng-ipa,eng-arpabet,ĩ,IY N 3 | eng-ipa,eng-arpabet,ĩ,IY N 4 | eng,eng-ipa,hello,hʌloʊ 5 | eng,eng-arpabet,hello,HH AH L OW 6 | -------------------------------------------------------------------------------- 
/g2p/tests/public/data/fin.psv: -------------------------------------------------------------------------------- 1 | fin|fin-ipa|nähdä|næhdæ 2 | fin|fin-ipa|polkupyörä|polkupyøræ 3 | fin|fin-ipa|haluttaa|hɑluttɑː 4 | fin|fin-ipa|tulevaisuus|tuleʋɑɪsuːs 5 | fin|fin-ipa|välinpitämättömyyksien|ʋælimpitæmættømyːksiɛn 6 | fin|fin-ipa|mihinkään|mihiŋkæːn 7 | fin|fin-ipa|nyt minä haluan juoda kahvia|nyt minæ hɑluɑn jʊɔdɑ kɑhʋiɑ 8 | fin|eng-ipa|nyt minä haluan juoda kahvia|nut minæ hɑluɑn jʊɔdɑ kɑhwiɑ 9 | fin|eng-arpabet|nyt minä haluan juoda kahvia|N UW T M IY N AE HH AA L UW AA N Y UH AO D AA K AA HH W IY AA 10 | fin|eng-arpabet|hiihtää|HH IY HH T AE 11 | fin|eng-arpabet|löylyä|L AH Y L UW AE 12 | fin|eng-arpabet|lyijy|L UW IY Y UW 13 | fin|eng-arpabet|luistaa|L UH Y S T AA 14 | -------------------------------------------------------------------------------- /g2p/tests/public/data/fn_unicode.psv: -------------------------------------------------------------------------------- 1 | fn-unicode-font|fn-unicode|qʷi∙qʷi∙diččaq|qʷi·qʷi·diččaq 2 | fn-unicode-font|fn-unicode|ล|ḥ 3 | fn-unicode-font|fn-unicode|X   ล ɤ ∛ X|x ᶿ √ ḥ ɣ · x 4 | -------------------------------------------------------------------------------- /g2p/tests/public/data/fra.psv: -------------------------------------------------------------------------------- 1 | fra-ipa|eng-ipa|ʒ|ʒ 2 | fra|fra-ipa|manger|mɑ̃ʒe 3 | fra|fra-ipa|écoutons|ekutɔ̃ 4 | fra|fra-ipa|écoutés|ekute 5 | fra|fra-ipa|écoutes|ekut 6 | fra|fra-ipa|programmeur|pʁoɡʁamøʁ 7 | fra|fra-ipa|traduction|tʁadyksiɔ̃ 8 | fra|fra-ipa|bison|bizɔ̃ 9 | fra|fra-ipa|cela|sʌla 10 | fra|fra-ipa|Noël|noɛl 11 | fra|fra-ipa|Noël|noɛl 12 | fra|fra-ipa|à côté|a kote 13 | fra|eng-ipa|à côté|ɑ koːteː 14 | fra|eng-arpabet|à côté|AA K OW T EY 15 | fra|eng-ipa|manger|mɑ̃ʒeː 16 | fra|eng-ipa|écoutons|eːkutɔ̃ 17 | fra|eng-ipa|programmeur|pʒoːɡʒɑmoːʒ 18 | fra|eng-ipa|traduction|tʒɑduksiɔ̃ 19 | fra|eng-ipa|bison|bizɔ̃ 20 | fra|fra-ipa|gagnant|ɡaɲɑ̃ 21 | 
fra|fra-ipa|êtres|ɛːtʁ 22 | fra|fra-ipa|où|u 23 | fra|fra-ipa|s|s 24 | fra|fra-ipa|tests s|tʌst s 25 | fra|fra-ipa|été e|ete ʌ 26 | -------------------------------------------------------------------------------- /g2p/tests/public/data/fra_panagrams.txt: -------------------------------------------------------------------------------- 1 | https://fr.wikipedia.org/wiki/Pangramme 2 | 3 | Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce qui lui permet de penser à la cænogénèse de l'être dont il est question dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, pense-t-il, diminue çà et là la qualité de son œuvre. 4 | Dès Noël, où un zéphyr haï me vêt de glaçons würmiens, je dîne d’exquis rôtis de bœuf au kir, à l’aÿ d’âge mûr, &cætera. (contient les 42 caractères de la langue française) (Gilles Esposito-Farèse) 5 | -------------------------------------------------------------------------------- /g2p/tests/public/data/fra_panagrams_NFD.txt: -------------------------------------------------------------------------------- 1 | https://fr.wikipedia.org/wiki/Pangramme 2 | 3 | Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce qui lui permet de penser à la cænogénèse de l'être dont il est question dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, pense-t-il, diminue çà et là la qualité de son œuvre. 4 | Dès Noël, où un zéphyr haï me vêt de glaçons würmiens, je dîne d’exquis rôtis de bœuf au kir, à l’aÿ d’âge mûr, &cætera. (contient les 42 caractères de la langue française) (Gilles Esposito-Farèse) 5 | -------------------------------------------------------------------------------- /g2p/tests/public/data/fra_simple.txt: -------------------------------------------------------------------------------- 1 | Un texte simple en français, qui passe avec scan. 
2 | Testons quand même quelques accents: èéà. 3 | -------------------------------------------------------------------------------- /g2p/tests/public/data/git.psv: -------------------------------------------------------------------------------- 1 | git|git-ipa|gwila|ɡʷilæ 2 | git|git-ipa|hlik̲'sxw|ɬiqʔsxʷ 3 | git|eng-arpabet|K̲'ay|K HH AE Y 4 | git|eng-arpabet|guts'uusgi'y|G UW T S UW S G IY HH Y 5 | -------------------------------------------------------------------------------- /g2p/tests/public/data/gwi.psv: -------------------------------------------------------------------------------- 1 | gwi|gwi-ipa|tik|tʰɪkʰ 2 | gwi|gwi-ipa|àìùèò|əɪʊɛo 3 | gwi|gwi-ipa|tt’|tʰtʼ 4 | gwi|gwi-ipa|tʼt|tʼtʰ 5 | gwi|gwi-ipa|aaeeiioouu|ɑːɛːiːouː 6 | gwi|gwi-ipa|teetl'it|tʰɛːtɬʼɪtʰ 7 | gwi|gwi-ipa|zheh|ʒɛh 8 | gwi|gwi-ipa|ąąęęįįǫǫųų|ɑ̃ːɛ̃ːĩːõũː 9 | gwi|gwi-ipa|ąęįǫų|ə̃ɛ̃ɪ̃õʊ̃ 10 | gwi|gwi-ipa|khghw|xɤʷ 11 | gwi|gwi-ipa|tsttrdzdrr|tsʰtʰʈʂʰtsʈʂɻ 12 | gwi|gwi-ipa|tth'tththdddhdh|tθʼtθʰθttθð 13 | gwi|gwi-ipa|kk'kwk'wkhkhw|kʰkʼkʰʷkʼʷxxʷ 14 | gwi|gwi-ipa|ggwghghwhw|kkʷɤɤʷhw 15 | gwi|gwi-ipa|gwich'in|kʷɪtʃʼɪn 16 | gwi|gwi-ipa|ai’|əɪʔ 17 | gwi|gwi-ipa|jnjj|tʃⁿtʃtʃ 18 | gwi|gwi-ipa|ltl'tldlɬ'|ltɬʼtɬʰtɬɬʔ 19 | gwi|gwi-ipa|bmmb|bmᵐp 20 | gwi|gwi-ipa|nnd|nⁿt 21 | gwi|gwi-ipa|ssrshs|sʂʃs 22 | gwi|gwi-ipa|vyr|vjɻ 23 | -------------------------------------------------------------------------------- /g2p/tests/public/data/haa.csv: -------------------------------------------------------------------------------- 1 | haa,haa-ipa,tth’ą̂ą̈̌,tθʼæ̃̂ɑ̃̌ 2 | haa,eng-ipa,tth’ą̂ą̈̌,tθæ̃ɑ̃ 3 | haa,haa-ipa,nhdhthtthddhwhshzhchch’yhkhghhʼ,n̥ðθtθʰtθw̥ʃʒtʃʰtʃʼj̊xɣhʔ 4 | haa,haa-ipa,nddh,ntθ 5 | haa,haa-ipa,njonjkënjp,ŋoⁿkkʁəⁿkpʰ 6 | haa,haa-ipa,zrhzrsrtrdrtr’r,zɻ̊ʐʂʈʂʰʈʂʈʂʼɻ 7 | haa,haa-ipa,chshzhjchʼyhj̱,tʃʰʃʒtʃtʃʼj̊dʒ 8 | haa,haa-ipa,eyewayawoyoeëäwä,eieoæiæooioeəaoɑ 9 | haa,haa-ipa,ḏgʼtldltl’lłh,dkʔtɬʰtɬtɬʼlɬh 10 | haa,haa-ipa,ywwhbpmbnszë̀ù,jww̥ppʰᵐpnszə̀ù 11 | haa,eng-ipa,ywwhbpmbnszë̀ù,jwwppmpnszəu 12 | 
haa,haa-ipa,kěwgǒy,kʁěoɡʁǒi 13 | haa,eng-ipa,kěwgǒy,kʒɛɔɡʒɔi 14 | haa,haa-ipa,ą̈̀wlį̌w,ã̀oɬɮĩ̌u 15 | haa,haa-ipa,į̌ǫ̀,ĩ̌õ̀ 16 | -------------------------------------------------------------------------------- /g2p/tests/public/data/ikt.psv: -------------------------------------------------------------------------------- 1 | ikt|ikt-ipa|rřřȓȓ'’łsššnññ|ʁɻɻɟɟʔʔɬsʂʂnɲɲ 2 | ikt|ikt-ipa|niriyuq|niʁijuq 3 | ikt|eng-ipa|niriyuq|niʒijuk 4 | ikt|eng-arpabet|niriyuq|N IY ZH IY Y UW K 5 | ikt|ikt-ipa|niriřuq|niʁiɻuq 6 | ikt|ikt-ipa|niriřuq|niʁiɻuq 7 | ikt|eng-ipa|niriřuq|niʒijuk 8 | ikt|eng-arpabet|niriřuq|N IY ZH IY Y UW K 9 | ikt|ikt-ipa|pingahut|piŋahut 10 | ikt|eng-ipa|pingahut|piŋɑhut 11 | ikt|eng-arpabet|pingahut|P IY NG AA HH UW T 12 | ikt|ikt-ipa|piŋahut|piŋahut 13 | ikt|eng-ipa|piŋahut|piŋɑhut 14 | ikt|eng-arpabet|piŋahut|P IY NG AA HH UW T 15 | ikt|ikt-ipa|akhunaaq|akhunaːq 16 | ikt|eng-ipa|akhunaaq|ɑkhunɑk 17 | ikt|eng-arpabet|akhunaaq|AA K HH UW N AA K 18 | ikt|ikt-ipa|akłunaaq|akɬunaːq 19 | ikt|eng-ipa|akłunaaq|ɑksunɑk 20 | ikt|eng-arpabet|akłunaaq|AA K S UW N AA K 21 | ikt|ikt-ipa|uqhuq|uqhuq 22 | ikt|eng-ipa|uqhuq|ukhuk 23 | ikt|eng-arpabet|uqhuq|UW K HH UW K 24 | ikt|ikt-ipa|uqšuq|uqʂuq 25 | ikt|eng-ipa|uqšuq|ukʃuk 26 | ikt|eng-arpabet|uqšuq|UW K SH UW K 27 | ikt|ikt-ipa|uqsuq|uqsuq 28 | ikt|eng-ipa|uqsuq|uksuk 29 | ikt|eng-arpabet|uqsuq|UW K S UW K 30 | ikt|ikt-ipa|quana|quana 31 | ikt|eng-ipa|quana|kuɑnɑ 32 | ikt|ikt-ipa|ma'na|maʔna 33 | ikt|ikt-ipa|ma’na|maʔna 34 | ikt|eng-ipa|ma'na|mɑʔnɑ 35 | ikt|eng-arpabet|ma'na|M AA HH N AA 36 | ikt|eng-arpabet|quana|K UW AA N AA 37 | ikt|ikt-ipa|qujanaqqutit|qud͡ʒanaqqutit 38 | ikt|eng-ipa|qujanaqqutit|kudʒɑnɑkkutit 39 | ikt|eng-arpabet|qujanaqqutit|K UW JH AA N AA K K UW T IY T 40 | -------------------------------------------------------------------------------- /g2p/tests/public/data/iku-sro.psv: -------------------------------------------------------------------------------- 1 | 
iku-sro|iku-sro-ipa|yrřřȓȓłsššnññ|jʁɟɟɟɟɬsʂʂnɲɲ 2 | iku-sro|iku-sro-ipa|nirijuq|niʁijuq 3 | iku-sro|eng-ipa|nirijuq|niʒijuk 4 | iku-sro|eng-arpabet|nirijuq|N IY ZH IY Y UW K 5 | iku-sro|iku-sro-ipa|pingahut|piŋahut 6 | iku-sro|eng-ipa|pingahut|piŋɑhut 7 | iku-sro|eng-arpabet|pingahut|P IY NG AA HH UW T 8 | iku-sro|iku-sro-ipa|akłunaaq|akɬunaːq 9 | iku-sro|eng-ipa|akłunaaq|ɑksunɑk 10 | iku-sro|eng-arpabet|akłunaaq|AA K S UW N AA K 11 | iku-sro|iku-sro-ipa|atsunaaq|atsunaːq 12 | iku-sro|eng-ipa|atsunaaq|ɑtsunɑk 13 | iku-sro|eng-arpabet|atsunaaq|AA T S UW N AA K 14 | iku-sro|iku-sro-ipa|uqsuq|uqsuq 15 | iku-sro|eng-ipa|uqsuq|uksuk 16 | iku-sro|eng-arpabet|uqsuq|UW K S UW K 17 | iku-sro|iku-sro-ipa|aakka|aːkka 18 | iku-sro|eng-ipa|aakka|ɑkkɑ 19 | iku-sro|eng-arpabet|aakka|AA K K AA 20 | iku-sro|iku-sro-ipa|qujannamiik|qujannamiːk 21 | iku-sro|eng-ipa|qujannamiik|kujɑnnɑmik 22 | iku-sro|eng-arpabet|qujannamiik|K UW Y AA N N AA M IY K 23 | -------------------------------------------------------------------------------- /g2p/tests/public/data/iku.psv: -------------------------------------------------------------------------------- 1 | iku|iku-ipa|ᐃᒑᒥᒃ|iɡaːmik 2 | iku|eng-ipa|ᐃᒑᒥᒃ|iɡɑmik 3 | iku|eng-arpabet|ᐃᒑᒥᒃ|IY G AA M IY K 4 | -------------------------------------------------------------------------------- /g2p/tests/public/data/lml.psv: -------------------------------------------------------------------------------- 1 | lml|lml-ipa|mwaron̄g|mwɑɹoːŋɡ 2 | lml|lml-ipa|lagoana|lɑɣoːɑnɑ 3 | lml|lml-ipa|n̄oto|ŋoːtoː 4 | lml|lml-ipa|mulei|mulej 5 | lml|lml-ipa|mahamai|mɑhɑmɑj 6 | lml|lml-ipa|vevuri|veːvuɹi 7 | lml|lml-ipa|eig|ejɡ 8 | lml|lml-ipa|eiḡ|ejŋɡ 9 | lml|eng-arpabet|mwaron̄g|M W AA R OW NG 10 | lml|eng-arpabet|lagoana|L AA G OW AA N AA 11 | lml|eng-arpabet|n̄oto|NG OW T OW 12 | lml|eng-arpabet|mulei|M UW L EY 13 | lml|eng-arpabet|mahamai|M AA HH AA M AA Y 14 | lml|eng-arpabet|vevuri|V EY V UW R IY 15 | lml|eng-arpabet|eig|EY G 16 | lml|eng-arpabet|eiḡ|EY NG 17 | 
-------------------------------------------------------------------------------- /g2p/tests/public/data/mic.psv: -------------------------------------------------------------------------------- 1 | mic|mic-ipa|tiꞌam|tiːɑm 2 | mic|mic-ipa|Miꞌkmaq|miːɡmɑx 3 | mic|mic-ipa|mi'kmaq|miːɡmɑx 4 | mic|mic-ipa|sqolj|əsxoltʃ 5 | mic|mic-ipa|sq|səx 6 | 7 | # Test that ' is tokenized correctly as part of the words. 8 | mic|mic-ipa|mípi'tiꞌnála'jaꞌpéke'qeꞌ|miːbiːdiːnɑːlɑːtʃɑːpeːɡeːɣeː 9 | 10 | # Problem words detected by Marc, fixed by adding o->o "no-op" rule and regenerating mic-ipa to eng-ipa 11 | mic|eng-arpabet|Nsituoqn|N Z IY D UW AO G N 12 | mic|eng-arpabet|koqoey|K AO G AO EY Y 13 | mic|eng-arpabet|Penoqite'lsultinen|P EY N AO G IY D EY L Z UW L D IY N EY N 14 | -------------------------------------------------------------------------------- /g2p/tests/public/data/moe.psv: -------------------------------------------------------------------------------- 1 | moe|moe-ipa|nitaimunit|niteːjmunit 2 | moe|eng-ipa|ishi-mamitunenitenan|iʃi-məmituneːniteːnən 3 | -------------------------------------------------------------------------------- /g2p/tests/public/data/oji-syl.psv: -------------------------------------------------------------------------------- 1 | oji-syl|oji|ᒪᐦᑿ|mahkwa 2 | oji-syl|oji|ᐃᐦᑵ|ihkwe 3 | oji-syl|oji|ᐃᐦᑵᐘᐠ ᓂᑲᒧᐘᐠ᙮|ihkwewak nikamowak. 4 | oji-syl|oji|ᐱᓀᔑᐣᐦᐢ ᐊᒷ ᓴᑭᒣ᙮|pineshinhs amwa sakime. 
5 | oji-syl|oji|ᐊᐣ ᐁᔑᓂᑲᓱᔭᐣ|an eshinikasoyan 6 | oji-syl|oji|ᓂᓇᑕᐏᐦᐃᐍ|ninatawihiwe 7 | oji-syl|oji|ᒪᒋᐱᓱ|machipiso 8 | oji-syl|oji|ᒝᐱᐡ|chwapish 9 | oji-syl|oji|ᐊᓂᒧᐦᐡ|animohsh 10 | oji-syl|oji|ᑎᐦᑎᐻᐱᐡᑭᑲᐣ|tihtipwepishkikan 11 | oji-syl|oji|ᒥᓂᐦᐠ|minihk 12 | oji-syl|oji|ᒪᓯᓇᐦᐃᑲᐣ|masinahikan 13 | -------------------------------------------------------------------------------- /g2p/tests/public/data/oji.tsv: -------------------------------------------------------------------------------- 1 | oji oji-ipa aagwiitoo’ooza ɑːɡwiːtoːʔoːzʌ 2 | -------------------------------------------------------------------------------- /g2p/tests/public/data/srs.psv: -------------------------------------------------------------------------------- 1 | srs|srs-ipa|dada|tʌ̄tʌ̄ 2 | srs|srs-ipa|t'at'a|tʼʌ̄tʼʌ̄ 3 | srs|srs-ipa|tata|tʰʌ̄tʰʌ̄ 4 | srs|eng-arpabet|dada t'at'a tata|D AH D AH T AH T AH T AH T AH 5 | srs|srs-ipa|óo oó òo oò|ɔ᷇ː ɔ᷄ː ɔ᷅ː ɔ᷆ː 6 | srs|eng-arpabet|óo oó òo oò|OW OW OW OW 7 | -------------------------------------------------------------------------------- /g2p/tests/public/data/str.tsv: -------------------------------------------------------------------------------- 1 | str eng-arpabet X̱I¸ÁM¸ SH W IY HH EY M HH 2 | 3 | # Variants for cedilla: comma, space+combining cedilla, space+combining comma below 4 | str eng-arpabet X̱I,ÁM ̧ SH W IY HH EY M HH 5 | str eng-arpabet X̱I,ÁM ̦ SH W IY HH EY M HH 6 | 7 | # 2024 update: cedilla stays as such in equiv, but is turned into glottal stop in IPA 8 | # TODO: for comma, disambituate between glottal stop and punctuation 9 | str str-equiv X̱I¸ÁM ̦ X̱I¸ÁM¸ 10 | str str-equiv X̱I,ÁM ̧ X̱I,ÁM¸ 11 | str str-ipa X̱I¸ÁM ̦ χʷiʔemʔ 12 | str str-ipa X̱I,ÁM ̧ χʷiʔemʔ 13 | -------------------------------------------------------------------------------- /g2p/tests/public/data/str_un_human_rights.txt: -------------------------------------------------------------------------------- 1 | https://en.wikipedia.org/wiki/Saanich_dialect 2 | 3 | EWENE SÁN E TŦE 
U¸ MEQ EȽTÁLṈEW̱ Ȼ SNI¸S SQÍEŦ E TŦE XĆṈINS. 4 | U¸ XENENEȻEL TŦE U¸ MEQ EȽTÁLṈEW̱ E Ȼ SI¸ÁM¸TEṈS. 5 | ĆŚḰÁLEȻEN TŦE U¸ MEQ SÁN. 6 | ͸ Ȼ S¸Á¸ITEṈS TŦE U¸ MEQ SÁN X̱EN¸IṈ E TŦE SĆÁ¸ĆE¸S. 7 | 8 | FGR 9 | abcdefghijklqrstwxyz 10 | -------------------------------------------------------------------------------- /g2p/tests/public/data/tau.psv: -------------------------------------------------------------------------------- 1 | tau|tau-ipa|sh'oo|ʃʔoː 2 | tau|tau-ipa|Jign|tʃiŋ 3 | tau|tau-ipa|maasee'|maːseːʔ 4 | tau|tau-ipa|betlanh|bɛtɬan̥ 5 | tau|tau-ipa|do'eent'aa|tɔʔeːntʼaː 6 | tau|tau-ipa|aaeeooiiuuioiaea|aːeːoːiːuːioiaea 7 | tau|tau-ipa|ąąęęįįǫǫųų|ãːẽːĩːõːũː 8 | tau|tau-ipa|ąęįųǫ|ãɛ̃ĩũɔ̃ 9 | tau|tau-ipa|àìùèò|àìùɛ̀ɔ̀ 10 | tau|tau-ipa|aäüüü|aʌɘːɘ 11 | tau|tau-ipa|thtth'tthht't|θtθʼtθhtʼt 12 | tau|tau-ipa|ddhdh|tθð 13 | tau|tau-ipa|mmbbw|mᵐbbw 14 | tau|tau-ipa|tdt'nnhndstsdzts'|tttʼnn̥ⁿtstststsʼ 15 | tau|tau-ipa|łtldltl'l|ɬtɬtɬtɬʼl 16 | tau|tau-ipa|shchjch'|ʃtʃtʃtʃʼ 17 | tau|tau-ipa|shyyyh|ʃʲjj̊ 18 | tau|tau-ipa|kgk'gnx|kkkʼŋx 19 | tau|tau-ipa|h'|hʔ 20 | tau|eng-ipa|do'eent'aa àìùèò ąęįųǫ h' kgk'gnx|tɔʔeːnteː æiuɛɔ æ̃ɛ̃ĩũɔ̃ hʔ kkkŋk 21 | tau|eng-arpabet|do'eent'aa àìùèò ąęįųǫ h' kgk'gnx|T AO HH EY N T EY AE IY UW EH AO AE N EH N IY N UW N AO N HH HH K K K NG K 22 | -------------------------------------------------------------------------------- /g2p/tests/public/data/tce.csv: -------------------------------------------------------------------------------- 1 | tce,tce-ipa,ch’e,tʃʼɛ 2 | tce,tce-ipa,ghw'nj,ɣʷʔntʃ 3 | tce,tce-ipa,kwǚ\u0328,kʰʷʉ̃ 4 | tce,tce-ipa,ä̀w,əw 5 | tce,tce-ipa,āyy,aij 6 | tce,tce-ipa,dzzh,tsʒ 7 | tce,tce-ipa,sih,sih 8 | tce,tce-ipa,tth,tθʰ 9 | tce,tce-ipa,dhh,ðh 10 | tce,tce-ipa,a\u0328y,ãi 11 | tce,tce-ipa,btl,ptɬʰ 12 | tce,tce-ipa,rkh,ɹx 13 | tce,tce-ipa,mnlg,mnlɡ 14 | tce,tce-ipa,dlsh,tɬʃ 15 | tce,tce-ipa,ä́\u0328ł,ʌ̃ɬ 16 | -------------------------------------------------------------------------------- /g2p/tests/public/data/tli.csv: 
-------------------------------------------------------------------------------- 1 | tli,tli-ipa,ei,eː 2 | tli,tli-ipa,ee,iː 3 | tli,tli-ipa,aa,aː 4 | tli,tli-ipa,oo,uː 5 | tli,tli-ipa,i,i 6 | tli,tli-ipa,e,e 7 | tli,tli-ipa,a,a 8 | tli,tli-ipa,u,u 9 | tli,tli-ipa,𝚘̲,o 10 | tli,tli-ipa,𝚘̲o,oː 11 | tli,tli-ipa,b,p 12 | tli,tli-ipa,p,pʰ 13 | tli,tli-ipa,d,t 14 | tli,tli-ipa,ti,tʰi 15 | tli,tli-ipa,t',tʼ 16 | tli,tli-ipa,dz,ts 17 | tli,tli-ipa,tsei,tsʰeː 18 | tli,tli-ipa,sh,ʃ 19 | tli,tli-ipa,j,tʃ 20 | tli,tli-ipa,ch𝚘̲o,tʃʰoː 21 | tli,tli-ipa,ch',tʃʼ 22 | tli,tli-ipa,gw,kʷ 23 | tli,tli-ipa,kw,kʰʷ 24 | tli,tli-ipa,k'w,kʼʷ 25 | tli,eng-ipa,k'w,kw 26 | tli,tli-ipa,dl,tɬ 27 | tli,tli-ipa,tlu,tɬʰu 28 | tli,tli-ipa,xw,xʷ 29 | tli,tli-ipa,kawe𝚔̲i𝚐̲,kʰaweqʰiq 30 | tli,tli-ipa,.woo,ʔʷuː 31 | tli,tli-ipa,.woo.,ʔʷuː. 32 | tli,tli-ipa,𝚔̲w,qʰʷ 33 | tli,tli-ipa,hw,hʷ 34 | tli,tli-ipa,y,j 35 | tli,tli-ipa,𝚡̲w,χʷ 36 | tli,tli-ipa,𝚕̲,l 37 | tli,tli-ipa,w𝚘̲,wo 38 | tli,tli-ipa,ÿ,ɰ 39 | tli,tli-ipa,tâch,tʰatʃ 40 | tli,eng-ipa,tâcha,tætʃæ 41 | tli,eng-ipa,ch'𝚘̲o,tʃoː 42 | tli,eng-arpabet,ch'𝚘̲o,CH OW 43 | tli,eng-arpabet,tsaa,T S EY 44 | tli,eng-ipa,x',k 45 | -------------------------------------------------------------------------------- /g2p/tests/public/data/ttm.csv: -------------------------------------------------------------------------------- 1 | ttm,ttm-ipa,NJę,ⁿtʃẽ 2 | ttm,ttm-ipa,Zha,ʒɑ 3 | ttm,ttm-ipa,ddhau,tθʌʊ 4 | ttm,ttm-ipa,dhth,ðθ 5 | ttm,ttm-ipa,k'w'o,kʼʷʔo 6 | ttm,ttm-ipa,dth,tθ 7 | ttm,ttm-ipa,ghwli,ɣʷli 8 | ttm,ttm-ipa,yai',jʌɪʔ 9 | ttm,ttm-ipa,dzuw,tsuw 10 | ttm,ttm-ipa,dlä,tɬʌ 11 | ttm,ttm-ipa,shłú,ʃɬu 12 | ttm,ttm-ipa,khtsae,xtsʰæ 13 | ttm,ttm-ipa,rch,ɹtʃʰ 14 | ttm,ttm-ipa,sw'aę̄,swʔæ̃ 15 | ttm,ttm-ipa,kg,kʰk 16 | -------------------------------------------------------------------------------- /g2p/tests/public/data/win.csv: -------------------------------------------------------------------------------- 1 | win,eng-ipa,ąą,æ̃ 2 | win,eng-ipa,įį,ẽː 3 | 
win,eng-ipa,oo,oː 4 | win,eng-ipa,ō,oː 5 | win,eng-ipa,ee,eː 6 | win,eng-ipa,uu,u 7 | win,eng-ipa,t',t 8 | win,eng-ipa,p',p 9 | win,eng-arpabet,ąą,AE N 10 | win,eng-arpabet,įį,EY N 11 | win,eng-arpabet,oo,OW 12 | win,eng-arpabet,ō,OW 13 | win,eng-arpabet,ee,EY 14 | win,eng-arpabet,uu,UW 15 | win,eng-arpabet,t',T 16 | win,eng-arpabet,p',P 17 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/.gitignore: -------------------------------------------------------------------------------- 1 | generated_add.yaml 2 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/abbreviation_config-g2p.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - language_name: Abbreviation 3 | display_name: Minimal to Minimal 4 | rules_path: abbreviation_mapping.csv 5 | in_lang: min 6 | out_lang: min 7 | rule_ordering: as-written 8 | case_sensitive: false 9 | escape_special: false 10 | norm_form: "NFD" 11 | reverse: false 12 | abbreviations_path: abbreviations.substring.csv 13 | authors: 14 | - Aidan Pine 15 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/abbreviation_mapping.csv: -------------------------------------------------------------------------------- 1 | VOWEL_HI,1 2 | VOWEL,2 3 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/abbreviations.csv: -------------------------------------------------------------------------------- 1 | VOWEL,a,e,i,o,u 2 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/abbreviations.json: -------------------------------------------------------------------------------- 1 | { 2 | "VOWEL": [ 3 | "a", 4 | "e", 5 | "i", 6 | "o", 7 | "u" 8 | ] 9 | } 10 | 
-------------------------------------------------------------------------------- /g2p/tests/public/mappings/abbreviations.psv: -------------------------------------------------------------------------------- 1 | VOWEL|a|e|i|o|u 2 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/abbreviations.substring.csv: -------------------------------------------------------------------------------- 1 | VOWEL,a,e,i,o,u 2 | VOWEL_HI,i,u 3 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/abbreviations.tsv: -------------------------------------------------------------------------------- 1 | VOWEL a e i o u 2 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/bad_langs/lang1/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | display_name: Minimal to Minimal 2 | rules_path: minimal.csv 3 | in_lang: min 4 | out_lang: min 5 | rule_ordering: as-written 6 | case_sensitive: false 7 | escape_special: true 8 | reverse: true 9 | authors: 10 | - Somebody 11 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/bad_langs/lang1/minimal.csv: -------------------------------------------------------------------------------- 1 | a,b,a,b 2 | 1,1,1,1 3 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/bad_langs2/lang1/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - display_name: Minimal to Minimal 3 | rules_path: minimal.csv 4 | in_lang: min 5 | out_lang: min 6 | rule_ordering: as-written 7 | case_sensitive: false 8 | escape_special: true 9 | reverse: true 10 | authors: 11 | - Somebody 12 | -------------------------------------------------------------------------------- 
/g2p/tests/public/mappings/bad_langs2/lang1/minimal.csv: -------------------------------------------------------------------------------- 1 | a,b,a,b 2 | 1,1,1,1 3 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/bad_lexicon_config-g2p.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - language_name: English 3 | display_name: English to ARPABET 4 | type: lexicon 5 | alignments_path: hello.aligned.foo.txt 6 | out_delimiter: " " 7 | in_lang: eng 8 | out_lang: eng-arpabet 9 | case_sensitive: false 10 | norm_form: 'NFC' 11 | authors: 12 | - David Huggins-Daines 13 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/case-feed/README.md: -------------------------------------------------------------------------------- 1 | #### case-feeding mapping 2 | 3 | Use case: for spelling conversion where all rules have to prevent feeding of 4 | output text to input text of other rules, but need to allow feeding of output 5 | text to `context_before` or `context_after`. 6 | 7 | This three-step mapping: 8 | - first lowercases the input; 9 | - then applies the rules from lowercase input to uppercase output, in such a 10 | way that anything that's been converted cannot be converted again, similar to 11 | what `prevent_feeding` does, but allowing the context to specify upper and 12 | lower case variants to allow both pre- and post-mapping matches; 13 | - and finally lowercases the output again. 14 | 15 | This ends up being equivalent to a case-insensitive prevent-feeding mapping, 16 | except for the behaviour of contexts. 
17 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/case-feed/cf-in-lc-to-cf-out-uc.csv: -------------------------------------------------------------------------------- 1 | ka-,KE-,, 2 | atin,ETIN,, 3 | in,IN,,[aAeE] 4 | in,AN,, 5 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/case-feed/config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Example using case to prevent feeding rules but feed context 3 | mappings: 4 | - display name: case-feed input lowercaser 5 | rules_path: empty.csv 6 | in_lang: cf-in 7 | out_lang: cf-in-lc 8 | case_sensitive: false 9 | authors: 10 | - Eric Joanis 11 | <<: *shared 12 | - display_name: case-feed main mapping in is lc, out is uc, thus no feeding 13 | rules_path: cf-in-lc-to-cf-out-uc.csv 14 | in_lang: cf-in-lc 15 | out_lang: cf-out-uc 16 | case_sensitive: true 17 | prevent_feeding: false 18 | authors: 19 | - Eric Joanis 20 | <<: *shared 21 | - display name: case-feed output lowercaser 22 | rules_path: empty.csv 23 | in_lang: cf-out-uc 24 | out_lang: cf-out 25 | case_sensitive: false 26 | authors: 27 | - Eric Joanis 28 | <<: *shared 29 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/case-feed/empty.csv: -------------------------------------------------------------------------------- 1 | t,t,,Actually empty is illegal so create at least one dummy rule 2 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/compose.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Composition tests 3 | mappings: 4 | - display_name: Step 1 5 | rules_path: compose1-2.csv 6 | in_lang: c1 7 | out_lang: c2 8 | norm_form: NFC 9 | authors: 10 | - Eric Joanis 11 | - 
display_name: Step 2 12 | rules_path: compose2-3.csv 13 | in_lang: c2 14 | out_lang: c3 15 | norm_form: NFD 16 | authors: 17 | - Eric Joanis 18 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/compose1-2.csv: -------------------------------------------------------------------------------- 1 | a,ab 2 | bc,c 3 | é,ò 4 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/compose2-3.csv: -------------------------------------------------------------------------------- 1 | a,d 2 | bc,e 3 | g{1}h{2}i{3},G{2}H{1}I{3}J{1} 4 | m{1}n{2},N{2}M{1} 5 | ò,ù 6 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/deletion.csv: -------------------------------------------------------------------------------- 1 | a, 2 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/deletion.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "in": "a", 4 | "out": "" 5 | } 6 | ] 7 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/deletion_config_csv.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - language_name: Minimal 3 | display_name: Minimal to Minimal 4 | rules_path: deletion.csv 5 | in_lang: min 6 | out_lang: min 7 | rule_ordering: as-written 8 | case_sensitive: false 9 | escape_special: true 10 | norm_form: 'NFD' 11 | reverse: false 12 | authors: 13 | - Aidan Pine 14 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/deletion_config_json.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - language_name: Minimal 3 | display_name: Minimal to Minimal 4 | rules_path: deletion.json 5 | 
in_lang: min 6 | out_lang: min 7 | rule_ordering: as-written 8 | case_sensitive: false 9 | escape_special: true 10 | norm_form: 'NFD' 11 | reverse: false 12 | authors: 13 | - Aidan Pine 14 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/g2p_studio.csv: -------------------------------------------------------------------------------- 1 | aa,ɑː,, 2 | a,ɑ,, 3 | ,,, 4 | ,,, 5 | ,,, 6 | ,,, 7 | ,,, 8 | ,,, 9 | ,,, 10 | ,,, 11 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/g2p_studio2.csv: -------------------------------------------------------------------------------- 1 | ee,eː,, 2 | ,,, 3 | ,,, 4 | ,,, 5 | ,,, 6 | ,,, 7 | ,,, 8 | ,,, 9 | ,,, 10 | ,,, 11 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/gen-map-1.csv: -------------------------------------------------------------------------------- 1 | e,e 2 | o,o 3 | b,b 4 | l,l 5 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/gen-map-2.csv: -------------------------------------------------------------------------------- 1 | e,ɛ 2 | o,ɔ 3 | d,d 4 | n,n 5 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/gen-map-3a.csv: -------------------------------------------------------------------------------- 1 | i,i 2 | o,o 3 | k,k 4 | m,m 5 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/gen-map-3b.csv: -------------------------------------------------------------------------------- 1 | u,u 2 | y,y 3 | s,s 4 | n,ɲ 5 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/gen-map_config-g2p.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: 
GenMap 3 | authors: 4 | - Eric Joanis 5 | mappings: 6 | - display_name: GenMap 1 to IPA 7 | rules_path: gen-map-1.csv 8 | in_lang: gm1 9 | out_lang: gm1-ipa 10 | <<: *shared 11 | - display_name: GenMap 2 to IPA 12 | rules_path: gen-map-2.csv 13 | in_lang: gm2 14 | out_lang: gm2-ipa 15 | <<: *shared 16 | - display_name: GenMap 3a to IPA 17 | rules_path: gen-map-3a.csv 18 | in_lang: gm3a 19 | out_lang: gm3-ipa 20 | - display_name: GenMap 3b to IPA 21 | rules_path: gen-map-3b.csv 22 | in_lang: gm3b 23 | out_lang: gm3-ipa 24 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/gm1-ipa_to_gm2-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "e", "out": "ɛ"}, 3 | {"in": "o", "out": "ɔ"}, 4 | {"in": "b", "out": "d"}, 5 | {"in": "l", "out": "n"} 6 | ] 7 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/gm2-ipa_to_gm3-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "ɛ", "out": "i"}, 3 | {"in": "ɔ", "out": "o"}, 4 | {"in": "d", "out": "s"}, 5 | {"in": "n", "out": "m"} 6 | ] 7 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/gm3-ipa_to_gm2-ipa.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"in": "i", "out": "ɛ"}, 3 | {"in": "o", "out": "ɔ"}, 4 | {"in": "k", "out": "d"}, 5 | {"in": "m", "out": "n"}, 6 | {"in": "u", "out": "ɔ"}, 7 | {"in": "y", "out": "ɛ"}, 8 | {"in": "s", "out": "d"}, 9 | {"in": "ɲ", "out": "n"} 10 | ] 11 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/hello.aligned.txt: -------------------------------------------------------------------------------- 1 | h}HH e}EH l|l}L o}OW 2 | y}Y o|u}UH '}_ r|e}R 3 | b}_ o}_ g}_ u}_ s}_ 4 | 
-------------------------------------------------------------------------------- /g2p/tests/public/mappings/lexicon_config-g2p.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - language_name: English 3 | display_name: English to ARPABET 4 | type: lexicon 5 | alignments_path: hello.aligned.txt 6 | out_delimiter: " " 7 | in_lang: eng 8 | out_lang: eng-arpabet 9 | case_sensitive: false 10 | norm_form: 'NFC' 11 | authors: 12 | - David Huggins-Daines 13 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/malformed_config-g2p.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - language_name: Minimal 3 | - display_name: Minimal to Minimal 4 | - in_lang: min 5 | out_lang: min 6 | rule_ordering: as-written 7 | case_sensitive: false 8 | escape_special: true 9 | norm_form: 'NFD' 10 | reverse: true 11 | authors: 12 | - Aidan Pine 13 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/minimal.csv: -------------------------------------------------------------------------------- 1 | a,b,a,b 2 | 1,1,1,1 3 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/minimal.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "in": "a", 4 | "out": "b", 5 | "context_before": "a", 6 | "context_after": "b" 7 | }, 8 | { 9 | "in": "1", 10 | "out": "1", 11 | "context_before": "1", 12 | "context_after": "1" 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/minimal.psv: -------------------------------------------------------------------------------- 1 | a|b|a|b 2 | 1|1|1|1 3 | -------------------------------------------------------------------------------- 
/g2p/tests/public/mappings/minimal.tsv: -------------------------------------------------------------------------------- 1 | a b a b 2 | 1 1 1 1 3 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/minimal.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/tests/public/mappings/minimal.xlsx -------------------------------------------------------------------------------- /g2p/tests/public/mappings/minimal_config-g2p.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - language_name: Minimal 3 | display_name: Minimal to Minimal 4 | rules_path: minimal.csv 5 | in_lang: min 6 | out_lang: min 7 | rule_ordering: as-written 8 | case_sensitive: false 9 | escape_special: true 10 | norm_form: 'NFD' 11 | reverse: true 12 | authors: 13 | - Aidan Pine 14 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/minimal_configs.yaml: -------------------------------------------------------------------------------- 1 | <<: &shared 2 | language_name: Minimal 3 | mappings: 4 | - language_name: Minimal 5 | display_name: Minimal CSV to Minimal 6 | rules_path: minimal.csv 7 | in_lang: min 8 | out_lang: min 9 | rule_ordering: as-written 10 | case_sensitive: false 11 | escape_special: true 12 | norm_form: 'NFD' 13 | reverse: true 14 | authors: 15 | - Aidan Pine 16 | <<: *shared 17 | - language_name: Minimal 18 | display_name: Minimal TSV to Minimal 19 | rules_path: minimal.tsv 20 | in_lang: min 21 | out_lang: min 22 | rule_ordering: as-written 23 | case_sensitive: false 24 | escape_special: true 25 | norm_form: 'NFD' 26 | reverse: true 27 | authors: 28 | - Aidan Pine 29 | <<: *shared 30 | - language_name: Minimal 31 | display_name: Minimal PSV to Minimal 32 | rules_path: minimal.psv 33 | in_lang: 
min 34 | out_lang: min 35 | rule_ordering: as-written 36 | case_sensitive: false 37 | escape_special: true 38 | norm_form: 'NFD' 39 | reverse: true 40 | authors: 41 | - Aidan Pine 42 | <<: *shared 43 | - language_name: Minimal 44 | display_name: Minimal JSON to Minimal 45 | rules_path: minimal.json 46 | in_lang: min 47 | out_lang: min 48 | rule_ordering: as-written 49 | case_sensitive: false 50 | escape_special: true 51 | norm_form: 'NFD' 52 | reverse: true 53 | authors: 54 | - Aidan Pine 55 | <<: *shared 56 | - language_name: Minimal 57 | display_name: Minimal XLSX to Minimal 58 | rules_path: minimal.xlsx 59 | in_lang: min 60 | out_lang: min 61 | rule_ordering: as-written 62 | case_sensitive: false 63 | escape_special: true 64 | norm_form: 'NFD' 65 | reverse: true 66 | authors: 67 | - Aidan Pine 68 | <<: *shared 69 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/no_escape.csv: -------------------------------------------------------------------------------- 1 | \?,ʔ 2 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/no_mappings_key.yaml: -------------------------------------------------------------------------------- 1 | language_name: Null 2 | display_name: Null to Null 3 | rules_path: null.csv 4 | in_lang: null-in 5 | out_lang: null-out 6 | rule_ordering: as-written 7 | case_sensitive: false 8 | authors: 9 | - Eric Joanis 10 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/nofeed-indices.csv: -------------------------------------------------------------------------------- 1 | a{1}b{2},ce{2}d{1} 2 | a{1}ā{2},aʼ{1}a{2} 3 | d{1}ef{2},gh{1}i{2} 4 | klm,nop 5 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/nofeed-indices.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - 
language_name: Mapping with index and prevent-feeding 3 | display_name: nofeed-index 4 | in_lang: nofeed-indices-in 5 | out_lang: nofeed-indices-out 6 | type: mapping 7 | prevent_feeding: true 8 | authors: 9 | - Eric Joanis 10 | rules_path: nofeed-indices.csv 11 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/null.csv: -------------------------------------------------------------------------------- 1 | a,b,, 2 | ,,, 3 | d,e,, 4 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/null_config-g2p.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - language_name: Null 3 | display_name: Null to Null 4 | rules_path: null.csv 5 | in_lang: null-in 6 | out_lang: null-out 7 | rule_ordering: as-written 8 | case_sensitive: false 9 | authors: 10 | - Eric Joanis 11 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/rule-ordering.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - language_name: Minimal 3 | display_name: Minimal to Minimal 4 | rules_path: minimal.csv 5 | in_lang: min 6 | out_lang: min 7 | rule_ordering: apply-longest-first 8 | case_sensitive: false 9 | escape_special: true 10 | norm_form: 'NFD' 11 | reverse: true 12 | authors: 13 | - Eddie Antonio Santos 14 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/test.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - language_name: Local Config 3 | display_name: Local Config 4 | in_lang: local-config-in 5 | out_lang: local-config-out 6 | type: mapping 7 | authors: 8 | - Aidan Pine 9 | rules_path: test_to_ipa.csv 10 | - language_name: Local Config 11 | display_name: Local Config to IPA 12 | in_lang: local-config-in 13 | 
out_lang: dan-ipa 14 | type: mapping 15 | authors: 16 | - Aidan Pine 17 | rules_path: test_to_ipa.csv 18 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/test_to_ipa.csv: -------------------------------------------------------------------------------- 1 | b,a 2 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/tokenize_punct.csv: -------------------------------------------------------------------------------- 1 | A-B,c_d 2 | D,d_end,,$ 3 | -------------------------------------------------------------------------------- /g2p/tests/public/mappings/tokenize_punct_config-g2p.yaml: -------------------------------------------------------------------------------- 1 | mappings: 2 | - language_name: tok punct 3 | display_name: Tokenize Punctuation Case Insensitive 4 | rules_path: tokenize_punct.csv 5 | comment: "test mapping for Readalongs-Studio issue #40" 6 | issue_url: "https://github.com/ReadAlongs/Studio/issues/40" 7 | in_lang: tok-in 8 | out_lang: tok-out 9 | case_sensitive: false 10 | authors: 11 | - Eric Joanis 12 | -------------------------------------------------------------------------------- /g2p/tests/test_doctor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from unittest import TestCase, main 4 | 5 | from g2p.log import LOGGER 6 | from g2p.mappings.langs.utils import check_ipa_known_segs 7 | 8 | 9 | class DoctorTest(TestCase): 10 | def setUp(self): 11 | pass 12 | 13 | # the fra to fra-ipa mapping was fixed, this test no longer works 14 | def not_test_ipa_known_segs_fra(self): 15 | with self.assertLogs(LOGGER, level="WARNING") as cm: 16 | check_ipa_known_segs(["fra-ipa"]) 17 | self.assertIn("vagon", "".join(cm.output)) 18 | self.assertIn("panphon", "".join(cm.output)) 19 | self.assertGreaterEqual(len(cm.output), 2) 20 | 21 | def test_ipa_known_segs_fra_fixed(self): 22 | 
self.assertTrue(check_ipa_known_segs(["fra-ipa"])) 23 | 24 | def test_ipa_known_segs_alq(self): 25 | with self.assertLogs(LOGGER, level="WARNING") as cm: 26 | self.assertFalse(check_ipa_known_segs(["alq-ipa"])) 27 | self.assertIn("o:", "".join(cm.output)) 28 | self.assertIn("panphon", "".join(cm.output)) 29 | 30 | # this test takes 8 seconds and doesn't do anything useful: it trivially increases 31 | # code coverage but does not have enough assertions to catch a future code-breaking 32 | # change. 33 | # Migrated to test_doctor_expensive.py so we can still run it, manually or via 34 | # ./run.py all. 35 | def not_test_ipa_known_segs_all(self): 36 | with self.assertLogs(LOGGER, level="WARNING") as cm: 37 | check_ipa_known_segs() 38 | self.assertGreaterEqual(len(cm.output), 20) 39 | 40 | 41 | if __name__ == "__main__": 42 | main() 43 | -------------------------------------------------------------------------------- /g2p/tests/test_doctor_expensive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from unittest import TestCase, main 4 | 5 | from click.testing import CliRunner 6 | 7 | from g2p.cli import doctor 8 | from g2p.log import LOGGER 9 | from g2p.mappings.langs.utils import check_ipa_known_segs 10 | 11 | 12 | class ExpensiveDoctorTest(TestCase): 13 | # We segregate the expensive tests for g2p doctor in this suite which is not included 14 | # in dev, so that it doesn't slow down our Travis CI tests, but can still be run by 15 | # hand when desired. 16 | # These tests are not very good because they don't assert enough to make sure doctor 17 | # actually works, but they still exercise the code. 18 | # 19 | # This test suite is deliberately left out of run.py: it will only get run if you run 20 | # ./run.py all, or ./test_doctor_expensive.py. 21 | 22 | # Migrated here from test_cli.py 23 | def test_doctor_cli(self): 24 | # TODO: assert something more useful here... 
25 | # This test simulates calling "g2p doctor" on the command line with no arguments, 26 | # which runs doctor on all mappings. 27 | runner = CliRunner() 28 | with self.assertLogs(LOGGER, level="WARNING") as cm: 29 | result = runner.invoke(doctor) 30 | self.assertEqual(result.exit_code, 0) 31 | self.assertGreaterEqual(len(cm.output), 10) 32 | 33 | # Migrated here from test_doctor.py 34 | # And skip this test, because test_doctor_cli() indirectly does the 35 | # expensive call to check_ipa_known_segs already so there is no value in 36 | # doing it a second time here. 37 | def not_test_ipa_known_segs_all(self): 38 | # This test simulates the innards of having called "g2p doctor" on the command 39 | # line with no arguments, again running the innards of doctor on all mappings. 40 | with self.assertLogs(LOGGER, level="WARNING") as cm: 41 | check_ipa_known_segs() 42 | self.assertGreaterEqual(len(cm.output), 20) 43 | 44 | 45 | if __name__ == "__main__": 46 | main() 47 | -------------------------------------------------------------------------------- /g2p/tests/test_langs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from unittest import TestCase, main 4 | 5 | from g2p import make_g2p 6 | from g2p.log import LOGGER 7 | from g2p.tests.public.data import load_public_test_data 8 | 9 | 10 | class LangTest(TestCase): 11 | """Basic Test for individual lookup tables. 12 | 13 | Test files (in g2p/tests/public/data) are either .csv, .psv, or 14 | .tsv files, the only difference being the delimiter used (comma, 15 | pipe, or tab). 
def test_io(self):
    # Run every public test case through the transducer for its language pair.
    # We do not assert inside the loop: every mismatch is logged first, so that
    # running test_langs.py prints all the errors at once, which helps when
    # debugging a given g2p mapping. A single assertion on the number of
    # mismatches then makes unittest register the failure.
    error_prefix = "test_langs.py: mapping error"
    error_count = 0

    for test in load_public_test_data():
        in_lang, out_lang, text = test[0], test[1], test[2]
        actual = make_g2p(in_lang, out_lang)(text).output_string.strip()
        expected = test[3]
        if actual == expected.strip():
            continue
        # test[-1] is the last field of the test case row; it is reported
        # alongside the mismatch.
        LOGGER.error(
            f"{error_prefix} for {test[-1]}: {text} from {in_lang} to {out_lang} "
            f"should be {expected}, got {actual}"
        )
        error_count += 1

    self.assertEqual(
        error_count,
        0,
        f'Search for "ERROR - {error_prefix}" above to find all the g2p mapping errors.',
    )
#!/usr/bin/env python
"""Launch g2p-studio locally with uvicorn on http://127.0.0.1:5000."""

import sys

# Refuse to start on interpreters older than Python 3.8 and tell the user why.
# sys.exit() with a string prints that string to stderr and exits with status 1,
# so the explanation below is what the user actually sees.
if sys.version_info < (3, 8, 0):  # pragma: no cover
    sys.exit(
        "ERROR: While the g2p CLI and library can still run on Python 3.7, "
        "g2p-studio requires Python 3.8 or more recent.\n"
        f"You are using {sys.version}.\n"
        "Please use a newer version of Python."
    )

# Imported only after the version check (presumably so that an unsupported
# interpreter gets the message above rather than an import failure).
import uvicorn

from g2p.app import APP
from g2p.log import LOGGER

host = "127.0.0.1"
port = 5000
LOGGER.info(f"g2p-studio listening on http://{host}:{port}")

uvicorn.run(APP, host=host, port=port)