├── .SETUPTOOLS_SCM_PRETEND_VERSION
├── .flake8
├── .git-blame-ignore-revs
├── .github
├── pull_request_template.md
├── pull_request_template
│ └── software_pr.md
└── workflows
│ ├── codeql.yml
│ ├── docs.yml
│ ├── matrix-tests.yml
│ ├── pythonpublish.yml
│ └── tests.yml
├── .gitignore
├── .gitlint
├── .pre-commit-config.yaml
├── CITATION.cff
├── Contributing.md
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── Procfile
├── README.md
├── bin
└── post_compile
├── docs
├── _static
│ ├── abbs.png
│ ├── basic.gif
│ ├── creator.png
│ └── rules.png
├── assets
│ └── g2p_update.jpeg
├── cli.md
├── contributing.md
├── index.md
├── installation.md
├── migration-2.md
├── overrides
│ └── partials
│ │ └── comments.html
├── package.md
├── start.md
└── studio.md
├── g2p
├── __init__.py
├── api.py
├── api_v2.py
├── app.py
├── cli.py
├── constants.py
├── exceptions.py
├── log.py
├── mappings
│ ├── .schema
│ │ ├── g2p-config-schema-2.0.json
│ │ ├── g2p-config-schema-2.1.json
│ │ └── g2p-config-schema-2.2.json
│ ├── __init__.py
│ ├── create_fallback_mapping.py
│ ├── create_ipa_mapping.py
│ ├── langs
│ │ ├── __init__.py
│ │ ├── alq
│ │ │ ├── alq_to_ipa.csv
│ │ │ └── config-g2p.yaml
│ │ ├── atj
│ │ │ ├── README.md
│ │ │ ├── atj_ipa_to_eng_ipa.json
│ │ │ ├── atj_to_ipa.json
│ │ │ └── config-g2p.yaml
│ │ ├── ckt
│ │ │ ├── README.md
│ │ │ ├── ckt_ipa_to_eng_ipa.json
│ │ │ ├── ckt_to_ipa.json
│ │ │ └── config-g2p.yaml
│ │ ├── clc
│ │ │ ├── config-g2p.yaml
│ │ │ └── doulos.csv
│ │ ├── clm
│ │ │ ├── clm_equiv.csv
│ │ │ ├── clm_to_ipa.csv
│ │ │ └── config-g2p.yaml
│ │ ├── crg
│ │ │ ├── abbreviations.csv
│ │ │ ├── config-g2p.yaml
│ │ │ ├── crg-dv-to-crg-ipa.csv
│ │ │ └── crg-tmd-to-crg-ipa.csv
│ │ ├── crj
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── crj_equiv.json
│ │ │ ├── crj_ipa_to_eng_ipa.json
│ │ │ └── crj_to_ipa.json
│ │ ├── crk
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── crk-no-symbols_to_ipa.json
│ │ │ └── crk_to_crk-no-symbols.json
│ │ ├── crl
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── crl_equiv.json
│ │ │ ├── crl_ipa_to_eng_ipa.json
│ │ │ └── crl_to_ipa.json
│ │ ├── crm
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── crm_equiv.json
│ │ │ ├── crm_ipa_to_eng_ipa.json
│ │ │ └── crm_to_ipa.json
│ │ ├── crx
│ │ │ ├── config-g2p.yaml
│ │ │ ├── stella_orth_to_syllabics.csv
│ │ │ └── stella_syllabics_to_orth.csv
│ │ ├── csw
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── csw_equiv.json
│ │ │ ├── csw_ipa_to_eng_ipa.json
│ │ │ └── csw_to_ipa.json
│ │ ├── ctp
│ │ │ ├── config-g2p.yaml
│ │ │ ├── ctp_ipa_to_eng_ipa.json
│ │ │ └── ctp_to_ipa.json
│ │ ├── dan
│ │ │ ├── config-g2p.yaml
│ │ │ ├── dan_abbs.csv
│ │ │ ├── dan_to_dummy.json
│ │ │ └── dan_to_ipa.csv
│ │ ├── eng
│ │ │ ├── README.md
│ │ │ ├── cmudict.ipa.aligned.txt
│ │ │ ├── config-g2p.yaml
│ │ │ ├── dummy_to_arpabet.json
│ │ │ ├── eng_arpabet_to_ipa.json
│ │ │ ├── eng_inventory.json
│ │ │ ├── eng_ipa_to_arpabet.json
│ │ │ ├── make_alignments.sh
│ │ │ ├── make_ipa_cmudict.py
│ │ │ └── reverse_json.py
│ │ ├── fin
│ │ │ ├── config-g2p.yaml
│ │ │ └── fin_to_ipa.csv
│ │ ├── font-encodings
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── fn_unicode.csv
│ │ │ ├── hei_doulos.csv
│ │ │ ├── hei_times.csv
│ │ │ └── nav_times.csv
│ │ ├── fra
│ │ │ ├── README.txt
│ │ │ ├── config-g2p.yaml
│ │ │ ├── fra_abbs.csv
│ │ │ └── fra_to_ipa.csv
│ │ ├── generated
│ │ │ ├── alq-ipa_to_eng-ipa.json
│ │ │ ├── atj-ipa_to_eng-ipa.json
│ │ │ ├── clm-ipa_to_eng-ipa.json
│ │ │ ├── config-g2p.yaml
│ │ │ ├── crg-ipa_to_eng-ipa.json
│ │ │ ├── crk-ipa_to_eng-ipa.json
│ │ │ ├── dan-ipa_to_eng-ipa.json
│ │ │ ├── fin-ipa_to_eng-ipa.json
│ │ │ ├── fra-ipa_to_eng-ipa.json
│ │ │ ├── gla-ipa_to_eng-ipa.json
│ │ │ ├── gwi-ipa_to_eng-ipa.json
│ │ │ ├── haa-ipa_to_eng-ipa.json
│ │ │ ├── ikt-ipa_to_eng-ipa.json
│ │ │ ├── ikt-ipa_to_hamming-eng-ipa.json
│ │ │ ├── iku-ipa_to_eng-ipa.json
│ │ │ ├── iku-ipa_to_hamming-eng-ipa.json
│ │ │ ├── iku-sro-ipa_to_eng-ipa.json
│ │ │ ├── kwk-ipa_to_eng-ipa.json
│ │ │ ├── lml-ipa_to_eng-ipa.json
│ │ │ ├── mic-ipa_to_eng-ipa.json
│ │ │ ├── moe-ipa_to_eng-ipa.json
│ │ │ ├── moh-equiv_to_dummy.json
│ │ │ ├── moh-equiv_to_hamming-dummy.json
│ │ │ ├── moh-ipa_to_eng-ipa.json
│ │ │ ├── moh-ipa_to_hamming-eng-ipa.json
│ │ │ ├── oji-ipa_to_eng-ipa.json
│ │ │ ├── oka-ipa_to_eng-ipa.json
│ │ │ ├── sal-ipa_to_eng-ipa.json
│ │ │ ├── see-ipa_to_eng-ipa.json
│ │ │ ├── str-equiv_to_dummy.json
│ │ │ ├── str-equiv_to_hamming-dummy.json
│ │ │ ├── str-ipa_to_eng-ipa.json
│ │ │ ├── str-ipa_to_hamming-eng-ipa.json
│ │ │ ├── tau-ipa_to_eng-ipa.json
│ │ │ ├── tce-ipa_to_eng-ipa.json
│ │ │ ├── tli-ipa_to_eng-ipa.json
│ │ │ ├── ttm-ipa_to_eng-ipa.json
│ │ │ ├── und-ascii_to_dummy.json
│ │ │ ├── und-ascii_to_hamming-dummy.json
│ │ │ ├── und-ipa_to_hamming-eng-ipa.json
│ │ │ └── win-ipa_to_eng-ipa.json
│ │ ├── git
│ │ │ ├── APA.csv
│ │ │ ├── Ortho_variables.csv
│ │ │ ├── Orthography.csv
│ │ │ ├── Orthography_Deterministic.csv
│ │ │ ├── RAPA.csv
│ │ │ ├── RAPA_Deterministic.csv
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── equiv.csv
│ │ │ ├── git_ipa_to_eng_ipa.json
│ │ │ └── git_to_ipa.json
│ │ ├── gla
│ │ │ ├── README.txt
│ │ │ ├── config-g2p.yaml
│ │ │ └── gla_to_ipa.json
│ │ ├── gwi
│ │ │ ├── config-g2p.yaml
│ │ │ ├── gwi_equiv.json
│ │ │ └── gwi_to_ipa.json
│ │ ├── haa
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── haa_abbs.csv
│ │ │ ├── haa_equiv.csv
│ │ │ └── haa_to_ipa.csv
│ │ ├── hur
│ │ │ ├── config-g2p.yaml
│ │ │ ├── hur_apa_to_hur_orthog.json
│ │ │ └── hur_orthog_to_hur_apa.json
│ │ ├── ikt
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ └── ikt_to_ipa.json
│ │ ├── iku
│ │ │ ├── config-g2p.yaml
│ │ │ ├── iku_equiv_to_ipa.json
│ │ │ ├── iku_sro_to_ipa.json
│ │ │ └── iku_to_iku_equiv.json
│ │ ├── kkz
│ │ │ ├── config-g2p.yaml
│ │ │ ├── kkz_ipa_to_eng_ipa.json
│ │ │ └── kkz_to_ipa.json
│ │ ├── kwk
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── kwk_boas_to_umista.csv
│ │ │ ├── kwk_ipa_to_phonemic_ipa.json
│ │ │ ├── kwk_napa_to_ipa.csv
│ │ │ ├── kwk_napa_to_xsampa.json
│ │ │ ├── kwk_umista_to_ipa.json
│ │ │ ├── kwk_xsampa_to_eng_ipa.json
│ │ │ ├── napa_equiv_ubc.csv
│ │ │ ├── napa_equiv_uvic.csv
│ │ │ └── umista_equiv.csv
│ │ ├── langs.json.gz
│ │ ├── lml
│ │ │ ├── abbreviations.csv
│ │ │ ├── config-g2p.yaml
│ │ │ └── lml_to_ipa.csv
│ │ ├── mic
│ │ │ ├── abbreviations.csv
│ │ │ ├── config-g2p.yaml
│ │ │ └── mic_to_ipa.json
│ │ ├── moe
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── moe_abbs.csv
│ │ │ └── moe_to_ipa.json
│ │ ├── moh
│ │ │ ├── README.md
│ │ │ ├── abbreviations.csv
│ │ │ ├── config-g2p.yaml
│ │ │ ├── moh_equiv.json
│ │ │ └── moh_to_ipa.json
│ │ ├── network.json.gz
│ │ ├── network_lite.py
│ │ ├── norm
│ │ │ ├── config-g2p.yaml
│ │ │ ├── panphon_preprocessor.csv
│ │ │ └── tone-map.txt
│ │ ├── oji
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── oji_syllabics_to_orth.csv
│ │ │ └── oji_to_ipa.csv
│ │ ├── oka
│ │ │ ├── config-g2p.yaml
│ │ │ ├── oka_equiv.csv
│ │ │ └── oka_to_ipa.csv
│ │ ├── sal
│ │ │ ├── config-g2p.yaml
│ │ │ ├── sal_apa_to_ipa.csv
│ │ │ └── sal_equiv.csv
│ │ ├── see
│ │ │ ├── config-g2p.yaml
│ │ │ └── see_to_ipa.csv
│ │ ├── srs
│ │ │ ├── config-g2p.yaml
│ │ │ ├── srs_ipa_to_eng_ipa.json
│ │ │ └── srs_to_ipa.json
│ │ ├── str
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── str_equiv.json
│ │ │ └── str_to_ipa.json
│ │ ├── tau
│ │ │ ├── config-g2p.yaml
│ │ │ ├── tau_equiv.json
│ │ │ └── tau_to_ipa.json
│ │ ├── tce
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── tce_equiv.csv
│ │ │ └── tce_to_ipa.csv
│ │ ├── tgx
│ │ │ ├── config-g2p.yaml
│ │ │ ├── tgx_ipa_to_eng_ipa.json
│ │ │ └── tgx_to_ipa.json
│ │ ├── tli
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── tli_equiv.csv
│ │ │ └── tli_to_ipa.csv
│ │ ├── ttm
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── ttm_equiv.csv
│ │ │ └── ttm_to_ipa.csv
│ │ ├── und
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── und_ipa_to_eng_ipa.json
│ │ │ └── und_to_ipa.json
│ │ ├── utils.py
│ │ └── win
│ │ │ ├── README.md
│ │ │ ├── config-g2p.yaml
│ │ │ ├── hoocak_alphabet.csv
│ │ │ └── win_to_ipa.json
│ ├── tokenizer.py
│ └── utils.py
├── shared_types.py
├── static
│ ├── __init__.py
│ ├── assets
│ │ └── bonjour.png
│ ├── blockly_main.js
│ ├── blocks.js
│ ├── custom.css
│ ├── custom.js
│ ├── echart_custom.js
│ ├── languages-network.json
│ ├── normalize.css
│ ├── skeleton.css
│ └── swagger.json
├── templates
│ └── index.html
├── tests
│ ├── .coveragerc
│ ├── __init__.py
│ ├── public
│ │ ├── __init__.py
│ │ ├── data
│ │ │ ├── __init__.py
│ │ │ ├── atj.psv
│ │ │ ├── clm.csv
│ │ │ ├── crg.psv
│ │ │ ├── crj.psv
│ │ │ ├── crk.psv
│ │ │ ├── crl.psv
│ │ │ ├── crm.psv
│ │ │ ├── csw.psv
│ │ │ ├── ctp.csv
│ │ │ ├── eng.csv
│ │ │ ├── fin.psv
│ │ │ ├── fn_unicode.psv
│ │ │ ├── fra.psv
│ │ │ ├── fra_panagrams.txt
│ │ │ ├── fra_panagrams_NFD.txt
│ │ │ ├── fra_simple.txt
│ │ │ ├── git.psv
│ │ │ ├── gwi.psv
│ │ │ ├── haa.csv
│ │ │ ├── hur.psv
│ │ │ ├── ikt.psv
│ │ │ ├── iku-sro.psv
│ │ │ ├── iku.psv
│ │ │ ├── kwk.psv
│ │ │ ├── lml.psv
│ │ │ ├── mic.psv
│ │ │ ├── moe.psv
│ │ │ ├── moh.psv
│ │ │ ├── oji-syl.psv
│ │ │ ├── oji.tsv
│ │ │ ├── oka.csv
│ │ │ ├── sal-arpabet.tsv
│ │ │ ├── sal.tsv
│ │ │ ├── srs.psv
│ │ │ ├── str.tsv
│ │ │ ├── str_un_human_rights.txt
│ │ │ ├── tau.psv
│ │ │ ├── tce.csv
│ │ │ ├── tli.csv
│ │ │ ├── ttm.csv
│ │ │ └── win.csv
│ │ ├── git_to_ipa.json
│ │ ├── mappings
│ │ │ ├── .gitignore
│ │ │ ├── abbreviation_config-g2p.yaml
│ │ │ ├── abbreviation_mapping.csv
│ │ │ ├── abbreviations.csv
│ │ │ ├── abbreviations.json
│ │ │ ├── abbreviations.psv
│ │ │ ├── abbreviations.substring.csv
│ │ │ ├── abbreviations.tsv
│ │ │ ├── bad_langs
│ │ │ │ └── lang1
│ │ │ │ │ ├── config-g2p.yaml
│ │ │ │ │ └── minimal.csv
│ │ │ ├── bad_langs2
│ │ │ │ └── lang1
│ │ │ │ │ ├── config-g2p.yaml
│ │ │ │ │ └── minimal.csv
│ │ │ ├── bad_lexicon_config-g2p.yaml
│ │ │ ├── case-feed
│ │ │ │ ├── README.md
│ │ │ │ ├── cf-in-lc-to-cf-out-uc.csv
│ │ │ │ ├── config-g2p.yaml
│ │ │ │ └── empty.csv
│ │ │ ├── compose.yaml
│ │ │ ├── compose1-2.csv
│ │ │ ├── compose2-3.csv
│ │ │ ├── deletion.csv
│ │ │ ├── deletion.json
│ │ │ ├── deletion_config_csv.yaml
│ │ │ ├── deletion_config_json.yaml
│ │ │ ├── g2p_studio.csv
│ │ │ ├── g2p_studio2.csv
│ │ │ ├── gen-map-1.csv
│ │ │ ├── gen-map-2.csv
│ │ │ ├── gen-map-3a.csv
│ │ │ ├── gen-map-3b.csv
│ │ │ ├── gen-map_config-g2p.yaml
│ │ │ ├── gm1-ipa_to_gm2-ipa.json
│ │ │ ├── gm2-ipa_to_gm3-ipa.json
│ │ │ ├── gm3-ipa_to_gm2-ipa.json
│ │ │ ├── hello.aligned.txt
│ │ │ ├── lexicon_config-g2p.yaml
│ │ │ ├── malformed_config-g2p.yaml
│ │ │ ├── minimal.csv
│ │ │ ├── minimal.json
│ │ │ ├── minimal.psv
│ │ │ ├── minimal.tsv
│ │ │ ├── minimal.xlsx
│ │ │ ├── minimal_config-g2p.yaml
│ │ │ ├── minimal_configs.yaml
│ │ │ ├── no_escape.csv
│ │ │ ├── no_mappings_key.yaml
│ │ │ ├── nofeed-indices.csv
│ │ │ ├── nofeed-indices.yaml
│ │ │ ├── null.csv
│ │ │ ├── null_config-g2p.yaml
│ │ │ ├── rule-ordering.yaml
│ │ │ ├── test.yaml
│ │ │ ├── test_to_ipa.csv
│ │ │ ├── tokenize_punct.csv
│ │ │ └── tokenize_punct_config-g2p.yaml
│ │ └── sample_response.json
│ ├── run.py
│ ├── test_api_resources.py
│ ├── test_api_v2.py
│ ├── test_check_ipa_arpabet.py
│ ├── test_cli.py
│ ├── test_create_mapping.py
│ ├── test_doctor.py
│ ├── test_doctor_expensive.py
│ ├── test_fallback.py
│ ├── test_indices.py
│ ├── test_langs.py
│ ├── test_lexicon_transducer.py
│ ├── test_mappings.py
│ ├── test_network.py
│ ├── test_studio.py
│ ├── test_tokenize_and_map.py
│ ├── test_tokenizer.py
│ ├── test_transducer.py
│ ├── test_unidecode_transducer.py
│ ├── test_utils.py
│ ├── test_z_local_config.py
│ └── time_panphon.py
└── transducer
│ └── __init__.py
├── mkdocs.yml
├── pyproject.toml
├── readme-heroku.md
├── requirements.txt
├── run_studio.py
├── run_tests.py
└── runtime.txt
/.SETUPTOOLS_SCM_PRETEND_VERSION:
--------------------------------------------------------------------------------
1 | 2.2
2 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | extend-ignore = E203,E501,E704
4 |
--------------------------------------------------------------------------------
/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | 9c26df474a70543588e4f8b3ce7d56a36f475da4
2 | cd3165733bbb5cd21b28aa2a3115cc13887dfa90
3 | 9b156b87b63f6ff3c337c3e82ec1ecc45a2af03a
4 | d6ae834863d309cd05096d32c7237eea35e21615
5 | ce0a4b1b2aca9c3e3dcb09dc473c44e9014cc103
6 | 1fa3d9d34b4087c44047df64fb0f936db73cb09f
7 | 4a982e6155dc51d17c80fb54b1f66c9f1d5affb1
8 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | Pull request template for adding a new language
2 | -----------------------------------------------
3 |
4 |
9 |
10 | * **Please check if the PR fulfills these requirements**
11 | - [ ] Mapping files are added in `g2p/mappings/langs`
12 | - [ ] Mapping is either added to an existing folder or a new folder has been added
13 | - [ ] Language folder and files use appropriate [ISO 639-3 codes](https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes)
14 | - [ ] `config-g2p.yaml` file includes all author names, and settings necessary
15 | - [ ] Please add some test data in `g2p/tests/public/data`. The added file should be a csv/tsv/psv file and each row should have the format `[input_mapping_code,output_mapping_code,input_string,output_string]`
16 | - [ ] As the last step, G2P has been updated by running `g2p update` locally and committing the change
17 | - [ ] You agree to license your contribution under the same license as this project (see [LICENSE](https://github.com/roedoejet/g2p/blob/main/LICENSE) file).
18 |
19 | * **Other information**:
20 |
--------------------------------------------------------------------------------
/.github/pull_request_template/software_pr.md:
--------------------------------------------------------------------------------
1 |
3 |
4 | ### PR Goal?
5 |
6 |
7 |
8 | ### Fixes?
9 |
10 |
11 |
12 | ### Feedback sought?
13 |
14 |
15 |
16 | ### Priority?
17 |
18 |
19 |
20 | ### Tests added?
21 |
22 |
23 |
24 | ### How to test?
25 |
26 |
27 |
28 | ### Confidence?
29 |
30 |
31 |
32 | ### Version change?
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: Deploy docs
2 | on:
3 | push:
4 | branches:
5 | - main
6 | jobs:
7 | docs:
8 | # Create latest docs
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v4
12 | with:
13 | fetch-depth: 0 # fetch all commits/branches
14 | - name: Set up Python
15 | uses: actions/setup-python@v5
16 | with:
17 | python-version: "3.8"
18 | - name: Install dependencies
19 | run: |
20 | python -m pip install --upgrade pip
21 | pip install -e .[docs]
22 | - name: Setup doc deploy
23 | run: |
24 | git config user.name 'github-actions[bot]'
25 | git config user.email 'github-actions[bot]@users.noreply.github.com'
26 | - name: Deploy docs with mike 🚀
27 | run: |
28 | mike deploy --push --update-aliases dev latest
29 |
--------------------------------------------------------------------------------
/.github/workflows/matrix-tests.yml:
--------------------------------------------------------------------------------
1 | name: Run full matrix Tests before Releases
2 | on:
3 | workflow_call:
4 | workflow_dispatch:
5 | push:
6 | branches: main
7 | env:
8 | SETUPTOOLS_SCM_PRETEND_VERSION: "2.1"
9 | jobs:
10 | pre-release-matrix-test:
11 | strategy:
12 | fail-fast: false
13 | matrix:
14 | os: [ubuntu-22.04, windows-latest, macos-latest]
15 | python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
16 | exclude:
17 | - os: macos-latest
18 | python-version: "3.7"
19 | runs-on: ${{ matrix.os }}
20 | steps:
21 | - uses: actions/checkout@v4
22 | - name: Set up Python
23 | uses: actions/setup-python@v5
24 | with:
25 | python-version: ${{ matrix.python-version }}
26 | - name: Install dependencies
27 | shell: bash
28 | run: |
29 | python -m pip install --upgrade pip
30 | SETUPTOOLS_SCM_PRETEND_VERSION=`cat .SETUPTOOLS_SCM_PRETEND_VERSION` pip install -e .[test]
31 | - name: Run tests
32 | run: python run_tests.py dev
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled python modules.
2 | *.pyc
3 |
4 | # Setuptools distribution folder.
5 | /dist/
6 | build
7 | venv
8 |
9 | # Ignore private test data
10 | g2p/tests/private
11 |
12 | # Python egg metadata, regenerated from source files by setuptools.
13 | /*.egg-info
14 |
15 | .DS_Store
16 | .vscode
17 | .python-version
18 |
19 | *.log
20 | log.txt
21 |
22 | .coverage
23 | htmlcov
24 |
25 | flask_session
26 |
27 | # Sphinx documentation
28 | docs/_build/
29 |
30 | # coverage annotation output
31 | *,cover
32 |
33 | # vim temp files
34 | *~
35 |
36 | # mkdocs build
37 | site
38 | g2p/_version.py
39 |
--------------------------------------------------------------------------------
/.gitlint:
--------------------------------------------------------------------------------
1 | [general]
2 | # Enable conventional commit linting
3 | contrib=contrib-title-conventional-commits
4 |
5 | # Ignore any data sent to gitlint via stdin (helpful on Windows)
6 | ignore-stdin=true
7 |
8 | # We don't require a body, just a title, even though a body is also a good idea
9 | ignore=body-is-missing,body-min-length
10 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v4.4.0
4 | hooks:
5 | - id: check-yaml
6 | - id: check-json
7 | - id: end-of-file-fixer
8 | - id: trailing-whitespace
9 | exclude: \.svg$
10 | - repo: https://github.com/PyCQA/isort
11 | rev: 5.13.2
12 | hooks:
13 | - id: isort
14 | args: [--profile=black]
15 | - repo: https://github.com/psf/black
16 | rev: 24.8.0
17 | hooks:
18 | - id: black
19 | - repo: https://github.com/pycqa/flake8
20 | rev: 7.1.1
21 | hooks:
22 | - id: flake8
23 | - repo: https://github.com/pre-commit/mirrors-mypy
24 | rev: v1.13.0
25 | hooks:
26 | - id: mypy
27 | additional_dependencies:
28 | [pydantic, types-requests, types-python-slugify, types-PyYAML]
29 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Base Image
2 | FROM debian:latest
3 |
4 | ENV DEBIAN_FRONTEND=noninteractive
5 |
6 | # Dependencies that don't change with g2p updates and can be cached, and make it lean
7 | RUN apt-get update -y \
8 | && apt-get install -y \
9 | apt-transport-https \
10 | libffi-dev \
11 | openssl \
12 | libssl-dev \
13 | python3 \
14 | python3-pip \
15 | python3-dev \
16 | python3-venv \
17 | build-essential \
18 | nano \
19 | git \
20 | && apt-get clean \
21 | && apt-get autoremove \
22 | && rm -fr /var/lib/apt/lists/*
23 |
24 | # Create a venv to install packages locally
25 | RUN python3 -m venv --system-site-packages /g2p/venv
26 |
27 | # Get g2p-specific dependencies that can also often be cached
28 | RUN mkdir -p /g2p/g2p
29 | COPY requirements.txt /g2p
30 | COPY pyproject.toml /g2p
31 | RUN . /g2p/venv/bin/activate \
32 | && python3 -m pip install --upgrade pip \
33 | && MAKEFLAGS="-j$(nproc)" pip3 install -r /g2p/requirements.txt
34 |
35 | # Install g2p itself, last
36 | COPY . /g2p/
37 | COPY README.md /g2p
38 | COPY Dockerfile /g2p
39 | RUN . /g2p/venv/bin/activate \
40 | && pip3 install -e /g2p
41 |
42 | # Comment this out if you just want to install g2p in the container without running the studio.
43 | SHELL ["/bin/sh", "-c"]
44 | CMD gunicorn --worker-class uvicorn.workers.UvicornWorker -w 1 g2p.app:APP --bind 0.0.0.0:8000
45 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019-2021 Aidan Pine, David Huggins-Daines, National Research Council Canada (NRC)
4 | Individual mappings in g2p/mappings/langs contributed by others are Copyright
5 | their contributors.
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in all
15 | copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include g2p/mappings/langs *
2 | recursive-include g2p/tests/public *
3 | include g2p/templates/*
4 | include g2p/static/*.css
5 | include g2p/static/*.js
6 | include g2p/static/*.json
7 | include g2p/static/swagger-ui/*
8 | include MANIFEST.in
9 | exclude .gitignore
10 | recursive-exclude * *.py[co]
11 | recursive-exclude * *~
12 | recursive-exclude * *.orig
13 | recursive-exclude * *.DS_Store
14 |
--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | web: gunicorn --worker-class uvicorn.workers.UvicornWorker -w 1 g2p.app:APP
2 |
--------------------------------------------------------------------------------
/bin/post_compile:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # Heroku has no understanding of dynamic versioning so we have to hack this to make things work
4 | export SETUPTOOLS_SCM_PRETEND_VERSION=`cat .SETUPTOOLS_SCM_PRETEND_VERSION`
5 | # Also it expects to run your app in place, but doesn't actually do this for you because it's old
6 | pip install -e .
7 |
--------------------------------------------------------------------------------
/docs/_static/abbs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/docs/_static/abbs.png
--------------------------------------------------------------------------------
/docs/_static/basic.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/docs/_static/basic.gif
--------------------------------------------------------------------------------
/docs/_static/creator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/docs/_static/creator.png
--------------------------------------------------------------------------------
/docs/_static/rules.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/docs/_static/rules.png
--------------------------------------------------------------------------------
/docs/assets/g2p_update.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/docs/assets/g2p_update.jpeg
--------------------------------------------------------------------------------
/docs/cli.md:
--------------------------------------------------------------------------------
1 | ---
2 | comments: true
3 | ---
4 |
5 | # Command line interface
6 |
7 | There is a command line interface bundled with g2p that allows both basic and advanced functionality to be accessed from your command line. After [installing g2p](./installation.md), you can get information about how to use the command line by running `g2p --help`.
8 |
9 |
10 | ::: mkdocs-click
11 | :module: g2p.cli
12 | :command: cli
13 | :prog_name: g2p
14 |
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | ---
2 | comments: true
3 | ---
4 |
5 | # Contributing
6 |
7 | Feel free to dive in! [Open an issue](https://github.com/roedoejet/g2p/issues/new) or submit PRs.
8 |
9 | This repo follows the [Contributor Covenant](http://contributor-covenant.org/version/1/3/0/) Code of Conduct.
10 |
11 | ## Adding a new language/mapping
12 |
13 | In order to add a new mapping or language to be supported, please fill out a pull request with the [pull request template](https://github.com/roedoejet/g2p/blob/main/.github/pull_request_template.md) provided.
14 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # Welcome to the G2P documentation!
2 |
3 | G2P is a tool for doing rule-based conversions for text.
4 |
5 | This website has the technical documentation for G2P, but we've also written a [7-part blog series](https://blog.mothertongues.org/g2p-background/) if you want a more thorough introduction to why G2P exists and what you can use it for.
6 |
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
1 | ---
2 | comments: true
3 | ---
4 |
5 | # Installation
6 |
7 | You can either install `g2p` with pip from PyPI:
8 |
9 | ```bash
10 | pip install g2p
11 | ```
12 |
13 | Or by cloning and installing from source:
14 |
15 | ```bash
16 | git clone https://github.com/roedoejet/g2p.git
17 |
18 | cd g2p
19 |
20 | pip install -e .
21 | ```
22 |
23 | To install in an isolated environment (recommended for development)
24 | you may use [hatch](https://hatch.pypa.io/latest/):
25 |
26 | ```
27 | hatch shell
28 | ```
29 |
--------------------------------------------------------------------------------
/docs/overrides/partials/comments.html:
--------------------------------------------------------------------------------
1 | {% if page.meta.comments %}
2 |
3 |
4 |
7 |
8 |
41 | {% endif %}
42 |
--------------------------------------------------------------------------------
/docs/package.md:
--------------------------------------------------------------------------------
1 | ---
2 | comments: true
3 | ---
4 |
5 | # Python package
6 |
7 | ## `make_g2p`
8 |
9 | The easiest way to create a transducer programmatically is to use the `g2p.make_g2p` function.
10 |
11 | To use it, first import the function:
12 |
13 | ```python
14 | from g2p import make_g2p
15 | ```
16 |
17 | Then, call it with an argument for `in_lang` and `out_lang`. Both must be strings equal to the name of a particular mapping.
18 |
19 | ```python
20 | >>> transducer = make_g2p("dan", "eng-arpabet")
21 | >>> transducer("hej").output_string
22 | 'HH EH Y'
23 | ```
24 |
25 | There must be a valid path between the `in_lang` and `out_lang` in order for this to work. If you've edited a mapping or added a custom mapping, you must update g2p to include it: `g2p update`
26 |
27 | ## `make_tokenizer`
28 |
29 | Basic usage for the language-aware tokenizer:
30 |
31 | ```python
32 | from g2p import make_tokenizer
33 | tokenizer = make_tokenizer("dan")
34 | for token in tokenizer.tokenize_text("Åh, hvordan har du det, Åbenrå?"):
35 | if token.is_word:
36 | word = token.text
37 | else:
38 | interword_punctuation_and_spaces = token.text
39 | ```
40 |
41 | Note that selecting the tokenizer language is important to make sure punctuation-like letters are handled correctly. For example `:` and `'` are punctuation in English but they will be part of the word tokens in Kanien'kéha (moh):
42 |
43 | ```python
44 | >>> list(make_tokenizer("moh").tokenize_text("Kanien'kéha"))
45 | [{'text': "Kanien'kéha", 'is_word': True}]
46 | >>> list(make_tokenizer("eng").tokenize_text("Kanien'kéha"))
47 | [{'text': 'Kanien', 'is_word': True}, {'text': "'", 'is_word': False}, {'text': 'kéha', 'is_word': True}]
48 | ```
49 |
50 | ## A look under the hood
51 |
52 | A Mapping object is a list of defined rules. A `Rule` has the following permitted fields:
53 |
54 | ::: g2p.mappings.Rule
55 | options:
56 | show_root_heading: true
57 | show_source: false
58 | heading_level: 3
59 | members_order: source
60 |
--------------------------------------------------------------------------------
/docs/start.md:
--------------------------------------------------------------------------------
1 | ---
2 | comments: true
3 | ---
4 |
5 | # Getting Started
6 |
7 | ## Overview
8 |
9 | ### What is G2P?
10 |
11 | The initial version of this package was developed by [Patrick Littell](https://github.com/littell) to allow for g2p from community orthographies to IPA and back again in [ReadAlong-Studio](https://github.com/ReadAlongs/Studio). We then decided to pull out the g2p mechanism from [Convertextract](https://github.com/roedoejet/convertextract), which allows transducer relations to be declared in CSV files, along with some g2p functionality from ReadAlong-Studio, and merge them into a stand-alone g2p library - here it is!
12 |
13 | This website has the technical documentation for G2P, but we've also written a [7-part blog series](https://blog.mothertongues.org/g2p-background/) if you want a more thorough introduction to why G2P exists and what you can use it for.
14 |
--------------------------------------------------------------------------------
/g2p/constants.py:
--------------------------------------------------------------------------------
1 | """
2 | This file is for constants that can be initialized without any (expensive) dependencies.
3 | """
4 |
5 | import os
6 |
# Names of the available distance metrics, as plain strings so that this
# module stays free of expensive imports.
# NOTE(review): presumably these are the values accepted wherever a
# ``distance`` argument is taken — confirm against the callers.
DISTANCE_METRICS = [
    "weighted_feature_edit_distance",
    "hamming_feature_edit_distance",
    "feature_edit_distance",
    "dolgo_prime_distance",
    "fast_levenshtein_distance",
    "levenshtein_distance",
]

# Path of the "mappings/langs" directory located next to this module.
LANGS_DIR = os.path.join(os.path.dirname(__file__), "mappings", "langs")
# Base name of the gzipped JSON langs file.
LANGS_FILE_NAME = "langs.json.gz"
# Base name of the gzipped JSON network file.
NETWORK_FILE_NAME = "network.json.gz"
19 |
--------------------------------------------------------------------------------
/g2p/log.py:
--------------------------------------------------------------------------------
1 | """
2 | Error Log
3 |
4 | """
5 |
6 | import logging
7 | import os
8 |
9 | import coloredlogs # type: ignore
10 |
# Per-field styling handed to coloredlogs: the level name is printed in green.
FIELD_STYLES = {
    "levelname": {"color": "green"},
}
14 |
15 |
def setup_logger(name):
    """Return the logger called *name* with colored output installed on it.

    The log level is taken from the ``G2P_LOGLEVEL`` environment variable
    (upper-cased), defaulting to ``INFO`` when the variable is not set.
    Records are formatted as ``<levelname> - <message>``.
    """
    named_logger = logging.getLogger(name)
    log_level = os.environ.get("G2P_LOGLEVEL", "INFO").upper()
    coloredlogs.install(
        logger=named_logger,
        level=log_level,
        fmt="%(levelname)s - %(message)s",
        field_styles=FIELD_STYLES,
    )
    return named_logger
27 |
28 |
# Shared logger instance, configured once at import time under the name "root".
LOGGER = setup_logger("root")
30 |
--------------------------------------------------------------------------------
/g2p/mappings/create_fallback_mapping.py:
--------------------------------------------------------------------------------
1 | import datetime as dt
2 |
3 | from text_unidecode import unidecode # type: ignore
4 |
5 | from g2p import make_g2p
6 | from g2p.log import LOGGER
7 | from g2p.mappings import Mapping
8 | from g2p.mappings.create_ipa_mapping import align_inventories
9 | from g2p.mappings.utils import is_ipa, unicode_escape
10 |
# Minimal target inventory that align_to_dummy_fallback() aligns other inventories to.
DUMMY_INVENTORY = ["ɑ", "i", "u", "t", "s", "n"]
12 |
13 |
def align_to_dummy_fallback(
    mapping: Mapping,
    io: str = "in",
    distance: str = "weighted_feature_edit_distance",
    quiet: bool = False,
) -> Mapping:
    """Create a mapping from one side of ``mapping`` to a minimalist dummy inventory.

    Args:
        mapping: the mapping whose inventory is aligned to DUMMY_INVENTORY.
        io: which side of ``mapping`` to use ("in" by default); it selects both
            ``mapping.inventory(io)`` and the ``{io}_lang`` field that becomes
            the ``in_lang`` of the result.
        distance: name of the distance metric, forwarded to align_inventories().
        quiet: forwarded to align_inventories().

    Returns:
        A new Mapping whose ``out_lang`` is "dummy" and whose rules map each
        item of the selected inventory to an item of DUMMY_INVENTORY.
    """
    mapping_config = mapping.model_dump()
    config = {
        "in_lang": mapping_config[f"{io}_lang"],
        "out_lang": "dummy",
        "authors": [f"Generated {dt.datetime.now()}"],
    }
    # Output used when no dummy equivalent can be found for an item.
    default_char = "t"
    if is_ipa(mapping_config[f"{io}_lang"]):
        # The inventory is already IPA: align it to the dummy inventory directly.
        list_of_rules = align_inventories(
            mapping.inventory(io), DUMMY_INVENTORY, distance=distance, quiet=quiet
        )
    else:
        # Not IPA: first guess an IPA form for each item by transliterating it
        # with unidecode, lower-casing it and running it through the
        # "und" -> "und-ipa" converter.
        und_g2p = make_g2p("und", "und-ipa", tokenize=False)
        list_of_rules = [
            {
                "in": unicode_escape(x),
                "out": und_g2p(unidecode(x).lower()).output_string,
            }
            for x in mapping.inventory(io)
        ]
        # Then align those guessed IPA outputs to the dummy inventory.
        dummy_list = align_inventories(
            [x["out"] for x in list_of_rules],
            DUMMY_INVENTORY,
            distance=distance,
            quiet=quiet,
        )
        # Lookup table guessed-IPA -> dummy; entries with an empty "in" are skipped.
        dummy_dict = {}
        for x in dummy_list:
            if x["in"]:
                dummy_dict[x["in"]] = x["out"]

        # Replace each guessed IPA output by its dummy equivalent, falling back
        # to default_char (with a warning) when there is none.
        for x in list_of_rules:
            try:
                x["out"] = dummy_dict[x["out"]]
            except KeyError:
                LOGGER.warning(
                    f"We couldn't guess at what {x['in']} means, so it's being "
                    f"replaced with '{default_char}' instead."
                )
                x["out"] = default_char

    config["rules"] = list_of_rules
    return Mapping(**config)
64 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/alq/alq_to_ipa.csv:
--------------------------------------------------------------------------------
1 | j,ʒ,,
2 | ô,u,,
3 | o,u,,
4 | ò,o:,,
5 | i,ɪ
6 | ɪ̀,i:,,
7 | à,ɑ,,
8 | sh,ʃ,,
9 | ch,tʃ,,
10 | ng,ŋ,,\b
11 | g,ʒ,d,
12 | g,ɡ
13 | e,ɛ,,n
14 | e,e:,,
15 | ù,o,,
16 | a,ʌ
17 | y,j
18 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/alq/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Algonquin
3 | mappings:
4 | - display_name: Algonquin to IPA
5 | in_lang: alq
6 | out_lang: alq-ipa
7 | authors:
8 | - Eric Joanis
9 | type: mapping
10 | rules_path: alq_to_ipa.csv
11 | rule_ordering: as-written
12 | case_sensitive: false
13 | norm_form: 'NFD'
14 | <<: *shared
15 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/atj/README.md:
--------------------------------------------------------------------------------
1 | "c" is mapped to ʃ, not ʒ, because ʒ is rare in English
2 | "tc" is often pronounced t͡s
3 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/atj/atj_ipa_to_eng_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "in": "a",
4 | "out": "ɑ"
5 | },
6 | {
7 | "in": "aː",
8 | "out": "ɑ"
9 | },
10 | {
11 | "in": "b",
12 | "out": "b"
13 | },
14 | {
15 | "in": "d",
16 | "out": "d"
17 | },
18 | {
19 | "in": "d͡ʒ",
20 | "out": "dʒ"
21 | },
22 | {
23 | "in": "eː",
24 | "out": "eː"
25 | },
26 | {
27 | "in": "h",
28 | "out": "h"
29 | },
30 | {
31 | "in": "i",
32 | "out": "i"
33 | },
34 | {
35 | "in": "iː",
36 | "out": "i"
37 | },
38 | {
39 | "in": "m",
40 | "out": "m"
41 | },
42 | {
43 | "in": "n",
44 | "out": "n"
45 | },
46 | {
47 | "in": "r",
48 | "out": "ɾ"
49 | },
50 | {
51 | "in": "s",
52 | "out": "s"
53 | },
54 | {
55 | "in": "u",
56 | "out": "u"
57 | },
58 | {
59 | "in": "uː",
60 | "out": "u"
61 | },
62 | {
63 | "in": "w",
64 | "out": "w"
65 | },
66 | {
67 | "in": "ɡ",
68 | "out": "ɡ"
69 | },
70 | {
71 | "in": "ʃ",
72 | "out": "ʃ"
73 | }
74 | ]
75 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/atj/atj_to_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | { "in": "p", "out": "b" },
3 | { "in": "t", "out": "d" },
4 | { "in": "k", "out": "ɡ" },
5 | { "in": "s", "out": "s" },
6 | { "in": "c", "out": "ʃ" },
7 | { "in": "tc", "out": "d͡ʒ" },
8 | { "in": "m", "out": "m" },
9 | { "in": "n", "out": "n" },
10 | { "in": "r", "out": "r" },
11 | { "in": "h", "out": "h" },
12 | { "in": "w", "out": "w" },
13 | { "in": "a", "out": "a" },
14 | { "in": "e", "out": "eː" },
15 | { "in": "i", "out": "i" },
16 | { "in": "o", "out": "u" },
17 | { "in": "â", "out": "aː" },
18 | { "in": "ê", "out": "eː" },
19 | { "in": "î", "out": "iː" },
20 | { "in": "ô", "out": "uː" }
21 | ]
22 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/atj/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Atikamekw
3 | mappings:
4 | - display_name: Atikamekw to IPA
5 | in_lang: atj
6 | out_lang: atj-ipa
7 | type: mapping
8 | case_sensitive: false
9 | authors:
10 | - David Huggins-Daines
11 | - Patrick Littell
12 | rules_path: atj_to_ipa.json
13 | rule_ordering: apply-longest-first
14 | <<: *shared
15 | - display_name: Atikamekw IPA to English IPA
16 | in_lang: atj-ipa
17 | out_lang: eng-ipa
18 | type: mapping
19 | rules_path: atj_ipa_to_eng_ipa.json
20 | case_sensitive: false
21 | <<: *shared
22 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ckt/README.md:
--------------------------------------------------------------------------------
1 | Language-specific files for Chukchi
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ckt/ckt_ipa_to_eng_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "out": "ɑ",
4 | "in": "a"
5 | },
6 | {
7 | "out": "eː",
8 | "in": "e"
9 | },
10 | {
11 | "out": "i",
12 | "in": "i"
13 | },
14 | {
15 | "out": "j",
16 | "in": "j"
17 | },
18 | {
19 | "out": "k",
20 | "in": "k"
21 | },
22 | {
23 | "out": "m",
24 | "in": "m"
25 | },
26 | {
27 | "out": "n",
28 | "in": "n"
29 | },
30 | {
31 | "out": "oː",
32 | "in": "o"
33 | },
34 | {
35 | "out": "p",
36 | "in": "p"
37 | },
38 | {
39 | "out": "k",
40 | "in": "q"
41 | },
42 | {
43 | "out": "s",
44 | "in": "s"
45 | },
46 | {
47 | "out": "t",
48 | "in": "t"
49 | },
50 | {
51 | "out": "u",
52 | "in": "u"
53 | },
54 | {
55 | "out": "w",
56 | "in": "w"
57 | },
58 | {
59 | "out": "ŋ",
60 | "in": "ŋ"
61 | },
62 | {
63 | "out": "ə",
64 | "in": "ə"
65 | },
66 | {
67 | "out": "ɡ",
68 | "in": "ɣ"
69 | },
70 | {
71 | "out": "s",
72 | "in": "ɬ"
73 | },
74 | {
75 | "out": "ɾ",
76 | "in": "ɾ"
77 | },
78 | {
79 | "out": "ʔ",
80 | "in": "ʔ"
81 | }
82 | ]
83 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ckt/ckt_to_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "in": "ʼ",
4 | "out": "ʔ"
5 | },
6 | {
7 | "in": "а",
8 | "out": "a"
9 | },
10 | {
11 | "in": "в",
12 | "out": "w"
13 | },
14 | {
15 | "in": "г",
16 | "out": "ɣ"
17 | },
18 | {
19 | "in": "е",
20 | "out": "e"
21 | },
22 | {
23 | "in": "и",
24 | "out": "i"
25 | },
26 | {
27 | "in": "й",
28 | "out": "j"
29 | },
30 | {
31 | "in": "к",
32 | "out": "k"
33 | },
34 | {
35 | "in": "м",
36 | "out": "m"
37 | },
38 | {
39 | "in": "н",
40 | "out": "n"
41 | },
42 | {
43 | "in": "о",
44 | "out": "o"
45 | },
46 | {
47 | "in": "п",
48 | "out": "p"
49 | },
50 | {
51 | "in": "р",
52 | "out": "ɾ"
53 | },
54 | {
55 | "in": "с",
56 | "out": "s"
57 | },
58 | {
59 | "in": "т",
60 | "out": "t"
61 | },
62 | {
63 | "in": "у",
64 | "out": "u"
65 | },
66 | {
67 | "in": "ъ",
68 | "out": "ʔ"
69 | },
70 | {
71 | "in": "ы",
72 | "out": "ə"
73 | },
74 | {
75 | "in": "ь",
76 | "out": "ʔ"
77 | },
78 | {
79 | "in": "э",
80 | "out": "e"
81 | },
82 | {
83 | "in": "ю",
84 | "out": "u"
85 | },
86 | {
87 | "in": "я",
88 | "out": "a"
89 | },
90 | {
91 | "in": "ё",
92 | "out": "o"
93 | },
94 | {
95 | "in": "ӄ",
96 | "out": "q"
97 | },
98 | {
99 | "in": "ӈ",
100 | "out": "ŋ"
101 | },
102 | {
103 | "in": "ԓ",
104 | "out": "ɬ"
105 | }
106 | ]
107 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ckt/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Chukchi
3 | mappings:
4 | - display_name: Chukchi to IPA
5 | in_lang: ckt
6 | out_lang: ckt-ipa
7 | type: mapping
8 | authors:
9 | - Vasilisa Andrianets
10 | - Patrick Littell
11 | rules_path: ckt_to_ipa.json
12 | <<: *shared
13 | - display_name: Chukchi IPA to English IPA
14 | in_lang: ckt-ipa
15 | out_lang: eng-ipa
16 | type: mapping
17 | rules_path: ckt_ipa_to_eng_ipa.json
18 | <<: *shared
19 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/clc/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Tsilhqot'in
3 | mappings:
4 | - display_name: Doulos
5 | rules_path: doulos.csv
6 | in_lang: clc-doulos
7 | out_lang: clc
8 | <<: *shared
9 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/clc/doulos.csv:
--------------------------------------------------------------------------------
1 | @,ŝ
2 | #,ŵ
3 | \^,ẑ
4 | &,ɨ
5 | /,ʔ
6 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/clm/clm_equiv.csv:
--------------------------------------------------------------------------------
1 | ’,̕
2 | ‘,̕
3 | ˊ,̕
4 | `,̕
5 | ́,̕
6 | ̒,̕
7 | ̓,̕
8 | ̔,̕
9 | ̕,̕
10 | ̛,̕
11 | ʻ,̕
12 | ʼ,̕
13 | ʽ,̕
14 | ʹ,̕
15 | ',̕
16 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/clm/clm_to_ipa.csv:
--------------------------------------------------------------------------------
1 | u̕,ˈɔ,,ʔ|h|y̕
2 | u,ɔ,,ʔ|h|y̕
3 | ə̕,ˈʌ
4 | ə,ɨ,č|š|č̕,č|š
5 | ə,ʊ,,[kqx]\u0323?\u0315?\u030C?ʷ
6 | ə,ə
7 | ʔ,ʔ
8 | a̕,ˈa
9 | a,a
10 | č̕,tʃʼ
11 | c̕,tsʼ
12 | č,tʃ
13 | c,ts
14 | e̕,ˈɛ
15 | e,ɛ
16 | h,h
17 | i̕,ˈi
18 | i,i
19 | k̕ʷ,kʼʷ
20 | kʷ,kʷ
21 | k,k
22 | l,l
23 | ɬ,ɬ
24 | ƛ̕,tɬʼ
25 | m̕,m̰
26 | m,m
27 | n̕,n̰
28 | n,n
29 | ŋ̕,ɴ̰
30 | ŋ,ɴ
31 | p̕,pʼ
32 | p,p
33 | q̕ʷ,qʼʷ
34 | qʷ,qʷ
35 | q̕,qʼ
36 | q,q
37 | š,ʃ
38 | s,s
39 | t̕,tʼ
40 | t,t
41 | u̕,ˈu
42 | u,u
43 | w̕,w̰
44 | w,w
45 | x̌ʷ,χʷ
46 | x̌,χ
47 | x̣ʷ,χʷ
48 | x̣,χ
49 | xʷ,xʷ
50 | y̕,j̰
51 | y,j
52 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/clm/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Klallam
3 | mappings:
4 | - display_name: Klallam to IPA
5 | rules_path: clm_to_ipa.csv
6 | in_lang: clm-equiv
7 | out_lang: clm-ipa
8 | authors:
9 | - Eric Joanis (coding)
10 | - Timothy Montler (linguistic data)
11 | type: mapping
12 | prevent_feeding: true
13 | rule_ordering: as-written
14 | norm_form: NFD
15 | <<: *shared
16 | - display_name: Unicode Equivalencies
17 | in_lang: clm
18 | out_lang: clm-equiv
19 | authors:
20 | - Eric Joanis
21 | rules_path: clm_equiv.csv
22 | norm_form: NFD
23 | <<: *shared
24 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crg/abbreviations.csv:
--------------------------------------------------------------------------------
1 | VOWEL,i,a,e,u,o,ɑː,æ,ɛ,eː,ɪ,iː,oː,uː
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crg/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Michif
3 | mappings:
4 | - display_name: Michif Turtle Mountain Dictionary (TMD) to Michif IPA
5 | rules_path: crg-tmd-to-crg-ipa.csv
6 | in_lang: crg-tmd
7 | out_lang: crg-ipa
8 | case_sensitive: false
9 | rule_ordering: as-written
10 | prevent_feeding: true
11 | abbreviations_path: abbreviations.csv
12 | authors:
13 | - Fineen Davis
14 | - Olivia Sammons
15 | - Heather Souter
16 | - Christopher Cox
17 | <<: *shared
18 | - display_name: Michif Double Vowel (DV) to Michif IPA
19 | rules_path: crg-dv-to-crg-ipa.csv
20 | in_lang: crg-dv
21 | out_lang: crg-ipa
22 | rule_ordering: as-written
23 | prevent_feeding: true
24 | case_sensitive: false
25 | authors:
26 | - Fineen Davis
27 | - Olivia Sammons
28 | - Heather Souter
29 | - Christopher Cox
30 | <<: *shared
31 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crg/crg-dv-to-crg-ipa.csv:
--------------------------------------------------------------------------------
1 | g,ŋ,ñ
2 | aeñ,ɛ̃ː
3 | ooñ,ɔ̃ː
4 | oñ,ɔ̃ː
5 | hp,ʰp
6 | ht,ʰt
7 | hk,ʰk
8 | sh,ʃ
9 | zh,ʒ
10 | hch,ʰtʃ
11 | ch,tʃ
12 | uu,uː
13 | aañ,ɑ̃ː
14 | añ,ɑ̃ː
15 | iiñ,ĩː
16 | aw,aw
17 | ay,aj
18 | aa,ɑː
19 | ae,æ
20 | ee,eː
21 | oo,oː
22 | ii,iː
23 | oe,ʌː
24 | a,ʌ
25 | i,ɪ
26 | o,o
27 | u,ʊ
28 | y,j
29 | j,dʒ
30 | e,ɛ
31 | r,ɹ
32 | g,ɡ
33 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crg/crg-tmd-to-crg-ipa.csv:
--------------------------------------------------------------------------------
1 | g,ŋ,n
2 | n,,in|an|en|un|on|ɑːn|æn|ɛn|eːn|ɪn|iːn|oːn|uːn
3 | aen,ɛ̃ː
4 | awn,ɑ̃ː
5 | een,ĩː
6 | oun,ɔ̃
7 | oow,oaw
8 | ow,aw
9 | uy,aj
10 | aw,ɑː
11 | wy,waj
12 | ae,æ
13 | ee,iː
14 | ay,eː
15 | oo,uː
16 | in,ĩ
17 | hp,ʰp
18 | ht,ʰt
19 | hk,ʰk
20 | sh,ʃ
21 | zh,ʒ
22 | hch,ʰtʃ
23 | ch,tʃ
24 | e,ɛ
25 | i,ɪ
26 | j,dʒ
27 | y,j
28 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crj/README.md:
--------------------------------------------------------------------------------
1 | Language-specific files for Southern East Cree
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crj/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: East Cree, Southern
3 | mappings:
4 | - display_name: Southern East Cree Equivalencies
5 | in_lang: crj
6 | out_lang: crj-equiv
7 | type: mapping
8 | authors:
9 | - Delasie Torkornoo
10 | - Aidan Pine
11 | - Eric Joanis
12 | rules_path: crj_equiv.json
13 | rule_ordering: as-written
14 | case_sensitive: false
15 | <<: *shared
16 | - display_name: Southern East Cree to IPA
17 | in_lang: crj-equiv
18 | out_lang: crj-ipa
19 | type: mapping
20 | authors:
21 | - David Huggins-Daines
22 | - Patrick Littell
23 | rules_path: crj_to_ipa.json
24 | rule_ordering: apply-longest-first
25 | case_sensitive: false
26 | <<: *shared
27 | - display_name: Southern East Cree IPA to English IPA
28 | in_lang: crj-ipa
29 | out_lang: eng-ipa
30 | type: mapping
31 | rules_path: crj_ipa_to_eng_ipa.json
32 | rule_ordering: apply-longest-first
33 | case_sensitive: false
34 | <<: *shared
35 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crk/README.md:
--------------------------------------------------------------------------------
1 | Language-specific files for Plains Cree (nêhiyawêwin)
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crk/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Plains Cree
3 | mappings:
4 | - display_name: Plains Cree (SRO) to IPA
5 | in_lang: crk-no-symbols
6 | out_lang: crk-ipa
7 | type: mapping
8 | norm_form: NFD
9 | rule_ordering: as-written
10 | prevent_feeding: true
11 | authors:
12 | - Eddie Antonio Santos
13 | rules_path: crk-no-symbols_to_ipa.json
14 | <<: *shared
15 | - display_name: Plains Cree Symbols to SRO
16 | in_lang: crk
17 | out_lang: crk-no-symbols
18 | type: mapping
19 | norm_form: NFD
20 | rule_ordering: as-written
21 | prevent_feeding: true
22 | escape_special: true
23 | authors:
24 | - Aidan Pine
25 | rules_path: crk_to_crk-no-symbols.json
26 | <<: *shared
27 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crk/crk-no-symbols_to_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "in": "ê",
4 | "out": "eː"
5 | },
6 | {
7 | "in": "î",
8 | "out": "iː"
9 | },
10 | {
11 | "in": "ô",
12 | "out": "oː"
13 | },
14 | {
15 | "in": "â",
16 | "out": "aː"
17 | },
18 | {
19 | "in": "i",
20 | "out": "ɪ"
21 | },
22 | {
23 | "in": "o",
24 | "out": "o"
25 | },
26 | {
27 | "in": "a",
28 | "out": "ʌ"
29 | },
30 | {
31 | "in": "p",
32 | "out": "p"
33 | },
34 | {
35 | "in": "t",
36 | "out": "t"
37 | },
38 | {
39 | "in": "k",
40 | "out": "k"
41 | },
42 | {
43 | "in": "m",
44 | "out": "m"
45 | },
46 | {
47 | "in": "n",
48 | "out": "n"
49 | },
50 | {
51 | "in": "c",
52 | "out": "t͡s"
53 | },
54 | {
55 | "in": "s",
56 | "out": "s"
57 | },
58 | {
59 | "in": "h",
60 | "out": "h"
61 | },
62 | {
63 | "in": "y",
64 | "out": "j"
65 | },
66 | {
67 | "in": "w",
68 | "out": "w"
69 | }
70 | ]
71 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crk/crk_to_crk-no-symbols.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "in": "&",
4 | "out": "êkwa"
5 | }
6 | ]
7 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crl/README.md:
--------------------------------------------------------------------------------
1 | Language-specific files for Northern East Cree
2 |
3 | AP: There seems to be a problem here with normalization. Most of the rules for long vowels are declared with \u1427 "canadian syllabics final middle dot", so ᐧᐋ is a sequence of \u1427\u140B, but there also appears to be a specific code point for waa: \u1419. I've added a crl_norm.json that normalizes the sequence to the single codepoint for that character and changed the crl_to_ipa.json mapping to use \u1419 instead of \u1427\u140B, but I'm not sure if this was the right choice. Either way, there needs to be some sort of normalization step here to handle real world input.
4 |
5 |
6 | DT: I have fixed the mappings so that all the w syllables use the single Unicode character instead of the two-character Unicode sequence (\u1427 plus a Unicode character).
7 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crl/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: East Cree, Northern
3 | mappings:
4 | - display_name: Northern East Cree Equivalencies
5 | in_lang: crl
6 | out_lang: crl-equiv
7 | type: mapping
8 | authors:
9 | - Delasie Torkornoo
10 | - Aidan Pine
11 | - Eric Joanis
12 | rules_path: crl_equiv.json
13 | rule_ordering: as-written
14 | case_sensitive: false
15 | <<: *shared
16 | - display_name: Northern East Cree to IPA
17 | in_lang: crl-equiv
18 | out_lang: crl-ipa
19 | type: mapping
20 | authors:
21 | - David Huggins-Daines
22 | - Patrick Littell
23 | - Delasie Torkornoo
24 | rules_path: crl_to_ipa.json
25 | rule_ordering: apply-longest-first
26 | case_sensitive: false
27 | <<: *shared
28 | - display_name: Northern East Cree IPA to English IPA
29 | in_lang: crl-ipa
30 | out_lang: eng-ipa
31 | type: mapping
32 | rules_path: crl_ipa_to_eng_ipa.json
33 | rule_ordering: apply-longest-first
34 | case_sensitive: false
35 | <<: *shared
36 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crm/README.md:
--------------------------------------------------------------------------------
1 | Language-specific files for Moose Cree
2 |
3 | Authors:
4 | Delasie Torkornoo,
5 | Bradley Ellert
6 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crm/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Moose Cree
3 | mappings:
4 | - display_name: Moose Cree Equivalencies
5 | in_lang: crm
6 | out_lang: crm-equiv
7 | type: mapping
8 | authors:
9 | - Delasie Torkornoo
10 | - Bradley Ellert
11 | - Aidan Pine
12 | rules_path: crm_equiv.json
13 | case_sensitive: false
14 | <<: *shared
15 | - display_name: Moose Cree to IPA
16 | in_lang: crm-equiv
17 | out_lang: crm-ipa
18 | type: mapping
19 | authors:
20 | - David Huggins-Daines
21 | - Patrick Littell
22 | - Delasie Torkornoo
23 | - Bradley Ellert
24 | rules_path: crm_to_ipa.json
25 | case_sensitive: false
26 | <<: *shared
27 | - display_name: Moose Cree IPA to English IPA
28 | in_lang: crm-ipa
29 | out_lang: eng-ipa
30 | type: mapping
31 | rules_path: crm_ipa_to_eng_ipa.json
32 | rule_ordering: as-written
33 | case_sensitive: false
34 | <<: *shared
35 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/crx/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Stella Nadleh
3 | mappings:
4 | - display_name: Roman to Syllabics
5 | rules_path: stella_orth_to_syllabics.csv
6 | in_lang: crx-sro
7 | out_lang: crx-syl
8 | case_sensitive: false
9 | authors:
10 | - Aidan Pine
11 | <<: *shared
12 | - display_name: Syllabics to Roman
13 | rules_path: stella_orth_to_syllabics.csv
14 | in_lang: crx-syl
15 | out_lang: crx-sro
16 | case_sensitive: false
17 | reverse: true
18 | authors:
19 | - Aidan Pine
20 | <<: *shared
21 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/csw/README.md:
--------------------------------------------------------------------------------
1 | Language-specific files for Swampy Cree
2 |
3 | Authors:
4 | Delasie Torkornoo,
5 | Bradley Ellert
6 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/csw/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Swampy Cree
3 | mappings:
4 | - display_name: Swampy Cree Equivalencies
5 | in_lang: csw
6 | out_lang: csw-equiv
7 | type: mapping
8 | authors:
9 | - Delasie Torkornoo
10 | - Bradley Ellert
11 | - Aidan Pine
12 | rules_path: csw_equiv.json
13 | <<: *shared
14 | - display_name: Swampy Cree to IPA
15 | in_lang: csw-equiv
16 | out_lang: csw-ipa
17 | type: mapping
18 | authors:
19 | - David Huggins-Daines
20 | - Patrick Littell
21 | - Delasie Torkornoo
22 | - Bradley Ellert
23 | rules_path: csw_to_ipa.json
24 | <<: *shared
25 | - display_name: Swampy Cree IPA to English IPA
26 | in_lang: csw-ipa
27 | out_lang: eng-ipa
28 | type: mapping
29 | rules_path: csw_ipa_to_eng_ipa.json
30 | rule_ordering: as-written
31 | <<: *shared
32 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ctp/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Western Highland Chatino
3 | mappings:
4 | - display_name: Western Highland Chatino to IPA
5 | in_lang: ctp
6 | out_lang: ctp-ipa
7 | type: mapping
8 | authors:
9 | - Patrick Littell
10 | rules_path: ctp_to_ipa.json
11 | case_sensitive: false
12 | rule_ordering: as-written
13 | <<: *shared
14 | - display_name: Western Highland Chatino IPA to English IPA
15 | in_lang: ctp-ipa
16 | out_lang: eng-ipa
17 | type: mapping
18 | rules_path: ctp_ipa_to_eng_ipa.json
19 | rule_ordering: as-written
20 | <<: *shared
21 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ctp/ctp_ipa_to_eng_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "in": "a",
4 | "out": "ɑ"
5 | },
6 | {
7 | "in": "ã",
8 | "out": "ɑ̃"
9 | },
10 | {
11 | "in": "d̻",
12 | "out": "d"
13 | },
14 | {
15 | "in": "d͡z",
16 | "out": "dz"
17 | },
18 | {
19 | "in": "ẽ",
20 | "out": "1"
21 | },
22 | {
23 | "in": "e",
24 | "out": "eː"
25 | },
26 | {
27 | "in": "1",
28 | "out": "ẽ"
29 | },
30 | {
31 | "in": "kʲ",
32 | "out": "kj"
33 | },
34 | {
35 | "in": "kʷ",
36 | "out": "kw"
37 | },
38 | {
39 | "in": "l̻",
40 | "out": "l"
41 | },
42 | {
43 | "in": "n̻",
44 | "out": "n"
45 | },
46 | {
47 | "in": "õ",
48 | "out": "õː",
49 | "prevent_feeding": true
50 | },
51 | {
52 | "in": "o",
53 | "out": "oː"
54 | },
55 | {
56 | "in": "r",
57 | "out": "ɾ"
58 | },
59 | {
60 | "in": "t̻",
61 | "out": "t"
62 | },
63 | {
64 | "in": "t͡s",
65 | "out": "ts"
66 | },
67 | {
68 | "in": "t͡ʃ",
69 | "out": "tʃ"
70 | }
71 | ]
72 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ctp/ctp_to_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "in": "ty",
4 | "out": "t̻"
5 | },
6 | {
7 | "in": "dy",
8 | "out": "d̻"
9 | },
10 | {
11 | "in": "kw",
12 | "out": "kʷ",
13 | "prevent_feeding": true
14 | },
15 | {
16 | "in": "k",
17 | "out": "kʲ"
18 | },
19 | {
20 | "in": "q",
21 | "out": "ʔ"
22 | },
23 | {
24 | "in": "ts",
25 | "out": "t͡s"
26 | },
27 | {
28 | "in": "dz",
29 | "out": "d͡z"
30 | },
31 | {
32 | "in": "ch",
33 | "out": "t͡ʃ"
34 | },
35 | {
36 | "in": "x",
37 | "out": "ʃ"
38 | },
39 | {
40 | "in": "j",
41 | "out": "h"
42 | },
43 | {
44 | "in": "ny",
45 | "out": "n̻"
46 | },
47 | {
48 | "in": "l",
49 | "out": "l̻"
50 | },
51 | {
52 | "in": "y",
53 | "out": "j"
54 | },
55 | {
56 | "in": "an",
57 | "out": "ã"
58 | },
59 | {
60 | "in": "en",
61 | "out": "ẽ"
62 | },
63 | {
64 | "in": "in",
65 | "out": "ĩ"
66 | },
67 | {
68 | "in": "on",
69 | "out": "õ"
70 | },
71 | {
72 | "in": "un",
73 | "out": "ũ"
74 | },
75 | {
76 | "in": "ᴬ",
77 | "out": ""
78 | },
79 | {
80 | "in": "ᴮ",
81 | "out": ""
82 | },
83 | {
84 | "in": "ᶜ",
85 | "out": ""
86 | },
87 | {
88 | "in": "ᴰ",
89 | "out": ""
90 | },
91 | {
92 | "in": "ᴱ",
93 | "out": ""
94 | },
95 | {
96 | "in": "ᶠ",
97 | "out": ""
98 | },
99 | {
100 | "in": "ᴳ",
101 | "out": ""
102 | },
103 | {
104 | "in": "ᴴ",
105 | "out": ""
106 | },
107 | {
108 | "in": "ᴵ",
109 | "out": ""
110 | },
111 | {
112 | "in": "ᴶ",
113 | "out": ""
114 | }
115 | ]
116 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/dan/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Danish
3 | mappings:
4 | - display_name: Danish to IPA
5 | in_lang: dan
6 | out_lang: dan-ipa
7 | type: mapping
8 | authors:
9 | - Aidan Pine
10 | rules_path: dan_to_ipa.csv
11 | abbreviations_path: dan_abbs.csv
12 | rule_ordering: as-written
13 | case_sensitive: false
14 | norm_form: 'none'
15 | <<: *shared
16 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/dan/dan_abbs.csv:
--------------------------------------------------------------------------------
1 | VOWEL,a,e,i,o,u,æ,å,ø
2 | CONSONANT,p,b,t,d,k,g,f,s,h,v,j,r,l,m,n
3 | FRONT,i,e,œ,ø,y
4 | BACK,u,o,a
5 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/dan/dan_to_dummy.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "in": "o",
4 | "out": "u"
5 | },
6 | {
7 | "in": "\u0061\u030a",
8 | "out": "u"
9 | },
10 | {
11 | "in": "\u00e5",
12 | "out": "u"
13 | },
14 | {
15 | "in": "d",
16 | "out": "t"
17 | },
18 | {
19 | "in": "h",
20 | "out": "s"
21 | },
22 | {
23 | "in": "f",
24 | "out": "s"
25 | },
26 | {
27 | "in": "b",
28 | "out": "t"
29 | },
30 | {
31 | "in": "k",
32 | "out": "t"
33 | },
34 | {
35 | "in": "l",
36 | "out": "n"
37 | },
38 | {
39 | "in": "v",
40 | "out": "s"
41 | },
42 | {
43 | "in": "m",
44 | "out": "n"
45 | },
46 | {
47 | "in": "j",
48 | "out": "t"
49 | },
50 | {
51 | "in": "t",
52 | "out": "t"
53 | },
54 | {
55 | "in": "g",
56 | "out": "t"
57 | },
58 | {
59 | "in": "g",
60 | "out": "t"
61 | },
62 | {
63 | "in": "g",
64 | "out": "t"
65 | },
66 | {
67 | "in": "r",
68 | "out": "n"
69 | },
70 | {
71 | "in": "r",
72 | "out": "n"
73 | },
74 | {
75 | "in": "c",
76 | "out": "t"
77 | },
78 | {
79 | "in": "e",
80 | "out": "i"
81 | },
82 | {
83 | "in": "a",
84 | "out": "\u0251"
85 | },
86 | {
87 | "in": "a",
88 | "out": "\u0251"
89 | },
90 | {
91 | "in": "a",
92 | "out": "\u0251"
93 | },
94 | {
95 | "in": "\u00f8",
96 | "out": "u"
97 | },
98 | {
99 | "in": "y",
100 | "out": "i"
101 | },
102 | {
103 | "in": "p",
104 | "out": "t"
105 | }
106 | ]
107 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/dan/dan_to_ipa.csv:
--------------------------------------------------------------------------------
1 | dd,ð,VOWEL,VOWEL
2 | tt,d,VOWEL,VOWEL
3 | ng,ŋ
4 | nk,ŋ
5 | sc,s
6 | r,ʁ,,VOWEL
7 | r,ɐ̯,VOWEL
8 | o,ɔ
9 | \u0061\u030A,oː
10 | \u00E5,oː
11 | d,ð̠˕ˠ,VOWEL,\b|CONSONANT|FRONT
12 | t,ð,VOWEL,\b|CONSONANT
13 | g,ɪ̯,FRONT
14 | g,ʊ̯,BACK
15 | af,a,\b,\b
16 | g,,,\b
17 | g,ɡ
18 | r,ɐ̯,,\b
19 | c,s
20 | e,ɛ
21 | a,æ
22 | a,ɑ,r
23 | a,ɑ,,r
24 | r,ʁ
25 | ø,œ
26 | y,u
27 | p,p
28 | (œ|ɑ|æ|ɛ|ʊ̯|ɪ̯|ɔ|VOWEL),,,ɐ̯
29 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/eng/README.md:
--------------------------------------------------------------------------------
1 | Aligned CMUDict for G2P
2 | -----------------------
3 |
4 | The alignments in `cmudict.ipa.aligned.txt` were generated by
5 | [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus),
6 | as seen in [`make_alignments.sh`](./make_alignments.sh)
7 |
8 | CMUDict was obtained from https://github.com/cmusphinx/cmudict and has
9 | this license (2-clause BSD, compatible with G2P):
10 |
11 | Copyright (C) 1993-2015 Carnegie Mellon University. All rights reserved.
12 |
13 | Redistribution and use in source and binary forms, with or without
14 | modification, are permitted provided that the following conditions
15 | are met:
16 |
17 | 1. Redistributions of source code must retain the above copyright
18 | notice, this list of conditions and the following disclaimer.
19 | The contents of this file are deemed to be source code.
20 |
21 | 2. Redistributions in binary form must reproduce the above copyright
22 | notice, this list of conditions and the following disclaimer in
23 | the documentation and/or other materials provided with the
24 | distribution.
25 |
26 | This work was supported in part by funding from the Defense Advanced
27 | Research Projects Agency, the Office of Naval Research and the National
28 | Science Foundation of the United States of America, and by member
29 | companies of the Carnegie Mellon Sphinx Speech Consortium. We acknowledge
30 | the contributions of many volunteers to the expansion and improvement of
31 | this dictionary.
32 |
33 | THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
34 | ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
35 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
36 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
37 | NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
38 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
39 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
40 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
41 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
42 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
43 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/eng/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: English
3 | mappings:
4 | - display_name: Dummy to Arpabet
5 | in_lang: dummy
6 | out_lang: dummy-eng-arpabet
7 | out_delimiter: " "
8 | type: mapping
9 | norm_form: "NFC"
10 | authors:
11 | - Aidan Pine
12 | rules_path: dummy_to_arpabet.json
13 | rule_ordering: apply-longest-first
14 | <<: *shared
15 | - display_name: Hamming Dummy to Arpabet
16 | in_lang: hamming-dummy
17 | out_lang: hamming-dummy-eng-arpabet
18 | out_delimiter: " "
19 | type: mapping
20 | norm_form: "NFC"
21 | authors:
22 | - Aidan Pine
23 | rules_path: dummy_to_arpabet.json
24 | rule_ordering: apply-longest-first
25 | <<: *shared
26 | - display_name: English IPA to Arpabet
27 | in_lang: eng-ipa
28 | out_lang: eng-arpabet
29 | out_delimiter: " "
30 | type: mapping
31 | norm_form: "NFC"
32 | authors:
33 | - Patrick Littell
34 | rules_path: eng_ipa_to_arpabet.json
35 | rule_ordering: apply-longest-first
36 | <<: *shared
37 | - display_name: English IPA to Arpabet
38 | in_lang: hamming-eng-ipa
39 | out_lang: hamming-eng-arpabet
40 | out_delimiter: " "
41 | type: mapping
42 | norm_form: "NFC"
43 | authors:
44 | - Patrick Littell
45 | rules_path: eng_ipa_to_arpabet.json
46 | rule_ordering: apply-longest-first
47 | <<: *shared
48 | - display_name: English to IPA
49 | type: lexicon
50 | alignments_path: cmudict.ipa.aligned.txt
51 | in_lang: eng
52 | out_lang: eng-ipa
53 | case_sensitive: false
54 | norm_form: "NFC"
55 | authors:
56 | - David Huggins-Daines
57 | <<: *shared
58 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/eng/dummy_to_arpabet.json:
--------------------------------------------------------------------------------
1 | [{
2 | "in": "ɑ",
3 | "out": "AA"
4 | },
5 | {
6 | "in": "i",
7 | "out": "IY"
8 | },
9 | {
10 | "in": "u",
11 | "out": "UW"
12 | },
13 | {
14 | "in": "s",
15 | "out": "S"
16 | },
17 | {
18 | "in": "t",
19 | "out": "T"
20 | },
21 | {
22 | "in": "n",
23 | "out": "N"
24 | }
25 | ]
26 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/eng/eng_arpabet_to_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | { "in": "AA", "out": "ɑ" },
3 | { "in": "AE", "out": "æ" },
4 | { "in": "AH", "out": "ʌ" },
5 | { "in": "AO", "out": "ɔ" },
6 | { "in": "AW", "out": "aʊ" },
7 | { "in": "AY", "out": "aɪ" },
8 | { "in": "EH", "out": "ɛ" },
9 | { "in": "ER", "out": "ɜ˞" },
10 | { "in": "EY", "out": "eɪ" },
11 | { "in": "IH", "out": "ɪ" },
12 | { "in": "IX", "out": "ɨ" },
13 | { "in": "IY", "out": "i" },
14 | { "in": "OW", "out": "oʊ" },
15 | { "in": "OY", "out": "ɔɪ" },
16 | { "in": "UH", "out": "ʊ" },
17 | { "in": "UW", "out": "u" },
18 | { "in": "B", "out": "b" },
19 | { "in": "CH", "out": "tʃ" },
20 | { "in": "D", "out": "d" },
21 | { "in": "DH", "out": "ð" },
22 | { "in": "F", "out": "f" },
23 | { "in": "G", "out": "ɡ" },
24 | { "in": "HH", "out": "h" },
25 | { "in": "JH", "out": "dʒ" },
26 | { "in": "K", "out": "k" },
27 | { "in": "L", "out": "l" },
28 | { "in": "M", "out": "m" },
29 | { "in": "N", "out": "n" },
30 | { "in": "NG", "out": "ŋ" },
31 | { "in": "P", "out": "p" },
32 | { "in": "R", "out": "ɹ" },
33 | { "in": "S", "out": "s" },
34 | { "in": "SH", "out": "ʃ" },
35 | { "in": "T", "out": "t" },
36 | { "in": "TH", "out": "θ" },
37 | { "in": "V", "out": "v" },
38 | { "in": "W", "out": "w" },
39 | { "in": "Y", "out": "j" },
40 | { "in": "Z", "out": "z" },
41 | { "in": "ZH", "out": "ʒ" }
42 | ]
43 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/eng/eng_inventory.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "inventory",
3 | "authors": ["CMU Sphinx team"],
4 | "created": "2019-04-11",
5 | "last_modified": "2019-04-11",
6 | "metadata": {
7 | "display_name": "English",
8 | "display": true,
9 | "lang": "eng",
10 | "format": "custom",
11 | "delimiter": ""
12 | },
13 | "inventory": [ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
14 | "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" ]
15 | }
16 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/eng/make_alignments.sh:
--------------------------------------------------------------------------------
#!/bin/sh
#
# Regenerate cmudict.ipa.aligned.txt: convert CMUDict to IPA with
# make_ipa_cmudict.py, then align the result with phonetisaurus-align.
# Run from the directory containing this script, since all helper and
# output paths below are relative to the current directory.

# Stop at the first failing command so a failed conversion never feeds an
# empty or partial tmp.txt to the aligner.
set -e

# Replace with actual path! (or set CMUDICT in the environment)
# cmudict.dict is retrievable from https://github.com/cmusphinx/cmudict/blob/master/cmudict.dict
CMUDICT=${CMUDICT:-../../../../../cmudict/cmudict.dict}

# Install Phonetisaurus with `pip install phonetisaurus`
# Assign and export separately: a failure of the command substitution is then
# visible to `set -e`, and the value is not subject to field splitting.
PATH=$(python -c 'import phonetisaurus as p; print(p.guess_environment()["PATH"])')
export PATH
LD_LIBRARY_PATH=$(python -c 'import phonetisaurus as p; print(p.guess_environment()["LD_LIBRARY_PATH"])')
export LD_LIBRARY_PATH

# Remove the intermediate file however the script exits.
trap 'rm -f tmp.txt' EXIT

python make_ipa_cmudict.py < "$CMUDICT" > tmp.txt
phonetisaurus-align --s1_char_delim="" --s2_char_delim="" \
    --seq1_del=true --seq2_del=true --seq1_max=2 --seq2_max=2 \
    --iter=5 --input=tmp.txt --ofile=cmudict.ipa.aligned.txt
15 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/eng/make_ipa_cmudict.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

"""Convert CMUDict to IPA, removing stress markers.  We can only handle
one pronunciation at a time so just take the first one.

Entries are read with ``fileinput`` (files named on the command line, or
standard input when there are none) and written to standard output as
``word<TAB>ipa`` lines.  The ARPABET-to-IPA table is loaded from
``eng_arpabet_to_ipa.json`` in the current directory.
"""

import fileinput
import json
import re

# Everything from "#" to the end of the line is a comment.
COMMENT_RE = re.compile(r"#.*$")
# word, optional "(N)" alternate-pronunciation marker, then the phone string.
ENTRY_RE = re.compile(r"^(\S+?)(\(\d+\))?\s+(.*)$")
# Trailing digits on a phone (the stress marker).
STRESS_RE = re.compile(r"\d+$")


def load_ipa_map(path="eng_arpabet_to_ipa.json"):
    """Load the mapping rules at *path* into an ``{"in": "out"}`` lookup dict.

    The file is read explicitly as UTF-8 because the IPA values are
    non-ASCII and the platform default encoding is not always UTF-8.
    """
    with open(path, encoding="utf-8") as f:
        rules = json.load(f)
    return {rule["in"]: rule["out"] for rule in rules}


def convert_entry(line, ipa_map):
    """Convert one CMUDict line to a ``(word, ipa)`` tuple.

    Returns ``None`` for lines that are not entries (blank or comment-only)
    and for alternate pronunciations, i.e. words carrying a ``(N)`` suffix.

    Raises:
        KeyError: if a phone, once its stress digits are removed, is not a
            key of *ipa_map*.
    """
    match = ENTRY_RE.match(COMMENT_RE.sub("", line.strip()))
    if match is None:
        return None
    word, alt, phones = match.groups()
    if alt is not None:  # skip alternates
        return None
    ipa = "".join(ipa_map[STRESS_RE.sub("", phone)] for phone in phones.split())
    return word, ipa


def main():
    """Read CMUDict entries and print one ``word<TAB>ipa`` line per entry."""
    ipa_map = load_ipa_map()
    for line in fileinput.input():
        entry = convert_entry(line, ipa_map)
        if entry is not None:
            print("\t".join(entry))


if __name__ == "__main__":
    main()
28 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/eng/reverse_json.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

"""Print the reverse of the rules in ``eng_ipa_to_arpabet.json``.

Each rule's "out" value becomes the "in" of a printed rule and vice
versa.  Only the first rule for a given "out" value is kept, and rules
whose "out" value contains a space are dropped.  The result is written to
standard output as a JSON array with one rule per line.
"""

import json


def reverse_mappings(mappings):
    """Return ``(new_in, new_out)`` pairs for the reversed rule list.

    *mappings* is a list of dicts with "in" and "out" keys.  A rule is
    skipped when its "out" value was already seen on an earlier rule, or
    when its "out" value contains a space.
    """
    seen = set()
    pairs = []
    for rule in mappings:
        target = rule["out"]
        if target in seen:
            continue
        seen.add(target)
        if " " in target:
            continue
        pairs.append((target, rule["in"]))
    return pairs


def format_mappings(pairs):
    """Render *pairs* as a JSON array, one ``{ "in": ..., "out": ... }`` per line.

    Commas are placed between the entries that are actually emitted, so the
    last entry never carries a trailing comma even when the final input
    rules were skipped.  Values go through ``json.dumps`` so quotes and
    backslashes are escaped; ``ensure_ascii=False`` keeps IPA characters
    readable.
    """
    entries = [
        ' { "in": %s, "out": %s }'
        % (
            json.dumps(new_in, ensure_ascii=False),
            json.dumps(new_out, ensure_ascii=False),
        )
        for new_in, new_out in pairs
    ]
    lines = ["["]
    lines.extend(entry + "," for entry in entries[:-1])
    lines.extend(entries[-1:])
    lines.append("]")
    return "\n".join(lines)


def main():
    """Load the rule file from the current directory and print its reverse."""
    # Read explicitly as UTF-8: the rules contain non-ASCII IPA characters.
    with open("eng_ipa_to_arpabet.json", encoding="utf-8") as f:
        mappings = json.load(f)
    print(format_mappings(reverse_mappings(mappings)))


if __name__ == "__main__":
    main()
20 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/fin/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | - language_name: Finnish
3 | mappings:
4 | - display_name: Finnish to IPA
5 | in_lang: fin
6 | out_lang: fin-ipa
7 | authors:
8 | - David Huggins-Daines
9 | type: mapping
10 | rules_path: fin_to_ipa.csv
11 | rule_ordering: apply-longest-first
12 | case_sensitive: false
13 | norm_form: 'NFC'
14 | <<: *shared
15 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/fin/fin_to_ipa.csv:
--------------------------------------------------------------------------------
1 | aa,ɑː,,
2 | ai,ɑɪ,,
3 | au,ɑʊ,,
4 | a,ɑ,,
5 | b,b,,
6 | c,s,,
7 | d,d,,
8 | ee,eː,,
9 | ei,eɪ,,
10 | eu,eʊ,,
11 | e,e,,
12 | f,f,,
13 | g,ɡ,,
14 | h,h,,
15 | ii,iː,,
16 | ie,iɛ,,
17 | i,i,,
18 | j,j,,
19 | k,k,,
20 | l,l,,
21 | m,m,,
22 | n,n,,
23 | oo,oː,,
24 | ou,oʊ,,
25 | oi,ɔɪ,,
26 | o,o,,
27 | p,p,,
28 | q,k,,
29 | r,r,,
30 | s,s,,
31 | t,t,,
32 | uu,uː,,
33 | uo,ʊɔ,,
34 | ui,ʊɪ,,
35 | u,u,,
36 | v,ʋ,,
37 | w,ʋ,,
38 | x,ks,,
39 | yy,yː,,
40 | y,y,,
41 | z,t͡s,,
42 | å,o,,
43 | ää,æː,,
44 | äy,æɥ,,
45 | ä,æ,,
46 | öö,øː,,
47 | öy,øɥ,,
48 | ö,ø,,
49 | n,ŋ,,k
50 | n,ŋ,,g
51 | n,m,,p
52 | n,m,,b
53 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/font-encodings/README.md:
--------------------------------------------------------------------------------
1 | # Font encodings
2 |
3 | Before broad UTF-8 support, communities often resorted to encoding or 'hacking' their characters into a font, abusing a separate Unicode codepoint to render the character in their writing system as needed. This folder is where these types of mappings should be handled.
4 |
5 | Some style guidelines:
6 | * The `in_lang` key should end with `-font`
7 | * If the mapping is general, please use `Undetermined` as the language name
8 |
9 | Currently the following are supported:
10 | * SIL Fonts
11 | - Heiltsuk Doulos
12 | - Heiltsuk Times
13 | - Navajo Times
14 | * [UBC First Nations Unicode Font](https://fnel.arts.ubc.ca/resources/font/)
15 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/font-encodings/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - display_name: Doulos
3 | in_lang: hei-doulos
4 | out_lang: hei
5 | authors:
6 | - Aidan Pine
7 | rules_path: hei_doulos.csv
8 | language_name: Heiltsuk
9 | - display_name: Times
10 | in_lang: hei-times-font
11 | out_lang: hei
12 | authors:
13 | - Aidan Pine
14 | rules_path: hei_times.csv
15 | language_name: Heiltsuk
16 | - display_name: Times
17 | rules_path: nav_times.csv
18 | in_lang: nav-times-font
19 | out_lang: nav
20 | language_name: Navajo
21 | authors:
22 | - Aidan Pine
23 | - display_name: First Nations Unicode
24 | rules_path: fn_unicode.csv
25 | in_lang: fn-unicode-font
26 | out_lang: fn-unicode
27 | norm_form: NFD
28 | escape_special: false
29 | case_sensitive: false
30 | language_name: Undetermined
31 | authors:
32 | - Aidan Pine
33 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/font-encodings/fn_unicode.csv:
--------------------------------------------------------------------------------
1 | \uF000,x\u030c
2 | \uF101,c\u0313
3 | \uF102,c\u030c\u0313
4 | \uF103,g\u0313
5 | \uF104,j\u030c\u0313
6 | \uF105,k\u0313
7 | \uF106,l\u0315
8 | \uF107,m\u0313
9 | \uF108,n\u0313
10 | \uF109,ŋ\u0313
11 | \uF10A,p\u0313
12 | \uF10B,q\u0313
13 | \uF10C,r\u0313
14 | \uF10D,t\u0315
15 | \uF10E,w\u0313
16 | \uF10F,y\u0313
17 | \uF110,z\u0313
18 | \uF111,ƛ\u0313
19 | \uF112,ɣ\u0313
20 | \uF113,ʕ\u0315
21 | \uF114,s\u0313
22 | \uF121,w\u0325
23 | \uF131,ᶻ
24 | \uF132,ᶿ
25 | \uF133,ˡ
26 | \uF141,ə\u0300
27 | \uF142,ə\u0301
28 | \uF181,l\u0329
29 | \uF182,m\u0329
30 | \uF183,n\u0329
31 | \uF184,r\u0329
32 | \uF191,a\u0332
33 | \uF197,g\u0332
34 | \uF19B,k\u0332
35 | \uF1A8,x\u0332
36 | \uF1CB,k\u0332\u0313
37 | \uF204,√
38 | ล,h\u0323
39 | ɤ,ɣ
40 | ∛,·
41 | ∙,·
42 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/font-encodings/hei_doulos.csv:
--------------------------------------------------------------------------------
1 | ¹,p̓
2 | ¡,ṃ́
3 | ™,ṃ
4 | µ,m̓
5 | ²,ṃ̓
6 | †,t̓
7 | ¢,ṇ́
8 | Þ,ṇ
9 | ñ,n̓
10 | ¦,ṇ̓
11 | ç,c̓
12 | ß,λ
13 | Ò,ƛ
14 | ƒ,ƛ̓
15 | æ,ɫ
16 | Æ,Ɫ
17 | ø,ḷ́
18 | ª,ḷ
19 | ¬,l̓
20 | …,ḷ̓
21 | ð,k̓
22 | í,í
23 | ¥,y̓
24 | î,i̓
25 | ðv,k̓v
26 | ú,ú
27 | „,w̓
28 | ü,u̓
29 | ©v,ǧv
30 | œv,q̓v
31 | þv,x̌v
32 | ©,ǧ
33 | œ,q̓
34 | þ,x̌
35 | á,á
36 | Ó,h̓
37 | å,a̓
38 | Ô,ħ
39 | ‰,ʔ
40 | ÿx,x̌
41 | ÿX,X̌
42 | Í,⅄
43 | Î,⅄
44 | Ï,⅄̓
45 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/font-encodings/hei_times.csv:
--------------------------------------------------------------------------------
1 | b,b
2 | p,p
3 | π,p̓
4 | m,m
5 | ¡,ṃ́
6 | ™,ṃ
7 | µ,m̓
8 | ≤,ṃ̓
9 | d,d
10 | †,t̓
11 | n,n
12 | ¢,ṇ́
13 | ∞,ṇ
14 | ñ,n̓
15 | ∫,ṇ̓
16 | z,z
17 | c,c
18 | ç,c̓
19 | s,s
20 | ß,λ
21 | ∂,ƛ
22 | ƒ,ƛ̓
23 | æ,ɫ
24 | l,l
25 | ø,ḷ́
26 | ª,ḷ
27 | ¬,l̓
28 | …,ḷ̓
29 | gv,g
30 | kv,k
31 | ˚,k̓
32 | x,x
33 | y,y
34 | í,í
35 | ¥,y̓
36 | î,i̓
37 | gv,gv
38 | kv,kv
39 | ˚v,k̓v
40 | xv,xv
41 | w,w
42 | ú,ú
43 | u,u
44 | ∑,w̓
45 | ü,u̓
46 | ©v,ǧv
47 | qv,qv
48 | œv,q̓v
49 | ≈v,x̌v
50 | ©,ǧ
51 | q,q
52 | œ,q̓
53 | ≈,x̌
54 | h,h
55 | á,á
56 | a,a
57 | ˙,h̓
58 | å,a̓
59 | ≈,x̌
60 | Í,⅄
61 | Î,⅄
62 | Ï,⅄̓
63 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/font-encodings/nav_times.csv:
--------------------------------------------------------------------------------
1 | 1,á
2 | 2,ą
3 | 3,ą́
4 | 4,é
5 | 5,ę
6 | 6,ę́
7 | 7,í
8 | 8,į
9 | 9,į́
10 | 0,ó
11 | -,ǫ
12 | =,ǫ́
13 | \[,ł
14 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/fra/README.txt:
--------------------------------------------------------------------------------
1 | Notes on French g2p by Eric Joanis
2 |
3 | French vowels are quite complicated, and I had a hard time figuring out how to
4 | catch even the common cases. I think I managed OK, but we should not consider
5 | my g2p map definitive.
6 |
7 | Just one tricky example:
8 | y -> /i/
9 | u -> /y/
10 | ou -> /u/
11 | To get this working without having a cyclical graph undoing my work, I mapped
12 | u->y first, and then oy->u, making sure that these rules occur *after* the
13 | mapping of oy->/wa/ earlier in the list.
14 |
15 | There were a bunch more challenging cases, solved with a best effort here but
16 | not thoroughly tested. Some other temporary changes are done and reset a few
17 | lines lower, e.g., with nasals, so don't be too surprised if you analyze my
18 | rules to find some that don't seem to make sense, at least in isolation.
19 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/fra/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: French
3 | mappings:
4 | - display_name: French to IPA
5 | in_lang: fra
6 | out_lang: fra-ipa
7 | authors:
8 | - Eric Joanis
9 | type: mapping
10 | rules_path: fra_to_ipa.csv
11 | abbreviations_path: fra_abbs.csv
12 | rule_ordering: as-written
13 | case_sensitive: false
14 | norm_form: 'NFC'
15 | <<: *shared
16 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/fra/fra_abbs.csv:
--------------------------------------------------------------------------------
1 | VOWEL,a,e,i,o,u,à,â,æ,è,é,ê,ë,î,ï,ô,œ,ù,û,ü,ÿ,ɛ,ɛː
2 | EI_VOW,e,i,è,é,ê,ë,î,ï,ÿ
3 | AOU_VOW,a,o,u,à,â,æ,ô,œ,ù,û,ü
4 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/fra/fra_to_ipa.csv:
--------------------------------------------------------------------------------
1 | ge,ʒ,,AOU_VOW
2 | g,ʒ,,EI_VOW
3 | s,z,VOWEL,VOWEL
4 | c,s,,EI_VOW
5 | &,et,,
6 | ais,ɛ,,\b
7 | aî,ɛː,,
8 | est,ɛ,,\b
9 | s,,\S,\b
10 | x,,\S,\b
11 | ent,e,,\b
12 | nt,n,,\b
13 | ez,é,,\b
14 | er,é,,\b
15 | è,ɛ,,
16 | b,b,,
17 | ch,ʃ,,VOWEL
18 | ch,k,,
19 | sh,ʃ,,
20 | ss,s,,
21 | s,s,,
22 | ç,s,,
23 | c,k,,
24 | d,d,,
25 | f,f,,
26 | ph,f,,
27 | gn,ɲ,,
28 | ù,u,,
29 | gu,ɡ,,
30 | ng,ŋ,,\b
31 | g,ɡ,,
32 | j,ʒ,,
33 | k,k,,
34 | ill,j,VOWEL,
35 | ill,ij,,
36 | ail,aj,,\b
37 | eil,ɛj,,\b
38 | ll,l,,
39 | l,l,,
40 | mm,m,,
41 | m,m,,
42 | nn,n,,
43 | n,n,,
44 | p,p,,
45 | qu,k,,
46 | q,k,,
47 | rh,ʁ,,
48 | rr,ʁ,,
49 | r,ʁ,,
50 | th,t,,
51 | t,t,,
52 | ti,si,,[aeou][nm]
53 | v,v,,
54 | w,w,,
55 | x,ks,,
56 | z,z,,
57 | en,ɛn,,VOWEL
58 | en,ɑ̃,,
59 | ein,ɛn,,VOWEL
60 | ein,ɛ̃,,
61 | em,ɑ̃,,[pb]
62 | an,ɑn,,VOWEL
63 | ant,ɑ̃,,\b
64 | an,ɑ̃,,
65 | am,ɑ̃,,[pb]
66 | ain,ɛn,,VOWEL
67 | in,ɪn,,VOWEL
68 | ain,ɛ̃,,
69 | in,ɛ̃,,
70 | aim,ɛ̃,,[pb]
71 | im,ɛ̃,,[pb]
72 | ɪ,i,,
73 | on,ɔn,,VOWEL
74 | om,ɔm,,VOWEL
75 | on,ɔ̃,,
76 | om,ɔ̃,,[pb]
77 | un,yyyn,,VOWEL
78 | un,œ̃,,
79 | um,œ̃,,[pb]
80 | um,œ̃,,\b
81 | yyy,u,,
82 | œu,œ,,
83 | eu,ø,,
84 | eau,o,,
85 | au,o,,
86 | o,ɔ,,[rnml]
87 | é,ɜ,,
88 | ae,ɜ,,
89 | æ,ɜ,,
90 | ai,ɜ,,
91 | ê,ɛː,,
92 | e,,\S,\b
93 | e,ʌ,,
94 | ɜ,e,,
95 | à,a,,
96 | â,ɑ,,
97 | a,a,,
98 | oin,wɛ,,
99 | oi,wa,,
100 | ay,ɛj,,VOWEL
101 | ay,e,,
102 | oy,waj,,VOWEL
103 | oy,wa,,
104 | y,i,,
105 | u,y,,
106 | oy,u,,
107 | ë,ɛ,,
108 | ï,i,,
109 | î,i,,
110 | ô,o,,
111 | û,y,,
112 | ü,y,,
113 | ÿ,i,,
114 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/alq-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "e:", "out": "eː"},
3 | {"in": "i:", "out": "i"},
4 | {"in": "j", "out": "j"},
5 | {"in": "o", "out": "oː"},
6 | {"in": "o:", "out": "oː"},
7 | {"in": "u", "out": "u"},
8 | {"in": "ŋ", "out": "ŋ"},
9 | {"in": "ɑ", "out": "ɑ"},
10 | {"in": "ɛ", "out": "ɛ"},
11 | {"in": "ɡ", "out": "ɡ"},
12 | {"in": "ʃ", "out": "ʃ"},
13 | {"in": "ʌ", "out": "ʌ"},
14 | {"in": "ʒ", "out": "ʒ"}
15 | ]
16 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/atj-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "a", "out": "ɑ"},
3 | {"in": "aː", "out": "ɑ"},
4 | {"in": "b", "out": "b"},
5 | {"in": "d", "out": "d"},
6 | {"in": "d͡ʒ", "out": "dʒ"},
7 | {"in": "eː", "out": "eː"},
8 | {"in": "h", "out": "h"},
9 | {"in": "i", "out": "i"},
10 | {"in": "iː", "out": "i"},
11 | {"in": "m", "out": "m"},
12 | {"in": "n", "out": "n"},
13 | {"in": "r", "out": "ɾ"},
14 | {"in": "s", "out": "s"},
15 | {"in": "u", "out": "u"},
16 | {"in": "uː", "out": "u"},
17 | {"in": "w", "out": "w"},
18 | {"in": "ɡ", "out": "ɡ"},
19 | {"in": "ʃ", "out": "ʃ"}
20 | ]
21 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/clm-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "tʃʼ", "out": "tʃ"},
3 | {"in": "tsʼ", "out": "ts"},
4 | {"in": "kʼʷ", "out": "kw"},
5 | {"in": "tɬʼ", "out": "ts"},
6 | {"in": "qʼʷ", "out": "kw"},
7 | {"in": "ˈɔ", "out": "ɔ"},
8 | {"in": "ˈʌ", "out": "ʌ"},
9 | {"in": "ˈa", "out": "æ"},
10 | {"in": "tʃ", "out": "t͡ʃ"},
11 | {"in": "ts", "out": "ts"},
12 | {"in": "ˈɛ", "out": "ɛ"},
13 | {"in": "ˈi", "out": "i"},
14 | {"in": "kʷ", "out": "kw"},
15 | {"in": "m̰", "out": "m"},
16 | {"in": "n̰", "out": "n"},
17 | {"in": "ɴ̰", "out": "ŋ"},
18 | {"in": "pʼ", "out": "p"},
19 | {"in": "qʷ", "out": "kw"},
20 | {"in": "qʼ", "out": "k"},
21 | {"in": "tʼ", "out": "t"},
22 | {"in": "ˈu", "out": "u"},
23 | {"in": "w̰", "out": "w"},
24 | {"in": "χʷ", "out": "ʃw"},
25 | {"in": "xʷ", "out": "kw"},
26 | {"in": "j̰", "out": "j"},
27 | {"in": "ɔ", "out": "ɔ"},
28 | {"in": "ɨ", "out": "ɨ"},
29 | {"in": "ʊ", "out": "ʊ"},
30 | {"in": "ə", "out": "ə"},
31 | {"in": "ʔ", "out": "ʔ"},
32 | {"in": "a", "out": "æ"},
33 | {"in": "ɛ", "out": "ɛ"},
34 | {"in": "h", "out": "h"},
35 | {"in": "i", "out": "i"},
36 | {"in": "k", "out": "k"},
37 | {"in": "l", "out": "l"},
38 | {"in": "ɬ", "out": "s"},
39 | {"in": "m", "out": "m"},
40 | {"in": "n", "out": "n"},
41 | {"in": "ɴ", "out": "ŋ"},
42 | {"in": "p", "out": "p"},
43 | {"in": "q", "out": "k"},
44 | {"in": "ʃ", "out": "ʃ"},
45 | {"in": "s", "out": "s"},
46 | {"in": "t", "out": "t"},
47 | {"in": "u", "out": "u"},
48 | {"in": "w", "out": "w"},
49 | {"in": "χ", "out": "ʃ"},
50 | {"in": "j", "out": "j"}
51 | ]
52 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/crg-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "ɛ̃ː", "out": "ẽː"},
3 | {"in": "ɑ̃ː", "out": "ɑ̃"},
4 | {"in": "ĩː", "out": "ẽː"},
5 | {"in": "oaw", "out": "ɔæw"},
6 | {"in": "waj", "out": "wæj"},
7 | {"in": "ʰtʃ", "out": "t͡ʃ"},
8 | {"in": "ɔ̃ː", "out": "ɔ̃ː"},
9 | {"in": "ɔ̃", "out": "ɔ̃"},
10 | {"in": "aw", "out": "æw"},
11 | {"in": "aj", "out": "æj"},
12 | {"in": "ɑː", "out": "ɑ"},
13 | {"in": "iː", "out": "eː"},
14 | {"in": "eː", "out": "eː"},
15 | {"in": "uː", "out": "u"},
16 | {"in": "ĩ", "out": "ĩ"},
17 | {"in": "ʰp", "out": "p"},
18 | {"in": "ʰt", "out": "t"},
19 | {"in": "ʰk", "out": "k"},
20 | {"in": "tʃ", "out": "t͡ʃ"},
21 | {"in": "dʒ", "out": "dʒ"},
22 | {"in": "oː", "out": "oː"},
23 | {"in": "ʌː", "out": "eː"},
24 | {"in": "ŋ", "out": "ŋ"},
25 | {"in": "æ", "out": "æ"},
26 | {"in": "ʃ", "out": "ʃ"},
27 | {"in": "ʒ", "out": "ʒ"},
28 | {"in": "ɛ", "out": "ɛ"},
29 | {"in": "ɪ", "out": "ɪ"},
30 | {"in": "j", "out": "j"},
31 | {"in": "ʌ", "out": "ʌ"},
32 | {"in": "o", "out": "ɔ"},
33 | {"in": "ʊ", "out": "ʊ"},
34 | {"in": "ɹ", "out": "ɹ"}
35 | ]
36 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/crk-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "eː", "out": "eː", "context_before": "", "context_after": ""},
3 | {"in": "iː", "out": "eː", "context_before": "", "context_after": ""},
4 | {"in": "oː", "out": "oː", "context_before": "", "context_after": "", "prevent_feeding": true},
5 | {"in": "aː", "out": "eː", "context_before": "", "context_after": ""},
6 | {"in": "ɪ", "out": "ɪ", "context_before": "", "context_after": ""},
7 | {"in": "o", "out": "ɔ", "context_before": "", "context_after": ""},
8 | {"in": "ʌ", "out": "ʌ", "context_before": "", "context_after": ""},
9 | {"in": "p", "out": "p", "context_before": "", "context_after": ""},
10 | {"in": "t", "out": "t", "context_before": "", "context_after": ""},
11 | {"in": "k", "out": "k", "context_before": "", "context_after": ""},
12 | {"in": "m", "out": "m", "context_before": "", "context_after": ""},
13 | {"in": "n", "out": "n", "context_before": "", "context_after": ""},
14 | {"in": "t͡s", "out": "ts", "context_before": "", "context_after": ""},
15 | {"in": "s", "out": "s", "context_before": "", "context_after": ""},
16 | {"in": "h", "out": "h", "context_before": "", "context_after": ""},
17 | {"in": "j", "out": "j", "context_before": "", "context_after": ""},
18 | {"in": "w", "out": "w", "context_before": "", "context_after": ""}
19 | ]
20 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/dan-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "ð", "out": "ð", "context_before": "", "context_after": ""},
3 | {"in": "d", "out": "d", "context_before": "", "context_after": ""},
4 | {"in": "ŋ", "out": "ŋ", "context_before": "", "context_after": ""},
5 | {"in": "ŋ", "out": "ŋ", "context_before": "", "context_after": ""},
6 | {"in": "s", "out": "s", "context_before": "", "context_after": ""},
7 | {"in": "ʁ", "out": "ʒ", "context_before": "", "context_after": ""},
8 | {"in": "ɐ̯", "out": "j", "context_before": "", "context_after": ""},
9 | {"in": "ɔ", "out": "ɔ", "context_before": "", "context_after": ""},
10 | {"in": "oː", "out": "oː", "context_before": "", "context_after": ""},
11 | {"in": "oː", "out": "oː", "context_before": "", "context_after": ""},
12 | {"in": "ð̠˕ˠ", "out": "ð", "context_before": "", "context_after": ""},
13 | {"in": "ð", "out": "ð", "context_before": "", "context_after": ""},
14 | {"in": "ɪ̯", "out": "j", "context_before": "", "context_after": ""},
15 | {"in": "ʊ̯", "out": "w", "context_before": "", "context_after": ""},
16 | {"in": "a", "out": "ɑ", "context_before": "", "context_after": ""},
17 | {"in": "ɡ", "out": "ɡ", "context_before": "", "context_after": ""},
18 | {"in": "ɐ̯", "out": "j", "context_before": "", "context_after": ""},
19 | {"in": "s", "out": "s", "context_before": "", "context_after": ""},
20 | {"in": "ɛ", "out": "ɛ", "context_before": "", "context_after": ""},
21 | {"in": "æ", "out": "æ", "context_before": "", "context_after": ""},
22 | {"in": "ɑ", "out": "ɑ", "context_before": "", "context_after": ""},
23 | {"in": "ɑ", "out": "ɑ", "context_before": "", "context_after": ""},
24 | {"in": "ʁ", "out": "ʒ", "context_before": "", "context_after": ""},
25 | {"in": "œ", "out": "ɔ", "context_before": "", "context_after": ""},
26 | {"in": "u", "out": "u", "context_before": "", "context_after": ""},
27 | {"in": "p", "out": "p", "context_before": "", "context_after": ""}
28 | ]
29 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/fin-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "t͡s", "out": "ts"},
3 | {"in": "ɑː", "out": "ɑ"},
4 | {"in": "ɑɪ", "out": "ɑɪ"},
5 | {"in": "ɑʊ", "out": "ɑʊ"},
6 | {"in": "eː", "out": "eː"},
7 | {"in": "eɪ", "out": "eɪ"},
8 | {"in": "eʊ", "out": "ɛʊ"},
9 | {"in": "iː", "out": "i"},
10 | {"in": "iɛ", "out": "iɛ"},
11 | {"in": "oː", "out": "oː"},
12 | {"in": "oʊ", "out": "oʊ"},
13 | {"in": "ɔɪ", "out": "ɔɪ"},
14 | {"in": "uː", "out": "u"},
15 | {"in": "ʊɔ", "out": "ʊɔ"},
16 | {"in": "ʊɪ", "out": "ʊj"},
17 | {"in": "yː", "out": "u"},
18 | {"in": "æː", "out": "æ"},
19 | {"in": "æɥ", "out": "aʊ"},
20 | {"in": "øː", "out": "ə"},
21 | {"in": "øɥ", "out": "əj"},
22 | {"in": "ks", "out": "ks"},
23 | {"in": "ɑ", "out": "ɑ"},
24 | {"in": "b", "out": "b"},
25 | {"in": "s", "out": "s"},
26 | {"in": "d", "out": "d"},
27 | {"in": "e", "out": "ɛ"},
28 | {"in": "f", "out": "f"},
29 | {"in": "ɡ", "out": "ɡ"},
30 | {"in": "h", "out": "h"},
31 | {"in": "i", "out": "i"},
32 | {"in": "j", "out": "j"},
33 | {"in": "k", "out": "k"},
34 | {"in": "l", "out": "l"},
35 | {"in": "m", "out": "m"},
36 | {"in": "n", "out": "n"},
37 | {"in": "o", "out": "ɔ"},
38 | {"in": "p", "out": "p"},
39 | {"in": "k", "out": "k"},
40 | {"in": "r", "out": "ɾ"},
41 | {"in": "s", "out": "s"},
42 | {"in": "t", "out": "t"},
43 | {"in": "u", "out": "u"},
44 | {"in": "ʋ", "out": "w"},
45 | {"in": "ʋ", "out": "w"},
46 | {"in": "y", "out": "u"},
47 | {"in": "o", "out": "ɔ"},
48 | {"in": "æ", "out": "æ"},
49 | {"in": "ø", "out": "ə"},
50 | {"in": "ŋ", "out": "ŋ"},
51 | {"in": "ŋ", "out": "ŋ"},
52 | {"in": "m", "out": "m"},
53 | {"in": "m", "out": "m"}
54 | ]
55 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/fra-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "a", "out": "ɑ"},
3 | {"in": "b", "out": "b"},
4 | {"in": "d", "out": "d"},
5 | {"in": "e", "out": "eː"},
6 | {"in": "f", "out": "f"},
7 | {"in": "i", "out": "i"},
8 | {"in": "j", "out": "j"},
9 | {"in": "k", "out": "k"},
10 | {"in": "l", "out": "l"},
11 | {"in": "m", "out": "m"},
12 | {"in": "n", "out": "n"},
13 | {"in": "o", "out": "oː"},
14 | {"in": "p", "out": "p"},
15 | {"in": "s", "out": "s"},
16 | {"in": "t", "out": "t"},
17 | {"in": "u", "out": "u"},
18 | {"in": "v", "out": "v"},
19 | {"in": "w", "out": "w"},
20 | {"in": "y", "out": "u"},
21 | {"in": "z", "out": "z"},
22 | {"in": "ø", "out": "oː"},
23 | {"in": "ŋ", "out": "ŋ"},
24 | {"in": "œ", "out": "ɔ"},
25 | {"in": "œ̃", "out": "ɔ̃"},
26 | {"in": "ɑ", "out": "ɑ"},
27 | {"in": "ɑ̃", "out": "ɑ̃"},
28 | {"in": "ɔ", "out": "ɔ"},
29 | {"in": "ɔ̃", "out": "ɔ̃"},
30 | {"in": "ɛ", "out": "ɛ"},
31 | {"in": "ɛː", "out": "ɛ"},
32 | {"in": "ɛ̃", "out": "ɛ̃"},
33 | {"in": "ɜ", "out": "ʌ"},
34 | {"in": "ɡ", "out": "ɡ"},
35 | {"in": "ɪn", "out": "ɪn"},
36 | {"in": "ɲ", "out": "ŋ"},
37 | {"in": "ʁ", "out": "ʒ"},
38 | {"in": "ʃ", "out": "ʃ"},
39 | {"in": "ʌ", "out": "ʌ"},
40 | {"in": "ʒ", "out": "ʒ"}
41 | ]
42 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/kwk-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "tʼ͡s", "out": "ts"},
3 | {"in": "tʼ͡ɬ", "out": "ts"},
4 | {"in": "tʼs", "out": "ts"},
5 | {"in": "tʼɬ", "out": "ts"},
6 | {"in": "qʼʷ", "out": "kw"},
7 | {"in": "kʼʷ", "out": "kw"},
8 | {"in": "t͡s", "out": "ts"},
9 | {"in": "t͡ɬ", "out": "ts"},
10 | {"in": "kʼʲ", "out": "kj"},
11 | {"in": "d͡z", "out": "dz"},
12 | {"in": "d͡l", "out": "dl"},
13 | {"in": "d͡ɬ", "out": "ds"},
14 | {"in": "pʼ", "out": "p"},
15 | {"in": "tʼ", "out": "t"},
16 | {"in": "kʼ", "out": "k"},
17 | {"in": "qʼ", "out": "k"},
18 | {"in": "ʔm", "out": "ʔm"},
19 | {"in": "ʔn", "out": "ʔn"},
20 | {"in": "ʔw", "out": "ʔw"},
21 | {"in": "ʔy", "out": "ʔu"},
22 | {"in": "lʼ", "out": "l"},
23 | {"in": "dz", "out": "dz"},
24 | {"in": "ts", "out": "ts"},
25 | {"in": "tɬ", "out": "ts"},
26 | {"in": "dɬ", "out": "ds"},
27 | {"in": "qʷ", "out": "kw"},
28 | {"in": "χʷ", "out": "ʃw"},
29 | {"in": "ɢʷ", "out": "ɡw"},
30 | {"in": "kʷ", "out": "kw"},
31 | {"in": "xʷ", "out": "kw"},
32 | {"in": "ɡʷ", "out": "ɡw"},
33 | {"in": "ʔl", "out": "ʔl"},
34 | {"in": "ʔj", "out": "ʔj"},
35 | {"in": "kʲ", "out": "kj"},
36 | {"in": "xʲ", "out": "kj"},
37 | {"in": "ɡʲ", "out": "ɡj"},
38 | {"in": "ej", "out": "ej"},
39 | {"in": "ow", "out": "ow"},
40 | {"in": "ɢ", "out": "ɡ"},
41 | {"in": "χ", "out": "ʃ"},
42 | {"in": "ɡ", "out": "ɡ"},
43 | {"in": "q", "out": "k"},
44 | {"in": "ə", "out": "ə"},
45 | {"in": "p", "out": "p"},
46 | {"in": "t", "out": "t"},
47 | {"in": "k", "out": "k"},
48 | {"in": "ɬ", "out": "s"},
49 | {"in": "s", "out": "s"},
50 | {"in": "x", "out": "k"},
51 | {"in": "ʔ", "out": "ʔ"},
52 | {"in": "b", "out": "b"},
53 | {"in": "d", "out": "d"},
54 | {"in": "h", "out": "h"},
55 | {"in": "m", "out": "m"},
56 | {"in": "n", "out": "n"},
57 | {"in": "l", "out": "l"},
58 | {"in": "w", "out": "w"},
59 | {"in": "j", "out": "j"},
60 | {"in": "a", "out": "ɑ"},
61 | {"in": "e", "out": "ɛ"},
62 | {"in": "i", "out": "i"},
63 | {"in": "o", "out": "ɔ"},
64 | {"in": "u", "out": "u"}
65 | ]
66 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/lml-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "ŋ", "out": "ŋ", "context_before": "", "context_after": ""},
3 | {"in": "ŋɡ", "out": "ŋ", "context_before": "", "context_after": ""},
4 | {"in": "ɑ", "out": "ɑ", "context_before": "", "context_after": ""},
5 | {"in": "ɡ", "out": "ɡ", "context_before": "", "context_after": ""},
6 | {"in": "ɣ", "out": "ɡ", "context_before": "", "context_after": ""},
7 | {"in": "ɹ", "out": "ɹ", "context_before": "", "context_after": ""}
8 | ]
9 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/mic-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "ətʃ", "out": "ət͡ʃ"},
3 | {"in": "iː", "out": "eː"},
4 | {"in": "ɑː", "out": "ɑ"},
5 | {"in": "eː", "out": "eː"},
6 | {"in": "oː", "out": "oː"},
7 | {"in": "uː", "out": "u"},
8 | {"in": "tʃ", "out": "t͡ʃ"},
9 | {"in": "dʒ", "out": "dʒ"},
10 | {"in": "kə", "out": "kə"},
11 | {"in": "pə", "out": "pə"},
12 | {"in": "xə", "out": "kə"},
13 | {"in": "sə", "out": "sə"},
14 | {"in": "tə", "out": "tə"},
15 | {"in": "əj", "out": "əj"},
16 | {"in": "ək", "out": "ək"},
17 | {"in": "əl", "out": "əl"},
18 | {"in": "əm", "out": "əm"},
19 | {"in": "ən", "out": "ən"},
20 | {"in": "əp", "out": "əp"},
21 | {"in": "əx", "out": "ək"},
22 | {"in": "əs", "out": "əs"},
23 | {"in": "ət", "out": "ət"},
24 | {"in": "əw", "out": "əw"},
25 | {"in": "əy", "out": "əu"},
26 | {"in": "'", "out": ""},
27 | {"in": "ɑ", "out": "ɑ"},
28 | {"in": "o", "out": "ɔ"},
29 | {"in": "x", "out": "k"},
30 | {"in": "j", "out": "j"},
31 | {"in": "b", "out": "b"},
32 | {"in": "d", "out": "d"},
33 | {"in": "ɡ", "out": "ɡ"},
34 | {"in": "z", "out": "z"},
35 | {"in": "ɣ", "out": "ɡ"}
36 | ]
37 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/oji-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "ŋ", "out": "ŋ", "context_before": "", "context_after": ""},
3 | {"in": "ɑː", "out": "ɑ", "context_before": "", "context_after": ""},
4 | {"in": "iː", "out": "i", "context_before": "", "context_after": ""},
5 | {"in": "oː", "out": "oː", "context_before": "", "context_after": ""},
6 | {"in": "eː", "out": "eː", "context_before": "", "context_after": ""},
7 | {"in": "ʌ", "out": "ʌ", "context_before": "", "context_after": ""},
8 | {"in": "i", "out": "i", "context_before": "", "context_after": ""},
9 | {"in": "o", "out": "oː", "context_before": "", "context_after": ""},
10 | {"in": "b", "out": "b", "context_before": "", "context_after": ""},
11 | {"in": "tʃ", "out": "t͡ʃ", "context_before": "", "context_after": ""},
12 | {"in": "d", "out": "d", "context_before": "", "context_after": ""},
13 | {"in": "ɡ", "out": "ɡ", "context_before": "", "context_after": ""},
14 | {"in": "h", "out": "h", "context_before": "", "context_after": ""},
15 | {"in": "ʔ", "out": "ʔ", "context_before": "", "context_after": ""},
16 | {"in": "ʔ", "out": "ʔ", "context_before": "", "context_after": ""},
17 | {"in": "dʒ", "out": "dʒ", "context_before": "", "context_after": ""},
18 | {"in": "k", "out": "k", "context_before": "", "context_after": ""},
19 | {"in": "m", "out": "m", "context_before": "", "context_after": ""},
20 | {"in": "n", "out": "n", "context_before": "", "context_after": ""},
21 | {"in": "p", "out": "p", "context_before": "", "context_after": ""},
22 | {"in": "ʃ", "out": "ʃ", "context_before": "", "context_after": ""},
23 | {"in": "s", "out": "s", "context_before": "", "context_after": ""},
24 | {"in": "t", "out": "t", "context_before": "", "context_after": ""},
25 | {"in": "w", "out": "w", "context_before": "", "context_after": ""},
26 | {"in": "j", "out": "j", "context_before": "", "context_after": ""},
27 | {"in": "ʒ", "out": "ʒ", "context_before": "", "context_after": ""},
28 | {"in": "z", "out": "z", "context_before": "", "context_after": ""}
29 | ]
30 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/oka-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "kʼʷ", "out": "kw"},
3 | {"in": "qʼʷ", "out": "kw"},
4 | {"in": "ʕˀʷ", "out": "ʒw"},
5 | {"in": "tsʼ", "out": "ts"},
6 | {"in": "tɬʼ", "out": "ts"},
7 | {"in": "χʷ", "out": "ʃw"},
8 | {"in": "ˈa", "out": "æ"},
9 | {"in": "ˈi", "out": "i"},
10 | {"in": "ɣˀ", "out": "ɡ"},
11 | {"in": "kʼ", "out": "k"},
12 | {"in": "kʷ", "out": "kw"},
13 | {"in": "lˀ", "out": "l"},
14 | {"in": "mˀ", "out": "m"},
15 | {"in": "nˀ", "out": "n"},
16 | {"in": "ˈo", "out": "ɔ"},
17 | {"in": "pʼ", "out": "p"},
18 | {"in": "qʼ", "out": "k"},
19 | {"in": "qʷ", "out": "kw"},
20 | {"in": "ɾˀ", "out": "ɾ"},
21 | {"in": "tʼ", "out": "t"},
22 | {"in": "ʕˀ", "out": "ʒ"},
23 | {"in": "ˈu", "out": "u"},
24 | {"in": "ʕʷ", "out": "ʒw"},
25 | {"in": "xʷ", "out": "kw"},
26 | {"in": "wˀ", "out": "w"},
27 | {"in": "yˀ", "out": "u"},
28 | {"in": "ts", "out": "ts"},
29 | {"in": "h", "out": "h"},
30 | {"in": "χ", "out": "ʃ"},
31 | {"in": "a", "out": "æ"},
32 | {"in": "ə", "out": "ə"},
33 | {"in": "i", "out": "i"},
34 | {"in": "ɣ", "out": "ɡ"},
35 | {"in": "k", "out": "k"},
36 | {"in": "l", "out": "l"},
37 | {"in": "ɬ", "out": "s"},
38 | {"in": "m", "out": "m"},
39 | {"in": "n", "out": "n"},
40 | {"in": "o", "out": "ɔ"},
41 | {"in": "p", "out": "p"},
42 | {"in": "q", "out": "k"},
43 | {"in": "ɾ", "out": "ɾ"},
44 | {"in": "s", "out": "s"},
45 | {"in": "t", "out": "t"},
46 | {"in": "ʕ", "out": "ʒ"},
47 | {"in": "u", "out": "u"},
48 | {"in": "x", "out": "k"},
49 | {"in": "w", "out": "w"},
50 | {"in": "y", "out": "u"},
51 | {"in": "ʷ", "out": "w"},
52 | {"in": "ˀ", "out": ""}
53 | ]
54 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/see-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "d͡ʒ", "out": "dʒ"},
3 | {"in": "t͡ʃ", "out": "t͡ʃ"},
4 | {"in": "ẽ", "out": ""},
5 | {"in": "e", "out": "eː"},
6 | {"in": "", "out": "ẽː"},
7 | {"in": "õ", "out": ""},
8 | {"in": "o", "out": "oː"},
9 | {"in": "", "out": "õː"}
10 | ]
11 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/str-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "aɪ", "out": "aɪ"},
3 | {"in": "d͡z", "out": "dz"},
4 | {"in": "eː", "out": "e"},
5 | {"in": "e", "out": "eː"},
6 | {"in": "h", "out": "h"},
7 | {"in": "i", "out": "i"},
8 | {"in": "j", "out": "j"},
9 | {"in": "kʷʼ", "out": "kw"},
10 | {"in": "k̟", "out": "k"},
11 | {"in": "k̟ʷ", "out": "kw"},
12 | {"in": "l", "out": "l"},
13 | {"in": "m", "out": "m"},
14 | {"in": "n", "out": "n"},
15 | {"in": "p", "out": "p"},
16 | {"in": "pʼ", "out": "p"},
17 | {"in": "q", "out": "k"},
18 | {"in": "qʷ", "out": "kw"},
19 | {"in": "qʷʼ", "out": "kw"},
20 | {"in": "qʼ", "out": "k"},
21 | {"in": "s", "out": "s"},
22 | {"in": "t", "out": "t"},
23 | {"in": "tʼ", "out": "t"},
24 | {"in": "t͡s̪", "out": "tθ"},
25 | {"in": "t͡ɬʼ", "out": "ts"},
26 | {"in": "t͡ʃ", "out": "t͡ʃ"},
27 | {"in": "t͡ʃʼ", "out": "tʃ"},
28 | {"in": "u", "out": "u"},
29 | {"in": "w", "out": "w"},
30 | {"in": "xʷ", "out": "kw"},
31 | {"in": "æ", "out": "æ"},
32 | {"in": "ŋ", "out": "ŋ"},
33 | {"in": "ɑ", "out": "ɑ"},
34 | {"in": "ɬ", "out": "s"},
35 | {"in": "ʃ", "out": "ʃ"},
36 | {"in": "ʌ", "out": "ʌ"},
37 | {"in": "ʔ", "out": "ʔ"},
38 | {"in": "θ", "out": "θ"},
39 | {"in": "χ", "out": "ʃ"},
40 | {"in": "χʷ", "out": "ʃw"}
41 | ]
42 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/und-ascii_to_dummy.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "a", "out": "ɑ", "context_before": "", "context_after": ""},
3 | {"in": "b", "out": "t", "context_before": "", "context_after": ""},
4 | {"in": "c", "out": "ts", "context_before": "", "context_after": ""},
5 | {"in": "d", "out": "t", "context_before": "", "context_after": ""},
6 | {"in": "e", "out": "i", "context_before": "", "context_after": ""},
7 | {"in": "f", "out": "s", "context_before": "", "context_after": ""},
8 | {"in": "g", "out": "t", "context_before": "", "context_after": ""},
9 | {"in": "h", "out": "n", "context_before": "", "context_after": ""},
10 | {"in": "i", "out": "i", "context_before": "", "context_after": ""},
11 | {"in": "j", "out": "s", "context_before": "", "context_after": ""},
12 | {"in": "k", "out": "t", "context_before": "", "context_after": ""},
13 | {"in": "l", "out": "n", "context_before": "", "context_after": ""},
14 | {"in": "m", "out": "n", "context_before": "", "context_after": ""},
15 | {"in": "n", "out": "n", "context_before": "", "context_after": ""},
16 | {"in": "o", "out": "u", "context_before": "", "context_after": ""},
17 | {"in": "p", "out": "t", "context_before": "", "context_after": ""},
18 | {"in": "q", "out": "t", "context_before": "", "context_after": ""},
19 | {"in": "r", "out": "n", "context_before": "", "context_after": ""},
20 | {"in": "s", "out": "s", "context_before": "", "context_after": ""},
21 | {"in": "t", "out": "t", "context_before": "", "context_after": ""},
22 | {"in": "u", "out": "u", "context_before": "", "context_after": ""},
23 | {"in": "v", "out": "s", "context_before": "", "context_after": ""},
24 | {"in": "w", "out": "u", "context_before": "", "context_after": ""},
25 | {"in": "x", "out": "s", "context_before": "", "context_after": ""},
26 | {"in": "y", "out": "i", "context_before": "", "context_after": ""},
27 | {"in": "z", "out": "s", "context_before": "", "context_after": ""},
28 | {"in": "@", "out": "ɑ", "context_before": "", "context_after": ""},
29 | {"in": "\\?", "out": "n", "context_before": "", "context_after": ""},
30 | {"in": "'", "out": "n", "context_before": "", "context_after": ""},
31 | {"in": ",", "out": "n", "context_before": "", "context_after": ""}
32 | ]
33 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/und-ascii_to_hamming-dummy.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "a", "out": "ɑ", "context_before": "", "context_after": ""},
3 | {"in": "b", "out": "t", "context_before": "", "context_after": ""},
4 | {"in": "c", "out": "ts", "context_before": "", "context_after": ""},
5 | {"in": "d", "out": "t", "context_before": "", "context_after": ""},
6 | {"in": "e", "out": "i", "context_before": "", "context_after": ""},
7 | {"in": "f", "out": "s", "context_before": "", "context_after": ""},
8 | {"in": "g", "out": "t", "context_before": "", "context_after": ""},
9 | {"in": "h", "out": "s", "context_before": "", "context_after": ""},
10 | {"in": "i", "out": "i", "context_before": "", "context_after": ""},
11 | {"in": "j", "out": "s", "context_before": "", "context_after": ""},
12 | {"in": "k", "out": "t", "context_before": "", "context_after": ""},
13 | {"in": "l", "out": "s", "context_before": "", "context_after": ""},
14 | {"in": "m", "out": "n", "context_before": "", "context_after": ""},
15 | {"in": "n", "out": "n", "context_before": "", "context_after": ""},
16 | {"in": "o", "out": "u", "context_before": "", "context_after": ""},
17 | {"in": "p", "out": "t", "context_before": "", "context_after": ""},
18 | {"in": "q", "out": "t", "context_before": "", "context_after": ""},
19 | {"in": "r", "out": "s", "context_before": "", "context_after": ""},
20 | {"in": "s", "out": "s", "context_before": "", "context_after": ""},
21 | {"in": "t", "out": "t", "context_before": "", "context_after": ""},
22 | {"in": "u", "out": "u", "context_before": "", "context_after": ""},
23 | {"in": "v", "out": "s", "context_before": "", "context_after": ""},
24 | {"in": "w", "out": "u", "context_before": "", "context_after": ""},
25 | {"in": "x", "out": "s", "context_before": "", "context_after": ""},
26 | {"in": "y", "out": "i", "context_before": "", "context_after": ""},
27 | {"in": "z", "out": "s", "context_before": "", "context_after": ""},
28 | {"in": "@", "out": "ɑ", "context_before": "", "context_after": ""},
29 | {"in": "\\?", "out": "t", "context_before": "", "context_after": ""},
30 | {"in": "'", "out": "t", "context_before": "", "context_after": ""},
31 | {"in": ",", "out": "t", "context_before": "", "context_after": ""}
32 | ]
33 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/und-ipa_to_hamming-eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "t͡ʃ", "out": "t͡ʃ", "context_before": "", "context_after": ""},
3 | {"in": "a", "out": "æ", "context_before": "", "context_after": ""},
4 | {"in": "b", "out": "b", "context_before": "", "context_after": ""},
5 | {"in": "d", "out": "d", "context_before": "", "context_after": ""},
6 | {"in": "e", "out": "eː", "context_before": "", "context_after": ""},
7 | {"in": "f", "out": "f", "context_before": "", "context_after": ""},
8 | {"in": "ɡ", "out": "ɡ", "context_before": "", "context_after": ""},
9 | {"in": "h", "out": "h", "context_before": "", "context_after": ""},
10 | {"in": "i", "out": "i", "context_before": "", "context_after": ""},
11 | {"in": "ʒ", "out": "ʒ", "context_before": "", "context_after": ""},
12 | {"in": "k", "out": "k", "context_before": "", "context_after": ""},
13 | {"in": "l", "out": "l", "context_before": "", "context_after": ""},
14 | {"in": "m", "out": "m", "context_before": "", "context_after": ""},
15 | {"in": "n", "out": "n", "context_before": "", "context_after": ""},
16 | {"in": "o", "out": "oː", "context_before": "", "context_after": ""},
17 | {"in": "p", "out": "p", "context_before": "", "context_after": ""},
18 | {"in": "q", "out": "k", "context_before": "", "context_after": ""},
19 | {"in": "r", "out": "ɾ", "context_before": "", "context_after": ""},
20 | {"in": "s", "out": "s", "context_before": "", "context_after": ""},
21 | {"in": "t", "out": "t", "context_before": "", "context_after": ""},
22 | {"in": "u", "out": "u", "context_before": "", "context_after": ""},
23 | {"in": "v", "out": "v", "context_before": "", "context_after": ""},
24 | {"in": "w", "out": "w", "context_before": "", "context_after": ""},
25 | {"in": "x", "out": "k", "context_before": "", "context_after": ""},
26 | {"in": "j", "out": "j", "context_before": "", "context_after": ""},
27 | {"in": "z", "out": "z", "context_before": "", "context_after": ""},
28 | {"in": "ə", "out": "ə", "context_before": "", "context_after": ""},
29 | {"in": "ʔ", "out": "ʔ", "context_before": "", "context_after": ""}
30 | ]
31 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/generated/win-ipa_to_eng-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "aː", "out": "eː"},
3 | {"in": "ãː", "out": "æ̃"},
4 | {"in": "tʃ", "out": "t͡ʃ"},
5 | {"in": "eː", "out": "eː"},
6 | {"in": "iː", "out": "eː"},
7 | {"in": "ĩː", "out": "ẽː"},
8 | {"in": "dʒ", "out": "dʒ"},
9 | {"in": "kʼ", "out": "k"},
10 | {"in": "oː", "out": "oː"},
11 | {"in": "pʼ", "out": "p"},
12 | {"in": "sʼ", "out": "s"},
13 | {"in": "ʃʼ", "out": "ʃ"},
14 | {"in": "tʼ", "out": "t"},
15 | {"in": "uː", "out": "u"},
16 | {"in": "ũː", "out": "ũ"},
17 | {"in": "xʼ", "out": "k"},
18 | {"in": "a", "out": "æ"},
19 | {"in": "ã", "out": "æ̃"},
20 | {"in": "b", "out": "b"},
21 | {"in": "e", "out": "ɛ"},
22 | {"in": "ɡ", "out": "ɡ"},
23 | {"in": "ɣ", "out": "ɡ"},
24 | {"in": "h", "out": "h"},
25 | {"in": "i", "out": "i"},
26 | {"in": "ĩ", "out": "ĩ"},
27 | {"in": "k", "out": "k"},
28 | {"in": "m", "out": "m"},
29 | {"in": "n", "out": "n"},
30 | {"in": "o", "out": "ɔ"},
31 | {"in": "p", "out": "p"},
32 | {"in": "r", "out": "ɾ"},
33 | {"in": "s", "out": "s"},
34 | {"in": "ʃ", "out": "ʃ"},
35 | {"in": "t", "out": "t"},
36 | {"in": "u", "out": "u"},
37 | {"in": "ũ", "out": "ũ"},
38 | {"in": "w", "out": "w"},
39 | {"in": "x", "out": "k"},
40 | {"in": "j", "out": "j"},
41 | {"in": "z", "out": "z"},
42 | {"in": "ʒ", "out": "ʒ"},
43 | {"in": "ʔ", "out": "ʔ"}
44 | ]
45 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/git/APA.csv:
--------------------------------------------------------------------------------
1 | l{1}\u0313{2},l{1}\u02C0{2}
2 | l{1}\u0313{2},ʔ{2}l{1}
3 | m{1}\u0313{2},m{1}\u02C0{2}
4 | m{1}\u0313{2},ʔ{2}m{1}
5 | n{1}\u0313{2},n{1}\u02C0{2}
6 | n{1}\u0313{2},ʔ{2}n{1}
7 | w{1}\u0313{2},w{1}\u02C0{2}
8 | w{1}\u0313{2},ʔ{2}w{1}
9 | ʔ{1}y{2},ʔ{1}j{2}
10 | y{1}\u0313{2},j{1}\u02C0{2}
11 | \?{1}\u2071{2},ʔ{1}\u2071{2}
12 | ʒ{1},ʣ{1}
13 | k{1}\u0313{2},k{1}\u02C0{2}
14 | k{1}\u0313{2},ʔ{2}k{1}
15 | q{1}\u0313{2},q{1}\u02C0{2}
16 | q{1}\u0313{2},ʔ{2}q{1}
17 | k{1}\u0313{2}ʷ{3},k{1}ʷ{3}\u02C0{2}
18 | p{1}\u0313{2},p{1}\u02C0{2}
19 | p{1}\u0313{2},ʔ{2}p{1}
20 | t{1}\u0313{2},t{1}\u02C0{2}
21 | t{1}\u0313{2},ʔ{2}t{1}
22 | ʔ{1}ƛ{2},ʔ{1}t{2}\u0361{4}ɬ{3}
23 | ƛ{1}\u0313{2},t{1}\u0361{4}ɬ{3}\u02C0{2}
24 | c{1}ʰ{2},ʦ{1}ʰ{2}
25 | c{1},ʦ{1}
26 | ʔ{1}c{2},ʔ{1}ʦ{2}
27 | c{1}\u0313{2},ʦ{1}\u02C0{2}
28 | ʔ{1}c{2}ʰ{3},ʔ{1}ʦ{2}ʰ{3}
29 | x{1}\u0323{2},χ{1}
30 | y{1},j{1}
31 | ɣ{1},ʁ{1}
32 | a,æ
33 | a{1}ː{2},æ{1}ː{2}
34 | g{1}\u0323{2},ɢ{1}
35 | g{1}\u0307{2},ɢ{1}
36 | g{1}ʸ{2},ɟ{1}
37 | g{1}\u0302{3},ɟ{1}
38 | ʔ{1}ɡ{2},ʔ{1}ɢ{2}
39 | ʔ{1}g{2}\u0307{3},ʔ{1}ɢ{2}
40 | ʔ{1}ɡ{2}ʸ{3},ʔ{1}ɟ{2}
41 | ʔ{1}g{2}\u0302{3},ʔ{1}ɟ{2}
42 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/git/Ortho_variables.csv:
--------------------------------------------------------------------------------
1 | VOWEL,a,ʌ,æ,e,ɛ,ɪ,ɨ,i,ɔ,o,ʊ,u,ʊ\u031E,
2 | CONSONANT,ʔ,b,d,g,ʁ,ɢ,ɟ,h,ɬ,ʣ,k,q,l,m,n,p,s,t,ʔt\u0361ɬ,t\u0361s,w,x,χ,j
3 | RESONANT,j,w,l,m,n
4 | UVULAR,q,ʁ,ɢ,χ
5 | VCLS_STOP,p,t,k,q
6 | VCD_STOP,b,d,g,ɢ,ɟ
7 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/git/RAPA.csv:
--------------------------------------------------------------------------------
1 | \?{1},ʔ{1}
2 | '{1}l{2},l{2}\u02C0{1}
3 | '{1}m{2},m{2}\u02C0{1}
4 | \?{1}m{2},ʔ{1}m{2}
5 | '{1}n{2},n{2}\u02C0{1}
6 | \?{1}n{2},ʔ{1}n{2}
7 | \?{1}w{2},ʔ{1}w{2}
8 | '{1}w{2},w{2}\u02C0{1}
9 | \?{1}y{2},ʔ{1}j{2}
10 | '{1}y{2},j{2}\u02C0{1}
11 | \?{1}i{2},ʔ{1}i{2}
12 | \?{1}\u2071{2},ʔ{1}\u2071{2}
13 | a{1},æ{1}
14 | a{1}a{2},æ{1}ː{2}
15 | E{1},ɛ{1}
16 | e{1}e{2},e{1}ː{2}
17 | ɡ{1}\u0332{2}\u0323{3},ʁ{1}
18 | g{1}\u0323{2},ɢ{1}
19 | ɡ{1}ʸ{2},ɟ{1}
20 | ɫ{1},ɬ{1}
21 | i{1}i{2},i{1}ː{2}
22 | j{1},ʣ{1}
23 | k{1}'{2},k{1}\u02C0{2}
24 | \?{1}k{2},ʔ{1}k{2}
25 | \?{1}g{2}\u0323{3},ʔ{1}ɢ{2}
26 | \?{1}q{2},ʔ{1}q{2}
27 | \?{1}q{2}ʰ{3},ʔ{1}q{2}ʰ{3}
28 | q{1}'{2},q{1}\u02C0{2}
29 | \?{1}g{2}ʸ{3},ʔ{1}ɟ{2}
30 | \?{1}k{2}ʷ{3},ʔ{1}k{2}ʷ{3}
31 | \?{1}k{2}ʷ{3}ʰ{4},ʔ{1}k{2}ʷ{3}ʰ{4}
32 | k{1}ʷ{2}'{3},k{1}ʷ{2}\u02C0{3}
33 | \?{1}ɡ{2}ʷ{3},ʔ{1}ɡ{2}ʷ{3}
34 | o{1}o{2},o{1}ː{2}
35 | \?{1}p{2}ʰ{3},ʔ{1}p{2}ʰ{3}
36 | \?{1}p{2},ʔ{1}p{2}
37 | p{1}'{2},p{1}\u02C0{2}
38 | \?{1}b{2},ʔ{1}b{2}
39 | \?{1}t{2},ʔ{1}t{2}
40 | \?{1}t{2}ʰ{3},ʔ{1}t{2}ʰ{3}
41 | t{1}'{2},t{1}\u02C0{2}
42 | \?{1}d{2},ʔ{1}d{2}
43 | \?{1}t{2}\u0361{4}ɬ{3},ʔ{1}t{2}\u0361{4}ɬ{3}
44 | Ƚ{1}'{2},t{1}\u0361{4}ɬ{3}\u02C0{2}
45 | c{1}ʰ{2},ʦ{1}ʰ{2}
46 | c{1},ʦ{1}
47 | \?{1}c{2},ʔ{1}ʦ{2}
48 | c{1}'{2},ʦ{1}\u02C0{2}
49 | \?{1}c{2}ʰ{3},ʔ{1}ʦ{2}ʰ{3}
50 | o{1}\u0323{2},ʊ{1}\u031E{2}
51 | u{1}u{2},u{1}ː{2}
52 | x{1}\u0323{2},χ{1}
53 | y{1},j{1}
54 | A{1},a{1}|ʌ{1}
55 | A{1}A{2},a{1}ː{2}|ʌ{1}ː{2}
56 | I{1},i{1}|ɨ{1}
57 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/git/RAPA_Deterministic.csv:
--------------------------------------------------------------------------------
1 | \?{1},ʔ{1}
2 | '{1}l{2},l{2}\u02C0{1}
3 | '{1}m{2},m{2}\u02C0{1}
4 | \?{1}m{2},ʔ{1}m{2}
5 | '{1}n{2},n{2}\u02C0{1}
6 | \?{1}n{2},ʔ{1}n{2}
7 | \?{1}w{2},ʔ{1}w{2}
8 | '{1}w{2},w{2}\u02C0{1}
9 | \?{1}y{2},ʔ{1}j{2}
10 | '{1}y{2},j{2}\u02C0{1}
11 | \?{1}i{2},ʔ{1}i{2}
12 | \?{1}\u2071{2},ʔ{1}\u2071{2}
13 | a{1},æ{1}
14 | a{1}a{2},æ{1}ː{2}
15 | E{1},ɛ{1}
16 | e{1}e{2},e{1}ː{2}
17 | ɡ{1}\u0332{2}\u0323{3},ʁ{1}
18 | g{1}\u0323{2},ɢ{1}
19 | ɡ{1}ʸ{2},ɟ{1}
20 | ɫ{1},ɬ{1}
21 | i{1}i{2},i{1}ː{2}
22 | j{1},ʣ{1}
23 | k{1}'{2},k{1}\u02C0{2}
24 | \?{1}k{2},ʔ{1}k{2}
25 | \?{1}g{2}\u0323{3},ʔ{1}ɢ{2}
26 | \?{1}q{2},ʔ{1}q{2}
27 | \?{1}q{2}ʰ{3},ʔ{1}q{2}ʰ{3}
28 | q{1}'{2},q{1}\u02C0{2}
29 | \?{1}g{2}ʸ{3},ʔ{1}ɟ{2}
30 | \?{1}k{2}ʷ{3},ʔ{1}k{2}ʷ{3}
31 | \?{1}k{2}ʷ{3}ʰ{4},ʔ{1}k{2}ʷ{3}ʰ{4}
32 | k{1}ʷ{2}'{3},k{1}ʷ{2}\u02C0{3}
33 | \?{1}ɡ{2}ʷ{3},ʔ{1}ɡ{2}ʷ{3}
34 | o{1}o{2},o{1}ː{2}
35 | \?{1}p{2}ʰ{3},ʔ{1}p{2}ʰ{3}
36 | \?{1}p{2},ʔ{1}p{2}
37 | p{1}'{2},p{1}\u02C0{2}
38 | \?{1}b{2},ʔ{1}b{2}
39 | \?{1}t{2},ʔ{1}t{2}
40 | \?{1}t{2}ʰ{3},ʔ{1}t{2}ʰ{3}
41 | t{1}'{2},t{1}\u02C0{2}
42 | \?{1}d{2},ʔ{1}d{2}
43 | \?{1}t{2}\u0361{4}ɬ{3},ʔ{1}t{2}\u0361{4}ɬ{3}
44 | Ƚ{1}'{2},t{1}\u0361{4}ɬ{3}\u02C0{2}
45 | c{1}ʰ{2},ʦ{1}ʰ{2}
46 | c{1},ʦ{1}
47 | \?{1}c{2},ʔ{1}ʦ{2}
48 | c{1}'{2},ʦ{1}\u02C0{2}
49 | \?{1}c{2}ʰ{3},ʔ{1}ʦ{2}ʰ{3}
50 | o{1}\u0323{2},ʊ{1}\u031E{2}
51 | u{1}u{2},u{1}ː{2}
52 | x{1}\u0323{2},χ{1}
53 | y{1},j{1}
54 | A{1},a{1}
55 | A{1}A{2},a{1}ː{2}
56 | I{1},i{1}
57 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/git/README.md:
--------------------------------------------------------------------------------
1 | Includes a fallback for c -> k because the word 'Jacob' occurs in a story, but this should be dealt with some other way.
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/git/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Gitksan
3 | mappings:
4 | - display_name: Orthography
5 | in_lang: git
6 | out_lang: git-ipa
7 | rule_ordering: as-written
8 | case_sensitive: false
9 | authors:
10 | - Fineen Davis
11 | rules_path: git_to_ipa.json
12 | <<: *shared
13 | - display_name: Rigsby APA
14 | in_lang: git
15 | out_lang: git-apa
16 | authors:
17 | - Fineen Davis
18 | rules_path: RAPA_Deterministic.csv
19 | <<: *shared
20 | - display_name: Unicode Equivalencies
21 | in_lang: git
22 | out_lang: git-equiv
23 | authors:
24 | - Aidan Pine
25 | rules_path: equiv.csv
26 | <<: *shared
27 | - display_name: Gitksan IPA to English IPA
28 | in_lang: git-ipa
29 | out_lang: eng-ipa
30 | rule_ordering: apply-longest-first
31 | authors:
32 | - Aidan Pine
33 | rules_path: git_ipa_to_eng_ipa.json
34 | <<: *shared
35 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/git/equiv.csv:
--------------------------------------------------------------------------------
1 | \u1E35,k\u0332
2 | \u0331,\u0332
3 | \u201C,\u0022
4 | \u201D,\u0022
5 | \u201E,\u0022
6 | \u2013,\u002D
7 | \u2014,\u002D
8 | ’,\u0027
9 | ‘,\u0027
10 | ˊ,\u0027
11 | `,\u0027
12 | ̒,\u0027
13 | ̔,\u0027
14 | ̕,\u0027
15 | ̛,\u0027
16 | ʻ,\u0027
17 | ʼ,\u0027
18 | ʽ,\u0027
19 | ʹ,\u0027
20 | ː,:
21 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/gla/README.txt:
--------------------------------------------------------------------------------
1 | There is no support here for 'slender' consonants, or any context-sensitive rules. These need to be added, although simple ReadAlongs support seems to work already.
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/gla/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Scottish Gaelic
3 | mappings:
4 | - display_name: Scottish Gaelic to IPA
5 | in_lang: gla
6 | out_lang: gla-ipa
7 | type: mapping
8 | case_sensitive: false
9 | rule_ordering: apply-longest-first
10 | authors:
11 | - Aidan Pine
12 | rules_path: gla_to_ipa.json
13 | <<: *shared
14 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/gwi/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Gwich'in
3 | mappings:
4 | - display_name: Gwich'in Equivalencies
5 | in_lang: gwi
6 | out_lang: gwi-equiv
7 | authors:
8 | - Sabrina Yu
9 | type: mapping
10 | rules_path: gwi_equiv.json
11 | prevent_feeding: false
12 | rule_ordering: as-written
13 | case_sensitive: false
14 | norm_form: NFD
15 | <<: *shared
16 | - display_name: Gwich'in to IPA
17 | in_lang: gwi-equiv
18 | out_lang: gwi-ipa
19 | authors:
20 | - Sabrina Yu
21 | type: mapping
22 | rules_path: gwi_to_ipa.json
23 | prevent_feeding: true
24 | rule_ordering: as-written
25 | case_sensitive: false
26 | norm_form: NFD
27 | <<: *shared
28 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/gwi/gwi_equiv.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "in":"o\u0328o\u0328",
4 | "out": "o\u0328"
5 | },
6 | {
7 | "in":"oo",
8 | "out": "o"
9 | },
10 | {
11 | "in":"\u0300",
12 | "out": ""
13 | },
14 | {
15 | "in": "\u2019",
16 | "out": "\u02bc"
17 | },
18 | {
19 | "in": "\u0027",
20 | "out": "\u02bc"
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/haa/README.md:
--------------------------------------------------------------------------------
1 | IPA mappings for Hän with resources from the Yukon Native Language Centre
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/haa/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Hän
3 | mappings:
4 | - display_name: Hän equivalencies
5 | in_lang: haa
6 | out_lang: haa-equiv
7 | authors:
8 | - Shankhalika Srikanth
9 | type: mapping
10 | rules_path: haa_equiv.csv
11 | prevent_feeding: false
12 | rule_ordering: as-written
13 | case_sensitive: false
14 | norm_form: NFD
15 | <<: *shared
16 | - display_name: Hän to IPA
17 | in_lang: haa-equiv
18 | out_lang: haa-ipa
19 | authors:
20 | - Shankhalika Srikanth
21 | type: mapping
22 | rules_path: haa_to_ipa.csv
23 | abbreviations_path: haa_abbs.csv
24 | prevent_feeding: true
25 | rule_ordering: as-written
26 | case_sensitive: false
27 | norm_form: NFD
28 | <<: *shared
29 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/haa/haa_abbs.csv:
--------------------------------------------------------------------------------
1 | PLAIN_VOWEL,i,u,äw,aw,ay,ä,a,ew,ey,ë,oy,iw,o,e
2 | GRAVE_VOWEL,ì,ù,ä̀w,àw,ày,ä̀,à,èw,èy,ë̀,òy,ìw,ò,è
3 | HAT_VOWEL,î,û,ä̂w,âw,ây,ä̂,â,êw,êy,ë̂,ôy,îw,ô,ê
4 | VEE_VOWEL,ǐ,ǔ,ä̌w,ǎw,ǎy,ä̌,ǎ,ěw,ěy,ë̌,ǒy,ǐw,ǒ,ě
5 | NASAL_VOWEL,į,ų,ą̈w,ąw,ąy,ą̈,ą,ęw,ęy,ę̈,ǫy,įw,ǫ,ę
6 | NASAL_GRAVE_VOWEL,į̀,ų̀,ą̈̀w,ą̀w,ą̀y,ą̈̀,ą̀,ę̀w,ę̀y,ę̈̀,ǫ̀y,į̀w,ǫ̀,ę̀
7 | NASAL_HAT_VOWEL,į̂,ų̂,ą̈̂w,ą̂w,ą̂y,ą̈̂,ą̂,ę̂w,ę̂y,ę̈̂,ǫ̂y,į̂w,ǫ̂,ę̂
8 | NASAL_VEE_VOWEL,į̌,ų̌,ą̈̌w,ą̌w,ą̌y,ą̈̌,ą̌,ę̌w,ę̌y,ę̈̌,ǫ̌y,į̌w,ǫ̌,ę̌
9 | VOWELS,PLAIN_VOWEL,GRAVE_VOWEL,HAT_VOWEL,VEE_VOWEL,NASAL_VOWEL,NASAL_GRAVE_VOWEL,NASAL_HAT_VOWEL,NASAL_VEE_VOWEL
10 | VELAR,kh,k',k,gh,g,nj
11 | CONSONANTS,p,m,b,w,t,d,ḏ,n,r,z,s,ł,c,j̱,j,y,k,g,',l,h
12 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/haa/haa_equiv.csv:
--------------------------------------------------------------------------------
1 | \u0149,\u0027
2 | \u02BC,\u0027
3 | \u055A,\u0027
4 | \uFF07,\u0027
5 | ’,'
6 | ‘,'
7 | ˊ,'
8 | `,'
9 | ʻ,'
10 | ʼ,'
11 | ʽ,'
12 | ʹ,'
13 | ѐ,è
14 | À,À
15 | È,È
16 | Ì,Ì
17 | Ò,Ò
18 | Ù,Ù
19 | à,à
20 | è,è
21 | ì,ì
22 | ò,ò
23 | ù,ù
24 | Â,Â
25 | Ê,Ê
26 | Î,Î
27 | Ô,Ô
28 | Û,Û
29 | â,â
30 | ê,ê
31 | î,î
32 | ô,ô
33 | û,û
34 | Ě,Ě
35 | ě,ě
36 | Ǎ,Ǎ
37 | ǎ,ǎ
38 | Ǐ,Ǐ
39 | ǐ,ǐ
40 | Ǒ,Ǒ
41 | ǒ,ǒ
42 | Ǔ,Ǔ
43 | ǔ,ǔ
44 | Ḏ,ḏ
45 | ḏ,ḏ
46 | d\u0332,ḏ
47 | j\u0332,j̱
48 | Ä,Ä
49 | Ë,Ë
50 | ä,ä
51 | ë,ë
52 | ӓ,ä
53 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/haa/haa_to_ipa.csv:
--------------------------------------------------------------------------------
1 | nj,ⁿk,,CONSONANTS
2 | p,pʰ
3 | mb,ᵐp
4 | b,p
5 | wh,w̥
6 | tth',tθʼ
7 | tth,tθʰ
8 | th,θ
9 | ts',tsʼ
10 | ts,tsʰ
11 | tr',ʈʂʼ
12 | tr,ʈʂʰ
13 | tl',tɬʼ
14 | tl,tɬʰ
15 | t',tʼ
16 | t,tʰ
17 | ddh,tθ
18 | dh,ð
19 | dz,ts
20 | dr,ʈʂ
21 | dl,tɬ
22 | ḏ,d
23 | d,t
24 | nh,n̥
25 | nd,ⁿt
26 | nj,ⁿk,,VELAR
27 | nj,ŋ
28 | rh,ɻ̊
29 | zr,ʐ
30 | sr,ʂ
31 | r,ɻ
32 | zh,ʒ
33 | ł,ɬ
34 | ch',tʃʼ
35 | ch,tʃʰ
36 | j̱,dʒ
37 | j,tʃ
38 | sh,ʃ
39 | yh,j̊
40 | kh,x
41 | k',kʼ
42 | k,kʁ,,VOWELS
43 | k,kʰ
44 | gh,ɣ
45 | g,ɡʁ,,VOWELS
46 | g,k
47 | ',ʔ
48 | l,ɬɮ,VOWELS,VOWELS
49 | ą̈̀w,ã̀o
50 | ą̈̌w,ã̌o
51 | ą̈̂w,ã̂o
52 | a\u0328\u0308w,ão
53 | ą̀w,æ̃̀o
54 | ą̌w,æ̃̌o
55 | ą̂w,æ̃̂o
56 | a\u0328w,æ̃o
57 | ą̀y,æ̃̀i
58 | ą̌y,æ̃̌i
59 | ą̂y,æ̃̂i
60 | a\u0328y,æ̃i
61 | ä\u0328,ɑ̃
62 | a\u0328,æ̃
63 | ę̀w,ẽ̀o
64 | ę̌w,ẽ̌o
65 | ę̂w,ẽ̂o
66 | e\u0328w,ẽo
67 | ę̀y,ẽ̀i
68 | ę̌y,ẽ̌i
69 | ę̂y,ẽ̂i
70 | e\u0328y,ẽi
71 | ë\u0328,ə̃
72 | ǫ̀y,õ̀i
73 | ǫ̌y,õ̌i
74 | ǫ̂y,õ̂i
75 | o\u0328y,õi
76 | į̀w,ĩ̀u
77 | į̌w,ĩ̌u
78 | į̂w,ĩ̂u
79 | i\u0328w,ĩu
80 | ë\u0328,ə\u0303
81 | ë,ə
82 | ä̀w,ào
83 | ä̂w,âo
84 | ä̌w,ǎo
85 | äw,ao
86 | àw,æ̀o
87 | ǎw,æ̌o
88 | âw,æ̂o
89 | aw,æo
90 | ày,æ̀i
91 | ǎy,æ̌i
92 | ây,æ̂i
93 | ay,æi
94 | ä\u0328,ɑ\u0303
95 | ä,ɑ
96 | a\u0328,æ\u0303
97 | a,æ
98 | èw,èo
99 | ěw,ěo
100 | êw,êo
101 | ew,eo
102 | èy,èi
103 | ěy,ěi
104 | êy,êi
105 | ey,ei
106 | òy,òi
107 | ǒy,ǒi
108 | ôy,ôi
109 | oy,oi
110 | ìw,ìu
111 | ǐw,ǐu
112 | îw,îu
113 | iw,iu
114 | y,j
115 | o\u0328,o\u0303
116 | o,o
117 | e\u0328,e\u0303
118 | e,e
119 | \u0328,\u0303
120 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/hur/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Halkomelem
3 | mappings:
4 | - display_name: Halkomelem APA to Hul’q’umi’num’ (Island) practical orthography
5 | in_lang: hur-apa
6 | out_lang: hur
7 | authors:
8 | - Zack Gilkison
9 | type: mapping
10 | rules_path: hur_apa_to_hur_orthog.json
11 | prevent_feeding: false
12 | rule_ordering: apply-longest-first
13 | case_sensitive: false
14 | norm_form: NFD
15 | # <<: &shared
16 | - display_name: Hul’q’umi’num’ (Island) practical orthography to Halkomelem APA
17 | in_lang: hur
18 | out_lang: hur-apa
19 | authors:
20 | - Zack Gilkison
21 | type: mapping
22 | rules_path: hur_orthog_to_hur_apa.json
23 | prevent_feeding: true
24 | rule_ordering: apply-longest-first
25 | case_sensitive: false
26 | norm_form: NFD
27 | language_name: Halkomelem
28 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/hur/hur_orthog_to_hur_apa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "tth’", "out": "t̓ᶿ"},
3 | {"in": "kw’", "out": "k̓ʷ"},
4 | {"in": "qw’", "out": "q̓ʷ"},
5 | {"in": "tth", "out": "tᶿ"},
6 | {"in": "t-s", "out": "ts"},
7 | {"in": "ts’", "out": "c̓"},
8 | {"in": "ch’", "out": "č̓"},
9 | {"in": "tl’", "out": "ƛ̓"},
10 | {"in": "s-h", "out": "sh"},
11 | {"in": "ú", "out": "ə́"},
12 | {"in": "ù", "out": "ə̀"},
13 | {"in": "à", "out": "à"},
14 | {"in": "é", "out": "é"},
15 | {"in": "lh", "out": "ł"},
16 | {"in": "aa", "out": "a:"},
17 | {"in": "ee", "out": "e:"},
18 | {"in": "ii", "out": "i:"},
19 | {"in": "ou", "out": "u"},
20 | {"in": "oo", "out": "u:"},
21 | {"in": "p’", "out": "p̓"},
22 | {"in": "t’", "out": "t̓"},
23 | {"in": "kw", "out": "kʷ"},
24 | {"in": "q’", "out": "q̓"},
25 | {"in": "qw", "out": "qʷ"},
26 | {"in": "ts", "out": "c"},
27 | {"in": "ch", "out": "č"},
28 | {"in": "th", "out": "θ"},
29 | {"in": "sh", "out": "š"},
30 | {"in": "hw", "out": "xʷ"},
31 | {"in": "xw", "out": "x̌ʷ"},
32 | {"in": "’y", "out": "y̓"},
33 | {"in": "y’", "out": "y̓"},
34 | {"in": "’w", "out": "w̓"},
35 | {"in": "w’", "out": "w̓"},
36 | {"in": "’m", "out": "m̓"},
37 | {"in": "m’", "out": "m̓"},
38 | {"in": "’l", "out": "l̓"},
39 | {"in": "l’", "out": "l̓"},
40 | {"in": "’l", "out": "l̕"},
41 | {"in": "l’", "out": "l̕"},
42 | {"in": "’n", "out": "n̓"},
43 | {"in": "n’", "out": "n̓"},
44 | {"in": "ɛ", "out": "ɛ"},
45 | {"in": "‘", "out": "="},
46 | {"in": "·", "out": "·"},
47 | {"in": "’", "out": "ʔ"},
48 | {"in": "q", "out": "q"},
49 | {"in": "u", "out": "ə"},
50 | {"in": "h", "out": "h"},
51 | {"in": "x", "out": "x̌"}
52 | ]
53 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ikt/README.md:
--------------------------------------------------------------------------------
1 | Language-specific files for Inuit languages spoken in Western Canada and written in Roman orthography, including Inuinnaqtun and Uummarmiutun.
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ikt/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Inuktut, Western
3 | mappings:
4 | - display_name: Western Inuktut to IPA
5 | in_lang: ikt
6 | out_lang: ikt-ipa
7 | type: mapping
8 | case_sensitive: false
9 | norm_form: NFD
10 | rule_ordering: apply-longest-first
11 | authors:
12 | - Patrick Littell
13 | rules_path: ikt_to_ipa.json
14 | <<: *shared
15 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/iku/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Inuktitut
3 | mappings:
4 | - display_name: Inuktitut to Inuktitut (equiv)
5 | in_lang: iku
6 | out_lang: iku-equiv
7 | type: mapping
8 | rule_ordering: apply-longest-first
9 | authors:
10 | - Patrick Littell
11 | rules_path: iku_to_iku_equiv.json
12 | language_name: Inuktitut Syllabics
13 | - display_name: Inuktitut to IPA
14 | in_lang: iku-equiv
15 | out_lang: iku-ipa
16 | type: mapping
17 | rule_ordering: apply-longest-first
18 | authors:
19 | - Patrick Littell
20 | rules_path: iku_equiv_to_ipa.json
21 | <<: *shared
22 | - display_name: Inuktitut (SRO) to IPA
23 | in_lang: iku-sro
24 | out_lang: iku-sro-ipa
25 | type: mapping
26 | case_sensitive: false
27 | norm_form: NFD
28 | rule_ordering: apply-longest-first
29 | authors:
30 | - Patrick Littell
31 | rules_path: iku_sro_to_ipa.json
32 | language_name: Inuktitut Romanized
33 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/kkz/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Kaska
3 | mappings:
4 | - display_name: Kaska to IPA
5 | rules_path: kkz_to_ipa.json
6 | in_lang: kkz
7 | out_lang: kkz-ipa
8 | rule_ordering: apply-longest-first
9 | norm_form: NFD
10 | case_sensitive: false
11 | authors:
12 | - Christopher Cox
13 | <<: *shared
14 | - display_name: Kaska IPA to English IPA
15 | rules_path: kkz_ipa_to_eng_ipa.json
16 | in_lang: kkz-ipa
17 | out_lang: eng-ipa
18 | norm_form: NFD
19 | rule_ordering: apply-longest-first
20 | case_sensitive: false
21 | authors:
22 | - Christopher Cox
23 | <<: *shared
24 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/kwk/README.md:
--------------------------------------------------------------------------------
1 | Language-specific files for Kwak'wala (NAPA and U'mista orthographies)
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/kwk/kwk_ipa_to_phonemic_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "kʲ", "out": "k"},
3 | {"in": "xʲ", "out": "x"},
4 | {"in": "kʼʲ", "out": "kʼ"},
5 | {"in": "ɡʲ", "out": "ɡ"},
6 | {"in": "ej", "out": "e"},
7 | {"in": "ow", "out": "o"}
8 | ]
9 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/kwk/kwk_napa_to_ipa.csv:
--------------------------------------------------------------------------------
1 | p̓,pʼ
2 | t̓,tʼ
3 | k̓,kʼ
4 | q̓,qʼ
5 | c,ts
6 | c̓,tʼs
7 | ƛ,tɬ
8 | ƛ̓,tʼɬ
9 | λ,dɬ
10 | g,ɡ
11 | m̓,ʔm
12 | n̓,ʔn
13 | w̓,ʔw
14 | y̓,ʔy
15 | l',lʼ
16 | dᶻ,dz
17 | ǧ,ɢ
18 | x̌,χ
19 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/kwk/kwk_umista_to_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "p", "out": "p"},
3 | {"in": "t", "out": "t"},
4 | {"in": "ts", "out": "t͡s"},
5 | {"in": "tɬ", "out": "t͡ɬ"},
6 | {"in": "tl", "out": "t͡ɬ"},
7 | {"in": "k", "out": "kʲ"},
8 | {"in": "kw", "out": "kʷ"},
9 | {"in": "ḵ", "out": "q"},
10 | {"in": "ḵw", "out": "qʷ"},
11 | {"in": "ɬ", "out": "ɬ"},
12 | {"in": "ł", "out": "ɬ"},
13 | {"in": "s", "out": "s"},
14 | {"in": "x", "out": "xʲ"},
15 | {"in": "xw", "out": "xʷ"},
16 | {"in": "x̱", "out": "χ"},
17 | {"in": "x̱w", "out": "χʷ"},
18 | {"in": "p̓", "out": "pʼ"},
19 | {"in": "t̓", "out": "tʼ"},
20 | {"in": "t̕s", "out": "tʼ͡s"},
21 | {"in": "t̕ł", "out": "tʼ͡ɬ"},
22 | {"in": "t̕l", "out": "tʼ͡ɬ"},
23 | {"in": "k̓", "out": "kʼʲ"},
24 | {"in": "k̕w", "out": "kʼʷ"},
25 | {"in": "ḵ̓", "out": "qʼ"},
26 | {"in": "ḵ̕w", "out": "qʼʷ"},
27 | {"in": "'", "out": "ʔ"},
28 | {"in": "b", "out": "b"},
29 | {"in": "d", "out": "d"},
30 | {"in": "dz", "out": "d͡z"},
31 | {"in": "dɬ", "out": "d͡l"},
32 | {"in": "dł", "out": "d͡l"},
33 | {"in": "dl", "out": "d͡l"},
34 | {"in": "g", "out": "ɡʲ"},
35 | {"in": "gw", "out": "ɡʷ"},
36 | {"in": "g̱", "out": "ɢ"},
37 | {"in": "g̱w", "out": "ɢʷ"},
38 | {"in": "'m", "out": "ʔm"},
39 | {"in": "'n", "out": "ʔn"},
40 | {"in": "'l", "out": "ʔl"},
41 | {"in": "'w", "out": "ʔw"},
42 | {"in": "'y", "out": "ʔj"},
43 | {"in": "h", "out": "h"},
44 | {"in": "m", "out": "m"},
45 | {"in": "n", "out": "n"},
46 | {"in": "l", "out": "l"},
47 | {"in": "w", "out": "w"},
48 | {"in": "y", "out": "j"},
49 | {"in": "a", "out": "a"},
50 | {"in": "e", "out": "ej"},
51 | {"in": "i", "out": "i"},
52 | {"in": "o", "out": "ow"},
53 | {"in": "u", "out": "u"},
54 | {"in": "a̱", "out": "ə"}
55 | ]
56 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/kwk/napa_equiv_ubc.csv:
--------------------------------------------------------------------------------
1 | p/,p̓
2 | p̕,p̓
3 | p’,p̓
4 | t/,t̓
5 | t̕,t̓
6 | t’,t̓
7 | k/,k̓
8 | k̕,k̓
9 | k’,k̓
10 | c/,c̓
11 | c̕,c̓
12 | c’,c̓
13 | ƛ/,ƛ̓
14 | ƛ̕,ƛ̓
15 | ƛ’,ƛ̓
16 | k'ʷ,k̓ʷ
17 | k]ʷ,k̓ʷ
18 | k̕ʷ,k̓ʷ
19 | k’ʷ,k̓ʷ
20 | k{1}ʷ{3}\u0313{2},k{1}\u0313{2}ʷ{3}
21 | k{1}ʷ{3}'{2},k{1}\u0313{2}ʷ{3}
22 | k{1}ʷ{3}’{2},k{1}\u0313{2}ʷ{3}
23 | k'w,k̓ʷ
24 | k]w,k̓ʷ
25 | k̕w,k̓ʷ
26 | k’w,k̓ʷ
27 | k{1}w{3}\u0313{2},k{1}\u0313{2}ʷ{3}
28 | k{1}w{3}'{2},k{1}\u0313{2}ʷ{3}
29 | k{1}w{3}’{2},k{1}\u0313{2}ʷ{3}
30 | q'ʷ,q̓ʷ
31 | q]ʷ,q̓ʷ
32 | q̕ʷ,q̓ʷ
33 | q’ʷ,q̓ʷ
34 | q{1}ʷ{3}\u0313{2},q{1}\u0313{2}ʷ{3}
35 | q{1}ʷ{3}'{2},q{1}\u0313{2}ʷ{3}
36 | q{1}ʷ{3}’{2},q{1}\u0313{2}ʷ{3}
37 | q'w,q̓ʷ
38 | q]w,q̓ʷ
39 | q̕w,q̓ʷ
40 | q’w,q̓ʷ
41 | q{1}w{3}\u0313{2},q{1}\u0313{2}ʷ{3}
42 | q{1}w{3}'{2},q{1}\u0313{2}ʷ{3}
43 | q{1}w{3}’{2},q{1}\u0313{2}ʷ{3}
44 | kw,kʷ
45 | gw,gʷ
46 | qw,qʷ
47 | ɢw,ɢʷ
48 | xw,xʷ
49 | χw,χʷ
50 | ̕{1}m{2},m{2}\u0313{1}
51 | ’{1}m{2},m{2}\u0313{1}
52 | '{1}m{2},m{2}\u0313{1}
53 | m',m̓
54 | ̕{1}n{2},n{2}\u0313{1}
55 | ’{1}n{2},n{2}\u0313{1}
56 | '{1}n{2},n{2}\u0313{1}
57 | n',n̓
58 | ̕{1}w{2},w{2}\u0313{1}
59 | ’{1}w{2},w{2}\u0313{1}
60 | '{1}w{2},w{2}\u0313{1}
61 | w',w̓
62 | ̕{1}y{2},y{2}\u0313{1}
63 | ’{1}y{2},y{2}\u0313{1}
64 | '{1}y{2},y{2}\u0313{1}
65 | y',y̓
66 | ̕{1}l{2},l{2}'{1}
67 | l̓,l'
68 | ’{1}l{2},l{2}'{1}
69 | '{1}l{2},l{2}'{1}
70 | dz,dᶻ
71 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/kwk/napa_equiv_uvic.csv:
--------------------------------------------------------------------------------
1 | p/,p̓
2 | p̕,p̓
3 | p’,p̓
4 | t/,t̓
5 | t̕,t̓
6 | t’,t̓
7 | k/,k̓
8 | k̕,k̓
9 | k’,k̓
10 | c/,c̓
11 | c̕,c̓
12 | c’,c̓
13 | ƛ/,ƛ̓
14 | ƛ̕,ƛ̓
15 | ƛ’,ƛ̓
16 | k'ʷ,k̓ʷ
17 | k]ʷ,k̓ʷ
18 | k̕ʷ,k̓ʷ
19 | k’ʷ,k̓ʷ
20 | kʷ̓,k̓ʷ
21 | kʷ',k̓ʷ
22 | kʷ’,k̓ʷ
23 | k'w,k̓ʷ
24 | k]w,k̓ʷ
25 | k̕w,k̓ʷ
26 | k’w,k̓ʷ
27 | kw̓,k̓ʷ
28 | kw',k̓ʷ
29 | kw’,k̓ʷ
30 | q'ʷ,q̓ʷ
31 | q]ʷ,q̓ʷ
32 | q̕ʷ,q̓ʷ
33 | q’ʷ,q̓ʷ
34 | qʷ̓,q̓ʷ
35 | qʷ',q̓ʷ
36 | qʷ’,q̓ʷ
37 | q'w,q̓ʷ
38 | q]w,q̓ʷ
39 | q̕w,q̓ʷ
40 | q’w,q̓ʷ
41 | qw̓,q̓ʷ
42 | qw',q̓ʷ
43 | qw’,q̓ʷ
44 | ǧw,ǧʷ
45 | x̌w,x̌ʷ
46 | kw,kʷ
47 | gw,gʷ
48 | qw,qʷ
49 | xw,xʷ
50 | χw,χʷ
51 | ̕m,m̓
52 | ’m,m̓
53 | 'm,m̓
54 | ̕n,n̓
55 | ’n,n̓
56 | 'n,n̓
57 | ̕w,w̓
58 | ’w,w̓
59 | 'w,w̓
60 | ̕y,y̓
61 | ’y,y̓
62 | 'y,y̓
63 | l̕,l'
64 | l̓,l'
65 | l’,l'
66 | dz,dᶻ
67 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/kwk/umista_equiv.csv:
--------------------------------------------------------------------------------
1 | p/,p̓
2 | p̕,p̓
3 | p’,p̓
4 | t/,t̓
5 | t̕,t̓
6 | t’,t̓
7 | t's,t̕s
8 | t]s,t̕s
9 | t̓s,t̕s
10 | t̕s,t̕s
11 | t’s,t̕s
12 | t{1}s{3}\u0313{2},t{1}\u0315{2}s{3}
13 | t{1}s{3}\u0315{2},t{1}\u0315{2}s{3}
14 | t{1}s{3}'{2},t{1}\u0315{2}s{3}
15 | t{1}s{3}’{2},t{1}\u0315{2}s{3}
16 | t'ɬ,t̕ɬ
17 | t]ɬ,t̕ɬ
18 | t̓ɬ,t̕ɬ
19 | t̕ɬ,t̕ɬ
20 | t’ɬ,t̕ɬ
21 | t{1}ɬ{3}\u0313{2},t{1}\u0315{2}ɬ{3}
22 | t{1}ɬ{3}\u0315{2},t{1}\u0315{2}ɬ{3}
23 | t{1}ɬ{3}'{2},t{1}\u0315{2}ɬ{3}
24 | t{1}ɬ{3}’{2},t{1}\u0315{2}ɬ{3}
25 | k/,k̓
26 | k̕,k̓
27 | k’,k̓
28 | k'w,k̕w
29 | k]w,k̕w
30 | k̓w,k̕w
31 | k̕w,k̕w
32 | k’w,k̕w
33 | k{1}w{3}\u0313{2},k{1}\u0315{2}w{3}
34 | k{1}w{3}\u0315{2},k{1}\u0315{2}w{3}
35 | k{1}w{3}'{2},k{1}\u0315{2}w{3}
36 | k{1}w{3}’{2},k{1}\u0315{2}w{3}
37 | k;,ḵ
38 | ḵ',ḵ̓
39 | ḵ̕,ḵ̓
40 | g;,g̱
41 | k;w,ḵw
42 | ḵ'w,ḵ̕w
43 | k;]w,ḵ̕w
44 | k{1}]{3};{2}w{4},k{1}\u0331{2}\u0315{3}w{4}
45 | ḵ̓w,ḵ̕w
46 | ḵ̕w,ḵ̕w
47 | k{1}\u0315{3}\u0331{2}w{4},k{1}\u0331{2}\u0315{3}w{4}
48 | k{1}\u0313{3}\u0331{2}w{4},k{1}\u0331{2}\u0315{3}w{4}
49 | g;w,g̱w
50 | ’,'
51 | ̕,'
52 | x;,x̱
53 | x;w,x̱w
54 | ̕m,'m
55 | m{2}\u0313{1},'{1}m{2}
56 | ’m,'m
57 | ̕n,'n
58 | n{2}\u0313{1},'{1}n{2}
59 | ’n,'n
60 | ̕w,'w
61 | w{2}\u0313{1},'{1}w{2}
62 | ’w,'w
63 | ̕y,'y
64 | y{2}\u0313{1},'{1}y{2}
65 | ’y,'y
66 | ̕l,'l
67 | l{2}\u0313{1},'{1}l{2}
68 | ’l,'l
69 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/langs.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/mappings/langs/langs.json.gz
--------------------------------------------------------------------------------
/g2p/mappings/langs/lml/abbreviations.csv:
--------------------------------------------------------------------------------
1 | CONSONANT,t,d,b,bw,p,k,g,g\u0304,n\u0304,n,m,mw,v,vw,l,r,h,s,w
2 | VOWEL,i,a,e,u,o
3 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/lml/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Raga
3 | mappings:
4 | - display_name: Raga to IPA
5 | rules_path: lml_to_ipa.csv
6 | in_lang: lml
7 | out_lang: lml-ipa
8 | case_sensitive: false
9 | norm_form: NFD
10 | rule_ordering: as-written
11 | prevent_feeding: true
12 | abbreviations_path: abbreviations.csv
13 | authors:
14 | - Fineen Davis
15 | - Codrington Hinge
16 | <<: *shared
17 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/lml/lml_to_ipa.csv:
--------------------------------------------------------------------------------
1 | g,ɣ,VOWEL,VOWEL
2 | g,ɣ,\b,VOWEL
3 | ḡ,ŋɡ,,
4 | g,ɡ
5 | n̄,ŋ,,
6 | r,ɹ,,
7 | ai,ɑj,,
8 | ei,ej,,
9 | a,ɑ,,
10 | e,eː,,
11 | o,oː,,
12 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/mic/abbreviations.csv:
--------------------------------------------------------------------------------
1 | SONORANT,i,a,e,u,o,iː,aː,eː,oː,uː,y,m,n,l
2 | CONSONANT,j,k,l,m,n,p,q,s,t,w,y,tʃ,x
3 | OBSTRUENT,k,p,q,s,t,x,tʃ
4 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/mic/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Mi'kmaq
3 | mappings:
4 | - display_name: Mi'kmaq to IPA
5 | rules_path: mic_to_ipa.json
6 | in_lang: mic
7 | out_lang: mic-ipa
8 | case_sensitive: false
9 | norm_form: NFD
10 | rule_ordering: as-written
11 | abbreviations_path: abbreviations.csv
12 | authors:
13 | - Aidan Pine
14 | notes:
15 | - Based on the Francis-Smith orthography
16 | <<: *shared
17 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/moe/README.md:
--------------------------------------------------------------------------------
1 | Authors: Delasie Torkornoo, Bradley Ellert, Laura Russo
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/moe/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Innu-aimun
3 | mappings:
4 | - display_name: Innu-aimun to IPA
5 | in_lang: moe
6 | out_lang: moe-ipa
7 | type: mapping
8 | authors:
9 | - Delasie Torkornoo
10 | - Bradley Ellert
11 | rules_path: moe_to_ipa.json
12 | abbreviations_path: moe_abbs.csv
13 | case_sensitive: false
14 | <<: *shared
15 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/moe/moe_abbs.csv:
--------------------------------------------------------------------------------
1 | VOWEL,a,e,i,o,u
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/moe/moe_to_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "tshish", "out": "tʃʃ"},
3 | {"in": "nish", "out": "nəʃ"},
4 | {"in": "auk", "out": "awk"},
5 | {"in": "shp", "out": "ʃp"},
6 | {"in": "sht", "out": "st"},
7 | {"in": "shk", "out": "ʃk"},
8 | {"in": "tsh", "out": "tʃ"},
9 | {"in": "ikw", "out": "ukw"},
10 | {"in": "akw ", "out": "ukw"},
11 | {"in": "aa", "out": "aː"},
12 | {"in": "ai", "out": "ej"},
13 | {"in": "ii", "out": "iː"},
14 | {"in": "uu", "out": "uː"},
15 | {"in": "sh", "out": "ʃ"},
16 | {"in": "a", "out": "ə"},
17 | {"in": "i", "out": "u", "context_before": "VOWEL", "context_after": "VOWEL"},
18 | {"in": "i", "out": "i"},
19 | {"in": "e", "out": "eː"},
20 | {"in": "u", "out": "u"},
21 | {"in": "h", "out": "h"},
22 | {"in": "ǹ", "out": "l"},
23 | {"in": "ᵘ", "out": "w"},
24 | {"in": "w", "out": "w"},
25 | {"in": "k", "out": "k"},
26 | {"in": "m", "out": "m"},
27 | {"in": "n", "out": "n"},
28 | {"in": "p", "out": "p"},
29 | {"in": "s", "out": "s"},
30 | {"in": "t", "out": "t"}
31 | ]
32 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/moh/README.md:
--------------------------------------------------------------------------------
1 | Language-specific files for Mohawk
2 |
3 | IPA Phoneset:
4 |
5 | ɑ, ˈɑ, ɑː, ɑ́ː, ɑ̀ː, ʌ̃, ˈʌ̃, ʌ̃ː, ʌ̃̀ː, ʌ̃́ː, e, ˈe, èː, éː, i, ˈi, iː, íː, ìː, k, ɡ, kʷ, ɡʷ, kʰʷ, n, n̥, ũ, ˈũ, ṹː, ũ̀ː, o, ˈo, òː, óː, ɽ, ɽ̥, t, d, d͡z, d͡ʒ, t͡s, t͡ʃ, ʃ, s, ʒ, z, w, f, j, ʔ
6 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/moh/abbreviations.csv:
--------------------------------------------------------------------------------
1 | CONSONANT,h,k,n,n̥,r,ɽ,ɽ̥,t,s,w,',t͡s,d͡ʒ,d͡z,t͡ʃ,ʃ,ʒ,d,ɡ,ɡʷ,kʷ,kʰʷ,f,j,z,ʔ
2 | VOWEL,a,á:,à:,a:,à,á,ɑ,ɑ́,ɑ̀,ɑ̀ː,ɑ́ː,ɑː,ʌ̃,ʌ̃́ː,ʌ̃ː,ʌ̃̀ː,ʌ̃̀,ʌ̃́,én:,en,en:,èn:,én,èn,e,è:,é,é:,eː,éː,èː,i,i:,í:,í,ì:,ì,iː,íː,ìː,ón:,òn:,on:,òn,on,ón,ṹː,ũ̀ː,ũ̀,ũ,ṹ,ũː,o,ó,ó:,o:,ò:,ò,óː,oː,òː
3 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/moh/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Kanien'kéha
3 | mappings:
4 | - display_name: Kanien'kéha to IPA
5 | rules_path: moh_to_ipa.json
6 | in_lang: moh-equiv
7 | out_lang: moh-ipa
8 | case_sensitive: false
9 | norm_form: NFC
10 | rule_ordering: as-written
11 | abbreviations_path: abbreviations.csv
12 | authors:
13 | - Aidan Pine
14 | - Akwiratékha' Martin
15 | <<: *shared
16 | - display_name: IPA to Kanien'kéha
17 | rules_path: moh_to_ipa.json
18 | in_lang: moh-ipa
19 | reverse: true
20 | out_lang: moh
21 | case_sensitive: false
22 | norm_form: NFC
23 | rule_ordering: as-written
24 | abbreviations_path: abbreviations.csv
25 | authors:
26 | - Aidan Pine
27 | - Akwiratékha' Martin
28 | <<: *shared
29 | - display_name: Kanien'kéha Equivalencies
30 | rules_path: moh_equiv.json
31 | in_lang: moh
32 | out_lang: moh-equiv
33 | case_sensitive: false
34 | norm_form: NFC
35 | rule_ordering: as-written
36 | abbreviations_path: abbreviations.csv
37 | authors:
38 | - Aidan Pine
39 | <<: *shared
40 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/moh/moh_equiv.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "꞉", "out": ":"},
3 | {"in": "–", "out": "-"},
4 | {"in": "׃", "out": ":"},
5 | {"in": "’", "out": "'"},
6 | {"in": "‘", "out": "'"},
7 | {"in": "òn", "out": "òn:", "context_after": "(CONSONANT)"},
8 | {"in": "èn", "out": "èn:", "context_after": "(CONSONANT)"},
9 | {"in": "on:", "out": "ón:"},
10 | {"in": "en:", "out": "én:"},
11 | {"in": "à", "out": "à:", "context_after": "[^:]"},
12 | {"in": "è", "out": "è:", "context_after": "([^:n]|n(VOWEL))"},
13 | {"in": "ì", "out": "ì:", "context_after": "[^:]"},
14 | {"in": "ò", "out": "ò:", "context_after": "([^:n]|n(VOWEL))"},
15 | {"in": "i:", "out": "í:"},
16 | {"in": "e:", "out": "é:"},
17 | {"in": "a:", "out": "á:"},
18 | {"in": "o:", "out": "ó:"},
19 | {"in": "ό", "out": "ó"},
20 | {"in": ":", "out": ":", "comment": "force the tokenizer to recognize colon as a letter"}
21 | ]
22 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/network.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/mappings/langs/network.json.gz
--------------------------------------------------------------------------------
/g2p/mappings/langs/norm/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Normalization
3 | mappings:
4 | - display_name: Panphon Normalization
5 | rules_path: panphon_preprocessor.csv
6 | id: panphon_preprocessor
7 | in_lang: ipa
8 | out_lang: ipa
9 | rule_ordering: as-written
10 | authors:
11 | - Patrick Littell
12 | - Eric Joanis
13 | <<: *shared
14 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/norm/panphon_preprocessor.csv:
--------------------------------------------------------------------------------
1 | ʷ,w
2 | ᵐ,m
3 | ⁿ,n
4 | ᶯ,ɳ
5 | ᶮ,ɲ
6 | ᵑ,ŋ
7 | ʲ,j
8 | ͡,
9 | ˈ,
10 | \u030a,\u0325
11 | \u0300,
12 | \u0301,
13 | \u0302,
14 | \u0304,
15 | \u030b,
16 | \u030c,
17 | \u030f,
18 | \u1dc4,
19 | \u1dc5,
20 | \u1dc6,
21 | \u1dc7,
22 | \u1dc8,
23 | ˨,
24 | ˦,
25 | ˧,
26 | ˥,
27 | ˩,
28 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/oji/README.md:
--------------------------------------------------------------------------------
1 | IPA mappings for the Ojibwe (Anishinaabemowin) double vowel system, plus a syllabics-to-romanized mapping.
2 |
3 | Reference: https://ojibwe.lib.umn.edu/about-ojibwe-language
4 | More fun reference: https://www.youtube.com/watch?v=GW0pGtmHJHU
5 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/oji/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Ojibwe
3 | mappings:
4 | - display_name: Ojibwe to IPA
5 | in_lang: oji
6 | out_lang: oji-ipa
7 | authors:
8 | - David Huggins-Daines
9 | type: mapping
10 | rules_path: oji_to_ipa.csv
11 | prevent_feeding: true
12 | rule_ordering: as-written
13 | case_sensitive: false
14 | norm_form: NFC
15 | <<: *shared
16 | - display_name: Anishinaabemowin Syllabics to Romanized
17 | in_lang: oji-syl
18 | out_lang: oji
19 | authors:
20 | - Shankhalika Srikanth
21 | type: mapping
22 | rules_path: oji_syllabics_to_orth.csv
23 | prevent_feeding: true
24 | rule_ordering: as-written
25 | case_sensitive: false
26 | norm_form: NFC
27 | language_name: Ojibwe Syllabics
28 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/oji/oji_syllabics_to_orth.csv:
--------------------------------------------------------------------------------
1 | ᐊ,a
2 | ᐅ,o
3 | ᐁ,e
4 | ᐃ,i
5 | ᐘ,wa
6 | ᐏ,wi
7 | ᐍ,we
8 | ᐓ,wo
9 | ᐤ,w
10 | ᐸ,pa
11 | ᐳ,po
12 | ᐱ,pi
13 | ᐯ,pe
14 | ᑅ,pwa
15 | ᑁ,pwo
16 | ᐽ,pwi
17 | ᐻ,pwe
18 | ᑊ,p
19 | ᐦ,h
20 | ᑕ,ta
21 | ᑐ,to
22 | ᑎ,ti
23 | ᑌ,te
24 | ᑢ,twa
25 | ᑞ,two
26 | ᑚ,twi
27 | ᑘ,twe
28 | ᐟ,t
29 | ᒋ,chi
30 | ᒍ,cho
31 | ᘃ,che
32 | ᘂ,cha
33 | ᒝ,chwa
34 | ᒙ,chwo
35 | ᒕ,chwi
36 | ᒓ,chwe
37 | ᐨ,ch
38 | ᒪ,ma
39 | ᒥ,mi
40 | ᒧ,mo
41 | ᒣ,me
42 | ᒷ,mwa
43 | ᒳ,mwo
44 | ᒯ,mwi
45 | ᒭ,mwe
46 | ᒼ,m
47 | ᓇ,na
48 | ᓂ,ni
49 | ᓄ,no
50 | ᓀ,ne
51 | ᓌ,nwa
52 | ᓄᐧ,nwo
53 | ᓂᐧ,nwi
54 | ᓊ,nwe
55 | ᐣ,n
56 | ᑲ,ka
57 | ᑭ,ki
58 | ᑫ,ke
59 | ᑯ,ko
60 | ᑿ,kwa
61 | ᑻ,kwo
62 | ᑷ,kwi
63 | ᑵ,kwe
64 | ᐠ,k
65 | ᓴ,sa
66 | ᓯ,si
67 | ᓱ,so
68 | ᓭ,se
69 | ᔁ,swa
70 | ᓽ,swo
71 | ᓹ,swi
72 | ᓷ,swe
73 | ᐢ,s
74 | ᔕ,sha
75 | ᔑ,shi
76 | ᔓ,sho
77 | ᔐ,she
78 | ᔢ,shwa
79 | ᔞ,shwo
80 | ᔚ,shwi
81 | ᔘ,shwe
82 | ᐡ,sh
83 | ᔭ,ya
84 | ᔨ,yi
85 | ᔪ,yo
86 | ᔦ,ye
87 | ᔺ,ywa
88 | ᔶ,ywo
89 | ᔲ,ywi
90 | ᔰ,ywe
91 | ᔾ,y
92 | ᓬ,l
93 | ᕒ,r
94 | ᙮,.
95 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/oji/oji_to_ipa.csv:
--------------------------------------------------------------------------------
1 | ng,ŋ,,\b
2 | aa,ɑː,,
3 | ii,iː,,
4 | oo,oː,,
5 | e,eː,,
6 | a,ʌ,,
7 | i,i,,
8 | o,o,,
9 | b,b,,
10 | ch,tʃ,,
11 | d,d,,
12 | g,ɡ,,
13 | h,h,,
14 | ',ʔ,,
15 | ’,ʔ,,
16 | j,dʒ,,
17 | k,k,,
18 | m,m,,
19 | n,n,,
20 | p,p,,
21 | sh,ʃ,,
22 | s,s,,
23 | t,t,,
24 | w,w,,
25 | y,j,,
26 | zh,ʒ,,
27 | z,z,,
28 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/oka/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: nsyilxcən
3 | mappings:
4 | - display_name: nsyilxcən to IPA
5 | rules_path: oka_to_ipa.csv
6 | in_lang: oka-equiv
7 | out_lang: oka-ipa
8 | authors:
9 | - Craig Carpenter
10 | type: mapping
11 | prevent_feeding: true
12 | rule_ordering: apply-longest-first
13 | norm_form: NFC
14 | <<: *shared
15 | - display_name: Unicode Equivalencies
16 | in_lang: oka
17 | out_lang: oka-equiv
18 | authors:
19 | - Eric Joanis
20 | rules_path: oka_equiv.csv
21 | norm_form: NFD
22 | <<: *shared
23 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/oka/oka_equiv.csv:
--------------------------------------------------------------------------------
1 | ’,'
2 | ‘,'
3 | ˊ,'
4 | `,'
5 | ́,'
6 | ̒,'
7 | ̓,'
8 | ̔,'
9 | ̕,'
10 | ̛,'
11 | ʻ,'
12 | ʼ,'
13 | ʽ,'
14 | ʹ,'
15 | ','
16 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/oka/oka_to_ipa.csv:
--------------------------------------------------------------------------------
1 | a,a,,
2 | a',ˈa,,
3 | c,ts,,
4 | c',tsʼ,,
5 | ə,ə,,
6 | e,ə,,
7 | h,h,,
8 | ḥ,h,,
9 | i,i,,
10 | i',ˈi,,
11 | ɣ,ɣ,,
12 | ɣ',ɣˀ,,
13 | k,k,,
14 | k',kʼ,,
15 | kʷ,kʷ,,
16 | k'ʷ,kʼʷ,,
17 | l,l,,
18 | l',lˀ,,
19 | ɬ,ɬ,,
20 | ł,ɬ,,
21 | ƛ',tɬʼ,,
22 | m,m,,
23 | m',mˀ,,
24 | n,n,,
25 | n',nˀ,,
26 | o,o,,
27 | o',ˈo,,
28 | p,p,,
29 | p',pʼ,,
30 | q,q,,
31 | q',qʼ,,
32 | qʷ,qʷ,,
33 | q'ʷ,qʼʷ,,
34 | r,ɾ,,
35 | r',ɾˀ,,
36 | s,s,,
37 | t,t,,
38 | t',tʼ,,
39 | ť,tʼ,,
40 | ʕ,ʕ,,
41 | ʕ',ʕˀ,,
42 | u,u,,
43 | u',ˈu,,
44 | ʕʷ,ʕʷ,,
45 | ʕ'ʷ,ʕˀʷ,,
46 | x̌,χ,,
47 | x,x,,
48 | w,w,,
49 | y,j,,
50 | x̌ʷ,χʷ,,
51 | xʷ,xʷ,,
52 | w',wˀ,,
53 | w,w,,
54 | y',jˀ,,
55 | ʷ,ʷ,,
56 | ˀ,ˀ,,
57 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/sal/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Salishan languages with APA-based writing systems
3 | mappings:
4 | - display_name: Salishan APA to IPA
5 | rules_path: sal_apa_to_ipa.csv
6 | in_lang: sal-apa-equiv
7 | out_lang: sal-ipa
8 | authors:
9 | - Tony Mattina
10 | - Eric Joanis
11 | type: mapping
12 | prevent_feeding: true
13 | rule_ordering: apply-longest-first
14 | norm_form: NFD
15 | <<: *shared
16 | - display_name: Unicode Equivalencies
17 | in_lang: sal-apa
18 | out_lang: sal-apa-equiv
19 | authors:
20 | - Eric Joanis
21 | rules_path: sal_equiv.csv
22 | norm_form: NFD
23 | <<: *shared
24 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/sal/sal_apa_to_ipa.csv:
--------------------------------------------------------------------------------
1 | a,a,,
2 | a',ˈa,,
3 | á,ˈa,,
4 | ạ,a̠,,
5 | ạ́,ˈa̠,,
6 | æ,æ,,
7 | æ',ˈæ,,
8 | c,ts,,
9 | c̣,c̠,,
10 | θ,θ,,
11 | ᶿ,ᶿ,,
12 | c',tsʼ,,
13 | c̓,tsʼ,,
14 | č,ʧ,,
15 | č̓,ʧʼ,,
16 | ǰ,ʤ,,
17 | ɛ,ɛ,,
18 | ɛ́,ˈɛ,,
19 | ʌ,ʌ,,
20 | e,ə,,
21 | é,ˈə,,
22 | ẹ́,ˈə̠,,
23 | ə,ə,,
24 | ə́,ˈə,,
25 | ə̣́,ˈə̠,,
26 | ʌ́,ˈʌ,,
27 | g,ɡ,,
28 | gʷ,ɡʷ,,
29 | h,h,,
30 | ḥ,h,,
31 | ḥʷ,h̠ʷ,,
32 | i,i,,
33 | i',ˈi,,
34 | ɣ,ɣ,,
35 | ɣ',ɣˀ,,
36 | k,k,,
37 | k',kʼ,,
38 | kʷ,kʷ,,
39 | k'ʷ,kʼʷ,,
40 | l,l,,
41 | l',lˀ,,
42 | ḷ,l̠,,
43 | ɬ,ɬ,,
44 | ł,ɬ,,
45 | ƛ',tɬʼ,,
46 | m,m,,
47 | m',mˀ,,
48 | n,n,,
49 | n',nˀ,,
50 | ṇ,n̠,,
51 | ŋ,ŋ,,
52 | o,o,,
53 | o',ˈo,,
54 | ó,ˈo,,
55 | ɔ,ɔ,,
56 | ɔ́,ˈɔ,,
57 | p,p,,
58 | p',pʼ,,
59 | q,q,,
60 | q',qʼ,,
61 | qʷ,qʷ,,
62 | q'ʷ,qʼʷ,,
63 | r,ɾ,,
64 | r',ɾˀ,,
65 | s,s,,
66 | ṣ,s̠,,
67 | š,ʃ,,
68 | t,t,,
69 | t',tʼ,,
70 | ť,tʼ,,
71 | ʕ,ʕ,,
72 | ʕ',ʕˀ,,
73 | u,u,,
74 | u',ˈu,,
75 | ú,ˈu,,
76 | ụ,u̠,,
77 | ụ́,ˈu̠,,
78 | ʕʷ,ʕʷ,,
79 | ʕ'ʷ,ʕˀʷ,,
80 | x̌,χ,,
81 | x,x,,
82 | x̣,x̠,,
83 | x̣ʷ,x̠ʷ,,
84 | w,w,,
85 | y,j,,
86 | x̌ʷ,χʷ,,
87 | xʷ,xʷ,,
88 | w',wˀ,,
89 | w,w,,
90 | y',jˀ,,
91 | ʷ,ʷ,,
92 | ʡ,ʡ,,
93 | ʔ,ʔ,,
94 | ˀ,ˀ,,
95 | z',zˀ,,
96 | z̓,zˀ,,
97 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/sal/sal_equiv.csv:
--------------------------------------------------------------------------------
1 | ’,'
2 | ‘,'
3 | \u0315,'
4 | \u0301{1}\u0323{2},\u0323{2}\u0301{1}
5 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/see/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Seneca
3 | mappings:
4 | - display_name: Seneca to IPA
5 | rules_path: see_to_ipa.csv
6 | in_lang: see
7 | out_lang: see-ipa
8 | case_sensitive: false
9 | norm_form: NFD
10 | rule_ordering: as-written
11 | authors:
12 | - Aidan Pine
13 | <<: *shared
14 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/see/see_to_ipa.csv:
--------------------------------------------------------------------------------
1 | ö:,o\u0303
2 | ë:,e\u0303
3 | ä:,ɑ\u0303
4 | ö,o\u0303
5 | ë,e\u0303
6 | ä,ɑ\u0303
7 | a:,ɑ
8 | o:,o
9 | u:,u
10 | e:,e
11 | i:,i
12 | a,ɑ
13 | e,e
14 | i,i
15 | o,o
16 | u,u
17 | j,d͡ʒ
18 | z,z
19 | t,t
20 | d,d
21 | g,ɡ
22 | k,k
23 | tš,t͡ʃ
24 | s,s
25 | w,w
26 | ',ʔ
27 | ’,ʔ
28 | š,ʃ
29 | y,j
30 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/srs/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Tsuut'ina
3 | mappings:
4 | - display_name: Tsuut'ina to IPA
5 | rules_path: srs_to_ipa.json
6 | in_lang: srs
7 | out_lang: srs-ipa
8 | rule_ordering: as-written
9 | norm_form: NFD
10 | case_sensitive: false
11 | authors:
12 | - Christopher Cox
13 | <<: *shared
14 | - display_name: Tsuut'ina IPA to English IPA
15 | rules_path: srs_ipa_to_eng_ipa.json
16 | in_lang: srs-ipa
17 | out_lang: eng-ipa
18 | norm_form: NFD
19 | rule_ordering: as-written
20 | case_sensitive: false
21 | authors:
22 | - Christopher Cox
23 | <<: *shared
24 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/srs/srs_ipa_to_eng_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | { "in": "pʰ", "out": "p" },
3 | { "in": "p", "out": "b" },
4 | { "in": "m", "out": "m" },
5 | { "in": "n", "out": "n" },
6 | { "in": "t͡ɬʼ", "out": "ᵗs" },
7 | { "in": "t͡ɬʰ", "out": "ᵗs" },
8 | { "in": "t͡ɬ", "out": "dz" },
9 | { "in": "ɬ", "out": "s" },
10 | { "in": "l", "out": "l" },
11 | { "in": "t͡sʼ", "out": "ᵗs" },
12 | { "in": "t͡sʰ", "out": "ᵗs" },
13 | { "in": "t͡s", "out": "dz" },
14 | { "in": "t͡ʃʼ", "out": "ᵗʃ" },
15 | { "in": "t͡ʃʰ", "out": "ᵗʃ" },
16 | { "in": "t͡ʃ", "out": "dʒ" },
17 | { "in": "ʃ", "out": "ʃ" },
18 | { "in": "ʒ", "out": "ʒ" },
19 | { "in": "s", "out": "s" },
20 | { "in": "z", "out": "z" },
21 | { "in": "tʼ", "out": "ᵗ" },
22 | { "in": "tʰ", "out": "ᵗ" },
23 | { "in": "t", "out": "d" },
24 | { "in": "kʼʷ", "out": "ᵏ" },
25 | { "in": "kʼ", "out": "ᵏ" },
26 | { "in": "kʰ", "out": "ᵏ" },
27 | { "in": "kʷ", "out": "ɡ" },
28 | { "in": "k", "out": "ɡ" },
29 | { "in": "ɰ", "out": "ɹ" },
30 | { "in": "x", "out": "h" },
31 | { "in": "h", "out": "h" },
32 | { "in": "ʔ", "out": "ʔ" },
33 | { "in": "ᵗ", "out": "t" },
34 | { "in": "ᵏ", "out": "k" },
35 | { "in": "\u0300", "out": "" },
36 | { "in": "\u0301", "out": "" },
37 | { "in": "\u0302", "out": "" },
38 | { "in": "\u0304", "out": "" },
39 | { "in": "\u030c", "out": "" },
40 | { "in": "\u1dc4", "out": "" },
41 | { "in": "\u1dc5", "out": "" },
42 | { "in": "\u1dc6", "out": "" },
43 | { "in": "\u1dc7", "out": "" },
44 | { "in": "aj", "out": "aɪ" },
45 | { "in": "ɛj", "out": "eɪ" },
46 | { "in": "ɔj", "out": "ɔɪ" },
47 | { "in": "ʊj", "out": "ɔɪ" },
48 | { "in": "j", "out": "j" },
49 | { "in": "ɑw", "out": "aʊ" },
50 | { "in": "ʊw", "out": "oʊ" },
51 | { "in": "w", "out": "w" },
52 | { "in": "ɑː", "out": "ɑ" },
53 | { "in": "ɪː", "out": "i" },
54 | { "in": "ɔː", "out": "oː" },
55 | { "in": "ʊː", "out": "u" },
56 | { "in": "ʌ", "out": "ʌ" },
57 | { "in": "ɪ", "out": "ɪ" },
58 | { "in": "ɔ", "out": "ɔ" },
59 | { "in": "ʊ", "out": "ʊ" }
60 | ]
61 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/str/README.md:
--------------------------------------------------------------------------------
1 | Language-specific files for SENĆOŦEN
2 |
3 | Note: SENĆOŦEN does not seem to have a specific ISO 639-3 code, but
4 | 'str' is the general code for Straits Salish. See:
5 |
6 | https://iso639-3.sil.org/
7 | https://norrisresearch.com/lang_rept/NRI_Rept_Mar2016_Appendices.pdf
8 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/str/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: SENĆOŦEN
3 | mappings:
4 | - display_name: SENĆOŦEN equivalency
5 | rules_path: str_equiv.json
6 | in_lang: str
7 | out_lang: str-equiv
8 | rule_ordering: as-written
9 | norm_form: NFD
10 | case_sensitive: true
11 | authors:
12 | - Shankhalika Srikanth
13 | <<: *shared
14 | - display_name: SENĆOŦEN to IPA
15 | rules_path: str_to_ipa.json
16 | in_lang: str-equiv
17 | out_lang: str-ipa
18 | rule_ordering: as-written
19 | norm_form: NFD
20 | case_sensitive: true
21 | authors:
22 | - Aidan Pine
23 | <<: *shared
24 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/str/str_equiv.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "A\u0301", "out": "Á"},
3 | {"in": "C\u0301", "out": "Ć"},
4 | {"in": "I\u0301", "out": "Í"},
5 | {"in": "\u0335K", "out": "₭"},
6 | {"in": "Ḱ", "out": "K\u0301"},
7 | {"in": "Ḵ", "out": "K\u0331"},
8 | {"in": "K\u0332", "out": "K\u0331"},
9 | {"in": "\u0335L", "out": "Ƚ"},
10 | {"in": "Ṉ", "out": "N\u0331"},
11 | {"in": "N\u0332", "out": "N\u0331"},
12 | {"in": "S\u0301", "out": "Ś"},
13 | {"in": "T\u0331", "out": "Ṯ"},
14 | {"in": "T\u0332", "out": "Ṯ"},
15 | {"in": "\u0335T", "out": "Ŧ"},
16 | {"in": "W\u0332", "out": "W\u0331"},
17 | {"in": "X\u0332", "out": "X\u0331"},
18 | {"in": " \u0326", "out": "¸"},
19 | {"in": " \u0327", "out": "¸"}
20 | ]
21 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/str/str_to_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "Á", "out": "e"},
3 | {"in": "Ⱥ", "out": "eː"},
4 | {"in": "A", "out": "æ"},
5 | {"in": "B", "out": "pʼ"},
6 | {"in": "Ć", "out": "t͡ʃ"},
7 | {"in": "Ȼ", "out": "k̟ʷ"},
8 | {"in": "C", "out": "k̟"},
9 | {"in": "D", "out": "tʼ"},
10 | {"in": "E", "out": "ʌ"},
11 | {"in": "H", "out": "h"},
12 | {"in": "Í", "out": "aɪ"},
13 | {"in": "I", "out": "i"},
14 | {"in": "J", "out": "t͡ʃʼ"},
15 | {"in": "Ꝁ", "out": "qʷʼ"},
16 | {"in": "₭", "out": "qʷʼ"},
17 | {"in": "K\u0301", "out": "qʷ"},
18 | {"in": "K\u0331", "out": "q"},
19 | {"in": "K", "out": "qʼ"},
20 | {"in": "L", "out": "l"},
21 | {"in": "Ƚ", "out": "ɬ"},
22 | {"in": "M", "out": "m"},
23 | {"in": "N\u0331", "out": "ŋ"},
24 | {"in": "N", "out": "n"},
25 | {"in": "O", "out": "ɑ"},
26 | {"in": "P", "out": "p"},
27 | {"in": "Q", "out": "kʷʼ"},
28 | {"in": "Ś", "out": "ʃ"},
29 | {"in": "S", "out": "s"},
30 | {"in": "Ⱦ", "out": "t͡s̪"},
31 | {"in": "Ṯ", "out": "t͡ɬʼ"},
32 | {"in": "Ŧ", "out": "θ"},
33 | {"in": "T", "out": "t"},
34 | {"in": "U", "out": "u"},
35 | {"in": "W\u0331", "out": "xʷ"},
36 | {"in": "W", "out": "w"},
37 | {"in": "X\u0331", "out": "χʷ"},
38 | {"in": "X", "out": "χ"},
39 | {"in": "Y", "out": "j"},
40 | {"in": "Z", "out": "d͡z"},
41 | {"in": "¸", "out": "ʔ", "comment": "cedilla is now the community standard for glottal stop"},
42 | {"in": ",", "out": "ʔ", "comment": "comma still occurs, though. TODO: disambiguate punctuation vs glottal stop"},
43 | {"in": "s", "out": "s"}
44 | ]
45 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/tau/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Upper Tanana
3 | mappings:
4 | - display_name: Upper Tanana Equivalencies
5 | in_lang: tau
6 | out_lang: tau-equiv
7 | authors:
8 | - Sabrina Yu
9 | type: mapping
10 | rules_path: tau_equiv.json
11 | prevent_feeding: false
12 | rule_ordering: as-written
13 | case_sensitive: false
14 | norm_form: NFD
15 | <<: *shared
16 | - display_name: Upper Tanana to IPA
17 | in_lang: tau-equiv
18 | out_lang: tau-ipa
19 | authors:
20 | - Sabrina Yu
21 | type: mapping
22 | rules_path: tau_to_ipa.json
23 | prevent_feeding: true
24 | rule_ordering: as-written
25 | case_sensitive: false
26 | norm_form: NFD
27 | <<: *shared
28 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/tau/tau_equiv.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "in": "\u2019",
4 | "out": "\u02bc"
5 | },
6 | {
7 | "in": "\u0027",
8 | "out": "\u02bc"
9 | },
10 | {
11 | "in": "\u0332",
12 | "out": ""
13 | }
14 | ]
15 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/tce/README.md:
--------------------------------------------------------------------------------
1 | IPA mappings for Southern Tutchone with resources from the Yukon Native Language Centre
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/tce/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Southern Tutchone
3 | mappings:
4 | - display_name: Southern Tutchone equivalencies
5 | in_lang: tce
6 | out_lang: tce-equiv
7 | authors:
8 | - Shankhalika Srikanth
9 | type: mapping
10 | rules_path: tce_equiv.csv
11 | prevent_feeding: false
12 | rule_ordering: as-written
13 | case_sensitive: false
14 | norm_form: NFD
15 | <<: *shared
16 | - display_name: Southern Tutchone to IPA
17 | in_lang: tce-equiv
18 | out_lang: tce-ipa
19 | authors:
20 | - Shankhalika Srikanth
21 | type: mapping
22 | rules_path: tce_to_ipa.csv
23 | prevent_feeding: true
24 | rule_ordering: as-written
25 | case_sensitive: false
26 | norm_form: NFD
27 | <<: *shared
28 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/tce/tce_equiv.csv:
--------------------------------------------------------------------------------
1 | \u0331,\u0332
2 | \u0149,\u0027
3 | \u02BC,\u0027
4 | \u055A,\u0027
5 | \uFF07,\u0027
6 | ’,'
7 | ‘,'
8 | ˊ,'
9 | `,'
10 | ʻ,'
11 | ʼ,'
12 | ʽ,'
13 | ʹ,'
14 | \u02CA,\u0301
15 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/tce/tce_to_ipa.csv:
--------------------------------------------------------------------------------
1 | b,p
2 | tth',tθʼ
3 | tth,tθʰ
4 | th,θ
5 | tl',tɬʼ
6 | tl,tɬʰ
7 | ts',tsʼ
8 | ts,tsʰ
9 | t',tʼ
10 | t,tʰ
11 | ddh,tθ
12 | dh,ð
13 | dz,ts
14 | dl,tɬ
15 | d,t
16 | zh,ʒ
17 | ch',tʃʼ
18 | ch,tʃʰ
19 | j,tʃ
20 | sh,ʃ
21 | ghw,ɣʷ
22 | gh,ɣ
23 | gw,kʷ
24 | khw,xʷ
25 | k'w,kʼʷ
26 | kh,x
27 | kw,kʰʷ
28 | k',kʼ
29 | k,kʰ
30 | r,ɹ
31 | ',ʔ
32 | ł,ɬ
33 | mb,ᵐp
34 | nd,ⁿt
35 | nj,ⁿtʃ
36 | \u0301\u0328,\u0303
37 | \u0300\u0328,\u0303
38 | \u0304\u0328,\u0303
39 | \u030C\u0328,\u0303
40 | \u0302\u0328,\u0303
41 | \u0301,
42 | \u0300,
43 | \u0304,
44 | \u030C,
45 | \u0302,
46 | e\u0328,ɛ̃
47 | ü\u0328,ʉ̃
48 | ä\u0328w,ə̃w
49 | a\u0328y,ãi
50 | ä\u0328,ʌ̃
51 | a\u0328,ɑ̃
52 | \u0328,\u0303
53 | e,ɛ
54 | ü,ʉ
55 | äw,əw
56 | ay,ai
57 | ä,ʌ
58 | a,ɑ
59 | o,o
60 | y,j
61 | g,\u0261
62 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/tgx/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Tagish
3 | mappings:
4 | - display_name: Tagish to IPA
5 | rules_path: tgx_to_ipa.json
6 | in_lang: tgx
7 | out_lang: tgx-ipa
8 | rule_ordering: apply-longest-first
9 | norm_form: NFD
10 | case_sensitive: false
11 | authors:
12 | - Christopher Cox
13 | <<: *shared
14 | - display_name: Tagish IPA to English IPA
15 | rules_path: tgx_ipa_to_eng_ipa.json
16 | in_lang: tgx-ipa
17 | out_lang: eng-ipa
18 | norm_form: NFD
19 | rule_ordering: apply-longest-first
20 | case_sensitive: false
21 | authors:
22 | - Christopher Cox
23 | <<: *shared
24 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/tli/README.md:
--------------------------------------------------------------------------------
1 | IPA mappings for Tlingit
2 | with resources from the Yukon Native Language Centre
3 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/tli/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Tlingit
3 | mappings:
4 | - display_name: Tlingit Equivalencies
5 | in_lang: tli
6 | out_lang: tli-equiv
7 | authors:
8 | - Shankhalika Srikanth
9 | type: mapping
10 | rules_path: tli_equiv.csv
11 | prevent_feeding: false
12 | rule_ordering: as-written
13 | case_sensitive: false
14 | norm_form: NFD
15 | <<: *shared
16 | - display_name: Tlingit to IPA
17 | in_lang: tli-equiv
18 | out_lang: tli-ipa
19 | authors:
20 | - Shankhalika Srikanth
21 | type: mapping
22 | rules_path: tli_to_ipa.csv
23 | prevent_feeding: true
24 | rule_ordering: as-written
25 | case_sensitive: false
26 | norm_form: NFD
27 | <<: *shared
28 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/tli/tli_equiv.csv:
--------------------------------------------------------------------------------
1 | \u0331,\u0332,,
2 | \u0149,\u0027,,
3 | \u02BC,\u0027,,
4 | \u055A,\u0027,,
5 | \uFF07,\u0027,,
6 | ’,'
7 | ‘,'
8 | ˊ,'
9 | `,'
10 | ʻ,'
11 | ʼ,'
12 | ʽ,'
13 | ʹ,'
14 | ','
15 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/tli/tli_to_ipa.csv:
--------------------------------------------------------------------------------
1 | p,pʰ,,
2 | b,p,,
3 | d,t,,
4 | t,tʰ,,[^'sl]
5 | l,ɬʰ,t,[^']
6 | l,ɬ,,
7 | ł,ɬ,,
8 | sh,ʃ,,
9 | z,s,,
10 | s,sʰ,t,[^']
11 | j,tʃ,,
12 | ch,tʃʰ,,[^']
13 | ch,tʃ,,
14 | c,tʃʰ,,[^']
15 | c,tʃ,,
16 | w,ʷ,[x'kgh\.𝚐̲𝚔̲𝚡̲̲],
17 | k,kʰ,,[^']
18 | x,x,,
19 | g,k,,
20 | ÿ,ɰ,,
21 | y,j,,
22 | é,e,,
23 | á,a,,
24 | ó,o,,
25 | ú,u,,
26 | í,i,,
27 | è,e,,
28 | à,a,,
29 | ì,i,,
30 | ò,o,,
31 | ù,u,,
32 | ê,e,,
33 | â,a,,
34 | î,i,,
35 | ô,o,,
36 | û,u,,
37 | \u0061\u0328,ã,,
38 | \u0065\u0328,ẽ,,
39 | \u0069\u0328,ĩ,,
40 | \u006F\u0328,õ,,
41 | \u0075\u0328,ũ,,
42 | ee,iː,,
43 | ei,eː,,
44 | aa,aː,,
45 | oo,uː,,
46 | 𝚘̲o,oː,,
47 | 𝚘̲,o,,
48 | 𝚡̲,χ,,
49 | \.,ʔ,,\S
50 | 𝚐̲,q,,
51 | 𝚔̲,q,,'
52 | 𝚔̲,qʰ,,[^']
53 | 𝚕̲,l,,
54 | ',ʼ
55 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ttm/README.md:
--------------------------------------------------------------------------------
1 | IPA mapping for Northern Tutchone with files from the Yukon Native Language Centre
2 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ttm/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Northern Tutchone
3 | mappings:
4 | - display_name: Northern Tutchone equivalencies
5 | in_lang: ttm
6 | out_lang: ttm-equiv
7 | authors:
8 | - Shankhalika Srikanth
9 | type: mapping
10 | rules_path: ttm_equiv.csv
11 | prevent_feeding: false
12 | rule_ordering: as-written
13 | case_sensitive: false
14 | norm_form: NFD
15 | <<: *shared
16 | - display_name: Northern Tutchone to IPA
17 | in_lang: ttm-equiv
18 | out_lang: ttm-ipa
19 | authors:
20 | - Shankhalika Srikanth
21 | type: mapping
22 | rules_path: ttm_to_ipa.csv
23 | prevent_feeding: true
24 | rule_ordering: as-written
25 | case_sensitive: false
26 | norm_form: NFD
27 | <<: *shared
28 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ttm/ttm_equiv.csv:
--------------------------------------------------------------------------------
1 | \u0331,\u0332
2 | \u0149,\u0027
3 | \u02BC,\u0027
4 | \u055A,\u0027
5 | \uFF07,\u0027
6 | ’,'
7 | ‘,'
8 | ˊ,'
9 | `,'
10 | ʻ,'
11 | ʼ,'
12 | ʽ,'
13 | ʹ,'
14 | \u02CA,\u0301
15 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/ttm/ttm_to_ipa.csv:
--------------------------------------------------------------------------------
1 | b,p
2 | tth',tθʼ
3 | tth,tθʰ
4 | th,θ
5 | tl',tɬʼ
6 | tl,tɬʰ
7 | ts',tsʼ
8 | ts,tsʰ
9 | t',tʼ
10 | t,tʰ
11 | ddh,tθ
12 | dh,ð
13 | dz,ts
14 | dl,tɬ
15 | d,t
16 | zh,ʒ
17 | nj,ⁿtʃ
18 | ch',tʃʼ
19 | ch,tʃʰ
20 | j,tʃ
21 | sh,ʃ
22 | ghw,ɣʷ
23 | gh,ɣ
24 | gw,kʷ
25 | khw,xʷ
26 | k'w,kʼʷ
27 | kh,x
28 | kw,kʰʷ
29 | k',kʼ
30 | k,kʰ
31 | g,k
32 | r,ɹ
33 | ',ʔ
34 | ł,ɬ
35 | mb,ᵐp
36 | nd,ⁿt
37 | n,ŋ,i
38 | n,ŋ,u:
39 | \u0301\u0328,\u0303
40 | \u0304\u0328,\u0303
41 | \u030C\u0328,\u0303
42 | \u0301,
43 | \u0304,
44 | \u030C,
45 | a\u0328i,ʌ̃ɪ
46 | a\u0328u,ʌ̃ʊ
47 | a\u0328e,æ̃
48 | ä\u0328,ʌ̃
49 | a\u0328,ɑ̃
50 | \u0328,\u0303
51 | ai,ʌɪ
52 | au,ʌʊ
53 | ae,æ
54 | ä,ʌ
55 | a,ɑ
56 | y,j
57 | o,o
58 | e,e
59 | g,\u0261
60 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/und/README.md:
--------------------------------------------------------------------------------
1 | Fallback resources for an unknown language or for cases where other G2P solutions
2 | have failed. ("und" is a special ISO 639-3 code for an undetermined language.)
3 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/und/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Undetermined
3 | mappings:
4 | - display_name: Undetermined ASCII to IPA
5 | rules_path: und_to_ipa.json
6 | in_lang: und-ascii
7 | out_lang: und-ipa
8 | norm_form: NFD
9 | case_sensitive: false
10 | escape_special: false
11 | authors:
12 | - Patrick Littell
13 | <<: *shared
14 | - display_name: Undetermined IPA to English IPA
15 | rules_path: und_ipa_to_eng_ipa.json
16 | in_lang: und-ipa
17 | out_lang: eng-ipa
18 | rule_ordering: apply-longest-first
19 | authors:
20 | - Patrick Littell
21 | <<: *shared
22 | - display_name: Undetermined IPA to English IPA
23 | rules_path: und_ipa_to_eng_ipa.json
24 | in_lang: und-ipa
25 | out_lang: hamming-eng-ipa
26 | rule_ordering: apply-longest-first
27 | authors:
28 | - Patrick Littell
29 | <<: *shared
30 | - display_name: Undetermined Unicode to ASCII
31 | type: unidecode
32 | norm_form: NFD
33 | in_lang: und
34 | out_lang: und-ascii
35 | authors:
36 | - Eric Joanis
37 | <<: *shared
38 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/und/und_ipa_to_eng_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "out": "ɑ",
4 | "in": "a"
5 | },
6 | {
7 | "out": "b",
8 | "in": "b"
9 | },
10 | {
11 | "out": "d",
12 | "in": "d"
13 | },
14 | {
15 | "out": "eː",
16 | "in": "e"
17 | },
18 | {
19 | "out": "f",
20 | "in": "f"
21 | },
22 | {
23 | "out": "h",
24 | "in": "h"
25 | },
26 | {
27 | "out": "i",
28 | "in": "i"
29 | },
30 | {
31 | "out": "j",
32 | "in": "j"
33 | },
34 | {
35 | "out": "k",
36 | "in": "k"
37 | },
38 | {
39 | "out": "l",
40 | "in": "l"
41 | },
42 | {
43 | "out": "m",
44 | "in": "m"
45 | },
46 | {
47 | "out": "n",
48 | "in": "n"
49 | },
50 | {
51 | "out": "oː",
52 | "in": "o"
53 | },
54 | {
55 | "out": "p",
56 | "in": "p"
57 | },
58 | {
59 | "out": "k",
60 | "in": "q"
61 | },
62 | {
63 | "out": "ɾ",
64 | "in": "r"
65 | },
66 | {
67 | "out": "s",
68 | "in": "s"
69 | },
70 | {
71 | "out": "t",
72 | "in": "t"
73 | },
74 | {
75 | "out": "ts",
76 | "in": "t͡s"
77 | },
78 | {
79 | "out": "u",
80 | "in": "u"
81 | },
82 | {
83 | "out": "v",
84 | "in": "v"
85 | },
86 | {
87 | "out": "w",
88 | "in": "w"
89 | },
90 | {
91 | "out": "k",
92 | "in": "x"
93 | },
94 | {
95 | "out": "z",
96 | "in": "z"
97 | },
98 | {
99 | "out": "ə",
100 | "in": "ə"
101 | },
102 | {
103 | "out": "ɡ",
104 | "in": "ɡ"
105 | },
106 | {
107 | "out": "ʒ",
108 | "in": "ʒ"
109 | },
110 | {
111 | "out": "ʔ",
112 | "in": "ʔ"
113 | }
114 | ]
115 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/und/und_to_ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | { "in": "a", "out": "a" },
3 | { "in": "b", "out": "b" },
4 | { "in": "c", "out": "t͡ʃ" },
5 | { "in": "d", "out": "d" },
6 | { "in": "e", "out": "e" },
7 | { "in": "f", "out": "f" },
8 | { "in": "g", "out": "ɡ" },
9 | { "in": "h", "out": "h" },
10 | { "in": "i", "out": "i" },
11 | { "in": "j", "out": "ʒ" },
12 | { "in": "k", "out": "k" },
13 | { "in": "l", "out": "l" },
14 | { "in": "m", "out": "m" },
15 | { "in": "n", "out": "n" },
16 | { "in": "o", "out": "o" },
17 | { "in": "p", "out": "p" },
18 | { "in": "q", "out": "q" },
19 | { "in": "r", "out": "r" },
20 | { "in": "s", "out": "s" },
21 | { "in": "t", "out": "t" },
22 | { "in": "u", "out": "u" },
23 | { "in": "v", "out": "v" },
24 | { "in": "w", "out": "w" },
25 | { "in": "x", "out": "x" },
26 | { "in": "y", "out": "j" },
27 | { "in": "z", "out": "z" },
28 | { "in": "@", "out": "ə" },
29 | { "in": "\\?", "out": "ʔ" },
30 | { "in": "'", "out": "ʔ" },
31 | { "in": ",", "out": "ʔ" },
32 | { "in": ":", "out": "" }
33 | ]
34 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/win/README.md:
--------------------------------------------------------------------------------
1 | Language-specific files for Hoocąk (Winnebago / Ho-Chunk)
2 |
3 | Alphabet table adapted, with slight modifications, from: https://en.wikipedia.org/wiki/Winnebago_language#The_sounds_of_Ho-Chunk_with_example_words[6]
4 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/win/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Hoocąk
3 | mappings:
4 | - display_name: Hoocąk to IPA
5 | rules_path: win_to_ipa.json
6 | in_lang: win
7 | out_lang: win-ipa
8 | case_sensitive: false
9 | norm_form: NFC
10 | rule_ordering: apply-longest-first
11 | authors:
12 | - David Huggins-Daines
13 | <<: *shared
14 |
--------------------------------------------------------------------------------
/g2p/mappings/langs/win/hoocak_alphabet.csv:
--------------------------------------------------------------------------------
1 | a,a,"a, ʌ"
2 | aa,ā,aː
3 | ą,ą,"ã, ʌ̃"
4 | ąą,ą̄,ã:
5 | b,b,b
6 | c,c,tʃ
7 | e,e,"e, ɛ"
8 | ee,ē,"eː, ɛː"
9 | g,g,g
10 | ǧ,ǧ,ɣ
11 | h,h,h
12 | i,i,i
13 | ii,ī,iː
14 | į,į,ĩ
15 | įį,į̄,ĩ:
16 | j,j,dʒ
17 | k,k,k
18 | k',k',k'
19 | m,m,m
20 | n,n,n
21 | o,o,o
22 | oo,ō,o:
23 | p,p,p
24 | p',p',p'
25 | r,r,r
26 | s,s,s
27 | s',s',s'
28 | š,š,ʃ
29 | š',š',ʃ'
30 | t,t,t
31 | t',t',t'
32 | u,u,u
33 | uu,ū,u:
34 | ų,ų,ũ
35 | ųų,ų̄,ũ:
36 | w,w,w
37 | x,x,x
38 | x',x',x'
39 | y,y,j
40 | z,z,z
41 | ž,ž,ʒ
42 | '','',ʔ
43 |
--------------------------------------------------------------------------------
/g2p/static/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/static/__init__.py
--------------------------------------------------------------------------------
/g2p/static/assets/bonjour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/static/assets/bonjour.png
--------------------------------------------------------------------------------
/g2p/static/blockly_main.js:
--------------------------------------------------------------------------------
1 | (function () {
2 | let currentButton;
3 | function exportJS() {
4 | let ws = Blockly.getMainWorkspace()
5 | Blockly.JavaScript.addReservedWords('code');
6 | let code = Blockly.JavaScript.workspaceToCode(ws)
7 | alert(code)
8 | return code
9 | }
10 | function exportPY(codeType) {
11 | let ws = Blockly.getMainWorkspace()
12 | Blockly.Python.addReservedWords('code');
13 | let code = Blockly.Python.workspaceToCode(ws)
14 | alert(code)
15 | return code
16 | }
17 |
18 | function add() {
19 | let ws = Blockly.getMainWorkspace()
20 | Blockly.JavaScript.addReservedWords('code');
21 | var code = Blockly.JavaScript.workspaceToCode(
22 | ws
23 | );
24 | try {
25 | console.log(code)
26 | eval(code)
27 | } catch (error) {
28 | console.log(error)
29 | }
30 | }
31 |
32 | function clear() {
33 | let ws = Blockly.getMainWorkspace()
34 | ws.clear()
35 | }
36 |
37 | function handleAdd() {
38 | add();
39 | // clear();
40 | }
41 |
42 | document.querySelector('#clear').addEventListener('click', clear);
43 | document.querySelector('#add').addEventListener('click', handleAdd);
44 | document.querySelector('#exportJS').addEventListener('click', exportJS)
45 | document.querySelector('#exportPY').addEventListener('click', exportPY)
46 |
47 | Blockly.inject('blockly-div', {
48 | toolbox: document.getElementById('toolbox'),
49 | toolboxPosition: 'end',
50 | horizontalLayout: true,
51 | scrollbars: false
52 | });
53 |
54 |
55 | })();
56 |
--------------------------------------------------------------------------------
/g2p/static/custom.css:
--------------------------------------------------------------------------------
1 | .center-text { /* utility class: centres text horizontally */
2 | text-align: center;
3 | }
4 |
5 | label { /* applies to every <label> element */
6 | display: -webkit-inline-box; /* NOTE(review): vendor-prefixed value with no standard fallback declared */
7 | font-weight: 300;
8 | }
9 |
10 | #input, /* #input and #output share the same sizing */
11 | #output {
12 | width: 100%;
13 | height: 25%;
14 | }
15 |
16 | .export-buttons { /* laid out inline, with a 1rem margin on every side */
17 | display: inline;
18 | margin: 1rem;
19 | }
20 |
21 | #title { /* left margin scales with the viewport width */
22 | margin-top: 10px;
23 | margin-left: 5vw;
24 | }
25 |
26 | .ht_master .wtHolder { /* presumably Handsontable's scroll holder - TODO confirm */
27 | overflow-x: hidden; /* clips horizontal overflow, so no horizontal scrollbar */
28 | }
29 |
30 | .mg-top, /* spacing helper; every h4 gets the same top margin */
31 | h4 {
32 | margin-top: 40px;
33 | }
34 |
35 | .mg-bot { /* spacing helper mirroring .mg-top */
36 | margin-bottom: 40px;
37 | }
38 |
39 | .hot-container, /* these three containers are hidden by default */
40 | .settings,
41 | .abbs-container {
42 | display: none;
43 | }
44 |
45 | .hot-container.active, /* adding the "active" class makes a container visible */
46 | .settings.active,
47 | .abbs-container.active {
48 | display: inline;
49 | }
50 |
51 | table td { /* applies to every table cell on the page */
52 | text-align: center;
53 | padding: 3px !important; /* !important wins over other non-important padding rules */
54 | }
55 |
56 | table th { /* header cells get the same forced padding as td */
57 | padding: 3px !important;
58 | }
59 |
--------------------------------------------------------------------------------
/g2p/tests/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | source_pkgs = g2p
4 | omit = *g2p/tests/*
5 |
6 | [report]
7 | precision = 2
8 |
--------------------------------------------------------------------------------
/g2p/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/tests/__init__.py
--------------------------------------------------------------------------------
/g2p/tests/public/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | PUBLIC_DIR = os.path.dirname(__file__)  # directory that contains this __init__.py
4 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/atj.psv:
--------------------------------------------------------------------------------
1 | atj|atj-ipa|matcaci|mad͡ʒaʃi
2 | atj|atj-ipa|amiskw|amisɡw
3 | atj|atj-ipa|awesisak|aweːsisaɡ
4 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/clm.csv:
--------------------------------------------------------------------------------
1 | # Ten real words
2 | clm,clm-ipa,x̣áyk̕ʷs,χˈajkʼʷs
3 | clm,clm-ipa,sx̣cáʔəy̕,sχtsˈaʔəj̰
4 | clm,clm-ipa,čúxʷəŋ̕,tʃˈuxʷəɴ̰
5 | clm,clm-ipa,ɬə́č̕šən,ɬˈʌtʃʼʃən
6 | clm,clm-ipa,ʔəsyác̕ɬ,ʔəsjˈatsʼɬ
7 | clm,clm-ipa,q̕ʷəyíyəš,qʼʷəjˈijəʃ
8 | clm,clm-ipa,sqʷə́m̕x̣ʷ,sqʷˈʌm̰χʷ
9 | clm,clm-ipa,sq̕ʷə́ŋəs,sqʼʷˈʌɴəs
10 | clm,clm-ipa,sƛ̕ə́mk̕ʷ,stɬʼˈʌmkʼʷ
11 | clm,clm-ipa,sx̣ʷáx̣ʷc̕,sχʷˈaχʷtsʼ
12 |
13 | clm,eng-ipa,x̣áyk̕ʷs,ʃæjkws
14 | clm,eng-ipa,sx̣cáʔəy̕,sʃtsæʔəj
15 | clm,eng-ipa,čúxʷəŋ̕,t͡ʃukwəŋ
16 | clm,eng-ipa,ɬə́č̕šən,sʌtʃʃən
17 | clm,eng-ipa,ʔəsyác̕ɬ,ʔəsjætss
18 | clm,eng-ipa,q̕ʷəyíyəš,kwəjijəʃ
19 | clm,eng-ipa,sqʷə́m̕x̣ʷ,skwʌmʃw
20 | clm,eng-ipa,sq̕ʷə́ŋəs,skwʌŋəs
21 | clm,eng-ipa,sƛ̕ə́mk̕ʷ,stsʌmkw
22 | clm,eng-ipa,sx̣ʷáx̣ʷc̕,sʃwæʃwts
23 |
24 | clm,eng-arpabet,x̣áyk̕ʷs,SH AE Y K W S
25 | clm,eng-arpabet,sx̣cáʔəy̕,S SH T S AE HH AH Y
26 | clm,eng-arpabet,čúxʷəŋ̕,CH UW K W AH NG
27 | clm,eng-arpabet,ɬə́č̕šən,S AH CH SH AH N
28 | clm,eng-arpabet,ʔəsyác̕ɬ,HH AH S Y AE T S S
29 | clm,eng-arpabet,q̕ʷəyíyəš,K W AH Y IY Y AH SH
30 | clm,eng-arpabet,sqʷə́m̕x̣ʷ,S K W AH M SH W
31 | clm,eng-arpabet,sq̕ʷə́ŋəs,S K W AH NG AH S
32 | clm,eng-arpabet,sƛ̕ə́mk̕ʷ,S T S AH M K W
33 | clm,eng-arpabet,sx̣ʷáx̣ʷc̕,S SH W AE SH W T S
34 |
35 | # Artificial test data
36 | clm,clm-ipa,uʔúhuy̕,ɔʔˈɔhɔj̰
37 | clm,clm-ipa,utúluk,utˈuluk
38 | clm,clm-ipa,ə,ə
39 | clm,clm-ipa,ə́,ˈʌ
40 | clm,clm-ipa,šəč̕əš,ʃɨtʃʼɨʃ
41 | clm,clm-ipa,čəšəč,tʃɨʃɨtʃ
42 | clm,clm-ipa,ə́x̣ʷəx̣ʷ,ˈʌχʷʊχʷ
43 | clm,clm-ipa,ə́x̌əx̌ʷ,ˈʌχʊχʷ
44 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/crg.psv:
--------------------------------------------------------------------------------
1 | crg-tmd|crg-ipa|kishchaymikawshoow|kɪʃtʃeːmɪkɑːʃoaw
2 | crg-tmd|crg-ipa|smenn|smɛn
3 | crg-tmd|crg-ipa|dayistaen|deːɪstɛ̃ː
4 | crg-tmd|crg-ipa|baenn|bɛ̃ː
5 | crg-dv|crg-ipa|kishcheemikaashoaw|kɪʃtʃeːmɪkɑːʃoaw
6 | crg-dv|crg-ipa|deeistaeñ|deːɪstɛ̃ː
7 | crg-dv|crg-ipa|lañg|lɑ̃ːŋ
8 | crg-dv|crg-ipa|eede|eːdɛ
9 | crg-dv|crg-ipa|Booñ|bɔ̃ː
10 | crg-dv|crg-ipa|Not|not
11 | crg-dv|crg-ipa|mooñd|mɔ̃ːd
12 | crg-dv|crg-ipa|maañzhii|mɑ̃ːʒiː
13 | crg-dv|crg-ipa|Aeñ|ɛ̃ː
14 | crg-dv|crg-ipa|Kreatoer|kɹɛʌtʌːɹ
15 | crg-dv|eng-arpabet|Booñ|B AO N
16 | crg-dv|eng-arpabet|Not|N AO T
17 | crg-dv|eng-arpabet|maañzhii|M AA N ZH EY
18 | crg-dv|eng-arpabet|ooma|OW M AH
19 | crg-dv|eng-arpabet|grel|G R EH L
20 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/crj.psv:
--------------------------------------------------------------------------------
1 | crj|crj-equiv|chisichiiyeshiishtamaatin|ᒋᓯᒌᔦᔒᔥᑕᒫᑎᓐ
2 | crj|eng-ipa|ᒋᓯᒌᔦᔒᔥᑕᒫᑎᓐ|tʃisitʃijeːʃiʃtɑmɑtin
3 | crj|eng-ipa|chisichiiyeshiishtamaatin|tʃisitʃijeːʃiʃtɑmɑtin
4 | crj|crj-equiv|âhkuhîkuyan|ᐋᐦᑯᐦᐄᑯᔭᓐ
5 | crj|crj-equiv|chimûshihtân|ᒋᒨᔑᐦᑖᓐ
6 | crj|crj-equiv|êchêshê|ᐁᒉᔐ
7 | crj|eng-arpabet|âhkuhîkuyan|AA HH K UW HH IY K UW Y AA N
8 | crj|eng-arpabet|chimûshihtân|CH IY M UW SH IY HH T AA N
9 | crj|eng-arpabet|êchêshê|EY CH EY SH EY
10 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/crk.psv:
--------------------------------------------------------------------------------
1 | crk|crk-ipa|kikiskamohkan|kɪkɪskʌmohkʌn
2 | crk|crk-ipa|âkohkwênikanêkin|aːkohkweːnɪkʌneːkɪn
3 | crk|crk-ipa|kihcêyim|kɪht͡seːjɪm
4 | crk|crk-ipa|kiwîci-ayisiyiniw|kɪwiːt͡sɪ-ʌjɪsɪjɪnɪw
5 | crk|crk-ipa|kâ-itohtêyan|kaː-ɪtohteːjʌn
6 | crk|crk-ipa|ayisiyinînahk|ʌjɪsɪjɪniːnʌhk
7 | crk|crk-ipa|ô-masinahikêw|oː-mʌsɪnʌhɪkeːw
8 | crk|crk-no-symbols|&|êkwa
9 | crk|eng-arpabet|ômasinahikêw|OW M AH S IH N AH HH IH K EY W
10 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/crl.psv:
--------------------------------------------------------------------------------
1 | crl|crl-equiv|chichischaayihtaanaawaachichaa|ᒋᒋᔅᒑᔨᐦᑖᓈᐙᒋᒑ
2 | crl|eng-ipa|ᒋᒋᔅᒑᔨᐦᑖᓈᐙᒋᒑ|tʃitʃistʃɑjihtɑnɑwɑtʃitʃɑ
3 | crl|eng-ipa|chichischaayihtaanaawaachichaa|tʃitʃistʃɑjihtɑnɑwɑtʃitʃɑ
4 | crl|crl-equiv|âhkuhîkuyan|ᐋᐦᑯᐦᐄᑯᔭᓐ
5 | crl|crl-equiv|chimûshihtân|ᒋᒨᔑᐦᑖᓐ
6 | crl|crl-equiv|êchêshê|ᐁᒉᔐ
7 | crl|eng-arpabet|âhkuhîkuyan|AA HH K UW HH IY K UW Y AA N
8 | crl|eng-arpabet|chimûshihtân|CH IY M UW SH IY HH T AA N
9 | crl|eng-arpabet|êchêshê|EY CH EY SH EY
10 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/crm.psv:
--------------------------------------------------------------------------------
1 | crm|eng-ipa|êkotê|eːkuteː
2 | crm|eng-ipa|ᐁᑯᑌ|eːkuteː
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/csw.psv:
--------------------------------------------------------------------------------
1 | csw|eng-ipa|êkotê|eːkuteː
2 | csw|eng-ipa|ᐁᑯᑌ|eːkuteː
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/ctp.csv:
--------------------------------------------------------------------------------
1 | ctp,eng-arpabet,Qneᴬ,HH N EY
2 | ctp,eng-arpabet,ntkwaᴶ,N T K W AA
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/eng.csv:
--------------------------------------------------------------------------------
1 | # Make sure the same input in NFD and NFC both get handled correctly
2 | eng-ipa,eng-arpabet,ĩ,IY N
3 | eng-ipa,eng-arpabet,ĩ,IY N
4 | eng,eng-ipa,hello,hʌloʊ
5 | eng,eng-arpabet,hello,HH AH L OW
6 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/fin.psv:
--------------------------------------------------------------------------------
1 | fin|fin-ipa|nähdä|næhdæ
2 | fin|fin-ipa|polkupyörä|polkupyøræ
3 | fin|fin-ipa|haluttaa|hɑluttɑː
4 | fin|fin-ipa|tulevaisuus|tuleʋɑɪsuːs
5 | fin|fin-ipa|välinpitämättömyyksien|ʋælimpitæmættømyːksiɛn
6 | fin|fin-ipa|mihinkään|mihiŋkæːn
7 | fin|fin-ipa|nyt minä haluan juoda kahvia|nyt minæ hɑluɑn jʊɔdɑ kɑhʋiɑ
8 | fin|eng-ipa|nyt minä haluan juoda kahvia|nut minæ hɑluɑn jʊɔdɑ kɑhwiɑ
9 | fin|eng-arpabet|nyt minä haluan juoda kahvia|N UW T M IY N AE HH AA L UW AA N Y UH AO D AA K AA HH W IY AA
10 | fin|eng-arpabet|hiihtää|HH IY HH T AE
11 | fin|eng-arpabet|löylyä|L AH Y L UW AE
12 | fin|eng-arpabet|lyijy|L UW IY Y UW
13 | fin|eng-arpabet|luistaa|L UH Y S T AA
14 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/fn_unicode.psv:
--------------------------------------------------------------------------------
1 | fn-unicode-font|fn-unicode|qʷi∙qʷi∙diččaq|qʷi·qʷi·diččaq
2 | fn-unicode-font|fn-unicode|ล|ḥ
3 | fn-unicode-font|fn-unicode|X ล ɤ ∛ X|x ᶿ √ ḥ ɣ · x
4 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/fra.psv:
--------------------------------------------------------------------------------
1 | fra-ipa|eng-ipa|ʒ|ʒ
2 | fra|fra-ipa|manger|mɑ̃ʒe
3 | fra|fra-ipa|écoutons|ekutɔ̃
4 | fra|fra-ipa|écoutés|ekute
5 | fra|fra-ipa|écoutes|ekut
6 | fra|fra-ipa|programmeur|pʁoɡʁamøʁ
7 | fra|fra-ipa|traduction|tʁadyksiɔ̃
8 | fra|fra-ipa|bison|bizɔ̃
9 | fra|fra-ipa|cela|sʌla
10 | fra|fra-ipa|Noël|noɛl
11 | fra|fra-ipa|Noël|noɛl
12 | fra|fra-ipa|à côté|a kote
13 | fra|eng-ipa|à côté|ɑ koːteː
14 | fra|eng-arpabet|à côté|AA K OW T EY
15 | fra|eng-ipa|manger|mɑ̃ʒeː
16 | fra|eng-ipa|écoutons|eːkutɔ̃
17 | fra|eng-ipa|programmeur|pʒoːɡʒɑmoːʒ
18 | fra|eng-ipa|traduction|tʒɑduksiɔ̃
19 | fra|eng-ipa|bison|bizɔ̃
20 | fra|fra-ipa|gagnant|ɡaɲɑ̃
21 | fra|fra-ipa|êtres|ɛːtʁ
22 | fra|fra-ipa|où|u
23 | fra|fra-ipa|s|s
24 | fra|fra-ipa|tests s|tʌst s
25 | fra|fra-ipa|été e|ete ʌ
26 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/fra_panagrams.txt:
--------------------------------------------------------------------------------
1 | https://fr.wikipedia.org/wiki/Pangramme
2 |
3 | Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce qui lui permet de penser à la cænogénèse de l'être dont il est question dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, pense-t-il, diminue çà et là la qualité de son œuvre.
4 | Dès Noël, où un zéphyr haï me vêt de glaçons würmiens, je dîne d’exquis rôtis de bœuf au kir, à l’aÿ d’âge mûr, &cætera. (contient les 42 caractères de la langue française) (Gilles Esposito-Farèse)
5 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/fra_panagrams_NFD.txt:
--------------------------------------------------------------------------------
1 | https://fr.wikipedia.org/wiki/Pangramme
2 |
3 | Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce qui lui permet de penser à la cænogénèse de l'être dont il est question dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, pense-t-il, diminue çà et là la qualité de son œuvre.
4 | Dès Noël, où un zéphyr haï me vêt de glaçons würmiens, je dîne d’exquis rôtis de bœuf au kir, à l’aÿ d’âge mûr, &cætera. (contient les 42 caractères de la langue française) (Gilles Esposito-Farèse)
5 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/fra_simple.txt:
--------------------------------------------------------------------------------
1 | Un texte simple en français, qui passe avec scan.
2 | Testons quand même quelques accents: èéà.
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/git.psv:
--------------------------------------------------------------------------------
1 | git|git-ipa|gwila|ɡʷilæ
2 | git|git-ipa|hlik̲'sxw|ɬiqʔsxʷ
3 | git|eng-arpabet|K̲'ay|K HH AE Y
4 | git|eng-arpabet|guts'uusgi'y|G UW T S UW S G IY HH Y
5 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/gwi.psv:
--------------------------------------------------------------------------------
1 | gwi|gwi-ipa|tik|tʰɪkʰ
2 | gwi|gwi-ipa|àìùèò|əɪʊɛo
3 | gwi|gwi-ipa|tt’|tʰtʼ
4 | gwi|gwi-ipa|tʼt|tʼtʰ
5 | gwi|gwi-ipa|aaeeiioouu|ɑːɛːiːouː
6 | gwi|gwi-ipa|teetl'it|tʰɛːtɬʼɪtʰ
7 | gwi|gwi-ipa|zheh|ʒɛh
8 | gwi|gwi-ipa|ąąęęįįǫǫųų|ɑ̃ːɛ̃ːĩːõũː
9 | gwi|gwi-ipa|ąęįǫų|ə̃ɛ̃ɪ̃õʊ̃
10 | gwi|gwi-ipa|khghw|xɤʷ
11 | gwi|gwi-ipa|tsttrdzdrr|tsʰtʰʈʂʰtsʈʂɻ
12 | gwi|gwi-ipa|tth'tththdddhdh|tθʼtθʰθttθð
13 | gwi|gwi-ipa|kk'kwk'wkhkhw|kʰkʼkʰʷkʼʷxxʷ
14 | gwi|gwi-ipa|ggwghghwhw|kkʷɤɤʷhw
15 | gwi|gwi-ipa|gwich'in|kʷɪtʃʼɪn
16 | gwi|gwi-ipa|ai’|əɪʔ
17 | gwi|gwi-ipa|jnjj|tʃⁿtʃtʃ
18 | gwi|gwi-ipa|ltl'tldlɬ'|ltɬʼtɬʰtɬɬʔ
19 | gwi|gwi-ipa|bmmb|bmᵐp
20 | gwi|gwi-ipa|nnd|nⁿt
21 | gwi|gwi-ipa|ssrshs|sʂʃs
22 | gwi|gwi-ipa|vyr|vjɻ
23 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/haa.csv:
--------------------------------------------------------------------------------
1 | haa,haa-ipa,tth’ą̂ą̈̌,tθʼæ̃̂ɑ̃̌
2 | haa,eng-ipa,tth’ą̂ą̈̌,tθæ̃ɑ̃
3 | haa,haa-ipa,nhdhthtthddhwhshzhchch’yhkhghhʼ,n̥ðθtθʰtθw̥ʃʒtʃʰtʃʼj̊xɣhʔ
4 | haa,haa-ipa,nddh,ntθ
5 | haa,haa-ipa,njonjkënjp,ŋoⁿkkʁəⁿkpʰ
6 | haa,haa-ipa,zrhzrsrtrdrtr’r,zɻ̊ʐʂʈʂʰʈʂʈʂʼɻ
7 | haa,haa-ipa,chshzhjchʼyhj̱,tʃʰʃʒtʃtʃʼj̊dʒ
8 | haa,haa-ipa,eyewayawoyoeëäwä,eieoæiæooioeəaoɑ
9 | haa,haa-ipa,ḏgʼtldltl’lłh,dkʔtɬʰtɬtɬʼlɬh
10 | haa,haa-ipa,ywwhbpmbnszë̀ù,jww̥ppʰᵐpnszə̀ù
11 | haa,eng-ipa,ywwhbpmbnszë̀ù,jwwppmpnszəu
12 | haa,haa-ipa,kěwgǒy,kʁěoɡʁǒi
13 | haa,eng-ipa,kěwgǒy,kʒɛɔɡʒɔi
14 | haa,haa-ipa,ą̈̀wlį̌w,ã̀oɬɮĩ̌u
15 | haa,haa-ipa,į̌ǫ̀,ĩ̌õ̀
16 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/ikt.psv:
--------------------------------------------------------------------------------
1 | ikt|ikt-ipa|rřřȓȓ'’łsššnññ|ʁɻɻɟɟʔʔɬsʂʂnɲɲ
2 | ikt|ikt-ipa|niriyuq|niʁijuq
3 | ikt|eng-ipa|niriyuq|niʒijuk
4 | ikt|eng-arpabet|niriyuq|N IY ZH IY Y UW K
5 | ikt|ikt-ipa|niriřuq|niʁiɻuq
6 | ikt|ikt-ipa|niriřuq|niʁiɻuq
7 | ikt|eng-ipa|niriřuq|niʒijuk
8 | ikt|eng-arpabet|niriřuq|N IY ZH IY Y UW K
9 | ikt|ikt-ipa|pingahut|piŋahut
10 | ikt|eng-ipa|pingahut|piŋɑhut
11 | ikt|eng-arpabet|pingahut|P IY NG AA HH UW T
12 | ikt|ikt-ipa|piŋahut|piŋahut
13 | ikt|eng-ipa|piŋahut|piŋɑhut
14 | ikt|eng-arpabet|piŋahut|P IY NG AA HH UW T
15 | ikt|ikt-ipa|akhunaaq|akhunaːq
16 | ikt|eng-ipa|akhunaaq|ɑkhunɑk
17 | ikt|eng-arpabet|akhunaaq|AA K HH UW N AA K
18 | ikt|ikt-ipa|akłunaaq|akɬunaːq
19 | ikt|eng-ipa|akłunaaq|ɑksunɑk
20 | ikt|eng-arpabet|akłunaaq|AA K S UW N AA K
21 | ikt|ikt-ipa|uqhuq|uqhuq
22 | ikt|eng-ipa|uqhuq|ukhuk
23 | ikt|eng-arpabet|uqhuq|UW K HH UW K
24 | ikt|ikt-ipa|uqšuq|uqʂuq
25 | ikt|eng-ipa|uqšuq|ukʃuk
26 | ikt|eng-arpabet|uqšuq|UW K SH UW K
27 | ikt|ikt-ipa|uqsuq|uqsuq
28 | ikt|eng-ipa|uqsuq|uksuk
29 | ikt|eng-arpabet|uqsuq|UW K S UW K
30 | ikt|ikt-ipa|quana|quana
31 | ikt|eng-ipa|quana|kuɑnɑ
32 | ikt|ikt-ipa|ma'na|maʔna
33 | ikt|ikt-ipa|ma’na|maʔna
34 | ikt|eng-ipa|ma'na|mɑʔnɑ
35 | ikt|eng-arpabet|ma'na|M AA HH N AA
36 | ikt|eng-arpabet|quana|K UW AA N AA
37 | ikt|ikt-ipa|qujanaqqutit|qud͡ʒanaqqutit
38 | ikt|eng-ipa|qujanaqqutit|kudʒɑnɑkkutit
39 | ikt|eng-arpabet|qujanaqqutit|K UW JH AA N AA K K UW T IY T
40 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/iku-sro.psv:
--------------------------------------------------------------------------------
1 | iku-sro|iku-sro-ipa|yrřřȓȓłsššnññ|jʁɟɟɟɟɬsʂʂnɲɲ
2 | iku-sro|iku-sro-ipa|nirijuq|niʁijuq
3 | iku-sro|eng-ipa|nirijuq|niʒijuk
4 | iku-sro|eng-arpabet|nirijuq|N IY ZH IY Y UW K
5 | iku-sro|iku-sro-ipa|pingahut|piŋahut
6 | iku-sro|eng-ipa|pingahut|piŋɑhut
7 | iku-sro|eng-arpabet|pingahut|P IY NG AA HH UW T
8 | iku-sro|iku-sro-ipa|akłunaaq|akɬunaːq
9 | iku-sro|eng-ipa|akłunaaq|ɑksunɑk
10 | iku-sro|eng-arpabet|akłunaaq|AA K S UW N AA K
11 | iku-sro|iku-sro-ipa|atsunaaq|atsunaːq
12 | iku-sro|eng-ipa|atsunaaq|ɑtsunɑk
13 | iku-sro|eng-arpabet|atsunaaq|AA T S UW N AA K
14 | iku-sro|iku-sro-ipa|uqsuq|uqsuq
15 | iku-sro|eng-ipa|uqsuq|uksuk
16 | iku-sro|eng-arpabet|uqsuq|UW K S UW K
17 | iku-sro|iku-sro-ipa|aakka|aːkka
18 | iku-sro|eng-ipa|aakka|ɑkkɑ
19 | iku-sro|eng-arpabet|aakka|AA K K AA
20 | iku-sro|iku-sro-ipa|qujannamiik|qujannamiːk
21 | iku-sro|eng-ipa|qujannamiik|kujɑnnɑmik
22 | iku-sro|eng-arpabet|qujannamiik|K UW Y AA N N AA M IY K
23 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/iku.psv:
--------------------------------------------------------------------------------
1 | iku|iku-ipa|ᐃᒑᒥᒃ|iɡaːmik
2 | iku|eng-ipa|ᐃᒑᒥᒃ|iɡɑmik
3 | iku|eng-arpabet|ᐃᒑᒥᒃ|IY G AA M IY K
4 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/lml.psv:
--------------------------------------------------------------------------------
1 | lml|lml-ipa|mwaron̄g|mwɑɹoːŋɡ
2 | lml|lml-ipa|lagoana|lɑɣoːɑnɑ
3 | lml|lml-ipa|n̄oto|ŋoːtoː
4 | lml|lml-ipa|mulei|mulej
5 | lml|lml-ipa|mahamai|mɑhɑmɑj
6 | lml|lml-ipa|vevuri|veːvuɹi
7 | lml|lml-ipa|eig|ejɡ
8 | lml|lml-ipa|eiḡ|ejŋɡ
9 | lml|eng-arpabet|mwaron̄g|M W AA R OW NG
10 | lml|eng-arpabet|lagoana|L AA G OW AA N AA
11 | lml|eng-arpabet|n̄oto|NG OW T OW
12 | lml|eng-arpabet|mulei|M UW L EY
13 | lml|eng-arpabet|mahamai|M AA HH AA M AA Y
14 | lml|eng-arpabet|vevuri|V EY V UW R IY
15 | lml|eng-arpabet|eig|EY G
16 | lml|eng-arpabet|eiḡ|EY NG
17 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/mic.psv:
--------------------------------------------------------------------------------
1 | mic|mic-ipa|tiꞌam|tiːɑm
2 | mic|mic-ipa|Miꞌkmaq|miːɡmɑx
3 | mic|mic-ipa|mi'kmaq|miːɡmɑx
4 | mic|mic-ipa|sqolj|əsxoltʃ
5 | mic|mic-ipa|sq|səx
6 |
7 | # Test that ' is tokenized correctly as part of the words.
8 | mic|mic-ipa|mípi'tiꞌnála'jaꞌpéke'qeꞌ|miːbiːdiːnɑːlɑːtʃɑːpeːɡeːɣeː
9 |
10 | # Problem words detected by Marc, fixed by adding o->o "no-op" rule and regenerating mic-ipa to eng-ipa
11 | mic|eng-arpabet|Nsituoqn|N Z IY D UW AO G N
12 | mic|eng-arpabet|koqoey|K AO G AO EY Y
13 | mic|eng-arpabet|Penoqite'lsultinen|P EY N AO G IY D EY L Z UW L D IY N EY N
14 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/moe.psv:
--------------------------------------------------------------------------------
1 | moe|moe-ipa|nitaimunit|niteːjmunit
2 | moe|eng-ipa|ishi-mamitunenitenan|iʃi-məmituneːniteːnən
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/oji-syl.psv:
--------------------------------------------------------------------------------
1 | oji-syl|oji|ᒪᐦᑿ|mahkwa
2 | oji-syl|oji|ᐃᐦᑵ|ihkwe
3 | oji-syl|oji|ᐃᐦᑵᐘᐠ ᓂᑲᒧᐘᐠ᙮|ihkwewak nikamowak.
4 | oji-syl|oji|ᐱᓀᔑᐣᐦᐢ ᐊᒷ ᓴᑭᒣ᙮|pineshinhs amwa sakime.
5 | oji-syl|oji|ᐊᐣ ᐁᔑᓂᑲᓱᔭᐣ|an eshinikasoyan
6 | oji-syl|oji|ᓂᓇᑕᐏᐦᐃᐍ|ninatawihiwe
7 | oji-syl|oji|ᒪᒋᐱᓱ|machipiso
8 | oji-syl|oji|ᒝᐱᐡ|chwapish
9 | oji-syl|oji|ᐊᓂᒧᐦᐡ|animohsh
10 | oji-syl|oji|ᑎᐦᑎᐻᐱᐡᑭᑲᐣ|tihtipwepishkikan
11 | oji-syl|oji|ᒥᓂᐦᐠ|minihk
12 | oji-syl|oji|ᒪᓯᓇᐦᐃᑲᐣ|masinahikan
13 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/oji.tsv:
--------------------------------------------------------------------------------
1 | oji oji-ipa aagwiitoo’ooza ɑːɡwiːtoːʔoːzʌ
2 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/srs.psv:
--------------------------------------------------------------------------------
1 | srs|srs-ipa|dada|tʌ̄tʌ̄
2 | srs|srs-ipa|t'at'a|tʼʌ̄tʼʌ̄
3 | srs|srs-ipa|tata|tʰʌ̄tʰʌ̄
4 | srs|eng-arpabet|dada t'at'a tata|D AH D AH T AH T AH T AH T AH
5 | srs|srs-ipa|óo oó òo oò|ɔ᷇ː ɔ᷄ː ɔ᷅ː ɔ᷆ː
6 | srs|eng-arpabet|óo oó òo oò|OW OW OW OW
7 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/str.tsv:
--------------------------------------------------------------------------------
1 | str eng-arpabet X̱I¸ÁM¸ SH W IY HH EY M HH
2 |
3 | # Variants for cedilla: comma, space+combining cedilla, space+combining comma below
4 | str eng-arpabet X̱I,ÁM ̧ SH W IY HH EY M HH
5 | str eng-arpabet X̱I,ÁM ̦ SH W IY HH EY M HH
6 |
7 | # 2024 update: cedilla stays as such in equiv, but is turned into glottal stop in IPA
8 | # TODO: for comma, disambiguate between glottal stop and punctuation
9 | str str-equiv X̱I¸ÁM ̦ X̱I¸ÁM¸
10 | str str-equiv X̱I,ÁM ̧ X̱I,ÁM¸
11 | str str-ipa X̱I¸ÁM ̦ χʷiʔemʔ
12 | str str-ipa X̱I,ÁM ̧ χʷiʔemʔ
13 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/str_un_human_rights.txt:
--------------------------------------------------------------------------------
1 | https://en.wikipedia.org/wiki/Saanich_dialect
2 |
3 | EWENE SÁN E TŦE U¸ MEQ EȽTÁLṈEW̱ Ȼ SNI¸S SQÍEŦ E TŦE XĆṈINS.
4 | U¸ XENENEȻEL TŦE U¸ MEQ EȽTÁLṈEW̱ E Ȼ SI¸ÁM¸TEṈS.
5 | ĆŚḰÁLEȻEN TŦE U¸ MEQ SÁN.
6 | ͸ Ȼ S¸Á¸ITEṈS TŦE U¸ MEQ SÁN X̱EN¸IṈ E TŦE SĆÁ¸ĆE¸S.
7 |
8 | FGR
9 | abcdefghijklqrstwxyz
10 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/tau.psv:
--------------------------------------------------------------------------------
1 | tau|tau-ipa|sh'oo|ʃʔoː
2 | tau|tau-ipa|Jign|tʃiŋ
3 | tau|tau-ipa|maasee'|maːseːʔ
4 | tau|tau-ipa|betlanh|bɛtɬan̥
5 | tau|tau-ipa|do'eent'aa|tɔʔeːntʼaː
6 | tau|tau-ipa|aaeeooiiuuioiaea|aːeːoːiːuːioiaea
7 | tau|tau-ipa|ąąęęįįǫǫųų|ãːẽːĩːõːũː
8 | tau|tau-ipa|ąęįųǫ|ãɛ̃ĩũɔ̃
9 | tau|tau-ipa|àìùèò|àìùɛ̀ɔ̀
10 | tau|tau-ipa|aäüüü|aʌɘːɘ
11 | tau|tau-ipa|thtth'tthht't|θtθʼtθhtʼt
12 | tau|tau-ipa|ddhdh|tθð
13 | tau|tau-ipa|mmbbw|mᵐbbw
14 | tau|tau-ipa|tdt'nnhndstsdzts'|tttʼnn̥ⁿtstststsʼ
15 | tau|tau-ipa|łtldltl'l|ɬtɬtɬtɬʼl
16 | tau|tau-ipa|shchjch'|ʃtʃtʃtʃʼ
17 | tau|tau-ipa|shyyyh|ʃʲjj̊
18 | tau|tau-ipa|kgk'gnx|kkkʼŋx
19 | tau|tau-ipa|h'|hʔ
20 | tau|eng-ipa|do'eent'aa àìùèò ąęįųǫ h' kgk'gnx|tɔʔeːnteː æiuɛɔ æ̃ɛ̃ĩũɔ̃ hʔ kkkŋk
21 | tau|eng-arpabet|do'eent'aa àìùèò ąęįųǫ h' kgk'gnx|T AO HH EY N T EY AE IY UW EH AO AE N EH N IY N UW N AO N HH HH K K K NG K
22 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/tce.csv:
--------------------------------------------------------------------------------
1 | tce,tce-ipa,ch’e,tʃʼɛ
2 | tce,tce-ipa,ghw'nj,ɣʷʔntʃ
3 | tce,tce-ipa,kwǚ\u0328,kʰʷʉ̃
4 | tce,tce-ipa,ä̀w,əw
5 | tce,tce-ipa,āyy,aij
6 | tce,tce-ipa,dzzh,tsʒ
7 | tce,tce-ipa,sih,sih
8 | tce,tce-ipa,tth,tθʰ
9 | tce,tce-ipa,dhh,ðh
10 | tce,tce-ipa,a\u0328y,ãi
11 | tce,tce-ipa,btl,ptɬʰ
12 | tce,tce-ipa,rkh,ɹx
13 | tce,tce-ipa,mnlg,mnlɡ
14 | tce,tce-ipa,dlsh,tɬʃ
15 | tce,tce-ipa,ä́\u0328ł,ʌ̃ɬ
16 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/tli.csv:
--------------------------------------------------------------------------------
1 | tli,tli-ipa,ei,eː
2 | tli,tli-ipa,ee,iː
3 | tli,tli-ipa,aa,aː
4 | tli,tli-ipa,oo,uː
5 | tli,tli-ipa,i,i
6 | tli,tli-ipa,e,e
7 | tli,tli-ipa,a,a
8 | tli,tli-ipa,u,u
9 | tli,tli-ipa,𝚘̲,o
10 | tli,tli-ipa,𝚘̲o,oː
11 | tli,tli-ipa,b,p
12 | tli,tli-ipa,p,pʰ
13 | tli,tli-ipa,d,t
14 | tli,tli-ipa,ti,tʰi
15 | tli,tli-ipa,t',tʼ
16 | tli,tli-ipa,dz,ts
17 | tli,tli-ipa,tsei,tsʰeː
18 | tli,tli-ipa,sh,ʃ
19 | tli,tli-ipa,j,tʃ
20 | tli,tli-ipa,ch𝚘̲o,tʃʰoː
21 | tli,tli-ipa,ch',tʃʼ
22 | tli,tli-ipa,gw,kʷ
23 | tli,tli-ipa,kw,kʰʷ
24 | tli,tli-ipa,k'w,kʼʷ
25 | tli,eng-ipa,k'w,kw
26 | tli,tli-ipa,dl,tɬ
27 | tli,tli-ipa,tlu,tɬʰu
28 | tli,tli-ipa,xw,xʷ
29 | tli,tli-ipa,kawe𝚔̲i𝚐̲,kʰaweqʰiq
30 | tli,tli-ipa,.woo,ʔʷuː
31 | tli,tli-ipa,.woo.,ʔʷuː.
32 | tli,tli-ipa,𝚔̲w,qʰʷ
33 | tli,tli-ipa,hw,hʷ
34 | tli,tli-ipa,y,j
35 | tli,tli-ipa,𝚡̲w,χʷ
36 | tli,tli-ipa,𝚕̲,l
37 | tli,tli-ipa,w𝚘̲,wo
38 | tli,tli-ipa,ÿ,ɰ
39 | tli,tli-ipa,tâch,tʰatʃ
40 | tli,eng-ipa,tâcha,tætʃæ
41 | tli,eng-ipa,ch'𝚘̲o,tʃoː
42 | tli,eng-arpabet,ch'𝚘̲o,CH OW
43 | tli,eng-arpabet,tsaa,T S EY
44 | tli,eng-ipa,x',k
45 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/ttm.csv:
--------------------------------------------------------------------------------
1 | ttm,ttm-ipa,NJę,ⁿtʃẽ
2 | ttm,ttm-ipa,Zha,ʒɑ
3 | ttm,ttm-ipa,ddhau,tθʌʊ
4 | ttm,ttm-ipa,dhth,ðθ
5 | ttm,ttm-ipa,k'w'o,kʼʷʔo
6 | ttm,ttm-ipa,dth,tθ
7 | ttm,ttm-ipa,ghwli,ɣʷli
8 | ttm,ttm-ipa,yai',jʌɪʔ
9 | ttm,ttm-ipa,dzuw,tsuw
10 | ttm,ttm-ipa,dlä,tɬʌ
11 | ttm,ttm-ipa,shłú,ʃɬu
12 | ttm,ttm-ipa,khtsae,xtsʰæ
13 | ttm,ttm-ipa,rch,ɹtʃʰ
14 | ttm,ttm-ipa,sw'aę̄,swʔæ̃
15 | ttm,ttm-ipa,kg,kʰk
16 |
--------------------------------------------------------------------------------
/g2p/tests/public/data/win.csv:
--------------------------------------------------------------------------------
1 | win,eng-ipa,ąą,æ̃
2 | win,eng-ipa,įį,ẽː
3 | win,eng-ipa,oo,oː
4 | win,eng-ipa,ō,oː
5 | win,eng-ipa,ee,eː
6 | win,eng-ipa,uu,u
7 | win,eng-ipa,t',t
8 | win,eng-ipa,p',p
9 | win,eng-arpabet,ąą,AE N
10 | win,eng-arpabet,įį,EY N
11 | win,eng-arpabet,oo,OW
12 | win,eng-arpabet,ō,OW
13 | win,eng-arpabet,ee,EY
14 | win,eng-arpabet,uu,UW
15 | win,eng-arpabet,t',T
16 | win,eng-arpabet,p',P
17 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/.gitignore:
--------------------------------------------------------------------------------
1 | generated_add.yaml
2 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/abbreviation_config-g2p.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: Abbreviation
3 | display_name: Minimal to Minimal
4 | rules_path: abbreviation_mapping.csv
5 | in_lang: min
6 | out_lang: min
7 | rule_ordering: as-written
8 | case_sensitive: false
9 | escape_special: false
10 | norm_form: "NFD"
11 | reverse: false
12 | abbreviations_path: abbreviations.substring.csv
13 | authors:
14 | - Aidan Pine
15 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/abbreviation_mapping.csv:
--------------------------------------------------------------------------------
1 | VOWEL_HI,1
2 | VOWEL,2
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/abbreviations.csv:
--------------------------------------------------------------------------------
1 | VOWEL,a,e,i,o,u
2 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/abbreviations.json:
--------------------------------------------------------------------------------
1 | {
2 | "VOWEL": [
3 | "a",
4 | "e",
5 | "i",
6 | "o",
7 | "u"
8 | ]
9 | }
10 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/abbreviations.psv:
--------------------------------------------------------------------------------
1 | VOWEL|a|e|i|o|u
2 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/abbreviations.substring.csv:
--------------------------------------------------------------------------------
1 | VOWEL,a,e,i,o,u
2 | VOWEL_HI,i,u
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/abbreviations.tsv:
--------------------------------------------------------------------------------
1 | VOWEL a e i o u
2 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/bad_langs/lang1/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | display_name: Minimal to Minimal
2 | rules_path: minimal.csv
3 | in_lang: min
4 | out_lang: min
5 | rule_ordering: as-written
6 | case_sensitive: false
7 | escape_special: true
8 | reverse: true
9 | authors:
10 | - Somebody
11 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/bad_langs/lang1/minimal.csv:
--------------------------------------------------------------------------------
1 | a,b,a,b
2 | 1,1,1,1
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/bad_langs2/lang1/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - display_name: Minimal to Minimal
3 | rules_path: minimal.csv
4 | in_lang: min
5 | out_lang: min
6 | rule_ordering: as-written
7 | case_sensitive: false
8 | escape_special: true
9 | reverse: true
10 | authors:
11 | - Somebody
12 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/bad_langs2/lang1/minimal.csv:
--------------------------------------------------------------------------------
1 | a,b,a,b
2 | 1,1,1,1
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/bad_lexicon_config-g2p.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: English
3 | display_name: English to ARPABET
4 | type: lexicon
5 | alignments_path: hello.aligned.foo.txt
6 | out_delimiter: " "
7 | in_lang: eng
8 | out_lang: eng-arpabet
9 | case_sensitive: false
10 | norm_form: 'NFC'
11 | authors:
12 | - David Huggins-Daines
13 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/case-feed/README.md:
--------------------------------------------------------------------------------
1 | #### case-feeding mapping
2 |
3 | Use case: for spelling conversion where all rules have to prevent feeding of
4 | output text to input text of other rules, but need to allow feeding of output
5 | text to `context_before` or `context_after`.
6 |
7 | This three-step mapping:
8 | - first lowercases the input;
9 | - then applies the rules from lowercase input to uppercase output, in such a
10 | way that anything that's been converted cannot be converted again, similar to
11 | what `prevent_feeding` does, but allowing the context to specify upper and
12 | lower case variants to allow both pre- and post-mapping matches;
13 | - and finally lowercases the output again.
14 |
15 | This ends up being equivalent to a case-insensitive prevent-feeding mapping,
16 | except for the behaviour of contexts.
17 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/case-feed/cf-in-lc-to-cf-out-uc.csv:
--------------------------------------------------------------------------------
1 | ka-,KE-,,
2 | atin,ETIN,,
3 | in,IN,,[aAeE]
4 | in,AN,,
5 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/case-feed/config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Example using case to prevent feeding rules but feed context
3 | mappings:
4 | - display name: case-feed input lowercaser
5 | rules_path: empty.csv
6 | in_lang: cf-in
7 | out_lang: cf-in-lc
8 | case_sensitive: false
9 | authors:
10 | - Eric Joanis
11 | <<: *shared
12 | - display_name: case-feed main mapping in is lc, out is uc, thus no feeding
13 | rules_path: cf-in-lc-to-cf-out-uc.csv
14 | in_lang: cf-in-lc
15 | out_lang: cf-out-uc
16 | case_sensitive: true
17 | prevent_feeding: false
18 | authors:
19 | - Eric Joanis
20 | <<: *shared
21 | - display name: case-feed output lowercaser
22 | rules_path: empty.csv
23 | in_lang: cf-out-uc
24 | out_lang: cf-out
25 | case_sensitive: false
26 | authors:
27 | - Eric Joanis
28 | <<: *shared
29 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/case-feed/empty.csv:
--------------------------------------------------------------------------------
1 | t,t,,Actually empty is illegal so create at least one dummy rule
2 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/compose.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Composition tests
3 | mappings:
4 | - display_name: Step 1
5 | rules_path: compose1-2.csv
6 | in_lang: c1
7 | out_lang: c2
8 | norm_form: NFC
9 | authors:
10 | - Eric Joanis
11 | - display_name: Step 2
12 | rules_path: compose2-3.csv
13 | in_lang: c2
14 | out_lang: c3
15 | norm_form: NFD
16 | authors:
17 | - Eric Joanis
18 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/compose1-2.csv:
--------------------------------------------------------------------------------
1 | a,ab
2 | bc,c
3 | é,ò
4 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/compose2-3.csv:
--------------------------------------------------------------------------------
1 | a,d
2 | bc,e
3 | g{1}h{2}i{3},G{2}H{1}I{3}J{1}
4 | m{1}n{2},N{2}M{1}
5 | ò,ù
6 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/deletion.csv:
--------------------------------------------------------------------------------
1 | a,
2 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/deletion.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "in": "a",
4 | "out": ""
5 | }
6 | ]
7 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/deletion_config_csv.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: Minimal
3 | display_name: Minimal to Minimal
4 | rules_path: deletion.csv
5 | in_lang: min
6 | out_lang: min
7 | rule_ordering: as-written
8 | case_sensitive: false
9 | escape_special: true
10 | norm_form: 'NFD'
11 | reverse: false
12 | authors:
13 | - Aidan Pine
14 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/deletion_config_json.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: Minimal
3 | display_name: Minimal to Minimal
4 | rules_path: deletion.json
5 | in_lang: min
6 | out_lang: min
7 | rule_ordering: as-written
8 | case_sensitive: false
9 | escape_special: true
10 | norm_form: 'NFD'
11 | reverse: false
12 | authors:
13 | - Aidan Pine
14 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/g2p_studio.csv:
--------------------------------------------------------------------------------
1 | aa,ɑː,,
2 | a,ɑ,,
3 | ,,,
4 | ,,,
5 | ,,,
6 | ,,,
7 | ,,,
8 | ,,,
9 | ,,,
10 | ,,,
11 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/g2p_studio2.csv:
--------------------------------------------------------------------------------
1 | ee,eː,,
2 | ,,,
3 | ,,,
4 | ,,,
5 | ,,,
6 | ,,,
7 | ,,,
8 | ,,,
9 | ,,,
10 | ,,,
11 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/gen-map-1.csv:
--------------------------------------------------------------------------------
1 | e,e
2 | o,o
3 | b,b
4 | l,l
5 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/gen-map-2.csv:
--------------------------------------------------------------------------------
1 | e,ɛ
2 | o,ɔ
3 | d,d
4 | n,n
5 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/gen-map-3a.csv:
--------------------------------------------------------------------------------
1 | i,i
2 | o,o
3 | k,k
4 | m,m
5 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/gen-map-3b.csv:
--------------------------------------------------------------------------------
1 | u,u
2 | y,y
3 | s,s
4 | n,ɲ
5 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/gen-map_config-g2p.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: GenMap
3 | authors:
4 | - Eric Joanis
5 | mappings:
6 | - display_name: GenMap 1 to IPA
7 | rules_path: gen-map-1.csv
8 | in_lang: gm1
9 | out_lang: gm1-ipa
10 | <<: *shared
11 | - display_name: GenMap 2 to IPA
12 | rules_path: gen-map-2.csv
13 | in_lang: gm2
14 | out_lang: gm2-ipa
15 | <<: *shared
16 | - display_name: GenMap 3a to IPA
17 | rules_path: gen-map-3a.csv
18 | in_lang: gm3a
19 | out_lang: gm3-ipa
20 | - display_name: GenMap 3b to IPA
21 | rules_path: gen-map-3b.csv
22 | in_lang: gm3b
23 | out_lang: gm3-ipa
24 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/gm1-ipa_to_gm2-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "e", "out": "ɛ"},
3 | {"in": "o", "out": "ɔ"},
4 | {"in": "b", "out": "d"},
5 | {"in": "l", "out": "n"}
6 | ]
7 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/gm2-ipa_to_gm3-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "ɛ", "out": "i"},
3 | {"in": "ɔ", "out": "o"},
4 | {"in": "d", "out": "s"},
5 | {"in": "n", "out": "m"}
6 | ]
7 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/gm3-ipa_to_gm2-ipa.json:
--------------------------------------------------------------------------------
1 | [
2 | {"in": "i", "out": "ɛ"},
3 | {"in": "o", "out": "ɔ"},
4 | {"in": "k", "out": "d"},
5 | {"in": "m", "out": "n"},
6 | {"in": "u", "out": "ɔ"},
7 | {"in": "y", "out": "ɛ"},
8 | {"in": "s", "out": "d"},
9 | {"in": "ɲ", "out": "n"}
10 | ]
11 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/hello.aligned.txt:
--------------------------------------------------------------------------------
1 | h}HH e}EH l|l}L o}OW
2 | y}Y o|u}UH '}_ r|e}R
3 | b}_ o}_ g}_ u}_ s}_
4 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/lexicon_config-g2p.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: English
3 | display_name: English to ARPABET
4 | type: lexicon
5 | alignments_path: hello.aligned.txt
6 | out_delimiter: " "
7 | in_lang: eng
8 | out_lang: eng-arpabet
9 | case_sensitive: false
10 | norm_form: 'NFC'
11 | authors:
12 | - David Huggins-Daines
13 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/malformed_config-g2p.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: Minimal
3 | - display_name: Minimal to Minimal
4 | - in_lang: min
5 | out_lang: min
6 | rule_ordering: as-written
7 | case_sensitive: false
8 | escape_special: true
9 | norm_form: 'NFD'
10 | reverse: true
11 | authors:
12 | - Aidan Pine
13 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/minimal.csv:
--------------------------------------------------------------------------------
1 | a,b,a,b
2 | 1,1,1,1
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/minimal.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "in": "a",
4 | "out": "b",
5 | "context_before": "a",
6 | "context_after": "b"
7 | },
8 | {
9 | "in": "1",
10 | "out": "1",
11 | "context_before": "1",
12 | "context_after": "1"
13 | }
14 | ]
15 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/minimal.psv:
--------------------------------------------------------------------------------
1 | a|b|a|b
2 | 1|1|1|1
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/minimal.tsv:
--------------------------------------------------------------------------------
1 | a b a b
2 | 1 1 1 1
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/minimal.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roedoejet/g2p/a6543833f6b6656aa2a5a598fa90ba4e75ea6b7c/g2p/tests/public/mappings/minimal.xlsx
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/minimal_config-g2p.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: Minimal
3 | display_name: Minimal to Minimal
4 | rules_path: minimal.csv
5 | in_lang: min
6 | out_lang: min
7 | rule_ordering: as-written
8 | case_sensitive: false
9 | escape_special: true
10 | norm_form: 'NFD'
11 | reverse: true
12 | authors:
13 | - Aidan Pine
14 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/minimal_configs.yaml:
--------------------------------------------------------------------------------
1 | <<: &shared
2 | language_name: Minimal
3 | mappings:
4 | - language_name: Minimal
5 | display_name: Minimal CSV to Minimal
6 | rules_path: minimal.csv
7 | in_lang: min
8 | out_lang: min
9 | rule_ordering: as-written
10 | case_sensitive: false
11 | escape_special: true
12 | norm_form: 'NFD'
13 | reverse: true
14 | authors:
15 | - Aidan Pine
16 | <<: *shared
17 | - language_name: Minimal
18 | display_name: Minimal TSV to Minimal
19 | rules_path: minimal.tsv
20 | in_lang: min
21 | out_lang: min
22 | rule_ordering: as-written
23 | case_sensitive: false
24 | escape_special: true
25 | norm_form: 'NFD'
26 | reverse: true
27 | authors:
28 | - Aidan Pine
29 | <<: *shared
30 | - language_name: Minimal
31 | display_name: Minimal PSV to Minimal
32 | rules_path: minimal.psv
33 | in_lang: min
34 | out_lang: min
35 | rule_ordering: as-written
36 | case_sensitive: false
37 | escape_special: true
38 | norm_form: 'NFD'
39 | reverse: true
40 | authors:
41 | - Aidan Pine
42 | <<: *shared
43 | - language_name: Minimal
44 | display_name: Minimal JSON to Minimal
45 | rules_path: minimal.json
46 | in_lang: min
47 | out_lang: min
48 | rule_ordering: as-written
49 | case_sensitive: false
50 | escape_special: true
51 | norm_form: 'NFD'
52 | reverse: true
53 | authors:
54 | - Aidan Pine
55 | <<: *shared
56 | - language_name: Minimal
57 | display_name: Minimal XLSX to Minimal
58 | rules_path: minimal.xlsx
59 | in_lang: min
60 | out_lang: min
61 | rule_ordering: as-written
62 | case_sensitive: false
63 | escape_special: true
64 | norm_form: 'NFD'
65 | reverse: true
66 | authors:
67 | - Aidan Pine
68 | <<: *shared
69 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/no_escape.csv:
--------------------------------------------------------------------------------
1 | \?,ʔ
2 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/no_mappings_key.yaml:
--------------------------------------------------------------------------------
1 | language_name: Null
2 | display_name: Null to Null
3 | rules_path: null.csv
4 | in_lang: null-in
5 | out_lang: null-out
6 | rule_ordering: as-written
7 | case_sensitive: false
8 | authors:
9 | - Eric Joanis
10 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/nofeed-indices.csv:
--------------------------------------------------------------------------------
1 | a{1}b{2},ce{2}d{1}
2 | a{1}ā{2},aʼ{1}a{2}
3 | d{1}ef{2},gh{1}i{2}
4 | klm,nop
5 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/nofeed-indices.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: Mapping with index and prevent-feeding
3 | display_name: nofeed-index
4 | in_lang: nofeed-indices-in
5 | out_lang: nofeed-indices-out
6 | type: mapping
7 | prevent_feeding: true
8 | authors:
9 | - Eric Joanis
10 | rules_path: nofeed-indices.csv
11 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/null.csv:
--------------------------------------------------------------------------------
1 | a,b,,
2 | ,,,
3 | d,e,,
4 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/null_config-g2p.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: Null
3 | display_name: Null to Null
4 | rules_path: null.csv
5 | in_lang: null-in
6 | out_lang: null-out
7 | rule_ordering: as-written
8 | case_sensitive: false
9 | authors:
10 | - Eric Joanis
11 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/rule-ordering.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: Minimal
3 | display_name: Minimal to Minimal
4 | rules_path: minimal.csv
5 | in_lang: min
6 | out_lang: min
7 | rule_ordering: apply-longest-first
8 | case_sensitive: false
9 | escape_special: true
10 | norm_form: 'NFD'
11 | reverse: true
12 | authors:
13 | - Eddie Antonio Santos
14 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/test.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: Local Config
3 | display_name: Local Config
4 | in_lang: local-config-in
5 | out_lang: local-config-out
6 | type: mapping
7 | authors:
8 | - Aidan Pine
9 | rules_path: test_to_ipa.csv
10 | - language_name: Local Config
11 | display_name: Local Config to IPA
12 | in_lang: local-config-in
13 | out_lang: dan-ipa
14 | type: mapping
15 | authors:
16 | - Aidan Pine
17 | rules_path: test_to_ipa.csv
18 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/test_to_ipa.csv:
--------------------------------------------------------------------------------
1 | b,a
2 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/tokenize_punct.csv:
--------------------------------------------------------------------------------
1 | A-B,c_d
2 | D,d_end,,$
3 |
--------------------------------------------------------------------------------
/g2p/tests/public/mappings/tokenize_punct_config-g2p.yaml:
--------------------------------------------------------------------------------
1 | mappings:
2 | - language_name: tok punct
3 | display_name: Tokenize Punctuation Case Insensitive
4 | rules_path: tokenize_punct.csv
5 | comment: "test mapping for Readalongs-Studio issue #40"
6 | issue_url: "https://github.com/ReadAlongs/Studio/issues/40"
7 | in_lang: tok-in
8 | out_lang: tok-out
9 | case_sensitive: false
10 | authors:
11 | - Eric Joanis
12 |
--------------------------------------------------------------------------------
/g2p/tests/test_doctor.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from unittest import TestCase, main
4 |
5 | from g2p.log import LOGGER
6 | from g2p.mappings.langs.utils import check_ipa_known_segs
7 |
8 |
9 | class DoctorTest(TestCase):
10 | def setUp(self):
11 | pass
12 |
13 | # The fra to fra-ipa mapping was fixed, so this test no longer works; its not_ prefix keeps unittest from collecting it.
14 | def not_test_ipa_known_segs_fra(self):
15 | with self.assertLogs(LOGGER, level="WARNING") as cm:
16 | check_ipa_known_segs(["fra-ipa"])
17 | self.assertIn("vagon", "".join(cm.output))
18 | self.assertIn("panphon", "".join(cm.output))
19 | self.assertGreaterEqual(len(cm.output), 2)
20 |
21 | def test_ipa_known_segs_fra_fixed(self):
22 | self.assertTrue(check_ipa_known_segs(["fra-ipa"]))
23 |
24 | def test_ipa_known_segs_alq(self):
25 | with self.assertLogs(LOGGER, level="WARNING") as cm:
26 | self.assertFalse(check_ipa_known_segs(["alq-ipa"]))
27 | self.assertIn("o:", "".join(cm.output))
28 | self.assertIn("panphon", "".join(cm.output))
29 |
30 | # Disabled (not_ prefix): this test takes 8 seconds and doesn't do anything useful: it trivially increases
31 | # code coverage but does not have enough assertions to catch a future code-breaking
32 | # change.
33 | # Migrated to test_doctor_expensive.py so we can still run it, manually or via
34 | # ./run.py all.
35 | def not_test_ipa_known_segs_all(self):
36 | with self.assertLogs(LOGGER, level="WARNING") as cm:
37 | check_ipa_known_segs()
38 | self.assertGreaterEqual(len(cm.output), 20)
39 |
40 |
41 | if __name__ == "__main__":
42 | main()
43 |
--------------------------------------------------------------------------------
/g2p/tests/test_doctor_expensive.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from unittest import TestCase, main
4 |
5 | from click.testing import CliRunner
6 |
7 | from g2p.cli import doctor
8 | from g2p.log import LOGGER
9 | from g2p.mappings.langs.utils import check_ipa_known_segs
10 |
11 |
12 | class ExpensiveDoctorTest(TestCase):
13 | # We segregate the expensive tests for g2p doctor in this suite which is not included
14 | # in dev, so that it doesn't slow down our Travis CI tests, but can still be run by
15 | # hand when desired.
16 | # These tests are not very good because they don't assert enough to make sure doctor
17 | # actually works, but they still exercise the code.
18 | #
19 | # This test suite is deliberately left out of run.py: it will only get run if you run
20 | # ./run.py all, or ./test_doctor_expensive.py.
21 |
22 | # Migrated here from test_cli.py
23 | def test_doctor_cli(self):
24 | # TODO: assert something more useful here...
25 | # This test simulates calling "g2p doctor" on the command line with no arguments,
26 | # which runs doctor on all mappings.
27 | runner = CliRunner()
28 | with self.assertLogs(LOGGER, level="WARNING") as cm:
29 | result = runner.invoke(doctor)
30 | self.assertEqual(result.exit_code, 0)
31 | self.assertGreaterEqual(len(cm.output), 10)
32 |
33 | # Migrated here from test_doctor.py
34 | # And skip this test, because test_doctor_cli() indirectly does the
35 | # expensive call to check_ipa_known_segs already so there is no value in
36 | # doing it a second time here.
37 | def not_test_ipa_known_segs_all(self):
38 | # This test simulates the innards of having called "g2p doctor" on the command
39 | # line with no arguments, again running the innards of doctor on all mappings.
40 | with self.assertLogs(LOGGER, level="WARNING") as cm:
41 | check_ipa_known_segs()
42 | self.assertGreaterEqual(len(cm.output), 20)
43 |
44 |
45 | if __name__ == "__main__":
46 | main()
47 |
--------------------------------------------------------------------------------
/g2p/tests/test_langs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from unittest import TestCase, main
4 |
5 | from g2p import make_g2p
6 | from g2p.log import LOGGER
7 | from g2p.tests.public.data import load_public_test_data
8 |
9 |
10 | class LangTest(TestCase):
11 | """Basic Test for individual lookup tables.
12 |
13 | Test files (in g2p/tests/public/data) are either .csv, .psv, or
14 | .tsv files, the only difference being the delimiter used (comma,
15 | pipe, or tab).
16 |
17 | Each line in the test file consists of SOURCE,TARGET,INPUT,OUTPUT
18 |
19 | """
20 |
21 | def test_io(self):
22 | langs_to_test = load_public_test_data()
23 |
24 | # go through each language declared in the test case set up
25 | # Instead of asserting immediately, we go through all the cases first, so that
26 | # running test_langs.py prints all the errors at once, to help debugging a given g2p mapping.
27 | # Then we assert that error_count is zero, to make unittest register the failure.
28 | error_count = 0
29 | error_prefix = "test_langs.py: mapping error"
30 | for test in langs_to_test:
31 | transducer = make_g2p(test[0], test[1])
32 | output_string = transducer(test[2]).output_string.strip()
33 | if output_string != test[3].strip():
34 | LOGGER.error(
35 | "{} for {}: {} from {} to {} should be {}, got {}".format(
36 | error_prefix,
37 | test[-1],
38 | test[2],
39 | test[0],
40 | test[1],
41 | test[3],
42 | output_string,
43 | )
44 | )
45 | error_count += 1
46 |
47 | self.assertEqual(
48 | error_count,
49 | 0,
50 | f'Search for "ERROR - {error_prefix}" above to find all the g2p mapping errors.',
51 | )
52 |
53 |
54 | if __name__ == "__main__":
55 | main()
56 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: Gᵢ2Pᵢ Documentation
2 | theme:
3 | name: material
4 | favicon: favicon-32x32.png
5 | custom_dir: docs/overrides
6 | palette:
7 | primary: white
8 | features:
9 | - content.code.copy
10 | - content.code.select
11 | - announce.dismiss
12 | - navigation.footer
13 | plugins:
14 | - mkdocstrings:
15 | default_handler: python
16 | handlers:
17 | python:
18 | paths: [g2p]
19 | extra:
20 | homepage: https://roedoejet.github.io/g2p/
21 | version:
22 | provider: mike
23 | default: stable
24 | markdown_extensions:
25 | - admonition
26 | - pymdownx.details
27 | - pymdownx.superfences
28 | - mkdocs-click
29 | - toc:
30 | permalink: true
31 | nav:
32 | - Home: index.md
33 | - Installation: installation.md
34 | - Guides:
35 | - Getting started: start.md
36 | - How to contribute: contributing.md
37 | - Using the g2p studio: studio.md
38 | - Migrating from g2p 1.x: migration-2.md
39 | - Reference:
40 | - Package: package.md
41 | - Command Line: cli.md
42 |
--------------------------------------------------------------------------------
/readme-heroku.md:
--------------------------------------------------------------------------------
1 | Our production Heroku deployment is controlled by the following files:
2 | - `Procfile`: tells Heroku what command to launch in each Dyno;
3 | - `runtime.txt`: tells Heroku which run-time engine to use (i.e., which version of Python);
4 |
5 | Heroku detects Python by default, but `runtime.txt` lets us specify/bump the version as needed;
6 | - `requirements.txt`: tells Heroku what our production dependencies
7 | are. This is managed by `hatch` now. You will need to make sure
8 | the Python version in the `[tool.hatch.envs.prod]` section matches
9 | the one in `runtime.txt`. Now you can update the requirements with:
10 |
11 | hatch env remove prod
12 | rm -f requirements.txt
13 | hatch env create prod
14 |
--------------------------------------------------------------------------------
/run_studio.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 |
5 | if sys.version_info < (3, 8, 0): # pragma: no cover
6 | raise Exception("")
7 | sys.exit(
8 | "ERROR: While the g2p CLI and library can still run on Python 3.7, "
9 | "g2p-studio requires Python 3.8 or more recent.\n"
10 | f"You are using {sys.version}.\n"
11 | "Please use a newer version of Python."
12 | )
13 |
14 | import uvicorn
15 |
16 | from g2p.app import APP
17 | from g2p.log import LOGGER
18 |
19 | host = "127.0.0.1"
20 | port = 5000
21 | LOGGER.info(f"g2p-studio listening on http://{host}:{port}")
22 | # Serve APP with uvicorn on the same host and port that were just logged.
23 | uvicorn.run(APP, host=host, port=port)
24 |
--------------------------------------------------------------------------------
/run_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from g2p.tests.run import main
4 | # Delegate to the main() entry point defined in g2p.tests.run.
5 | main()
6 |
--------------------------------------------------------------------------------
/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.10.15
2 |
--------------------------------------------------------------------------------