├── .github
    └── workflows
    │   └── python-publish.yml
├── .gitignore
├── LICENSE
├── README.md
├── TRAINING.md
├── fix_pt.py
├── openphonemizer
    └── __init__.py
├── setup.py
├── train.py
└── training
    └── config.yml


/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Upload Python Package
10 | 
11 | on:
12 |   release:
13 |     types: [published]  # run only when a release is published
14 | 
15 | permissions:
16 |   contents: read  # the job only needs to read the repository
17 | 
18 | jobs:
19 |   deploy:
20 | 
21 |     runs-on: ubuntu-latest
22 | 
23 |     steps:
24 |     - uses: actions/checkout@v4  # v3 targets the deprecated Node 16 runtime
25 |     - name: Set up Python
26 |       uses: actions/setup-python@v5  # v3 targets the deprecated Node 16 runtime
27 |       with:
28 |         python-version: '3.x'
29 |     - name: Install dependencies
30 |       run: |
31 |         python -m pip install --upgrade pip
32 |         pip install build
33 |     - name: Build package
34 |       run: python -m build
35 |     - name: Publish package
36 |       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29  # pinned to a commit
37 |       with:
38 |         user: __token__
39 |         password: ${{ secrets.PYPI_API_TOKEN }}  # read from the PYPI_API_TOKEN secret
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 
162 | 
163 | 
164 | en.txt
165 | out.tsv


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The Clear BSD License
 2 | 
 3 | Copyright (c) 2024 mrfakename, NeuralVox, OpenPhonemizer Contributors
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted (subject to the limitations in the disclaimer
 8 | below) provided that the following conditions are met:
 9 | 
10 |   * Redistributions of source code must retain the above copyright notice,
11 | this list of conditions and the following disclaimer.
12 | 
13 |   * Redistributions in binary form must reproduce the above copyright
14 | notice, this list of conditions and the following disclaimer in the
15 | documentation and/or other materials provided with the distribution.
16 | 
17 |   * Neither the name of the copyright holder nor the names of its
18 | contributors may be used to endorse or promote products derived from this
19 | software without specific prior written permission.
20 | 
21 | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
22 | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
23 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
25 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
30 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 | POSSIBILITY OF SUCH DAMAGE.
33 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # OpenPhonemizer
  2 | 
  3 | **Code / [Audio Samples](https://neuralvox.github.io/OpenPhonemizer/) / [Models](https://huggingface.co/openphonemizer/ckpt) / [Live Demo](https://huggingface.co/spaces/openphonemizer/PhonemizerHub) / [Dataset](https://huggingface.co/datasets/mrfakename/ipa-phonemes-word-pairs)**
  4 | 
  5 | A permissively licensed, open sourced, local IPA Phonemizer (G2P) powered by deep learning. This Phonemizer attempts to replicate the `espeak` Phonemizer while remaining permissively-licensed.
  6 | 
  7 | OpenPhonemizer is designed to be a drop-in replacement for espeak's phonemizer. This means you can use OpenPhonemizer in your software even if your software is not GPL licensed.
  8 | 
  9 | OpenPhonemizer is heavily based on the amazing [DeepPhonemizer](https://github.com/as-ideas/DeepPhonemizer). The main changes are the model checkpoints, which more closely resemble `espeak`'s phonemizer.
 10 | 
 11 | Optional GPL-licensed portions are available [here](https://github.com/NeuralVox/OpenPhonemizer-GPL).
 12 | 
 13 | ## Features
 14 | 
 15 | * Permissively licensed & open source
 16 | * Fast & efficient
 17 | * Works well with TTS models that depend on phonemizer or espeak
 18 | * Automatic GPU acceleration (CUDA/MPS) if available
 19 | 
 20 | ## Project
 21 | 
 22 | * Project status: Alpha
 23 | * Supported languages: English (more coming soon! What languages do you want? Let me know!)
 24 | 
 25 | ## Installation
 26 | 
 27 | Easily install OpenPhonemizer:
 28 | 
 29 | ```bash
 30 | pip install -U openphonemizer
 31 | ```
 32 | 
 33 | Or, install the latest version from Git:
 34 | 
 35 | ```bash
 36 | pip install -U "openphonemizer @ git+https://github.com/NeuralVox/OpenPhonemizer"
 37 | ```
 38 | 
 39 | ## Usage
 40 | 
 41 | ### OpenPhonemizer
 42 | 
 43 | ```python
 44 | from openphonemizer import OpenPhonemizer
 45 | phonemizer = OpenPhonemizer()
 46 | # Or specify a custom checkpoint path: OpenPhonemizer('model.pt')
 47 | phonemizer('test')
 48 | phonemizer('hello this is a test')
 49 | ```
 50 | 
 51 | Please note that by default, OpenPhonemizer loads a built-in dictionary of words/phonemes. Because storage is quite inefficient, the model is ~100MB larger and uses more memory; however, it is _much_ faster. If you're low on VRAM, you can either run the model exclusively on CPU (`disable_gpu=True`) or load a model without a dictionary.
 52 | 
 53 | **Load without dictionary:**
 54 | 
 55 | ```python
 56 | from cached_path import cached_path
 57 | from openphonemizer import OpenPhonemizer
 58 | phonemizer = OpenPhonemizer(str(cached_path('hf://openphonemizer/ckpt/best_model_no_optim.pt'))) # add disable_gpu=True to run on CPU only
 59 | phonemizer('test')
 60 | phonemizer('hello this is a test')
 61 | ```
 62 | 
 63 | **Use autoregressive model:**
 64 | 
 65 | > [!CAUTION]
 66 | > OpenPhonemizer had a **bug** in the training script that caused significantly degraded performance. The autoregressive model has not yet been fixed. For now, please use the forward model.
 67 | 
 68 | NEW: An autoregressive model is now available. The autoregressive model is more accurate but slightly slower. To use the autoregressive model:
 69 | 
 70 | ```python
 71 | OpenPhonemizer(str(cached_path('hf://openphonemizer/autoreg-ckpt/best_model.pt')))
 72 | ```
 73 | 
 74 | ## Evaluation
 75 | 
 76 | We introduce PhonemizerBench, a benchmark to evaluate the similarity of alternate Phonemizers to `espeak` (this benchmark measures against `espeak`, assuming its score is 100).
 77 | 
 78 | | Phonemizer                    | Score (Run 1) | Score (Run 2) | Score (Run 3) | Average   |
 79 | | ----------------------------- | ------------- | ------------- | ------------- | --------- |
 80 | | Gruut                         | 75.08         | 75.54         | 73.72         | 74.78     |
 81 | | DeepPhonemizer                | 85.24         | 85.03         | 84.64         | 84.97     |
 82 | | G2P_EN                        | 86.16         | 86.28         | 85.74         | 86.06     |
 83 | | OpenPhonemizer                | 93.64         | 93.54         | 93.38         | 93.52     |
 84 | | OpenPhonemizer Autoregressive | **93.74**     | **93.59**     | **93.67**     | **93.67** |
 85 | 
 86 | ## Todo
 87 | 
 88 | - [x] Train autoregressive model
 89 | - [x] Allow disabling GPU usage
 90 | - [ ] Multilingual support (any requests?)
 91 | 
 92 | ## License
 93 | 
 94 | OpenPhonemizer is open source software. You may use it under the BSD-3-Clause Clear license found in the LICENSE file.
 95 | 
 96 | Please note that OpenPhonemizer depends on software under different licenses; it is your responsibility when redistributing or modifying OpenPhonemizer to comply with these licenses (notably LGPL).
 97 | 
 98 | *By contributing to this repository, you grant the author the permission to change the license in the future at their sole discretion or offer different licenses to other individuals.*
 99 | 
100 | **NOTE:** Model weights may be licensed under different licenses. Please make sure to check all model weights for licenses.
101 | 
102 | ## Credits
103 | 
104 | OpenPhonemizer is essentially a wrapper (using different pre-trained models) around the amazing [Deep Phonemizer](https://github.com/as-ideas/DeepPhonemizer) package created by [Christian Schäfer](https://github.com/cschaefer26).
105 | 
106 | OpenPhonemizer uses [num2words](https://github.com/savoirfairelinux/num2words) to read out large numbers and [cached_path](https://github.com/allenai/cached_path) from Allen AI for caching models.
107 | 
108 | OpenPhonemizer models were trained by [mrfakename](https://twitter.com/realmrfakename).
109 | 


--------------------------------------------------------------------------------
/TRAINING.md:
--------------------------------------------------------------------------------
 1 | # Training
 2 | 
 3 | (Some code borrowed from DeepPhonemizer)
 4 | 
 5 | Assuming you're using Jupyter:
 6 | 
 7 | ```
 8 | !pip install deep-phonemizer
 9 | ```
10 | 
11 | ```python
12 | !wget https://huggingface.co/datasets/mrfakename/ipa-phonemes-word-pairs/raw/main/out.tsv
13 | with open('out.tsv', 'r', encoding='utf-8') as f:
14 |     lines = f.readlines()
15 | 
16 | lines = [l.replace(' ', '').replace('\n', '') for l in lines]
17 | splits = [l.split('\t') for l in lines]
18 | train_data = [('en_us', s[0], s[1]) for s in splits if len(s)==2]
19 | for d in train_data[:10000:1000]:
20 |     print(d)
21 | ```
22 | 
23 | ```python
24 | from dp.utils.io import read_config, save_config
25 | import dp
26 | import os
27 | 
28 | config_file = 'config.yml'
29 | config = read_config(config_file)
30 | config['training']['epochs'] = 10
31 | config['training']['warmup_steps'] = 100
32 | config['training']['generate_steps'] = 500
33 | config['training']['validate_steps'] = 500
34 | save_config(config, 'config.yaml')
35 | for k, v in config.items():
36 |     print(f'{k} {v}')
37 | ```
38 | 
39 | ```
40 | %load_ext tensorboard
41 | %tensorboard --logdir /content/checkpoints
42 | ```
43 | 
44 | ```python
45 | from dp.preprocess import preprocess
46 | from dp.train import train
47 | preprocess(config_file='config.yaml', train_data=train_data)
48 | train(rank=0, num_gpus=1, config_file='config.yaml')
49 | ```
50 | 
51 | ```python
52 | from dp.phonemizer import Phonemizer
53 | 
54 | phonemizer = Phonemizer.from_checkpoint('/content/checkpoints/best_model.pt')
55 | result = phonemizer('Phonemizing an English text is imposimpable!', lang='en_us')
56 | 
57 | print(result)
58 | ```
59 | 
60 | ```python
61 | result = phonemizer.phonemise_list(['This is a test'], lang='en_us')
62 | 
63 | for word, pred in result.predictions.items():
64 |     print(f'{word} {pred.phonemes} {pred.confidence}')
65 | ```


--------------------------------------------------------------------------------
/fix_pt.py:
--------------------------------------------------------------------------------
 1 | MODEL_PATH = 'model_step_20k_fixed.pt'
 2 | import torch
 3 | from cached_path import cached_path
 4 | x = torch.load(MODEL_PATH)
 5 | with open(str(cached_path('https://huggingface.co/datasets/mrfakename/ipa-phonemes-word-pairs/raw/main/out.tsv'))) as f:
 6 |     lines = [l.replace(' ', '').replace('\n', '') for l in f.readlines()]
 7 | splits = [l.split('\t') for l in lines]
 8 | # for z in splits:
 9 | #     x['phoneme_dict']['en_us'][z[0]] = z[1]
10 | x['phoneme_dict']['en_us']['a']='ɐ'
11 | print(x['phoneme_dict']['en_us'])
12 | torch.save(x, 'model_step_20k_fixed.pt')


--------------------------------------------------------------------------------
/openphonemizer/__init__.py:
--------------------------------------------------------------------------------
 1 | # OpenPhonemizer
 2 | # 
 3 | # Copyright (c) 2024 mrfakename, NeuralVox, OpenPhonemizer Contributors
 4 | # All rights reserved.
 5 | # 
 6 | # The Clear BSD License
 7 | 
 8 | # Redistribution and use in source and binary forms, with or without
 9 | # modification, are permitted (subject to the limitations in the disclaimer
10 | # below) provided that the following conditions are met:
11 | 
12 | #  * Redistributions of source code must retain the above copyright notice,
13 | # this list of conditions and the following disclaimer.
14 | 
15 | #  * Redistributions in binary form must reproduce the above copyright
16 | # notice, this list of conditions and the following disclaimer in the
17 | # documentation and/or other materials provided with the distribution.
18 | 
19 | #  * Neither the name of the copyright holder nor the names of its
20 | # contributors may be used to endorse or promote products derived from this
21 | # software without specific prior written permission.
22 | 
23 | # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
24 | # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
25 | # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
27 | # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
28 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
32 | # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 | # POSSIBILITY OF SUCH DAMAGE.
35 | 
36 | from dp.phonemizer import Phonemizer
37 | from cached_path import cached_path
38 | from num2words import num2words
39 | import re, torch
40 | class OpenPhonemizer:
41 |     def __init__(self, model_checkpoint=None, disable_gpu=False):
42 |         device = 'cpu'
43 |         if torch.cuda.is_available(): device = 'cuda'
44 |         if torch.backends.mps.is_available(): device = 'mps'
45 |         if disable_gpu: device = 'cpu'
46 |         if not model_checkpoint:
47 |             model_checkpoint = str(cached_path('hf://openphonemizer/ckpt/best_model.pt'))
48 |         self.phonemizer = Phonemizer.from_checkpoint(model_checkpoint, device=device)
49 |         self.pattern = re.compile(r'\d+')
50 |     def _num_process(self, text):
51 |         matches = self.pattern.findall(text)
52 |         for match in matches:
53 |             word_equivalent = num2words(int(match))
54 |             text = text.replace(match, word_equivalent)
55 |         return text
56 |     def __call__(self, text, stress=True):
57 |         out = self.phonemizer(self._num_process(text.replace(' .', '.').replace('.', ' .')), lang='en_us')
58 |         if not stress:
59 |             out = out.replace('ˈ', '')
60 |         return out
61 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # OpenPhonemizer
 2 | # 
 3 | # Copyright (c) 2024 mrfakename, NeuralVox, OpenPhonemizer Contributors
 4 | # All rights reserved.
 5 | # 
 6 | # The Clear BSD License
 7 | 
 8 | # Redistribution and use in source and binary forms, with or without
 9 | # modification, are permitted (subject to the limitations in the disclaimer
10 | # below) provided that the following conditions are met:
11 | 
12 | #  * Redistributions of source code must retain the above copyright notice,
13 | # this list of conditions and the following disclaimer.
14 | 
15 | #  * Redistributions in binary form must reproduce the above copyright
16 | # notice, this list of conditions and the following disclaimer in the
17 | # documentation and/or other materials provided with the distribution.
18 | 
19 | #  * Neither the name of the copyright holder nor the names of its
20 | # contributors may be used to endorse or promote products derived from this
21 | # software without specific prior written permission.
22 | 
23 | # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
24 | # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
25 | # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
27 | # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
28 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
32 | # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 | # POSSIBILITY OF SUCH DAMAGE.
35 | 
36 | from setuptools import setup, find_packages
37 | 
38 | setup(
39 |     name="openphonemizer",
40 |     version="0.1.2",
41 |     packages=find_packages(),
42 |     author="mrfakename",
43 |     author_email="me@mrfake.name",
44 |     description="Permissively licensed, open sourced, local IPA Phonemizer (G2P) powered by deep learning.",
45 |     long_description=open("README.md").read(),
46 |     long_description_content_type="text/markdown",
47 |     url="https://github.com/NeuralVox/OpenPhonemizer",
48 |     license="BSD-3-Clause-Clear",
49 |     classifiers=[
50 |         "License :: OSI Approved :: BSD License",
51 |     ],
52 |     install_requires=[
53 |         "deep-phonemizer",
54 |         "cached-path",
55 |         "num2words",
56 |     ],
57 | )
58 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
 1 | # OpenPhonemizer
 2 | # 
 3 | # Copyright (c) 2024 mrfakename, NeuralVox, OpenPhonemizer Contributors
 4 | # All rights reserved.
 5 | # 
 6 | # The Clear BSD License
 7 | 
 8 | # Redistribution and use in source and binary forms, with or without
 9 | # modification, are permitted (subject to the limitations in the disclaimer
10 | # below) provided that the following conditions are met:
11 | 
12 | #  * Redistributions of source code must retain the above copyright notice,
13 | # this list of conditions and the following disclaimer.
14 | 
15 | #  * Redistributions in binary form must reproduce the above copyright
16 | # notice, this list of conditions and the following disclaimer in the
17 | # documentation and/or other materials provided with the distribution.
18 | 
19 | #  * Neither the name of the copyright holder nor the names of its
20 | # contributors may be used to endorse or promote products derived from this
21 | # software without specific prior written permission.
22 | 
23 | # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
24 | # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
25 | # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
27 | # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
28 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
32 | # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 | # POSSIBILITY OF SUCH DAMAGE.
35 | 


--------------------------------------------------------------------------------
/training/config.yml:
--------------------------------------------------------------------------------
  1 | paths:
  2 |   checkpoint_dir: checkpoints
  3 |   data_dir: datasets
  4 | 
  5 | preprocessing:
  6 |   languages: ["en_us"]
  7 |   text_symbols: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
  8 |   phoneme_symbols:
  9 |     [
 10 |       "a",
 11 |       "b",
 12 |       "d",
 13 |       "e",
 14 |       "f",
 15 |       "g",
 16 |       "h",
 17 |       "i",
 18 |       "j",
 19 |       "k",
 20 |       "l",
 21 |       "m",
 22 |       "n",
 23 |       "o",
 24 |       "p",
 25 |       "r",
 26 |       "s",
 27 |       "t",
 28 |       "u",
 29 |       "v",
 30 |       "w",
 31 |       "x",
 32 |       "y",
 33 |       "z",
 34 |       "æ",
 35 |       "ç",
 36 |       "ð",
 37 |       "ø",
 38 |       "ŋ",
 39 |       "œ",
 40 |       "ɐ",
 41 |       "ɑ",
 42 |       "ɔ",
 43 |       "ə",
 44 |       "ɛ",
 45 |       "ɜ",
 46 |       "ɝ",
 47 |       "ɹ",
 48 |       "ɚ",
 49 |       "ɡ",
 50 |       "ɪ",
 51 |       "ʁ",
 52 |       "ʃ",
 53 |       "ʊ",
 54 |       "ʌ",
 55 |       "ʏ",
 56 |       "ʒ",
 57 |       "ʔ",
 58 |       "ˈ",
 59 |       "ˌ",
 60 |       "ː",
 61 |       "̃",
 62 |       "̍",
 63 |       "̥",
 64 |       "̩",
 65 |       "̯",
 66 |       "͡",
 67 |       "θ",
 68 |       "'",
 69 |       "ɾ",
 70 |       "ᵻ"
 71 |     ]
 72 |   char_repeats: 3
 73 |   lowercase: true
 74 |   n_val: 5000
 75 | model:
 76 |   type: "transformer"
 77 |   d_model: 512
 78 |   d_fft: 1024
 79 |   layers: 6
 80 |   dropout: 0.1
 81 |   heads: 4
 82 | 
 83 | training:
 84 |   # TRAINING.md overrides epochs, warmup_steps, generate_steps and validate_steps.
 85 |   learning_rate: 0.0001
 86 |   warmup_steps: 10000
 87 |   scheduler_plateau_factor: 0.5
 88 |   scheduler_plateau_patience: 10
 89 |   batch_size: 64
 90 |   batch_size_val: 64
 91 |   epochs: 15
 92 |   generate_steps: 10000
 93 |   validate_steps: 10000
 94 |   checkpoint_steps: 10000
 95 |   n_generate_samples: 10
 96 |   store_phoneme_dict_in_model: true
 97 |   ddp_backend: "nccl"
 98 |   ddp_host: "localhost"
 99 |   # NOTE(review): this key was spelled `ddp_post`; DeepPhonemizer's sample
100 |   # configs name the DDP port setting `ddp_port` - confirm against dp.train.
101 |   ddp_port: "12355"
102 | 


--------------------------------------------------------------------------------