├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README-zh.md ├── README.md ├── app.py ├── audiosample ├── demo.mp3 ├── fastspeech2.mp3 └── tacotron2.mp3 ├── demo.wav ├── setup.cfg ├── setup.py ├── templates └── index.html └── zhtts ├── __init__.py ├── asset ├── baker_mapper.json ├── fastspeech2_quan.tflite ├── mb_melgan.tflite └── tacotron2_quan.tflite ├── tensorflow_tts ├── __init__.py └── processor │ ├── __init__.py │ ├── baker.py │ ├── base_processor.py │ └── cn_tn.py └── tts.py /.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | refe 3 | .vscode 4 | demo.py 5 | model 6 | run_test.py 7 | test.txt 8 | eval.txt 9 | tts_one_sentence.py 10 | zhtts/asset/tacotron2.tflite 11 | notes.md 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | pip-wheel-metadata/ 36 | share/python-wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .nox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | *.py,cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | db.sqlite3 74 | db.sqlite3-journal 75 | 76 | # Flask stuff: 77 | instance/ 78 | .webassets-cache 79 | 80 | # Scrapy stuff: 81 | .scrapy 82 | 83 | # Sphinx documentation 84 | docs/_build/ 85 | 86 | # PyBuilder 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # IPython 93 | profile_default/ 94 | ipython_config.py 95 | 96 | # pyenv 97 | .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 104 | #Pipfile.lock 105 | 106 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 107 | __pypackages__/ 108 | 109 | # Celery stuff 110 | celerybeat-schedule 111 | celerybeat.pid 112 | 113 | # SageMath parsed files 114 | *.sage.py 115 | 116 | # Environments 117 | .env 118 | .venv 119 | env/ 120 | venv/ 121 | ENV/ 122 | env.bak/ 123 | venv.bak/ 124 | 125 | # Spyder project settings 126 | .spyderproject 127 | .spyproject 128 | 129 | # Rope project settings 130 | .ropeproject 131 | 132 | # mkdocs documentation 133 | /site 134 | 135 | # mypy 136 | .mypy_cache/ 137 | .dmypy.json 138 | dmypy.json 139 | 140 | # Pyre type checker 141 | .pyre/ 142 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 jackiexiao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include zhtts/asset * -------------------------------------------------------------------------------- /README-zh.md: -------------------------------------------------------------------------------- 1 | # ZhTTS 2 | [English](https://github.com/Jackiexiao/zhtts/blob/main/README.md) 3 | 4 | 在CPU上实时运行的中文语音合成系统(一个简单的示例,使用 Fastspeech2 + MbMelGan),但总体效果离"能用"还有很大差距,供大家参考 5 | 6 | > 实时率 RTF:fastspeech2 为 0.2(CPU:Intel(R) Core(TM) i5-7200U @ 2.50GHz,采样率 24kHz),tacotron2 为 1.6 7 | 8 | 这个项目**主要依赖**于 [TensorFlowTTS](https://github.com/TensorSpeech/TensorFlowTTS),做了非常简单的改进: 9 | 10 | * tflite 模型来源于 [colab](https://colab.research.google.com/drive/1Ma3MIcSdLsOxqOKcN1MlElncYMhrOg3J?usp=sharing),感谢 [@azraelkuan](https://github.com/azraelkuan) 11 | * 在标点符号处停顿 12 | * 增加了简单的文本正则化(数字转汉字)TN (Text Normalization),使用 [chinese_text_normalization](https://github.com/speechio/chinese_text_normalization) 13 | 14 | ## 合成效果 15 | text = "2020年,这是一个开源的端到端中文语音合成系统" 16 | 17 | * [zhtts synthesis mp3](https://shimo.im/docs/tcXPY9pdrdRdwqk6/) 18 | 19 | 20 | ## 安装 21 | ``` 22 | pip install zhtts 23 | ``` 24 | 或者克隆本仓库后执行 `pip install .` 25 | 26 | ## 使用 27 | ```python 28 | import zhtts 29 | 30 | text = "2020年,这是一个开源的端到端中文语音合成系统" 31 | tts = zhtts.TTS() # use fastspeech2 by default 32 | 33 | tts.text2wav(text, "demo.wav") 34 | >>> Save wav to demo.wav 35 | 36 | tts.frontend(text) 37 | >>> ('二零二零年,这是一个开源的端到端中文语音合成系统', 'sil ^ er4 #0 l ing2 #0 ^ er4 #0 l ing2 #0 n ian2 #0 #3 zh e4 #0 sh iii4 #0 ^ i2 #0 g e4 #0 k ai1 #0 ^ van2 #0 d e5 #0 d uan1 #0 d ao4 #0 d uan1 #0 zh ong1 #0 ^ uen2 #0 ^ v3 #0 ^ in1 #0 h e2 #0 ch eng2 #0 x i4 #0 t ong3 sil') 38 | 39 | tts.synthesis(text) 40 | >>> array([0., 0., 0., ..., 0., 0., 0.], dtype=float32) 41 | ``` 42 | 43 | ### 网页 api demo 44 | 下载这个项目,先 `pip install flask`,然后 45 | ``` 46 | python app.py 47 | ``` 48 | * 访问 http://localhost:5000 可以直接进行语音合成交互 49 | * 对 http://localhost:5000/api/tts?text=your%20sentence 发起 HTTP GET 请求,返回 WAV 音频: 50 | 51 | ```sh 52 | $ curl -o "helloworld.wav" "http://localhost:5000/api/tts?text=%E4%BD%A0%E5%A5%BD%E4%B8%96%E7%95%8C" 53 | ``` 54 | `%E4%BD%A0%E5%A5%BD%E4%B8%96%E7%95%8C` 是"你好,世界!"的 URL 编码 55 | 56 | ## 使用 Tacotron2 模型 57 | 某些情况下 Tacotron2 合成效果会好一点,不过合成速度会慢不少 58 | ```python 59 | import zhtts 60 | tts = zhtts.TTS(text2mel_name="TACOTRON") 61 | # tts = zhtts.TTS(text2mel_name="FASTSPEECH2") 62 | ``` 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Recommendation: use [PaddleSpeech for Chinese speech synthesis](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/README_cn.md#%E5%BF%AB%E9%80%9F%E5%BC%80%E5%A7%8B) 2 | 3 | # ZhTTS 4 | [中文](https://github.com/Jackiexiao/zhtts/blob/main/README-zh.md) 5 | 6 | A demo Chinese text-to-speech system that runs on a CPU in real time (fastspeech2 + mbmelgan). 7 | 8 | > RTF (real-time factor): 0.2 for fastspeech2 on an Intel(R) Core(TM) i5-7200U CPU @ 2.50GHz generating 24 kHz audio; 1.6 for tacotron2. 9 | 10 | This repo is **mainly based on** [TensorFlowTTS](https://github.com/TensorSpeech/TensorFlowTTS), with a few small improvements: 11 | 12 | * tflite models come from [colab](https://colab.research.google.com/drive/1Ma3MIcSdLsOxqOKcN1MlElncYMhrOg3J?usp=sharing), thanks to [@azraelkuan](https://github.com/azraelkuan) 13 | * add pauses at punctuation 14 | * add TN (Text Normalization) from [chinese_text_normalization](https://github.com/speechio/chinese_text_normalization) 15 | 16 | ## demo wav 17 | text = "2020年,这是一个开源的端到端中文语音合成系统" 18 | 19 | [zhtts synthesis mp3](https://shimo.im/docs/tcXPY9pdrdRdwqk6/) 20 | 21 | ## Install 22 | ``` 23 | pip install zhtts 24 | ``` 25 | or clone this repo, then `pip install .` 26 | 27 | ## Usage 28 | ```python 29 | import zhtts 30 | 31 | text = "2020年,这是一个开源的端到端中文语音合成系统" 32 | tts = zhtts.TTS() # use fastspeech2 by default 33 | 34 | tts.text2wav(text, "demo.wav") 35 | >>> Save wav to demo.wav 36 | 37 | tts.frontend(text) 38 | >>> ('二零二零年,这是一个开源的端到端中文语音合成系统', 'sil ^ er4 #0 l ing2 #0 ^ er4 #0 l ing2 #0 n ian2 #0 #3 zh e4 #0 sh iii4 #0 ^ i2 #0 g e4 #0 k ai1 #0 ^ van2 #0 d e5 #0 d uan1 #0 d ao4 #0 d uan1 #0 zh ong1 #0 ^ uen2 #0 ^ v3 #0 ^ in1 #0 h e2 #0 ch eng2 #0 x i4 #0 t ong3 sil') 39 | 40 | tts.synthesis(text) 41 | >>> array([0., 0., 0., ..., 0., 0., 0.], dtype=float32) 42 | ``` 43 |
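`synthesis()` returns the raw 24 kHz float32 waveform as a numpy array; if you want to write it out yourself rather than via `text2wav`, a minimal sketch (this mirrors what `app.py` below does with scipy):

```python
import zhtts
from scipy.io import wavfile

tts = zhtts.TTS()
audio = tts.synthesis("2020年,这是一个开源的端到端中文语音合成系统")
wavfile.write("demo.wav", 24000, audio)  # 24 kHz mono float32 WAV
```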
44 | ### web api demo 45 | clone this repo, run `pip install flask` first, then 46 | ``` 47 | python app.py 48 | ``` 49 | * visit http://localhost:5000 for tts interaction 50 | * do an HTTP GET at http://localhost:5000/api/tts?text=your%20sentence to get WAV audio back: 51 | 52 | ```sh 53 | $ curl -o "helloworld.wav" "http://localhost:5000/api/tts?text=%E4%BD%A0%E5%A5%BD%E4%B8%96%E7%95%8C" 54 | ``` 55 | `%E4%BD%A0%E5%A5%BD%E4%B8%96%E7%95%8C` is the URL-encoded form of "你好,世界!"
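The same request from Python, using only the standard library (assumes `app.py` from this repo is running locally, as above):

```python
import urllib.parse
import urllib.request

text = "你好,世界!"
url = "http://localhost:5000/api/tts?text=" + urllib.parse.quote(text)
with urllib.request.urlopen(url) as resp, open("helloworld.wav", "wb") as f:
    f.write(resp.read())  # the endpoint responds with audio/wav bytes
```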
56 | 57 | ## Use tacotron2 instead of fastspeech2 58 | Audio generated by the tacotron2 model sounds better than fastspeech2's, but tacotron2 is much slower. To use tacotron2, change the code: 59 | ```python 60 | import zhtts 61 | tts = zhtts.TTS(text2mel_name="TACOTRON") 62 | # tts = zhtts.TTS(text2mel_name="FASTSPEECH2") 63 | ``` 64 |
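The RTF figures quoted above are wall-clock synthesis time divided by the duration of the generated audio; a minimal way to reproduce the measurement (assumes the 24 kHz output rate used throughout this repo):

```python
import time

import zhtts

tts = zhtts.TTS()
text = "2020年,这是一个开源的端到端中文语音合成系统"
start = time.time()
audio = tts.synthesis(text)
elapsed = time.time() - start
print("RTF:", elapsed / (len(audio) / 24000))  # synthesis time / audio duration
```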
-------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import io 2 | 3 | from flask import Flask, Response, render_template, request 4 | from scipy.io import wavfile 5 | 6 | from zhtts import TTS 7 | 8 | # from flask_cors import CORS 9 | 10 | tts = TTS(text2mel_name="FASTSPEECH2") 11 | # tts = TTS(text2mel_name="TACOTRON") 12 | 13 | app = Flask(__name__) 14 | # CORS(app) 15 | 16 | @app.route("/api/tts") 17 | def api_tts(): 18 | text = request.args.get("text", "").strip() 19 | audio = tts.synthesis(text) 20 | 21 | # write the 24 kHz WAV into an in-memory buffer and return it directly 22 | with io.BytesIO() as out: 23 | wavfile.write(out, 24000, audio) 24 | return Response(out.getvalue(), mimetype="audio/wav") 25 | 26 | @app.route("/") 27 | def index(): 28 | return render_template("index.html") 29 | 30 | 31 | if __name__ == "__main__": 32 | app.run(host="0.0.0.0", port=5000) 33 | -------------------------------------------------------------------------------- /audiosample/demo.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jackiexiao/zhtts/f96d084506aaeb3d1bcdd066525aad891ef66949/audiosample/demo.mp3 -------------------------------------------------------------------------------- /audiosample/fastspeech2.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jackiexiao/zhtts/f96d084506aaeb3d1bcdd066525aad891ef66949/audiosample/fastspeech2.mp3 -------------------------------------------------------------------------------- /audiosample/tacotron2.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jackiexiao/zhtts/f96d084506aaeb3d1bcdd066525aad891ef66949/audiosample/tacotron2.mp3 -------------------------------------------------------------------------------- /demo.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jackiexiao/zhtts/f96d084506aaeb3d1bcdd066525aad891ef66949/demo.wav -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | setup( 5 | name='zhtts', 6 | version='0.0.1', 7 | packages=find_packages(), 8 | url='https://github.com/jackiexiao/zhtts', 9 | license='MIT', 10 | author='jackiexiao', 11 | author_email='707610215@qq.com', 12 | description="A demo Chinese Text to Speech system that runs on CPU", 13 | long_description=open("README.md", 'r', encoding='utf-8').read(), 14 | long_description_content_type="text/markdown", 15 | include_package_data=True, 16 | install_requires=( 17 | "tensorflow-cpu>=2.4.0", 18 | "numpy", 19 | "scipy", 20 | "pypinyin", 21 | "dataclasses" 22 | ), 23 | classifiers=( 24 | 'License :: OSI Approved :: MIT License', 25 | 'Programming Language :: Python :: 3.6', 26 | ) 27 | ) 28 | -------------------------------------------------------------------------------- /templates/index.html: --------------------------------------------------------------------------------
[The HTML here was stripped to bare text during extraction. What survives: the page title "Zhtts - Text2Speech Demo", a "Fork me on GitHub" ribbon, the headings "基于TensorFlowTTS的中文TTS-Demo" (Chinese TTS demo based on TensorFlowTTS) and "实时中文语音合成样例" (real-time Chinese speech synthesis samples), and an inline page script (original lines 51-87).]
49 | 50 | 51 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /zhtts/__init__.py: -------------------------------------------------------------------------------- 1 | from .tts import TTS -------------------------------------------------------------------------------- /zhtts/asset/baker_mapper.json: -------------------------------------------------------------------------------- 1 | {"symbol_to_id": {"pad": 0, "sil": 1, "#0": 2, "#1": 3, "#2": 4, "#3": 5, "^": 6, "b": 7, "c": 8, "ch": 9, "d": 10, "f": 11, "g": 12, "h": 13, "j": 14, "k": 15, "l": 16, "m": 17, "n": 18, "p": 19, "q": 20, "r": 21, "s": 22, "sh": 23, "t": 24, "x": 25, "z": 26, "zh": 27, "a1": 28, "a2": 29, "a3": 30, "a4": 31, "a5": 32, "ai1": 33, "ai2": 34, "ai3": 35, "ai4": 36, "ai5": 37, "an1": 38, "an2": 39, "an3": 40, "an4": 41, "an5": 42, "ang1": 43, "ang2": 44, "ang3": 45, "ang4": 46, "ang5": 47, "ao1": 48, "ao2": 49, "ao3": 50, "ao4": 51, "ao5": 52, "e1": 53, "e2": 54, "e3": 55, "e4": 56, "e5": 57, "ei1": 58, "ei2": 59, "ei3": 60, "ei4": 61, "ei5": 62, "en1": 63, "en2": 64, "en3": 65, "en4": 66, "en5": 67, "eng1": 68, "eng2": 69, "eng3": 70, "eng4": 71, "eng5": 72, "er1": 73, "er2": 74, "er3": 75, "er4": 76, "er5": 77, "i1": 78, "i2": 79, "i3": 80, "i4": 81, "i5": 82, "ia1": 83, "ia2": 84, "ia3": 85, "ia4": 86, "ia5": 87, "ian1": 88, "ian2": 89, "ian3": 90, "ian4": 91, "ian5": 92, "iang1": 93, "iang2": 94, "iang3": 95, "iang4": 96, "iang5": 97, "iao1": 98, "iao2": 99, "iao3": 100, "iao4": 101, "iao5": 102, "ie1": 103, "ie2": 104, "ie3": 105, "ie4": 106, "ie5": 107, "ii1": 108, "ii2": 109, "ii3": 110, "ii4": 111, "ii5": 112, "iii1": 113, "iii2": 114, "iii3": 115, "iii4": 116, "iii5": 117, "in1": 118, "in2": 119, "in3": 120, "in4": 121, "in5": 122, "ing1": 123, "ing2": 124, "ing3": 125, "ing4": 126, "ing5": 127, "iong1": 128, "iong2": 129, "iong3": 130, "iong4": 131, "iong5": 132, "iou1": 133, "iou2": 134, "iou3": 135, "iou4": 136, "iou5": 137, "o1": 138, "o2": 139, "o3": 140, "o4": 141, "o5": 142, "ong1": 143, "ong2": 144, "ong3": 145, "ong4": 146, "ong5": 147, "ou1": 148, "ou2": 149, "ou3": 150, "ou4": 151, "ou5": 152, "u1": 153, "u2": 154, "u3": 155, "u4": 156, "u5": 157, "ua1": 158, "ua2": 159, "ua3": 160, "ua4": 161, "ua5": 162, "uai1": 163, "uai2": 164, "uai3": 165, "uai4": 166, "uai5": 167, "uan1": 168, "uan2": 169, "uan3": 170, "uan4": 171, "uan5": 172, "uang1": 173, "uang2": 174, "uang3": 175, "uang4": 176, "uang5": 177, "uei1": 178, "uei2": 179, "uei3": 180, "uei4": 181, "uei5": 182, "uen1": 183, "uen2": 184, "uen3": 185, "uen4": 186, "uen5": 187, "ueng1": 188, "ueng2": 189, "ueng3": 190, "ueng4": 191, "ueng5": 192, "uo1": 193, "uo2": 194, "uo3": 195, "uo4": 196, "uo5": 197, "v1": 198, "v2": 199, "v3": 200, "v4": 201, "v5": 202, "van1": 203, "van2": 204, "van3": 205, "van4": 206, "van5": 207, "ve1": 208, "ve2": 209, "ve3": 210, "ve4": 211, "ve5": 212, "vn1": 213, "vn2": 214, "vn3": 215, "vn4": 216, "vn5": 217, "eos": 218}, "id_to_symbol": {"0": "pad", "1": "sil", "2": "#0", "3": "#1", "4": "#2", "5": "#3", "6": "^", "7": "b", "8": "c", "9": "ch", "10": "d", "11": "f", "12": "g", "13": "h", "14": "j", "15": "k", "16": "l", "17": "m", "18": "n", "19": "p", "20": "q", "21": "r", "22": "s", "23": "sh", "24": "t", "25": "x", "26": "z", "27": "zh", "28": "a1", "29": "a2", "30": "a3", "31": "a4", "32": "a5", "33": "ai1", "34": "ai2", "35": "ai3", "36": "ai4", "37": "ai5", "38": "an1", "39": "an2", "40": "an3", "41": "an4", "42": "an5", "43": "ang1", "44": "ang2", 
"45": "ang3", "46": "ang4", "47": "ang5", "48": "ao1", "49": "ao2", "50": "ao3", "51": "ao4", "52": "ao5", "53": "e1", "54": "e2", "55": "e3", "56": "e4", "57": "e5", "58": "ei1", "59": "ei2", "60": "ei3", "61": "ei4", "62": "ei5", "63": "en1", "64": "en2", "65": "en3", "66": "en4", "67": "en5", "68": "eng1", "69": "eng2", "70": "eng3", "71": "eng4", "72": "eng5", "73": "er1", "74": "er2", "75": "er3", "76": "er4", "77": "er5", "78": "i1", "79": "i2", "80": "i3", "81": "i4", "82": "i5", "83": "ia1", "84": "ia2", "85": "ia3", "86": "ia4", "87": "ia5", "88": "ian1", "89": "ian2", "90": "ian3", "91": "ian4", "92": "ian5", "93": "iang1", "94": "iang2", "95": "iang3", "96": "iang4", "97": "iang5", "98": "iao1", "99": "iao2", "100": "iao3", "101": "iao4", "102": "iao5", "103": "ie1", "104": "ie2", "105": "ie3", "106": "ie4", "107": "ie5", "108": "ii1", "109": "ii2", "110": "ii3", "111": "ii4", "112": "ii5", "113": "iii1", "114": "iii2", "115": "iii3", "116": "iii4", "117": "iii5", "118": "in1", "119": "in2", "120": "in3", "121": "in4", "122": "in5", "123": "ing1", "124": "ing2", "125": "ing3", "126": "ing4", "127": "ing5", "128": "iong1", "129": "iong2", "130": "iong3", "131": "iong4", "132": "iong5", "133": "iou1", "134": "iou2", "135": "iou3", "136": "iou4", "137": "iou5", "138": "o1", "139": "o2", "140": "o3", "141": "o4", "142": "o5", "143": "ong1", "144": "ong2", "145": "ong3", "146": "ong4", "147": "ong5", "148": "ou1", "149": "ou2", "150": "ou3", "151": "ou4", "152": "ou5", "153": "u1", "154": "u2", "155": "u3", "156": "u4", "157": "u5", "158": "ua1", "159": "ua2", "160": "ua3", "161": "ua4", "162": "ua5", "163": "uai1", "164": "uai2", "165": "uai3", "166": "uai4", "167": "uai5", "168": "uan1", "169": "uan2", "170": "uan3", "171": "uan4", "172": "uan5", "173": "uang1", "174": "uang2", "175": "uang3", "176": "uang4", "177": "uang5", "178": "uei1", "179": "uei2", "180": "uei3", "181": "uei4", "182": "uei5", "183": "uen1", "184": "uen2", "185": "uen3", "186": "uen4", "187": "uen5", "188": "ueng1", "189": "ueng2", "190": "ueng3", "191": "ueng4", "192": "ueng5", "193": "uo1", "194": "uo2", "195": "uo3", "196": "uo4", "197": "uo5", "198": "v1", "199": "v2", "200": "v3", "201": "v4", "202": "v5", "203": "van1", "204": "van2", "205": "van3", "206": "van4", "207": "van5", "208": "ve1", "209": "ve2", "210": "ve3", "211": "ve4", "212": "ve5", "213": "vn1", "214": "vn2", "215": "vn3", "216": "vn4", "217": "vn5", "218": "eos"}, "speakers_map": {"baker": 0}, "processor_name": "BakerProcessor", "pinyin_dict": {"a": ["^", "a"], "ai": ["^", "ai"], "an": ["^", "an"], "ang": ["^", "ang"], "ao": ["^", "ao"], "ba": ["b", "a"], "bai": ["b", "ai"], "ban": ["b", "an"], "bang": ["b", "ang"], "bao": ["b", "ao"], "be": ["b", "e"], "bei": ["b", "ei"], "ben": ["b", "en"], "beng": ["b", "eng"], "bi": ["b", "i"], "bian": ["b", "ian"], "biao": ["b", "iao"], "bie": ["b", "ie"], "bin": ["b", "in"], "bing": ["b", "ing"], "bo": ["b", "o"], "bu": ["b", "u"], "ca": ["c", "a"], "cai": ["c", "ai"], "can": ["c", "an"], "cang": ["c", "ang"], "cao": ["c", "ao"], "ce": ["c", "e"], "cen": ["c", "en"], "ceng": ["c", "eng"], "cha": ["ch", "a"], "chai": ["ch", "ai"], "chan": ["ch", "an"], "chang": ["ch", "ang"], "chao": ["ch", "ao"], "che": ["ch", "e"], "chen": ["ch", "en"], "cheng": ["ch", "eng"], "chi": ["ch", "iii"], "chong": ["ch", "ong"], "chou": ["ch", "ou"], "chu": ["ch", "u"], "chua": ["ch", "ua"], "chuai": ["ch", "uai"], "chuan": ["ch", "uan"], "chuang": ["ch", "uang"], "chui": ["ch", "uei"], "chun": ["ch", "uen"], "chuo": 
["ch", "uo"], "ci": ["c", "ii"], "cong": ["c", "ong"], "cou": ["c", "ou"], "cu": ["c", "u"], "cuan": ["c", "uan"], "cui": ["c", "uei"], "cun": ["c", "uen"], "cuo": ["c", "uo"], "da": ["d", "a"], "dai": ["d", "ai"], "dan": ["d", "an"], "dang": ["d", "ang"], "dao": ["d", "ao"], "de": ["d", "e"], "dei": ["d", "ei"], "den": ["d", "en"], "deng": ["d", "eng"], "di": ["d", "i"], "dia": ["d", "ia"], "dian": ["d", "ian"], "diao": ["d", "iao"], "die": ["d", "ie"], "ding": ["d", "ing"], "diu": ["d", "iou"], "dong": ["d", "ong"], "dou": ["d", "ou"], "du": ["d", "u"], "duan": ["d", "uan"], "dui": ["d", "uei"], "dun": ["d", "uen"], "duo": ["d", "uo"], "e": ["^", "e"], "ei": ["^", "ei"], "en": ["^", "en"], "ng": ["^", "en"], "eng": ["^", "eng"], "er": ["^", "er"], "fa": ["f", "a"], "fan": ["f", "an"], "fang": ["f", "ang"], "fei": ["f", "ei"], "fen": ["f", "en"], "feng": ["f", "eng"], "fo": ["f", "o"], "fou": ["f", "ou"], "fu": ["f", "u"], "ga": ["g", "a"], "gai": ["g", "ai"], "gan": ["g", "an"], "gang": ["g", "ang"], "gao": ["g", "ao"], "ge": ["g", "e"], "gei": ["g", "ei"], "gen": ["g", "en"], "geng": ["g", "eng"], "gong": ["g", "ong"], "gou": ["g", "ou"], "gu": ["g", "u"], "gua": ["g", "ua"], "guai": ["g", "uai"], "guan": ["g", "uan"], "guang": ["g", "uang"], "gui": ["g", "uei"], "gun": ["g", "uen"], "guo": ["g", "uo"], "ha": ["h", "a"], "hai": ["h", "ai"], "han": ["h", "an"], "hang": ["h", "ang"], "hao": ["h", "ao"], "he": ["h", "e"], "hei": ["h", "ei"], "hen": ["h", "en"], "heng": ["h", "eng"], "hong": ["h", "ong"], "hou": ["h", "ou"], "hu": ["h", "u"], "hua": ["h", "ua"], "huai": ["h", "uai"], "huan": ["h", "uan"], "huang": ["h", "uang"], "hui": ["h", "uei"], "hun": ["h", "uen"], "huo": ["h", "uo"], "ji": ["j", "i"], "jia": ["j", "ia"], "jian": ["j", "ian"], "jiang": ["j", "iang"], "jiao": ["j", "iao"], "jie": ["j", "ie"], "jin": ["j", "in"], "jing": ["j", "ing"], "jiong": ["j", "iong"], "jiu": ["j", "iou"], "ju": ["j", "v"], "juan": ["j", "van"], "jue": ["j", "ve"], "jun": ["j", "vn"], "ka": ["k", "a"], "kai": ["k", "ai"], "kan": ["k", "an"], "kang": ["k", "ang"], "kao": ["k", "ao"], "ke": ["k", "e"], "kei": ["k", "ei"], "ken": ["k", "en"], "keng": ["k", "eng"], "kong": ["k", "ong"], "kou": ["k", "ou"], "ku": ["k", "u"], "kua": ["k", "ua"], "kuai": ["k", "uai"], "kuan": ["k", "uan"], "kuang": ["k", "uang"], "kui": ["k", "uei"], "kun": ["k", "uen"], "kuo": ["k", "uo"], "la": ["l", "a"], "lai": ["l", "ai"], "lan": ["l", "an"], "lang": ["l", "ang"], "lao": ["l", "ao"], "le": ["l", "e"], "lei": ["l", "ei"], "leng": ["l", "eng"], "li": ["l", "i"], "lia": ["l", "ia"], "lian": ["l", "ian"], "liang": ["l", "iang"], "liao": ["l", "iao"], "lie": ["l", "ie"], "lin": ["l", "in"], "ling": ["l", "ing"], "liu": ["l", "iou"], "lo": ["l", "o"], "long": ["l", "ong"], "lou": ["l", "ou"], "lu": ["l", "u"], "lv": ["l", "v"], "luan": ["l", "uan"], "lve": ["l", "ve"], "lue": ["l", "ve"], "lun": ["l", "uen"], "luo": ["l", "uo"], "ma": ["m", "a"], "mai": ["m", "ai"], "man": ["m", "an"], "mang": ["m", "ang"], "mao": ["m", "ao"], "me": ["m", "e"], "mei": ["m", "ei"], "men": ["m", "en"], "meng": ["m", "eng"], "mi": ["m", "i"], "mian": ["m", "ian"], "miao": ["m", "iao"], "mie": ["m", "ie"], "min": ["m", "in"], "ming": ["m", "ing"], "miu": ["m", "iou"], "mo": ["m", "o"], "mou": ["m", "ou"], "mu": ["m", "u"], "na": ["n", "a"], "nai": ["n", "ai"], "nan": ["n", "an"], "nang": ["n", "ang"], "nao": ["n", "ao"], "ne": ["n", "e"], "nei": ["n", "ei"], "nen": ["n", "en"], "neng": ["n", "eng"], "ni": ["n", "i"], "nia": ["n", "ia"], 
"nian": ["n", "ian"], "niang": ["n", "iang"], "niao": ["n", "iao"], "nie": ["n", "ie"], "nin": ["n", "in"], "ning": ["n", "ing"], "niu": ["n", "iou"], "nong": ["n", "ong"], "nou": ["n", "ou"], "nu": ["n", "u"], "nv": ["n", "v"], "nuan": ["n", "uan"], "nve": ["n", "ve"], "nue": ["n", "ve"], "nuo": ["n", "uo"], "o": ["^", "o"], "ou": ["^", "ou"], "pa": ["p", "a"], "pai": ["p", "ai"], "pan": ["p", "an"], "pang": ["p", "ang"], "pao": ["p", "ao"], "pe": ["p", "e"], "pei": ["p", "ei"], "pen": ["p", "en"], "peng": ["p", "eng"], "pi": ["p", "i"], "pian": ["p", "ian"], "piao": ["p", "iao"], "pie": ["p", "ie"], "pin": ["p", "in"], "ping": ["p", "ing"], "po": ["p", "o"], "pou": ["p", "ou"], "pu": ["p", "u"], "qi": ["q", "i"], "qia": ["q", "ia"], "qian": ["q", "ian"], "qiang": ["q", "iang"], "qiao": ["q", "iao"], "qie": ["q", "ie"], "qin": ["q", "in"], "qing": ["q", "ing"], "qiong": ["q", "iong"], "qiu": ["q", "iou"], "qu": ["q", "v"], "quan": ["q", "van"], "que": ["q", "ve"], "qun": ["q", "vn"], "ran": ["r", "an"], "rang": ["r", "ang"], "rao": ["r", "ao"], "re": ["r", "e"], "ren": ["r", "en"], "reng": ["r", "eng"], "ri": ["r", "iii"], "rong": ["r", "ong"], "rou": ["r", "ou"], "ru": ["r", "u"], "rua": ["r", "ua"], "ruan": ["r", "uan"], "rui": ["r", "uei"], "run": ["r", "uen"], "ruo": ["r", "uo"], "sa": ["s", "a"], "sai": ["s", "ai"], "san": ["s", "an"], "sang": ["s", "ang"], "sao": ["s", "ao"], "se": ["s", "e"], "sen": ["s", "en"], "seng": ["s", "eng"], "sha": ["sh", "a"], "shai": ["sh", "ai"], "shan": ["sh", "an"], "shang": ["sh", "ang"], "shao": ["sh", "ao"], "she": ["sh", "e"], "shei": ["sh", "ei"], "shen": ["sh", "en"], "sheng": ["sh", "eng"], "shi": ["sh", "iii"], "shou": ["sh", "ou"], "shu": ["sh", "u"], "shua": ["sh", "ua"], "shuai": ["sh", "uai"], "shuan": ["sh", "uan"], "shuang": ["sh", "uang"], "shui": ["sh", "uei"], "shun": ["sh", "uen"], "shuo": ["sh", "uo"], "si": ["s", "ii"], "song": ["s", "ong"], "sou": ["s", "ou"], "su": ["s", "u"], "suan": ["s", "uan"], "sui": ["s", "uei"], "sun": ["s", "uen"], "suo": ["s", "uo"], "ta": ["t", "a"], "tai": ["t", "ai"], "tan": ["t", "an"], "tang": ["t", "ang"], "tao": ["t", "ao"], "te": ["t", "e"], "tei": ["t", "ei"], "teng": ["t", "eng"], "ti": ["t", "i"], "tian": ["t", "ian"], "tiao": ["t", "iao"], "tie": ["t", "ie"], "ting": ["t", "ing"], "tong": ["t", "ong"], "tou": ["t", "ou"], "tu": ["t", "u"], "tuan": ["t", "uan"], "tui": ["t", "uei"], "tun": ["t", "uen"], "tuo": ["t", "uo"], "wa": ["^", "ua"], "wai": ["^", "uai"], "wan": ["^", "uan"], "wang": ["^", "uang"], "wei": ["^", "uei"], "wen": ["^", "uen"], "weng": ["^", "ueng"], "wo": ["^", "uo"], "wu": ["^", "u"], "xi": ["x", "i"], "xia": ["x", "ia"], "xian": ["x", "ian"], "xiang": ["x", "iang"], "xiao": ["x", "iao"], "xie": ["x", "ie"], "xin": ["x", "in"], "xing": ["x", "ing"], "xiong": ["x", "iong"], "xiu": ["x", "iou"], "xu": ["x", "v"], "xuan": ["x", "van"], "xue": ["x", "ve"], "xun": ["x", "vn"], "ya": ["^", "ia"], "yan": ["^", "ian"], "yang": ["^", "iang"], "yao": ["^", "iao"], "ye": ["^", "ie"], "yi": ["^", "i"], "yin": ["^", "in"], "ying": ["^", "ing"], "yo": ["^", "iou"], "yong": ["^", "iong"], "you": ["^", "iou"], "yu": ["^", "v"], "yuan": ["^", "van"], "yue": ["^", "ve"], "yun": ["^", "vn"], "za": ["z", "a"], "zai": ["z", "ai"], "zan": ["z", "an"], "zang": ["z", "ang"], "zao": ["z", "ao"], "ze": ["z", "e"], "zei": ["z", "ei"], "zen": ["z", "en"], "zeng": ["z", "eng"], "zha": ["zh", "a"], "zhai": ["zh", "ai"], "zhan": ["zh", "an"], "zhang": ["zh", "ang"], "zhao": ["zh", "ao"], "zhe": ["zh", 
"e"], "zhei": ["zh", "ei"], "zhen": ["zh", "en"], "zheng": ["zh", "eng"], "zhi": ["zh", "iii"], "zhong": ["zh", "ong"], "zhou": ["zh", "ou"], "zhu": ["zh", "u"], "zhua": ["zh", "ua"], "zhuai": ["zh", "uai"], "zhuan": ["zh", "uan"], "zhuang": ["zh", "uang"], "zhui": ["zh", "uei"], "zhun": ["zh", "uen"], "zhuo": ["zh", "uo"], "zi": ["z", "ii"], "zong": ["z", "ong"], "zou": ["z", "ou"], "zu": ["z", "u"], "zuan": ["z", "uan"], "zui": ["z", "uei"], "zun": ["z", "uen"], "zuo": ["z", "uo"]}} -------------------------------------------------------------------------------- /zhtts/asset/fastspeech2_quan.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jackiexiao/zhtts/f96d084506aaeb3d1bcdd066525aad891ef66949/zhtts/asset/fastspeech2_quan.tflite -------------------------------------------------------------------------------- /zhtts/asset/mb_melgan.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jackiexiao/zhtts/f96d084506aaeb3d1bcdd066525aad891ef66949/zhtts/asset/mb_melgan.tflite -------------------------------------------------------------------------------- /zhtts/asset/tacotron2_quan.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jackiexiao/zhtts/f96d084506aaeb3d1bcdd066525aad891ef66949/zhtts/asset/tacotron2_quan.tflite -------------------------------------------------------------------------------- /zhtts/tensorflow_tts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jackiexiao/zhtts/f96d084506aaeb3d1bcdd066525aad891ef66949/zhtts/tensorflow_tts/__init__.py -------------------------------------------------------------------------------- /zhtts/tensorflow_tts/processor/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_processor import BaseProcessor 2 | from .baker import BakerProcessor -------------------------------------------------------------------------------- /zhtts/tensorflow_tts/processor/baker.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 TensorFlowTTS Team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Perform preprocessing and raw feature extraction for Baker dataset.""" 16 | 17 | import os 18 | import re 19 | from typing import Dict, List, Union, Tuple, Any 20 | 21 | # import librosa 22 | import numpy as np 23 | # import soundfile as sf 24 | from dataclasses import dataclass, field 25 | from pypinyin import Style 26 | from pypinyin.contrib.neutral_tone import NeutralToneWith5Mixin 27 | from pypinyin.converter import DefaultConverter 28 | from pypinyin.core import Pinyin 29 | from .base_processor import BaseProcessor 30 | from .cn_tn import NSWNormalizer 31 | 32 | _pad = ["pad"] 33 | _eos = ["eos"] 34 | _pause = ["sil", "#0", "#1", "#2", "#3"] 35 | 36 | _initials = [ 37 | "^", 38 | "b", 39 | "c", 40 | "ch", 41 | "d", 42 | "f", 43 | "g", 44 | "h", 45 | "j", 46 | "k", 47 | "l", 48 | "m", 49 | "n", 50 | "p", 51 | "q", 52 | "r", 53 | "s", 54 | "sh", 55 | "t", 56 | "x", 57 | "z", 58 | "zh", 59 | ] 60 | 61 | _tones = ["1", "2", "3", "4", "5"] 62 | 63 | _finals = [ 64 | "a", 65 | "ai", 66 | "an", 67 | "ang", 68 | "ao", 69 | "e", 70 | "ei", 71 | "en", 72 | "eng", 73 | "er", 74 | "i", 75 | "ia", 76 | "ian", 77 | "iang", 78 | "iao", 79 | "ie", 80 | "ii", 81 | "iii", 82 | "in", 83 | "ing", 84 | "iong", 85 | "iou", 86 | "o", 87 | "ong", 88 | "ou", 89 | "u", 90 | "ua", 91 | "uai", 92 | "uan", 93 | "uang", 94 | "uei", 95 | "uen", 96 | "ueng", 97 | "uo", 98 | "v", 99 | "van", 100 | "ve", 101 | "vn", 102 | ] 103 | 104 | 105 | ALPHA_PHONE_DICT = { 106 | 'A': ['EE', 'EI1'], 107 | 'B': ['B', 'I4'], 108 | 'C': ['S', 'I1'], 109 | 'D': ['D', 'I4'], 110 | 'E': ['II', 'I4'], 111 | 'F': ['EE', 'EI2', 'F', 'U5'], 112 | 'G': ['J', 'I4'], 113 | 'H': ['EE', 'EI1', 'Q', 'U1'], 114 | 'I': ['AA', 'AI4'], 115 | 'J': ['J', 'IE4'], 116 | 'K': ['K', 'IE4'], 117 | 'L': ['EE', 'EI2', 'L', 'E5'], 118 | 'M': ['EE', 'EI2', 'M', 'ENG5'], 119 | 'N': ['EE', 'EN1'], 120 | 'O': ['OO', 'OU1'], 121 | 'P': ['P', 'I1'], 122 | 'Q': ['Q', 'OU1'], 123 | 'R': ['AA', 'AI1', 'EE', 'ER5'], 124 | 'S': ['EE', 'EI2', 'S', 'IY1'], 125 | 'T': ['T', 'I4'], 126 | 'U': ['II', 'IU1'], 127 | 'V': ['UU', 'UI1'], 128 | 'W': ['D', 'A2', 'B', 'U5', 'L', 'IU5'], 129 | 'X': ['EE', 'EI2', 'K', 'IE5', 'S', 'IY1'], 130 | 'Y': ['UU', 'UAI1'], 131 | 'Z': ['Z', 'E1']} 132 | 133 | _alpha_phones = [] 134 | [_alpha_phones.extend(i) for i in ALPHA_PHONE_DICT.values()] 135 | 136 | # BAKER_SYMBOLS = _pad + _pause + _initials + [i + j for i in _finals for j in _tones] + _eos + _alpha_phones 137 | # TODO 等待支持英文字母 138 | BAKER_SYMBOLS = _pad + _pause + _initials + [i + j for i in _finals for j in _tones] + _eos 139 | 140 | PINYIN_DICT = { 141 | "a": ("^", "a"), 142 | "ai": ("^", "ai"), 143 | "an": ("^", "an"), 144 | "ang": ("^", "ang"), 145 | "ao": ("^", "ao"), 146 | "ba": ("b", "a"), 147 | "bai": ("b", "ai"), 148 | "ban": ("b", "an"), 149 | "bang": ("b", "ang"), 150 | "bao": ("b", "ao"), 151 | "be": ("b", "e"), 152 | "bei": ("b", "ei"), 153 | "ben": ("b", "en"), 154 | "beng": ("b", "eng"), 155 | "bi": ("b", "i"), 156 | "bian": ("b", "ian"), 157 | "biao": ("b", "iao"), 158 | "bie": ("b", "ie"), 159 | "bin": ("b", "in"), 160 | "bing": ("b", "ing"), 161 | "bo": ("b", "o"), 162 | "bu": ("b", "u"), 163 | "ca": ("c", "a"), 164 | "cai": ("c", "ai"), 165 | "can": ("c", "an"), 166 | "cang": ("c", "ang"), 167 | "cao": ("c", "ao"), 168 | "ce": ("c", "e"), 169 | "cen": ("c", "en"), 170 | "ceng": ("c", "eng"), 171 | "cha": ("ch", "a"), 172 | "chai": ("ch", "ai"), 173 | "chan": ("ch", "an"), 174 | "chang": ("ch", "ang"), 175 | "chao": ("ch", "ao"), 176 | "che": ("ch", "e"), 177 
| "chen": ("ch", "en"), 178 | "cheng": ("ch", "eng"), 179 | "chi": ("ch", "iii"), 180 | "chong": ("ch", "ong"), 181 | "chou": ("ch", "ou"), 182 | "chu": ("ch", "u"), 183 | "chua": ("ch", "ua"), 184 | "chuai": ("ch", "uai"), 185 | "chuan": ("ch", "uan"), 186 | "chuang": ("ch", "uang"), 187 | "chui": ("ch", "uei"), 188 | "chun": ("ch", "uen"), 189 | "chuo": ("ch", "uo"), 190 | "ci": ("c", "ii"), 191 | "cong": ("c", "ong"), 192 | "cou": ("c", "ou"), 193 | "cu": ("c", "u"), 194 | "cuan": ("c", "uan"), 195 | "cui": ("c", "uei"), 196 | "cun": ("c", "uen"), 197 | "cuo": ("c", "uo"), 198 | "da": ("d", "a"), 199 | "dai": ("d", "ai"), 200 | "dan": ("d", "an"), 201 | "dang": ("d", "ang"), 202 | "dao": ("d", "ao"), 203 | "de": ("d", "e"), 204 | "dei": ("d", "ei"), 205 | "den": ("d", "en"), 206 | "deng": ("d", "eng"), 207 | "di": ("d", "i"), 208 | "dia": ("d", "ia"), 209 | "dian": ("d", "ian"), 210 | "diao": ("d", "iao"), 211 | "die": ("d", "ie"), 212 | "ding": ("d", "ing"), 213 | "diu": ("d", "iou"), 214 | "dong": ("d", "ong"), 215 | "dou": ("d", "ou"), 216 | "du": ("d", "u"), 217 | "duan": ("d", "uan"), 218 | "dui": ("d", "uei"), 219 | "dun": ("d", "uen"), 220 | "duo": ("d", "uo"), 221 | "e": ("^", "e"), 222 | "ei": ("^", "ei"), 223 | "en": ("^", "en"), 224 | "ng": ("^", "en"), 225 | "eng": ("^", "eng"), 226 | "er": ("^", "er"), 227 | "fa": ("f", "a"), 228 | "fan": ("f", "an"), 229 | "fang": ("f", "ang"), 230 | "fei": ("f", "ei"), 231 | "fen": ("f", "en"), 232 | "feng": ("f", "eng"), 233 | "fo": ("f", "o"), 234 | "fou": ("f", "ou"), 235 | "fu": ("f", "u"), 236 | "ga": ("g", "a"), 237 | "gai": ("g", "ai"), 238 | "gan": ("g", "an"), 239 | "gang": ("g", "ang"), 240 | "gao": ("g", "ao"), 241 | "ge": ("g", "e"), 242 | "gei": ("g", "ei"), 243 | "gen": ("g", "en"), 244 | "geng": ("g", "eng"), 245 | "gong": ("g", "ong"), 246 | "gou": ("g", "ou"), 247 | "gu": ("g", "u"), 248 | "gua": ("g", "ua"), 249 | "guai": ("g", "uai"), 250 | "guan": ("g", "uan"), 251 | "guang": ("g", "uang"), 252 | "gui": ("g", "uei"), 253 | "gun": ("g", "uen"), 254 | "guo": ("g", "uo"), 255 | "ha": ("h", "a"), 256 | "hai": ("h", "ai"), 257 | "han": ("h", "an"), 258 | "hang": ("h", "ang"), 259 | "hao": ("h", "ao"), 260 | "he": ("h", "e"), 261 | "hei": ("h", "ei"), 262 | "hen": ("h", "en"), 263 | "heng": ("h", "eng"), 264 | "hong": ("h", "ong"), 265 | "hou": ("h", "ou"), 266 | "hu": ("h", "u"), 267 | "hua": ("h", "ua"), 268 | "huai": ("h", "uai"), 269 | "huan": ("h", "uan"), 270 | "huang": ("h", "uang"), 271 | "hui": ("h", "uei"), 272 | "hun": ("h", "uen"), 273 | "huo": ("h", "uo"), 274 | "ji": ("j", "i"), 275 | "jia": ("j", "ia"), 276 | "jian": ("j", "ian"), 277 | "jiang": ("j", "iang"), 278 | "jiao": ("j", "iao"), 279 | "jie": ("j", "ie"), 280 | "jin": ("j", "in"), 281 | "jing": ("j", "ing"), 282 | "jiong": ("j", "iong"), 283 | "jiu": ("j", "iou"), 284 | "ju": ("j", "v"), 285 | "juan": ("j", "van"), 286 | "jue": ("j", "ve"), 287 | "jun": ("j", "vn"), 288 | "ka": ("k", "a"), 289 | "kai": ("k", "ai"), 290 | "kan": ("k", "an"), 291 | "kang": ("k", "ang"), 292 | "kao": ("k", "ao"), 293 | "ke": ("k", "e"), 294 | "kei": ("k", "ei"), 295 | "ken": ("k", "en"), 296 | "keng": ("k", "eng"), 297 | "kong": ("k", "ong"), 298 | "kou": ("k", "ou"), 299 | "ku": ("k", "u"), 300 | "kua": ("k", "ua"), 301 | "kuai": ("k", "uai"), 302 | "kuan": ("k", "uan"), 303 | "kuang": ("k", "uang"), 304 | "kui": ("k", "uei"), 305 | "kun": ("k", "uen"), 306 | "kuo": ("k", "uo"), 307 | "la": ("l", "a"), 308 | "lai": ("l", "ai"), 309 | "lan": ("l", "an"), 310 | "lang": 
("l", "ang"), 311 | "lao": ("l", "ao"), 312 | "le": ("l", "e"), 313 | "lei": ("l", "ei"), 314 | "leng": ("l", "eng"), 315 | "li": ("l", "i"), 316 | "lia": ("l", "ia"), 317 | "lian": ("l", "ian"), 318 | "liang": ("l", "iang"), 319 | "liao": ("l", "iao"), 320 | "lie": ("l", "ie"), 321 | "lin": ("l", "in"), 322 | "ling": ("l", "ing"), 323 | "liu": ("l", "iou"), 324 | "lo": ("l", "o"), 325 | "long": ("l", "ong"), 326 | "lou": ("l", "ou"), 327 | "lu": ("l", "u"), 328 | "lv": ("l", "v"), 329 | "luan": ("l", "uan"), 330 | "lve": ("l", "ve"), 331 | "lue": ("l", "ve"), 332 | "lun": ("l", "uen"), 333 | "luo": ("l", "uo"), 334 | "ma": ("m", "a"), 335 | "mai": ("m", "ai"), 336 | "man": ("m", "an"), 337 | "mang": ("m", "ang"), 338 | "mao": ("m", "ao"), 339 | "me": ("m", "e"), 340 | "mei": ("m", "ei"), 341 | "men": ("m", "en"), 342 | "meng": ("m", "eng"), 343 | "mi": ("m", "i"), 344 | "mian": ("m", "ian"), 345 | "miao": ("m", "iao"), 346 | "mie": ("m", "ie"), 347 | "min": ("m", "in"), 348 | "ming": ("m", "ing"), 349 | "miu": ("m", "iou"), 350 | "mo": ("m", "o"), 351 | "mou": ("m", "ou"), 352 | "mu": ("m", "u"), 353 | "na": ("n", "a"), 354 | "nai": ("n", "ai"), 355 | "nan": ("n", "an"), 356 | "nang": ("n", "ang"), 357 | "nao": ("n", "ao"), 358 | "ne": ("n", "e"), 359 | "nei": ("n", "ei"), 360 | "nen": ("n", "en"), 361 | "neng": ("n", "eng"), 362 | "ni": ("n", "i"), 363 | "nia": ("n", "ia"), 364 | "nian": ("n", "ian"), 365 | "niang": ("n", "iang"), 366 | "niao": ("n", "iao"), 367 | "nie": ("n", "ie"), 368 | "nin": ("n", "in"), 369 | "ning": ("n", "ing"), 370 | "niu": ("n", "iou"), 371 | "nong": ("n", "ong"), 372 | "nou": ("n", "ou"), 373 | "nu": ("n", "u"), 374 | "nv": ("n", "v"), 375 | "nuan": ("n", "uan"), 376 | "nve": ("n", "ve"), 377 | "nue": ("n", "ve"), 378 | "nuo": ("n", "uo"), 379 | "o": ("^", "o"), 380 | "ou": ("^", "ou"), 381 | "pa": ("p", "a"), 382 | "pai": ("p", "ai"), 383 | "pan": ("p", "an"), 384 | "pang": ("p", "ang"), 385 | "pao": ("p", "ao"), 386 | "pe": ("p", "e"), 387 | "pei": ("p", "ei"), 388 | "pen": ("p", "en"), 389 | "peng": ("p", "eng"), 390 | "pi": ("p", "i"), 391 | "pian": ("p", "ian"), 392 | "piao": ("p", "iao"), 393 | "pie": ("p", "ie"), 394 | "pin": ("p", "in"), 395 | "ping": ("p", "ing"), 396 | "po": ("p", "o"), 397 | "pou": ("p", "ou"), 398 | "pu": ("p", "u"), 399 | "qi": ("q", "i"), 400 | "qia": ("q", "ia"), 401 | "qian": ("q", "ian"), 402 | "qiang": ("q", "iang"), 403 | "qiao": ("q", "iao"), 404 | "qie": ("q", "ie"), 405 | "qin": ("q", "in"), 406 | "qing": ("q", "ing"), 407 | "qiong": ("q", "iong"), 408 | "qiu": ("q", "iou"), 409 | "qu": ("q", "v"), 410 | "quan": ("q", "van"), 411 | "que": ("q", "ve"), 412 | "qun": ("q", "vn"), 413 | "ran": ("r", "an"), 414 | "rang": ("r", "ang"), 415 | "rao": ("r", "ao"), 416 | "re": ("r", "e"), 417 | "ren": ("r", "en"), 418 | "reng": ("r", "eng"), 419 | "ri": ("r", "iii"), 420 | "rong": ("r", "ong"), 421 | "rou": ("r", "ou"), 422 | "ru": ("r", "u"), 423 | "rua": ("r", "ua"), 424 | "ruan": ("r", "uan"), 425 | "rui": ("r", "uei"), 426 | "run": ("r", "uen"), 427 | "ruo": ("r", "uo"), 428 | "sa": ("s", "a"), 429 | "sai": ("s", "ai"), 430 | "san": ("s", "an"), 431 | "sang": ("s", "ang"), 432 | "sao": ("s", "ao"), 433 | "se": ("s", "e"), 434 | "sen": ("s", "en"), 435 | "seng": ("s", "eng"), 436 | "sha": ("sh", "a"), 437 | "shai": ("sh", "ai"), 438 | "shan": ("sh", "an"), 439 | "shang": ("sh", "ang"), 440 | "shao": ("sh", "ao"), 441 | "she": ("sh", "e"), 442 | "shei": ("sh", "ei"), 443 | "shen": ("sh", "en"), 444 | "sheng": ("sh", "eng"), 445 | 
"shi": ("sh", "iii"), 446 | "shou": ("sh", "ou"), 447 | "shu": ("sh", "u"), 448 | "shua": ("sh", "ua"), 449 | "shuai": ("sh", "uai"), 450 | "shuan": ("sh", "uan"), 451 | "shuang": ("sh", "uang"), 452 | "shui": ("sh", "uei"), 453 | "shun": ("sh", "uen"), 454 | "shuo": ("sh", "uo"), 455 | "si": ("s", "ii"), 456 | "song": ("s", "ong"), 457 | "sou": ("s", "ou"), 458 | "su": ("s", "u"), 459 | "suan": ("s", "uan"), 460 | "sui": ("s", "uei"), 461 | "sun": ("s", "uen"), 462 | "suo": ("s", "uo"), 463 | "ta": ("t", "a"), 464 | "tai": ("t", "ai"), 465 | "tan": ("t", "an"), 466 | "tang": ("t", "ang"), 467 | "tao": ("t", "ao"), 468 | "te": ("t", "e"), 469 | "tei": ("t", "ei"), 470 | "teng": ("t", "eng"), 471 | "ti": ("t", "i"), 472 | "tian": ("t", "ian"), 473 | "tiao": ("t", "iao"), 474 | "tie": ("t", "ie"), 475 | "ting": ("t", "ing"), 476 | "tong": ("t", "ong"), 477 | "tou": ("t", "ou"), 478 | "tu": ("t", "u"), 479 | "tuan": ("t", "uan"), 480 | "tui": ("t", "uei"), 481 | "tun": ("t", "uen"), 482 | "tuo": ("t", "uo"), 483 | "wa": ("^", "ua"), 484 | "wai": ("^", "uai"), 485 | "wan": ("^", "uan"), 486 | "wang": ("^", "uang"), 487 | "wei": ("^", "uei"), 488 | "wen": ("^", "uen"), 489 | "weng": ("^", "ueng"), 490 | "wo": ("^", "uo"), 491 | "wu": ("^", "u"), 492 | "xi": ("x", "i"), 493 | "xia": ("x", "ia"), 494 | "xian": ("x", "ian"), 495 | "xiang": ("x", "iang"), 496 | "xiao": ("x", "iao"), 497 | "xie": ("x", "ie"), 498 | "xin": ("x", "in"), 499 | "xing": ("x", "ing"), 500 | "xiong": ("x", "iong"), 501 | "xiu": ("x", "iou"), 502 | "xu": ("x", "v"), 503 | "xuan": ("x", "van"), 504 | "xue": ("x", "ve"), 505 | "xun": ("x", "vn"), 506 | "ya": ("^", "ia"), 507 | "yan": ("^", "ian"), 508 | "yang": ("^", "iang"), 509 | "yao": ("^", "iao"), 510 | "ye": ("^", "ie"), 511 | "yi": ("^", "i"), 512 | "yin": ("^", "in"), 513 | "ying": ("^", "ing"), 514 | "yo": ("^", "iou"), 515 | "yong": ("^", "iong"), 516 | "you": ("^", "iou"), 517 | "yu": ("^", "v"), 518 | "yuan": ("^", "van"), 519 | "yue": ("^", "ve"), 520 | "yun": ("^", "vn"), 521 | "za": ("z", "a"), 522 | "zai": ("z", "ai"), 523 | "zan": ("z", "an"), 524 | "zang": ("z", "ang"), 525 | "zao": ("z", "ao"), 526 | "ze": ("z", "e"), 527 | "zei": ("z", "ei"), 528 | "zen": ("z", "en"), 529 | "zeng": ("z", "eng"), 530 | "zha": ("zh", "a"), 531 | "zhai": ("zh", "ai"), 532 | "zhan": ("zh", "an"), 533 | "zhang": ("zh", "ang"), 534 | "zhao": ("zh", "ao"), 535 | "zhe": ("zh", "e"), 536 | "zhei": ("zh", "ei"), 537 | "zhen": ("zh", "en"), 538 | "zheng": ("zh", "eng"), 539 | "zhi": ("zh", "iii"), 540 | "zhong": ("zh", "ong"), 541 | "zhou": ("zh", "ou"), 542 | "zhu": ("zh", "u"), 543 | "zhua": ("zh", "ua"), 544 | "zhuai": ("zh", "uai"), 545 | "zhuan": ("zh", "uan"), 546 | "zhuang": ("zh", "uang"), 547 | "zhui": ("zh", "uei"), 548 | "zhun": ("zh", "uen"), 549 | "zhuo": ("zh", "uo"), 550 | "zi": ("z", "ii"), 551 | "zong": ("z", "ong"), 552 | "zou": ("z", "ou"), 553 | "zu": ("z", "u"), 554 | "zuan": ("z", "uan"), 555 | "zui": ("z", "uei"), 556 | "zun": ("z", "uen"), 557 | "zuo": ("z", "uo"), 558 | } 559 | 560 | 561 | zh_pattern = re.compile("[\u4e00-\u9fa5]") 562 | alpha_pattern = re.compile(r"[a-zA-Z]") 563 | 564 | 565 | def is_zh(word): 566 | global zh_pattern 567 | match = zh_pattern.search(word) 568 | return match is not None 569 | 570 | def is_alpha(word): 571 | global alpha_pattern 572 | match = alpha_pattern.search(word) 573 | return match is not None 574 | 575 | 576 | class MyConverter(NeutralToneWith5Mixin, DefaultConverter): 577 | pass 578 | 579 | 580 | @dataclass 581 | class 
BakerProcessor(BaseProcessor): 582 | 583 | pinyin_dict: Dict[str, Tuple[str, str]] = field(default_factory=lambda: PINYIN_DICT) 584 | cleaner_names: str = None 585 | target_rate: int = 24000 586 | speaker_name: str = "baker" 587 | 588 | def __post_init__(self): 589 | super().__post_init__() 590 | self.pinyin_parser = self.get_pinyin_parser() 591 | 592 | def setup_eos_token(self): 593 | return _eos[0] 594 | 595 | def create_items(self): 596 | items = [] 597 | if self.data_dir: 598 | with open( 599 | os.path.join(self.data_dir, "ProsodyLabeling/000001-010000.txt"), 600 | encoding="utf-8", 601 | ) as ttf: 602 | lines = ttf.readlines() 603 | for idx in range(0, len(lines), 2): 604 | utt_id, chn_char = lines[idx].strip().split() # e.g. [100001, 中文] 605 | pinyin = lines[idx + 1].strip().split() # e.g. ['zhong1', 'wen2'] 606 | phonemes = self.get_phoneme_from_char_and_pinyin(chn_char, pinyin) 607 | wav_path = os.path.join(self.data_dir, "Wave", "%s.wav" % utt_id) 608 | items.append( 609 | [" ".join(phonemes), wav_path, utt_id, self.speaker_name] 610 | ) 611 | self.items = items 612 | 613 | def get_phoneme_from_char_and_pinyin(self, chn_char, pinyin): 614 | # we do not need #4, use sil to replace it 615 | chn_char = chn_char.replace("#4", "") 616 | char_len = len(chn_char) 617 | i, j = 0, 0 618 | result = ["sil"] 619 | while i < char_len: 620 | cur_char = chn_char[i] 621 | if is_zh(cur_char): 622 | if pinyin[j][:-1] == 'n': # special case: 嗯 can be romanized as bare 'n'; map it to 'en' 623 | pinyin[j] = 'en' + pinyin[j][-1] 624 | if pinyin[j][:-1] not in self.pinyin_dict: # handle erhua (rhotacized finals, 儿化音) 625 | assert chn_char[i + 1] == "儿", f"current_char : {cur_char}, next_char: {chn_char[i+1]}, cur_pinyin: {pinyin[j]}" 626 | assert pinyin[j][-2] == "r" 627 | tone = pinyin[j][-1] 628 | a = pinyin[j][:-2] 629 | a1, a2 = self.pinyin_dict[a] 630 | result += [a1, a2 + tone, "er5"] 631 | if i + 2 < char_len and chn_char[i + 2] != "#": 632 | result.append("#0") 633 | 634 | i += 2 635 | j += 1 636 | else: 637 | tone = pinyin[j][-1] 638 | a = pinyin[j][:-1] 639 | a1, a2 = self.pinyin_dict[a] # e.g. a="wen" -> a1="^", a2="uen" 640 | result += [a1, a2 + tone] # result = [zh, ong1, ^, uen2] 641 | 642 | if i + 1 < char_len and chn_char[i + 1] != "#": # append a #0 after each character 643 | result.append("#0") 644 | 645 | i += 1 646 | j += 1 647 | # TODO support English alpha 648 | # elif is_alpha(cur_char): 649 | # result += ALPHA_PHONE_DICT[cur_char.upper()] 650 | # if i + 1 < char_len and chn_char[i + 1] not in "#、,。!?:" : # append a #0 after each character 651 | # result.append("#0") 652 | # i += 1 653 | # j += 1 # baker alpha dataset "ABC" in pinyin 654 | elif cur_char == "#": 655 | result.append(chn_char[i : i + 2]) 656 | i += 2 657 | # elif cur_char in "、,。!?:": # insert a pause at punctuation 658 | # result.pop() # drop the preceding #0 659 | # result.append("#3") 660 | # i += 1 661 | else: 662 | # ignore the unknown char 663 | # result.append(chn_char[i]) 664 | i += 1 665 | if result[-1] == "#0": # replace a trailing #0 with sil 666 | result = result[:-1] 667 | if result[-1] != "sil": 668 | result.append("sil") 669 | assert j == len(pinyin) 670 | return result 671 |
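To make the conversion concrete: given a `BakerProcessor` instance `processor` (hypothetical name), a call in the Baker prosody-label format consumed by `create_items` behaves like this (output traced against `PINYIN_DICT` above):

```python
# characters plus their pinyin from the paired label line
processor.get_phoneme_from_char_and_pinyin("中文", ["zhong1", "wen2"])
# -> ['sil', 'zh', 'ong1', '#0', '^', 'uen2', 'sil']
```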
672 | def get_one_sample(self, item): 673 | text, wav_file, utt_id, speaker_name = item 674 | 675 | # normalize audio signal to be [-1, 1]; soundfile already does this. NOTE: used only for dataset preprocessing and requires the librosa/soundfile imports commented out at the top of this file. 676 | audio, rate = sf.read(wav_file) 677 | audio = audio.astype(np.float32) 678 | if rate != self.target_rate: 679 | assert rate > self.target_rate 680 | audio = librosa.resample(audio, rate, self.target_rate) 681 | 682 | # convert text to ids 683 | try: 684 | text_ids = np.asarray(self.text_to_sequence(text), np.int32) 685 | except Exception as e: 686 | print(e, utt_id, text) 687 | return None 688 | 689 | sample = { 690 | "raw_text": text, 691 | "text_ids": text_ids, 692 | "audio": audio, 693 | "utt_id": str(int(utt_id)), 694 | "speaker_name": speaker_name, 695 | "rate": self.target_rate, 696 | } 697 | 698 | return sample 699 | 700 | def get_pinyin_parser(self): 701 | my_pinyin = Pinyin(MyConverter()) 702 | pinyin = my_pinyin.pinyin 703 | return pinyin 704 | 705 | 706 | def text_to_phone(self, text): 707 | """ return string like 'sil c e4 #0 sh iii4 #0 ^ uen2 #0 b en3 sil' """ 708 | text = NSWNormalizer(text.strip()).normalize() 709 | pinyin = self.pinyin_parser(text, style=Style.TONE3, errors="ignore") 710 | new_pinyin = [] 711 | for x in pinyin: 712 | x = "".join(x) 713 | if "#" not in x: 714 | new_pinyin.append(x) 715 | phonemes = self.get_phoneme_from_char_and_pinyin(text, new_pinyin) # phoneme seq: a list of strings like ['sil', 'c', 'e4', '#0', 'sh', 'iii4', '#0', '^', 'uen2', '#0', 'b', 'en3', 'sil'] 716 | phones = " ".join(phonemes) 717 | return text, phones 718 | 719 | def text_to_sequence(self, text, inference=False): 720 | """ string 'sil c e4 #0 sh iii4 #0 ^ uen2 #0 b en3 sil' to list[int], use mapper.json symbol_to_id """ 721 | if inference: 722 | _, phones = self.text_to_phone(text) 723 | else: 724 | phones = text # input is already a phone string like the docstring example 725 | 726 | sequence = [] 727 | for symbol in phones.split(): 728 | idx = self.symbol_to_id[symbol] 729 | sequence.append(idx) 730 | 731 | # add eos token 732 | sequence += [self.eos_id] 733 | return sequence 734 |
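For orientation, a minimal inference-time use of this processor (a sketch: it assumes the packaged mapper at `zhtts/asset/baker_mapper.json` and a working directory at the repo root; `zhtts/tts.py` presumably wires it up the same way):

```python
from zhtts.tensorflow_tts.processor import BakerProcessor

processor = BakerProcessor(
    data_dir=None,  # no dataset is needed at inference time
    loaded_mapper_path="zhtts/asset/baker_mapper.json",
)

normalized, phones = processor.text_to_phone("你好世界")
ids = processor.text_to_sequence("你好世界", inference=True)  # ends with the eos id
```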
-------------------------------------------------------------------------------- /zhtts/tensorflow_tts/processor/base_processor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2020 TensorFlowTTS Team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Base processor for all processors.""" 16 | 17 | import abc 18 | import json 19 | import os 20 | from typing import Dict, List, Union 21 | 22 | from dataclasses import dataclass, field 23 | 24 | 25 | class DataProcessorError(Exception): 26 | pass 27 | 28 | 29 | @dataclass 30 | class BaseProcessor(abc.ABC): 31 | data_dir: str 32 | symbols: List[str] = field(default_factory=list) 33 | speakers_map: Dict[str, int] = field(default_factory=dict) 34 | train_f_name: str = "train.txt" 35 | delimiter: str = "|" 36 | positions = { 37 | "file": 0, 38 | "text": 1, 39 | "speaker_name": 2, 40 | } # positions of file, text, speaker_name after splitting a line 41 | f_extension: str = ".wav" 42 | saved_mapper_path: str = None 43 | loaded_mapper_path: str = None 44 | # extras 45 | items: List[List[str]] = field(default_factory=list) # text, wav_path, speaker_name 46 | symbol_to_id: Dict[str, int] = field(default_factory=dict) 47 | id_to_symbol: Dict[int, str] = field(default_factory=dict) 48 | 49 | def __post_init__(self): 50 | 51 | if self.loaded_mapper_path is not None: 52 | self._load_mapper(loaded_path=self.loaded_mapper_path) 53 | if self.setup_eos_token(): 54 | self.add_symbol( 55 | self.setup_eos_token() 56 | ) # in case this eos token is not yet present in the symbols list. 57 | self.eos_id = self.symbol_to_id[self.setup_eos_token()] 58 | return 59 | 60 | if self.symbols.__len__() < 1: 61 | raise DataProcessorError("Symbols list is empty but mapper isn't loaded") 62 | 63 | self.create_items() 64 | self.create_speaker_map() 65 | self.reverse_speaker = {v: k for k, v in self.speakers_map.items()} 66 | self.create_symbols() 67 | if self.saved_mapper_path is not None: 68 | self._save_mapper(saved_path=self.saved_mapper_path) 69 | 70 | # processor name. useful for AutoProcessor 71 | self._processor_name = type(self).__name__ 72 | 73 | if self.setup_eos_token(): 74 | self.add_symbol( 75 | self.setup_eos_token() 76 | ) # in case this eos token is not yet present in the symbols list. 77 | self.eos_id = self.symbol_to_id[self.setup_eos_token()] 78 | 79 | def __getattr__(self, name: str) -> Union[str, int]: 80 | if "_id" in name: # map symbol to id 81 | return self.symbol_to_id[name.replace("_id", "")] 82 | return self.symbol_to_id[name] # map symbol to value 83 | 84 | def create_speaker_map(self): 85 | """ 86 | Create speaker map for dataset. 87 | """ 88 | sp_id = 0 89 | for i in self.items: 90 | speaker_name = i[-1] 91 | if speaker_name not in self.speakers_map: 92 | self.speakers_map[speaker_name] = sp_id 93 | sp_id += 1 94 | 95 | def get_speaker_id(self, name: str) -> int: 96 | return self.speakers_map[name] 97 | 98 | def get_speaker_name(self, speaker_id: int) -> str: 99 | return {v: k for k, v in self.speakers_map.items()}[speaker_id] # speakers_map is name -> id, so invert it 100 | 101 | def create_symbols(self): 102 | self.symbol_to_id = {s: i for i, s in enumerate(self.symbols)} 103 | self.id_to_symbol = {i: s for i, s in enumerate(self.symbols)} 104 | 105 | def create_items(self): 106 | """ 107 | Create items from the training file. 108 | Item structure: text, wav_file_path, speaker_name. 109 | Note that speaker_name must come last. 110 | """ 111 | with open( 112 | os.path.join(self.data_dir, self.train_f_name), mode="r", encoding="utf-8" 113 | ) as f: 114 | for line in f: 115 | parts = line.strip().split(self.delimiter) 116 | wav_path = os.path.join(self.data_dir, parts[self.positions["file"]]) 117 | wav_path = ( 118 | wav_path + self.f_extension 119 | if wav_path[-len(self.f_extension) :] != self.f_extension 120 | else wav_path 121 | ) 122 | text = parts[self.positions["text"]] 123 | speaker_name = parts[self.positions["speaker_name"]] 124 | self.items.append([text, wav_path, speaker_name]) 125 | 126 | def add_symbol(self, symbol: Union[str, list]): 127 | if isinstance(symbol, str): 128 | if symbol in self.symbol_to_id: 129 | return 130 | self.symbols.append(symbol) 131 | symbol_id = len(self.symbol_to_id) 132 | self.symbol_to_id[symbol] = symbol_id 133 | self.id_to_symbol[symbol_id] = symbol 134 | 135 | elif isinstance(symbol, list): 136 | for i in symbol: 137 | self.add_symbol(i) 138 | else: 139 | raise ValueError("symbol must be a string or a list of strings.") 140 | 141 | @abc.abstractmethod 142 | def get_one_sample(self, item): 143 | """Get one sample from dataset items. 144 | Args: 145 | item: one item in Dataset items. 146 | Dataset items may include (raw_text, speaker_id, wav_path, ...) 147 | 148 | Returns: 149 | sample (dict): dictionary containing all features used later in preprocessing. 150 | """ 151 | sample = { 152 | "raw_text": None, 153 | "text_ids": None, 154 | "audio": None, 155 | "utt_id": None, 156 | "speaker_name": None, 157 | "rate": None, 158 | } 159 | return sample 160 | 161 | @abc.abstractmethod 162 | def text_to_sequence(self, text: str): 163 | return [] 164 | 165 | @abc.abstractmethod 166 | def setup_eos_token(self): 167 | """Return eos symbol of type string.""" 168 | return "eos" 169 | 170 | def convert_symbols_to_ids(self, symbols: Union[str, list]): 171 | sequence = [] 172 | if isinstance(symbols, str): 173 | sequence.append(self.symbol_to_id[symbols]) 174 | return sequence 175 | elif isinstance(symbols, list): 176 | for s in symbols: 177 | if isinstance(s, str): 178 | sequence.append(self.symbol_to_id[s]) 179 | else: 180 | raise ValueError("All elements of symbols must be strings.") 181 | else: 182 | raise ValueError("symbols must be a string or a list of strings.") 183 | 184 | return sequence 185 | 186 | def _load_mapper(self, loaded_path: str = None): 187 | """ 188 | Load all needed mappers from file. 189 | """ 190 | loaded_path = ( 191 | os.path.join(self.data_dir, "mapper.json") 192 | if loaded_path is None 193 | else loaded_path 194 | ) 195 | with open(loaded_path, "r") as f: 196 | data = json.load(f) 197 | self.speakers_map = data["speakers_map"] 198 | self.symbol_to_id = data["symbol_to_id"] 199 | self.id_to_symbol = {int(k): v for k, v in data["id_to_symbol"].items()} 200 | self._processor_name = data["processor_name"] 201 | 202 | # other keys 203 | all_data_keys = data.keys() 204 | for key in all_data_keys: 205 | if key not in ["speakers_map", "symbol_to_id", "id_to_symbol"]: 206 | setattr(self, key, data[key]) 207 |
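For reference, the mapper file that `_load_mapper` reads (and `_save_mapper` below writes) carries four required top-level keys, mirroring the structure of `zhtts/asset/baker_mapper.json` above; a toy mapper with a made-up symbol subset:

```python
import json

toy_mapper = {
    "symbol_to_id": {"pad": 0, "sil": 1, "a1": 2, "eos": 3},
    "id_to_symbol": {"0": "pad", "1": "sil", "2": "a1", "3": "eos"},  # JSON keys are strings
    "speakers_map": {"baker": 0},
    "processor_name": "BakerProcessor",
}
with open("toy_mapper.json", "w") as f:
    json.dump(toy_mapper, f)
# extra keys (e.g. "pinyin_dict") are restored as attributes by _load_mapper
```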
208 | def _save_mapper(self, saved_path: str = None, extra_attrs_to_save: dict = None): 209 | """ 210 | Save all needed mappers to file. 211 | """ 212 | saved_path = ( 213 | os.path.join(self.data_dir, "mapper.json") 214 | if saved_path is None 215 | else saved_path 216 | ) 217 | with open(saved_path, "w") as f: 218 | full_mapper = { 219 | "symbol_to_id": self.symbol_to_id, 220 | "id_to_symbol": self.id_to_symbol, 221 | "speakers_map": self.speakers_map, 222
| "processor_name": self._processor_name, 223 | } 224 | if extra_attrs_to_save: 225 | full_mapper = {**full_mapper, **extra_attrs_to_save} 226 | json.dump(full_mapper, f) 227 | -------------------------------------------------------------------------------- /zhtts/tensorflow_tts/processor/cn_tn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | # Authors: 4 | # 2019.5 Zhiyang Zhou (https://github.com/Joee1995/chn_text_norm.git) 5 | # 2019.9 Jiayu DU 6 | # 7 | # requirements: 8 | # - python 3.X 9 | # notes: python 2.X WILL fail or produce misleading results 10 | 11 | import sys, os, argparse, codecs, string, re 12 | 13 | # ================================================================================ # 14 | # basic constant 15 | # ================================================================================ # 16 | CHINESE_DIGIS = u'零一二三四五六七八九' 17 | BIG_CHINESE_DIGIS_SIMPLIFIED = u'零壹贰叁肆伍陆柒捌玖' 18 | BIG_CHINESE_DIGIS_TRADITIONAL = u'零壹貳參肆伍陸柒捌玖' 19 | SMALLER_BIG_CHINESE_UNITS_SIMPLIFIED = u'十百千万' 20 | SMALLER_BIG_CHINESE_UNITS_TRADITIONAL = u'拾佰仟萬' 21 | LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED = u'亿兆京垓秭穰沟涧正载' 22 | LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL = u'億兆京垓秭穰溝澗正載' 23 | SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED = u'十百千万' 24 | SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL = u'拾佰仟萬' 25 | 26 | ZERO_ALT = u'〇' 27 | ONE_ALT = u'幺' 28 | TWO_ALTS = [u'两', u'兩'] 29 | 30 | POSITIVE = [u'正', u'正'] 31 | NEGATIVE = [u'负', u'負'] 32 | POINT = [u'点', u'點'] 33 | # PLUS = [u'加', u'加'] 34 | # SIL = [u'杠', u'槓'] 35 | 36 | # 中文数字系统类型 37 | NUMBERING_TYPES = ['low', 'mid', 'high'] 38 | 39 | CURRENCY_NAMES = '(人民币|美元|日元|英镑|欧元|马克|法郎|加拿大元|澳元|港币|先令|芬兰马克|爱尔兰镑|' \ 40 | '里拉|荷兰盾|埃斯库多|比塞塔|印尼盾|林吉特|新西兰元|比索|卢布|新加坡元|韩元|泰铢)' 41 | CURRENCY_UNITS = '((亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)' 42 | COM_QUANTIFIERS = '(匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|' \ 43 | '砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|' \ 44 | '针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|' \ 45 | '毫|厘|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|' \ 46 | '盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|' \ 47 | '纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块)' 48 | 49 | # punctuation information are based on Zhon project (https://github.com/tsroten/zhon.git) 50 | CHINESE_PUNC_STOP = '!?。。' 51 | CHINESE_PUNC_NON_STOP = '"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏' 52 | CHINESE_PUNC_OTHER = '·〈〉-' 53 | CHINESE_PUNC_LIST = CHINESE_PUNC_STOP + CHINESE_PUNC_NON_STOP + CHINESE_PUNC_OTHER 54 | 55 | # ================================================================================ # 56 | # basic class 57 | # ================================================================================ # 58 | class ChineseChar(object): 59 | """ 60 | 中文字符 61 | 每个字符对应简体和繁体, 62 | e.g. 简体 = '负', 繁体 = '負' 63 | 转换时可转换为简体或繁体 64 | """ 65 | 66 | def __init__(self, simplified, traditional): 67 | self.simplified = simplified 68 | self.traditional = traditional 69 | #self.__repr__ = self.__str__ 70 | 71 | def __str__(self): 72 | return self.simplified or self.traditional or None 73 | 74 | def __repr__(self): 75 | return self.__str__() 76 | 77 | 78 | class ChineseNumberUnit(ChineseChar): 79 | """ 80 | 中文数字/数位字符 81 | 每个字符除繁简体外还有一个额外的大写字符 82 | e.g. 
78 | class ChineseNumberUnit(ChineseChar):
79 |     """
80 |     Chinese number unit character (digit position).
81 |     Besides the simplified/traditional forms, each character also has an extra 'capital' (banker's) form,
82 |     e.g. '陆' and '陸'.
83 |     """
84 | 
85 |     def __init__(self, power, simplified, traditional, big_s, big_t):
86 |         super(ChineseNumberUnit, self).__init__(simplified, traditional)
87 |         self.power = power
88 |         self.big_s = big_s
89 |         self.big_t = big_t
90 | 
91 |     def __str__(self):
92 |         return '10^{}'.format(self.power)
93 | 
94 |     @classmethod
95 |     def create(cls, index, value, numbering_type=NUMBERING_TYPES[1], small_unit=False):
96 | 
97 |         if small_unit:
98 |             return ChineseNumberUnit(power=index + 1,
99 |                                      simplified=value[0], traditional=value[1], big_s=value[1], big_t=value[1])
100 |         elif numbering_type == NUMBERING_TYPES[0]:
101 |             return ChineseNumberUnit(power=index + 8,
102 |                                      simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1])
103 |         elif numbering_type == NUMBERING_TYPES[1]:
104 |             return ChineseNumberUnit(power=(index + 2) * 4,
105 |                                      simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1])
106 |         elif numbering_type == NUMBERING_TYPES[2]:
107 |             return ChineseNumberUnit(power=pow(2, index + 3),
108 |                                      simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1])
109 |         else:
110 |             raise ValueError(
111 |                 'Counting type should be in {0} ({1} provided).'.format(NUMBERING_TYPES, numbering_type))
112 | 
113 | 
114 | class ChineseNumberDigit(ChineseChar):
115 |     """
116 |     Chinese digit character.
117 |     """
118 | 
119 |     def __init__(self, value, simplified, traditional, big_s, big_t, alt_s=None, alt_t=None):
120 |         super(ChineseNumberDigit, self).__init__(simplified, traditional)
121 |         self.value = value
122 |         self.big_s = big_s
123 |         self.big_t = big_t
124 |         self.alt_s = alt_s
125 |         self.alt_t = alt_t
126 | 
127 |     def __str__(self):
128 |         return str(self.value)
129 | 
130 |     @classmethod
131 |     def create(cls, i, v):
132 |         return ChineseNumberDigit(i, v[0], v[1], v[2], v[3])
133 | 
134 | 
135 | class ChineseMath(ChineseChar):
136 |     """
137 |     Chinese math symbol character.
138 |     """
139 | 
140 |     def __init__(self, simplified, traditional, symbol, expression=None):
141 |         super(ChineseMath, self).__init__(simplified, traditional)
142 |         self.symbol = symbol
143 |         self.expression = expression
144 |         self.big_s = simplified
145 |         self.big_t = traditional
146 | 
147 | 
148 | CC, CNU, CND, CM = ChineseChar, ChineseNumberUnit, ChineseNumberDigit, ChineseMath
149 | 
150 | 
151 | class NumberSystem(object):
152 |     """
153 |     Chinese numeral system.
154 |     """
155 |     pass
156 | 
157 | 
158 | class MathSymbol(object):
159 |     """
160 |     Math symbols (simplified/traditional) used in the Chinese numeral system, e.g.
161 |     positive = ['正', '正']
162 |     negative = ['负', '負']
163 |     point = ['点', '點']
164 |     """
165 | 
166 |     def __init__(self, positive, negative, point):
167 |         self.positive = positive
168 |         self.negative = negative
169 |         self.point = point
170 | 
171 |     def __iter__(self):
172 |         for v in self.__dict__.values():
173 |             yield v
174 | 
175 | 
176 | # class OtherSymbol(object):
177 | #     """
178 | #     other symbols
179 | #     """
180 | #
181 | #     def __init__(self, sil):
182 | #         self.sil = sil
183 | #
184 | #     def __iter__(self):
185 | #         for v in self.__dict__.values():
186 | #             yield v
187 | 
188 | 
189 | # ================================================================================ #
190 | # basic utils
191 | # ================================================================================ #
192 | def create_system(numbering_type=NUMBERING_TYPES[1]):
193 |     """
194 |     Create the numeral system for the given numbering type (default: 'mid').
195 |     NUMBERING_TYPES = ['low', 'mid', 'high']:
196 |         low:  '兆' = '亿' * '十'  = $10^{9}$,  '京' = '兆' * '十', etc.
197 |         mid:  '兆' = '亿' * '万'  = $10^{12}$, '京' = '兆' * '万', etc.
198 |         high: '兆' = '亿' * '亿'  = $10^{16}$, '京' = '兆' * '兆', etc.
199 |     Returns the corresponding numeral system.
200 |     """
201 | 
202 |     # chinese number units of '亿' and larger
203 |     all_larger_units = zip(
204 |         LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED, LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL)
205 |     larger_units = [CNU.create(i, v, numbering_type, False)
206 |                     for i, v in enumerate(all_larger_units)]
207 |     # chinese number units of '十, 百, 千, 万'
208 |     all_smaller_units = zip(
209 |         SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED, SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL)
210 |     smaller_units = [CNU.create(i, v, small_unit=True)
211 |                      for i, v in enumerate(all_smaller_units)]
212 |     # digits
213 |     chinese_digis = zip(CHINESE_DIGIS, CHINESE_DIGIS,
214 |                         BIG_CHINESE_DIGIS_SIMPLIFIED, BIG_CHINESE_DIGIS_TRADITIONAL)
215 |     digits = [CND.create(i, v) for i, v in enumerate(chinese_digis)]
216 |     digits[0].alt_s, digits[0].alt_t = ZERO_ALT, ZERO_ALT
217 |     digits[1].alt_s, digits[1].alt_t = ONE_ALT, ONE_ALT
218 |     digits[2].alt_s, digits[2].alt_t = TWO_ALTS[0], TWO_ALTS[1]
219 | 
220 |     # symbols
221 |     positive_cn = CM(POSITIVE[0], POSITIVE[1], '+', lambda x: x)
222 |     negative_cn = CM(NEGATIVE[0], NEGATIVE[1], '-', lambda x: -x)
223 |     point_cn = CM(POINT[0], POINT[1], '.', lambda x,
224 |                   y: float(str(x) + '.' + str(y)))
225 |     # sil_cn = CM(SIL[0], SIL[1], '-', lambda x, y: float(str(x) + '-' + str(y)))
226 |     system = NumberSystem()
227 |     system.units = smaller_units + larger_units
228 |     system.digits = digits
229 |     system.math = MathSymbol(positive_cn, negative_cn, point_cn)
230 |     # system.symbols = OtherSymbol(sil_cn)
231 |     return system
232 | 
233 | 
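As a quick, hedged sanity check of the unit powers created above (this assumes cn_tn.py is importable; the printed values are hand-traced from the `create` logic):

```python
# Unit powers produced by create_system, per numbering type (illustrative).
sys_mid = create_system('mid')
print([(str(u), u.power) for u in sys_mid.units[:6]])
# -> [('10^1', 1), ('10^2', 2), ('10^3', 3), ('10^4', 4), ('10^8', 8), ('10^12', 12)]
#    i.e. 十, 百, 千, 万, then 亿 = 10^8 and 兆 = 10^12 under 'mid'

sys_high = create_system('high')
print([u.power for u in sys_high.units[4:7]])
# -> [8, 16, 32]   # under 'high', each larger unit squares the previous one
```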
234 | def chn2num(chinese_string, numbering_type=NUMBERING_TYPES[1]):
235 | 
236 |     def get_symbol(char, system):
237 |         for u in system.units:
238 |             if char in [u.traditional, u.simplified, u.big_s, u.big_t]:
239 |                 return u
240 |         for d in system.digits:
241 |             if char in [d.traditional, d.simplified, d.big_s, d.big_t, d.alt_s, d.alt_t]:
242 |                 return d
243 |         for m in system.math:
244 |             if char in [m.traditional, m.simplified]:
245 |                 return m
246 | 
247 |     def string2symbols(chinese_string, system):
248 |         int_string, dec_string = chinese_string, ''
249 |         for p in [system.math.point.simplified, system.math.point.traditional]:
250 |             if p in chinese_string:
251 |                 int_string, dec_string = chinese_string.split(p)
252 |                 break
253 |         return [get_symbol(c, system) for c in int_string], \
254 |                [get_symbol(c, system) for c in dec_string]
255 | 
256 |     def correct_symbols(integer_symbols, system):
257 |         """
258 |         e.g. 一百八 to 一百八十
259 |         e.g. 一亿一千三百万 to 一亿 一千万 三百万
260 |         """
261 | 
262 |         if integer_symbols and isinstance(integer_symbols[0], CNU):
263 |             if integer_symbols[0].power == 1:
264 |                 integer_symbols = [system.digits[1]] + integer_symbols
265 | 
266 |         if len(integer_symbols) > 1:
267 |             if isinstance(integer_symbols[-1], CND) and isinstance(integer_symbols[-2], CNU):
268 |                 integer_symbols.append(
269 |                     CNU(integer_symbols[-2].power - 1, None, None, None, None))
270 | 
271 |         result = []
272 |         unit_count = 0
273 |         for s in integer_symbols:
274 |             if isinstance(s, CND):
275 |                 result.append(s)
276 |                 unit_count = 0
277 |             elif isinstance(s, CNU):
278 |                 current_unit = CNU(s.power, None, None, None, None)
279 |                 unit_count += 1
280 | 
281 |             if unit_count == 1:
282 |                 result.append(current_unit)
283 |             elif unit_count > 1:
284 |                 for i in range(len(result)):
285 |                     if isinstance(result[-i - 1], CNU) and result[-i - 1].power < current_unit.power:
286 |                         result[-i - 1] = CNU(result[-i - 1].power +
287 |                                              current_unit.power, None, None, None, None)
288 |         return result
289 | 
290 |     def compute_value(integer_symbols):
291 |         """
292 |         Compute the value.
293 |         When the current unit is larger than the previous unit, the current unit multiplies all previous units.
294 |         e.g. '两千万' = 2000 * 10000, not 2000 + 10000
295 |         """
296 |         value = [0]
297 |         last_power = 0
298 |         for s in integer_symbols:
299 |             if isinstance(s, CND):
300 |                 value[-1] = s.value
301 |             elif isinstance(s, CNU):
302 |                 value[-1] *= pow(10, s.power)
303 |                 if s.power > last_power:
304 |                     value[:-1] = list(map(lambda v: v *
305 |                                           pow(10, s.power), value[:-1]))
306 |                 last_power = s.power
307 |                 value.append(0)
308 |         return sum(value)
309 | 
310 |     system = create_system(numbering_type)
311 |     int_part, dec_part = string2symbols(chinese_string, system)
312 |     int_part = correct_symbols(int_part, system)
313 |     int_str = str(compute_value(int_part))
314 |     dec_str = ''.join([str(d.value) for d in dec_part])
315 |     if dec_part:
316 |         return '{0}.{1}'.format(int_str, dec_str)
317 |     else:
318 |         return int_str
319 | 
320 | 
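A few worked examples of `chn2num`; the outputs are hand-traced from the logic above rather than taken from the repo's tests:

```python
print(chn2num('一百二十三'))  # -> '123'
print(chn2num('一百八'))      # -> '180'       (correct_symbols expands the trailing digit)
print(chn2num('两千万'))      # -> '20000000'  ('两' is matched via the alt form of 2)
print(chn2num('三点一四'))    # -> '3.14'      (integer and decimal parts split at '点')
```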
321 | def num2chn(number_string, numbering_type=NUMBERING_TYPES[1], big=False,
322 |             traditional=False, alt_zero=False, alt_one=False, alt_two=True,
323 |             use_zeros=True, use_units=True):
324 | 
325 |     def get_value(value_string, use_zeros=True):
326 | 
327 |         striped_string = value_string.lstrip('0')
328 | 
329 |         # record nothing if all zeros
330 |         if not striped_string:
331 |             return []
332 | 
333 |         # record one digit
334 |         elif len(striped_string) == 1:
335 |             if use_zeros and len(value_string) != len(striped_string):
336 |                 return [system.digits[0], system.digits[int(striped_string)]]
337 |             else:
338 |                 return [system.digits[int(striped_string)]]
339 | 
340 |         # recursively record multiple digits
341 |         else:
342 |             result_unit = next(u for u in reversed(
343 |                 system.units) if u.power < len(striped_string))
344 |             result_string = value_string[:-result_unit.power]
345 |             return get_value(result_string) + [result_unit] + get_value(striped_string[-result_unit.power:])
346 | 
347 |     system = create_system(numbering_type)
348 | 
349 |     int_dec = number_string.split('.')
350 |     if len(int_dec) == 1:
351 |         int_string = int_dec[0]
352 |         dec_string = ""
353 |     elif len(int_dec) == 2:
354 |         int_string = int_dec[0]
355 |         dec_string = int_dec[1]
356 |     else:
357 |         raise ValueError(
358 |             "invalid input num string with more than one dot: {}".format(number_string))
359 | 
360 |     if use_units and len(int_string) > 1:
361 |         result_symbols = get_value(int_string)
362 |     else:
363 |         result_symbols = [system.digits[int(c)] for c in int_string]
364 |     dec_symbols = [system.digits[int(c)] for c in dec_string]
365 |     if dec_string:
366 |         result_symbols += [system.math.point] + dec_symbols
367 | 
368 |     if alt_two:
369 |         liang = CND(2, system.digits[2].alt_s, system.digits[2].alt_t,
370 |                     system.digits[2].big_s, system.digits[2].big_t)
371 |         for i, v in enumerate(result_symbols):
372 |             if isinstance(v, CND) and v.value == 2:
373 |                 next_symbol = result_symbols[i +
374 |                                              1] if i < len(result_symbols) - 1 else None
375 |                 previous_symbol = result_symbols[i - 1] if i > 0 else None
376 |                 if isinstance(next_symbol, CNU) and isinstance(previous_symbol, (CNU, type(None))):
377 |                     if next_symbol.power != 1 and ((previous_symbol is None) or (previous_symbol.power != 1)):
378 |                         result_symbols[i] = liang
379 | 
380 |     # if big is True, '两' will not be used and `alt_two` has no effect on the output
381 |     if big:
382 |         attr_name = 'big_'
383 |         if traditional:
384 |             attr_name += 't'
385 |         else:
386 |             attr_name += 's'
387 |     else:
388 |         if traditional:
389 |             attr_name = 'traditional'
390 |         else:
391 |             attr_name = 'simplified'
392 | 
393 |     result = ''.join([getattr(s, attr_name) for s in result_symbols])
394 | 
395 |     # if not use_zeros:
396 |     #     result = result.strip(getattr(system.digits[0], attr_name))
397 | 
398 |     if alt_zero:
399 |         result = result.replace(
400 |             getattr(system.digits[0], attr_name), system.digits[0].alt_s)
401 | 
402 |     if alt_one:
403 |         result = result.replace(
404 |             getattr(system.digits[1], attr_name), system.digits[1].alt_s)
405 | 
406 |     for i, p in enumerate(POINT):
407 |         if result.startswith(p):
408 |             return CHINESE_DIGIS[0] + result
409 | 
410 |     # ^10, 11, .., 19
411 |     if len(result) >= 2 and result[1] in [SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED[0],
412 |                                           SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL[0]] and \
413 |             result[0] in [CHINESE_DIGIS[1], BIG_CHINESE_DIGIS_SIMPLIFIED[1], BIG_CHINESE_DIGIS_TRADITIONAL[1]]:
414 |         result = result[1:]
415 | 
416 |     return result
417 | 
418 | 
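And the reverse direction, `num2chn`; outputs again hand-traced from the code above, not authoritative test values:

```python
print(num2chn('123'))                  # -> '一百二十三'
print(num2chn('14'))                   # -> '十四'      (the leading '一十' is trimmed for 10-19)
print(num2chn('205'))                  # -> '两百零五'  (alt_two=True swaps '二' before units >= 百)
print(num2chn('2000', alt_two=False))  # -> '二千'
print(num2chn('3.14'))                 # -> '三点一四'
```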
419 | # ================================================================================ #
420 | # different types of rewriters
421 | # ================================================================================ #
422 | class Cardinal:
423 |     """
424 |     CARDINAL class
425 |     """
426 | 
427 |     def __init__(self, cardinal=None, chntext=None):
428 |         self.cardinal = cardinal
429 |         self.chntext = chntext
430 | 
431 |     def chntext2cardinal(self):
432 |         return chn2num(self.chntext)
433 | 
434 |     def cardinal2chntext(self):
435 |         return num2chn(self.cardinal)
436 | 
437 | class Digit:
438 |     """
439 |     DIGIT class
440 |     """
441 | 
442 |     def __init__(self, digit=None, chntext=None):
443 |         self.digit = digit
444 |         self.chntext = chntext
445 | 
446 |     # def chntext2digit(self):
447 |     #     return chn2num(self.chntext)
448 | 
449 |     def digit2chntext(self):
450 |         return num2chn(self.digit, alt_two=False, use_units=False)
451 | 
452 | 
453 | class TelePhone:
454 |     """
455 |     TELEPHONE class
456 |     """
457 | 
458 |     def __init__(self, telephone=None, raw_chntext=None, chntext=None):
459 |         self.telephone = telephone
460 |         self.raw_chntext = raw_chntext
461 |         self.chntext = chntext
462 | 
463 |     # def chntext2telephone(self):
464 |     #     sil_parts = self.raw_chntext.split('<SIL>')
465 |     #     self.telephone = '-'.join([
466 |     #         str(chn2num(p)) for p in sil_parts
467 |     #     ])
468 |     #     return self.telephone
469 | 
470 |     def telephone2chntext(self, fixed=False):
471 | 
472 |         if fixed:
473 |             sil_parts = self.telephone.split('-')
474 |             self.raw_chntext = '<SIL>'.join([
475 |                 num2chn(part, alt_two=False, use_units=False) for part in sil_parts
476 |             ])
477 |             self.chntext = self.raw_chntext.replace('<SIL>', '')
478 |         else:
479 |             sp_parts = self.telephone.strip('+').split()
480 |             self.raw_chntext = '<SP>'.join([
481 |                 num2chn(part, alt_two=False, use_units=False) for part in sp_parts
482 |             ])
483 |             self.chntext = self.raw_chntext.replace('<SP>', '')
484 |         return self.chntext
485 | 
486 | 
487 | class Fraction:
488 |     """
489 |     FRACTION class
490 |     """
491 | 
492 |     def __init__(self, fraction=None, chntext=None):
493 |         self.fraction = fraction
494 |         self.chntext = chntext
495 | 
496 |     def chntext2fraction(self):
497 |         denominator, numerator = self.chntext.split('分之')
498 |         return chn2num(numerator) + '/' + chn2num(denominator)
499 | 
500 |     def fraction2chntext(self):
501 |         numerator, denominator = self.fraction.split('/')
502 |         return num2chn(denominator) + '分之' + num2chn(numerator)
503 | 
504 | 
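The rewriter classes are thin wrappers over `chn2num`/`num2chn`; a few hand-traced examples (treat the expected outputs as a sketch, not a test suite):

```python
print(Cardinal(cardinal='12345').cardinal2chntext())  # -> '一万两千三百四十五'
print(Digit(digit='2021').digit2chntext())            # -> '二零二一' (digit by digit, no units)
print(Fraction(fraction='2/3').fraction2chntext())    # -> '三分之二' (denominator read first)
print(TelePhone(telephone='0595-23865596').telephone2chntext(fixed=True))
# -> '零五九五二三八六五五九六' (each dash-separated part read digit by digit)
```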
505 | class Date:
506 |     """
507 |     DATE class
508 |     """
509 | 
510 |     def __init__(self, date=None, chntext=None):
511 |         self.date = date
512 |         self.chntext = chntext
513 | 
514 |     # def chntext2date(self):
515 |     #     chntext = self.chntext
516 |     #     try:
517 |     #         year, other = chntext.strip().split('年', maxsplit=1)
518 |     #         year = Digit(chntext=year).digit2chntext() + '年'
519 |     #     except ValueError:
520 |     #         other = chntext
521 |     #         year = ''
522 |     #     if other:
523 |     #         try:
524 |     #             month, day = other.strip().split('月', maxsplit=1)
525 |     #             month = Cardinal(chntext=month).chntext2cardinal() + '月'
526 |     #         except ValueError:
527 |     #             day = chntext
528 |     #             month = ''
529 |     #         if day:
530 |     #             day = Cardinal(chntext=day[:-1]).chntext2cardinal() + day[-1]
531 |     #     else:
532 |     #         month = ''
533 |     #         day = ''
534 |     #     date = year + month + day
535 |     #     self.date = date
536 |     #     return self.date
537 | 
538 |     def date2chntext(self):
539 |         date = self.date
540 |         try:
541 |             year, other = date.strip().split('年', 1)
542 |             year = Digit(digit=year).digit2chntext() + '年'
543 |         except ValueError:
544 |             other = date
545 |             year = ''
546 |         if other:
547 |             try:
548 |                 month, day = other.strip().split('月', 1)
549 |                 month = Cardinal(cardinal=month).cardinal2chntext() + '月'
550 |             except ValueError:
551 |                 day = date
552 |                 month = ''
553 |             if day:
554 |                 day = Cardinal(cardinal=day[:-1]).cardinal2chntext() + day[-1]
555 |         else:
556 |             month = ''
557 |             day = ''
558 |         chntext = year + month + day
559 |         self.chntext = chntext
560 |         return self.chntext
561 | 
562 | 
563 | class Money:
564 |     """
565 |     MONEY class
566 |     """
567 | 
568 |     def __init__(self, money=None, chntext=None):
569 |         self.money = money
570 |         self.chntext = chntext
571 | 
572 |     # def chntext2money(self):
573 |     #     return self.money
574 | 
575 |     def money2chntext(self):
576 |         money = self.money
577 |         pattern = re.compile(r'(\d+(\.\d+)?)')
578 |         matchers = pattern.findall(money)
579 |         if matchers:
580 |             for matcher in matchers:
581 |                 money = money.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext())
582 |         self.chntext = money
583 |         return self.chntext
584 | 
585 | 
586 | class Percentage:
587 |     """
588 |     PERCENTAGE class
589 |     """
590 | 
591 |     def __init__(self, percentage=None, chntext=None):
592 |         self.percentage = percentage
593 |         self.chntext = chntext
594 | 
595 |     def chntext2percentage(self):
596 |         return chn2num(self.chntext.strip().strip('百分之')) + '%'
597 | 
598 |     def percentage2chntext(self):
599 |         return '百分之' + num2chn(self.percentage.strip().strip('%'))
600 | 
601 | 
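Likewise for dates, money, and percentages; the expected outputs below are hand-traced from the classes above:

```python
print(Date(date='2020年3月15日').date2chntext())             # -> '二零二零年三月十五日'
print(Money(money='12.5元').money2chntext())                 # -> '十二点五元'
print(Percentage(percentage='80.03%').percentage2chntext())  # -> '百分之八十点零三'
```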
602 | # ================================================================================ #
603 | # NSW Normalizer
604 | # ================================================================================ #
605 | class NSWNormalizer:
606 |     def __init__(self, raw_text):
607 |         self.raw_text = '^' + raw_text + '$'
608 |         self.norm_text = ''
609 | 
610 |     def _particular(self):
611 |         text = self.norm_text
612 |         pattern = re.compile(r"(([a-zA-Z]+)二([a-zA-Z]+))")
613 |         matchers = pattern.findall(text)
614 |         if matchers:
615 |             # print('particular')
616 |             for matcher in matchers:
617 |                 text = text.replace(matcher[0], matcher[1]+'2'+matcher[2], 1)
618 |         self.norm_text = text
619 |         return self.norm_text
620 | 
621 |     def normalize(self):
622 |         text = self.raw_text
623 | 
624 |         # normalize dates
625 |         pattern = re.compile(r"\D+((([089]\d|(19|20)\d{2})年)?(\d{1,2}月(\d{1,2}[日号])?)?)")
626 |         matchers = pattern.findall(text)
627 |         if matchers:
628 |             # print('date')
629 |             for matcher in matchers:
630 |                 text = text.replace(matcher[0], Date(date=matcher[0]).date2chntext(), 1)
631 | 
632 |         # normalize money
633 |         pattern = re.compile(r"\D+((\d+(\.\d+)?)[多余几]?" + CURRENCY_UNITS + r"(\d" + CURRENCY_UNITS + r"?)?)")
634 |         matchers = pattern.findall(text)
635 |         if matchers:
636 |             # print('money')
637 |             for matcher in matchers:
638 |                 text = text.replace(matcher[0], Money(money=matcher[0]).money2chntext(), 1)
639 | 
640 |         # normalize landline / mobile phone numbers
641 |         # mobile
642 |         # http://www.jihaoba.com/news/show/13680
643 |         # China Mobile: 139, 138, 137, 136, 135, 134, 159, 158, 157, 150, 151, 152, 188, 187, 182, 183, 184, 178, 198
644 |         # China Unicom: 130, 131, 132, 156, 155, 186, 185, 176
645 |         # China Telecom: 133, 153, 189, 180, 181, 177
646 |         pattern = re.compile(r"\D((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})\D")
647 |         matchers = pattern.findall(text)
648 |         if matchers:
649 |             # print('telephone')
650 |             for matcher in matchers:
651 |                 text = text.replace(matcher[0], TelePhone(telephone=matcher[0]).telephone2chntext(), 1)
652 |         # landline
653 |         pattern = re.compile(r"\D((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})\D")
654 |         matchers = pattern.findall(text)
655 |         if matchers:
656 |             # print('fixed telephone')
657 |             for matcher in matchers:
658 |                 text = text.replace(matcher[0], TelePhone(telephone=matcher[0]).telephone2chntext(fixed=True), 1)
659 | 
660 |         # normalize fractions
661 |         pattern = re.compile(r"(\d+/\d+)")
662 |         matchers = pattern.findall(text)
663 |         if matchers:
664 |             # print('fraction')
665 |             for matcher in matchers:
666 |                 text = text.replace(matcher, Fraction(fraction=matcher).fraction2chntext(), 1)
667 | 
668 |         # normalize percentages
669 |         text = text.replace('%', '%')
670 |         pattern = re.compile(r"(\d+(\.\d+)?%)")
671 |         matchers = pattern.findall(text)
672 |         if matchers:
673 |             # print('percentage')
674 |             for matcher in matchers:
675 |                 text = text.replace(matcher[0], Percentage(percentage=matcher[0]).percentage2chntext(), 1)
676 | 
677 |         # normalize cardinal + quantifier
678 |         pattern = re.compile(r"(\d+(\.\d+)?)[多余几]?" + COM_QUANTIFIERS)
679 |         matchers = pattern.findall(text)
680 |         if matchers:
681 |             # print('cardinal+quantifier')
682 |             for matcher in matchers:
683 |                 text = text.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1)
684 | 
685 |         # normalize digit sequences (e.g. serial numbers)
686 |         pattern = re.compile(r"(\d{4,32})")
687 |         matchers = pattern.findall(text)
688 |         if matchers:
689 |             # print('digit')
690 |             for matcher in matchers:
691 |                 text = text.replace(matcher, Digit(digit=matcher).digit2chntext(), 1)
692 | 
693 |         # normalize plain cardinals
694 |         pattern = re.compile(r"(\d+(\.\d+)?)")
695 |         matchers = pattern.findall(text)
696 |         if matchers:
697 |             # print('cardinal')
698 |             for matcher in matchers:
699 |                 text = text.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1)
700 | 
701 |         self.norm_text = text
702 |         self._particular()
703 | 
704 |         return self.norm_text.lstrip('^').rstrip('$')
705 | 
706 | 
707 | def nsw_test_case(raw_text):
708 |     print('I:' + raw_text)
709 |     print('O:' + NSWNormalizer(raw_text).normalize())
710 |     print('')
711 | 
712 | 
713 | def nsw_test():
714 |     nsw_test_case('固话:0595-23865596或23880880。')
715 |     nsw_test_case('固话:0595-23865596或23880880。')
716 |     nsw_test_case('手机:+86 19859213959或15659451527。')
717 |     nsw_test_case('分数:32477/76391。')
718 |     nsw_test_case('百分数:80.03%。')
719 |     nsw_test_case('编号:31520181154418。')
720 |     nsw_test_case('纯数:2983.07克或12345.60米。')
721 |     nsw_test_case('日期:1999年2月20日或09年3月15号。')
722 |     nsw_test_case('金钱:12块5,34.5元,20.1万')
723 |     nsw_test_case('特殊:O2O或B2C。')
724 |     nsw_test_case('3456万吨')
725 |     nsw_test_case('2938个')
726 |     nsw_test_case('938')
727 |     nsw_test_case('今天吃了115个小笼包231个馒头')
728 |     nsw_test_case('有62%的概率')
729 | 
730 | 
731 | if __name__ == '__main__':
732 |     # nsw_test()
733 | 
734 |     p = argparse.ArgumentParser()
735 |     p.add_argument('ifile', help='input filename, assumes utf-8 encoding')
736 |     p.add_argument('ofile', help='output filename')
737 |     p.add_argument('--to_upper', action='store_true', help='convert to upper case')
738 |     p.add_argument('--to_lower', action='store_true', help='convert to lower case')
739 |     p.add_argument('--has_key', action='store_true', help="input text has Kaldi's key as first field.")
740 |     p.add_argument('--log_interval', type=int, default=100000, help='log interval in number of processed lines')
741 |     args = p.parse_args()
742 | 
743 |     ifile = codecs.open(args.ifile, 'r', 'utf8')
744 |     ofile = codecs.open(args.ofile, 'w+', 'utf8')
745 | 
746 |     n = 0
747 |     for l in ifile:
748 |         key = ''
749 |         text = ''
750 |         if args.has_key:
751 |             cols = l.split(maxsplit=1)
752 |             key = cols[0]
753 |             if len(cols) == 2:
754 |                 text = cols[1].strip()
755 |             else:
756 |                 text = ''
757 |         else:
758 |             text = l.strip()
759 | 
760 |         # cases
761 |         if args.to_upper and args.to_lower:
762 |             sys.stderr.write('cn_tn.py: to_upper OR to_lower?')
763 |             exit(1)
764 |         if args.to_upper:
765 |             text = text.upper()
766 |         if args.to_lower:
767 |             text = text.lower()
768 | 
769 |         # NSW (Non-Standard-Word) normalization
770 |         text = NSWNormalizer(text).normalize()
771 | 
772 |         # punctuation removal
773 |         old_chars = CHINESE_PUNC_LIST + string.punctuation  # includes all CN and EN punctuations
774 |         new_chars = ' ' * len(old_chars)
775 |         del_chars = ''
776 |         text = text.translate(str.maketrans(old_chars, new_chars, del_chars))
777 | 
778 |         #
779 |         if args.has_key:
780 |             ofile.write(key + '\t' + text + '\n')
781 |         else:
782 |             if text.strip() != '':  # skip empty lines in pure text format (without Kaldi's utt key)
783 |                 ofile.write(text + '\n')
784 | 
785 |         n += 1
786 |         if n % args.log_interval == 0:
787 |             sys.stderr.write("cn_tn.py: {} lines done.\n".format(n))
788 |             sys.stderr.flush()
789 | 
790 |     sys.stderr.write("cn_tn.py: {} lines done in total.\n".format(n))
791 |     sys.stderr.flush()
792 | 
793 |     ifile.close()
794 |     ofile.close()
795 | 
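Putting it together, `normalize()` applies the rewriters in a fixed order (dates, money, phone numbers, fractions, percentages, number + quantifier, digit strings, plain cardinals) and finally `_particular()` restores letter-2-letter tokens. A hedged end-to-end sketch, with outputs hand-traced from that pipeline:

```python
print(NSWNormalizer('今天吃了5个苹果').normalize())  # -> '今天吃了五个苹果'
print(NSWNormalizer('有62%的概率').normalize())      # -> '有百分之六十二的概率'
print(NSWNormalizer('O2O或B2C。').normalize())       # -> 'O2O或B2C。' (restored by _particular)
```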
-------------------------------------------------------------------------------- /zhtts/tts.py: --------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from pathlib import Path
4 | import tensorflow as tf
5 | #import tflite_runtime.interpreter as tflite
6 | from scipy.io import wavfile
7 | import re
8 | 
9 | from .tensorflow_tts.processor import BakerProcessor
10 | 
11 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
12 | ASSET_DIR = Path(__file__).parent / "asset"
13 | 
14 | def split_sens(text):
15 |     """ Split text into sentences, keeping each separator attached to the text on its left.
16 | 
17 |     Args:
18 |         text (str):
19 | 
20 |     Returns:
21 |         list[str]: split sentences
22 | 
23 |     Examples:
24 |         >>> split_sens("中文:语音,合成!系统\n")
25 |         ['中文:语音,', '合成!', '系统']
26 |     """
27 |     texts = re.split(r";", re.sub(r"([、,。!?])", r"\1;", text.strip()))
28 |     return [x for x in texts if x]
29 | 
30 | class TTS():
31 |     def __init__(self, text2mel_name="FASTSPEECH2"):
32 |         """text2mel_name: ["FASTSPEECH2", "TACOTRON"] """
33 |         self.sample_rate = 24000
34 |         self.processor = BakerProcessor(
35 |             data_dir=None, loaded_mapper_path=ASSET_DIR / "baker_mapper.json")
36 |         self.text2mel_name = text2mel_name
37 |         if text2mel_name == "FASTSPEECH2":
38 |             self.acoustic = tf.lite.Interpreter(model_path=str(ASSET_DIR / 'fastspeech2_quan.tflite'))
39 |         elif text2mel_name == "TACOTRON":
40 |             self.acoustic = tf.lite.Interpreter(model_path=str(ASSET_DIR / 'tacotron2_quan.tflite'))
41 |         else:
42 |             raise ValueError(f"unsupported text2mel_name: {text2mel_name}")
43 |         self.vocoder = tf.lite.Interpreter(model_path=str(ASSET_DIR / 'mb_melgan.tflite'))
44 | 
{text2mel_name}") 43 | self.vocoder = tf.lite.Interpreter(model_path=str(ASSET_DIR / 'mb_melgan.tflite')) 44 | 45 | def prepare_input(self, input_ids): 46 | input_ids = np.expand_dims(np.array(input_ids, np.int32), 0) 47 | if self.text2mel_name == "TACOTRON": 48 | return (input_ids, 49 | np.array([input_ids.shape[1]], np.int32), 50 | np.array([0], np.int32),) 51 | elif self.text2mel_name == "FASTSPEECH2": 52 | return (input_ids, 53 | np.array([0], np.int32), 54 | np.array([1.0], np.float32), 55 | np.array([1.0], np.float32), 56 | np.array([1.0], np.float32),) 57 | 58 | def text2mel(self, input_text): 59 | input_details = self.acoustic.get_input_details() 60 | output_details = self.acoustic.get_output_details() 61 | input_ids = self.processor.text_to_sequence(input_text, inference=True) 62 | 63 | self.acoustic.resize_tensor_input( 64 | input_details[0]['index'], [1, len(input_ids)]) 65 | self.acoustic.allocate_tensors() 66 | 67 | input_data = self.prepare_input(input_ids) 68 | for i, detail in enumerate(input_details): 69 | self.acoustic.set_tensor(detail['index'], input_data[i]) 70 | self.acoustic.invoke() 71 | 72 | return self.acoustic.get_tensor(output_details[1]['index']) 73 | 74 | def mel2audio(self, mel): 75 | input_details = self.vocoder.get_input_details() 76 | output_details = self.vocoder.get_output_details() 77 | self.vocoder.resize_tensor_input(input_details[0]['index'], mel.shape) 78 | self.vocoder.allocate_tensors() 79 | self.vocoder.set_tensor(input_details[0]['index'], mel) 80 | self.vocoder.invoke() 81 | 82 | return self.vocoder.get_tensor(output_details[0]['index'])[0, :, 0] 83 | 84 | def synthesis(self, text, sil_time=0.2): 85 | """ synthesis text to audio 86 | 87 | Args: 88 | text (str) 89 | sil_time (float): silence duration between two wav 90 | Returns: 91 | ndarray: audio 92 | """ 93 | audios = [] 94 | texts = split_sens(text) 95 | silence = np.zeros(int(sil_time * self.sample_rate), dtype=np.float32) # 添加静音 96 | for i, text in enumerate(texts): 97 | print(f"index: {i}, text: {text}") 98 | print(f"frontend info: {self.frontend(text)}") 99 | # print(self.processor.text_to_sequence(text, inference=True)) 100 | mel = self.text2mel(text) 101 | audio = self.mel2audio(mel) 102 | if self.text2mel_name == "TACOTRON": 103 | audio = audio[:-2048] # tacotron will generate noise at the end 104 | audios.append(audio) 105 | if i < len(texts)-1: 106 | audios.append(silence) 107 | return np.concatenate(audios) 108 | 109 | def frontend(self, text): 110 | """ return normalize_text, phoneme_seq for debug 111 | 112 | Args: 113 | text (str) 114 | Returns: 115 | (tuple): tuple containing: 116 | 117 | normalize_text (str): text after text_normalize 118 | phoneme (str): " ".join(phones) 119 | """ 120 | return self.processor.text_to_phone(text) 121 | 122 | def text2wav(self, text, wavpath): 123 | """synthesis text and save to wavfile""" 124 | audio = self.synthesis(text) 125 | 126 | wavfile.write(wavpath, self.sample_rate, audio) 127 | print(f"Save wav to {wavpath}") 128 | 129 | --------------------------------------------------------------------------------