├── .github └── workflows │ └── SyncToGitee.yml ├── .gitignore ├── LICENSE ├── README.md ├── cpp └── .gitkeep └── python └── PaddleSpeech ├── README.md ├── convert_model.md ├── csmsc_tts2 ├── README.md ├── acoustic │ ├── __init__.py │ └── speedyspeech_csmsc.py ├── frontend │ ├── __init__.py │ ├── arpabet.py │ ├── generate_lexicon.py │ ├── normalizer │ │ ├── __init__.py │ │ ├── abbrrviation.py │ │ ├── acronyms.py │ │ ├── normalizer.py │ │ ├── numbers.py │ │ └── width.py │ ├── phonectic.py │ ├── punctuation.py │ ├── tone_sandhi.py │ ├── vocab.py │ ├── zh_frontend.py │ └── zh_normalization │ │ ├── README.md │ │ ├── __init__.py │ │ ├── char_convert.py │ │ ├── chronology.py │ │ ├── constants.py │ │ ├── num.py │ │ ├── phonecode.py │ │ ├── quantifier.py │ │ └── text_normlization.py ├── infer_result │ ├── 001.wav │ ├── 002.wav │ ├── 003.wav │ ├── 004.wav │ ├── 005.wav │ ├── 006.wav │ ├── 007.wav │ ├── 008.wav │ ├── 009.wav │ ├── 010.wav │ ├── 011.wav │ ├── 012.wav │ ├── 013.wav │ ├── 014.wav │ ├── 015.wav │ ├── 016.wav │ └── 017.wav ├── requirements.txt ├── sentences.txt ├── tts2.py ├── utils.py └── vocoder │ ├── __init__.py │ └── pwgan_csmsc.py ├── csmsc_tts3 ├── README.md ├── assets │ ├── 000001.wav │ └── audio_icon.png ├── csmsc_test.txt ├── frontend │ ├── __init__.py │ ├── arpabet.py │ ├── generate_lexicon.py │ ├── normalizer │ │ ├── __init__.py │ │ ├── abbrrviation.py │ │ ├── acronyms.py │ │ ├── normalizer.py │ │ ├── numbers.py │ │ └── width.py │ ├── phonectic.py │ ├── punctuation.py │ ├── tone_sandhi.py │ ├── vocab.py │ ├── zh_frontend.py │ └── zh_normalization │ │ ├── README.md │ │ ├── __init__.py │ │ ├── char_convert.py │ │ ├── chronology.py │ │ ├── constants.py │ │ ├── num.py │ │ ├── phonecode.py │ │ ├── quantifier.py │ │ └── text_normlization.py ├── main.sh ├── requirements.txt ├── syn_utils.py └── tts3.py └── ljspeech_tts3 ├── README.md ├── assets ├── 009.wav └── audio_icon.png ├── frontend ├── __init__.py ├── arpabet.py ├── generate_lexicon.py ├── normalizer │ 
├── __init__.py │ ├── abbrrviation.py │ ├── acronyms.py │ ├── normalizer.py │ ├── numbers.py │ └── width.py ├── phonectic.py ├── punctuation.py ├── tone_sandhi.py ├── vocab.py ├── zh_frontend.py └── zh_normalization │ ├── README.md │ ├── __init__.py │ ├── char_convert.py │ ├── chronology.py │ ├── constants.py │ ├── num.py │ ├── phonecode.py │ ├── quantifier.py │ └── text_normlization.py ├── main.sh ├── requirements.txt ├── sentences_en.txt ├── syn_utils.py └── tts3.py /.github/workflows/SyncToGitee.yml: -------------------------------------------------------------------------------- 1 | name: syncToGitee 2 | on: 3 | push: 4 | branches: 5 | - main 6 | jobs: 7 | repo-sync: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout source codes 11 | uses: actions/checkout@v2 12 | 13 | - name: Mirror the Github organization repos to Gitee. 14 | uses: Yikun/hub-mirror-action@master 15 | with: 16 | src: 'github/RapidAI' 17 | dst: 'gitee/RapidAI' 18 | dst_key: ${{ secrets.GITEE_PRIVATE_KEY }} 19 | dst_token: ${{ secrets.GITEE_TOKEN }} 20 | force_update: true 21 | debug: true 22 | 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | 4 | csmsc_tts2/resources/ 5 | csmsc_tts3/resources/ 6 | ljspeech_tts3/resources/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## RapidTTS(文本转语音) 2 | - 本仓库是将开源的文本转语音项目中模型转换为ONNX格式,并对代码做了整理而来。 3 | - 本着易用的原则整理,方便快速落地使用。 4 | - 努力做到推理引擎只用onnxruntime等轻量推理引擎,不依赖torch或者Paddle。 5 | 6 | #### 📖文档导航 7 | - [PaddleSpeech](./python/PaddleSpeech/README.md) 8 | 9 | #### TODO 10 | - 参考[link](https://github.com/RapidAI/RapidTTS/labels/enhancement) 11 | -------------------------------------------------------------------------------- /cpp/.gitkeep: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/cpp/.gitkeep -------------------------------------------------------------------------------- /python/PaddleSpeech/README.md: -------------------------------------------------------------------------------- 1 | ## RapidTTS(文本转语音) 2 | 3 | |目录名称|推理引擎|支持语言| 4 | |:---:|:---:|:---:| 5 | |[csmsc_tts2](./csmsc_tts2)|Paddle+ONNXRuntime|中文和数字| 6 | |[csmsc_tts3](./csmsc_tts3)|ONNXRuntime|中文和数字| 7 | |[ljspeech_tts3](./ljspeech_tts3)|ONNXRuntime|英文| 8 | 9 | ### 更新日志 10 | 11 | #### 🎈2022-04-16 update 12 | - 添加`ljspeech_tts3`,英文文本转语音模块 13 | 14 | #### 2022-04-09 update 15 | - 添加`csmsc_tts2`中模型转换说明文档([模型转换](./convert_model.md)) 16 | 17 | #### 2022-04-08 update 18 | - 尝试采用OpenVINO推理引擎,但是目前模型尚未转换成功,具体尝试过程参见:[Paddle模型尝试转换](https://github.com/RapidAI/RapidTTS2/wiki/Paddle%E6%A8%A1%E5%9E%8B%E5%B0%9D%E8%AF%95%E8%BD%AC%E6%8D%A2) 19 | -------------------------------------------------------------------------------- /python/PaddleSpeech/convert_model.md: -------------------------------------------------------------------------------- 1 | #### 转换pwgan_csmsc到onnx 2 | - `Paddle2ONNX`: latest 3 | - 模型下载: [Pretrained model](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc1#pretrained-models) | [link](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_static_0.4.zip) 4 | - 转换脚本: 5 | ```bash 6 | paddle2onnx --model_dir pwg_baker_static_0.4 \ 7 | --model_filename pwgan_csmsc.pdmodel \ 8 | --params_filename pwgan_csmsc.pdiparams \ 9 | --save_file pwgan_csmsc.onnx \ 10 | --opset_version 11 11 | ``` 12 | 13 | #### 转换pwgan_ljspeech和fastspeech_ljspeech到onnx 14 | - pwgan_ljspeech官方只提供了[动态图模型](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/voc1#pretrained-model)。不过运行代码中提供了动态图转静态图模型代码,只需要搭建PaddleSpeech运行环境,跑一遍示例demo,即可得到对应的静态模型 15 | - 详情参见[AI
Studio](https://aistudio.baidu.com/aistudio/projectdetail/3359986?shared=1) -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/README.md: -------------------------------------------------------------------------------- 1 | #### csmsc_tts2 2 | - **支持合成语言**: 中文和数字,不支持英文 3 | - 基于[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)下的[TTS2](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README_cn.md)整理而来 4 | - 共分为三步,`frontend`、`acoustic`、`vocoder` 5 | - `acoustic`模型推理目前基于`PaddlePaddle` 6 | - `vocoder`模型推理基于`ONNXRuntime` 7 | - 其中PaddleSpeech中提供的预训练模型可以参见[link](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README_cn.md#4-%E9%A2%84%E8%AE%AD%E7%BB%83%E6%A8%A1%E5%9E%8B)。在csmsc_tts2中使用的是: 8 | 9 | |主要部分|具体模型|支持语言| 10 | |:---|:---|:---| 11 | |声学模型|[speedyspeech_csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts2)|zh| 12 | |声码器|[pwgan_csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc1#pretrained-models)|zh| 13 | 14 | #### [模型转换](./convert_model.md) 15 | 16 | #### 运行步骤 17 | 1. 下载`resources`, [Google Drive](https://drive.google.com/file/d/1q3NCydNhFeU2cpLUgevidCHeSzclK0a7/view?usp=sharing) | [百度网盘,提取码:kmcf](https://pan.baidu.com/s/1MGbaS6e_pFqrfIc5OVjWjg), 解压到RapidTTS2目录下 18 | 19 | 2. 安装`requirements.txt` 20 | ```bash 21 | pip install -r requirements.txt -i https://pypi.douban.com/simple/ 22 | ``` 23 | 24 | 3. 运行`tts2.py` 25 | ```bash 26 | python tts2.py 27 | ``` 28 | 4. 运行日志如下: 29 | ```text 30 | 初始化前处理部分 31 | frontend done! 32 | 初始化提取特征模型 33 | am_predictor done! 34 | 初始化合成wav模型 35 | 合成指定句子 36 | Building prefix dict from the default dictionary ... 37 | Loading model from cache /tmp/jieba.cache 38 | Loading model cost 1.431 seconds. 39 | Prefix dict has been built successfully. 40 | infer_result/001.wav done! cost: 7.226019859313965s 41 | infer_result/002.wav done! 
cost: 9.149477005004883s 42 | infer_result/003.wav done! cost: 3.4020116329193115s 43 | infer_result/004.wav done! cost: 14.5472412109375s 44 | infer_result/005.wav done! cost: 14.142913818359375s 45 | infer_result/006.wav done! cost: 10.191686630249023s 46 | infer_result/007.wav done! cost: 15.726643800735474s 47 | infer_result/008.wav done! cost: 15.421608209609985s 48 | infer_result/009.wav done! cost: 8.083441972732544s 49 | infer_result/010.wav done! cost: 10.538750886917114s 50 | infer_result/011.wav done! cost: 7.974739074707031s 51 | infer_result/012.wav done! cost: 7.274432897567749s 52 | infer_result/013.wav done! cost: 8.204563856124878s 53 | infer_result/014.wav done! cost: 8.994312286376953s 54 | infer_result/015.wav done! cost: 5.084768056869507s 55 | infer_result/016.wav done! cost: 5.3102569580078125s 56 | ``` 57 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/acoustic/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | from .speedyspeech_csmsc import SpeedySpeechAcoustic 3 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/acoustic/speedyspeech_csmsc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
from paddle import inference


class SpeedySpeechAcoustic(object):
    """SpeedySpeech acoustic model wrapped around a Paddle inference predictor.

    Loads an exported static-graph model and turns phone/tone id sequences
    into acoustic features (returned as a numpy array).
    """

    def __init__(self, pdmodel_path, pdiparams_path):
        # Build the predictor from the exported static-graph model files.
        config = inference.Config(pdmodel_path, pdiparams_path)
        config.disable_glog_info()  # silence Paddle's glog output
        self.am_predictor = inference.create_predictor(config)

        self.am_input_names = self.am_predictor.get_input_names()
        self.am_output_names = self.am_predictor.get_output_names()

    def _feed(self, input_name, id_tensors):
        # Copy one id array (phones or tones) into the named input slot.
        # Only the first element of the batch list is used, matching the
        # single-sentence inference path.
        array = id_tensors[0].numpy()
        handle = self.am_predictor.get_input_handle(input_name)
        handle.reshape(array.shape)
        handle.copy_from_cpu(array)

    def __call__(self, input_ids):
        """Run the acoustic model.

        Parameters
        ----------
        input_ids : dict
            Must contain "phone_ids" and "tone_ids" — lists of paddle
            Tensors produced by the frontend.

        Returns
        -------
        numpy.ndarray
            The predictor's first output, copied to CPU.
        """
        self._feed(self.am_input_names[0], input_ids["phone_ids"])
        self._feed(self.am_input_names[1], input_ids["tone_ids"])

        self.am_predictor.run()
        out_handle = self.am_predictor.get_output_handle(self.am_output_names[0])
        return out_handle.copy_to_cpu()
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .generate_lexicon import * 15 | from .normalizer import * 16 | from .phonectic import * 17 | from .punctuation import * 18 | from .tone_sandhi import * 19 | from .vocab import * 20 | from .zh_normalization import * 21 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/arpabet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .phonectic import Phonetics 15 | """ 16 | A phonology system with ARPABET symbols and limited punctuations. The G2P 17 | conversion is done by g2p_en. 18 | 19 | Note that g2p_en does not handle words with hypen well. So make sure the input 20 | sentence is first normalized. 
21 | """ 22 | from .vocab import Vocab 23 | from g2p_en import G2p 24 | 25 | 26 | class ARPABET(Phonetics): 27 | """A phonology for English that uses ARPABET as the phoneme vocabulary. 28 | See http://www.speech.cs.cmu.edu/cgi-bin/cmudict for more details. 29 | Phoneme Example Translation 30 | ------- ------- ----------- 31 | AA odd AA D 32 | AE at AE T 33 | AH hut HH AH T 34 | AO ought AO T 35 | AW cow K AW 36 | AY hide HH AY D 37 | B be B IY 38 | CH cheese CH IY Z 39 | D dee D IY 40 | DH thee DH IY 41 | EH Ed EH D 42 | ER hurt HH ER T 43 | EY ate EY T 44 | F fee F IY 45 | G green G R IY N 46 | HH he HH IY 47 | IH it IH T 48 | IY eat IY T 49 | JH gee JH IY 50 | K key K IY 51 | L lee L IY 52 | M me M IY 53 | N knee N IY 54 | NG ping P IH NG 55 | OW oat OW T 56 | OY toy T OY 57 | P pee P IY 58 | R read R IY D 59 | S sea S IY 60 | SH she SH IY 61 | T tea T IY 62 | TH theta TH EY T AH 63 | UH hood HH UH D 64 | UW two T UW 65 | V vee V IY 66 | W we W IY 67 | Y yield Y IY L D 68 | Z zee Z IY 69 | ZH seizure S IY ZH ER 70 | """ 71 | phonemes = [ 72 | 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'B', 'CH', 'D', 'DH', 'EH', 'ER', 73 | 'EY', 'F', 'G', 'HH', 'IH', 'IY', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 74 | 'OY', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UW', 'UH', 'V', 'W', 'Y', 'Z', 75 | 'ZH' 76 | ] 77 | punctuations = [',', '.', '?', '!'] 78 | symbols = phonemes + punctuations 79 | _stress_to_no_stress_ = { 80 | 'AA0': 'AA', 81 | 'AA1': 'AA', 82 | 'AA2': 'AA', 83 | 'AE0': 'AE', 84 | 'AE1': 'AE', 85 | 'AE2': 'AE', 86 | 'AH0': 'AH', 87 | 'AH1': 'AH', 88 | 'AH2': 'AH', 89 | 'AO0': 'AO', 90 | 'AO1': 'AO', 91 | 'AO2': 'AO', 92 | 'AW0': 'AW', 93 | 'AW1': 'AW', 94 | 'AW2': 'AW', 95 | 'AY0': 'AY', 96 | 'AY1': 'AY', 97 | 'AY2': 'AY', 98 | 'EH0': 'EH', 99 | 'EH1': 'EH', 100 | 'EH2': 'EH', 101 | 'ER0': 'ER', 102 | 'ER1': 'ER', 103 | 'ER2': 'ER', 104 | 'EY0': 'EY', 105 | 'EY1': 'EY', 106 | 'EY2': 'EY', 107 | 'IH0': 'IH', 108 | 'IH1': 'IH', 109 | 'IH2': 'IH', 110 | 'IY0': 'IY', 111 | 'IY1': 'IY', 
112 | 'IY2': 'IY', 113 | 'OW0': 'OW', 114 | 'OW1': 'OW', 115 | 'OW2': 'OW', 116 | 'OY0': 'OY', 117 | 'OY1': 'OY', 118 | 'OY2': 'OY', 119 | 'UH0': 'UH', 120 | 'UH1': 'UH', 121 | 'UH2': 'UH', 122 | 'UW0': 'UW', 123 | 'UW1': 'UW', 124 | 'UW2': 'UW' 125 | } 126 | 127 | def __init__(self): 128 | self.backend = G2p() 129 | self.vocab = Vocab(self.phonemes + self.punctuations) 130 | 131 | def _remove_vowels(self, phone): 132 | return self._stress_to_no_stress_.get(phone, phone) 133 | 134 | def phoneticize(self, sentence, add_start_end=False): 135 | """ Normalize the input text sequence and convert it into pronunciation sequence. 136 | 137 | Parameters 138 | ----------- 139 | sentence: str 140 | The input text sequence. 141 | 142 | Returns 143 | ---------- 144 | List[str] 145 | The list of pronunciation sequence. 146 | """ 147 | phonemes = [ 148 | self._remove_vowels(item) for item in self.backend(sentence) 149 | ] 150 | if add_start_end: 151 | start = self.vocab.start_symbol 152 | end = self.vocab.end_symbol 153 | phonemes = [start] + phonemes + [end] 154 | phonemes = [item for item in phonemes if item in self.vocab.stoi] 155 | return phonemes 156 | 157 | def numericalize(self, phonemes): 158 | """ Convert pronunciation sequence into pronunciation id sequence. 159 | 160 | Parameters 161 | ----------- 162 | phonemes: List[str] 163 | The list of pronunciation sequence. 164 | 165 | Returns 166 | ---------- 167 | List[int] 168 | The list of pronunciation id sequence. 169 | """ 170 | ids = [self.vocab.lookup(item) for item in phonemes] 171 | return ids 172 | 173 | def reverse(self, ids): 174 | """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence. 175 | 176 | Parameters 177 | ----------- 178 | ids: List[int] 179 | The list of pronunciation id sequence. 180 | 181 | Returns 182 | ---------- 183 | List[str] 184 | The list of pronunciation sequence. 
185 | """ 186 | return [self.vocab.reverse(i) for i in ids] 187 | 188 | def __call__(self, sentence, add_start_end=False): 189 | """ Convert the input text sequence into pronunciation id sequence. 190 | 191 | Parameters 192 | ----------- 193 | sentence: str 194 | The input text sequence. 195 | 196 | Returns 197 | ---------- 198 | List[str] 199 | The list of pronunciation id sequence. 200 | """ 201 | return self.numericalize( 202 | self.phoneticize(sentence, add_start_end=add_start_end)) 203 | 204 | @property 205 | def vocab_size(self): 206 | """ Vocab size. 207 | """ 208 | # 47 = 39 phones + 4 punctuations + 4 special tokens 209 | return len(self.vocab) 210 | 211 | 212 | class ARPABETWithStress(Phonetics): 213 | phonemes = [ 214 | 'AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0', 215 | 'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D', 216 | 'DH', 'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2', 217 | 'F', 'G', 'HH', 'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K', 218 | 'L', 'M', 'N', 'NG', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R', 219 | 'S', 'SH', 'T', 'TH', 'UH0', 'UH1', 'UH2', 'UW0', 'UW1', 'UW2', 'V', 220 | 'W', 'Y', 'Z', 'ZH' 221 | ] 222 | punctuations = [',', '.', '?', '!'] 223 | symbols = phonemes + punctuations 224 | 225 | def __init__(self): 226 | self.backend = G2p() 227 | self.vocab = Vocab(self.phonemes + self.punctuations) 228 | 229 | def phoneticize(self, sentence, add_start_end=False): 230 | """ Normalize the input text sequence and convert it into pronunciation sequence. 231 | 232 | Parameters 233 | ----------- 234 | sentence: str 235 | The input text sequence. 236 | 237 | Returns 238 | ---------- 239 | List[str] 240 | The list of pronunciation sequence. 
241 | """ 242 | phonemes = self.backend(sentence) 243 | if add_start_end: 244 | start = self.vocab.start_symbol 245 | end = self.vocab.end_symbol 246 | phonemes = [start] + phonemes + [end] 247 | phonemes = [item for item in phonemes if item in self.vocab.stoi] 248 | return phonemes 249 | 250 | def numericalize(self, phonemes): 251 | """ Convert pronunciation sequence into pronunciation id sequence. 252 | 253 | Parameters 254 | ----------- 255 | phonemes: List[str] 256 | The list of pronunciation sequence. 257 | 258 | Returns 259 | ---------- 260 | List[int] 261 | The list of pronunciation id sequence. 262 | """ 263 | ids = [self.vocab.lookup(item) for item in phonemes] 264 | return ids 265 | 266 | def reverse(self, ids): 267 | """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence. 268 | 269 | Parameters 270 | ----------- 271 | ids: List[int] 272 | The list of pronunciation id sequence. 273 | 274 | Returns 275 | ---------- 276 | List[str] 277 | The list of pronunciation sequence. 278 | """ 279 | return [self.vocab.reverse(i) for i in ids] 280 | 281 | def __call__(self, sentence, add_start_end=False): 282 | """ Convert the input text sequence into pronunciation id sequence. 283 | 284 | Parameters 285 | ----------- 286 | sentence: str 287 | The input text sequence. 288 | 289 | Returns 290 | ---------- 291 | List[str] 292 | The list of pronunciation id sequence. 293 | """ 294 | return self.numericalize( 295 | self.phoneticize(sentence, add_start_end=add_start_end)) 296 | 297 | @property 298 | def vocab_size(self): 299 | """ Vocab size. 300 | """ 301 | # 77 = 69 phones + 4 punctuations + 4 special tokens 302 | return len(self.vocab) 303 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/generate_lexicon.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
# Design principles: https://zhuanlan.zhihu.com/p/349600439
"""Generate a lexicon and symbol set for Mandarin Chinese phonology.

The lexicon is intended for the Montreal Forced Aligner.  Syllables (not
words) are used as the "word" unit, which makes it easy to combine with
grapheme-to-pinyin tools such as pypinyin — the usual convention for
Chinese G2P.
"""
import re
from collections import OrderedDict

INITIALS = [
    'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh',
    'r', 'z', 'c', 's', 'j', 'q', 'x'
]

FINALS = [
    'a', 'ai', 'ao', 'an', 'ang', 'e', 'er', 'ei', 'en', 'eng', 'o', 'ou',
    'ong', 'ii', 'iii', 'i', 'ia', 'iao', 'ian', 'iang', 'ie', 'io', 'iou',
    'iong', 'in', 'ing', 'u', 'ua', 'uai', 'uan', 'uang', 'uei', 'uo', 'uen',
    'ueng', 'v', 've', 'van', 'vn'
]

SPECIALS = ['sil', 'sp']


def rule(C, V, R, T):
    """Build one pinyin syllable from initial ``C``, final ``V``, erhua flag
    ``R`` ('' or 'r') and tone ``T`` ('' or '1'..'5').

    Standard pinyin orthography is applied (special cases for y, w, ui, un,
    iu).  At the phoneme level 'v' stands for 'ü' and 'i' is split into
    three categories 'i' / 'ii' / 'iii'.  Returns None for phonologically
    impossible syllables so callers can filter them out.
    """
    # 'ii' only combines with z / c / s.
    if V == 'ii' and C not in ('z', 'c', 's'):
        return None
    # 'iii' only combines with zh / ch / sh / r.
    if V == 'iii' and C not in ('zh', 'ch', 'sh', 'r'):
        return None

    # i-/v- finals (other than ii/iii) never follow
    # f, g, k, h, zh, ch, sh, r, z, c, s.
    if V not in ('ii', 'iii') and V[0] in ('i', 'v') and C in (
            'f', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's'):
        return None

    # v- finals: 'v'/'ve' only follow j, q, x, n, l (or stand alone);
    # all other v- finals only follow j, q, x (or stand alone).
    if V.startswith('v'):
        if V in ('v', 've'):
            if C not in ('j', 'q', 'x', 'n', 'l', ''):
                return None
        else:
            if C not in ('j', 'q', 'x', ''):
                return None

    # j, q, x must be followed by an i- or v- final.
    if C in ('j', 'q', 'x') and not (
            V not in ('ii', 'iii') and V[0] in ('i', 'v')):
        return None

    # b, p, m, f: no u-/v- finals except bare 'u', and never 'ong'.
    if C in ('b', 'p', 'm', 'f') and (
            (V[0] in ('u', 'v') and V != 'u') or V == 'ong'):
        return None

    # ua / uai / uang never follow d, t, n, l, r, z, c, s.
    if V in ('ua', 'uai', 'uang') and C in (
            'd', 't', 'n', 'l', 'r', 'z', 'c', 's'):
        return None

    # 'sh' + 'ong' is impossible.
    if V == 'ong' and C == 'sh':
        return None

    # 'o' never follows d, t, n, g, k, h, zh, ch, sh, r, z, c, s.
    if V == 'o' and C in ('d', 't', 'n', 'g', 'k', 'h', 'zh', 'ch', 'sh',
                          'r', 'z', 'c', 's'):
        return None

    # 'ueng' only occurs bare (spelled 'weng'); after an initial it is 'ong'.
    if V == 'ueng' and C != '':
        return None

    # Non-erhua 'er' only stands alone.
    if V == 'er' and C != '':
        return None

    if C == '':
        # Zero initial: apply the y / w / yu spelling rules.
        if V in ('i', 'in', 'ing'):
            C = 'y'
        elif V == 'u':
            C = 'w'
        elif V.startswith('i') and V not in ('ii', 'iii'):
            C, V = 'y', V[1:]
        elif V.startswith('u'):
            C, V = 'w', V[1:]
        elif V.startswith('v'):
            C, V = 'yu', V[1:]
    else:
        # After j/q/x, 'ü' is written as 'u'.
        if C in ('j', 'q', 'x') and V.startswith('v'):
            V = re.sub('v', 'u', V)
        # Contracted spellings: iou -> iu, uei -> ui, uen -> un.
        if V == 'iou':
            V = 'iu'
        elif V == 'uei':
            V = 'ui'
        elif V == 'uen':
            V = 'un'
    result = C + V

    # A syllable already ending in 'r' cannot take erhua again.
    if result.endswith('r') and R == 'r':
        return None

    # Collapse the phonemic ii / iii back to the orthographic single 'i'.
    result = re.sub(r'i+', 'i', result)

    return result + R + T


def generate_lexicon(with_tone=False, with_erhua=False):
    """Generate the Mandarin lexicon: an OrderedDict mapping each valid
    pinyin syllable to its space-separated phoneme string."""
    tones = ['1', '2', '3', '4', '5'] if with_tone else ['']
    erhuas = ['', 'r'] if with_erhua else ['']

    syllables = OrderedDict()
    for C in [''] + INITIALS:
        for V in FINALS:
            for R in erhuas:
                for T in tones:
                    syllable = rule(C, V, R, T)
                    if syllable:
                        syllables[syllable] = f'{C} {V}{R}{T}'
    return syllables
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/normalizer/abbrrviation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/normalizer/acronyms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
import re
import unicodedata
from builtins import str as unicode

from .numbers import normalize_numbers


def normalize(sentence):
    """Normalize English text for TTS frontends.

    Expands numbers to words, strips accents, lowercases, removes every
    character outside the supported set, and spells out the abbreviations
    "i.e." and "e.g.".
    """
    text = unicode(sentence)
    text = normalize_numbers(text)
    # Decompose to NFD and drop combining marks — i.e. strip accents.
    text = ''.join(
        ch for ch in unicodedata.normalize('NFD', text)
        if unicodedata.category(ch) != 'Mn')
    text = text.lower()
    # Keep only lowercase letters, space, apostrophe and basic punctuation.
    text = re.sub(r"[^ a-z'.,?!\-]", "", text)
    text = text.replace("i.e.", "that is")
    text = text.replace("e.g.", "for example")
    return text
# Number expansion for English text normalization — not as easy as it looks.
import re

import inflect

_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
_number_re = re.compile(r'[0-9]+')


def _remove_commas(m):
    # '1,234' -> '1234'
    return m.group(1).replace(',', '')


def _expand_decimal_point(m):
    # '3.14' -> '3 point 14'
    return m.group(1).replace('.', ' point ')


def _expand_dollars(m):
    """Spell out a dollar amount, e.g. '1.50' -> '1 dollar, 50 cents'."""
    amount = m.group(1)
    parts = amount.split('.')
    if len(parts) > 2:
        return amount + ' dollars'  # unexpected format — leave digits as-is
    dollars = int(parts[0]) if parts[0] else 0
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    dollar_unit = 'dollar' if dollars == 1 else 'dollars'
    cent_unit = 'cent' if cents == 1 else 'cents'
    if dollars and cents:
        return f'{dollars} {dollar_unit}, {cents} {cent_unit}'
    if dollars:
        return f'{dollars} {dollar_unit}'
    if cents:
        return f'{cents} {cent_unit}'
    return 'zero dollars'


def _expand_ordinal(m):
    # '3rd' -> 'third'
    return _inflect.number_to_words(m.group(0))


def _expand_number(m):
    """Spell out an integer; 1001-2999 are read year-style in digit pairs."""
    num = int(m.group(0))
    if 1000 < num < 3000:
        if num == 2000:
            return 'two thousand'
        if 2000 < num < 2010:
            return 'two thousand ' + _inflect.number_to_words(num % 100)
        if num % 100 == 0:
            return _inflect.number_to_words(num // 100) + ' hundred'
        # e.g. 1999 -> 'nineteen ninety-nine'
        return _inflect.number_to_words(
            num, andword='', zero='oh', group=2).replace(', ', ' ')
    return _inflect.number_to_words(num, andword='')


def normalize_numbers(text):
    """Expand commas, currency, decimals, ordinals and plain integers in
    English text into words."""
    text = re.sub(_comma_number_re, _remove_commas, text)
    text = re.sub(_pounds_re, r'\1 pounds', text)
    text = re.sub(_dollars_re, _expand_dollars, text)
    text = re.sub(_decimal_number_re, _expand_decimal_point, text)
    text = re.sub(_ordinal_re, _expand_ordinal, text)
    text = re.sub(_number_re, _expand_number, text)
    return text
def full2half_width(ustr):
    """Convert full-width (zenkaku) characters in *ustr* to half-width.

    The ideographic space U+3000 becomes an ASCII space; full-width forms
    U+FF01..U+FF5E become their ASCII counterparts U+21..U+7E. Any other
    character passes through unchanged.
    """

    def _to_half(ch):
        code = ord(ch)
        if code == 0x3000:  # full-width space -> ASCII space
            return ' '
        if 0xFF01 <= code <= 0xFF5E:
            return chr(code - 0xFEE0)
        return ch

    return ''.join(_to_half(ch) for ch in ustr)


def half2full_width(ustr):
    """Convert half-width ASCII characters in *ustr* to full-width.

    An ASCII space becomes the ideographic space U+3000; printable ASCII
    U+21..U+7E becomes the full-width forms U+FF01..U+FF5E. Any other
    character passes through unchanged.
    """

    def _to_full(ch):
        code = ord(ch)
        if code == 0x20:  # ASCII space -> full-width space
            return '\u3000'
        if 0x21 <= code <= 0x7E:
            return chr(code + 0xFEE0)
        return ch

    return ''.join(_to_full(ch) for ch in ustr)
from abc import ABC
from abc import abstractmethod

import paddle
from g2p_en import G2p
from g2pM import G2pM

from .normalizer.normalizer import normalize
from .punctuation import get_punctuations
from .vocab import Vocab

# discard opencc until we find an easy solution to install it on windows
# from opencc import OpenCC

__all__ = ["Phonetics", "English", "EnglishCharacter", "Chinese"]


class Phonetics(ABC):
    """Interface of a text frontend: text -> pronunciation -> id sequence."""

    @abstractmethod
    def __call__(self, sentence):
        pass

    @abstractmethod
    def phoneticize(self, sentence):
        pass

    @abstractmethod
    def numericalize(self, phonemes):
        pass


class English(Phonetics):
    """ Normalize the input text sequence and convert into pronunciation id sequence.
    """

    def __init__(self, phone_vocab_path=None):
        self.backend = G2p()
        self.phonemes = list(self.backend.phonemes)
        self.punctuations = get_punctuations("en")
        self.vocab = Vocab(self.phonemes + self.punctuations)
        # phone -> id mapping loaded from `phone_vocab_path`; each line of
        # that file holds a phone and its id separated by whitespace.
        self.vocab_phones = {}
        # Punctuation (full- and half-width) that get_input_ids maps to "sp".
        self.punc = ":,;。?!“”‘’':,;.?!"
        if phone_vocab_path:
            with open(phone_vocab_path, 'rt') as f:
                phn_id = [line.strip().split() for line in f.readlines()]
            for phn, id in phn_id:
                self.vocab_phones[phn] = int(id)

    def phoneticize(self, sentence):
        """ Normalize the input text sequence and convert it into pronunciation sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation sequence.
        """
        start = self.vocab.start_symbol
        end = self.vocab.end_symbol
        phonemes = ([] if start is None else [start]) \
            + self.backend(sentence) \
            + ([] if end is None else [end])
        # Drop anything g2p produced that is not in the vocabulary.
        phonemes = [item for item in phonemes if item in self.vocab.stoi]
        return phonemes

    def get_input_ids(self, sentence: str) -> paddle.Tensor:
        """ Convert text into {"phone_ids": paddle.Tensor}; phones that are
        out of `vocab_phones` or punctuation are replaced by "sp".
        """
        result = {}
        phones = self.phoneticize(sentence)
        # remove start_symbol and end_symbol
        phones = phones[1:-1]
        phones = [phn for phn in phones if not phn.isspace()]
        phones = [
            phn if (phn in self.vocab_phones and phn not in self.punc) else "sp"
            for phn in phones
        ]
        # NOTE(review): assumes "sp" itself is present in vocab_phones —
        # confirm against the phone vocab file.
        phone_ids = [self.vocab_phones[phn] for phn in phones]
        phone_ids = paddle.to_tensor(phone_ids)
        result["phone_ids"] = phone_ids
        return result

    def numericalize(self, phonemes):
        """ Convert pronunciation sequence into pronunciation id sequence.
        Parameters
        -----------
        phonemes: List[str]
            The list of pronunciation sequence.
        Returns
        ----------
        List[int]
            The list of pronunciation id sequence.
        """
        ids = [
            self.vocab.lookup(item) for item in phonemes
            if item in self.vocab.stoi
        ]
        return ids

    def reverse(self, ids):
        """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence.
        Parameters
        -----------
        ids: List[int]
            The list of pronunciation id sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation sequence.
        """
        return [self.vocab.reverse(i) for i in ids]

    def __call__(self, sentence):
        """ Convert the input text sequence into pronunciation id sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation id sequence.
        """
        return self.numericalize(self.phoneticize(sentence))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        return len(self.vocab)


class EnglishCharacter(Phonetics):
    """ Normalize the input text sequence and convert it into character id sequence.
    """

    def __init__(self):
        self.backend = G2p()
        self.graphemes = list(self.backend.graphemes)
        self.punctuations = get_punctuations("en")
        self.vocab = Vocab(self.graphemes + self.punctuations)

    def phoneticize(self, sentence):
        """ Normalize the input text sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        str
            A normalized text sequence.
        """
        words = normalize(sentence)
        return words

    def numericalize(self, sentence):
        """ Convert a text sequence into ids.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[int]
            List of a character id sequence.
        """
        ids = [
            self.vocab.lookup(item) for item in sentence
            if item in self.vocab.stoi
        ]
        return ids

    def reverse(self, ids):
        """ Convert a character id sequence into text.
        Parameters
        -----------
        ids: List[int]
            List of a character id sequence.
        Returns
        ----------
        str
            The input text sequence.
        """
        return [self.vocab.reverse(i) for i in ids]

    def __call__(self, sentence):
        """ Normalize the input text sequence and convert it into character id sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[int]
            List of a character id sequence.
        """
        return self.numericalize(self.phoneticize(sentence))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        return len(self.vocab)


class Chinese(Phonetics):
    """Normalize Chinese text sequence and convert it into ids.
    """

    def __init__(self):
        # self.opencc_backend = OpenCC('t2s.json')
        self.backend = G2pM()
        self.phonemes = self._get_all_syllables()
        self.punctuations = get_punctuations("cn")
        self.vocab = Vocab(self.phonemes + self.punctuations)

    def _get_all_syllables(self):
        # NOTE(review): set iteration order is not stable across processes,
        # so vocab ids may differ between runs — confirm this is acceptable.
        all_syllables = set([
            syllable for k, v in self.backend.cedict.items() for syllable in v
        ])
        return list(all_syllables)

    def phoneticize(self, sentence):
        """ Normalize the input text sequence and convert it into pronunciation sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation sequence.
        """
        # simplified = self.opencc_backend.convert(sentence)
        simplified = sentence
        phonemes = self.backend(simplified)
        start = self.vocab.start_symbol
        end = self.vocab.end_symbol
        phonemes = ([] if start is None else [start]) \
            + phonemes \
            + ([] if end is None else [end])
        return self._filter_symbols(phonemes)

    def _filter_symbols(self, phonemes):
        # Keep in-vocab items whole; otherwise fall back to keeping the
        # individual in-vocab characters of the item.
        cleaned_phonemes = []
        for item in phonemes:
            if item in self.vocab.stoi:
                cleaned_phonemes.append(item)
            else:
                for char in item:
                    if char in self.vocab.stoi:
                        cleaned_phonemes.append(char)
        return cleaned_phonemes

    def numericalize(self, phonemes):
        """ Convert pronunciation sequence into pronunciation id sequence.
        Parameters
        -----------
        phonemes: List[str]
            The list of pronunciation sequence.
        Returns
        ----------
        List[int]
            The list of pronunciation id sequence.
        """
        ids = [self.vocab.lookup(item) for item in phonemes]
        return ids

    def __call__(self, sentence):
        """ Convert the input text sequence into pronunciation id sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation id sequence.
        """
        return self.numericalize(self.phoneticize(sentence))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        return len(self.vocab)

    def reverse(self, ids):
        """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence.
        Parameters
        -----------
        ids: List[int]
            The list of pronunciation id sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation sequence.
        """
        return [self.vocab.reverse(i) for i in ids]
14 | 15 | __all__ = ["get_punctuations"] 16 | 17 | EN_PUNCT = [ 18 | " ", 19 | "-", 20 | "...", 21 | ",", 22 | ".", 23 | "?", 24 | "!", 25 | ] 26 | 27 | CN_PUNCT = ["、", ",", ";", ":", "。", "?", "!"] 28 | 29 | 30 | def get_punctuations(lang): 31 | if lang == "en": 32 | return EN_PUNCT 33 | elif lang == "cn": 34 | return CN_PUNCT 35 | else: 36 | raise ValueError(f"language {lang} Not supported") 37 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/vocab.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from collections import OrderedDict 15 | from typing import Iterable 16 | 17 | __all__ = ["Vocab"] 18 | 19 | 20 | class Vocab(object): 21 | """ Vocabulary. 22 | 23 | Parameters 24 | ----------- 25 | symbols: Iterable[str] 26 | Common symbols. 27 | 28 | padding_symbol: str, optional 29 | Symbol for pad. Defaults to "". 30 | 31 | unk_symbol: str, optional 32 | Symbol for unknow. Defaults to "" 33 | 34 | start_symbol: str, optional 35 | Symbol for start. Defaults to "" 36 | 37 | end_symbol: str, optional 38 | Symbol for end. 
Defaults to "" 39 | """ 40 | 41 | def __init__(self, 42 | symbols: Iterable[str], 43 | padding_symbol="", 44 | unk_symbol="", 45 | start_symbol="", 46 | end_symbol=""): 47 | self.special_symbols = OrderedDict() 48 | for i, item in enumerate( 49 | [padding_symbol, unk_symbol, start_symbol, end_symbol]): 50 | if item: 51 | self.special_symbols[item] = len(self.special_symbols) 52 | 53 | self.padding_symbol = padding_symbol 54 | self.unk_symbol = unk_symbol 55 | self.start_symbol = start_symbol 56 | self.end_symbol = end_symbol 57 | 58 | self.stoi = OrderedDict() 59 | self.stoi.update(self.special_symbols) 60 | 61 | for i, s in enumerate(symbols): 62 | if s not in self.stoi: 63 | self.stoi[s] = len(self.stoi) 64 | self.itos = {v: k for k, v in self.stoi.items()} 65 | 66 | def __len__(self): 67 | return len(self.stoi) 68 | 69 | @property 70 | def num_specials(self): 71 | """ The number of special symbols. 72 | """ 73 | return len(self.special_symbols) 74 | 75 | # special tokens 76 | @property 77 | def padding_index(self): 78 | """ The index of padding symbol 79 | """ 80 | return self.stoi.get(self.padding_symbol, -1) 81 | 82 | @property 83 | def unk_index(self): 84 | """The index of unknow symbol. 85 | """ 86 | return self.stoi.get(self.unk_symbol, -1) 87 | 88 | @property 89 | def start_index(self): 90 | """The index of start symbol. 91 | """ 92 | return self.stoi.get(self.start_symbol, -1) 93 | 94 | @property 95 | def end_index(self): 96 | """ The index of end symbol. 97 | """ 98 | return self.stoi.get(self.end_symbol, -1) 99 | 100 | def __repr__(self): 101 | fmt = "Vocab(size: {},\nstoi:\n{})" 102 | return fmt.format(len(self), self.stoi) 103 | 104 | def __str__(self): 105 | return self.__repr__() 106 | 107 | def lookup(self, symbol): 108 | """ The index that symbol correspond. 109 | """ 110 | return self.stoi[symbol] 111 | 112 | def reverse(self, index): 113 | """ The symbol thar index cottespond. 
114 | """ 115 | return self.itos[index] 116 | 117 | def add_symbol(self, symbol): 118 | """ Add a new symbol in vocab. 119 | """ 120 | if symbol in self.stoi: 121 | return 122 | N = len(self.stoi) 123 | self.stoi[symbol] = N 124 | self.itos[N] = symbol 125 | 126 | def add_symbols(self, symbols): 127 | """ Add multiple symbols in vocab. 128 | """ 129 | for symbol in symbols: 130 | self.add_symbol(symbol) 131 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/zh_normalization/README.md: -------------------------------------------------------------------------------- 1 | ## Supported NSW (Non-Standard-Word) Normalization 2 | 3 | |NSW type|raw|normalized| 4 | |:--|:-|:-| 5 | |serial number|电影中梁朝伟扮演的陈永仁的编号27149|电影中梁朝伟扮演的陈永仁的编号二七一四九| 6 | |cardinal|这块黄金重达324.75克
我们班的最高总分为583分|这块黄金重达三百二十四点七五克
我们班的最高总分为五百八十三分| 7 | |numeric range |12\~23
-1.5\~2|十二到二十三
负一点五到二| 8 | |date|她出生于86年8月18日,她弟弟出生于1995年3月1日|她出生于八六年八月十八日, 她弟弟出生于一九九五年三月一日| 9 | |time|等会请在12:05请通知我|等会请在十二点零五分请通知我| 10 | |temperature|今天的最低气温达到-10°C|今天的最低气温达到零下十度| 11 | |fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票| 12 | |percentage|明天有62%的概率降雨|明天有百分之六十二的概率降雨| 13 | |money|随便来几个价格12块5,34.5元,20.1万|随便来几个价格十二块五,三十四点五元,二十点一万| 14 | |telephone|这是固话0421-33441122
这是手机+86 18544139121|这是固话零四二一三三四四一一二二
这是手机八六一八五四四一三九一二一| 15 | ## References 16 | [Pull requests #658 of DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech/pull/658/files) 17 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/zh_normalization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/zh_normalization/chronology.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import re

from .num import DIGITS
from .num import num2str
from .num import verbalize_cardinal
from .num import verbalize_digit


def _time_num2str(num_string: str) -> str:
    """A special case for verbalizing number in time."""
    # Keep a leading zero audible: "05" -> "零五".
    result = num2str(num_string.lstrip('0'))
    if num_string.startswith('0'):
        result = DIGITS['0'] + result
    return result


# Time-of-day expression, e.g. "8:30" or "8:30:05". (时刻表达式)
RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])'
                     r':([0-5][0-9])'
                     r'(:([0-5][0-9]))?')

# Time range, e.g. "8:30-12:30". (时间范围)
RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?'
                           r'(~|-)'
                           r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?')


def replace_time(match) -> str:
    """Verbalize a time (or time range) match, e.g. "12:05" -> "十二点零五分".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """

    # RE_TIME has 4 capture groups, RE_TIME_RANGE has 9; the group count
    # tells which pattern produced this match.
    is_range = len(match.groups()) > 5

    hour = match.group(1)
    minute = match.group(2)
    second = match.group(4)

    if is_range:
        hour_2 = match.group(6)
        minute_2 = match.group(7)
        second_2 = match.group(9)

    result = f"{num2str(hour)}点"
    if minute.lstrip('0'):
        result += f"{_time_num2str(minute)}分"
    if second and second.lstrip('0'):
        result += f"{_time_num2str(second)}秒"

    if is_range:
        result += "至"
        result += f"{num2str(hour_2)}点"
        if minute_2.lstrip('0'):
            result += f"{_time_num2str(minute_2)}分"
        if second_2 and second_2.lstrip('0'):
            result += f"{_time_num2str(second_2)}秒"

    return result


# Chinese-style date, e.g. "2021年8月18日" / "86年3月1号".
RE_DATE = re.compile(r'(\d{4}|\d{2})年'
                     r'((0?[1-9]|1[0-2])月)?'
                     r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?')


def replace_date(match) -> str:
    """Verbalize a Chinese-style date match.
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    year = match.group(1)
    month = match.group(3)
    day = match.group(5)
    result = ""
    if year:
        # Years are read digit by digit: "1995" -> "一九九五".
        result += f"{verbalize_digit(year)}年"
    if month:
        result += f"{verbalize_cardinal(month)}月"
    if day:
        # group(9) is the matched day suffix, 日 or 号.
        result += f"{verbalize_cardinal(day)}{match.group(9)}"
    return result


# YY/MM/DD or YY-MM-DD dates separated by /, - or . (用 / 或者 - 分隔的日期)
RE_DATE2 = re.compile(
    r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])')


def replace_date2(match) -> str:
    """Verbalize an ISO-style date match, e.g. "2021-08-18".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    year = match.group(1)
    month = match.group(3)
    day = match.group(4)
    result = ""
    if year:
        result += f"{verbalize_digit(year)}年"
    if month:
        result += f"{verbalize_cardinal(month)}月"
    if day:
        result += f"{verbalize_cardinal(day)}日"
    return result
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import string

from pypinyin.constants import SUPPORT_UCS4

# Full-width <-> half-width conversion tables.
# Full-width ASCII letters -> half-width (num: 52)
F2H_ASCII_LETTERS = {
    chr(ord(char) + 65248): char
    for char in string.ascii_letters
}

# Half-width ASCII letters -> full-width
H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}

# Full-width digits -> half-width (num: 10)
F2H_DIGITS = {chr(ord(char) + 65248): char for char in string.digits}
# Half-width digits -> full-width
H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}

# Full-width punctuation -> half-width (num: 32)
F2H_PUNCTUATIONS = {chr(ord(char) + 65248): char for char in string.punctuation}
# Half-width punctuation -> full-width
H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}

# Space (num: 1): ideographic space <-> ASCII space
F2H_SPACE = {'\u3000': ' '}
H2F_SPACE = {' ': '\u3000'}

# Runs of characters that are NOT "Chinese characters with pinyin readings";
# used to extract non-standard words (NSW) for normalization.
if SUPPORT_UCS4:
    RE_NSW = re.compile(r'(?:[^'
                        r'\u3007'  # 〇
                        r'\u3400-\u4dbf'  # CJK Ext A: [3400-4DBF]
                        r'\u4e00-\u9fff'  # CJK basic: [4E00-9FFF]
                        r'\uf900-\ufaff'  # CJK compat: [F900-FAFF]
                        r'\U00020000-\U0002A6DF'  # CJK Ext B: [20000-2A6DF]
                        r'\U0002A703-\U0002B73F'  # CJK Ext C: [2A700-2B73F]; NOTE(review): the range starts at 2A703, not 2A700 as the label says — confirm
                        r'\U0002B740-\U0002B81D'  # CJK Ext D: [2B740-2B81D]
                        r'\U0002F80A-\U0002FA1F'  # CJK compat ext: [2F800-2FA1F]; NOTE(review): starts at 2F80A, not 2F800 as the label says — confirm
                        r'])+')
else:
    # Narrow build: only BMP ranges are available.
    RE_NSW = re.compile(  # pragma: no cover
        r'(?:[^'
        r'\u3007'  # 〇
        r'\u3400-\u4dbf'  # CJK Ext A: [3400-4DBF]
        r'\u4e00-\u9fff'  # CJK basic: [4E00-9FFF]
        r'\uf900-\ufaff'  # CJK compat: [F900-FAFF]
        r'])+')
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
"""
Rules to verbalize numbers into Chinese characters.
https://zh.wikipedia.org/wiki/中文数字#現代中文
"""
import re
from collections import OrderedDict
from typing import List

# Single digits read in isolation: '0' -> '零', ..., '9' -> '九'.
DIGITS = {str(i): tran for i, tran in enumerate('零一二三四五六七八九')}
# Positional units keyed by the power of ten they represent.
UNITS = OrderedDict({
    1: '十',
    2: '百',
    3: '千',
    4: '万',
    8: '亿',
})

# Common measure words (quantifiers) that may follow a number.
COM_QUANTIFIERS = '(朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)'

# Fraction, e.g. "-3/4". (分数表达式)
RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)')


def replace_frac(match) -> str:
    """Verbalize a fraction, e.g. "7/12" -> "十二分之七".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    sign = "负" if match.group(1) else ""
    # Chinese reads the denominator first: "<den>分之<num>".
    numerator = num2str(match.group(2))
    denominator = num2str(match.group(3))
    return f"{sign}{denominator}分之{numerator}"


# Percentage, e.g. "-12.5%". (百分数表达式)
RE_PERCENTAGE = re.compile(r'(-?)(\d+(\.\d+)?)%')


def replace_percentage(match) -> str:
    """Verbalize a percentage, e.g. "62%" -> "百分之六十二".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    sign = "负" if match.group(1) else ""
    percent = num2str(match.group(2))
    return f"{sign}百分之{percent}"


# Negative integer, e.g. "-10".
RE_INTEGER = re.compile(r'(-)' r'(\d+)')


def replace_negative_num(match) -> str:
    """Verbalize a signed integer, e.g. "-10" -> "负十".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    sign = "负" if match.group(1) else ""
    number = num2str(match.group(2))
    return f"{sign}{number}"


# Serial numbers / ids with 3 or more digits, e.g. "00078"; these are
# read digit by digit, not as cardinals.
RE_DEFAULT_NUM = re.compile(r'\d{3}\d*')


def replace_default_num(match):
    """Verbalize a serial number digit by digit, e.g. "00078" -> "零零零七八".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    return verbalize_digit(match.group(0))


# Pure decimals.
RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' r'|(\.(\d+))')
# Positive integer + measure word.
RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几])?" + COM_QUANTIFIERS)
RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))')


def replace_positive_quantifier(match) -> str:
    """Verbalize "<number>[多|余|几]<measure word>", e.g. "3个" -> "三个".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    number = num2str(match.group(1))
    approx = match.group(2) if match.group(2) else ""  # optional 多/余/几
    quantifier = match.group(3)
    return f"{number}{approx}{quantifier}"


def replace_number(match) -> str:
    """Verbalize a signed number or a bare decimal, e.g. "-10" -> "负十",
    ".5" -> "零点五".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    sign = match.group(1)
    number = match.group(2)
    pure_decimal = match.group(5)
    if pure_decimal:
        # ".5" style: no integer part.
        return num2str(pure_decimal)
    prefix = "负" if sign else ""
    return f"{prefix}{num2str(number)}"


# Numeric range, e.g. "12~23" or "-1.5~2".
# match.group(1) and match.group(8) are copies of RE_NUMBER.
RE_RANGE = re.compile(
    r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))[-~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))')


def replace_range(match) -> str:
    """Verbalize a numeric range, e.g. "12~23" -> "十二到二十三".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    first, second = match.group(1), match.group(8)
    first = RE_NUMBER.sub(replace_number, first)
    second = RE_NUMBER.sub(replace_number, second)
    return f"{first}到{second}"


def _get_value(value_string: str, use_zero: bool=True) -> List[str]:
    """Recursively split a digit string at its largest positional unit and
    return the verbalized symbols, e.g. "204" -> ['二', '百', '零', '四']."""
    stripped = value_string.lstrip('0')
    if len(stripped) == 0:
        return []
    elif len(stripped) == 1:
        # A leading zero swallowed by a unit split is read out ("零四")
        # unless suppressed by use_zero.
        if use_zero and len(stripped) < len(value_string):
            return [DIGITS['0'], DIGITS[stripped]]
        else:
            return [DIGITS[stripped]]
    else:
        # Largest unit whose power is strictly below the digit count.
        largest_unit = next(
            power for power in reversed(UNITS.keys()) if power < len(stripped))
        first_part = value_string[:-largest_unit]
        second_part = value_string[-largest_unit:]
        return _get_value(first_part) + [UNITS[largest_unit]] + _get_value(
            second_part)


def verbalize_cardinal(value_string: str) -> str:
    """Verbalize a cardinal integer string, e.g. "24" -> "二十四"."""
    if not value_string:
        return ''

    # 000 -> '零' , 0 -> '零'
    value_string = value_string.lstrip('0')
    if len(value_string) == 0:
        return DIGITS['0']

    result_symbols = _get_value(value_string)
    # verbalized number starting with '一十*' is abbreviated as `十*`
    if len(result_symbols) >= 2 and result_symbols[0] == DIGITS[
            '1'] and result_symbols[1] == UNITS[1]:
        result_symbols = result_symbols[1:]
    return ''.join(result_symbols)


def verbalize_digit(value_string: str, alt_one=False) -> str:
    """Verbalize a digit string digit by digit, e.g. "119" -> "一一九".

    When ``alt_one`` is True, '一' is read as '幺' (customary for phone
    numbers and serial numbers).
    """
    result = ''.join(DIGITS[digit] for digit in value_string)
    if alt_one:
        # Fix: the original discarded str.replace()'s return value
        # (strings are immutable), so alt_one had no effect.
        result = result.replace("一", "幺")
    return result


def num2str(value_string: str) -> str:
    """Verbalize a number string with an optional decimal part,
    e.g. "3.20" -> "三点二".

    Raises
    ----------
    ValueError
        If the string contains more than one decimal point.
    """
    integer_decimal = value_string.split('.')
    if len(integer_decimal) == 1:
        integer, decimal = integer_decimal[0], ''
    elif len(integer_decimal) == 2:
        integer, decimal = integer_decimal
    else:
        # Fix: removed a stray '$' that the original interpolated into
        # the error message.
        raise ValueError(
            f"The value string: '{value_string}' has more than one point in it."
        )

    result = verbalize_cardinal(integer)

    decimal = decimal.rstrip('0')
    if decimal:
        # '.22' is verbalized as '零点二二'
        # '3.20' is verbalized as '三点二'
        result = result if result else "零"
        result += '点' + verbalize_digit(decimal)
    return result
str: 34 | if mobile: 35 | sp_parts = phone_string.strip('+').split() 36 | result = ','.join( 37 | [verbalize_digit(part, alt_one=True) for part in sp_parts]) 38 | return result 39 | else: 40 | sil_parts = phone_string.split('-') 41 | result = ','.join( 42 | [verbalize_digit(part, alt_one=True) for part in sil_parts]) 43 | return result 44 | 45 | 46 | def replace_phone(match) -> str: 47 | """ 48 | Parameters 49 | ---------- 50 | match : re.Match 51 | Returns 52 | ---------- 53 | str 54 | """ 55 | return phone2str(match.group(0), mobile=False) 56 | 57 | 58 | def replace_mobile(match) -> str: 59 | """ 60 | Parameters 61 | ---------- 62 | match : re.Match 63 | Returns 64 | ---------- 65 | str 66 | """ 67 | return phone2str(match.group(0)) 68 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/zh_normalization/quantifier.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import re 15 | 16 | from .num import num2str 17 | 18 | # 温度表达式,温度会影响负号的读法 19 | # -3°C 零下三度 20 | RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)') 21 | 22 | 23 | def replace_temperature(match) -> str: 24 | """ 25 | Parameters 26 | ---------- 27 | match : re.Match 28 | Returns 29 | ---------- 30 | str 31 | """ 32 | sign = match.group(1) 33 | temperature = match.group(2) 34 | unit = match.group(3) 35 | sign: str = "零下" if sign else "" 36 | temperature: str = num2str(temperature) 37 | unit: str = "摄氏度" if unit == "摄氏度" else "度" 38 | result = f"{sign}{temperature}{unit}" 39 | return result 40 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/zh_normalization/text_normlization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import re 15 | from typing import List 16 | 17 | from .char_convert import tranditional_to_simplified 18 | from .chronology import RE_DATE 19 | from .chronology import RE_DATE2 20 | from .chronology import RE_TIME 21 | from .chronology import RE_TIME_RANGE 22 | from .chronology import replace_date 23 | from .chronology import replace_date2 24 | from .chronology import replace_time 25 | from .constants import F2H_ASCII_LETTERS 26 | from .constants import F2H_DIGITS 27 | from .constants import F2H_SPACE 28 | from .num import RE_DECIMAL_NUM 29 | from .num import RE_DEFAULT_NUM 30 | from .num import RE_FRAC 31 | from .num import RE_INTEGER 32 | from .num import RE_NUMBER 33 | from .num import RE_PERCENTAGE 34 | from .num import RE_POSITIVE_QUANTIFIERS 35 | from .num import RE_RANGE 36 | from .num import replace_default_num 37 | from .num import replace_frac 38 | from .num import replace_negative_num 39 | from .num import replace_number 40 | from .num import replace_percentage 41 | from .num import replace_positive_quantifier 42 | from .num import replace_range 43 | from .phonecode import RE_MOBILE_PHONE 44 | from .phonecode import RE_NATIONAL_UNIFORM_NUMBER 45 | from .phonecode import RE_TELEPHONE 46 | from .phonecode import replace_mobile 47 | from .phonecode import replace_phone 48 | from .quantifier import RE_TEMPERATURE 49 | from .quantifier import replace_temperature 50 | 51 | 52 | class TextNormalizer(): 53 | def __init__(self): 54 | self.SENTENCE_SPLITOR = re.compile(r'([:,;。?!,;?!][”’]?)')  # sentence-ending punctuation, optionally followed by a closing quote 55 | 56 | def _split(self, text: str) -> List[str]: 57 | """Split long text into sentences with sentence-splitting punctuations. 58 | Parameters 59 | ---------- 60 | text : str 61 | The input text. 62 | Returns 63 | ------- 64 | List[str] 65 | Sentences.
66 | """ 67 | # Only for pure Chinese here 68 | text = text.replace(" ", "")  # spaces carry no meaning in pure-Chinese text, so drop them before splitting 69 | text = self.SENTENCE_SPLITOR.sub(r'\1\n', text) 70 | text = text.strip() 71 | sentences = [sentence.strip() for sentence in re.split(r'\n+', text)] 72 | return sentences 73 | 74 | def normalize_sentence(self, sentence: str) -> str: 75 | # basic character conversions 76 | sentence = tranditional_to_simplified(sentence) 77 | sentence = sentence.translate(F2H_ASCII_LETTERS).translate( 78 | F2H_DIGITS).translate(F2H_SPACE) 79 | 80 | # number related NSW verbalization 81 | sentence = RE_DATE.sub(replace_date, sentence)  # NOTE: rule order matters — dates/times run before generic number rules 82 | sentence = RE_DATE2.sub(replace_date2, sentence) 83 | 84 | # range first 85 | sentence = RE_TIME_RANGE.sub(replace_time, sentence) 86 | sentence = RE_TIME.sub(replace_time, sentence) 87 | 88 | sentence = RE_TEMPERATURE.sub(replace_temperature, sentence) 89 | sentence = RE_FRAC.sub(replace_frac, sentence) 90 | sentence = RE_PERCENTAGE.sub(replace_percentage, sentence) 91 | sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence) 92 | 93 | sentence = RE_TELEPHONE.sub(replace_phone, sentence) 94 | sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence) 95 | 96 | sentence = RE_RANGE.sub(replace_range, sentence) 97 | sentence = RE_INTEGER.sub(replace_negative_num, sentence) 98 | sentence = RE_DECIMAL_NUM.sub(replace_number, sentence) 99 | sentence = RE_POSITIVE_QUANTIFIERS.sub(replace_positive_quantifier, 100 | sentence) 101 | sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence) 102 | sentence = RE_NUMBER.sub(replace_number, sentence) 103 | 104 | return sentence 105 | 106 | def normalize(self, text: str) -> List[str]: 107 | sentences = self._split(text) 108 | 109 | sentences = [self.normalize_sentence(sent) for sent in sentences] 110 | return sentences 111 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/001.wav:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/001.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/002.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/003.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/004.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/004.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/005.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/005.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/006.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/006.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/007.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/007.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/008.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/008.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/009.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/009.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/010.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/010.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/011.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/011.wav 
-------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/012.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/012.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/013.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/013.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/014.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/014.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/015.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/015.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/016.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/016.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/017.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/017.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/requirements.txt: -------------------------------------------------------------------------------- 1 | g2p_en==2.1.0 2 | g2pM 3 | inflect==5.3.0 4 | jieba==0.42.1 5 | numpy 6 | onnxruntime==1.10.0 7 | paddlepaddle 8 | pypinyin==0.44.0 9 | SoundFile==0.10.3.post1 10 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/sentences.txt: -------------------------------------------------------------------------------- 1 | 001 凯莫瑞安联合体的经济崩溃,迫在眉睫。 2 | 002 对于所有想要离开那片废土,去寻找更美好生活的人来说。 3 | 003 克哈,是你们所有人安全的港湾。 4 | 004 为了保护尤摩扬人民不受异虫的残害,我所做的,比他们自己的领导委员会都多。 5 | 005 无论他们如何诽谤我,我将继续为所有泰伦人的最大利益,而努力奋斗。 6 | 006 身为你们的元首,我带领泰伦人实现了人类统治领地和经济的扩张。 7 | 007 我们将继续成长,用行动回击那些只会说风凉话,不愿意和我们相向而行的害群之马。 8 | 008 帝国武装力量,无数的优秀儿女,正时刻守卫着我们的家园大门,但是他们孤木难支。 9 | 009 凡是今天应征入伍者,所获的所有刑罚罪责,减半。 10 | 010 激进分子和异见者希望你们一听见枪声,就背弃多年的和平与繁荣。 11 | 011 他们没有勇气和能力,带领人类穿越一个充满危险的星系。 12 | 012 法治是我们的命脉,然而它却受到前所未有的挑战。 13 | 013 我将恢复我们帝国的荣光,绝不会向任何外星势力低头。 14 | 014 我已经驯服了异虫,荡平了星灵。如今它们的创造者,想要夺走我们拥有的一切。 15 | 015 永远记住,谁才是最能保护你们的人。 16 | 016 不要听信别人的谗言,我不是什么克隆人。 -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/tts2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import time 15 | from pathlib import Path 16 | 17 | import soundfile as sf 18 | 19 | from acoustic import SpeedySpeechAcoustic 20 | from frontend.zh_frontend import Frontend 21 | from utils import mkdir, read_txt 22 | from vocoder import PWGANVocoder 23 | 24 | print('初始化前处理部分') 25 | phones_dict = 'resources/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt' 26 | tones_dict = 'resources/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt' 27 | frontend = Frontend(phone_vocab_path=phones_dict, 28 | tone_vocab_path=tones_dict) 29 | print("frontend done!") 30 | 31 | print('初始化提取特征模型') 32 | speedyspeech_dir = Path('resources/models/speedyspeech_csmsc') 33 | pdmodel_path = str(speedyspeech_dir / 'speedyspeech_csmsc.pdmodel') 34 | pdiparam_path = str(speedyspeech_dir / 'speedyspeech_csmsc.pdiparams') 35 | 36 | am_predictor = SpeedySpeechAcoustic(pdmodel_path, pdiparam_path) 37 | print('am_predictor done!') 38 | 39 | print('初始化合成wav模型') 40 | pwgan_model_path = 'resources/models/pwgan_csmsc/pwgan_csmsc.onnx' 41 | voc_predictor = PWGANVocoder(pwgan_model_path) 42 | 43 | save_wav_dir = 'infer_result' 44 | mkdir(save_wav_dir) 45 | 46 | print('合成指定句子') 47 | sentences_path = 'sentences.txt' 48 | sentences = read_txt(sentences_path) 49 | 50 | for sentence_info in sentences: 51 | start = time.time() 52 | 53 | uuid, sentence = sentence_info.split(' ')  # NOTE(review): assumes exactly one space between id and text; a line with extra spaces would raise ValueError — confirm sentences.txt format 54 | 55 | input_ids = frontend.get_input_ids(sentence, 56 | merge_sentences=True, 57 | get_tone_ids=True) 58 | 59 | am_output_data = am_predictor(input_ids) 60 | 61 | wav =
voc_predictor(am_output_data) 62 | 63 | elapse = time.time() - start 64 | 65 | save_wav_path = f'{save_wav_dir}/{uuid}.wav' 66 | sf.write(save_wav_path, wav, samplerate=24000) 67 | 68 | print(f'{save_wav_path} done!\tcost: {elapse}s') 69 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/utils.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | from pathlib import Path 4 | 5 | 6 | def mkdir(dir_path): 7 | Path(dir_path).mkdir(parents=True, exist_ok=True)  # create intermediate dirs; no error if the directory already exists 8 | 9 | 10 | def read_txt(txt_path: str) -> list: 11 | with open(txt_path, 'r', encoding='utf-8') as f: 12 | data = list(map(lambda x: x.rstrip('\n'), f))  # strip only the trailing newline; keep any other trailing whitespace 13 | return data 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/vocoder/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | from .pwgan_csmsc import PWGANVocoder 3 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/vocoder/pwgan_csmsc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # -*- encoding: utf-8 -*- 15 | import onnxruntime as ort 16 | 17 | 18 | class PWGANVocoder(object): 19 | def __init__(self, model_path): 20 | sess_opt = ort.SessionOptions() 21 | sess_opt.log_severity_level = 4  # 4 = fatal only; silences ORT info/warning logs 22 | sess_opt.enable_cpu_mem_arena = False  # disable the CPU memory arena to reduce resident memory usage 23 | self.sess = ort.InferenceSession(model_path, 24 | sess_options=sess_opt) 25 | self.input_name = self.sess.get_inputs()[0].name  # model is single-input; cache the name for run() 26 | 27 | def __call__(self, am_output_data): 28 | wav = self.sess.run(None, {self.input_name: am_output_data})[0]  # None = fetch all outputs; [0] is the waveform 29 | return wav 30 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/README.md: -------------------------------------------------------------------------------- 1 | ### csmsc_tts3 2 | - **支持合成语言**: 中文和数字,不支持英文字母 3 | - 基于[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)下的[TTS3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3)整理而来 4 | - 整个推理引擎只采用`ONNXRuntime` 5 | - 其中PaddleSpeech中提供的预训练模型可以参见[link](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README_cn.md#4-%E9%A2%84%E8%AE%AD%E7%BB%83%E6%A8%A1%E5%9E%8B)。在csmsc_tts3中使用的是: 6 | 7 | |主要部分|具体模型|支持语言| 8 | |:---|:---|:---| 9 | |声学模型|[fastspeech2_csmsc](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/csmsc/tts3/README.md)|zh| 10 | |声码器|[hifigan_csmsc](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/csmsc/voc5/README.md)|zh| 11 | 12 | #### 结果示例 13 |
14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 28 | 29 | 30 |
输入文本合成音频
早上好,今天是2020/10/29,最低温度是-3°C。 25 | 26 |
27 |
31 | 32 |
33 | 34 | 35 | #### 运行步骤 36 | 1. 下载`resources`, [Google Drive](https://drive.google.com/file/d/1xYD9NrTraiDFkwtvg7SkKcETLFfa6mlR/view?usp=sharing) | [百度网盘,提取码:a2nw](https://pan.baidu.com/s/1DbqKTNuWZd0Y9UMVgRaRqQ), 解压到`csmsc_tts3`目录下,最终目录结构如下: 37 | ```text 38 | csmsc_tts3 39 | ├── csmsc_test.txt 40 | ├── requirements.txt 41 | ├── frontend 42 | ├── main.sh 43 | ├── tts3.py 44 | ├── infer_result 45 | ├── resources 46 | │ ├── fastspeech2_csmsc_onnx_0.2.0 47 | │ │ ├── fastspeech2_csmsc.onnx 48 | │ │ └── phone_id_map.txt 49 | │ └── hifigan_csmsc.onnx 50 | └──syn_utils.py 51 | ``` 52 | 53 | 2. 安装`requirements.txt` 54 | ```bash 55 | pip install -r requirements.txt -i https://pypi.douban.com/simple/ 56 | ``` 57 | 58 | 3. 运行`tts3.py` 59 | ```bash 60 | python tts3.py 61 | ``` 62 | or 63 | ```bash 64 | bash main.sh 65 | ``` 66 | 67 | 4. 运行日志如下: 68 | ```text 69 | frontend done! 70 | warm up done! 71 | Building prefix dict from the default dictionary ... 72 | Loading model from cache C:\Users\WANGJI~1\AppData\Local\Temp\jieba.cache 73 | Loading model cost 0.836 seconds. 74 | Prefix dict has been built successfully. 75 | 009901, mel: (331, 80), wave: 99300, time: 1.3718173s, Hz: 72385.938204132, RTF: 0.33155610876132857. 76 | 009902, mel: (288, 80), wave: 86400, time: 1.1350326000000024s, Hz: 76121.49025085453, RTF: 0.3152854722222228. 77 | 009903, mel: (341, 80), wave: 102300, time: 1.4687841000000006s, Hz: 69649.7502651354, RTF: 0.3445812785923755. 
78 | generation speed: 72441.68237053939Hz, RTF: 0.33130097499999983 79 | ``` 80 | 生成结果会保存到`infer_result`目录下 81 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/assets/000001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts3/assets/000001.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/assets/audio_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts3/assets/audio_icon.png -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/csmsc_test.txt: -------------------------------------------------------------------------------- 1 | 000001 早上好,今天是2020/10/29,最低温度是-3°C。 2 | 009901 昨日,这名伤者与医生全部被警方依法刑事拘留。 3 | 009902 钱伟长想到上海来办学校是经过深思熟虑的。 4 | 009903 她见我一进门就骂,吃饭时也骂,骂得我抬不起头。 5 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .generate_lexicon import * 15 | from .normalizer import * 16 | from .phonectic import * 17 | from .punctuation import * 18 | from .tone_sandhi import * 19 | from .vocab import * 20 | from .zh_normalization import * 21 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/arpabet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from paddlespeech.t2s.frontend.phonectic import Phonetics 15 | """ 16 | A phonology system with ARPABET symbols and limited punctuations. The G2P 17 | conversion is done by g2p_en. 18 | 19 | Note that g2p_en does not handle words with hypen well. So make sure the input 20 | sentence is first normalized. 21 | """ 22 | from paddlespeech.t2s.frontend.vocab import Vocab 23 | from g2p_en import G2p 24 | 25 | 26 | class ARPABET(Phonetics): 27 | """A phonology for English that uses ARPABET as the phoneme vocabulary. 28 | See http://www.speech.cs.cmu.edu/cgi-bin/cmudict for more details. 
29 | Phoneme Example Translation 30 | ------- ------- ----------- 31 | AA odd AA D 32 | AE at AE T 33 | AH hut HH AH T 34 | AO ought AO T 35 | AW cow K AW 36 | AY hide HH AY D 37 | B be B IY 38 | CH cheese CH IY Z 39 | D dee D IY 40 | DH thee DH IY 41 | EH Ed EH D 42 | ER hurt HH ER T 43 | EY ate EY T 44 | F fee F IY 45 | G green G R IY N 46 | HH he HH IY 47 | IH it IH T 48 | IY eat IY T 49 | JH gee JH IY 50 | K key K IY 51 | L lee L IY 52 | M me M IY 53 | N knee N IY 54 | NG ping P IH NG 55 | OW oat OW T 56 | OY toy T OY 57 | P pee P IY 58 | R read R IY D 59 | S sea S IY 60 | SH she SH IY 61 | T tea T IY 62 | TH theta TH EY T AH 63 | UH hood HH UH D 64 | UW two T UW 65 | V vee V IY 66 | W we W IY 67 | Y yield Y IY L D 68 | Z zee Z IY 69 | ZH seizure S IY ZH ER 70 | """ 71 | phonemes = [ 72 | 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'B', 'CH', 'D', 'DH', 'EH', 'ER', 73 | 'EY', 'F', 'G', 'HH', 'IH', 'IY', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 74 | 'OY', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UW', 'UH', 'V', 'W', 'Y', 'Z', 75 | 'ZH' 76 | ] 77 | punctuations = [',', '.', '?', '!'] 78 | symbols = phonemes + punctuations 79 | _stress_to_no_stress_ = { 80 | 'AA0': 'AA', 81 | 'AA1': 'AA', 82 | 'AA2': 'AA', 83 | 'AE0': 'AE', 84 | 'AE1': 'AE', 85 | 'AE2': 'AE', 86 | 'AH0': 'AH', 87 | 'AH1': 'AH', 88 | 'AH2': 'AH', 89 | 'AO0': 'AO', 90 | 'AO1': 'AO', 91 | 'AO2': 'AO', 92 | 'AW0': 'AW', 93 | 'AW1': 'AW', 94 | 'AW2': 'AW', 95 | 'AY0': 'AY', 96 | 'AY1': 'AY', 97 | 'AY2': 'AY', 98 | 'EH0': 'EH', 99 | 'EH1': 'EH', 100 | 'EH2': 'EH', 101 | 'ER0': 'ER', 102 | 'ER1': 'ER', 103 | 'ER2': 'ER', 104 | 'EY0': 'EY', 105 | 'EY1': 'EY', 106 | 'EY2': 'EY', 107 | 'IH0': 'IH', 108 | 'IH1': 'IH', 109 | 'IH2': 'IH', 110 | 'IY0': 'IY', 111 | 'IY1': 'IY', 112 | 'IY2': 'IY', 113 | 'OW0': 'OW', 114 | 'OW1': 'OW', 115 | 'OW2': 'OW', 116 | 'OY0': 'OY', 117 | 'OY1': 'OY', 118 | 'OY2': 'OY', 119 | 'UH0': 'UH', 120 | 'UH1': 'UH', 121 | 'UH2': 'UH', 122 | 'UW0': 'UW', 123 | 'UW1': 'UW', 124 | 'UW2': 'UW' 125 | } 126 
| 127 | def __init__(self): 128 | self.backend = G2p() 129 | self.vocab = Vocab(self.phonemes + self.punctuations) 130 | 131 | def _remove_vowels(self, phone): 132 | return self._stress_to_no_stress_.get(phone, phone) 133 | 134 | def phoneticize(self, sentence, add_start_end=False): 135 | """ Normalize the input text sequence and convert it into pronunciation sequence. 136 | Args: 137 | sentence (str): The input text sequence. 138 | 139 | Returns: 140 | List[str]: The list of pronunciation sequence. 141 | """ 142 | phonemes = [ 143 | self._remove_vowels(item) for item in self.backend(sentence) 144 | ] 145 | if add_start_end: 146 | start = self.vocab.start_symbol 147 | end = self.vocab.end_symbol 148 | phonemes = [start] + phonemes + [end] 149 | phonemes = [item for item in phonemes if item in self.vocab.stoi] 150 | return phonemes 151 | 152 | def numericalize(self, phonemes): 153 | """ Convert pronunciation sequence into pronunciation id sequence. 154 | 155 | Args: 156 | phonemes (List[str]): The list of pronunciation sequence. 157 | 158 | Returns: 159 | List[int]: The list of pronunciation id sequence. 160 | """ 161 | ids = [self.vocab.lookup(item) for item in phonemes] 162 | return ids 163 | 164 | def reverse(self, ids): 165 | """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence. 166 | 167 | Args: 168 | ids( List[int]): The list of pronunciation id sequence. 169 | 170 | Returns: 171 | List[str]: 172 | The list of pronunciation sequence. 173 | """ 174 | return [self.vocab.reverse(i) for i in ids] 175 | 176 | def __call__(self, sentence, add_start_end=False): 177 | """ Convert the input text sequence into pronunciation id sequence. 178 | 179 | Args: 180 | sentence (str): The input text sequence. 181 | 182 | Returns: 183 | List[str]: The list of pronunciation id sequence. 
184 | """ 185 | return self.numericalize( 186 | self.phoneticize(sentence, add_start_end=add_start_end)) 187 | 188 | @property 189 | def vocab_size(self): 190 | """ Vocab size. 191 | """ 192 | # 47 = 39 phones + 4 punctuations + 4 special tokens 193 | return len(self.vocab) 194 | 195 | 196 | class ARPABETWithStress(Phonetics): 197 | phonemes = [ 198 | 'AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0', 199 | 'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D', 200 | 'DH', 'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2', 201 | 'F', 'G', 'HH', 'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K', 202 | 'L', 'M', 'N', 'NG', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R', 203 | 'S', 'SH', 'T', 'TH', 'UH0', 'UH1', 'UH2', 'UW0', 'UW1', 'UW2', 'V', 204 | 'W', 'Y', 'Z', 'ZH' 205 | ] 206 | punctuations = [',', '.', '?', '!'] 207 | symbols = phonemes + punctuations 208 | 209 | def __init__(self): 210 | self.backend = G2p() 211 | self.vocab = Vocab(self.phonemes + self.punctuations) 212 | 213 | def phoneticize(self, sentence, add_start_end=False): 214 | """ Normalize the input text sequence and convert it into pronunciation sequence. 215 | 216 | Args: 217 | sentence (str): The input text sequence. 218 | 219 | Returns: 220 | List[str]: The list of pronunciation sequence. 221 | """ 222 | phonemes = self.backend(sentence) 223 | if add_start_end: 224 | start = self.vocab.start_symbol 225 | end = self.vocab.end_symbol 226 | phonemes = [start] + phonemes + [end] 227 | phonemes = [item for item in phonemes if item in self.vocab.stoi] 228 | return phonemes 229 | 230 | def numericalize(self, phonemes): 231 | """ Convert pronunciation sequence into pronunciation id sequence. 232 | 233 | Args: 234 | phonemes (List[str]): The list of pronunciation sequence. 235 | 236 | Returns: 237 | List[int]: The list of pronunciation id sequence. 
238 | """ 239 | ids = [self.vocab.lookup(item) for item in phonemes] 240 | return ids 241 | 242 | def reverse(self, ids): 243 | """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence. 244 | Args: 245 | ids (List[int]): The list of pronunciation id sequence. 246 | 247 | Returns: 248 | List[str]: The list of pronunciation sequence. 249 | """ 250 | return [self.vocab.reverse(i) for i in ids] 251 | 252 | def __call__(self, sentence, add_start_end=False): 253 | """ Convert the input text sequence into pronunciation id sequence. 254 | Args: 255 | sentence (str): The input text sequence. 256 | 257 | Returns: 258 | List[str]: The list of pronunciation id sequence. 259 | """ 260 | return self.numericalize( 261 | self.phoneticize(sentence, add_start_end=add_start_end)) 262 | 263 | @property 264 | def vocab_size(self): 265 | """ Vocab size. 266 | """ 267 | # 77 = 69 phones + 4 punctuations + 4 special tokens 268 | return len(self.vocab) 269 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/generate_lexicon.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Design principles: https://zhuanlan.zhihu.com/p/349600439 15 | """Generate lexicon and symbols for Mandarin Chinese phonology. 
16 | The lexicon is used for Montreal Force Aligner. 17 | Note that syllables are used as word in this lexicon. Since syllables rather 18 | than words are used in transcriptions produced by `reorganize_baker.py`. 19 | We make this choice to better leverage other software for chinese text to 20 | pinyin tools like pypinyin. This is the convention for G2P in Chinese. 21 | """ 22 | import re 23 | from collections import OrderedDict 24 | 25 | INITIALS = [ 26 | 'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh', 27 | 'r', 'z', 'c', 's', 'j', 'q', 'x' 28 | ] 29 | 30 | FINALS = [ 31 | 'a', 'ai', 'ao', 'an', 'ang', 'e', 'er', 'ei', 'en', 'eng', 'o', 'ou', 32 | 'ong', 'ii', 'iii', 'i', 'ia', 'iao', 'ian', 'iang', 'ie', 'io', 'iou', 33 | 'iong', 'in', 'ing', 'u', 'ua', 'uai', 'uan', 'uang', 'uei', 'uo', 'uen', 34 | 'ueng', 'v', 've', 'van', 'vn' 35 | ] 36 | 37 | SPECIALS = ['sil', 'sp'] 38 | 39 | 40 | def rule(C, V, R, T): 41 | """Generate a syllable given the initial, the final, erhua indicator, and tone. 42 | Orthographical rules for pinyin are applied. (special case for y, w, ui, un, iu) 43 | 44 | Note that in this system, 'ü' is alway written as 'v' when appeared in phoneme, but converted to 45 | 'u' in syllables when certain conditions are satisfied. 46 | 47 | 'i' is distinguished when appeared in phonemes, and separated into 3 categories, 'i', 'ii' and 'iii'. 48 | Erhua is is possibly applied to every finals, except for finals that already ends with 'r'. 49 | When a syllable is impossible or does not have any characters with this pronunciation, return None 50 | to filter it out. 
51 | """ 52 | 53 | # 不可拼的音节, ii 只能和 z, c, s 拼 54 | if V in ["ii"] and (C not in ['z', 'c', 's']): 55 | return None 56 | # iii 只能和 zh, ch, sh, r 拼 57 | if V in ['iii'] and (C not in ['zh', 'ch', 'sh', 'r']): 58 | return None 59 | 60 | # 齐齿呼或者撮口呼不能和 f, g, k, h, zh, ch, sh, r, z, c, s 61 | if (V not in ['ii', 'iii']) and V[0] in ['i', 'v'] and ( 62 | C in ['f', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's']): 63 | return None 64 | 65 | # 撮口呼只能和 j, q, x l, n 拼 66 | if V.startswith("v"): 67 | # v, ve 只能和 j ,q , x, n, l 拼 68 | if V in ['v', 've']: 69 | if C not in ['j', 'q', 'x', 'n', 'l', '']: 70 | return None 71 | # 其他只能和 j, q, x 拼 72 | else: 73 | if C not in ['j', 'q', 'x', '']: 74 | return None 75 | 76 | # j, q, x 只能和齐齿呼或者撮口呼拼 77 | if (C in ['j', 'q', 'x']) and not ( 78 | (V not in ['ii', 'iii']) and V[0] in ['i', 'v']): 79 | return None 80 | 81 | # b, p ,m, f 不能和合口呼拼,除了 u 之外 82 | # bm p, m, f 不能和撮口呼拼 83 | if (C in ['b', 'p', 'm', 'f']) and ((V[0] in ['u', 'v'] and V != "u") or 84 | V == 'ong'): 85 | return None 86 | 87 | # ua, uai, uang 不能和 d, t, n, l, r, z, c, s 拼 88 | if V in ['ua', 'uai', 89 | 'uang'] and C in ['d', 't', 'n', 'l', 'r', 'z', 'c', 's']: 90 | return None 91 | 92 | # sh 和 ong 不能拼 93 | if V == 'ong' and C in ['sh']: 94 | return None 95 | 96 | # o 和 gkh, zh ch sh r z c s 不能拼 97 | if V == "o" and C in [ 98 | 'd', 't', 'n', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's' 99 | ]: 100 | return None 101 | 102 | # ueng 只是 weng 这个 ad-hoc 其他情况下都是 ong 103 | if V == 'ueng' and C != '': 104 | return 105 | 106 | # 非儿化的 er 只能单独存在 107 | if V == 'er' and C != '': 108 | return None 109 | 110 | if C == '': 111 | if V in ["i", "in", "ing"]: 112 | C = 'y' 113 | elif V == 'u': 114 | C = 'w' 115 | elif V.startswith('i') and V not in ["ii", "iii"]: 116 | C = 'y' 117 | V = V[1:] 118 | elif V.startswith('u'): 119 | C = 'w' 120 | V = V[1:] 121 | elif V.startswith('v'): 122 | C = 'yu' 123 | V = V[1:] 124 | else: 125 | if C in ['j', 'q', 'x']: 126 | if 
V.startswith('v'): 127 | V = re.sub('v', 'u', V) 128 | if V == 'iou': 129 | V = 'iu' 130 | elif V == 'uei': 131 | V = 'ui' 132 | elif V == 'uen': 133 | V = 'un' 134 | result = C + V 135 | 136 | # Filter er 不能再儿化 137 | if result.endswith('r') and R == 'r': 138 | return None 139 | 140 | # ii and iii, change back to i 141 | result = re.sub(r'i+', 'i', result) 142 | 143 | result = result + R + T 144 | return result 145 | 146 | 147 | def generate_lexicon(with_tone=False, with_erhua=False): 148 | """Generate lexicon for Mandarin Chinese.""" 149 | syllables = OrderedDict() 150 | 151 | for C in [''] + INITIALS: 152 | for V in FINALS: 153 | for R in [''] if not with_erhua else ['', 'r']: 154 | for T in [''] if not with_tone else ['1', '2', '3', '4', '5']: 155 | result = rule(C, V, R, T) 156 | if result: 157 | syllables[result] = f'{C} {V}{R}{T}' 158 | return syllables 159 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from .normalizer import * 15 | from .numbers import * 16 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/abbrrviation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/acronyms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/normalizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | import unicodedata 16 | from builtins import str as unicode 17 | 18 | from .numbers import normalize_numbers 19 | 20 | 21 | def normalize(sentence): 22 | """ Normalize English text. 23 | """ 24 | # preprocessing 25 | sentence = unicode(sentence) 26 | sentence = normalize_numbers(sentence) 27 | sentence = ''.join( 28 | char for char in unicodedata.normalize('NFD', sentence) 29 | if unicodedata.category(char) != 'Mn') # Strip accents 30 | sentence = sentence.lower() 31 | sentence = re.sub(r"[^ a-z'.,?!\-]", "", sentence) 32 | sentence = sentence.replace("i.e.", "that is") 33 | sentence = sentence.replace("e.g.", "for example") 34 | return sentence 35 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/numbers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # number expansion is not that easy 15 | import re 16 | 17 | import inflect 18 | 19 | _inflect = inflect.engine() 20 | _comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])') 21 | _decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)') 22 | _pounds_re = re.compile(r'£([0-9\,]*[0-9]+)') 23 | _dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)') 24 | _ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)') 25 | _number_re = re.compile(r'[0-9]+') 26 | 27 | 28 | def _remove_commas(m): 29 | return m.group(1).replace(',', '') 30 | 31 | 32 | def _expand_decimal_point(m): 33 | return m.group(1).replace('.', ' point ') 34 | 35 | 36 | def _expand_dollars(m): 37 | match = m.group(1) 38 | parts = match.split('.') 39 | if len(parts) > 2: 40 | return match + ' dollars' # Unexpected format 41 | dollars = int(parts[0]) if parts[0] else 0 42 | cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0 43 | if dollars and cents: 44 | dollar_unit = 'dollar' if dollars == 1 else 'dollars' 45 | cent_unit = 'cent' if cents == 1 else 'cents' 46 | return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit) 47 | elif dollars: 48 | dollar_unit = 'dollar' if dollars == 1 else 'dollars' 49 | return '%s %s' % (dollars, dollar_unit) 50 | elif cents: 51 | cent_unit = 'cent' if cents == 1 else 'cents' 52 | return '%s %s' % (cents, cent_unit) 53 | else: 54 | return 'zero dollars' 55 | 56 | 57 | def _expand_ordinal(m): 
58 | return _inflect.number_to_words(m.group(0)) 59 | 60 | 61 | def _expand_number(m): 62 | num = int(m.group(0)) 63 | if num > 1000 and num < 3000: 64 | if num == 2000: 65 | return 'two thousand' 66 | elif num > 2000 and num < 2010: 67 | return 'two thousand ' + _inflect.number_to_words(num % 100) 68 | elif num % 100 == 0: 69 | return _inflect.number_to_words(num // 100) + ' hundred' 70 | else: 71 | return _inflect.number_to_words( 72 | num, andword='', zero='oh', group=2).replace(', ', ' ') 73 | else: 74 | return _inflect.number_to_words(num, andword='') 75 | 76 | 77 | def normalize_numbers(text): 78 | """ Normalize numbers in English text. 79 | """ 80 | text = re.sub(_comma_number_re, _remove_commas, text) 81 | text = re.sub(_pounds_re, r'\1 pounds', text) 82 | text = re.sub(_dollars_re, _expand_dollars, text) 83 | text = re.sub(_decimal_number_re, _expand_decimal_point, text) 84 | text = re.sub(_ordinal_re, _expand_ordinal, text) 85 | text = re.sub(_number_re, _expand_number, text) 86 | return text 87 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/width.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | def full2half_width(ustr): 17 | half = [] 18 | for u in ustr: 19 | num = ord(u) 20 | if num == 0x3000: # 全角空格变半角 21 | num = 32 22 | elif 0xFF01 <= num <= 0xFF5E: 23 | num -= 0xfee0 24 | u = chr(num) 25 | half.append(u) 26 | return ''.join(half) 27 | 28 | 29 | def half2full_width(ustr): 30 | full = [] 31 | for u in ustr: 32 | num = ord(u) 33 | if num == 32: # 半角空格变全角 34 | num = 0x3000 35 | elif 0x21 <= num <= 0x7E: 36 | num += 0xfee0 37 | u = chr(num) # to unicode 38 | full.append(u) 39 | 40 | return ''.join(full) 41 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/punctuation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | __all__ = ["get_punctuations"] 16 | 17 | EN_PUNCT = [ 18 | " ", 19 | "-", 20 | "...", 21 | ",", 22 | ".", 23 | "?", 24 | "!", 25 | ] 26 | 27 | CN_PUNCT = ["、", ",", ";", ":", "。", "?", "!"] 28 | 29 | 30 | def get_punctuations(lang): 31 | if lang == "en": 32 | return EN_PUNCT 33 | elif lang == "cn": 34 | return CN_PUNCT 35 | else: 36 | raise ValueError(f"language {lang} Not supported") 37 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/vocab.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from collections import OrderedDict 15 | from typing import Iterable 16 | 17 | __all__ = ["Vocab"] 18 | 19 | 20 | class Vocab(object): 21 | """ Vocabulary. 22 | 23 | Args: 24 | symbols (Iterable[str]): Common symbols. 25 | padding_symbol (str, optional): Symbol for pad. Defaults to "". 26 | unk_symbol (str, optional): Symbol for unknow. Defaults to "" 27 | start_symbol (str, optional): Symbol for start. Defaults to "" 28 | end_symbol (str, optional): Symbol for end. 
Defaults to "" 29 | """ 30 | 31 | def __init__(self, 32 | symbols: Iterable[str], 33 | padding_symbol="", 34 | unk_symbol="", 35 | start_symbol="", 36 | end_symbol=""): 37 | self.special_symbols = OrderedDict() 38 | for i, item in enumerate( 39 | [padding_symbol, unk_symbol, start_symbol, end_symbol]): 40 | if item: 41 | self.special_symbols[item] = len(self.special_symbols) 42 | 43 | self.padding_symbol = padding_symbol 44 | self.unk_symbol = unk_symbol 45 | self.start_symbol = start_symbol 46 | self.end_symbol = end_symbol 47 | 48 | self.stoi = OrderedDict() 49 | self.stoi.update(self.special_symbols) 50 | 51 | for i, s in enumerate(symbols): 52 | if s not in self.stoi: 53 | self.stoi[s] = len(self.stoi) 54 | self.itos = {v: k for k, v in self.stoi.items()} 55 | 56 | def __len__(self): 57 | return len(self.stoi) 58 | 59 | @property 60 | def num_specials(self): 61 | """ The number of special symbols. 62 | """ 63 | return len(self.special_symbols) 64 | 65 | # special tokens 66 | @property 67 | def padding_index(self): 68 | """ The index of padding symbol 69 | """ 70 | return self.stoi.get(self.padding_symbol, -1) 71 | 72 | @property 73 | def unk_index(self): 74 | """The index of unknow symbol. 75 | """ 76 | return self.stoi.get(self.unk_symbol, -1) 77 | 78 | @property 79 | def start_index(self): 80 | """The index of start symbol. 81 | """ 82 | return self.stoi.get(self.start_symbol, -1) 83 | 84 | @property 85 | def end_index(self): 86 | """ The index of end symbol. 87 | """ 88 | return self.stoi.get(self.end_symbol, -1) 89 | 90 | def __repr__(self): 91 | fmt = "Vocab(size: {},\nstoi:\n{})" 92 | return fmt.format(len(self), self.stoi) 93 | 94 | def __str__(self): 95 | return self.__repr__() 96 | 97 | def lookup(self, symbol): 98 | """ The index that symbol correspond. 99 | """ 100 | return self.stoi[symbol] 101 | 102 | def reverse(self, index): 103 | """ The symbol thar index cottespond. 
104 | """ 105 | return self.itos[index] 106 | 107 | def add_symbol(self, symbol): 108 | """ Add a new symbol in vocab. 109 | """ 110 | if symbol in self.stoi: 111 | return 112 | N = len(self.stoi) 113 | self.stoi[symbol] = N 114 | self.itos[N] = symbol 115 | 116 | def add_symbols(self, symbols): 117 | """ Add multiple symbols in vocab. 118 | """ 119 | for symbol in symbols: 120 | self.add_symbol(symbol) 121 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/README.md: -------------------------------------------------------------------------------- 1 | ## Supported NSW (Non-Standard-Word) Normalization 2 | 3 | |NSW type|raw|normalized| 4 | |:--|:-|:-| 5 | |serial number|电影中梁朝伟扮演的陈永仁的编号27149|电影中梁朝伟扮演的陈永仁的编号二七一四九| 6 | |cardinal|这块黄金重达324.75克
我们班的最高总分为583分|这块黄金重达三百二十四点七五克
我们班的最高总分为五百八十三分| 7 | |numeric range |12\~23
-1.5\~2|十二到二十三
负一点五到二| 8 | |date|她出生于86年8月18日,她弟弟出生于1995年3月1日|她出生于八六年八月十八日, 她弟弟出生于一九九五年三月一日| 9 | |time|等会请在12:05请通知我|等会请在十二点零五分请通知我| 10 | |temperature|今天的最低气温达到-10°C|今天的最低气温达到零下十度| 11 | |fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票| 12 | |percentage|明天有62%的概率降雨|明天有百分之六十二的概率降雨| 13 | |money|随便来几个价格12块5,34.5元,20.1万|随便来几个价格十二块五,三十四点五元,二十点一万| 14 | |telephone|这是固话0421-33441122<br>
这是手机+86 18544139121|这是固话零四二一三三四四一一二二
这是手机八六一八五四四一三九一二一| 15 | ## References 16 | [Pull requests #658 of DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech/pull/658/files) 17 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .text_normlization import * 15 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/chronology.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import re 15 | 16 | from .num import DIGITS 17 | from .num import num2str 18 | from .num import verbalize_cardinal 19 | from .num import verbalize_digit 20 | 21 | 22 | def _time_num2str(num_string: str) -> str: 23 | """A special case for verbalizing number in time.""" 24 | result = num2str(num_string.lstrip('0')) 25 | if num_string.startswith('0'): 26 | result = DIGITS['0'] + result 27 | return result 28 | 29 | 30 | # 时刻表达式 31 | RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])' 32 | r':([0-5][0-9])' 33 | r'(:([0-5][0-9]))?') 34 | 35 | # 时间范围,如8:30-12:30 36 | RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])' 37 | r':([0-5][0-9])' 38 | r'(:([0-5][0-9]))?' 39 | r'(~|-)' 40 | r'([0-1]?[0-9]|2[0-3])' 41 | r':([0-5][0-9])' 42 | r'(:([0-5][0-9]))?') 43 | 44 | 45 | def replace_time(match) -> str: 46 | """ 47 | Args: 48 | match (re.Match) 49 | Returns: 50 | str 51 | """ 52 | 53 | is_range = len(match.groups()) > 5 54 | 55 | hour = match.group(1) 56 | minute = match.group(2) 57 | second = match.group(4) 58 | 59 | if is_range: 60 | hour_2 = match.group(6) 61 | minute_2 = match.group(7) 62 | second_2 = match.group(9) 63 | 64 | result = f"{num2str(hour)}点" 65 | if minute.lstrip('0'): 66 | if int(minute) == 30: 67 | result += "半" 68 | else: 69 | result += f"{_time_num2str(minute)}分" 70 | if second and second.lstrip('0'): 71 | result += f"{_time_num2str(second)}秒" 72 | 73 | if is_range: 74 | result += "至" 75 | result += f"{num2str(hour_2)}点" 76 | if minute_2.lstrip('0'): 77 | if int(minute) == 30: 78 | result += "半" 79 | else: 80 | result += f"{_time_num2str(minute_2)}分" 81 | if second_2 and second_2.lstrip('0'): 82 | result += f"{_time_num2str(second_2)}秒" 83 | 84 | return result 85 | 86 | 87 | RE_DATE = re.compile(r'(\d{4}|\d{2})年' 88 | r'((0?[1-9]|1[0-2])月)?' 
89 | r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?') 90 | 91 | 92 | def replace_date(match) -> str: 93 | """ 94 | Args: 95 | match (re.Match) 96 | Returns: 97 | str 98 | """ 99 | year = match.group(1) 100 | month = match.group(3) 101 | day = match.group(5) 102 | result = "" 103 | if year: 104 | result += f"{verbalize_digit(year)}年" 105 | if month: 106 | result += f"{verbalize_cardinal(month)}月" 107 | if day: 108 | result += f"{verbalize_cardinal(day)}{match.group(9)}" 109 | return result 110 | 111 | 112 | # 用 / 或者 - 分隔的 YY/MM/DD 或者 YY-MM-DD 日期 113 | RE_DATE2 = re.compile( 114 | r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])') 115 | 116 | 117 | def replace_date2(match) -> str: 118 | """ 119 | Args: 120 | match (re.Match) 121 | Returns: 122 | str 123 | """ 124 | year = match.group(1) 125 | month = match.group(3) 126 | day = match.group(4) 127 | result = "" 128 | if year: 129 | result += f"{verbalize_digit(year)}年" 130 | if month: 131 | result += f"{verbalize_cardinal(month)}月" 132 | if day: 133 | result += f"{verbalize_cardinal(day)}日" 134 | return result 135 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import re
15 | import string
16 | 
17 | from pypinyin.constants import SUPPORT_UCS4
18 | 
19 | # full-width / half-width conversion tables
20 | # full-width -> half-width map for ASCII letters (num: 52)
21 | F2H_ASCII_LETTERS = {
22 |     chr(ord(char) + 65248): char
23 |     for char in string.ascii_letters
24 | }
25 | 
26 | # half-width -> full-width map for ASCII letters
27 | H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}
28 | 
29 | # full-width -> half-width map for digits (num: 10)
30 | F2H_DIGITS = {chr(ord(char) + 65248): char for char in string.digits}
31 | # half-width -> full-width map for digits
32 | H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}
33 | 
34 | # full-width -> half-width map for punctuation (num: 32)
35 | F2H_PUNCTUATIONS = {chr(ord(char) + 65248): char for char in string.punctuation}
36 | # half-width -> full-width map for punctuation
37 | H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}
38 | 
39 | # space (num: 1)
40 | F2H_SPACE = {'\u3000': ' '}
41 | H2F_SPACE = {' ': '\u3000'}
42 | 
43 | # matches runs of characters that are NOT Han characters with a pinyin reading; used to extract NSW (non-standard-word) spans
44 | if SUPPORT_UCS4:
45 |     RE_NSW = re.compile(r'(?:[^'
46 |                         r'\u3007'  # 〇
47 |                         r'\u3400-\u4dbf'  # CJK Ext A: [3400-4DBF]
48 |                         r'\u4e00-\u9fff'  # CJK Unified: [4E00-9FFF]
49 |                         r'\uf900-\ufaff'  # CJK Compat: [F900-FAFF]
50 |                         r'\U00020000-\U0002A6DF'  # CJK Ext B: [20000-2A6DF]
51 |                         r'\U0002A703-\U0002B73F'  # CJK Ext C: [2A700-2B73F] — NOTE(review): pattern starts at 2A703 but the comment says 2A700; confirm which is intended
52 |                         r'\U0002B740-\U0002B81D'  # CJK Ext D: [2B740-2B81D]
53 |                         r'\U0002F80A-\U0002FA1F'  # CJK Compat Ext: [2F800-2FA1F] — NOTE(review): pattern starts at 2F80A but the comment says 2F800; confirm
54 |                         r'])+')
55 | else:
56 |     RE_NSW = re.compile(  # pragma: no cover
57 |         r'(?:[^'
58 |         r'\u3007'  # 〇
59 |         r'\u3400-\u4dbf'  # CJK Ext A: [3400-4DBF]
60 |         r'\u4e00-\u9fff'  # CJK Unified: [4E00-9FFF]
61 |         r'\uf900-\ufaff'  # CJK Compat: [F900-FAFF]
62 |         r'])+')
63 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/num.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Rules to verbalize numbers into Chinese characters.
https://zh.wikipedia.org/wiki/中文数字#現代中文
"""
import re
from collections import OrderedDict
from typing import List

# ASCII digit -> Chinese character, e.g. '3' -> '三'.
DIGITS = {str(i): tran for i, tran in enumerate('零一二三四五六七八九')}
# Positional units keyed by the power of ten they stand for.
# `_get_value` scans these keys from the largest power downwards, so the
# ascending insertion order matters.
UNITS = OrderedDict({
    1: '十',
    2: '百',
    3: '千',
    4: '万',
    8: '亿',
})

# Common Chinese measure words (量词); used to spot "number + quantifier".
COM_QUANTIFIERS = '(所|朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|小时|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)'

# 分数表达式
RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)')


def replace_frac(match) -> str:
    """Verbalize a fraction, e.g. '2/3' -> '三分之二'.

    Args:
        match (re.Match): a match of RE_FRAC.
    Returns:
        str
    """
    sign = match.group(1)
    nominator = match.group(2)
    denominator = match.group(3)
    sign: str = "负" if sign else ""
    nominator: str = num2str(nominator)
    denominator: str = num2str(denominator)
    # Chinese reads the denominator first: "denominator 分之 nominator".
    result = f"{sign}{denominator}分之{nominator}"
    return result


# 百分数表达式
RE_PERCENTAGE = re.compile(r'(-?)(\d+(\.\d+)?)%')


def replace_percentage(match) -> str:
    """Verbalize a percentage, e.g. '-5%' -> '负百分之五'.

    Args:
        match (re.Match): a match of RE_PERCENTAGE.
    Returns:
        str
    """
    sign = match.group(1)
    percent = match.group(2)
    sign: str = "负" if sign else ""
    percent: str = num2str(percent)
    result = f"{sign}百分之{percent}"
    return result


# 整数表达式
# 带负号的整数 -10
RE_INTEGER = re.compile(r'(-)' r'(\d+)')


def replace_negative_num(match) -> str:
    """Verbalize a signed integer, e.g. '-10' -> '负十'.

    Args:
        match (re.Match): a match of RE_INTEGER.
    Returns:
        str
    """
    sign = match.group(1)
    number = match.group(2)
    sign: str = "负" if sign else ""
    number: str = num2str(number)
    result = f"{sign}{number}"
    return result


# 编号-无符号整形
# 00078
RE_DEFAULT_NUM = re.compile(r'\d{3}\d*')


def replace_default_num(match):
    """Verbalize an id-like number digit by digit, e.g. '00078' -> '零零零七八'.

    Args:
        match (re.Match): a match of RE_DEFAULT_NUM.
    Returns:
        str
    """
    number = match.group(0)
    return verbalize_digit(number)


# 数字表达式
# 纯小数
RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' r'|(\.(\d+))')
# 正整数 + 量词
RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几\+])?" + COM_QUANTIFIERS)
RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))')


def replace_positive_quantifier(match) -> str:
    """Verbalize "number + quantifier", e.g. '3个' -> '三个', '10+个' -> '十多个'.

    Args:
        match (re.Match): a match of RE_POSITIVE_QUANTIFIERS.
    Returns:
        str
    """
    number = match.group(1)
    match_2 = match.group(2)
    # '+' after a number is read as '多' ("more than").
    if match_2 == "+":
        match_2 = "多"
    match_2: str = match_2 if match_2 else ""
    # group(3) is the outermost group of COM_QUANTIFIERS.
    quantifiers: str = match.group(3)
    number: str = num2str(number)
    result = f"{number}{match_2}{quantifiers}"
    return result


def replace_number(match) -> str:
    """Verbalize a plain number, e.g. '-1.5' -> '负一点五', '.5' -> '零点五'.

    Args:
        match (re.Match): a match of RE_NUMBER or RE_DECIMAL_NUM.
    Returns:
        str
    """
    sign = match.group(1)
    number = match.group(2)
    # group(5) matches the leading-dot alternative ('.5').
    pure_decimal = match.group(5)
    if pure_decimal:
        result = num2str(pure_decimal)
    else:
        sign: str = "负" if sign else ""
        number: str = num2str(number)
        result = f"{sign}{number}"
    return result


# 范围表达式
# match.group(1) and match.group(8) are copy from RE_NUMBER

RE_RANGE = re.compile(
    r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))[-~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))')


def replace_range(match) -> str:
    """Verbalize a numeric range, e.g. '1~2' -> '一到二'.

    Args:
        match (re.Match): a match of RE_RANGE.
    Returns:
        str
    """
    first, second = match.group(1), match.group(8)
    first = RE_NUMBER.sub(replace_number, first)
    second = RE_NUMBER.sub(replace_number, second)
    result = f"{first}到{second}"
    return result


def _get_value(value_string: str, use_zero: bool=True) -> List[str]:
    """Recursively verbalize an unsigned integer string into symbols.

    `use_zero` inserts '零' for skipped positions (e.g. '05' -> ['零', '五']).
    """
    stripped = value_string.lstrip('0')
    if len(stripped) == 0:
        return []
    elif len(stripped) == 1:
        if use_zero and len(stripped) < len(value_string):
            return [DIGITS['0'], DIGITS[stripped]]
        else:
            return [DIGITS[stripped]]
    else:
        # Largest unit strictly smaller than the number of digits.
        largest_unit = next(
            power for power in reversed(UNITS.keys()) if power < len(stripped))
        first_part = value_string[:-largest_unit]
        second_part = value_string[-largest_unit:]
        return _get_value(first_part) + [UNITS[largest_unit]] + _get_value(
            second_part)


def verbalize_cardinal(value_string: str) -> str:
    """Verbalize an unsigned integer string as a cardinal, e.g. '105' -> '一百零五'."""
    if not value_string:
        return ''

    # 000 -> '零' , 0 -> '零'
    value_string = value_string.lstrip('0')
    if len(value_string) == 0:
        return DIGITS['0']

    result_symbols = _get_value(value_string)
    # verbalized number starting with '一十*' is abbreviated as `十*`
    if len(result_symbols) >= 2 and result_symbols[0] == DIGITS[
            '1'] and result_symbols[1] == UNITS[1]:
        result_symbols = result_symbols[1:]
    return ''.join(result_symbols)


def verbalize_digit(value_string: str, alt_one=False) -> str:
    """Verbalize a digit string one digit at a time, e.g. '123' -> '一二三'.

    With `alt_one`, '一' is replaced by '幺' (standard for phone numbers).
    """
    result_symbols = [DIGITS[digit] for digit in value_string]
    result = ''.join(result_symbols)
    if alt_one:
        result = result.replace("一", "幺")
    return result


def num2str(value_string: str) -> str:
    """Verbalize a decimal number string, e.g. '3.20' -> '三点二'.

    Raises:
        ValueError: if `value_string` contains more than one point.
    """
    integer_decimal = value_string.split('.')
    if len(integer_decimal) == 1:
        integer = integer_decimal[0]
        decimal = ''
    elif len(integer_decimal) == 2:
        integer, decimal = integer_decimal
    else:
        # BUG FIX: the message previously read "'${value_string}'", a leftover
        # of JS-style interpolation that printed a literal '$'.
        raise ValueError(
            f"The value string: '{value_string}' has more than one point in it."
        )

    result = verbalize_cardinal(integer)

    decimal = decimal.rstrip('0')
    if decimal:
        # '.22' is verbalized as '零点二二'
        # '3.20' is verbalized as '三点二'
        result = result if result else "零"
        result += '点' + verbalize_digit(decimal)
    return result
# -------------------------------------------------------------------
# python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/phonecode.py
# -------------------------------------------------------------------
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from .num import verbalize_digit

# 规范化固话/手机号码
# 手机
# http://www.jihaoba.com/news/show/13680
# 移动:139、138、137、136、135、134、159、158、157、150、151、152、188、187、182、183、184、178、198
# 联通:130、131、132、156、155、186、185、176
# 电信:133、153、189、180、181、177
#
# NOTE(review): the three regexes below were garbled in this copy (the
# look-behind/look-ahead assertions containing '<' were truncated).  They are
# restored from upstream PaddleSpeech zh_normalization/phonecode.py — confirm
# against the pinned PaddleSpeech revision.
RE_MOBILE_PHONE = re.compile(
    r"(?<!\d)((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})(?!\d)")
RE_TELEPHONE = re.compile(
    r"(?<!\d)((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})(?!\d)")
# 全国统一的号码400开头
RE_NATIONAL_UNIFORM_NUMBER = re.compile(r"(400)(-)?\d{3}(-)?\d{4}")


def phone2str(phone_string: str, mobile=True) -> str:
    """Verbalize a phone number digit by digit ('1' is read as '幺').

    Args:
        phone_string (str): the matched number text.
        mobile (bool): mobile numbers are split on spaces (country code vs
            number); landlines are split on '-'.  Parts are joined with ','
            to create a pause.
    Returns:
        str
    """
    if mobile:
        sp_parts = phone_string.strip('+').split()
        result = ','.join(
            [verbalize_digit(part, alt_one=True) for part in sp_parts])
        return result
    else:
        sil_parts = phone_string.split('-')
        result = ','.join(
            [verbalize_digit(part, alt_one=True) for part in sil_parts])
        return result


def replace_phone(match) -> str:
    """Verbalize a landline / uniform-service number.

    Args:
        match (re.Match): a match of RE_TELEPHONE or RE_NATIONAL_UNIFORM_NUMBER.
    Returns:
        str
    """
    return phone2str(match.group(0), mobile=False)


def replace_mobile(match) -> str:
    """Verbalize a mobile phone number.

    Args:
        match (re.Match): a match of RE_MOBILE_PHONE.
    Returns:
        str
    """
    return phone2str(match.group(0))
# --------------------------------------------------------------------
# python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/quantifier.py
# --------------------------------------------------------------------
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from .num import num2str

# 温度表达式,温度会影响负号的读法
# -3°C 零下三度
# Groups: 1=sign, 2=value, 3=optional decimal part (nested in 2), 4=unit.
RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)')


def replace_temperature(match) -> str:
    """Verbalize a temperature, e.g. '-3°C' -> '零下三度'.

    Args:
        match (re.Match): a match of RE_TEMPERATURE.
    Returns:
        str
    """
    sign = match.group(1)
    temperature = match.group(2)
    # BUG FIX: the unit is capture group 4.  Group 3 is the *nested* optional
    # decimal fraction of the value, so the original `match.group(3)` could
    # never equal "摄氏度" and every temperature was read with "度".
    unit = match.group(4)
    sign: str = "零下" if sign else ""
    temperature: str = num2str(temperature)
    unit: str = "摄氏度" if unit == "摄氏度" else "度"
    result = f"{sign}{temperature}{unit}"
    return result
# ---------------------------------------------------------------------------
# python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/text_normlization.py
# ---------------------------------------------------------------------------
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | from typing import List 16 | 17 | from .char_convert import tranditional_to_simplified 18 | from .chronology import RE_DATE 19 | from .chronology import RE_DATE2 20 | from .chronology import RE_TIME 21 | from .chronology import RE_TIME_RANGE 22 | from .chronology import replace_date 23 | from .chronology import replace_date2 24 | from .chronology import replace_time 25 | from .constants import F2H_ASCII_LETTERS 26 | from .constants import F2H_DIGITS 27 | from .constants import F2H_SPACE 28 | from .num import RE_DECIMAL_NUM 29 | from .num import RE_DEFAULT_NUM 30 | from .num import RE_FRAC 31 | from .num import RE_INTEGER 32 | from .num import RE_NUMBER 33 | from .num import RE_PERCENTAGE 34 | from .num import RE_POSITIVE_QUANTIFIERS 35 | from .num import RE_RANGE 36 | from .num import replace_default_num 37 | from .num import replace_frac 38 | from .num import replace_negative_num 39 | from .num import replace_number 40 | from .num import replace_percentage 41 | from .num import replace_positive_quantifier 42 | from .num import replace_range 43 | from .phonecode import RE_MOBILE_PHONE 44 | from .phonecode import RE_NATIONAL_UNIFORM_NUMBER 45 | from .phonecode import RE_TELEPHONE 46 | from .phonecode import replace_mobile 47 | from .phonecode import replace_phone 48 | from .quantifier import RE_TEMPERATURE 49 | from .quantifier import replace_temperature 50 | 51 | 52 | class TextNormalizer(): 53 | def __init__(self): 54 | self.SENTENCE_SPLITOR = re.compile(r'([:、,;。?!,;?!][”’]?)') 55 | 56 | def _split(self, text: str, lang="zh") -> List[str]: 57 | """Split long text into sentences with sentence-splitting punctuations. 58 | Args: 59 | text (str): The input text. 60 | Returns: 61 | List[str]: Sentences. 
62 | """ 63 | # Only for pure Chinese here 64 | if lang == "zh": 65 | text = text.replace(" ", "") 66 | # 过滤掉特殊字符 67 | text = re.sub(r'[《》【】<=>{}()()#&@“”^_|…\\]', '', text) 68 | text = self.SENTENCE_SPLITOR.sub(r'\1\n', text) 69 | text = text.strip() 70 | sentences = [sentence.strip() for sentence in re.split(r'\n+', text)] 71 | return sentences 72 | 73 | def _post_replace(self, sentence: str) -> str: 74 | sentence = sentence.replace('/', '每') 75 | sentence = sentence.replace('~', '至') 76 | 77 | return sentence 78 | 79 | def normalize_sentence(self, sentence: str) -> str: 80 | # basic character conversions 81 | sentence = tranditional_to_simplified(sentence) 82 | sentence = sentence.translate(F2H_ASCII_LETTERS).translate( 83 | F2H_DIGITS).translate(F2H_SPACE) 84 | 85 | # number related NSW verbalization 86 | sentence = RE_DATE.sub(replace_date, sentence) 87 | sentence = RE_DATE2.sub(replace_date2, sentence) 88 | 89 | # range first 90 | sentence = RE_TIME_RANGE.sub(replace_time, sentence) 91 | sentence = RE_TIME.sub(replace_time, sentence) 92 | 93 | sentence = RE_TEMPERATURE.sub(replace_temperature, sentence) 94 | sentence = RE_FRAC.sub(replace_frac, sentence) 95 | sentence = RE_PERCENTAGE.sub(replace_percentage, sentence) 96 | sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence) 97 | 98 | sentence = RE_TELEPHONE.sub(replace_phone, sentence) 99 | sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence) 100 | 101 | sentence = RE_RANGE.sub(replace_range, sentence) 102 | sentence = RE_INTEGER.sub(replace_negative_num, sentence) 103 | sentence = RE_DECIMAL_NUM.sub(replace_number, sentence) 104 | sentence = RE_POSITIVE_QUANTIFIERS.sub(replace_positive_quantifier, 105 | sentence) 106 | sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence) 107 | sentence = RE_NUMBER.sub(replace_number, sentence) 108 | sentence = self._post_replace(sentence) 109 | 110 | return sentence 111 | 112 | def normalize(self, text: str) -> List[str]: 113 | sentences = 
self._split(text) 114 | 115 | sentences = [self.normalize_sentence(sent) for sent in sentences] 116 | return sentences 117 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/main.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | inference_dir="$PWD/resources" 4 | 5 | am="fastspeech2_csmsc" 6 | am_onnx="fastspeech2_csmsc_onnx_0.2.0/fastspeech2_csmsc.onnx" 7 | 8 | voc="hifigan_csmsc" 9 | voc_onnx="hifigan_csmsc.onnx" 10 | 11 | output_dir="result" 12 | text="$PWD/csmsc_test.txt" 13 | phones_dict="$PWD/resources/fastspeech2_csmsc_onnx_0.2.0/phone_id_map.txt" 14 | 15 | python tts3.py \ 16 | --inference_dir=${inference_dir} \ 17 | --am=${am} \ 18 | --am_onnx=${am_onnx} \ 19 | --voc=${voc} \ 20 | --voc_onnx=${voc_onnx} \ 21 | --output_dir=${output_dir} \ 22 | --text=${text} \ 23 | --phones_dict=${phones_dict} \ 24 | --device=cpu \ 25 | --cpu_threads=2 26 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/requirements.txt: -------------------------------------------------------------------------------- 1 | g2p_en==2.1.0 2 | inflect==5.3.0 3 | jieba==0.42.1 4 | numpy>=1.19.3 5 | onnxruntime>=1.10.0 6 | pypinyin==0.44.0 7 | pypinyin_dict==0.2.0 8 | SoundFile==0.10.3.post1 9 | timer==0.2.2 10 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/syn_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE(review): the `frontend` imports are deferred into `get_frontend` so
# that `get_sentences` can be used without pulling in the heavy TTS frontend.


# input
def get_sentences(args):
    """Construct the evaluation dataset from ``args.text``.

    Each non-blank line is ``<utt_id> <token> <token> ...``; tokens are
    joined without spaces for Chinese and with single spaces for English.

    Args:
        args: parsed CLI args providing ``text`` (path) and ``lang``.
    Returns:
        list[tuple[str, str]]: (utt_id, sentence) pairs.
    Raises:
        ValueError: if ``args.lang`` is neither 'zh' nor 'en'.  (The original
            code left ``sentence`` unbound here and crashed with NameError.)
    """
    lang = getattr(args, 'lang', None)
    sentences = []
    with open(args.text, 'rt', encoding='utf-8') as f:
        for line in f:
            items = line.strip().split()
            if not items:
                # skip blank lines instead of raising IndexError on items[0]
                continue
            utt_id = items[0]
            if lang == 'zh':
                sentence = "".join(items[1:])
            elif lang == 'en':
                sentence = " ".join(items[1:])
            else:
                raise ValueError(
                    f"unsupported lang: {lang!r} (expected 'zh' or 'en')")
            sentences.append((utt_id, sentence))
    return sentences


# frontend
def get_frontend(args):
    """Construct the text frontend matching ``args.lang``.

    Args:
        args: parsed CLI args providing ``lang``, ``phones_dict`` and (for
            Chinese) ``tones_dict``.
    Returns:
        Frontend | English: the language-specific frontend instance.
    Raises:
        ValueError: for an unsupported language.  (The original printed
            "wrong lang!" and then returned an unbound local, raising
            UnboundLocalError.)
    """
    lang = getattr(args, 'lang', None)
    if lang == 'zh':
        from frontend.zh_frontend import Frontend
        frontend = Frontend(phone_vocab_path=args.phones_dict,
                            tone_vocab_path=args.tones_dict)
    elif lang == 'en':
        from frontend import English
        frontend = English(phone_vocab_path=args.phones_dict)
    else:
        raise ValueError(
            f"unsupported lang: {lang!r} (expected 'zh' or 'en')")
    print("frontend done!")
    return frontend
# ---------------------------------------
# python/PaddleSpeech/csmsc_tts3/tts3.py
# ---------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import argparse 15 | from pathlib import Path 16 | 17 | import numpy as np 18 | import onnxruntime as ort 19 | import soundfile as sf 20 | from timer import timer 21 | 22 | from syn_utils import get_frontend, get_sentences 23 | 24 | root_dir = str(Path.cwd()) 25 | 26 | 27 | def str2bool(str): 28 | return True if str.lower() == 'true' else False 29 | 30 | 31 | def get_sess(args, filed='am'): 32 | full_name = '' 33 | if filed == 'am': 34 | full_name = args.am_onnx 35 | elif filed == 'voc': 36 | full_name = args.voc_onnx 37 | 38 | model_dir = str(Path(args.inference_dir) / full_name) 39 | 40 | sess_options = ort.SessionOptions() 41 | sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL 42 | sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL 43 | 44 | if args.device == "gpu": 45 | # fastspeech2/mb_melgan can't use trt now! 
46 | if args.use_trt: 47 | providers = ['TensorrtExecutionProvider'] 48 | else: 49 | providers = ['CUDAExecutionProvider'] 50 | elif args.device == "cpu": 51 | providers = ['CPUExecutionProvider'] 52 | 53 | sess_options.intra_op_num_threads = args.cpu_threads 54 | sess = ort.InferenceSession(model_dir, 55 | providers=providers, 56 | sess_options=sess_options) 57 | return sess 58 | 59 | 60 | def ort_predict(args): 61 | 62 | # frontend 63 | frontend = get_frontend(args) 64 | 65 | output_dir = Path(args.output_dir) 66 | output_dir.mkdir(parents=True, exist_ok=True) 67 | sentences = get_sentences(args) 68 | 69 | am_name = args.am[:args.am.rindex('_')] 70 | am_dataset = args.am[args.am.rindex('_') + 1:] 71 | fs = 24000 if am_dataset != 'ljspeech' else 22050 72 | 73 | # am 74 | am_sess = get_sess(args, filed='am') 75 | 76 | # vocoder 77 | voc_sess = get_sess(args, filed='voc') 78 | 79 | # am warmup 80 | for T in [27, 38, 54]: 81 | data = np.random.randint(1, 266, size=(T, )).astype("int64") 82 | am_sess.run(None, {"text": data}) 83 | 84 | # voc warmup 85 | for T in [227, 308, 544]: 86 | data = np.random.rand(T, 80).astype("float32") 87 | voc_sess.run(None, {"logmel": data}) 88 | print("warm up done!") 89 | 90 | # frontend warmup 91 | # Loading model cost 0.5+ seconds 92 | if args.lang == 'zh': 93 | frontend.get_input_ids("你好,欢迎使用飞桨框架进行深度学习研究!", 94 | merge_sentences=True) 95 | else: 96 | print("lang should in be 'zh' here!") 97 | 98 | N = 0 99 | T = 0 100 | merge_sentences = True 101 | for utt_id, sentence in sentences: 102 | with timer() as t: 103 | if args.lang == 'zh': 104 | input_ids = frontend.get_input_ids( 105 | sentence, merge_sentences=merge_sentences) 106 | 107 | phone_ids = input_ids["phone_ids"] 108 | else: 109 | print("lang should in be 'zh' here!") 110 | # merge_sentences=True here, so we only use the first item of phone_ids 111 | phone_ids = phone_ids[0] 112 | mel = am_sess.run(output_names=None, input_feed={ 113 | 'text': phone_ids}) 114 | mel = mel[0] 115 
| wav = voc_sess.run(output_names=None, input_feed={'logmel': mel}) 116 | 117 | N += len(wav[0]) 118 | T += t.elapse 119 | speed = len(wav[0]) / t.elapse 120 | rtf = fs / speed 121 | sf.write( 122 | str(output_dir / (utt_id + ".wav")), 123 | np.array(wav)[0], 124 | samplerate=fs) 125 | print( 126 | f"{utt_id}, mel: {mel.shape}, wave: {len(wav[0])}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}." 127 | ) 128 | print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }") 129 | 130 | 131 | def parse_args(): 132 | parser = argparse.ArgumentParser(description="Infernce with onnxruntime.") 133 | 134 | # acoustic model 135 | parser.add_argument('--am', type=str, 136 | default='fastspeech2_csmsc', 137 | help='Choose acoustic model type of tts task.') 138 | parser.add_argument('--am_onnx', type=str, 139 | default='fastspeech2_csmsc_onnx_0.2.0/fastspeech2_csmsc.onnx') 140 | 141 | parser.add_argument("--phones_dict", type=str, 142 | default='resources/fastspeech2_csmsc_onnx_0.2.0/phone_id_map.txt', 143 | help="phone vocabulary file.") 144 | 145 | parser.add_argument("--tones_dict", type=str, 146 | default=None, 147 | help="tone vocabulary file.") 148 | 149 | # voc 150 | parser.add_argument('--voc', type=str, 151 | default='hifigan_csmsc', 152 | help='Choose vocoder type of tts task.') 153 | 154 | parser.add_argument('--voc_onnx', type=str, 155 | default='hifigan_csmsc.onnx') 156 | 157 | # other 158 | parser.add_argument("--inference_dir", type=str, 159 | default=f"{root_dir}/resources", 160 | help="dir to save inference models") 161 | 162 | parser.add_argument("--text", type=str, 163 | default='csmsc_test.txt') 164 | 165 | parser.add_argument("--output_dir", type=str, 166 | default='infer_result') 167 | 168 | parser.add_argument('--lang', type=str, 169 | default='zh', 170 | help='Choose model language. 
zh or en') 171 | 172 | # inference 173 | parser.add_argument("--use_trt", 174 | type=str2bool, 175 | default=False, 176 | help="Whether to use inference engin TensorRT.", ) 177 | 178 | parser.add_argument("--device", default="cpu", 179 | choices=["gpu", "cpu"]) 180 | 181 | parser.add_argument('--cpu_threads', type=int, default=2) 182 | 183 | args, _ = parser.parse_known_args() 184 | Path(args.output_dir).mkdir(parents=True, exist_ok=True) 185 | return args 186 | 187 | 188 | if __name__ == "__main__": 189 | args = parse_args() 190 | ort_predict(args) -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/README.md: -------------------------------------------------------------------------------- 1 | ### ljspeech_tts3 2 | - **支持合成语言**: 英文字母 3 | - 基于[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)下的[ljspeech-TTS3](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/ljspeech/tts3/README.md)整理而来 4 | - 整个推理引擎只采用`ONNXRuntime` 5 | - 其中PaddleSpeech中提供的预训练模型可以参见[link](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README_cn.md#4-%E9%A2%84%E8%AE%AD%E7%BB%83%E6%A8%A1%E5%9E%8B)。在ljspeech_tts3中使用的是: 6 | 7 | |主要部分|具体模型|支持语言| 8 | |:---|:---|:---| 9 | |声学模型|[fastspeech2_ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/ljspeech/tts3/README.md#pretrained-model)|en| 10 | |声码器|[pwg_ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/voc1)|en| 11 | 12 | #### 结果示例 13 |
14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 28 | 29 | 30 |
输入文本合成音频
Love you three thousand times. 25 | 26 |
27 |
31 | 32 |
33 | 34 | #### 运行步骤 35 | 1. 下载`resources`, [Google Drive](https://drive.google.com/file/d/1xQwsY1tWebQSWu32KgLlGO1QUnrixvwo/view?usp=sharing) | [百度网盘,提取码:4vlu](https://pan.baidu.com/s/1vvBnuNEcj-AngXdw3j0S4g?pwd=4vlu), 解压到`ljspeech_tts3`目录下,最终目录结构如下: 36 | ```text 37 | ljspeech_tts3 38 | ├── sentences_en.txt 39 | ├── requirements.txt 40 | ├── frontend 41 | ├── main.sh 42 | ├── tts3.py 43 | ├── infer_result 44 | ├── resources 45 | │ ├── fastspeech2_ljspeech 46 | │ │ ├── fastspeech2_ljspeech.onnx 47 | │ │ └── phone_id_map.txt 48 | │ └── pwgan_ljspeech.onnx 49 | └──syn_utils.py 50 | ``` 51 | 52 | 2. 安装`requirements.txt` 53 | ```bash 54 | pip install -r requirements.txt -i https://pypi.douban.com/simple/ 55 | ``` 56 | 57 | 3. 运行`tts3.py` 58 | ```bash 59 | python tts3.py 60 | ``` 61 | or 62 | ```bash 63 | bash main.sh 64 | ``` 65 | 66 | 4. 运行日志如下: 67 | ```text 68 | frontend done! 69 | 001, mel: (343, 80), wave: 87808, time: 7.583922399999999s, Hz: 11578.186242472837, RTF: 1.9044433677455357. 70 | 002, mel: (274, 80), wave: 70144, time: 5.986744399999999s, Hz: 11716.561243394675, RTF: 1.8819514994154878. 71 | 003, mel: (175, 80), wave: 44800, time: 3.911470399999999s, Hz: 11453.51349948683, RTF: 1.9251734414062498. 72 | 004, mel: (217, 80), wave: 55552, time: 4.678628299999996s, Hz: 11873.585640758554, RTF: 1.8570632888104823. 73 | 005, mel: (371, 80), wave: 94976, time: 7.7152417s, Hz: 12310.185834993608, RTF: 1.7911996045843162. 74 | 006, mel: (338, 80), wave: 86528, time: 7.670878100000003s, Hz: 11280.071739420744, RTF: 1.954774801913832. 75 | 007, mel: (205, 80), wave: 52480, time: 4.628822800000002s, Hz: 11337.668363997142, RTF: 1.9448443270769813. 76 | 008, mel: (390, 80), wave: 99840, time: 8.2700763s, Hz: 12072.447745611855, RTF: 1.826473012319712. 77 | 009, mel: (169, 80), wave: 43264, time: 4.2657806000000065s, Hz: 10142.12548840801, RTF: 2.1741004905926427. 
78 | generation speed: 11613.502408804885Hz, RTF: 1.8986520365538124 79 | ``` 80 | 生成结果会保存到`infer_result`目录下 81 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/assets/009.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/ljspeech_tts3/assets/009.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/assets/audio_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/ljspeech_tts3/assets/audio_icon.png -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from .generate_lexicon import * 15 | from .normalizer import * 16 | from .phonectic import * 17 | from .punctuation import * 18 | from .tone_sandhi import * 19 | from .vocab import * 20 | from .zh_normalization import * 21 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/arpabet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from paddlespeech.t2s.frontend.phonectic import Phonetics 15 | """ 16 | A phonology system with ARPABET symbols and limited punctuations. The G2P 17 | conversion is done by g2p_en. 18 | 19 | Note that g2p_en does not handle words with hypen well. So make sure the input 20 | sentence is first normalized. 21 | """ 22 | from paddlespeech.t2s.frontend.vocab import Vocab 23 | from g2p_en import G2p 24 | 25 | 26 | class ARPABET(Phonetics): 27 | """A phonology for English that uses ARPABET as the phoneme vocabulary. 28 | See http://www.speech.cs.cmu.edu/cgi-bin/cmudict for more details. 
29 | Phoneme Example Translation 30 | ------- ------- ----------- 31 | AA odd AA D 32 | AE at AE T 33 | AH hut HH AH T 34 | AO ought AO T 35 | AW cow K AW 36 | AY hide HH AY D 37 | B be B IY 38 | CH cheese CH IY Z 39 | D dee D IY 40 | DH thee DH IY 41 | EH Ed EH D 42 | ER hurt HH ER T 43 | EY ate EY T 44 | F fee F IY 45 | G green G R IY N 46 | HH he HH IY 47 | IH it IH T 48 | IY eat IY T 49 | JH gee JH IY 50 | K key K IY 51 | L lee L IY 52 | M me M IY 53 | N knee N IY 54 | NG ping P IH NG 55 | OW oat OW T 56 | OY toy T OY 57 | P pee P IY 58 | R read R IY D 59 | S sea S IY 60 | SH she SH IY 61 | T tea T IY 62 | TH theta TH EY T AH 63 | UH hood HH UH D 64 | UW two T UW 65 | V vee V IY 66 | W we W IY 67 | Y yield Y IY L D 68 | Z zee Z IY 69 | ZH seizure S IY ZH ER 70 | """ 71 | phonemes = [ 72 | 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'B', 'CH', 'D', 'DH', 'EH', 'ER', 73 | 'EY', 'F', 'G', 'HH', 'IH', 'IY', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 74 | 'OY', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UW', 'UH', 'V', 'W', 'Y', 'Z', 75 | 'ZH' 76 | ] 77 | punctuations = [',', '.', '?', '!'] 78 | symbols = phonemes + punctuations 79 | _stress_to_no_stress_ = { 80 | 'AA0': 'AA', 81 | 'AA1': 'AA', 82 | 'AA2': 'AA', 83 | 'AE0': 'AE', 84 | 'AE1': 'AE', 85 | 'AE2': 'AE', 86 | 'AH0': 'AH', 87 | 'AH1': 'AH', 88 | 'AH2': 'AH', 89 | 'AO0': 'AO', 90 | 'AO1': 'AO', 91 | 'AO2': 'AO', 92 | 'AW0': 'AW', 93 | 'AW1': 'AW', 94 | 'AW2': 'AW', 95 | 'AY0': 'AY', 96 | 'AY1': 'AY', 97 | 'AY2': 'AY', 98 | 'EH0': 'EH', 99 | 'EH1': 'EH', 100 | 'EH2': 'EH', 101 | 'ER0': 'ER', 102 | 'ER1': 'ER', 103 | 'ER2': 'ER', 104 | 'EY0': 'EY', 105 | 'EY1': 'EY', 106 | 'EY2': 'EY', 107 | 'IH0': 'IH', 108 | 'IH1': 'IH', 109 | 'IH2': 'IH', 110 | 'IY0': 'IY', 111 | 'IY1': 'IY', 112 | 'IY2': 'IY', 113 | 'OW0': 'OW', 114 | 'OW1': 'OW', 115 | 'OW2': 'OW', 116 | 'OY0': 'OY', 117 | 'OY1': 'OY', 118 | 'OY2': 'OY', 119 | 'UH0': 'UH', 120 | 'UH1': 'UH', 121 | 'UH2': 'UH', 122 | 'UW0': 'UW', 123 | 'UW1': 'UW', 124 | 'UW2': 'UW' 125 | } 126 
| 127 | def __init__(self): 128 | self.backend = G2p() 129 | self.vocab = Vocab(self.phonemes + self.punctuations) 130 | 131 | def _remove_vowels(self, phone): 132 | return self._stress_to_no_stress_.get(phone, phone) 133 | 134 | def phoneticize(self, sentence, add_start_end=False): 135 | """ Normalize the input text sequence and convert it into pronunciation sequence. 136 | Args: 137 | sentence (str): The input text sequence. 138 | 139 | Returns: 140 | List[str]: The list of pronunciation sequence. 141 | """ 142 | phonemes = [ 143 | self._remove_vowels(item) for item in self.backend(sentence) 144 | ] 145 | if add_start_end: 146 | start = self.vocab.start_symbol 147 | end = self.vocab.end_symbol 148 | phonemes = [start] + phonemes + [end] 149 | phonemes = [item for item in phonemes if item in self.vocab.stoi] 150 | return phonemes 151 | 152 | def numericalize(self, phonemes): 153 | """ Convert pronunciation sequence into pronunciation id sequence. 154 | 155 | Args: 156 | phonemes (List[str]): The list of pronunciation sequence. 157 | 158 | Returns: 159 | List[int]: The list of pronunciation id sequence. 160 | """ 161 | ids = [self.vocab.lookup(item) for item in phonemes] 162 | return ids 163 | 164 | def reverse(self, ids): 165 | """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence. 166 | 167 | Args: 168 | ids( List[int]): The list of pronunciation id sequence. 169 | 170 | Returns: 171 | List[str]: 172 | The list of pronunciation sequence. 173 | """ 174 | return [self.vocab.reverse(i) for i in ids] 175 | 176 | def __call__(self, sentence, add_start_end=False): 177 | """ Convert the input text sequence into pronunciation id sequence. 178 | 179 | Args: 180 | sentence (str): The input text sequence. 181 | 182 | Returns: 183 | List[str]: The list of pronunciation id sequence. 
class ARPABETWithStress(Phonetics):
    """English G2P frontend emitting ARPABET symbols WITH stress markers.

    Unlike ``ARPABET``, vowel phonemes keep their stress digit (0/1/2),
    e.g. 'AH0' vs 'AH1' are distinct symbols.
    """
    phonemes = [
        'AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0',
        'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D',
        'DH', 'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2',
        'F', 'G', 'HH', 'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K',
        'L', 'M', 'N', 'NG', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R',
        'S', 'SH', 'T', 'TH', 'UH0', 'UH1', 'UH2', 'UW0', 'UW1', 'UW2', 'V',
        'W', 'Y', 'Z', 'ZH'
    ]
    punctuations = [',', '.', '?', '!']
    symbols = phonemes + punctuations

    def __init__(self):
        # g2p_en backend handles English text normalization + G2P.
        self.backend = G2p()
        self.vocab = Vocab(self.phonemes + self.punctuations)

    def phoneticize(self, sentence, add_start_end=False):
        """Convert a text sentence into a pronunciation sequence (stress kept).

        Args:
            sentence (str): The input text sequence.
            add_start_end (bool): Whether to wrap with start/end symbols.

        Returns:
            List[str]: The pronunciation sequence.
        """
        sequence = list(self.backend(sentence))
        if add_start_end:
            sequence = [self.vocab.start_symbol, *sequence,
                        self.vocab.end_symbol]
        # Drop anything not registered in the vocabulary.
        return [symbol for symbol in sequence if symbol in self.vocab.stoi]

    def numericalize(self, phonemes):
        """Convert a pronunciation sequence into a sequence of ids.

        Args:
            phonemes (List[str]): The pronunciation sequence.

        Returns:
            List[int]: The pronunciation id sequence.
        """
        return list(map(self.vocab.lookup, phonemes))

    def reverse(self, ids):
        """Map a list of pronunciation ids back to their symbols.

        Args:
            ids (List[int]): The pronunciation id sequence.

        Returns:
            List[str]: The pronunciation sequence.
        """
        return [self.vocab.reverse(index) for index in ids]

    def __call__(self, sentence, add_start_end=False):
        """Convert a text sentence directly into a pronunciation id sequence.

        Args:
            sentence (str): The input text sequence.

        Returns:
            List[int]: The pronunciation id sequence.
        """
        phonemes = self.phoneticize(sentence, add_start_end=add_start_end)
        return self.numericalize(phonemes)

    @property
    def vocab_size(self):
        """Vocabulary size: 69 phones + 4 punctuations, plus however many
        special tokens the Vocab registered (77 when all 4 are enabled)."""
        return len(self.vocab)
import re
from collections import OrderedDict

INITIALS = [
    'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh',
    'r', 'z', 'c', 's', 'j', 'q', 'x'
]

FINALS = [
    'a', 'ai', 'ao', 'an', 'ang', 'e', 'er', 'ei', 'en', 'eng', 'o', 'ou',
    'ong', 'ii', 'iii', 'i', 'ia', 'iao', 'ian', 'iang', 'ie', 'io', 'iou',
    'iong', 'in', 'ing', 'u', 'ua', 'uai', 'uan', 'uang', 'uei', 'uo', 'uen',
    'ueng', 'v', 've', 'van', 'vn'
]

SPECIALS = ['sil', 'sp']


def rule(C, V, R, T):
    """Generate a syllable given the initial, the final, erhua indicator, and tone.
    Orthographical rules for pinyin are applied (special cases for y, w, ui, un, iu).

    Note that in this system, 'ü' is always written as 'v' in phonemes, but
    converted to 'u' in syllables when certain conditions are satisfied.
    'i' is separated into 3 phoneme categories: 'i', 'ii' (after z/c/s) and
    'iii' (after zh/ch/sh/r). Erhua may apply to every final except ones that
    already end with 'r'.

    Args:
        C (str): initial ('' for a zero initial).
        V (str): final.
        R (str): erhua indicator, '' or 'r'.
        T (str): tone, '' or '1'..'5'.

    Returns:
        Optional[str]: the written syllable, or None when the combination is
        phonotactically impossible.
    """
    # Apical vowel 'ii' only combines with z, c, s.
    if V in ["ii"] and (C not in ['z', 'c', 's']):
        return None
    # Apical vowel 'iii' only combines with zh, ch, sh, r.
    if V in ['iii'] and (C not in ['zh', 'ch', 'sh', 'r']):
        return None

    # i-/v- finals do not combine with f, g, k, h, zh, ch, sh, r, z, c, s.
    if (V not in ['ii', 'iii']) and V[0] in ['i', 'v'] and (
            C in ['f', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's']):
        return None

    # v- finals: 'v'/'ve' only combine with j, q, x, n, l (or zero initial);
    # the other v- finals only with j, q, x (or zero initial).
    if V.startswith("v"):
        if V in ['v', 've']:
            if C not in ['j', 'q', 'x', 'n', 'l', '']:
                return None
        else:
            if C not in ['j', 'q', 'x', '']:
                return None

    # j, q, x only combine with i- or v- finals.
    if (C in ['j', 'q', 'x']) and not (
            (V not in ['ii', 'iii']) and V[0] in ['i', 'v']):
        return None

    # b, p, m, f do not combine with u-/v- finals (except 'u' itself) or 'ong'.
    if (C in ['b', 'p', 'm', 'f']) and ((V[0] in ['u', 'v'] and V != "u") or
                                        V == 'ong'):
        return None

    # ua, uai, uang do not combine with d, t, n, l, r, z, c, s.
    if V in ['ua', 'uai',
             'uang'] and C in ['d', 't', 'n', 'l', 'r', 'z', 'c', 's']:
        return None

    # sh + ong is impossible.
    if V == 'ong' and C in ['sh']:
        return None

    # o does not combine with d, t, n, g, k, h, zh, ch, sh, r, z, c, s.
    if V == "o" and C in [
            'd', 't', 'n', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's'
    ]:
        return None

    # 'ueng' only exists standalone ('weng'); with an initial it is 'ong'.
    # Fix: this was a bare `return` — make the None explicit, consistent
    # with every other rejection branch in this function.
    if V == 'ueng' and C != '':
        return None

    # Non-erhua 'er' only exists standalone.
    if V == 'er' and C != '':
        return None

    # Apply pinyin orthography for zero initials (y-/w-/yu- spellings).
    if C == '':
        if V in ["i", "in", "ing"]:
            C = 'y'
        elif V == 'u':
            C = 'w'
        elif V.startswith('i') and V not in ["ii", "iii"]:
            C = 'y'
            V = V[1:]
        elif V.startswith('u'):
            C = 'w'
            V = V[1:]
        elif V.startswith('v'):
            C = 'yu'
            V = V[1:]
    else:
        # After j/q/x, 'v' is written 'u'; iou/uei/uen contract to iu/ui/un.
        if C in ['j', 'q', 'x']:
            if V.startswith('v'):
                V = re.sub('v', 'u', V)
        if V == 'iou':
            V = 'iu'
        elif V == 'uei':
            V = 'ui'
        elif V == 'uen':
            V = 'un'
    result = C + V

    # A final that already ends with 'r' cannot take erhua again.
    if result.endswith('r') and R == 'r':
        return None

    # Collapse ii/iii back to the written 'i'.
    result = re.sub(r'i+', 'i', result)

    result = result + R + T
    return result


def generate_lexicon(with_tone=False, with_erhua=False):
    """Generate the Mandarin lexicon.

    Args:
        with_tone (bool): append tone digits 1-5 to each syllable.
        with_erhua (bool): also generate erhua ('r') variants.

    Returns:
        OrderedDict[str, str]: written syllable -> 'initial final' phonemes.
    """
    syllables = OrderedDict()

    for C in [''] + INITIALS:
        for V in FINALS:
            for R in [''] if not with_erhua else ['', 'r']:
                for T in [''] if not with_tone else ['1', '2', '3', '4', '5']:
                    result = rule(C, V, R, T)
                    if result:
                        syllables[result] = f'{C} {V}{R}{T}'
    return syllables
14 | from .normalizer import * 15 | from .numbers import * 16 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/normalizer/abbrrviation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/normalizer/acronyms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import re
import unicodedata
from builtins import str as unicode

from .numbers import normalize_numbers


def normalize(sentence):
    """Normalize English text for the TTS frontend.

    Steps: expand numbers, strip accents (NFD decomposition, then drop
    combining marks), lowercase, remove unsupported characters, and expand
    the abbreviations "i.e." / "e.g.".

    Args:
        sentence (str): raw input text.

    Returns:
        str: normalized text.
    """
    text = unicode(sentence)
    text = normalize_numbers(text)
    # Strip accents: decompose, then discard combining marks (category 'Mn').
    decomposed = unicodedata.normalize('NFD', text)
    text = ''.join(char for char in decomposed
                   if unicodedata.category(char) != 'Mn')
    text = text.lower()
    # Keep only letters, apostrophes, basic punctuation and spaces.
    text = re.sub(r"[^ a-z'.,?!\-]", "", text)
    text = text.replace("i.e.", "that is")
    text = text.replace("e.g.", "for example")
    return text
# number expansion is not that easy
import re

import inflect

_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
_number_re = re.compile(r'[0-9]+')


def _remove_commas(m):
    """'15,000' -> '15000'."""
    return m.group(1).replace(',', '')


def _expand_decimal_point(m):
    """'3.14' -> '3 point 14'."""
    return m.group(1).replace('.', ' point ')


def _expand_dollars(m):
    """Spell out a dollar amount, e.g. '2.50' -> '2 dollars, 50 cents'."""
    amount = m.group(1)
    parts = amount.split('.')
    if len(parts) > 2:
        return amount + ' dollars'  # Unexpected format
    dollars = int(parts[0]) if parts[0] else 0
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    pieces = []
    if dollars:
        unit = 'dollar' if dollars == 1 else 'dollars'
        pieces.append('%s %s' % (dollars, unit))
    if cents:
        unit = 'cent' if cents == 1 else 'cents'
        pieces.append('%s %s' % (cents, unit))
    return ', '.join(pieces) if pieces else 'zero dollars'


def _expand_ordinal(m):
    """'12th' -> 'twelfth'."""
    return _inflect.number_to_words(m.group(0))


def _expand_number(m):
    """Spell out an integer; 1001-2999 are read year-style where natural."""
    num = int(m.group(0))
    if not 1000 < num < 3000:
        return _inflect.number_to_words(num, andword='')
    if num == 2000:
        return 'two thousand'
    if 2000 < num < 2010:
        return 'two thousand ' + _inflect.number_to_words(num % 100)
    if num % 100 == 0:
        return _inflect.number_to_words(num // 100) + ' hundred'
    # Read as digit pairs, e.g. 1984 -> 'nineteen eighty-four'.
    return _inflect.number_to_words(
        num, andword='', zero='oh', group=2).replace(', ', ' ')


def normalize_numbers(text):
    """Normalize numbers in English text (currency, decimals, ordinals,
    cardinals). Substitution order matters: commas and currency first, so
    the generic number rule only sees what is left.
    """
    substitutions = (
        (_comma_number_re, _remove_commas),
        (_pounds_re, r'\1 pounds'),
        (_dollars_re, _expand_dollars),
        (_decimal_number_re, _expand_decimal_point),
        (_ordinal_re, _expand_ordinal),
        (_number_re, _expand_number),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text
14 | 15 | 16 | def full2half_width(ustr): 17 | half = [] 18 | for u in ustr: 19 | num = ord(u) 20 | if num == 0x3000: # 全角空格变半角 21 | num = 32 22 | elif 0xFF01 <= num <= 0xFF5E: 23 | num -= 0xfee0 24 | u = chr(num) 25 | half.append(u) 26 | return ''.join(half) 27 | 28 | 29 | def half2full_width(ustr): 30 | full = [] 31 | for u in ustr: 32 | num = ord(u) 33 | if num == 32: # 半角空格变全角 34 | num = 0x3000 35 | elif 0x21 <= num <= 0x7E: 36 | num += 0xfee0 37 | u = chr(num) # to unicode 38 | full.append(u) 39 | 40 | return ''.join(full) 41 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/punctuation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
__all__ = ["get_punctuations"]

EN_PUNCT = [
    " ",
    "-",
    "...",
    ",",
    ".",
    "?",
    "!",
]

CN_PUNCT = ["、", ",", ";", ":", "。", "?", "!"]


def get_punctuations(lang):
    """Return the punctuation symbol list for a language.

    Args:
        lang (str): "en" or "cn".

    Returns:
        List[str]: the punctuation symbols.

    Raises:
        ValueError: if ``lang`` is not supported.
    """
    table = {"en": EN_PUNCT, "cn": CN_PUNCT}
    try:
        return table[lang]
    except KeyError:
        raise ValueError(f"language {lang} Not supported")
class Vocab(object):
    """Mapping between symbols and integer ids, with optional special symbols.

    Special symbols (padding/unk/start/end) are registered first, in that
    order; passing "" for a special symbol disables it.

    Args:
        symbols (Iterable[str]): Common symbols.
        padding_symbol (str, optional): Symbol for padding. Defaults to "".
        unk_symbol (str, optional): Symbol for unknown. Defaults to "".
        start_symbol (str, optional): Symbol for start. Defaults to "".
        end_symbol (str, optional): Symbol for end. Defaults to "".
    """

    def __init__(self,
                 symbols: Iterable[str],
                 padding_symbol="",
                 unk_symbol="",
                 start_symbol="",
                 end_symbol=""):
        # Specials occupy the lowest ids, in declaration order.
        self.special_symbols = OrderedDict()
        for special in (padding_symbol, unk_symbol, start_symbol, end_symbol):
            if special:
                self.special_symbols[special] = len(self.special_symbols)

        self.padding_symbol = padding_symbol
        self.unk_symbol = unk_symbol
        self.start_symbol = start_symbol
        self.end_symbol = end_symbol

        self.stoi = OrderedDict(self.special_symbols)
        for symbol in symbols:
            if symbol not in self.stoi:
                self.stoi[symbol] = len(self.stoi)
        self.itos = {index: symbol for symbol, index in self.stoi.items()}

    def __len__(self):
        return len(self.stoi)

    @property
    def num_specials(self):
        """The number of special symbols."""
        return len(self.special_symbols)

    # special tokens
    @property
    def padding_index(self):
        """The index of the padding symbol (-1 if unset)."""
        return self.stoi.get(self.padding_symbol, -1)

    @property
    def unk_index(self):
        """The index of the unknown symbol (-1 if unset)."""
        return self.stoi.get(self.unk_symbol, -1)

    @property
    def start_index(self):
        """The index of the start symbol (-1 if unset)."""
        return self.stoi.get(self.start_symbol, -1)

    @property
    def end_index(self):
        """The index of the end symbol (-1 if unset)."""
        return self.stoi.get(self.end_symbol, -1)

    def __repr__(self):
        return "Vocab(size: {},\nstoi:\n{})".format(len(self), self.stoi)

    def __str__(self):
        return self.__repr__()

    def lookup(self, symbol):
        """Return the index that ``symbol`` corresponds to."""
        return self.stoi[symbol]

    def reverse(self, index):
        """Return the symbol that ``index`` corresponds to."""
        return self.itos[index]

    def add_symbol(self, symbol):
        """Register ``symbol`` under the next free id; no-op if present."""
        if symbol in self.stoi:
            return
        index = len(self.stoi)
        self.stoi[symbol] = index
        self.itos[index] = symbol

    def add_symbols(self, symbols):
        """Register every symbol in ``symbols``."""
        for symbol in symbols:
            self.add_symbol(symbol)
我们班的最高总分为583分|这块黄金重达三百二十四点七五克
我们班的最高总分为五百八十三分| 7 | |numeric range |12\~23
-1.5\~2|十二到二十三
负一点五到二| 8 | |date|她出生于86年8月18日,她弟弟出生于1995年3月1日|她出生于八六年八月十八日, 她弟弟出生于一九九五年三月一日| 9 | |time|等会请在12:05请通知我|等会请在十二点零五分请通知我| 10 | |temperature|今天的最低气温达到-10°C|今天的最低气温达到零下十度| 11 | |fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票| 12 | |percentage|明天有62%的概率降雨|明天有百分之六十二的概率降雨| 13 | |money|随便来几个价格12块5,34.5元,20.1万|随便来几个价格十二块五,三十四点五元,二十点一万| 14 | |telephone|这是固话0421-33441122
这是手机+86 18544139121|这是固话零四二一三三四四一一二二
这是手机八六一八五四四一三九一二一| 15 | ## References 16 | [Pull requests #658 of DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech/pull/658/files) 17 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/zh_normalization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .text_normlization import * 15 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/zh_normalization/chronology.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import re

from .num import DIGITS
from .num import num2str
from .num import verbalize_cardinal
from .num import verbalize_digit


def _time_num2str(num_string: str) -> str:
    """Verbalize a number inside a time expression, keeping a leading zero.

    E.g. '05' -> '零五' rather than '五'.
    """
    result = num2str(num_string.lstrip('0'))
    if num_string.startswith('0'):
        result = DIGITS['0'] + result
    return result


# Time-of-day expression, e.g. 8:30 or 8:30:05 (4 groups).
RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])'
                     r':([0-5][0-9])'
                     r'(:([0-5][0-9]))?')

# Time range, e.g. 8:30-12:30 (9 groups).
RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?'
                           r'(~|-)'
                           r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?')


def replace_time(match) -> str:
    """Verbalize a time (or time range) match in Chinese.

    Args:
        match (re.Match): a match of RE_TIME (4 groups) or RE_TIME_RANGE
            (9 groups).
    Returns:
        str
    """
    # RE_TIME has 4 groups and RE_TIME_RANGE has 9; the group count tells
    # us which pattern produced this match.
    is_range = len(match.groups()) > 5

    hour = match.group(1)
    minute = match.group(2)
    second = match.group(4)

    if is_range:
        hour_2 = match.group(6)
        minute_2 = match.group(7)
        second_2 = match.group(9)

    result = f"{num2str(hour)}点"
    if minute.lstrip('0'):
        if int(minute) == 30:
            result += "半"
        else:
            result += f"{_time_num2str(minute)}分"
    if second and second.lstrip('0'):
        result += f"{_time_num2str(second)}秒"

    if is_range:
        result += "至"
        result += f"{num2str(hour_2)}点"
        if minute_2.lstrip('0'):
            # BUG FIX: this previously tested `int(minute)` (the FIRST
            # time's minutes), so e.g. 8:30-9:45 verbalized the 45 as "半".
            if int(minute_2) == 30:
                result += "半"
            else:
                result += f"{_time_num2str(minute_2)}分"
        if second_2 and second_2.lstrip('0'):
            result += f"{_time_num2str(second_2)}秒"

    return result


# Date with Chinese unit characters, e.g. 2021年8月18日 / 86年3月1号.
RE_DATE = re.compile(r'(\d{4}|\d{2})年'
                     r'((0?[1-9]|1[0-2])月)?'
                     r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?')


def replace_date(match) -> str:
    """Verbalize a Chinese-style date match.

    Args:
        match (re.Match)
    Returns:
        str
    """
    year = match.group(1)
    month = match.group(3)
    day = match.group(5)
    result = ""
    if year:
        # Years are read digit by digit ("二零二一年").
        result += f"{verbalize_digit(year)}年"
    if month:
        result += f"{verbalize_cardinal(month)}月"
    if day:
        # group(9) is the trailing 日/号 character.
        result += f"{verbalize_cardinal(day)}{match.group(9)}"
    return result


# YY/MM/DD or YY-MM-DD or YY.MM.DD dates (same separator enforced via \2).
RE_DATE2 = re.compile(
    r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])')


def replace_date2(match) -> str:
    """Verbalize a separator-style date match (e.g. 2021-08-18).

    Args:
        match (re.Match)
    Returns:
        str
    """
    year = match.group(1)
    month = match.group(3)
    day = match.group(4)
    result = ""
    if year:
        result += f"{verbalize_digit(year)}年"
    if month:
        result += f"{verbalize_cardinal(month)}月"
    if day:
        result += f"{verbalize_cardinal(day)}日"
    return result
import re
import string

from pypinyin.constants import SUPPORT_UCS4

# Full-width <-> half-width conversion tables.
# Full-width -> half-width map for ASCII letters (num: 52).
F2H_ASCII_LETTERS = {
    chr(ord(char) + 65248): char
    for char in string.ascii_letters
}

# Half-width -> full-width map for ASCII letters.
H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}

# Full-width -> half-width map for digits (num: 10).
F2H_DIGITS = {chr(ord(char) + 65248): char for char in string.digits}
# Half-width -> full-width map for digits.
H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}

# Full-width -> half-width map for ASCII punctuation (num: 32).
F2H_PUNCTUATIONS = {chr(ord(char) + 65248): char for char in string.punctuation}
# Half-width -> full-width map for punctuation.
H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}

# Space (num: 1): ideographic space U+3000 <-> ASCII space.
F2H_SPACE = {'\u3000': ' '}
H2F_SPACE = {' ': '\u3000'}

# Matches runs of characters that are NOT pinyin-able Chinese characters;
# usable for extracting NSW (non-standard word) spans.
if SUPPORT_UCS4:
    RE_NSW = re.compile(r'(?:[^'
                        r'\u3007'  # 〇
                        r'\u3400-\u4dbf'  # CJK Extension A: [3400-4DBF]
                        r'\u4e00-\u9fff'  # CJK Unified (basic): [4E00-9FFF]
                        r'\uf900-\ufaff'  # CJK Compatibility: [F900-FAFF]
                        r'\U00020000-\U0002A6DF'  # CJK Extension B: [20000-2A6DF]
                        r'\U0002A703-\U0002B73F'  # CJK Extension C: [2A700-2B73F] — NOTE(review): pattern starts at 2A703, not 2A700 as the range suggests; verify
                        r'\U0002B740-\U0002B81D'  # CJK Extension D: [2B740-2B81D]
                        r'\U0002F80A-\U0002FA1F'  # CJK Compatibility Supplement: [2F800-2FA1F] — NOTE(review): pattern starts at 2F80A; verify
                        r'])+')
else:
    # Narrow build: astral-plane ranges omitted.
    RE_NSW = re.compile(  # pragma: no cover
        r'(?:[^'
        r'\u3007'  # 〇
        r'\u3400-\u4dbf'  # CJK Extension A: [3400-4DBF]
        r'\u4e00-\u9fff'  # CJK Unified (basic): [4E00-9FFF]
        r'\uf900-\ufaff'  # CJK Compatibility: [F900-FAFF]
        r'])+')
def replace_frac(match) -> str:
    """Verbalize a fraction match, e.g. '3/4' -> '四分之三'.

    Args:
        match (re.Match)
    Returns:
        str
    """
    sign_mark = match.group(1)
    numerator = match.group(2)
    denominator = match.group(3)
    prefix = "负" if sign_mark else ""
    # Chinese reads fractions denominator-first: "<den>分之<num>".
    return f"{prefix}{num2str(denominator)}分之{num2str(numerator)}"
def replace_percentage(match) -> str: 59 | """ 60 | Args: 61 | match (re.Match) 62 | Returns: 63 | str 64 | """ 65 | sign = match.group(1) 66 | percent = match.group(2) 67 | sign: str = "负" if sign else "" 68 | percent: str = num2str(percent) 69 | result = f"{sign}百分之{percent}" 70 | return result 71 | 72 | 73 | # 整数表达式 74 | # 带负号的整数 -10 75 | RE_INTEGER = re.compile(r'(-)' r'(\d+)') 76 | 77 | 78 | def replace_negative_num(match) -> str: 79 | """ 80 | Args: 81 | match (re.Match) 82 | Returns: 83 | str 84 | """ 85 | sign = match.group(1) 86 | number = match.group(2) 87 | sign: str = "负" if sign else "" 88 | number: str = num2str(number) 89 | result = f"{sign}{number}" 90 | return result 91 | 92 | 93 | # 编号-无符号整形 94 | # 00078 95 | RE_DEFAULT_NUM = re.compile(r'\d{3}\d*') 96 | 97 | 98 | def replace_default_num(match): 99 | """ 100 | Args: 101 | match (re.Match) 102 | Returns: 103 | str 104 | """ 105 | number = match.group(0) 106 | return verbalize_digit(number) 107 | 108 | 109 | # 数字表达式 110 | # 纯小数 111 | RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' r'|(\.(\d+))') 112 | # 正整数 + 量词 113 | RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几\+])?" 
+ COM_QUANTIFIERS) 114 | RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))') 115 | 116 | 117 | def replace_positive_quantifier(match) -> str: 118 | """ 119 | Args: 120 | match (re.Match) 121 | Returns: 122 | str 123 | """ 124 | number = match.group(1) 125 | match_2 = match.group(2) 126 | if match_2 == "+": 127 | match_2 = "多" 128 | match_2: str = match_2 if match_2 else "" 129 | quantifiers: str = match.group(3) 130 | number: str = num2str(number) 131 | result = f"{number}{match_2}{quantifiers}" 132 | return result 133 | 134 | 135 | def replace_number(match) -> str: 136 | """ 137 | Args: 138 | match (re.Match) 139 | Returns: 140 | str 141 | """ 142 | sign = match.group(1) 143 | number = match.group(2) 144 | pure_decimal = match.group(5) 145 | if pure_decimal: 146 | result = num2str(pure_decimal) 147 | else: 148 | sign: str = "负" if sign else "" 149 | number: str = num2str(number) 150 | result = f"{sign}{number}" 151 | return result 152 | 153 | 154 | # 范围表达式 155 | # match.group(1) and match.group(8) are copy from RE_NUMBER 156 | 157 | RE_RANGE = re.compile( 158 | r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))[-~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))') 159 | 160 | 161 | def replace_range(match) -> str: 162 | """ 163 | Args: 164 | match (re.Match) 165 | Returns: 166 | str 167 | """ 168 | first, second = match.group(1), match.group(8) 169 | first = RE_NUMBER.sub(replace_number, first) 170 | second = RE_NUMBER.sub(replace_number, second) 171 | result = f"{first}到{second}" 172 | return result 173 | 174 | 175 | def _get_value(value_string: str, use_zero: bool=True) -> List[str]: 176 | stripped = value_string.lstrip('0') 177 | if len(stripped) == 0: 178 | return [] 179 | elif len(stripped) == 1: 180 | if use_zero and len(stripped) < len(value_string): 181 | return [DIGITS['0'], DIGITS[stripped]] 182 | else: 183 | return [DIGITS[stripped]] 184 | else: 185 | largest_unit = next( 186 | power for power in reversed(UNITS.keys()) if power < len(stripped)) 187 | first_part = 
value_string[:-largest_unit] 188 | second_part = value_string[-largest_unit:] 189 | return _get_value(first_part) + [UNITS[largest_unit]] + _get_value( 190 | second_part) 191 | 192 | 193 | def verbalize_cardinal(value_string: str) -> str: 194 | if not value_string: 195 | return '' 196 | 197 | # 000 -> '零' , 0 -> '零' 198 | value_string = value_string.lstrip('0') 199 | if len(value_string) == 0: 200 | return DIGITS['0'] 201 | 202 | result_symbols = _get_value(value_string) 203 | # verbalized number starting with '一十*' is abbreviated as `十*` 204 | if len(result_symbols) >= 2 and result_symbols[0] == DIGITS[ 205 | '1'] and result_symbols[1] == UNITS[1]: 206 | result_symbols = result_symbols[1:] 207 | return ''.join(result_symbols) 208 | 209 | 210 | def verbalize_digit(value_string: str, alt_one=False) -> str: 211 | result_symbols = [DIGITS[digit] for digit in value_string] 212 | result = ''.join(result_symbols) 213 | if alt_one: 214 | result = result.replace("一", "幺") 215 | return result 216 | 217 | 218 | def num2str(value_string: str) -> str: 219 | integer_decimal = value_string.split('.') 220 | if len(integer_decimal) == 1: 221 | integer = integer_decimal[0] 222 | decimal = '' 223 | elif len(integer_decimal) == 2: 224 | integer, decimal = integer_decimal 225 | else: 226 | raise ValueError( 227 | f"The value string: '${value_string}' has more than one point in it." 228 | ) 229 | 230 | result = verbalize_cardinal(integer) 231 | 232 | decimal = decimal.rstrip('0') 233 | if decimal: 234 | # '.22' is verbalized as '零点二二' 235 | # '3.20' is verbalized as '三点二 236 | result = result if result else "零" 237 | result += '点' + verbalize_digit(decimal) 238 | return result 239 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/zh_normalization/phonecode.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | 16 | from .num import verbalize_digit 17 | 18 | # 规范化固话/手机号码 19 | # 手机 20 | # http://www.jihaoba.com/news/show/13680 21 | # 移动:139、138、137、136、135、134、159、158、157、150、151、152、188、187、182、183、184、178、198 22 | # 联通:130、131、132、156、155、186、185、176 23 | # 电信:133、153、189、180、181、177 24 | RE_MOBILE_PHONE = re.compile( 25 | r"(? str: 34 | if mobile: 35 | sp_parts = phone_string.strip('+').split() 36 | result = ','.join( 37 | [verbalize_digit(part, alt_one=True) for part in sp_parts]) 38 | return result 39 | else: 40 | sil_parts = phone_string.split('-') 41 | result = ','.join( 42 | [verbalize_digit(part, alt_one=True) for part in sil_parts]) 43 | return result 44 | 45 | 46 | def replace_phone(match) -> str: 47 | """ 48 | Args: 49 | match (re.Match) 50 | Returns: 51 | str 52 | """ 53 | return phone2str(match.group(0), mobile=False) 54 | 55 | 56 | def replace_mobile(match) -> str: 57 | """ 58 | Args: 59 | match (re.Match) 60 | Returns: 61 | str 62 | """ 63 | return phone2str(match.group(0)) 64 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/zh_normalization/quantifier.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | 16 | from .num import num2str 17 | 18 | # 温度表达式,温度会影响负号的读法 19 | # -3°C 零下三度 20 | RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)') 21 | 22 | 23 | def replace_temperature(match) -> str: 24 | """ 25 | Args: 26 | match (re.Match) 27 | Returns: 28 | str 29 | """ 30 | sign = match.group(1) 31 | temperature = match.group(2) 32 | unit = match.group(3) 33 | sign: str = "零下" if sign else "" 34 | temperature: str = num2str(temperature) 35 | unit: str = "摄氏度" if unit == "摄氏度" else "度" 36 | result = f"{sign}{temperature}{unit}" 37 | return result 38 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/zh_normalization/text_normlization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | from typing import List 16 | 17 | from .char_convert import tranditional_to_simplified 18 | from .chronology import RE_DATE 19 | from .chronology import RE_DATE2 20 | from .chronology import RE_TIME 21 | from .chronology import RE_TIME_RANGE 22 | from .chronology import replace_date 23 | from .chronology import replace_date2 24 | from .chronology import replace_time 25 | from .constants import F2H_ASCII_LETTERS 26 | from .constants import F2H_DIGITS 27 | from .constants import F2H_SPACE 28 | from .num import RE_DECIMAL_NUM 29 | from .num import RE_DEFAULT_NUM 30 | from .num import RE_FRAC 31 | from .num import RE_INTEGER 32 | from .num import RE_NUMBER 33 | from .num import RE_PERCENTAGE 34 | from .num import RE_POSITIVE_QUANTIFIERS 35 | from .num import RE_RANGE 36 | from .num import replace_default_num 37 | from .num import replace_frac 38 | from .num import replace_negative_num 39 | from .num import replace_number 40 | from .num import replace_percentage 41 | from .num import replace_positive_quantifier 42 | from .num import replace_range 43 | from .phonecode import RE_MOBILE_PHONE 44 | from .phonecode import RE_NATIONAL_UNIFORM_NUMBER 45 | from .phonecode import RE_TELEPHONE 46 | from .phonecode import replace_mobile 47 | from .phonecode import replace_phone 48 | from .quantifier import RE_TEMPERATURE 49 | from .quantifier import replace_temperature 50 | 51 | 52 | class TextNormalizer(): 53 | def __init__(self): 54 | self.SENTENCE_SPLITOR = re.compile(r'([:、,;。?!,;?!][”’]?)') 55 | 56 | def _split(self, text: str, lang="zh") -> List[str]: 57 | """Split long text into sentences with sentence-splitting punctuations. 58 | Args: 59 | text (str): The input text. 60 | Returns: 61 | List[str]: Sentences. 
62 | """ 63 | # Only for pure Chinese here 64 | if lang == "zh": 65 | text = text.replace(" ", "") 66 | # 过滤掉特殊字符 67 | text = re.sub(r'[《》【】<=>{}()()#&@“”^_|…\\]', '', text) 68 | text = self.SENTENCE_SPLITOR.sub(r'\1\n', text) 69 | text = text.strip() 70 | sentences = [sentence.strip() for sentence in re.split(r'\n+', text)] 71 | return sentences 72 | 73 | def _post_replace(self, sentence: str) -> str: 74 | sentence = sentence.replace('/', '每') 75 | sentence = sentence.replace('~', '至') 76 | 77 | return sentence 78 | 79 | def normalize_sentence(self, sentence: str) -> str: 80 | # basic character conversions 81 | sentence = tranditional_to_simplified(sentence) 82 | sentence = sentence.translate(F2H_ASCII_LETTERS).translate( 83 | F2H_DIGITS).translate(F2H_SPACE) 84 | 85 | # number related NSW verbalization 86 | sentence = RE_DATE.sub(replace_date, sentence) 87 | sentence = RE_DATE2.sub(replace_date2, sentence) 88 | 89 | # range first 90 | sentence = RE_TIME_RANGE.sub(replace_time, sentence) 91 | sentence = RE_TIME.sub(replace_time, sentence) 92 | 93 | sentence = RE_TEMPERATURE.sub(replace_temperature, sentence) 94 | sentence = RE_FRAC.sub(replace_frac, sentence) 95 | sentence = RE_PERCENTAGE.sub(replace_percentage, sentence) 96 | sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence) 97 | 98 | sentence = RE_TELEPHONE.sub(replace_phone, sentence) 99 | sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence) 100 | 101 | sentence = RE_RANGE.sub(replace_range, sentence) 102 | sentence = RE_INTEGER.sub(replace_negative_num, sentence) 103 | sentence = RE_DECIMAL_NUM.sub(replace_number, sentence) 104 | sentence = RE_POSITIVE_QUANTIFIERS.sub(replace_positive_quantifier, 105 | sentence) 106 | sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence) 107 | sentence = RE_NUMBER.sub(replace_number, sentence) 108 | sentence = self._post_replace(sentence) 109 | 110 | return sentence 111 | 112 | def normalize(self, text: str) -> List[str]: 113 | sentences = 
self._split(text) 114 | 115 | sentences = [self.normalize_sentence(sent) for sent in sentences] 116 | return sentences 117 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/main.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | inference_dir="$PWD/resources" 4 | 5 | am="fastspeech2_ljspeech" 6 | am_onnx="fastspeech2_ljspeech/fastspeech2_ljspeech.onnx" 7 | 8 | voc="pwgan_ljspeech" 9 | voc_onnx="pwgan_ljspeech.onnx" 10 | 11 | output_dir="result" 12 | text="$PWD/sentences_en.txt" 13 | phones_dict="$PWD/resources/fastspeech2_ljspeech/phone_id_map.txt" 14 | 15 | python tts3.py \ 16 | --inference_dir=${inference_dir} \ 17 | --am=${am} \ 18 | --am_onnx=${am_onnx} \ 19 | --voc=${voc} \ 20 | --voc_onnx=${voc_onnx} \ 21 | --output_dir=${output_dir} \ 22 | --text=${text} \ 23 | --phones_dict=${phones_dict} \ 24 | --device=cpu \ 25 | --cpu_threads=2 26 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/requirements.txt: -------------------------------------------------------------------------------- 1 | g2p_en==2.1.0 2 | inflect==5.3.0 3 | jieba==0.42.1 4 | numpy>=1.19.3 5 | onnxruntime>=1.10.0 6 | pypinyin==0.44.0 7 | pypinyin_dict==0.2.0 8 | SoundFile==0.10.3.post1 9 | timer==0.2.2 10 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/sentences_en.txt: -------------------------------------------------------------------------------- 1 | 001 Life was like a box of chocolates, you never know what you're gonna get. 2 | 002 With great power there must come great responsibility. 3 | 003 To be or not to be, that’s a question. 4 | 004 A man can be destroyed but not defeated 5 | 005 Do not, for one repulse, give up the purpose that you resolved to effort. 
6 | 006 Death is just a part of life, something we're all destined to do. 7 | 007 I think it's hard winning a war with words. 8 | 008 Don’t argue with the people of strong determination, because they may change the fact! 9 | 009 Love you three thousand times. -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/syn_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# NOTE: the frontend imports are done lazily inside get_frontend (below) so
# that importing this module — e.g. to use get_sentences alone — does not
# pull in the heavy frontend package.


# input
def get_sentences(args):
    """Read (utt_id, sentence) pairs for evaluation from ``args.text``.

    Each non-empty line is "<utt_id> <text...>". For lang 'zh' the words are
    joined without spaces, for 'en' with single spaces.

    Args:
        args: argparse.Namespace with at least ``text`` and ``lang``.
    Returns:
        list of (utt_id, sentence) tuples.
    Raises:
        ValueError: if ``args.lang`` is neither 'zh' nor 'en'.
    """
    lang = getattr(args, 'lang', None)
    if lang not in ('zh', 'en'):
        # Previously an unsupported lang crashed later with NameError on
        # ``sentence``; fail early with a clear message instead.
        raise ValueError(f"lang should be 'zh' or 'en', got {lang!r}")
    sep = "" if lang == 'zh' else " "
    sentences = []
    with open(args.text, 'rt', encoding='utf-8') as f:
        for line in f:
            items = line.strip().split()
            if not items:
                # Skip blank lines instead of raising IndexError on items[0].
                continue
            utt_id = items[0]
            sentences.append((utt_id, sep.join(items[1:])))
    return sentences


# frontend
def get_frontend(args):
    """Build the text frontend matching ``args.lang``.

    Args:
        args: argparse.Namespace with ``lang``, ``phones_dict`` and,
            optionally for 'zh', ``tones_dict``.
    Returns:
        Frontend (zh) or English (en) instance.
    Raises:
        ValueError: if ``args.lang`` is neither 'zh' nor 'en'.
    """
    lang = getattr(args, 'lang', None)
    if lang == 'zh':
        from frontend.zh_frontend import Frontend
        # tts3.py's parser defines no --tones_dict, so fall back to None
        # instead of raising AttributeError.
        frontend = Frontend(phone_vocab_path=args.phones_dict,
                            tone_vocab_path=getattr(args, 'tones_dict', None))
    elif lang == 'en':
        from frontend import English
        frontend = English(phone_vocab_path=args.phones_dict)
    else:
        # Previously only printed "wrong lang!" and then crashed with
        # UnboundLocalError on ``frontend``; raise a clear error instead.
        raise ValueError(f"lang should be 'zh' or 'en', got {lang!r}")
    print("frontend done!")
    return frontend
# --------------------------------------------------------------------------------
# /python/PaddleSpeech/ljspeech_tts3/tts3.py:
# --------------------------------------------------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | import argparse 15 | from pathlib import Path 16 | 17 | import numpy as np 18 | import onnxruntime as ort 19 | import soundfile as sf 20 | from timer import timer 21 | 22 | from syn_utils import get_frontend, get_sentences 23 | 24 | root_dir = str(Path.cwd()) 25 | 26 | 27 | def str2bool(str): 28 | return True if str.lower() == 'true' else False 29 | 30 | 31 | def get_sess(args, filed='am'): 32 | full_name = '' 33 | if filed == 'am': 34 | full_name = args.am_onnx 35 | elif filed == 'voc': 36 | full_name = args.voc_onnx 37 | 38 | model_dir = str(Path(args.inference_dir) / full_name) 39 | 40 | sess_options = ort.SessionOptions() 41 | sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL 42 | sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL 43 | 44 | if args.device == "gpu": 45 | # fastspeech2/mb_melgan can't use trt now! 46 | if args.use_trt: 47 | providers = ['TensorrtExecutionProvider'] 48 | else: 49 | providers = ['CUDAExecutionProvider'] 50 | elif args.device == "cpu": 51 | providers = ['CPUExecutionProvider'] 52 | 53 | sess_options.intra_op_num_threads = args.cpu_threads 54 | sess = ort.InferenceSession(model_dir, 55 | providers=providers, 56 | sess_options=sess_options) 57 | return sess 58 | 59 | 60 | def ort_predict(args): 61 | 62 | # frontend 63 | frontend = get_frontend(args) 64 | 65 | output_dir = Path(args.output_dir) 66 | output_dir.mkdir(parents=True, exist_ok=True) 67 | sentences = get_sentences(args) 68 | 69 | am_name = args.am[:args.am.rindex('_')] 70 | am_dataset = args.am[args.am.rindex('_') + 1:] 71 | fs = 24000 if am_dataset != 'ljspeech' else 22050 72 | 73 | # am 74 | am_sess = get_sess(args, filed='am') 75 | 76 | # vocoder 77 | voc_sess = get_sess(args, filed='voc') 78 | 79 | # frontend warmup 80 | # Loading model cost 0.5+ seconds 81 | if args.lang == 'zh': 82 | frontend.get_input_ids("你好,欢迎使用飞桨框架进行深度学习研究!", 83 | merge_sentences=True) 84 | elif args.lang == 'en': 85 | frontend.get_input_ids("Love 
you three thousand times.", 86 | merge_sentences=False) 87 | else: 88 | print("lang should in be 'zh' or 'en' here!") 89 | 90 | N = 0 91 | T = 0 92 | merge_sentences = True 93 | for utt_id, sentence in sentences: 94 | with timer() as t: 95 | if args.lang == 'zh' or args.lang == 'en': 96 | input_ids = frontend.get_input_ids( 97 | sentence, merge_sentences=merge_sentences) 98 | 99 | phone_ids = input_ids["phone_ids"] 100 | else: 101 | print("lang should in be 'zh' here!") 102 | 103 | # merge_sentences=True here, so we only use the first item of phone_ids 104 | phone_ids = phone_ids[0] 105 | mel = am_sess.run(output_names=None, input_feed={ 106 | 'text': phone_ids}) 107 | mel = mel[0] 108 | wav = voc_sess.run(output_names=None, input_feed={'logmel': mel}) 109 | 110 | N += len(wav[0]) 111 | T += t.elapse 112 | speed = len(wav[0]) / t.elapse 113 | rtf = fs / speed 114 | sf.write( 115 | str(output_dir / (utt_id + ".wav")), 116 | np.array(wav)[0], 117 | samplerate=fs) 118 | print( 119 | f"{utt_id}, mel: {mel.shape}, wave: {len(wav[0])}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}." 
120 | ) 121 | print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }") 122 | 123 | 124 | def parse_args(): 125 | parser = argparse.ArgumentParser(description="Infernce with onnxruntime.") 126 | 127 | # acoustic model 128 | parser.add_argument('--am', type=str, 129 | default='fastspeech2_ljspeech', 130 | help='Choose acoustic model type of tts task.') 131 | parser.add_argument('--am_onnx', type=str, 132 | default='fastspeech2_ljspeech/fastspeech2_ljspeech.onnx') 133 | 134 | parser.add_argument("--phones_dict", type=str, 135 | default='resources/fastspeech2_ljspeech/phone_id_map.txt', 136 | help="phone vocabulary file.") 137 | 138 | # voc 139 | parser.add_argument('--voc', type=str, 140 | default='pwgan_ljspeech', 141 | help='Choose vocoder type of tts task.') 142 | 143 | parser.add_argument('--voc_onnx', type=str, 144 | default='pwgan_ljspeech.onnx') 145 | 146 | # other 147 | parser.add_argument("--inference_dir", type=str, 148 | default=f"{root_dir}/resources", 149 | help="dir to save inference models") 150 | 151 | parser.add_argument("--text", type=str, 152 | default='sentences_en.txt') 153 | 154 | parser.add_argument("--output_dir", type=str, 155 | default='infer_result') 156 | 157 | parser.add_argument('--lang', type=str, 158 | default='en', 159 | help='Choose model language. zh or en') 160 | 161 | # inference 162 | parser.add_argument("--use_trt", 163 | type=str2bool, 164 | default=False, 165 | help="Whether to use inference engin TensorRT.", ) 166 | 167 | parser.add_argument("--device", default="cpu", 168 | choices=["gpu", "cpu"]) 169 | 170 | parser.add_argument('--cpu_threads', type=int, default=1) 171 | 172 | args, _ = parser.parse_known_args() 173 | Path(args.output_dir).mkdir(parents=True, exist_ok=True) 174 | return args 175 | 176 | 177 | if __name__ == "__main__": 178 | args = parse_args() 179 | ort_predict(args) --------------------------------------------------------------------------------