├── .github └── workflows │ └── SyncToGitee.yml ├── .gitignore ├── LICENSE ├── README.md ├── cpp └── .gitkeep └── python └── PaddleSpeech ├── README.md ├── convert_model.md ├── csmsc_tts2 ├── README.md ├── acoustic │ ├── __init__.py │ └── speedyspeech_csmsc.py ├── frontend │ ├── __init__.py │ ├── arpabet.py │ ├── generate_lexicon.py │ ├── normalizer │ │ ├── __init__.py │ │ ├── abbrrviation.py │ │ ├── acronyms.py │ │ ├── normalizer.py │ │ ├── numbers.py │ │ └── width.py │ ├── phonectic.py │ ├── punctuation.py │ ├── tone_sandhi.py │ ├── vocab.py │ ├── zh_frontend.py │ └── zh_normalization │ │ ├── README.md │ │ ├── __init__.py │ │ ├── char_convert.py │ │ ├── chronology.py │ │ ├── constants.py │ │ ├── num.py │ │ ├── phonecode.py │ │ ├── quantifier.py │ │ └── text_normlization.py ├── infer_result │ ├── 001.wav │ ├── 002.wav │ ├── 003.wav │ ├── 004.wav │ ├── 005.wav │ ├── 006.wav │ ├── 007.wav │ ├── 008.wav │ ├── 009.wav │ ├── 010.wav │ ├── 011.wav │ ├── 012.wav │ ├── 013.wav │ ├── 014.wav │ ├── 015.wav │ ├── 016.wav │ └── 017.wav ├── requirements.txt ├── sentences.txt ├── tts2.py ├── utils.py └── vocoder │ ├── __init__.py │ └── pwgan_csmsc.py ├── csmsc_tts3 ├── README.md ├── assets │ ├── 000001.wav │ └── audio_icon.png ├── csmsc_test.txt ├── frontend │ ├── __init__.py │ ├── arpabet.py │ ├── generate_lexicon.py │ ├── normalizer │ │ ├── __init__.py │ │ ├── abbrrviation.py │ │ ├── acronyms.py │ │ ├── normalizer.py │ │ ├── numbers.py │ │ └── width.py │ ├── phonectic.py │ ├── punctuation.py │ ├── tone_sandhi.py │ ├── vocab.py │ ├── zh_frontend.py │ └── zh_normalization │ │ ├── README.md │ │ ├── __init__.py │ │ ├── char_convert.py │ │ ├── chronology.py │ │ ├── constants.py │ │ ├── num.py │ │ ├── phonecode.py │ │ ├── quantifier.py │ │ └── text_normlization.py ├── main.sh ├── requirements.txt ├── syn_utils.py └── tts3.py └── ljspeech_tts3 ├── README.md ├── assets ├── 009.wav └── audio_icon.png ├── frontend ├── __init__.py ├── arpabet.py ├── generate_lexicon.py ├── normalizer │ 
├── __init__.py │ ├── abbrrviation.py │ ├── acronyms.py │ ├── normalizer.py │ ├── numbers.py │ └── width.py ├── phonectic.py ├── punctuation.py ├── tone_sandhi.py ├── vocab.py ├── zh_frontend.py └── zh_normalization │ ├── README.md │ ├── __init__.py │ ├── char_convert.py │ ├── chronology.py │ ├── constants.py │ ├── num.py │ ├── phonecode.py │ ├── quantifier.py │ └── text_normlization.py ├── main.sh ├── requirements.txt ├── sentences_en.txt ├── syn_utils.py └── tts3.py /.github/workflows/SyncToGitee.yml: -------------------------------------------------------------------------------- 1 | name: syncToGitee 2 | on: 3 | push: 4 | branches: 5 | - main 6 | jobs: 7 | repo-sync: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout source codes 11 | uses: actions/checkout@v2 12 | 13 | - name: Mirror the Github organization repos to Gitee. 14 | uses: Yikun/hub-mirror-action@master 15 | with: 16 | src: 'github/RapidAI' 17 | dst: 'gitee/RapidAI' 18 | dst_key: ${{ secrets.GITEE_PRIVATE_KEY }} 19 | dst_token: ${{ secrets.GITEE_TOKEN }} 20 | force_update: true 21 | debug: true 22 | 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | 4 | csmsc_tts2/resources/ 5 | csmsc_tts3/resources/ 6 | ljspeech_tts3/resources/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## RapidTTS(文本转语音) 2 | - 本仓库是将开源的文本转语音项目中模型转换为ONNX格式,并对代码做了整理而来。 3 | - 本着易用的原则整理,方便快速落地使用。 4 | - 努力做到推理引擎只用onnxruntime等轻量推理引擎,不依赖torch或者Paddle。 5 | 6 | #### 📖文档导航 7 | - [PaddleSpeech](./python/PaddleSpeech/README.md) 8 | 9 | #### TODO 10 | - 参考[link](https://github.com/RapidAI/RapidTTS/labels/enhancement) 11 | -------------------------------------------------------------------------------- /cpp/.gitkeep: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/cpp/.gitkeep -------------------------------------------------------------------------------- /python/PaddleSpeech/README.md: -------------------------------------------------------------------------------- 1 | ## RapidTTS(文本转语音) 2 | 3 | |目录名称|推理引擎|支持语言| 4 | |:---:|:---:|:---:| 5 | |[csmsc_tts2](./csmsc_tts2)|Paddle+ONNXRuntime|中文和数字| 6 | |[csmsc_tts3](./csmsc_tts3)|ONNXRuntime|中文和数字| 7 | |[ljspeech_tts3](./ljspeech_tts3)|ONNXRuntime|英文| 8 | 9 | ### 更新日志 10 | 11 | #### 🎈2022-04-16 update 12 | - 添加`ljspeech_tts3`,英文文本转语音模块 13 | 14 | #### 2022-04-09 update 15 | - 添加`csmsc_tts2`中模型转换说明文档([模型转换](./convert_model.md)) 16 | 17 | #### 2022-04-08 update 18 | - 尝试采用OpenVINO推理引擎,但是目前模型尚未转换成功,具体尝试过程参见:[Paddle模型尝试转换](https://github.com/RapidAI/RapidTTS2/wiki/Paddle%E6%A8%A1%E5%9E%8B%E5%B0%9D%E8%AF%95%E8%BD%AC%E6%8D%A2) 19 | -------------------------------------------------------------------------------- /python/PaddleSpeech/convert_model.md: -------------------------------------------------------------------------------- 1 | #### 转换pwgan_csmsc到onnx 2 | - `Paddle2ONNX`: latest 3 | - 模型下载: [Pretrained model](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc1#pretrained-models) | [link](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_static_0.4.zip) 4 | - 转换脚本: 5 | ```bash 6 | paddle2onnx --model_dir pwg_baker_static_0.4 \ 7 | --model_filename pwgan_csmsc.pdmodel \ 8 | --params_filename pwgan_csmsc.pdiparams \ 9 | --save_file pwgan_csmsc.onnx \ 10 | --opset_version 11 11 | ``` 12 | 13 | #### 转换pwgan_ljspeech和fastspeech_ljspeech到onnx 14 | - pwgan_ljspeech官方只提供了[动态图模型](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/voc1#pretrained-model)。不过运行代码中提供了动态图转静态图模型代码,只需要搭建PaddleSpeech运行环境,跑一遍示例demo,即可得到对应的静态模型 15 | - 详情参见[AI
Studio](https://aistudio.baidu.com/aistudio/projectdetail/3359986?shared=1) -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/README.md: -------------------------------------------------------------------------------- 1 | #### csmsc_tts2 2 | - **支持合成语言**: 中文和数字,不支持英文 3 | - 基于[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)下的[TTS2](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README_cn.md)整理而来 4 | - 共分为三步,`frontend`、`acoustic`、`vocoder` 5 | - `acoustic`模型推理目前基于`PaddlePaddle` 6 | - `vocoder`模型推理基于`ONNXRuntime` 7 | - 其中PaddleSpeech中提供的预训练模型可以参见[link](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README_cn.md#4-%E9%A2%84%E8%AE%AD%E7%BB%83%E6%A8%A1%E5%9E%8B)。在csmsc_tts2中使用的是: 8 | 9 | |主要部分|具体模型|支持语言| 10 | |:---|:---|:---| 11 | |声学模型|[speedyspeech_csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts2)|zh| 12 | |声码器|[pwgan_csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc1#pretrained-models)|zh| 13 | 14 | #### [模型转换](./convert_model.md) 15 | 16 | #### 运行步骤 17 | 1. 下载`resources`, [Google Drive](https://drive.google.com/file/d/1q3NCydNhFeU2cpLUgevidCHeSzclK0a7/view?usp=sharing) | [百度网盘,提取码:kmcf](https://pan.baidu.com/s/1MGbaS6e_pFqrfIc5OVjWjg), 解压到RapidTTS2目录下 18 | 19 | 2. 安装`requirements.txt` 20 | ```bash 21 | pip install -r requirements.txt -i https://pypi.douban.com/simple/ 22 | ``` 23 | 24 | 3. 运行`tts2.py` 25 | ```bash 26 | python tts2.py 27 | ``` 28 | 4. 运行日志如下: 29 | ```text 30 | 初始化前处理部分 31 | frontend done! 32 | 初始化提取特征模型 33 | am_predictor done! 34 | 初始化合成wav模型 35 | 合成指定句子 36 | Building prefix dict from the default dictionary ... 37 | Loading model from cache /tmp/jieba.cache 38 | Loading model cost 1.431 seconds. 39 | Prefix dict has been built successfully. 40 | infer_result/001.wav done! cost: 7.226019859313965s 41 | infer_result/002.wav done! 
cost: 9.149477005004883s 42 | infer_result/003.wav done! cost: 3.4020116329193115s 43 | infer_result/004.wav done! cost: 14.5472412109375s 44 | infer_result/005.wav done! cost: 14.142913818359375s 45 | infer_result/006.wav done! cost: 10.191686630249023s 46 | infer_result/007.wav done! cost: 15.726643800735474s 47 | infer_result/008.wav done! cost: 15.421608209609985s 48 | infer_result/009.wav done! cost: 8.083441972732544s 49 | infer_result/010.wav done! cost: 10.538750886917114s 50 | infer_result/011.wav done! cost: 7.974739074707031s 51 | infer_result/012.wav done! cost: 7.274432897567749s 52 | infer_result/013.wav done! cost: 8.204563856124878s 53 | infer_result/014.wav done! cost: 8.994312286376953s 54 | infer_result/015.wav done! cost: 5.084768056869507s 55 | infer_result/016.wav done! cost: 5.3102569580078125s 56 | ``` 57 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/acoustic/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | from .speedyspeech_csmsc import SpeedySpeechAcoustic 3 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/acoustic/speedyspeech_csmsc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
from paddle import inference


class SpeedySpeechAcoustic(object):
    """SpeedySpeech acoustic model wrapped around a Paddle inference predictor.

    Loads an exported static-graph model and turns phone/tone id sequences
    into acoustic features (returned as a numpy array).
    """

    def __init__(self, pdmodel_path, pdiparams_path):
        # Build the predictor from the exported static-graph model files.
        config = inference.Config(pdmodel_path, pdiparams_path)
        config.disable_glog_info()  # silence Paddle's glog output
        self.am_predictor = inference.create_predictor(config)

        self.am_input_names = self.am_predictor.get_input_names()
        self.am_output_names = self.am_predictor.get_output_names()

    def _feed(self, input_name, id_tensors):
        # Copy one id array (phones or tones) into the named input slot.
        # Only the first element of the batch list is used, matching the
        # single-sentence inference path.
        array = id_tensors[0].numpy()
        handle = self.am_predictor.get_input_handle(input_name)
        handle.reshape(array.shape)
        handle.copy_from_cpu(array)

    def __call__(self, input_ids):
        """Run the acoustic model.

        Parameters
        ----------
        input_ids : dict
            Must contain "phone_ids" and "tone_ids" — lists of paddle
            Tensors produced by the frontend.

        Returns
        -------
        numpy.ndarray
            The predictor's first output, copied to CPU.
        """
        self._feed(self.am_input_names[0], input_ids["phone_ids"])
        self._feed(self.am_input_names[1], input_ids["tone_ids"])

        self.am_predictor.run()
        out_handle = self.am_predictor.get_output_handle(self.am_output_names[0])
        return out_handle.copy_to_cpu()
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .generate_lexicon import * 15 | from .normalizer import * 16 | from .phonectic import * 17 | from .punctuation import * 18 | from .tone_sandhi import * 19 | from .vocab import * 20 | from .zh_normalization import * 21 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/arpabet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .phonectic import Phonetics 15 | """ 16 | A phonology system with ARPABET symbols and limited punctuations. The G2P 17 | conversion is done by g2p_en. 18 | 19 | Note that g2p_en does not handle words with hypen well. So make sure the input 20 | sentence is first normalized. 
21 | """ 22 | from .vocab import Vocab 23 | from g2p_en import G2p 24 | 25 | 26 | class ARPABET(Phonetics): 27 | """A phonology for English that uses ARPABET as the phoneme vocabulary. 28 | See http://www.speech.cs.cmu.edu/cgi-bin/cmudict for more details. 29 | Phoneme Example Translation 30 | ------- ------- ----------- 31 | AA odd AA D 32 | AE at AE T 33 | AH hut HH AH T 34 | AO ought AO T 35 | AW cow K AW 36 | AY hide HH AY D 37 | B be B IY 38 | CH cheese CH IY Z 39 | D dee D IY 40 | DH thee DH IY 41 | EH Ed EH D 42 | ER hurt HH ER T 43 | EY ate EY T 44 | F fee F IY 45 | G green G R IY N 46 | HH he HH IY 47 | IH it IH T 48 | IY eat IY T 49 | JH gee JH IY 50 | K key K IY 51 | L lee L IY 52 | M me M IY 53 | N knee N IY 54 | NG ping P IH NG 55 | OW oat OW T 56 | OY toy T OY 57 | P pee P IY 58 | R read R IY D 59 | S sea S IY 60 | SH she SH IY 61 | T tea T IY 62 | TH theta TH EY T AH 63 | UH hood HH UH D 64 | UW two T UW 65 | V vee V IY 66 | W we W IY 67 | Y yield Y IY L D 68 | Z zee Z IY 69 | ZH seizure S IY ZH ER 70 | """ 71 | phonemes = [ 72 | 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'B', 'CH', 'D', 'DH', 'EH', 'ER', 73 | 'EY', 'F', 'G', 'HH', 'IH', 'IY', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 74 | 'OY', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UW', 'UH', 'V', 'W', 'Y', 'Z', 75 | 'ZH' 76 | ] 77 | punctuations = [',', '.', '?', '!'] 78 | symbols = phonemes + punctuations 79 | _stress_to_no_stress_ = { 80 | 'AA0': 'AA', 81 | 'AA1': 'AA', 82 | 'AA2': 'AA', 83 | 'AE0': 'AE', 84 | 'AE1': 'AE', 85 | 'AE2': 'AE', 86 | 'AH0': 'AH', 87 | 'AH1': 'AH', 88 | 'AH2': 'AH', 89 | 'AO0': 'AO', 90 | 'AO1': 'AO', 91 | 'AO2': 'AO', 92 | 'AW0': 'AW', 93 | 'AW1': 'AW', 94 | 'AW2': 'AW', 95 | 'AY0': 'AY', 96 | 'AY1': 'AY', 97 | 'AY2': 'AY', 98 | 'EH0': 'EH', 99 | 'EH1': 'EH', 100 | 'EH2': 'EH', 101 | 'ER0': 'ER', 102 | 'ER1': 'ER', 103 | 'ER2': 'ER', 104 | 'EY0': 'EY', 105 | 'EY1': 'EY', 106 | 'EY2': 'EY', 107 | 'IH0': 'IH', 108 | 'IH1': 'IH', 109 | 'IH2': 'IH', 110 | 'IY0': 'IY', 111 | 'IY1': 'IY', 
112 | 'IY2': 'IY', 113 | 'OW0': 'OW', 114 | 'OW1': 'OW', 115 | 'OW2': 'OW', 116 | 'OY0': 'OY', 117 | 'OY1': 'OY', 118 | 'OY2': 'OY', 119 | 'UH0': 'UH', 120 | 'UH1': 'UH', 121 | 'UH2': 'UH', 122 | 'UW0': 'UW', 123 | 'UW1': 'UW', 124 | 'UW2': 'UW' 125 | } 126 | 127 | def __init__(self): 128 | self.backend = G2p() 129 | self.vocab = Vocab(self.phonemes + self.punctuations) 130 | 131 | def _remove_vowels(self, phone): 132 | return self._stress_to_no_stress_.get(phone, phone) 133 | 134 | def phoneticize(self, sentence, add_start_end=False): 135 | """ Normalize the input text sequence and convert it into pronunciation sequence. 136 | 137 | Parameters 138 | ----------- 139 | sentence: str 140 | The input text sequence. 141 | 142 | Returns 143 | ---------- 144 | List[str] 145 | The list of pronunciation sequence. 146 | """ 147 | phonemes = [ 148 | self._remove_vowels(item) for item in self.backend(sentence) 149 | ] 150 | if add_start_end: 151 | start = self.vocab.start_symbol 152 | end = self.vocab.end_symbol 153 | phonemes = [start] + phonemes + [end] 154 | phonemes = [item for item in phonemes if item in self.vocab.stoi] 155 | return phonemes 156 | 157 | def numericalize(self, phonemes): 158 | """ Convert pronunciation sequence into pronunciation id sequence. 159 | 160 | Parameters 161 | ----------- 162 | phonemes: List[str] 163 | The list of pronunciation sequence. 164 | 165 | Returns 166 | ---------- 167 | List[int] 168 | The list of pronunciation id sequence. 169 | """ 170 | ids = [self.vocab.lookup(item) for item in phonemes] 171 | return ids 172 | 173 | def reverse(self, ids): 174 | """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence. 175 | 176 | Parameters 177 | ----------- 178 | ids: List[int] 179 | The list of pronunciation id sequence. 180 | 181 | Returns 182 | ---------- 183 | List[str] 184 | The list of pronunciation sequence. 
185 | """ 186 | return [self.vocab.reverse(i) for i in ids] 187 | 188 | def __call__(self, sentence, add_start_end=False): 189 | """ Convert the input text sequence into pronunciation id sequence. 190 | 191 | Parameters 192 | ----------- 193 | sentence: str 194 | The input text sequence. 195 | 196 | Returns 197 | ---------- 198 | List[str] 199 | The list of pronunciation id sequence. 200 | """ 201 | return self.numericalize( 202 | self.phoneticize(sentence, add_start_end=add_start_end)) 203 | 204 | @property 205 | def vocab_size(self): 206 | """ Vocab size. 207 | """ 208 | # 47 = 39 phones + 4 punctuations + 4 special tokens 209 | return len(self.vocab) 210 | 211 | 212 | class ARPABETWithStress(Phonetics): 213 | phonemes = [ 214 | 'AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0', 215 | 'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D', 216 | 'DH', 'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2', 217 | 'F', 'G', 'HH', 'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K', 218 | 'L', 'M', 'N', 'NG', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R', 219 | 'S', 'SH', 'T', 'TH', 'UH0', 'UH1', 'UH2', 'UW0', 'UW1', 'UW2', 'V', 220 | 'W', 'Y', 'Z', 'ZH' 221 | ] 222 | punctuations = [',', '.', '?', '!'] 223 | symbols = phonemes + punctuations 224 | 225 | def __init__(self): 226 | self.backend = G2p() 227 | self.vocab = Vocab(self.phonemes + self.punctuations) 228 | 229 | def phoneticize(self, sentence, add_start_end=False): 230 | """ Normalize the input text sequence and convert it into pronunciation sequence. 231 | 232 | Parameters 233 | ----------- 234 | sentence: str 235 | The input text sequence. 236 | 237 | Returns 238 | ---------- 239 | List[str] 240 | The list of pronunciation sequence. 
241 | """ 242 | phonemes = self.backend(sentence) 243 | if add_start_end: 244 | start = self.vocab.start_symbol 245 | end = self.vocab.end_symbol 246 | phonemes = [start] + phonemes + [end] 247 | phonemes = [item for item in phonemes if item in self.vocab.stoi] 248 | return phonemes 249 | 250 | def numericalize(self, phonemes): 251 | """ Convert pronunciation sequence into pronunciation id sequence. 252 | 253 | Parameters 254 | ----------- 255 | phonemes: List[str] 256 | The list of pronunciation sequence. 257 | 258 | Returns 259 | ---------- 260 | List[int] 261 | The list of pronunciation id sequence. 262 | """ 263 | ids = [self.vocab.lookup(item) for item in phonemes] 264 | return ids 265 | 266 | def reverse(self, ids): 267 | """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence. 268 | 269 | Parameters 270 | ----------- 271 | ids: List[int] 272 | The list of pronunciation id sequence. 273 | 274 | Returns 275 | ---------- 276 | List[str] 277 | The list of pronunciation sequence. 278 | """ 279 | return [self.vocab.reverse(i) for i in ids] 280 | 281 | def __call__(self, sentence, add_start_end=False): 282 | """ Convert the input text sequence into pronunciation id sequence. 283 | 284 | Parameters 285 | ----------- 286 | sentence: str 287 | The input text sequence. 288 | 289 | Returns 290 | ---------- 291 | List[str] 292 | The list of pronunciation id sequence. 293 | """ 294 | return self.numericalize( 295 | self.phoneticize(sentence, add_start_end=add_start_end)) 296 | 297 | @property 298 | def vocab_size(self): 299 | """ Vocab size. 300 | """ 301 | # 77 = 69 phones + 4 punctuations + 4 special tokens 302 | return len(self.vocab) 303 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/generate_lexicon.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
# Design principles: https://zhuanlan.zhihu.com/p/349600439
"""Generate a lexicon and symbol set for Mandarin Chinese phonology.

The lexicon is intended for the Montreal Forced Aligner.  Syllables (not
words) are used as the "word" unit, which makes it easy to combine with
grapheme-to-pinyin tools such as pypinyin — the usual convention for
Chinese G2P.
"""
import re
from collections import OrderedDict

INITIALS = [
    'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh',
    'r', 'z', 'c', 's', 'j', 'q', 'x'
]

FINALS = [
    'a', 'ai', 'ao', 'an', 'ang', 'e', 'er', 'ei', 'en', 'eng', 'o', 'ou',
    'ong', 'ii', 'iii', 'i', 'ia', 'iao', 'ian', 'iang', 'ie', 'io', 'iou',
    'iong', 'in', 'ing', 'u', 'ua', 'uai', 'uan', 'uang', 'uei', 'uo', 'uen',
    'ueng', 'v', 've', 'van', 'vn'
]

SPECIALS = ['sil', 'sp']


def rule(C, V, R, T):
    """Build one pinyin syllable from initial ``C``, final ``V``, erhua flag
    ``R`` ('' or 'r') and tone ``T`` ('' or '1'..'5').

    Standard pinyin orthography is applied (special cases for y, w, ui, un,
    iu).  At the phoneme level 'v' stands for 'ü' and 'i' is split into
    three categories 'i' / 'ii' / 'iii'.  Returns None for phonologically
    impossible syllables so callers can filter them out.
    """
    # 'ii' only combines with z / c / s.
    if V == 'ii' and C not in ('z', 'c', 's'):
        return None
    # 'iii' only combines with zh / ch / sh / r.
    if V == 'iii' and C not in ('zh', 'ch', 'sh', 'r'):
        return None

    # i-/v- finals (other than ii/iii) never follow
    # f, g, k, h, zh, ch, sh, r, z, c, s.
    if V not in ('ii', 'iii') and V[0] in ('i', 'v') and C in (
            'f', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's'):
        return None

    # v- finals: 'v'/'ve' only follow j, q, x, n, l (or stand alone);
    # all other v- finals only follow j, q, x (or stand alone).
    if V.startswith('v'):
        if V in ('v', 've'):
            if C not in ('j', 'q', 'x', 'n', 'l', ''):
                return None
        else:
            if C not in ('j', 'q', 'x', ''):
                return None

    # j, q, x must be followed by an i- or v- final.
    if C in ('j', 'q', 'x') and not (
            V not in ('ii', 'iii') and V[0] in ('i', 'v')):
        return None

    # b, p, m, f: no u-/v- finals except bare 'u', and never 'ong'.
    if C in ('b', 'p', 'm', 'f') and (
            (V[0] in ('u', 'v') and V != 'u') or V == 'ong'):
        return None

    # ua / uai / uang never follow d, t, n, l, r, z, c, s.
    if V in ('ua', 'uai', 'uang') and C in (
            'd', 't', 'n', 'l', 'r', 'z', 'c', 's'):
        return None

    # 'sh' + 'ong' is impossible.
    if V == 'ong' and C == 'sh':
        return None

    # 'o' never follows d, t, n, g, k, h, zh, ch, sh, r, z, c, s.
    if V == 'o' and C in ('d', 't', 'n', 'g', 'k', 'h', 'zh', 'ch', 'sh',
                          'r', 'z', 'c', 's'):
        return None

    # 'ueng' only occurs bare (spelled 'weng'); after an initial it is 'ong'.
    if V == 'ueng' and C != '':
        return None

    # Non-erhua 'er' only stands alone.
    if V == 'er' and C != '':
        return None

    if C == '':
        # Zero initial: apply the y / w / yu spelling rules.
        if V in ('i', 'in', 'ing'):
            C = 'y'
        elif V == 'u':
            C = 'w'
        elif V.startswith('i') and V not in ('ii', 'iii'):
            C, V = 'y', V[1:]
        elif V.startswith('u'):
            C, V = 'w', V[1:]
        elif V.startswith('v'):
            C, V = 'yu', V[1:]
    else:
        # After j/q/x, 'ü' is written as 'u'.
        if C in ('j', 'q', 'x') and V.startswith('v'):
            V = re.sub('v', 'u', V)
        # Contracted spellings: iou -> iu, uei -> ui, uen -> un.
        if V == 'iou':
            V = 'iu'
        elif V == 'uei':
            V = 'ui'
        elif V == 'uen':
            V = 'un'
    result = C + V

    # A syllable already ending in 'r' cannot take erhua again.
    if result.endswith('r') and R == 'r':
        return None

    # Collapse the phonemic ii / iii back to the orthographic single 'i'.
    result = re.sub(r'i+', 'i', result)

    return result + R + T


def generate_lexicon(with_tone=False, with_erhua=False):
    """Generate the Mandarin lexicon: an OrderedDict mapping each valid
    pinyin syllable to its space-separated phoneme string."""
    tones = ['1', '2', '3', '4', '5'] if with_tone else ['']
    erhuas = ['', 'r'] if with_erhua else ['']

    syllables = OrderedDict()
    for C in [''] + INITIALS:
        for V in FINALS:
            for R in erhuas:
                for T in tones:
                    syllable = rule(C, V, R, T)
                    if syllable:
                        syllables[syllable] = f'{C} {V}{R}{T}'
    return syllables
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/normalizer/abbrrviation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/normalizer/acronyms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
import re
import unicodedata
from builtins import str as unicode

from .numbers import normalize_numbers


def normalize(sentence):
    """Normalize English text for TTS frontends.

    Expands numbers to words, strips accents, lowercases, removes every
    character outside the supported set, and spells out the abbreviations
    "i.e." and "e.g.".
    """
    text = unicode(sentence)
    text = normalize_numbers(text)
    # Decompose to NFD and drop combining marks — i.e. strip accents.
    text = ''.join(
        ch for ch in unicodedata.normalize('NFD', text)
        if unicodedata.category(ch) != 'Mn')
    text = text.lower()
    # Keep only lowercase letters, space, apostrophe and basic punctuation.
    text = re.sub(r"[^ a-z'.,?!\-]", "", text)
    text = text.replace("i.e.", "that is")
    text = text.replace("e.g.", "for example")
    return text
# Number expansion for English text normalization — not as easy as it looks.
import re

import inflect

_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
_number_re = re.compile(r'[0-9]+')


def _remove_commas(m):
    # '1,234' -> '1234'
    return m.group(1).replace(',', '')


def _expand_decimal_point(m):
    # '3.14' -> '3 point 14'
    return m.group(1).replace('.', ' point ')


def _expand_dollars(m):
    """Spell out a dollar amount, e.g. '1.50' -> '1 dollar, 50 cents'."""
    amount = m.group(1)
    parts = amount.split('.')
    if len(parts) > 2:
        return amount + ' dollars'  # unexpected format — leave digits as-is
    dollars = int(parts[0]) if parts[0] else 0
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    dollar_unit = 'dollar' if dollars == 1 else 'dollars'
    cent_unit = 'cent' if cents == 1 else 'cents'
    if dollars and cents:
        return f'{dollars} {dollar_unit}, {cents} {cent_unit}'
    if dollars:
        return f'{dollars} {dollar_unit}'
    if cents:
        return f'{cents} {cent_unit}'
    return 'zero dollars'


def _expand_ordinal(m):
    # '3rd' -> 'third'
    return _inflect.number_to_words(m.group(0))


def _expand_number(m):
    """Spell out an integer; 1001-2999 are read year-style in digit pairs."""
    num = int(m.group(0))
    if 1000 < num < 3000:
        if num == 2000:
            return 'two thousand'
        if 2000 < num < 2010:
            return 'two thousand ' + _inflect.number_to_words(num % 100)
        if num % 100 == 0:
            return _inflect.number_to_words(num // 100) + ' hundred'
        # e.g. 1999 -> 'nineteen ninety-nine'
        return _inflect.number_to_words(
            num, andword='', zero='oh', group=2).replace(', ', ' ')
    return _inflect.number_to_words(num, andword='')


def normalize_numbers(text):
    """Expand commas, currency, decimals, ordinals and plain integers in
    English text into words."""
    text = re.sub(_comma_number_re, _remove_commas, text)
    text = re.sub(_pounds_re, r'\1 pounds', text)
    text = re.sub(_dollars_re, _expand_dollars, text)
    text = re.sub(_decimal_number_re, _expand_decimal_point, text)
    text = re.sub(_ordinal_re, _expand_ordinal, text)
    text = re.sub(_number_re, _expand_number, text)
    return text
def full2half_width(ustr):
    """Convert full-width (zenkaku) characters in *ustr* to half-width.

    The ideographic space U+3000 becomes an ASCII space; full-width forms
    U+FF01..U+FF5E become their ASCII counterparts U+21..U+7E. Any other
    character passes through unchanged.
    """

    def _to_half(ch):
        code = ord(ch)
        if code == 0x3000:  # full-width space -> ASCII space
            return ' '
        if 0xFF01 <= code <= 0xFF5E:
            return chr(code - 0xFEE0)
        return ch

    return ''.join(_to_half(ch) for ch in ustr)


def half2full_width(ustr):
    """Convert half-width ASCII characters in *ustr* to full-width.

    An ASCII space becomes the ideographic space U+3000; printable ASCII
    U+21..U+7E becomes the full-width forms U+FF01..U+FF5E. Any other
    character passes through unchanged.
    """

    def _to_full(ch):
        code = ord(ch)
        if code == 0x20:  # ASCII space -> full-width space
            return '\u3000'
        if 0x21 <= code <= 0x7E:
            return chr(code + 0xFEE0)
        return ch

    return ''.join(_to_full(ch) for ch in ustr)
from abc import ABC
from abc import abstractmethod

import paddle
from g2p_en import G2p
from g2pM import G2pM

from .normalizer.normalizer import normalize
from .punctuation import get_punctuations
from .vocab import Vocab

# discard opencc until we find an easy solution to install it on windows
# from opencc import OpenCC

__all__ = ["Phonetics", "English", "EnglishCharacter", "Chinese"]


class Phonetics(ABC):
    """Interface of a text frontend: text -> pronunciation -> id sequence."""

    @abstractmethod
    def __call__(self, sentence):
        pass

    @abstractmethod
    def phoneticize(self, sentence):
        pass

    @abstractmethod
    def numericalize(self, phonemes):
        pass


class English(Phonetics):
    """ Normalize the input text sequence and convert into pronunciation id sequence.
    """

    def __init__(self, phone_vocab_path=None):
        self.backend = G2p()
        self.phonemes = list(self.backend.phonemes)
        self.punctuations = get_punctuations("en")
        self.vocab = Vocab(self.phonemes + self.punctuations)
        # phone -> id mapping loaded from `phone_vocab_path`; each line of
        # that file holds a phone and its id separated by whitespace.
        self.vocab_phones = {}
        # Punctuation (full- and half-width) that get_input_ids maps to "sp".
        self.punc = ":,;。?!“”‘’':,;.?!"
        if phone_vocab_path:
            with open(phone_vocab_path, 'rt') as f:
                phn_id = [line.strip().split() for line in f.readlines()]
            for phn, id in phn_id:
                self.vocab_phones[phn] = int(id)

    def phoneticize(self, sentence):
        """ Normalize the input text sequence and convert it into pronunciation sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation sequence.
        """
        start = self.vocab.start_symbol
        end = self.vocab.end_symbol
        phonemes = ([] if start is None else [start]) \
            + self.backend(sentence) \
            + ([] if end is None else [end])
        # Drop anything g2p produced that is not in the vocabulary.
        phonemes = [item for item in phonemes if item in self.vocab.stoi]
        return phonemes

    def get_input_ids(self, sentence: str) -> paddle.Tensor:
        """ Convert text into {"phone_ids": paddle.Tensor}; phones that are
        out of `vocab_phones` or punctuation are replaced by "sp".
        """
        result = {}
        phones = self.phoneticize(sentence)
        # remove start_symbol and end_symbol
        phones = phones[1:-1]
        phones = [phn for phn in phones if not phn.isspace()]
        phones = [
            phn if (phn in self.vocab_phones and phn not in self.punc) else "sp"
            for phn in phones
        ]
        # NOTE(review): assumes "sp" itself is present in vocab_phones —
        # confirm against the phone vocab file.
        phone_ids = [self.vocab_phones[phn] for phn in phones]
        phone_ids = paddle.to_tensor(phone_ids)
        result["phone_ids"] = phone_ids
        return result

    def numericalize(self, phonemes):
        """ Convert pronunciation sequence into pronunciation id sequence.
        Parameters
        -----------
        phonemes: List[str]
            The list of pronunciation sequence.
        Returns
        ----------
        List[int]
            The list of pronunciation id sequence.
        """
        ids = [
            self.vocab.lookup(item) for item in phonemes
            if item in self.vocab.stoi
        ]
        return ids

    def reverse(self, ids):
        """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence.
        Parameters
        -----------
        ids: List[int]
            The list of pronunciation id sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation sequence.
        """
        return [self.vocab.reverse(i) for i in ids]

    def __call__(self, sentence):
        """ Convert the input text sequence into pronunciation id sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation id sequence.
        """
        return self.numericalize(self.phoneticize(sentence))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        return len(self.vocab)


class EnglishCharacter(Phonetics):
    """ Normalize the input text sequence and convert it into character id sequence.
    """

    def __init__(self):
        self.backend = G2p()
        self.graphemes = list(self.backend.graphemes)
        self.punctuations = get_punctuations("en")
        self.vocab = Vocab(self.graphemes + self.punctuations)

    def phoneticize(self, sentence):
        """ Normalize the input text sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        str
            A normalized text sequence.
        """
        words = normalize(sentence)
        return words

    def numericalize(self, sentence):
        """ Convert a text sequence into ids.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[int]
            List of a character id sequence.
        """
        ids = [
            self.vocab.lookup(item) for item in sentence
            if item in self.vocab.stoi
        ]
        return ids

    def reverse(self, ids):
        """ Convert a character id sequence into text.
        Parameters
        -----------
        ids: List[int]
            List of a character id sequence.
        Returns
        ----------
        str
            The input text sequence.
        """
        return [self.vocab.reverse(i) for i in ids]

    def __call__(self, sentence):
        """ Normalize the input text sequence and convert it into character id sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[int]
            List of a character id sequence.
        """
        return self.numericalize(self.phoneticize(sentence))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        return len(self.vocab)


class Chinese(Phonetics):
    """Normalize Chinese text sequence and convert it into ids.
    """

    def __init__(self):
        # self.opencc_backend = OpenCC('t2s.json')
        self.backend = G2pM()
        self.phonemes = self._get_all_syllables()
        self.punctuations = get_punctuations("cn")
        self.vocab = Vocab(self.phonemes + self.punctuations)

    def _get_all_syllables(self):
        # NOTE(review): set iteration order is not stable across processes,
        # so vocab ids may differ between runs — confirm this is acceptable.
        all_syllables = set([
            syllable for k, v in self.backend.cedict.items() for syllable in v
        ])
        return list(all_syllables)

    def phoneticize(self, sentence):
        """ Normalize the input text sequence and convert it into pronunciation sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation sequence.
        """
        # simplified = self.opencc_backend.convert(sentence)
        simplified = sentence
        phonemes = self.backend(simplified)
        start = self.vocab.start_symbol
        end = self.vocab.end_symbol
        phonemes = ([] if start is None else [start]) \
            + phonemes \
            + ([] if end is None else [end])
        return self._filter_symbols(phonemes)

    def _filter_symbols(self, phonemes):
        # Keep in-vocab items whole; otherwise fall back to keeping the
        # individual in-vocab characters of the item.
        cleaned_phonemes = []
        for item in phonemes:
            if item in self.vocab.stoi:
                cleaned_phonemes.append(item)
            else:
                for char in item:
                    if char in self.vocab.stoi:
                        cleaned_phonemes.append(char)
        return cleaned_phonemes

    def numericalize(self, phonemes):
        """ Convert pronunciation sequence into pronunciation id sequence.
        Parameters
        -----------
        phonemes: List[str]
            The list of pronunciation sequence.
        Returns
        ----------
        List[int]
            The list of pronunciation id sequence.
        """
        ids = [self.vocab.lookup(item) for item in phonemes]
        return ids

    def __call__(self, sentence):
        """ Convert the input text sequence into pronunciation id sequence.
        Parameters
        -----------
        sentence: str
            The input text sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation id sequence.
        """
        return self.numericalize(self.phoneticize(sentence))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        return len(self.vocab)

    def reverse(self, ids):
        """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence.
        Parameters
        -----------
        ids: List[int]
            The list of pronunciation id sequence.
        Returns
        ----------
        List[str]
            The list of pronunciation sequence.
        """
        return [self.vocab.reverse(i) for i in ids]
14 | 15 | __all__ = ["get_punctuations"] 16 | 17 | EN_PUNCT = [ 18 | " ", 19 | "-", 20 | "...", 21 | ",", 22 | ".", 23 | "?", 24 | "!", 25 | ] 26 | 27 | CN_PUNCT = ["、", ",", ";", ":", "。", "?", "!"] 28 | 29 | 30 | def get_punctuations(lang): 31 | if lang == "en": 32 | return EN_PUNCT 33 | elif lang == "cn": 34 | return CN_PUNCT 35 | else: 36 | raise ValueError(f"language {lang} Not supported") 37 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/vocab.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from collections import OrderedDict 15 | from typing import Iterable 16 | 17 | __all__ = ["Vocab"] 18 | 19 | 20 | class Vocab(object): 21 | """ Vocabulary. 22 | 23 | Parameters 24 | ----------- 25 | symbols: Iterable[str] 26 | Common symbols. 27 | 28 | padding_symbol: str, optional 29 | Symbol for pad. Defaults to "". 30 | 31 | unk_symbol: str, optional 32 | Symbol for unknow. Defaults to "" 33 | 34 | start_symbol: str, optional 35 | Symbol for start. Defaults to "" 36 | 37 | end_symbol: str, optional 38 | Symbol for end. 
Defaults to "" 39 | """ 40 | 41 | def __init__(self, 42 | symbols: Iterable[str], 43 | padding_symbol="", 44 | unk_symbol="", 45 | start_symbol="", 46 | end_symbol=""): 47 | self.special_symbols = OrderedDict() 48 | for i, item in enumerate( 49 | [padding_symbol, unk_symbol, start_symbol, end_symbol]): 50 | if item: 51 | self.special_symbols[item] = len(self.special_symbols) 52 | 53 | self.padding_symbol = padding_symbol 54 | self.unk_symbol = unk_symbol 55 | self.start_symbol = start_symbol 56 | self.end_symbol = end_symbol 57 | 58 | self.stoi = OrderedDict() 59 | self.stoi.update(self.special_symbols) 60 | 61 | for i, s in enumerate(symbols): 62 | if s not in self.stoi: 63 | self.stoi[s] = len(self.stoi) 64 | self.itos = {v: k for k, v in self.stoi.items()} 65 | 66 | def __len__(self): 67 | return len(self.stoi) 68 | 69 | @property 70 | def num_specials(self): 71 | """ The number of special symbols. 72 | """ 73 | return len(self.special_symbols) 74 | 75 | # special tokens 76 | @property 77 | def padding_index(self): 78 | """ The index of padding symbol 79 | """ 80 | return self.stoi.get(self.padding_symbol, -1) 81 | 82 | @property 83 | def unk_index(self): 84 | """The index of unknow symbol. 85 | """ 86 | return self.stoi.get(self.unk_symbol, -1) 87 | 88 | @property 89 | def start_index(self): 90 | """The index of start symbol. 91 | """ 92 | return self.stoi.get(self.start_symbol, -1) 93 | 94 | @property 95 | def end_index(self): 96 | """ The index of end symbol. 97 | """ 98 | return self.stoi.get(self.end_symbol, -1) 99 | 100 | def __repr__(self): 101 | fmt = "Vocab(size: {},\nstoi:\n{})" 102 | return fmt.format(len(self), self.stoi) 103 | 104 | def __str__(self): 105 | return self.__repr__() 106 | 107 | def lookup(self, symbol): 108 | """ The index that symbol correspond. 109 | """ 110 | return self.stoi[symbol] 111 | 112 | def reverse(self, index): 113 | """ The symbol thar index cottespond. 
114 | """ 115 | return self.itos[index] 116 | 117 | def add_symbol(self, symbol): 118 | """ Add a new symbol in vocab. 119 | """ 120 | if symbol in self.stoi: 121 | return 122 | N = len(self.stoi) 123 | self.stoi[symbol] = N 124 | self.itos[N] = symbol 125 | 126 | def add_symbols(self, symbols): 127 | """ Add multiple symbols in vocab. 128 | """ 129 | for symbol in symbols: 130 | self.add_symbol(symbol) 131 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/zh_normalization/README.md: -------------------------------------------------------------------------------- 1 | ## Supported NSW (Non-Standard-Word) Normalization 2 | 3 | |NSW type|raw|normalized| 4 | |:--|:-|:-| 5 | |serial number|电影中梁朝伟扮演的陈永仁的编号27149|电影中梁朝伟扮演的陈永仁的编号二七一四九| 6 | |cardinal|这块黄金重达324.75克
我们班的最高总分为583分|这块黄金重达三百二十四点七五克
我们班的最高总分为五百八十三分| 7 | |numeric range |12\~23
-1.5\~2|十二到二十三
负一点五到二| 8 | |date|她出生于86年8月18日,她弟弟出生于1995年3月1日|她出生于八六年八月十八日, 她弟弟出生于一九九五年三月一日| 9 | |time|等会请在12:05请通知我|等会请在十二点零五分请通知我| 10 | |temperature|今天的最低气温达到-10°C|今天的最低气温达到零下十度| 11 | |fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票| 12 | |percentage|明天有62%的概率降雨|明天有百分之六十二的概率降雨| 13 | |money|随便来几个价格12块5,34.5元,20.1万|随便来几个价格十二块五,三十四点五元,二十点一万| 14 | |telephone|这是固话0421-33441122
这是手机+86 18544139121|这是固话零四二一三三四四一一二二
这是手机八六一八五四四一三九一二一| 15 | ## References 16 | [Pull requests #658 of DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech/pull/658/files) 17 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/zh_normalization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/zh_normalization/chronology.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import re

from .num import DIGITS
from .num import num2str
from .num import verbalize_cardinal
from .num import verbalize_digit


def _time_num2str(num_string: str) -> str:
    """A special case for verbalizing number in time."""
    # Keep a leading zero audible: "05" -> "零五".
    result = num2str(num_string.lstrip('0'))
    if num_string.startswith('0'):
        result = DIGITS['0'] + result
    return result


# Time-of-day expression, e.g. "8:30" or "8:30:05". (时刻表达式)
RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])'
                     r':([0-5][0-9])'
                     r'(:([0-5][0-9]))?')

# Time range, e.g. "8:30-12:30". (时间范围)
RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?'
                           r'(~|-)'
                           r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?')


def replace_time(match) -> str:
    """Verbalize a time (or time range) match, e.g. "12:05" -> "十二点零五分".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """

    # RE_TIME has 4 capture groups, RE_TIME_RANGE has 9; the group count
    # tells which pattern produced this match.
    is_range = len(match.groups()) > 5

    hour = match.group(1)
    minute = match.group(2)
    second = match.group(4)

    if is_range:
        hour_2 = match.group(6)
        minute_2 = match.group(7)
        second_2 = match.group(9)

    result = f"{num2str(hour)}点"
    if minute.lstrip('0'):
        result += f"{_time_num2str(minute)}分"
    if second and second.lstrip('0'):
        result += f"{_time_num2str(second)}秒"

    if is_range:
        result += "至"
        result += f"{num2str(hour_2)}点"
        if minute_2.lstrip('0'):
            result += f"{_time_num2str(minute_2)}分"
        if second_2 and second_2.lstrip('0'):
            result += f"{_time_num2str(second_2)}秒"

    return result


# Chinese-style date, e.g. "2021年8月18日" / "86年3月1号".
RE_DATE = re.compile(r'(\d{4}|\d{2})年'
                     r'((0?[1-9]|1[0-2])月)?'
                     r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?')


def replace_date(match) -> str:
    """Verbalize a Chinese-style date match.
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    year = match.group(1)
    month = match.group(3)
    day = match.group(5)
    result = ""
    if year:
        # Years are read digit by digit: "1995" -> "一九九五".
        result += f"{verbalize_digit(year)}年"
    if month:
        result += f"{verbalize_cardinal(month)}月"
    if day:
        # group(9) is the matched day suffix, 日 or 号.
        result += f"{verbalize_cardinal(day)}{match.group(9)}"
    return result


# YY/MM/DD or YY-MM-DD dates separated by /, - or . (用 / 或者 - 分隔的日期)
RE_DATE2 = re.compile(
    r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])')


def replace_date2(match) -> str:
    """Verbalize an ISO-style date match, e.g. "2021-08-18".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    year = match.group(1)
    month = match.group(3)
    day = match.group(4)
    result = ""
    if year:
        result += f"{verbalize_digit(year)}年"
    if month:
        result += f"{verbalize_cardinal(month)}月"
    if day:
        result += f"{verbalize_cardinal(day)}日"
    return result
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import string

from pypinyin.constants import SUPPORT_UCS4

# Full-width <-> half-width conversion tables.
# Full-width ASCII letters -> half-width (num: 52)
F2H_ASCII_LETTERS = {
    chr(ord(char) + 65248): char
    for char in string.ascii_letters
}

# Half-width ASCII letters -> full-width
H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}

# Full-width digits -> half-width (num: 10)
F2H_DIGITS = {chr(ord(char) + 65248): char for char in string.digits}
# Half-width digits -> full-width
H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}

# Full-width punctuation -> half-width (num: 32)
F2H_PUNCTUATIONS = {chr(ord(char) + 65248): char for char in string.punctuation}
# Half-width punctuation -> full-width
H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}

# Space (num: 1): ideographic space <-> ASCII space
F2H_SPACE = {'\u3000': ' '}
H2F_SPACE = {' ': '\u3000'}

# Runs of characters that are NOT "Chinese characters with pinyin readings";
# used to extract non-standard words (NSW) for normalization.
if SUPPORT_UCS4:
    RE_NSW = re.compile(r'(?:[^'
                        r'\u3007'  # 〇
                        r'\u3400-\u4dbf'  # CJK Ext A: [3400-4DBF]
                        r'\u4e00-\u9fff'  # CJK basic: [4E00-9FFF]
                        r'\uf900-\ufaff'  # CJK compat: [F900-FAFF]
                        r'\U00020000-\U0002A6DF'  # CJK Ext B: [20000-2A6DF]
                        r'\U0002A703-\U0002B73F'  # CJK Ext C: [2A700-2B73F]; NOTE(review): the range starts at 2A703, not 2A700 as the label says — confirm
                        r'\U0002B740-\U0002B81D'  # CJK Ext D: [2B740-2B81D]
                        r'\U0002F80A-\U0002FA1F'  # CJK compat ext: [2F800-2FA1F]; NOTE(review): starts at 2F80A, not 2F800 as the label says — confirm
                        r'])+')
else:
    # Narrow build: only BMP ranges are available.
    RE_NSW = re.compile(  # pragma: no cover
        r'(?:[^'
        r'\u3007'  # 〇
        r'\u3400-\u4dbf'  # CJK Ext A: [3400-4DBF]
        r'\u4e00-\u9fff'  # CJK basic: [4E00-9FFF]
        r'\uf900-\ufaff'  # CJK compat: [F900-FAFF]
        r'])+')
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
"""
Rules to verbalize numbers into Chinese characters.
https://zh.wikipedia.org/wiki/中文数字#現代中文
"""
import re
from collections import OrderedDict
from typing import List

# Single digits read in isolation: '0' -> '零', ..., '9' -> '九'.
DIGITS = {str(i): tran for i, tran in enumerate('零一二三四五六七八九')}
# Positional units keyed by the power of ten they represent.
UNITS = OrderedDict({
    1: '十',
    2: '百',
    3: '千',
    4: '万',
    8: '亿',
})

# Common measure words (quantifiers) that may follow a number.
COM_QUANTIFIERS = '(朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)'

# Fraction, e.g. "-3/4". (分数表达式)
RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)')


def replace_frac(match) -> str:
    """Verbalize a fraction, e.g. "7/12" -> "十二分之七".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    sign = "负" if match.group(1) else ""
    # Chinese reads the denominator first: "<den>分之<num>".
    numerator = num2str(match.group(2))
    denominator = num2str(match.group(3))
    return f"{sign}{denominator}分之{numerator}"


# Percentage, e.g. "-12.5%". (百分数表达式)
RE_PERCENTAGE = re.compile(r'(-?)(\d+(\.\d+)?)%')


def replace_percentage(match) -> str:
    """Verbalize a percentage, e.g. "62%" -> "百分之六十二".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    sign = "负" if match.group(1) else ""
    percent = num2str(match.group(2))
    return f"{sign}百分之{percent}"


# Negative integer, e.g. "-10".
RE_INTEGER = re.compile(r'(-)' r'(\d+)')


def replace_negative_num(match) -> str:
    """Verbalize a signed integer, e.g. "-10" -> "负十".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    sign = "负" if match.group(1) else ""
    number = num2str(match.group(2))
    return f"{sign}{number}"


# Serial numbers / ids with 3 or more digits, e.g. "00078"; these are
# read digit by digit, not as cardinals.
RE_DEFAULT_NUM = re.compile(r'\d{3}\d*')


def replace_default_num(match):
    """Verbalize a serial number digit by digit, e.g. "00078" -> "零零零七八".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    return verbalize_digit(match.group(0))


# Pure decimals.
RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' r'|(\.(\d+))')
# Positive integer + measure word.
RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几])?" + COM_QUANTIFIERS)
RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))')


def replace_positive_quantifier(match) -> str:
    """Verbalize "<number>[多|余|几]<measure word>", e.g. "3个" -> "三个".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    number = num2str(match.group(1))
    approx = match.group(2) if match.group(2) else ""  # optional 多/余/几
    quantifier = match.group(3)
    return f"{number}{approx}{quantifier}"


def replace_number(match) -> str:
    """Verbalize a signed number or a bare decimal, e.g. "-10" -> "负十",
    ".5" -> "零点五".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    sign = match.group(1)
    number = match.group(2)
    pure_decimal = match.group(5)
    if pure_decimal:
        # ".5" style: no integer part.
        return num2str(pure_decimal)
    prefix = "负" if sign else ""
    return f"{prefix}{num2str(number)}"


# Numeric range, e.g. "12~23" or "-1.5~2".
# match.group(1) and match.group(8) are copies of RE_NUMBER.
RE_RANGE = re.compile(
    r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))[-~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))')


def replace_range(match) -> str:
    """Verbalize a numeric range, e.g. "12~23" -> "十二到二十三".
    Parameters
    ----------
    match : re.Match
    Returns
    ----------
    str
    """
    first, second = match.group(1), match.group(8)
    first = RE_NUMBER.sub(replace_number, first)
    second = RE_NUMBER.sub(replace_number, second)
    return f"{first}到{second}"


def _get_value(value_string: str, use_zero: bool=True) -> List[str]:
    """Recursively split a digit string at its largest positional unit and
    return the verbalized symbols, e.g. "204" -> ['二', '百', '零', '四']."""
    stripped = value_string.lstrip('0')
    if len(stripped) == 0:
        return []
    elif len(stripped) == 1:
        # A leading zero swallowed by a unit split is read out ("零四")
        # unless suppressed by use_zero.
        if use_zero and len(stripped) < len(value_string):
            return [DIGITS['0'], DIGITS[stripped]]
        else:
            return [DIGITS[stripped]]
    else:
        # Largest unit whose power is strictly below the digit count.
        largest_unit = next(
            power for power in reversed(UNITS.keys()) if power < len(stripped))
        first_part = value_string[:-largest_unit]
        second_part = value_string[-largest_unit:]
        return _get_value(first_part) + [UNITS[largest_unit]] + _get_value(
            second_part)


def verbalize_cardinal(value_string: str) -> str:
    """Verbalize a cardinal integer string, e.g. "24" -> "二十四"."""
    if not value_string:
        return ''

    # 000 -> '零' , 0 -> '零'
    value_string = value_string.lstrip('0')
    if len(value_string) == 0:
        return DIGITS['0']

    result_symbols = _get_value(value_string)
    # verbalized number starting with '一十*' is abbreviated as `十*`
    if len(result_symbols) >= 2 and result_symbols[0] == DIGITS[
            '1'] and result_symbols[1] == UNITS[1]:
        result_symbols = result_symbols[1:]
    return ''.join(result_symbols)


def verbalize_digit(value_string: str, alt_one=False) -> str:
    """Verbalize a digit string digit by digit, e.g. "119" -> "一一九".

    When ``alt_one`` is True, '一' is read as '幺' (customary for phone
    numbers and serial numbers).
    """
    result = ''.join(DIGITS[digit] for digit in value_string)
    if alt_one:
        # Fix: the original discarded str.replace()'s return value
        # (strings are immutable), so alt_one had no effect.
        result = result.replace("一", "幺")
    return result


def num2str(value_string: str) -> str:
    """Verbalize a number string with an optional decimal part,
    e.g. "3.20" -> "三点二".

    Raises
    ----------
    ValueError
        If the string contains more than one decimal point.
    """
    integer_decimal = value_string.split('.')
    if len(integer_decimal) == 1:
        integer, decimal = integer_decimal[0], ''
    elif len(integer_decimal) == 2:
        integer, decimal = integer_decimal
    else:
        # Fix: removed a stray '$' that the original interpolated into
        # the error message.
        raise ValueError(
            f"The value string: '{value_string}' has more than one point in it."
        )

    result = verbalize_cardinal(integer)

    decimal = decimal.rstrip('0')
    if decimal:
        # '.22' is verbalized as '零点二二'
        # '3.20' is verbalized as '三点二'
        result = result if result else "零"
        result += '点' + verbalize_digit(decimal)
    return result
str: 34 | if mobile: 35 | sp_parts = phone_string.strip('+').split() 36 | result = ','.join( 37 | [verbalize_digit(part, alt_one=True) for part in sp_parts]) 38 | return result 39 | else: 40 | sil_parts = phone_string.split('-') 41 | result = ','.join( 42 | [verbalize_digit(part, alt_one=True) for part in sil_parts]) 43 | return result 44 | 45 | 46 | def replace_phone(match) -> str: 47 | """ 48 | Parameters 49 | ---------- 50 | match : re.Match 51 | Returns 52 | ---------- 53 | str 54 | """ 55 | return phone2str(match.group(0), mobile=False) 56 | 57 | 58 | def replace_mobile(match) -> str: 59 | """ 60 | Parameters 61 | ---------- 62 | match : re.Match 63 | Returns 64 | ---------- 65 | str 66 | """ 67 | return phone2str(match.group(0)) 68 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/zh_normalization/quantifier.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import re 15 | 16 | from .num import num2str 17 | 18 | # 温度表达式,温度会影响负号的读法 19 | # -3°C 零下三度 20 | RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)') 21 | 22 | 23 | def replace_temperature(match) -> str: 24 | """ 25 | Parameters 26 | ---------- 27 | match : re.Match 28 | Returns 29 | ---------- 30 | str 31 | """ 32 | sign = match.group(1) 33 | temperature = match.group(2) 34 | unit = match.group(3) 35 | sign: str = "零下" if sign else "" 36 | temperature: str = num2str(temperature) 37 | unit: str = "摄氏度" if unit == "摄氏度" else "度" 38 | result = f"{sign}{temperature}{unit}" 39 | return result 40 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/frontend/zh_normalization/text_normlization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import re 15 | from typing import List 16 | 17 | from .char_convert import tranditional_to_simplified 18 | from .chronology import RE_DATE 19 | from .chronology import RE_DATE2 20 | from .chronology import RE_TIME 21 | from .chronology import RE_TIME_RANGE 22 | from .chronology import replace_date 23 | from .chronology import replace_date2 24 | from .chronology import replace_time 25 | from .constants import F2H_ASCII_LETTERS 26 | from .constants import F2H_DIGITS 27 | from .constants import F2H_SPACE 28 | from .num import RE_DECIMAL_NUM 29 | from .num import RE_DEFAULT_NUM 30 | from .num import RE_FRAC 31 | from .num import RE_INTEGER 32 | from .num import RE_NUMBER 33 | from .num import RE_PERCENTAGE 34 | from .num import RE_POSITIVE_QUANTIFIERS 35 | from .num import RE_RANGE 36 | from .num import replace_default_num 37 | from .num import replace_frac 38 | from .num import replace_negative_num 39 | from .num import replace_number 40 | from .num import replace_percentage 41 | from .num import replace_positive_quantifier 42 | from .num import replace_range 43 | from .phonecode import RE_MOBILE_PHONE 44 | from .phonecode import RE_NATIONAL_UNIFORM_NUMBER 45 | from .phonecode import RE_TELEPHONE 46 | from .phonecode import replace_mobile 47 | from .phonecode import replace_phone 48 | from .quantifier import RE_TEMPERATURE 49 | from .quantifier import replace_temperature 50 | 51 | 52 | class TextNormalizer(): 53 | def __init__(self): 54 | self.SENTENCE_SPLITOR = re.compile(r'([:,;。?!,;?!][”’]?)')  # sentence-ending punctuation, optionally followed by a closing quote 55 | 56 | def _split(self, text: str) -> List[str]: 57 | """Split long text into sentences with sentence-splitting punctuations. 58 | Parameters 59 | ---------- 60 | text : str 61 | The input text. 62 | Returns 63 | ------- 64 | List[str] 65 | Sentences.
66 | """ 67 | # Only for pure Chinese here 68 | text = text.replace(" ", "")  # spaces carry no meaning in pure-Chinese text, so drop them before splitting 69 | text = self.SENTENCE_SPLITOR.sub(r'\1\n', text) 70 | text = text.strip() 71 | sentences = [sentence.strip() for sentence in re.split(r'\n+', text)] 72 | return sentences 73 | 74 | def normalize_sentence(self, sentence: str) -> str: 75 | # basic character conversions 76 | sentence = tranditional_to_simplified(sentence) 77 | sentence = sentence.translate(F2H_ASCII_LETTERS).translate( 78 | F2H_DIGITS).translate(F2H_SPACE) 79 | 80 | # number related NSW verbalization 81 | sentence = RE_DATE.sub(replace_date, sentence)  # NOTE: rule order matters — dates/times run before generic number rules 82 | sentence = RE_DATE2.sub(replace_date2, sentence) 83 | 84 | # range first 85 | sentence = RE_TIME_RANGE.sub(replace_time, sentence) 86 | sentence = RE_TIME.sub(replace_time, sentence) 87 | 88 | sentence = RE_TEMPERATURE.sub(replace_temperature, sentence) 89 | sentence = RE_FRAC.sub(replace_frac, sentence) 90 | sentence = RE_PERCENTAGE.sub(replace_percentage, sentence) 91 | sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence) 92 | 93 | sentence = RE_TELEPHONE.sub(replace_phone, sentence) 94 | sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence) 95 | 96 | sentence = RE_RANGE.sub(replace_range, sentence) 97 | sentence = RE_INTEGER.sub(replace_negative_num, sentence) 98 | sentence = RE_DECIMAL_NUM.sub(replace_number, sentence) 99 | sentence = RE_POSITIVE_QUANTIFIERS.sub(replace_positive_quantifier, 100 | sentence) 101 | sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence) 102 | sentence = RE_NUMBER.sub(replace_number, sentence) 103 | 104 | return sentence 105 | 106 | def normalize(self, text: str) -> List[str]: 107 | sentences = self._split(text) 108 | 109 | sentences = [self.normalize_sentence(sent) for sent in sentences] 110 | return sentences 111 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/001.wav:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/001.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/002.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/003.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/004.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/004.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/005.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/005.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/006.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/006.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/007.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/007.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/008.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/008.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/009.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/009.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/010.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/010.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/011.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/011.wav 
-------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/012.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/012.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/013.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/013.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/014.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/014.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/015.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/015.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/016.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/016.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/infer_result/017.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts2/infer_result/017.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/requirements.txt: -------------------------------------------------------------------------------- 1 | g2p_en==2.1.0 2 | g2pM 3 | inflect==5.3.0 4 | jieba==0.42.1 5 | numpy 6 | onnxruntime==1.10.0 7 | paddlepaddle 8 | pypinyin==0.44.0 9 | SoundFile==0.10.3.post1 10 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/sentences.txt: -------------------------------------------------------------------------------- 1 | 001 凯莫瑞安联合体的经济崩溃,迫在眉睫。 2 | 002 对于所有想要离开那片废土,去寻找更美好生活的人来说。 3 | 003 克哈,是你们所有人安全的港湾。 4 | 004 为了保护尤摩扬人民不受异虫的残害,我所做的,比他们自己的领导委员会都多。 5 | 005 无论他们如何诽谤我,我将继续为所有泰伦人的最大利益,而努力奋斗。 6 | 006 身为你们的元首,我带领泰伦人实现了人类统治领地和经济的扩张。 7 | 007 我们将继续成长,用行动回击那些只会说风凉话,不愿意和我们相向而行的害群之马。 8 | 008 帝国武装力量,无数的优秀儿女,正时刻守卫着我们的家园大门,但是他们孤木难支。 9 | 009 凡是今天应征入伍者,所获的所有刑罚罪责,减半。 10 | 010 激进分子和异见者希望你们一听见枪声,就背弃多年的和平与繁荣。 11 | 011 他们没有勇气和能力,带领人类穿越一个充满危险的星系。 12 | 012 法治是我们的命脉,然而它却受到前所未有的挑战。 13 | 013 我将恢复我们帝国的荣光,绝不会向任何外星势力低头。 14 | 014 我已经驯服了异虫,荡平了星灵。如今它们的创造者,想要夺走我们拥有的一切。 15 | 015 永远记住,谁才是最能保护你们的人。 16 | 016 不要听信别人的谗言,我不是什么克隆人。 -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/tts2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import time 15 | from pathlib import Path 16 | 17 | import soundfile as sf 18 | 19 | from acoustic import SpeedySpeechAcoustic 20 | from frontend.zh_frontend import Frontend 21 | from utils import mkdir, read_txt 22 | from vocoder import PWGANVocoder 23 | 24 | print('初始化前处理部分') 25 | phones_dict = 'resources/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt' 26 | tones_dict = 'resources/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt' 27 | frontend = Frontend(phone_vocab_path=phones_dict, 28 | tone_vocab_path=tones_dict) 29 | print("frontend done!") 30 | 31 | print('初始化提取特征模型') 32 | speedyspeech_dir = Path('resources/models/speedyspeech_csmsc') 33 | pdmodel_path = str(speedyspeech_dir / 'speedyspeech_csmsc.pdmodel') 34 | pdiparam_path = str(speedyspeech_dir / 'speedyspeech_csmsc.pdiparams') 35 | 36 | am_predictor = SpeedySpeechAcoustic(pdmodel_path, pdiparam_path) 37 | print('am_predictor done!') 38 | 39 | print('初始化合成wav模型') 40 | pwgan_model_path = 'resources/models/pwgan_csmsc/pwgan_csmsc.onnx' 41 | voc_predictor = PWGANVocoder(pwgan_model_path) 42 | 43 | save_wav_dir = 'infer_result' 44 | mkdir(save_wav_dir) 45 | 46 | print('合成指定句子') 47 | sentences_path = 'sentences.txt' 48 | sentences = read_txt(sentences_path) 49 | 50 | for sentence_info in sentences: 51 | start = time.time() 52 | 53 | uuid, sentence = sentence_info.split(' ')  # NOTE(review): assumes exactly one space between id and text; a line with extra spaces would raise ValueError — confirm sentences.txt format 54 | 55 | input_ids = frontend.get_input_ids(sentence, 56 | merge_sentences=True, 57 | get_tone_ids=True) 58 | 59 | am_output_data = am_predictor(input_ids) 60 | 61 | wav =
voc_predictor(am_output_data) 62 | 63 | elapse = time.time() - start 64 | 65 | save_wav_path = f'{save_wav_dir}/{uuid}.wav' 66 | sf.write(save_wav_path, wav, samplerate=24000) 67 | 68 | print(f'{save_wav_path} done!\tcost: {elapse}s') 69 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/utils.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | from pathlib import Path 4 | 5 | 6 | def mkdir(dir_path): 7 | Path(dir_path).mkdir(parents=True, exist_ok=True)  # create intermediate dirs; no error if the directory already exists 8 | 9 | 10 | def read_txt(txt_path: str) -> list: 11 | with open(txt_path, 'r', encoding='utf-8') as f: 12 | data = list(map(lambda x: x.rstrip('\n'), f))  # strip only the trailing newline; keep any other trailing whitespace 13 | return data 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/vocoder/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | from .pwgan_csmsc import PWGANVocoder 3 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts2/vocoder/pwgan_csmsc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # -*- encoding: utf-8 -*- 15 | import onnxruntime as ort 16 | 17 | 18 | class PWGANVocoder(object): 19 | def __init__(self, model_path): 20 | sess_opt = ort.SessionOptions() 21 | sess_opt.log_severity_level = 4  # 4 = fatal only; silences ORT info/warning logs 22 | sess_opt.enable_cpu_mem_arena = False  # disable the CPU memory arena to reduce resident memory usage 23 | self.sess = ort.InferenceSession(model_path, 24 | sess_options=sess_opt) 25 | self.input_name = self.sess.get_inputs()[0].name  # model is single-input; cache the name for run() 26 | 27 | def __call__(self, am_output_data): 28 | wav = self.sess.run(None, {self.input_name: am_output_data})[0]  # None = fetch all outputs; [0] is the waveform 29 | return wav 30 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/README.md: -------------------------------------------------------------------------------- 1 | ### csmsc_tts3 2 | - **支持合成语言**: 中文和数字,不支持英文字母 3 | - 基于[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)下的[TTS3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3)整理而来 4 | - 整个推理引擎只采用`ONNXRuntime` 5 | - 其中PaddleSpeech中提供的预训练模型可以参见[link](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README_cn.md#4-%E9%A2%84%E8%AE%AD%E7%BB%83%E6%A8%A1%E5%9E%8B)。在csmsc_tts3中使用的是: 6 | 7 | |主要部分|具体模型|支持语言| 8 | |:---|:---|:---| 9 | |声学模型|[fastspeech2_csmsc](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/csmsc/tts3/README.md)|zh| 10 | |声码器|[hifigan_csmsc](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/csmsc/voc5/README.md)|zh| 11 | 12 | #### 结果示例 13 |
14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 28 | 29 | 30 |
输入文本合成音频
早上好,今天是2020/10/29,最低温度是-3°C。 25 | 26 |
27 |
31 | 32 |
33 | 34 | 35 | #### 运行步骤 36 | 1. 下载`resources`, [Google Drive](https://drive.google.com/file/d/1xYD9NrTraiDFkwtvg7SkKcETLFfa6mlR/view?usp=sharing) | [百度网盘,提取码:a2nw](https://pan.baidu.com/s/1DbqKTNuWZd0Y9UMVgRaRqQ), 解压到`csmsc_tts3`目录下,最终目录结构如下: 37 | ```text 38 | csmsc_tts3 39 | ├── csmsc_test.txt 40 | ├── requirements.txt 41 | ├── frontend 42 | ├── main.sh 43 | ├── tts3.py 44 | ├── infer_result 45 | ├── resources 46 | │ ├── fastspeech2_csmsc_onnx_0.2.0 47 | │ │ ├── fastspeech2_csmsc.onnx 48 | │ │ └── phone_id_map.txt 49 | │ └── hifigan_csmsc.onnx 50 | └──syn_utils.py 51 | ``` 52 | 53 | 2. 安装`requirements.txt` 54 | ```bash 55 | pip install -r requirements.txt -i https://pypi.douban.com/simple/ 56 | ``` 57 | 58 | 3. 运行`tts3.py` 59 | ```bash 60 | python tts3.py 61 | ``` 62 | or 63 | ```bash 64 | bash main.sh 65 | ``` 66 | 67 | 4. 运行日志如下: 68 | ```text 69 | frontend done! 70 | warm up done! 71 | Building prefix dict from the default dictionary ... 72 | Loading model from cache C:\Users\WANGJI~1\AppData\Local\Temp\jieba.cache 73 | Loading model cost 0.836 seconds. 74 | Prefix dict has been built successfully. 75 | 009901, mel: (331, 80), wave: 99300, time: 1.3718173s, Hz: 72385.938204132, RTF: 0.33155610876132857. 76 | 009902, mel: (288, 80), wave: 86400, time: 1.1350326000000024s, Hz: 76121.49025085453, RTF: 0.3152854722222228. 77 | 009903, mel: (341, 80), wave: 102300, time: 1.4687841000000006s, Hz: 69649.7502651354, RTF: 0.3445812785923755. 
78 | generation speed: 72441.68237053939Hz, RTF: 0.33130097499999983 79 | ``` 80 | 生成结果会保存到`infer_result`目录下 81 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/assets/000001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts3/assets/000001.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/assets/audio_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/csmsc_tts3/assets/audio_icon.png -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/csmsc_test.txt: -------------------------------------------------------------------------------- 1 | 000001 早上好,今天是2020/10/29,最低温度是-3°C。 2 | 009901 昨日,这名伤者与医生全部被警方依法刑事拘留。 3 | 009902 钱伟长想到上海来办学校是经过深思熟虑的。 4 | 009903 她见我一进门就骂,吃饭时也骂,骂得我抬不起头。 5 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .generate_lexicon import * 15 | from .normalizer import * 16 | from .phonectic import * 17 | from .punctuation import * 18 | from .tone_sandhi import * 19 | from .vocab import * 20 | from .zh_normalization import * 21 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/arpabet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from paddlespeech.t2s.frontend.phonectic import Phonetics 15 | """ 16 | A phonology system with ARPABET symbols and limited punctuations. The G2P 17 | conversion is done by g2p_en. 18 | 19 | Note that g2p_en does not handle words with hypen well. So make sure the input 20 | sentence is first normalized. 21 | """ 22 | from paddlespeech.t2s.frontend.vocab import Vocab 23 | from g2p_en import G2p 24 | 25 | 26 | class ARPABET(Phonetics): 27 | """A phonology for English that uses ARPABET as the phoneme vocabulary. 28 | See http://www.speech.cs.cmu.edu/cgi-bin/cmudict for more details. 
29 | Phoneme Example Translation 30 | ------- ------- ----------- 31 | AA odd AA D 32 | AE at AE T 33 | AH hut HH AH T 34 | AO ought AO T 35 | AW cow K AW 36 | AY hide HH AY D 37 | B be B IY 38 | CH cheese CH IY Z 39 | D dee D IY 40 | DH thee DH IY 41 | EH Ed EH D 42 | ER hurt HH ER T 43 | EY ate EY T 44 | F fee F IY 45 | G green G R IY N 46 | HH he HH IY 47 | IH it IH T 48 | IY eat IY T 49 | JH gee JH IY 50 | K key K IY 51 | L lee L IY 52 | M me M IY 53 | N knee N IY 54 | NG ping P IH NG 55 | OW oat OW T 56 | OY toy T OY 57 | P pee P IY 58 | R read R IY D 59 | S sea S IY 60 | SH she SH IY 61 | T tea T IY 62 | TH theta TH EY T AH 63 | UH hood HH UH D 64 | UW two T UW 65 | V vee V IY 66 | W we W IY 67 | Y yield Y IY L D 68 | Z zee Z IY 69 | ZH seizure S IY ZH ER 70 | """ 71 | phonemes = [ 72 | 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'B', 'CH', 'D', 'DH', 'EH', 'ER', 73 | 'EY', 'F', 'G', 'HH', 'IH', 'IY', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 74 | 'OY', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UW', 'UH', 'V', 'W', 'Y', 'Z', 75 | 'ZH' 76 | ] 77 | punctuations = [',', '.', '?', '!'] 78 | symbols = phonemes + punctuations 79 | _stress_to_no_stress_ = { 80 | 'AA0': 'AA', 81 | 'AA1': 'AA', 82 | 'AA2': 'AA', 83 | 'AE0': 'AE', 84 | 'AE1': 'AE', 85 | 'AE2': 'AE', 86 | 'AH0': 'AH', 87 | 'AH1': 'AH', 88 | 'AH2': 'AH', 89 | 'AO0': 'AO', 90 | 'AO1': 'AO', 91 | 'AO2': 'AO', 92 | 'AW0': 'AW', 93 | 'AW1': 'AW', 94 | 'AW2': 'AW', 95 | 'AY0': 'AY', 96 | 'AY1': 'AY', 97 | 'AY2': 'AY', 98 | 'EH0': 'EH', 99 | 'EH1': 'EH', 100 | 'EH2': 'EH', 101 | 'ER0': 'ER', 102 | 'ER1': 'ER', 103 | 'ER2': 'ER', 104 | 'EY0': 'EY', 105 | 'EY1': 'EY', 106 | 'EY2': 'EY', 107 | 'IH0': 'IH', 108 | 'IH1': 'IH', 109 | 'IH2': 'IH', 110 | 'IY0': 'IY', 111 | 'IY1': 'IY', 112 | 'IY2': 'IY', 113 | 'OW0': 'OW', 114 | 'OW1': 'OW', 115 | 'OW2': 'OW', 116 | 'OY0': 'OY', 117 | 'OY1': 'OY', 118 | 'OY2': 'OY', 119 | 'UH0': 'UH', 120 | 'UH1': 'UH', 121 | 'UH2': 'UH', 122 | 'UW0': 'UW', 123 | 'UW1': 'UW', 124 | 'UW2': 'UW' 125 | } 126 
| 127 | def __init__(self): 128 | self.backend = G2p() 129 | self.vocab = Vocab(self.phonemes + self.punctuations) 130 | 131 | def _remove_vowels(self, phone): 132 | return self._stress_to_no_stress_.get(phone, phone) 133 | 134 | def phoneticize(self, sentence, add_start_end=False): 135 | """ Normalize the input text sequence and convert it into pronunciation sequence. 136 | Args: 137 | sentence (str): The input text sequence. 138 | 139 | Returns: 140 | List[str]: The list of pronunciation sequence. 141 | """ 142 | phonemes = [ 143 | self._remove_vowels(item) for item in self.backend(sentence) 144 | ] 145 | if add_start_end: 146 | start = self.vocab.start_symbol 147 | end = self.vocab.end_symbol 148 | phonemes = [start] + phonemes + [end] 149 | phonemes = [item for item in phonemes if item in self.vocab.stoi] 150 | return phonemes 151 | 152 | def numericalize(self, phonemes): 153 | """ Convert pronunciation sequence into pronunciation id sequence. 154 | 155 | Args: 156 | phonemes (List[str]): The list of pronunciation sequence. 157 | 158 | Returns: 159 | List[int]: The list of pronunciation id sequence. 160 | """ 161 | ids = [self.vocab.lookup(item) for item in phonemes] 162 | return ids 163 | 164 | def reverse(self, ids): 165 | """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence. 166 | 167 | Args: 168 | ids( List[int]): The list of pronunciation id sequence. 169 | 170 | Returns: 171 | List[str]: 172 | The list of pronunciation sequence. 173 | """ 174 | return [self.vocab.reverse(i) for i in ids] 175 | 176 | def __call__(self, sentence, add_start_end=False): 177 | """ Convert the input text sequence into pronunciation id sequence. 178 | 179 | Args: 180 | sentence (str): The input text sequence. 181 | 182 | Returns: 183 | List[str]: The list of pronunciation id sequence. 
184 | """ 185 | return self.numericalize( 186 | self.phoneticize(sentence, add_start_end=add_start_end)) 187 | 188 | @property 189 | def vocab_size(self): 190 | """ Vocab size. 191 | """ 192 | # 47 = 39 phones + 4 punctuations + 4 special tokens 193 | return len(self.vocab) 194 | 195 | 196 | class ARPABETWithStress(Phonetics): 197 | phonemes = [ 198 | 'AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0', 199 | 'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D', 200 | 'DH', 'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2', 201 | 'F', 'G', 'HH', 'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K', 202 | 'L', 'M', 'N', 'NG', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R', 203 | 'S', 'SH', 'T', 'TH', 'UH0', 'UH1', 'UH2', 'UW0', 'UW1', 'UW2', 'V', 204 | 'W', 'Y', 'Z', 'ZH' 205 | ] 206 | punctuations = [',', '.', '?', '!'] 207 | symbols = phonemes + punctuations 208 | 209 | def __init__(self): 210 | self.backend = G2p() 211 | self.vocab = Vocab(self.phonemes + self.punctuations) 212 | 213 | def phoneticize(self, sentence, add_start_end=False): 214 | """ Normalize the input text sequence and convert it into pronunciation sequence. 215 | 216 | Args: 217 | sentence (str): The input text sequence. 218 | 219 | Returns: 220 | List[str]: The list of pronunciation sequence. 221 | """ 222 | phonemes = self.backend(sentence) 223 | if add_start_end: 224 | start = self.vocab.start_symbol 225 | end = self.vocab.end_symbol 226 | phonemes = [start] + phonemes + [end] 227 | phonemes = [item for item in phonemes if item in self.vocab.stoi] 228 | return phonemes 229 | 230 | def numericalize(self, phonemes): 231 | """ Convert pronunciation sequence into pronunciation id sequence. 232 | 233 | Args: 234 | phonemes (List[str]): The list of pronunciation sequence. 235 | 236 | Returns: 237 | List[int]: The list of pronunciation id sequence. 
238 | """ 239 | ids = [self.vocab.lookup(item) for item in phonemes] 240 | return ids 241 | 242 | def reverse(self, ids): 243 | """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence. 244 | Args: 245 | ids (List[int]): The list of pronunciation id sequence. 246 | 247 | Returns: 248 | List[str]: The list of pronunciation sequence. 249 | """ 250 | return [self.vocab.reverse(i) for i in ids] 251 | 252 | def __call__(self, sentence, add_start_end=False): 253 | """ Convert the input text sequence into pronunciation id sequence. 254 | Args: 255 | sentence (str): The input text sequence. 256 | 257 | Returns: 258 | List[str]: The list of pronunciation id sequence. 259 | """ 260 | return self.numericalize( 261 | self.phoneticize(sentence, add_start_end=add_start_end)) 262 | 263 | @property 264 | def vocab_size(self): 265 | """ Vocab size. 266 | """ 267 | # 77 = 69 phones + 4 punctuations + 4 special tokens 268 | return len(self.vocab) 269 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/generate_lexicon.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Design principles: https://zhuanlan.zhihu.com/p/349600439 15 | """Generate lexicon and symbols for Mandarin Chinese phonology. 
16 | The lexicon is used for Montreal Force Aligner. 17 | Note that syllables are used as word in this lexicon. Since syllables rather 18 | than words are used in transcriptions produced by `reorganize_baker.py`. 19 | We make this choice to better leverage other software for chinese text to 20 | pinyin tools like pypinyin. This is the convention for G2P in Chinese. 21 | """ 22 | import re 23 | from collections import OrderedDict 24 | 25 | INITIALS = [ 26 | 'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh', 27 | 'r', 'z', 'c', 's', 'j', 'q', 'x' 28 | ] 29 | 30 | FINALS = [ 31 | 'a', 'ai', 'ao', 'an', 'ang', 'e', 'er', 'ei', 'en', 'eng', 'o', 'ou', 32 | 'ong', 'ii', 'iii', 'i', 'ia', 'iao', 'ian', 'iang', 'ie', 'io', 'iou', 33 | 'iong', 'in', 'ing', 'u', 'ua', 'uai', 'uan', 'uang', 'uei', 'uo', 'uen', 34 | 'ueng', 'v', 've', 'van', 'vn' 35 | ] 36 | 37 | SPECIALS = ['sil', 'sp'] 38 | 39 | 40 | def rule(C, V, R, T): 41 | """Generate a syllable given the initial, the final, erhua indicator, and tone. 42 | Orthographical rules for pinyin are applied. (special case for y, w, ui, un, iu) 43 | 44 | Note that in this system, 'ü' is alway written as 'v' when appeared in phoneme, but converted to 45 | 'u' in syllables when certain conditions are satisfied. 46 | 47 | 'i' is distinguished when appeared in phonemes, and separated into 3 categories, 'i', 'ii' and 'iii'. 48 | Erhua is is possibly applied to every finals, except for finals that already ends with 'r'. 49 | When a syllable is impossible or does not have any characters with this pronunciation, return None 50 | to filter it out. 
51 | """ 52 | 53 | # 不可拼的音节, ii 只能和 z, c, s 拼 54 | if V in ["ii"] and (C not in ['z', 'c', 's']): 55 | return None 56 | # iii 只能和 zh, ch, sh, r 拼 57 | if V in ['iii'] and (C not in ['zh', 'ch', 'sh', 'r']): 58 | return None 59 | 60 | # 齐齿呼或者撮口呼不能和 f, g, k, h, zh, ch, sh, r, z, c, s 61 | if (V not in ['ii', 'iii']) and V[0] in ['i', 'v'] and ( 62 | C in ['f', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's']): 63 | return None 64 | 65 | # 撮口呼只能和 j, q, x l, n 拼 66 | if V.startswith("v"): 67 | # v, ve 只能和 j ,q , x, n, l 拼 68 | if V in ['v', 've']: 69 | if C not in ['j', 'q', 'x', 'n', 'l', '']: 70 | return None 71 | # 其他只能和 j, q, x 拼 72 | else: 73 | if C not in ['j', 'q', 'x', '']: 74 | return None 75 | 76 | # j, q, x 只能和齐齿呼或者撮口呼拼 77 | if (C in ['j', 'q', 'x']) and not ( 78 | (V not in ['ii', 'iii']) and V[0] in ['i', 'v']): 79 | return None 80 | 81 | # b, p ,m, f 不能和合口呼拼,除了 u 之外 82 | # bm p, m, f 不能和撮口呼拼 83 | if (C in ['b', 'p', 'm', 'f']) and ((V[0] in ['u', 'v'] and V != "u") or 84 | V == 'ong'): 85 | return None 86 | 87 | # ua, uai, uang 不能和 d, t, n, l, r, z, c, s 拼 88 | if V in ['ua', 'uai', 89 | 'uang'] and C in ['d', 't', 'n', 'l', 'r', 'z', 'c', 's']: 90 | return None 91 | 92 | # sh 和 ong 不能拼 93 | if V == 'ong' and C in ['sh']: 94 | return None 95 | 96 | # o 和 gkh, zh ch sh r z c s 不能拼 97 | if V == "o" and C in [ 98 | 'd', 't', 'n', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's' 99 | ]: 100 | return None 101 | 102 | # ueng 只是 weng 这个 ad-hoc 其他情况下都是 ong 103 | if V == 'ueng' and C != '': 104 | return 105 | 106 | # 非儿化的 er 只能单独存在 107 | if V == 'er' and C != '': 108 | return None 109 | 110 | if C == '': 111 | if V in ["i", "in", "ing"]: 112 | C = 'y' 113 | elif V == 'u': 114 | C = 'w' 115 | elif V.startswith('i') and V not in ["ii", "iii"]: 116 | C = 'y' 117 | V = V[1:] 118 | elif V.startswith('u'): 119 | C = 'w' 120 | V = V[1:] 121 | elif V.startswith('v'): 122 | C = 'yu' 123 | V = V[1:] 124 | else: 125 | if C in ['j', 'q', 'x']: 126 | if 
V.startswith('v'): 127 | V = re.sub('v', 'u', V) 128 | if V == 'iou': 129 | V = 'iu' 130 | elif V == 'uei': 131 | V = 'ui' 132 | elif V == 'uen': 133 | V = 'un' 134 | result = C + V 135 | 136 | # Filter er 不能再儿化 137 | if result.endswith('r') and R == 'r': 138 | return None 139 | 140 | # ii and iii, change back to i 141 | result = re.sub(r'i+', 'i', result) 142 | 143 | result = result + R + T 144 | return result 145 | 146 | 147 | def generate_lexicon(with_tone=False, with_erhua=False): 148 | """Generate lexicon for Mandarin Chinese.""" 149 | syllables = OrderedDict() 150 | 151 | for C in [''] + INITIALS: 152 | for V in FINALS: 153 | for R in [''] if not with_erhua else ['', 'r']: 154 | for T in [''] if not with_tone else ['1', '2', '3', '4', '5']: 155 | result = rule(C, V, R, T) 156 | if result: 157 | syllables[result] = f'{C} {V}{R}{T}' 158 | return syllables 159 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from .normalizer import * 15 | from .numbers import * 16 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/abbrrviation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/acronyms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/normalizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | import unicodedata 16 | from builtins import str as unicode 17 | 18 | from .numbers import normalize_numbers 19 | 20 | 21 | def normalize(sentence): 22 | """ Normalize English text. 23 | """ 24 | # preprocessing 25 | sentence = unicode(sentence) 26 | sentence = normalize_numbers(sentence) 27 | sentence = ''.join( 28 | char for char in unicodedata.normalize('NFD', sentence) 29 | if unicodedata.category(char) != 'Mn') # Strip accents 30 | sentence = sentence.lower() 31 | sentence = re.sub(r"[^ a-z'.,?!\-]", "", sentence) 32 | sentence = sentence.replace("i.e.", "that is") 33 | sentence = sentence.replace("e.g.", "for example") 34 | return sentence 35 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/numbers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # number expansion is not that easy 15 | import re 16 | 17 | import inflect 18 | 19 | _inflect = inflect.engine() 20 | _comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])') 21 | _decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)') 22 | _pounds_re = re.compile(r'£([0-9\,]*[0-9]+)') 23 | _dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)') 24 | _ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)') 25 | _number_re = re.compile(r'[0-9]+') 26 | 27 | 28 | def _remove_commas(m): 29 | return m.group(1).replace(',', '') 30 | 31 | 32 | def _expand_decimal_point(m): 33 | return m.group(1).replace('.', ' point ') 34 | 35 | 36 | def _expand_dollars(m): 37 | match = m.group(1) 38 | parts = match.split('.') 39 | if len(parts) > 2: 40 | return match + ' dollars' # Unexpected format 41 | dollars = int(parts[0]) if parts[0] else 0 42 | cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0 43 | if dollars and cents: 44 | dollar_unit = 'dollar' if dollars == 1 else 'dollars' 45 | cent_unit = 'cent' if cents == 1 else 'cents' 46 | return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit) 47 | elif dollars: 48 | dollar_unit = 'dollar' if dollars == 1 else 'dollars' 49 | return '%s %s' % (dollars, dollar_unit) 50 | elif cents: 51 | cent_unit = 'cent' if cents == 1 else 'cents' 52 | return '%s %s' % (cents, cent_unit) 53 | else: 54 | return 'zero dollars' 55 | 56 | 57 | def _expand_ordinal(m): 
58 | return _inflect.number_to_words(m.group(0)) 59 | 60 | 61 | def _expand_number(m): 62 | num = int(m.group(0)) 63 | if num > 1000 and num < 3000: 64 | if num == 2000: 65 | return 'two thousand' 66 | elif num > 2000 and num < 2010: 67 | return 'two thousand ' + _inflect.number_to_words(num % 100) 68 | elif num % 100 == 0: 69 | return _inflect.number_to_words(num // 100) + ' hundred' 70 | else: 71 | return _inflect.number_to_words( 72 | num, andword='', zero='oh', group=2).replace(', ', ' ') 73 | else: 74 | return _inflect.number_to_words(num, andword='') 75 | 76 | 77 | def normalize_numbers(text): 78 | """ Normalize numbers in English text. 79 | """ 80 | text = re.sub(_comma_number_re, _remove_commas, text) 81 | text = re.sub(_pounds_re, r'\1 pounds', text) 82 | text = re.sub(_dollars_re, _expand_dollars, text) 83 | text = re.sub(_decimal_number_re, _expand_decimal_point, text) 84 | text = re.sub(_ordinal_re, _expand_ordinal, text) 85 | text = re.sub(_number_re, _expand_number, text) 86 | return text 87 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/normalizer/width.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | def full2half_width(ustr): 17 | half = [] 18 | for u in ustr: 19 | num = ord(u) 20 | if num == 0x3000: # 全角空格变半角 21 | num = 32 22 | elif 0xFF01 <= num <= 0xFF5E: 23 | num -= 0xfee0 24 | u = chr(num) 25 | half.append(u) 26 | return ''.join(half) 27 | 28 | 29 | def half2full_width(ustr): 30 | full = [] 31 | for u in ustr: 32 | num = ord(u) 33 | if num == 32: # 半角空格变全角 34 | num = 0x3000 35 | elif 0x21 <= num <= 0x7E: 36 | num += 0xfee0 37 | u = chr(num) # to unicode 38 | full.append(u) 39 | 40 | return ''.join(full) 41 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/punctuation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | __all__ = ["get_punctuations"] 16 | 17 | EN_PUNCT = [ 18 | " ", 19 | "-", 20 | "...", 21 | ",", 22 | ".", 23 | "?", 24 | "!", 25 | ] 26 | 27 | CN_PUNCT = ["、", ",", ";", ":", "。", "?", "!"] 28 | 29 | 30 | def get_punctuations(lang): 31 | if lang == "en": 32 | return EN_PUNCT 33 | elif lang == "cn": 34 | return CN_PUNCT 35 | else: 36 | raise ValueError(f"language {lang} Not supported") 37 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/vocab.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from collections import OrderedDict 15 | from typing import Iterable 16 | 17 | __all__ = ["Vocab"] 18 | 19 | 20 | class Vocab(object): 21 | """ Vocabulary. 22 | 23 | Args: 24 | symbols (Iterable[str]): Common symbols. 25 | padding_symbol (str, optional): Symbol for pad. Defaults to "". 26 | unk_symbol (str, optional): Symbol for unknow. Defaults to "" 27 | start_symbol (str, optional): Symbol for start. Defaults to "" 28 | end_symbol (str, optional): Symbol for end. 
Defaults to "" 29 | """ 30 | 31 | def __init__(self, 32 | symbols: Iterable[str], 33 | padding_symbol="", 34 | unk_symbol="", 35 | start_symbol="", 36 | end_symbol=""): 37 | self.special_symbols = OrderedDict() 38 | for i, item in enumerate( 39 | [padding_symbol, unk_symbol, start_symbol, end_symbol]): 40 | if item: 41 | self.special_symbols[item] = len(self.special_symbols) 42 | 43 | self.padding_symbol = padding_symbol 44 | self.unk_symbol = unk_symbol 45 | self.start_symbol = start_symbol 46 | self.end_symbol = end_symbol 47 | 48 | self.stoi = OrderedDict() 49 | self.stoi.update(self.special_symbols) 50 | 51 | for i, s in enumerate(symbols): 52 | if s not in self.stoi: 53 | self.stoi[s] = len(self.stoi) 54 | self.itos = {v: k for k, v in self.stoi.items()} 55 | 56 | def __len__(self): 57 | return len(self.stoi) 58 | 59 | @property 60 | def num_specials(self): 61 | """ The number of special symbols. 62 | """ 63 | return len(self.special_symbols) 64 | 65 | # special tokens 66 | @property 67 | def padding_index(self): 68 | """ The index of padding symbol 69 | """ 70 | return self.stoi.get(self.padding_symbol, -1) 71 | 72 | @property 73 | def unk_index(self): 74 | """The index of unknow symbol. 75 | """ 76 | return self.stoi.get(self.unk_symbol, -1) 77 | 78 | @property 79 | def start_index(self): 80 | """The index of start symbol. 81 | """ 82 | return self.stoi.get(self.start_symbol, -1) 83 | 84 | @property 85 | def end_index(self): 86 | """ The index of end symbol. 87 | """ 88 | return self.stoi.get(self.end_symbol, -1) 89 | 90 | def __repr__(self): 91 | fmt = "Vocab(size: {},\nstoi:\n{})" 92 | return fmt.format(len(self), self.stoi) 93 | 94 | def __str__(self): 95 | return self.__repr__() 96 | 97 | def lookup(self, symbol): 98 | """ The index that symbol correspond. 99 | """ 100 | return self.stoi[symbol] 101 | 102 | def reverse(self, index): 103 | """ The symbol thar index cottespond. 
104 | """ 105 | return self.itos[index] 106 | 107 | def add_symbol(self, symbol): 108 | """ Add a new symbol in vocab. 109 | """ 110 | if symbol in self.stoi: 111 | return 112 | N = len(self.stoi) 113 | self.stoi[symbol] = N 114 | self.itos[N] = symbol 115 | 116 | def add_symbols(self, symbols): 117 | """ Add multiple symbols in vocab. 118 | """ 119 | for symbol in symbols: 120 | self.add_symbol(symbol) 121 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/README.md: -------------------------------------------------------------------------------- 1 | ## Supported NSW (Non-Standard-Word) Normalization 2 | 3 | |NSW type|raw|normalized| 4 | |:--|:-|:-| 5 | |serial number|电影中梁朝伟扮演的陈永仁的编号27149|电影中梁朝伟扮演的陈永仁的编号二七一四九| 6 | |cardinal|这块黄金重达324.75克
我们班的最高总分为583分|这块黄金重达三百二十四点七五克
我们班的最高总分为五百八十三分| 7 | |numeric range |12\~23
-1.5\~2|十二到二十三
负一点五到二| 8 | |date|她出生于86年8月18日,她弟弟出生于1995年3月1日|她出生于八六年八月十八日, 她弟弟出生于一九九五年三月一日| 9 | |time|等会请在12:05请通知我|等会请在十二点零五分请通知我| 10 | |temperature|今天的最低气温达到-10°C|今天的最低气温达到零下十度| 11 | |fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票| 12 | |percentage|明天有62%的概率降雨|明天有百分之六十二的概率降雨| 13 | |money|随便来几个价格12块5,34.5元,20.1万|随便来几个价格十二块五,三十四点五元,二十点一万| 14 | |telephone|这是固话0421-33441122<br>
这是手机+86 18544139121|这是固话零四二一三三四四一一二二
这是手机八六一八五四四一三九一二一| 15 | ## References 16 | [Pull requests #658 of DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech/pull/658/files) 17 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .text_normlization import * 15 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/chronology.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import re 15 | 16 | from .num import DIGITS 17 | from .num import num2str 18 | from .num import verbalize_cardinal 19 | from .num import verbalize_digit 20 | 21 | 22 | def _time_num2str(num_string: str) -> str: 23 | """A special case for verbalizing number in time.""" 24 | result = num2str(num_string.lstrip('0')) 25 | if num_string.startswith('0'): 26 | result = DIGITS['0'] + result 27 | return result 28 | 29 | 30 | # 时刻表达式 31 | RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])' 32 | r':([0-5][0-9])' 33 | r'(:([0-5][0-9]))?') 34 | 35 | # 时间范围,如8:30-12:30 36 | RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])' 37 | r':([0-5][0-9])' 38 | r'(:([0-5][0-9]))?' 39 | r'(~|-)' 40 | r'([0-1]?[0-9]|2[0-3])' 41 | r':([0-5][0-9])' 42 | r'(:([0-5][0-9]))?') 43 | 44 | 45 | def replace_time(match) -> str: 46 | """ 47 | Args: 48 | match (re.Match) 49 | Returns: 50 | str 51 | """ 52 | 53 | is_range = len(match.groups()) > 5 54 | 55 | hour = match.group(1) 56 | minute = match.group(2) 57 | second = match.group(4) 58 | 59 | if is_range: 60 | hour_2 = match.group(6) 61 | minute_2 = match.group(7) 62 | second_2 = match.group(9) 63 | 64 | result = f"{num2str(hour)}点" 65 | if minute.lstrip('0'): 66 | if int(minute) == 30: 67 | result += "半" 68 | else: 69 | result += f"{_time_num2str(minute)}分" 70 | if second and second.lstrip('0'): 71 | result += f"{_time_num2str(second)}秒" 72 | 73 | if is_range: 74 | result += "至" 75 | result += f"{num2str(hour_2)}点" 76 | if minute_2.lstrip('0'): 77 | if int(minute) == 30: 78 | result += "半" 79 | else: 80 | result += f"{_time_num2str(minute_2)}分" 81 | if second_2 and second_2.lstrip('0'): 82 | result += f"{_time_num2str(second_2)}秒" 83 | 84 | return result 85 | 86 | 87 | RE_DATE = re.compile(r'(\d{4}|\d{2})年' 88 | r'((0?[1-9]|1[0-2])月)?' 
89 | r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?') 90 | 91 | 92 | def replace_date(match) -> str: 93 | """ 94 | Args: 95 | match (re.Match) 96 | Returns: 97 | str 98 | """ 99 | year = match.group(1) 100 | month = match.group(3) 101 | day = match.group(5) 102 | result = "" 103 | if year: 104 | result += f"{verbalize_digit(year)}年" 105 | if month: 106 | result += f"{verbalize_cardinal(month)}月" 107 | if day: 108 | result += f"{verbalize_cardinal(day)}{match.group(9)}" 109 | return result 110 | 111 | 112 | # 用 / 或者 - 分隔的 YY/MM/DD 或者 YY-MM-DD 日期 113 | RE_DATE2 = re.compile( 114 | r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])') 115 | 116 | 117 | def replace_date2(match) -> str: 118 | """ 119 | Args: 120 | match (re.Match) 121 | Returns: 122 | str 123 | """ 124 | year = match.group(1) 125 | month = match.group(3) 126 | day = match.group(4) 127 | result = "" 128 | if year: 129 | result += f"{verbalize_digit(year)}年" 130 | if month: 131 | result += f"{verbalize_cardinal(month)}月" 132 | if day: 133 | result += f"{verbalize_cardinal(day)}日" 134 | return result 135 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import re
15 | import string
16 | 
17 | from pypinyin.constants import SUPPORT_UCS4
18 | 
19 | # full-width / half-width conversion tables
20 | # full-width -> half-width map for ASCII letters (num: 52)
21 | F2H_ASCII_LETTERS = {
22 |     chr(ord(char) + 65248): char
23 |     for char in string.ascii_letters
24 | }
25 | 
26 | # half-width -> full-width map for ASCII letters
27 | H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}
28 | 
29 | # full-width -> half-width map for digits (num: 10)
30 | F2H_DIGITS = {chr(ord(char) + 65248): char for char in string.digits}
31 | # half-width -> full-width map for digits
32 | H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}
33 | 
34 | # full-width -> half-width map for punctuation (num: 32)
35 | F2H_PUNCTUATIONS = {chr(ord(char) + 65248): char for char in string.punctuation}
36 | # half-width -> full-width map for punctuation
37 | H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}
38 | 
39 | # space (num: 1)
40 | F2H_SPACE = {'\u3000': ' '}
41 | H2F_SPACE = {' ': '\u3000'}
42 | 
43 | # matches runs of characters that are NOT Han characters with a pinyin reading; used to extract NSW (non-standard-word) spans
44 | if SUPPORT_UCS4:
45 |     RE_NSW = re.compile(r'(?:[^'
46 |                         r'\u3007'  # 〇
47 |                         r'\u3400-\u4dbf'  # CJK Ext A: [3400-4DBF]
48 |                         r'\u4e00-\u9fff'  # CJK Unified: [4E00-9FFF]
49 |                         r'\uf900-\ufaff'  # CJK Compat: [F900-FAFF]
50 |                         r'\U00020000-\U0002A6DF'  # CJK Ext B: [20000-2A6DF]
51 |                         r'\U0002A703-\U0002B73F'  # CJK Ext C: [2A700-2B73F] — NOTE(review): pattern starts at 2A703 but the comment says 2A700; confirm which is intended
52 |                         r'\U0002B740-\U0002B81D'  # CJK Ext D: [2B740-2B81D]
53 |                         r'\U0002F80A-\U0002FA1F'  # CJK Compat Ext: [2F800-2FA1F] — NOTE(review): pattern starts at 2F80A but the comment says 2F800; confirm
54 |                         r'])+')
55 | else:
56 |     RE_NSW = re.compile(  # pragma: no cover
57 |         r'(?:[^'
58 |         r'\u3007'  # 〇
59 |         r'\u3400-\u4dbf'  # CJK Ext A: [3400-4DBF]
60 |         r'\u4e00-\u9fff'  # CJK Unified: [4E00-9FFF]
61 |         r'\uf900-\ufaff'  # CJK Compat: [F900-FAFF]
62 |         r'])+')
63 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/num.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Rules to verbalize numbers into Chinese characters.
https://zh.wikipedia.org/wiki/中文数字#現代中文
"""
import re
from collections import OrderedDict
from typing import List

# ASCII digit -> Chinese character, e.g. '3' -> '三'.
DIGITS = {str(i): tran for i, tran in enumerate('零一二三四五六七八九')}
# Positional units keyed by the power of ten they stand for.
# `_get_value` scans these keys from the largest power downwards, so the
# ascending insertion order matters.
UNITS = OrderedDict({
    1: '十',
    2: '百',
    3: '千',
    4: '万',
    8: '亿',
})

# Common Chinese measure words (量词); used to spot "number + quantifier".
COM_QUANTIFIERS = '(所|朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|小时|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)'

# 分数表达式
RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)')


def replace_frac(match) -> str:
    """Verbalize a fraction, e.g. '2/3' -> '三分之二'.

    Args:
        match (re.Match): a match of RE_FRAC.
    Returns:
        str
    """
    sign = match.group(1)
    nominator = match.group(2)
    denominator = match.group(3)
    sign: str = "负" if sign else ""
    nominator: str = num2str(nominator)
    denominator: str = num2str(denominator)
    # Chinese reads the denominator first: "denominator 分之 nominator".
    result = f"{sign}{denominator}分之{nominator}"
    return result


# 百分数表达式
RE_PERCENTAGE = re.compile(r'(-?)(\d+(\.\d+)?)%')


def replace_percentage(match) -> str:
    """Verbalize a percentage, e.g. '-5%' -> '负百分之五'.

    Args:
        match (re.Match): a match of RE_PERCENTAGE.
    Returns:
        str
    """
    sign = match.group(1)
    percent = match.group(2)
    sign: str = "负" if sign else ""
    percent: str = num2str(percent)
    result = f"{sign}百分之{percent}"
    return result


# 整数表达式
# 带负号的整数 -10
RE_INTEGER = re.compile(r'(-)' r'(\d+)')


def replace_negative_num(match) -> str:
    """Verbalize a signed integer, e.g. '-10' -> '负十'.

    Args:
        match (re.Match): a match of RE_INTEGER.
    Returns:
        str
    """
    sign = match.group(1)
    number = match.group(2)
    sign: str = "负" if sign else ""
    number: str = num2str(number)
    result = f"{sign}{number}"
    return result


# 编号-无符号整形
# 00078
RE_DEFAULT_NUM = re.compile(r'\d{3}\d*')


def replace_default_num(match):
    """Verbalize an id-like number digit by digit, e.g. '00078' -> '零零零七八'.

    Args:
        match (re.Match): a match of RE_DEFAULT_NUM.
    Returns:
        str
    """
    number = match.group(0)
    return verbalize_digit(number)


# 数字表达式
# 纯小数
RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' r'|(\.(\d+))')
# 正整数 + 量词
RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几\+])?" + COM_QUANTIFIERS)
RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))')


def replace_positive_quantifier(match) -> str:
    """Verbalize "number + quantifier", e.g. '3个' -> '三个', '10+个' -> '十多个'.

    Args:
        match (re.Match): a match of RE_POSITIVE_QUANTIFIERS.
    Returns:
        str
    """
    number = match.group(1)
    match_2 = match.group(2)
    # '+' after a number is read as '多' ("more than").
    if match_2 == "+":
        match_2 = "多"
    match_2: str = match_2 if match_2 else ""
    # group(3) is the outermost group of COM_QUANTIFIERS.
    quantifiers: str = match.group(3)
    number: str = num2str(number)
    result = f"{number}{match_2}{quantifiers}"
    return result


def replace_number(match) -> str:
    """Verbalize a plain number, e.g. '-1.5' -> '负一点五', '.5' -> '零点五'.

    Args:
        match (re.Match): a match of RE_NUMBER or RE_DECIMAL_NUM.
    Returns:
        str
    """
    sign = match.group(1)
    number = match.group(2)
    # group(5) matches the leading-dot alternative ('.5').
    pure_decimal = match.group(5)
    if pure_decimal:
        result = num2str(pure_decimal)
    else:
        sign: str = "负" if sign else ""
        number: str = num2str(number)
        result = f"{sign}{number}"
    return result


# 范围表达式
# match.group(1) and match.group(8) are copy from RE_NUMBER

RE_RANGE = re.compile(
    r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))[-~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))')


def replace_range(match) -> str:
    """Verbalize a numeric range, e.g. '1~2' -> '一到二'.

    Args:
        match (re.Match): a match of RE_RANGE.
    Returns:
        str
    """
    first, second = match.group(1), match.group(8)
    first = RE_NUMBER.sub(replace_number, first)
    second = RE_NUMBER.sub(replace_number, second)
    result = f"{first}到{second}"
    return result


def _get_value(value_string: str, use_zero: bool=True) -> List[str]:
    """Recursively verbalize an unsigned integer string into symbols.

    `use_zero` inserts '零' for skipped positions (e.g. '05' -> ['零', '五']).
    """
    stripped = value_string.lstrip('0')
    if len(stripped) == 0:
        return []
    elif len(stripped) == 1:
        if use_zero and len(stripped) < len(value_string):
            return [DIGITS['0'], DIGITS[stripped]]
        else:
            return [DIGITS[stripped]]
    else:
        # Largest unit strictly smaller than the number of digits.
        largest_unit = next(
            power for power in reversed(UNITS.keys()) if power < len(stripped))
        first_part = value_string[:-largest_unit]
        second_part = value_string[-largest_unit:]
        return _get_value(first_part) + [UNITS[largest_unit]] + _get_value(
            second_part)


def verbalize_cardinal(value_string: str) -> str:
    """Verbalize an unsigned integer string as a cardinal, e.g. '105' -> '一百零五'."""
    if not value_string:
        return ''

    # 000 -> '零' , 0 -> '零'
    value_string = value_string.lstrip('0')
    if len(value_string) == 0:
        return DIGITS['0']

    result_symbols = _get_value(value_string)
    # verbalized number starting with '一十*' is abbreviated as `十*`
    if len(result_symbols) >= 2 and result_symbols[0] == DIGITS[
            '1'] and result_symbols[1] == UNITS[1]:
        result_symbols = result_symbols[1:]
    return ''.join(result_symbols)


def verbalize_digit(value_string: str, alt_one=False) -> str:
    """Verbalize a digit string one digit at a time, e.g. '123' -> '一二三'.

    With `alt_one`, '一' is replaced by '幺' (standard for phone numbers).
    """
    result_symbols = [DIGITS[digit] for digit in value_string]
    result = ''.join(result_symbols)
    if alt_one:
        result = result.replace("一", "幺")
    return result


def num2str(value_string: str) -> str:
    """Verbalize a decimal number string, e.g. '3.20' -> '三点二'.

    Raises:
        ValueError: if `value_string` contains more than one point.
    """
    integer_decimal = value_string.split('.')
    if len(integer_decimal) == 1:
        integer = integer_decimal[0]
        decimal = ''
    elif len(integer_decimal) == 2:
        integer, decimal = integer_decimal
    else:
        # BUG FIX: the message previously read "'${value_string}'", a leftover
        # of JS-style interpolation that printed a literal '$'.
        raise ValueError(
            f"The value string: '{value_string}' has more than one point in it."
        )

    result = verbalize_cardinal(integer)

    decimal = decimal.rstrip('0')
    if decimal:
        # '.22' is verbalized as '零点二二'
        # '3.20' is verbalized as '三点二'
        result = result if result else "零"
        result += '点' + verbalize_digit(decimal)
    return result
# -------------------------------------------------------------------
# python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/phonecode.py
# -------------------------------------------------------------------
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from .num import verbalize_digit

# 规范化固话/手机号码
# 手机
# http://www.jihaoba.com/news/show/13680
# 移动:139、138、137、136、135、134、159、158、157、150、151、152、188、187、182、183、184、178、198
# 联通:130、131、132、156、155、186、185、176
# 电信:133、153、189、180、181、177
#
# NOTE(review): the three regexes below were garbled in this copy (the
# look-behind/look-ahead assertions containing '<' were truncated).  They are
# restored from upstream PaddleSpeech zh_normalization/phonecode.py — confirm
# against the pinned PaddleSpeech revision.
RE_MOBILE_PHONE = re.compile(
    r"(?<!\d)((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})(?!\d)")
RE_TELEPHONE = re.compile(
    r"(?<!\d)((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})(?!\d)")
# 全国统一的号码400开头
RE_NATIONAL_UNIFORM_NUMBER = re.compile(r"(400)(-)?\d{3}(-)?\d{4}")


def phone2str(phone_string: str, mobile=True) -> str:
    """Verbalize a phone number digit by digit ('1' is read as '幺').

    Args:
        phone_string (str): the matched number text.
        mobile (bool): mobile numbers are split on spaces (country code vs
            number); landlines are split on '-'.  Parts are joined with ','
            to create a pause.
    Returns:
        str
    """
    if mobile:
        sp_parts = phone_string.strip('+').split()
        result = ','.join(
            [verbalize_digit(part, alt_one=True) for part in sp_parts])
        return result
    else:
        sil_parts = phone_string.split('-')
        result = ','.join(
            [verbalize_digit(part, alt_one=True) for part in sil_parts])
        return result


def replace_phone(match) -> str:
    """Verbalize a landline / uniform-service number.

    Args:
        match (re.Match): a match of RE_TELEPHONE or RE_NATIONAL_UNIFORM_NUMBER.
    Returns:
        str
    """
    return phone2str(match.group(0), mobile=False)


def replace_mobile(match) -> str:
    """Verbalize a mobile phone number.

    Args:
        match (re.Match): a match of RE_MOBILE_PHONE.
    Returns:
        str
    """
    return phone2str(match.group(0))
# --------------------------------------------------------------------
# python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/quantifier.py
# --------------------------------------------------------------------
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from .num import num2str

# 温度表达式,温度会影响负号的读法
# -3°C 零下三度
# Groups: 1=sign, 2=value, 3=optional decimal part (nested in 2), 4=unit.
RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)')


def replace_temperature(match) -> str:
    """Verbalize a temperature, e.g. '-3°C' -> '零下三度'.

    Args:
        match (re.Match): a match of RE_TEMPERATURE.
    Returns:
        str
    """
    sign = match.group(1)
    temperature = match.group(2)
    # BUG FIX: the unit is capture group 4.  Group 3 is the *nested* optional
    # decimal fraction of the value, so the original `match.group(3)` could
    # never equal "摄氏度" and every temperature was read with "度".
    unit = match.group(4)
    sign: str = "零下" if sign else ""
    temperature: str = num2str(temperature)
    unit: str = "摄氏度" if unit == "摄氏度" else "度"
    result = f"{sign}{temperature}{unit}"
    return result
# ---------------------------------------------------------------------------
# python/PaddleSpeech/csmsc_tts3/frontend/zh_normalization/text_normlization.py
# ---------------------------------------------------------------------------
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | from typing import List 16 | 17 | from .char_convert import tranditional_to_simplified 18 | from .chronology import RE_DATE 19 | from .chronology import RE_DATE2 20 | from .chronology import RE_TIME 21 | from .chronology import RE_TIME_RANGE 22 | from .chronology import replace_date 23 | from .chronology import replace_date2 24 | from .chronology import replace_time 25 | from .constants import F2H_ASCII_LETTERS 26 | from .constants import F2H_DIGITS 27 | from .constants import F2H_SPACE 28 | from .num import RE_DECIMAL_NUM 29 | from .num import RE_DEFAULT_NUM 30 | from .num import RE_FRAC 31 | from .num import RE_INTEGER 32 | from .num import RE_NUMBER 33 | from .num import RE_PERCENTAGE 34 | from .num import RE_POSITIVE_QUANTIFIERS 35 | from .num import RE_RANGE 36 | from .num import replace_default_num 37 | from .num import replace_frac 38 | from .num import replace_negative_num 39 | from .num import replace_number 40 | from .num import replace_percentage 41 | from .num import replace_positive_quantifier 42 | from .num import replace_range 43 | from .phonecode import RE_MOBILE_PHONE 44 | from .phonecode import RE_NATIONAL_UNIFORM_NUMBER 45 | from .phonecode import RE_TELEPHONE 46 | from .phonecode import replace_mobile 47 | from .phonecode import replace_phone 48 | from .quantifier import RE_TEMPERATURE 49 | from .quantifier import replace_temperature 50 | 51 | 52 | class TextNormalizer(): 53 | def __init__(self): 54 | self.SENTENCE_SPLITOR = re.compile(r'([:、,;。?!,;?!][”’]?)') 55 | 56 | def _split(self, text: str, lang="zh") -> List[str]: 57 | """Split long text into sentences with sentence-splitting punctuations. 58 | Args: 59 | text (str): The input text. 60 | Returns: 61 | List[str]: Sentences. 
62 | """ 63 | # Only for pure Chinese here 64 | if lang == "zh": 65 | text = text.replace(" ", "") 66 | # 过滤掉特殊字符 67 | text = re.sub(r'[《》【】<=>{}()()#&@“”^_|…\\]', '', text) 68 | text = self.SENTENCE_SPLITOR.sub(r'\1\n', text) 69 | text = text.strip() 70 | sentences = [sentence.strip() for sentence in re.split(r'\n+', text)] 71 | return sentences 72 | 73 | def _post_replace(self, sentence: str) -> str: 74 | sentence = sentence.replace('/', '每') 75 | sentence = sentence.replace('~', '至') 76 | 77 | return sentence 78 | 79 | def normalize_sentence(self, sentence: str) -> str: 80 | # basic character conversions 81 | sentence = tranditional_to_simplified(sentence) 82 | sentence = sentence.translate(F2H_ASCII_LETTERS).translate( 83 | F2H_DIGITS).translate(F2H_SPACE) 84 | 85 | # number related NSW verbalization 86 | sentence = RE_DATE.sub(replace_date, sentence) 87 | sentence = RE_DATE2.sub(replace_date2, sentence) 88 | 89 | # range first 90 | sentence = RE_TIME_RANGE.sub(replace_time, sentence) 91 | sentence = RE_TIME.sub(replace_time, sentence) 92 | 93 | sentence = RE_TEMPERATURE.sub(replace_temperature, sentence) 94 | sentence = RE_FRAC.sub(replace_frac, sentence) 95 | sentence = RE_PERCENTAGE.sub(replace_percentage, sentence) 96 | sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence) 97 | 98 | sentence = RE_TELEPHONE.sub(replace_phone, sentence) 99 | sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence) 100 | 101 | sentence = RE_RANGE.sub(replace_range, sentence) 102 | sentence = RE_INTEGER.sub(replace_negative_num, sentence) 103 | sentence = RE_DECIMAL_NUM.sub(replace_number, sentence) 104 | sentence = RE_POSITIVE_QUANTIFIERS.sub(replace_positive_quantifier, 105 | sentence) 106 | sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence) 107 | sentence = RE_NUMBER.sub(replace_number, sentence) 108 | sentence = self._post_replace(sentence) 109 | 110 | return sentence 111 | 112 | def normalize(self, text: str) -> List[str]: 113 | sentences = 
self._split(text) 114 | 115 | sentences = [self.normalize_sentence(sent) for sent in sentences] 116 | return sentences 117 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/main.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | inference_dir="$PWD/resources" 4 | 5 | am="fastspeech2_csmsc" 6 | am_onnx="fastspeech2_csmsc_onnx_0.2.0/fastspeech2_csmsc.onnx" 7 | 8 | voc="hifigan_csmsc" 9 | voc_onnx="hifigan_csmsc.onnx" 10 | 11 | output_dir="result" 12 | text="$PWD/csmsc_test.txt" 13 | phones_dict="$PWD/resources/fastspeech2_csmsc_onnx_0.2.0/phone_id_map.txt" 14 | 15 | python tts3.py \ 16 | --inference_dir=${inference_dir} \ 17 | --am=${am} \ 18 | --am_onnx=${am_onnx} \ 19 | --voc=${voc} \ 20 | --voc_onnx=${voc_onnx} \ 21 | --output_dir=${output_dir} \ 22 | --text=${text} \ 23 | --phones_dict=${phones_dict} \ 24 | --device=cpu \ 25 | --cpu_threads=2 26 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/requirements.txt: -------------------------------------------------------------------------------- 1 | g2p_en==2.1.0 2 | inflect==5.3.0 3 | jieba==0.42.1 4 | numpy>=1.19.3 5 | onnxruntime>=1.10.0 6 | pypinyin==0.44.0 7 | pypinyin_dict==0.2.0 8 | SoundFile==0.10.3.post1 9 | timer==0.2.2 10 | -------------------------------------------------------------------------------- /python/PaddleSpeech/csmsc_tts3/syn_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE(review): the `frontend` imports are deferred into `get_frontend` so
# that `get_sentences` can be used without pulling in the heavy TTS frontend.


# input
def get_sentences(args):
    """Construct the evaluation dataset from ``args.text``.

    Each non-blank line is ``<utt_id> <token> <token> ...``; tokens are
    joined without spaces for Chinese and with single spaces for English.

    Args:
        args: parsed CLI args providing ``text`` (path) and ``lang``.
    Returns:
        list[tuple[str, str]]: (utt_id, sentence) pairs.
    Raises:
        ValueError: if ``args.lang`` is neither 'zh' nor 'en'.  (The original
            code left ``sentence`` unbound here and crashed with NameError.)
    """
    lang = getattr(args, 'lang', None)
    sentences = []
    with open(args.text, 'rt', encoding='utf-8') as f:
        for line in f:
            items = line.strip().split()
            if not items:
                # skip blank lines instead of raising IndexError on items[0]
                continue
            utt_id = items[0]
            if lang == 'zh':
                sentence = "".join(items[1:])
            elif lang == 'en':
                sentence = " ".join(items[1:])
            else:
                raise ValueError(
                    f"unsupported lang: {lang!r} (expected 'zh' or 'en')")
            sentences.append((utt_id, sentence))
    return sentences


# frontend
def get_frontend(args):
    """Construct the text frontend matching ``args.lang``.

    Args:
        args: parsed CLI args providing ``lang``, ``phones_dict`` and (for
            Chinese) ``tones_dict``.
    Returns:
        Frontend | English: the language-specific frontend instance.
    Raises:
        ValueError: for an unsupported language.  (The original printed
            "wrong lang!" and then returned an unbound local, raising
            UnboundLocalError.)
    """
    lang = getattr(args, 'lang', None)
    if lang == 'zh':
        from frontend.zh_frontend import Frontend
        frontend = Frontend(phone_vocab_path=args.phones_dict,
                            tone_vocab_path=args.tones_dict)
    elif lang == 'en':
        from frontend import English
        frontend = English(phone_vocab_path=args.phones_dict)
    else:
        raise ValueError(
            f"unsupported lang: {lang!r} (expected 'zh' or 'en')")
    print("frontend done!")
    return frontend
# ---------------------------------------
# python/PaddleSpeech/csmsc_tts3/tts3.py
# ---------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import argparse 15 | from pathlib import Path 16 | 17 | import numpy as np 18 | import onnxruntime as ort 19 | import soundfile as sf 20 | from timer import timer 21 | 22 | from syn_utils import get_frontend, get_sentences 23 | 24 | root_dir = str(Path.cwd()) 25 | 26 | 27 | def str2bool(str): 28 | return True if str.lower() == 'true' else False 29 | 30 | 31 | def get_sess(args, filed='am'): 32 | full_name = '' 33 | if filed == 'am': 34 | full_name = args.am_onnx 35 | elif filed == 'voc': 36 | full_name = args.voc_onnx 37 | 38 | model_dir = str(Path(args.inference_dir) / full_name) 39 | 40 | sess_options = ort.SessionOptions() 41 | sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL 42 | sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL 43 | 44 | if args.device == "gpu": 45 | # fastspeech2/mb_melgan can't use trt now! 
46 | if args.use_trt: 47 | providers = ['TensorrtExecutionProvider'] 48 | else: 49 | providers = ['CUDAExecutionProvider'] 50 | elif args.device == "cpu": 51 | providers = ['CPUExecutionProvider'] 52 | 53 | sess_options.intra_op_num_threads = args.cpu_threads 54 | sess = ort.InferenceSession(model_dir, 55 | providers=providers, 56 | sess_options=sess_options) 57 | return sess 58 | 59 | 60 | def ort_predict(args): 61 | 62 | # frontend 63 | frontend = get_frontend(args) 64 | 65 | output_dir = Path(args.output_dir) 66 | output_dir.mkdir(parents=True, exist_ok=True) 67 | sentences = get_sentences(args) 68 | 69 | am_name = args.am[:args.am.rindex('_')] 70 | am_dataset = args.am[args.am.rindex('_') + 1:] 71 | fs = 24000 if am_dataset != 'ljspeech' else 22050 72 | 73 | # am 74 | am_sess = get_sess(args, filed='am') 75 | 76 | # vocoder 77 | voc_sess = get_sess(args, filed='voc') 78 | 79 | # am warmup 80 | for T in [27, 38, 54]: 81 | data = np.random.randint(1, 266, size=(T, )).astype("int64") 82 | am_sess.run(None, {"text": data}) 83 | 84 | # voc warmup 85 | for T in [227, 308, 544]: 86 | data = np.random.rand(T, 80).astype("float32") 87 | voc_sess.run(None, {"logmel": data}) 88 | print("warm up done!") 89 | 90 | # frontend warmup 91 | # Loading model cost 0.5+ seconds 92 | if args.lang == 'zh': 93 | frontend.get_input_ids("你好,欢迎使用飞桨框架进行深度学习研究!", 94 | merge_sentences=True) 95 | else: 96 | print("lang should in be 'zh' here!") 97 | 98 | N = 0 99 | T = 0 100 | merge_sentences = True 101 | for utt_id, sentence in sentences: 102 | with timer() as t: 103 | if args.lang == 'zh': 104 | input_ids = frontend.get_input_ids( 105 | sentence, merge_sentences=merge_sentences) 106 | 107 | phone_ids = input_ids["phone_ids"] 108 | else: 109 | print("lang should in be 'zh' here!") 110 | # merge_sentences=True here, so we only use the first item of phone_ids 111 | phone_ids = phone_ids[0] 112 | mel = am_sess.run(output_names=None, input_feed={ 113 | 'text': phone_ids}) 114 | mel = mel[0] 115 
| wav = voc_sess.run(output_names=None, input_feed={'logmel': mel}) 116 | 117 | N += len(wav[0]) 118 | T += t.elapse 119 | speed = len(wav[0]) / t.elapse 120 | rtf = fs / speed 121 | sf.write( 122 | str(output_dir / (utt_id + ".wav")), 123 | np.array(wav)[0], 124 | samplerate=fs) 125 | print( 126 | f"{utt_id}, mel: {mel.shape}, wave: {len(wav[0])}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}." 127 | ) 128 | print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }") 129 | 130 | 131 | def parse_args(): 132 | parser = argparse.ArgumentParser(description="Infernce with onnxruntime.") 133 | 134 | # acoustic model 135 | parser.add_argument('--am', type=str, 136 | default='fastspeech2_csmsc', 137 | help='Choose acoustic model type of tts task.') 138 | parser.add_argument('--am_onnx', type=str, 139 | default='fastspeech2_csmsc_onnx_0.2.0/fastspeech2_csmsc.onnx') 140 | 141 | parser.add_argument("--phones_dict", type=str, 142 | default='resources/fastspeech2_csmsc_onnx_0.2.0/phone_id_map.txt', 143 | help="phone vocabulary file.") 144 | 145 | parser.add_argument("--tones_dict", type=str, 146 | default=None, 147 | help="tone vocabulary file.") 148 | 149 | # voc 150 | parser.add_argument('--voc', type=str, 151 | default='hifigan_csmsc', 152 | help='Choose vocoder type of tts task.') 153 | 154 | parser.add_argument('--voc_onnx', type=str, 155 | default='hifigan_csmsc.onnx') 156 | 157 | # other 158 | parser.add_argument("--inference_dir", type=str, 159 | default=f"{root_dir}/resources", 160 | help="dir to save inference models") 161 | 162 | parser.add_argument("--text", type=str, 163 | default='csmsc_test.txt') 164 | 165 | parser.add_argument("--output_dir", type=str, 166 | default='infer_result') 167 | 168 | parser.add_argument('--lang', type=str, 169 | default='zh', 170 | help='Choose model language. 
zh or en') 171 | 172 | # inference 173 | parser.add_argument("--use_trt", 174 | type=str2bool, 175 | default=False, 176 | help="Whether to use inference engin TensorRT.", ) 177 | 178 | parser.add_argument("--device", default="cpu", 179 | choices=["gpu", "cpu"]) 180 | 181 | parser.add_argument('--cpu_threads', type=int, default=2) 182 | 183 | args, _ = parser.parse_known_args() 184 | Path(args.output_dir).mkdir(parents=True, exist_ok=True) 185 | return args 186 | 187 | 188 | if __name__ == "__main__": 189 | args = parse_args() 190 | ort_predict(args) -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/README.md: -------------------------------------------------------------------------------- 1 | ### ljspeech_tts3 2 | - **支持合成语言**: 英文字母 3 | - 基于[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)下的[ljspeech-TTS3](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/ljspeech/tts3/README.md)整理而来 4 | - 整个推理引擎只采用`ONNXRuntime` 5 | - 其中PaddleSpeech中提供的预训练模型可以参见[link](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README_cn.md#4-%E9%A2%84%E8%AE%AD%E7%BB%83%E6%A8%A1%E5%9E%8B)。在ljspeech_tts3中使用的是: 6 | 7 | |主要部分|具体模型|支持语言| 8 | |:---|:---|:---| 9 | |声学模型|[fastspeech2_ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/ljspeech/tts3/README.md#pretrained-model)|en| 10 | |声码器|[pwg_ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/voc1)|en| 11 | 12 | #### 结果示例 13 |
14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 28 | 29 | 30 |
输入文本合成音频
Love you three thousand times. 25 | 26 |
27 |
31 | 32 |
33 | 34 | #### 运行步骤 35 | 1. 下载`resources`, [Google Drive](https://drive.google.com/file/d/1xQwsY1tWebQSWu32KgLlGO1QUnrixvwo/view?usp=sharing) | [百度网盘,提取码:4vlu](https://pan.baidu.com/s/1vvBnuNEcj-AngXdw3j0S4g?pwd=4vlu), 解压到`ljspeech_tts3`目录下,最终目录结构如下: 36 | ```text 37 | ljspeech_tts3 38 | ├── sentences_en.txt 39 | ├── requirements.txt 40 | ├── frontend 41 | ├── main.sh 42 | ├── tts3.py 43 | ├── infer_result 44 | ├── resources 45 | │ ├── fastspeech2_ljspeech 46 | │ │ ├── fastspeech2_ljspeech.onnx 47 | │ │ └── phone_id_map.txt 48 | │ └── pwgan_ljspeech.onnx 49 | └──syn_utils.py 50 | ``` 51 | 52 | 2. 安装`requirements.txt` 53 | ```bash 54 | pip install -r requirements.txt -i https://pypi.douban.com/simple/ 55 | ``` 56 | 57 | 3. 运行`tts3.py` 58 | ```bash 59 | python tts3.py 60 | ``` 61 | or 62 | ```bash 63 | bash main.sh 64 | ``` 65 | 66 | 4. 运行日志如下: 67 | ```text 68 | frontend done! 69 | 001, mel: (343, 80), wave: 87808, time: 7.583922399999999s, Hz: 11578.186242472837, RTF: 1.9044433677455357. 70 | 002, mel: (274, 80), wave: 70144, time: 5.986744399999999s, Hz: 11716.561243394675, RTF: 1.8819514994154878. 71 | 003, mel: (175, 80), wave: 44800, time: 3.911470399999999s, Hz: 11453.51349948683, RTF: 1.9251734414062498. 72 | 004, mel: (217, 80), wave: 55552, time: 4.678628299999996s, Hz: 11873.585640758554, RTF: 1.8570632888104823. 73 | 005, mel: (371, 80), wave: 94976, time: 7.7152417s, Hz: 12310.185834993608, RTF: 1.7911996045843162. 74 | 006, mel: (338, 80), wave: 86528, time: 7.670878100000003s, Hz: 11280.071739420744, RTF: 1.954774801913832. 75 | 007, mel: (205, 80), wave: 52480, time: 4.628822800000002s, Hz: 11337.668363997142, RTF: 1.9448443270769813. 76 | 008, mel: (390, 80), wave: 99840, time: 8.2700763s, Hz: 12072.447745611855, RTF: 1.826473012319712. 77 | 009, mel: (169, 80), wave: 43264, time: 4.2657806000000065s, Hz: 10142.12548840801, RTF: 2.1741004905926427. 
78 | generation speed: 11613.502408804885Hz, RTF: 1.8986520365538124 79 | ``` 80 | 生成结果会保存到`infer_result`目录下 81 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/assets/009.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/ljspeech_tts3/assets/009.wav -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/assets/audio_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidTTS/e2b308a72e8b90beaef2cf344c914d25e19f43fd/python/PaddleSpeech/ljspeech_tts3/assets/audio_icon.png -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from .generate_lexicon import * 15 | from .normalizer import * 16 | from .phonectic import * 17 | from .punctuation import * 18 | from .tone_sandhi import * 19 | from .vocab import * 20 | from .zh_normalization import * 21 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/arpabet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from paddlespeech.t2s.frontend.phonectic import Phonetics 15 | """ 16 | A phonology system with ARPABET symbols and limited punctuations. The G2P 17 | conversion is done by g2p_en. 18 | 19 | Note that g2p_en does not handle words with hypen well. So make sure the input 20 | sentence is first normalized. 21 | """ 22 | from paddlespeech.t2s.frontend.vocab import Vocab 23 | from g2p_en import G2p 24 | 25 | 26 | class ARPABET(Phonetics): 27 | """A phonology for English that uses ARPABET as the phoneme vocabulary. 28 | See http://www.speech.cs.cmu.edu/cgi-bin/cmudict for more details. 
29 | Phoneme Example Translation 30 | ------- ------- ----------- 31 | AA odd AA D 32 | AE at AE T 33 | AH hut HH AH T 34 | AO ought AO T 35 | AW cow K AW 36 | AY hide HH AY D 37 | B be B IY 38 | CH cheese CH IY Z 39 | D dee D IY 40 | DH thee DH IY 41 | EH Ed EH D 42 | ER hurt HH ER T 43 | EY ate EY T 44 | F fee F IY 45 | G green G R IY N 46 | HH he HH IY 47 | IH it IH T 48 | IY eat IY T 49 | JH gee JH IY 50 | K key K IY 51 | L lee L IY 52 | M me M IY 53 | N knee N IY 54 | NG ping P IH NG 55 | OW oat OW T 56 | OY toy T OY 57 | P pee P IY 58 | R read R IY D 59 | S sea S IY 60 | SH she SH IY 61 | T tea T IY 62 | TH theta TH EY T AH 63 | UH hood HH UH D 64 | UW two T UW 65 | V vee V IY 66 | W we W IY 67 | Y yield Y IY L D 68 | Z zee Z IY 69 | ZH seizure S IY ZH ER 70 | """ 71 | phonemes = [ 72 | 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'B', 'CH', 'D', 'DH', 'EH', 'ER', 73 | 'EY', 'F', 'G', 'HH', 'IH', 'IY', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 74 | 'OY', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UW', 'UH', 'V', 'W', 'Y', 'Z', 75 | 'ZH' 76 | ] 77 | punctuations = [',', '.', '?', '!'] 78 | symbols = phonemes + punctuations 79 | _stress_to_no_stress_ = { 80 | 'AA0': 'AA', 81 | 'AA1': 'AA', 82 | 'AA2': 'AA', 83 | 'AE0': 'AE', 84 | 'AE1': 'AE', 85 | 'AE2': 'AE', 86 | 'AH0': 'AH', 87 | 'AH1': 'AH', 88 | 'AH2': 'AH', 89 | 'AO0': 'AO', 90 | 'AO1': 'AO', 91 | 'AO2': 'AO', 92 | 'AW0': 'AW', 93 | 'AW1': 'AW', 94 | 'AW2': 'AW', 95 | 'AY0': 'AY', 96 | 'AY1': 'AY', 97 | 'AY2': 'AY', 98 | 'EH0': 'EH', 99 | 'EH1': 'EH', 100 | 'EH2': 'EH', 101 | 'ER0': 'ER', 102 | 'ER1': 'ER', 103 | 'ER2': 'ER', 104 | 'EY0': 'EY', 105 | 'EY1': 'EY', 106 | 'EY2': 'EY', 107 | 'IH0': 'IH', 108 | 'IH1': 'IH', 109 | 'IH2': 'IH', 110 | 'IY0': 'IY', 111 | 'IY1': 'IY', 112 | 'IY2': 'IY', 113 | 'OW0': 'OW', 114 | 'OW1': 'OW', 115 | 'OW2': 'OW', 116 | 'OY0': 'OY', 117 | 'OY1': 'OY', 118 | 'OY2': 'OY', 119 | 'UH0': 'UH', 120 | 'UH1': 'UH', 121 | 'UH2': 'UH', 122 | 'UW0': 'UW', 123 | 'UW1': 'UW', 124 | 'UW2': 'UW' 125 | } 126 
| 127 | def __init__(self): 128 | self.backend = G2p() 129 | self.vocab = Vocab(self.phonemes + self.punctuations) 130 | 131 | def _remove_vowels(self, phone): 132 | return self._stress_to_no_stress_.get(phone, phone) 133 | 134 | def phoneticize(self, sentence, add_start_end=False): 135 | """ Normalize the input text sequence and convert it into pronunciation sequence. 136 | Args: 137 | sentence (str): The input text sequence. 138 | 139 | Returns: 140 | List[str]: The list of pronunciation sequence. 141 | """ 142 | phonemes = [ 143 | self._remove_vowels(item) for item in self.backend(sentence) 144 | ] 145 | if add_start_end: 146 | start = self.vocab.start_symbol 147 | end = self.vocab.end_symbol 148 | phonemes = [start] + phonemes + [end] 149 | phonemes = [item for item in phonemes if item in self.vocab.stoi] 150 | return phonemes 151 | 152 | def numericalize(self, phonemes): 153 | """ Convert pronunciation sequence into pronunciation id sequence. 154 | 155 | Args: 156 | phonemes (List[str]): The list of pronunciation sequence. 157 | 158 | Returns: 159 | List[int]: The list of pronunciation id sequence. 160 | """ 161 | ids = [self.vocab.lookup(item) for item in phonemes] 162 | return ids 163 | 164 | def reverse(self, ids): 165 | """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence. 166 | 167 | Args: 168 | ids( List[int]): The list of pronunciation id sequence. 169 | 170 | Returns: 171 | List[str]: 172 | The list of pronunciation sequence. 173 | """ 174 | return [self.vocab.reverse(i) for i in ids] 175 | 176 | def __call__(self, sentence, add_start_end=False): 177 | """ Convert the input text sequence into pronunciation id sequence. 178 | 179 | Args: 180 | sentence (str): The input text sequence. 181 | 182 | Returns: 183 | List[str]: The list of pronunciation id sequence. 
class ARPABETWithStress(Phonetics):
    """English G2P frontend emitting ARPABET symbols WITH stress markers.

    Unlike ``ARPABET``, vowel phonemes keep their stress digit (0/1/2),
    e.g. 'AH0' vs 'AH1' are distinct symbols.
    """
    phonemes = [
        'AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0',
        'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D',
        'DH', 'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2',
        'F', 'G', 'HH', 'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K',
        'L', 'M', 'N', 'NG', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R',
        'S', 'SH', 'T', 'TH', 'UH0', 'UH1', 'UH2', 'UW0', 'UW1', 'UW2', 'V',
        'W', 'Y', 'Z', 'ZH'
    ]
    punctuations = [',', '.', '?', '!']
    symbols = phonemes + punctuations

    def __init__(self):
        # g2p_en backend handles English text normalization + G2P.
        self.backend = G2p()
        self.vocab = Vocab(self.phonemes + self.punctuations)

    def phoneticize(self, sentence, add_start_end=False):
        """Convert a text sentence into a pronunciation sequence (stress kept).

        Args:
            sentence (str): The input text sequence.
            add_start_end (bool): Whether to wrap with start/end symbols.

        Returns:
            List[str]: The pronunciation sequence.
        """
        sequence = list(self.backend(sentence))
        if add_start_end:
            sequence = [self.vocab.start_symbol, *sequence,
                        self.vocab.end_symbol]
        # Drop anything not registered in the vocabulary.
        return [symbol for symbol in sequence if symbol in self.vocab.stoi]

    def numericalize(self, phonemes):
        """Convert a pronunciation sequence into a sequence of ids.

        Args:
            phonemes (List[str]): The pronunciation sequence.

        Returns:
            List[int]: The pronunciation id sequence.
        """
        return list(map(self.vocab.lookup, phonemes))

    def reverse(self, ids):
        """Map a list of pronunciation ids back to their symbols.

        Args:
            ids (List[int]): The pronunciation id sequence.

        Returns:
            List[str]: The pronunciation sequence.
        """
        return [self.vocab.reverse(index) for index in ids]

    def __call__(self, sentence, add_start_end=False):
        """Convert a text sentence directly into a pronunciation id sequence.

        Args:
            sentence (str): The input text sequence.

        Returns:
            List[int]: The pronunciation id sequence.
        """
        phonemes = self.phoneticize(sentence, add_start_end=add_start_end)
        return self.numericalize(phonemes)

    @property
    def vocab_size(self):
        """Vocabulary size: 69 phones + 4 punctuations, plus however many
        special tokens the Vocab registered (77 when all 4 are enabled)."""
        return len(self.vocab)
import re
from collections import OrderedDict

INITIALS = [
    'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh',
    'r', 'z', 'c', 's', 'j', 'q', 'x'
]

FINALS = [
    'a', 'ai', 'ao', 'an', 'ang', 'e', 'er', 'ei', 'en', 'eng', 'o', 'ou',
    'ong', 'ii', 'iii', 'i', 'ia', 'iao', 'ian', 'iang', 'ie', 'io', 'iou',
    'iong', 'in', 'ing', 'u', 'ua', 'uai', 'uan', 'uang', 'uei', 'uo', 'uen',
    'ueng', 'v', 've', 'van', 'vn'
]

SPECIALS = ['sil', 'sp']


def rule(C, V, R, T):
    """Generate a syllable given the initial, the final, erhua indicator, and tone.
    Orthographical rules for pinyin are applied (special cases for y, w, ui, un, iu).

    Note that in this system, 'ü' is always written as 'v' in phonemes, but
    converted to 'u' in syllables when certain conditions are satisfied.
    'i' is separated into 3 phoneme categories: 'i', 'ii' (after z/c/s) and
    'iii' (after zh/ch/sh/r). Erhua may apply to every final except ones that
    already end with 'r'.

    Args:
        C (str): initial ('' for a zero initial).
        V (str): final.
        R (str): erhua indicator, '' or 'r'.
        T (str): tone, '' or '1'..'5'.

    Returns:
        Optional[str]: the written syllable, or None when the combination is
        phonotactically impossible.
    """
    # Apical vowel 'ii' only combines with z, c, s.
    if V in ["ii"] and (C not in ['z', 'c', 's']):
        return None
    # Apical vowel 'iii' only combines with zh, ch, sh, r.
    if V in ['iii'] and (C not in ['zh', 'ch', 'sh', 'r']):
        return None

    # i-/v- finals do not combine with f, g, k, h, zh, ch, sh, r, z, c, s.
    if (V not in ['ii', 'iii']) and V[0] in ['i', 'v'] and (
            C in ['f', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's']):
        return None

    # v- finals: 'v'/'ve' only combine with j, q, x, n, l (or zero initial);
    # the other v- finals only with j, q, x (or zero initial).
    if V.startswith("v"):
        if V in ['v', 've']:
            if C not in ['j', 'q', 'x', 'n', 'l', '']:
                return None
        else:
            if C not in ['j', 'q', 'x', '']:
                return None

    # j, q, x only combine with i- or v- finals.
    if (C in ['j', 'q', 'x']) and not (
            (V not in ['ii', 'iii']) and V[0] in ['i', 'v']):
        return None

    # b, p, m, f do not combine with u-/v- finals (except 'u' itself) or 'ong'.
    if (C in ['b', 'p', 'm', 'f']) and ((V[0] in ['u', 'v'] and V != "u") or
                                        V == 'ong'):
        return None

    # ua, uai, uang do not combine with d, t, n, l, r, z, c, s.
    if V in ['ua', 'uai',
             'uang'] and C in ['d', 't', 'n', 'l', 'r', 'z', 'c', 's']:
        return None

    # sh + ong is impossible.
    if V == 'ong' and C in ['sh']:
        return None

    # o does not combine with d, t, n, g, k, h, zh, ch, sh, r, z, c, s.
    if V == "o" and C in [
            'd', 't', 'n', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's'
    ]:
        return None

    # 'ueng' only exists standalone ('weng'); with an initial it is 'ong'.
    # Fix: this was a bare `return` — make the None explicit, consistent
    # with every other rejection branch in this function.
    if V == 'ueng' and C != '':
        return None

    # Non-erhua 'er' only exists standalone.
    if V == 'er' and C != '':
        return None

    # Apply pinyin orthography for zero initials (y-/w-/yu- spellings).
    if C == '':
        if V in ["i", "in", "ing"]:
            C = 'y'
        elif V == 'u':
            C = 'w'
        elif V.startswith('i') and V not in ["ii", "iii"]:
            C = 'y'
            V = V[1:]
        elif V.startswith('u'):
            C = 'w'
            V = V[1:]
        elif V.startswith('v'):
            C = 'yu'
            V = V[1:]
    else:
        # After j/q/x, 'v' is written 'u'; iou/uei/uen contract to iu/ui/un.
        if C in ['j', 'q', 'x']:
            if V.startswith('v'):
                V = re.sub('v', 'u', V)
        if V == 'iou':
            V = 'iu'
        elif V == 'uei':
            V = 'ui'
        elif V == 'uen':
            V = 'un'
    result = C + V

    # A final that already ends with 'r' cannot take erhua again.
    if result.endswith('r') and R == 'r':
        return None

    # Collapse ii/iii back to the written 'i'.
    result = re.sub(r'i+', 'i', result)

    result = result + R + T
    return result


def generate_lexicon(with_tone=False, with_erhua=False):
    """Generate the Mandarin lexicon.

    Args:
        with_tone (bool): append tone digits 1-5 to each syllable.
        with_erhua (bool): also generate erhua ('r') variants.

    Returns:
        OrderedDict[str, str]: written syllable -> 'initial final' phonemes.
    """
    syllables = OrderedDict()

    for C in [''] + INITIALS:
        for V in FINALS:
            for R in [''] if not with_erhua else ['', 'r']:
                for T in [''] if not with_tone else ['1', '2', '3', '4', '5']:
                    result = rule(C, V, R, T)
                    if result:
                        syllables[result] = f'{C} {V}{R}{T}'
    return syllables
14 | from .normalizer import * 15 | from .numbers import * 16 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/normalizer/abbrrviation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/normalizer/acronyms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import re
import unicodedata
from builtins import str as unicode

from .numbers import normalize_numbers


def normalize(sentence):
    """Normalize English text for the TTS frontend.

    Steps: expand numbers, strip accents (NFD decomposition, then drop
    combining marks), lowercase, remove unsupported characters, and expand
    the abbreviations "i.e." / "e.g.".

    Args:
        sentence (str): raw input text.

    Returns:
        str: normalized text.
    """
    text = unicode(sentence)
    text = normalize_numbers(text)
    # Strip accents: decompose, then discard combining marks (category 'Mn').
    decomposed = unicodedata.normalize('NFD', text)
    text = ''.join(char for char in decomposed
                   if unicodedata.category(char) != 'Mn')
    text = text.lower()
    # Keep only letters, apostrophes, basic punctuation and spaces.
    text = re.sub(r"[^ a-z'.,?!\-]", "", text)
    text = text.replace("i.e.", "that is")
    text = text.replace("e.g.", "for example")
    return text
# number expansion is not that easy
import re

import inflect

_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
_number_re = re.compile(r'[0-9]+')


def _remove_commas(m):
    """'15,000' -> '15000'."""
    return m.group(1).replace(',', '')


def _expand_decimal_point(m):
    """'3.14' -> '3 point 14'."""
    return m.group(1).replace('.', ' point ')


def _expand_dollars(m):
    """Spell out a dollar amount, e.g. '2.50' -> '2 dollars, 50 cents'."""
    amount = m.group(1)
    parts = amount.split('.')
    if len(parts) > 2:
        return amount + ' dollars'  # Unexpected format
    dollars = int(parts[0]) if parts[0] else 0
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    pieces = []
    if dollars:
        unit = 'dollar' if dollars == 1 else 'dollars'
        pieces.append('%s %s' % (dollars, unit))
    if cents:
        unit = 'cent' if cents == 1 else 'cents'
        pieces.append('%s %s' % (cents, unit))
    return ', '.join(pieces) if pieces else 'zero dollars'


def _expand_ordinal(m):
    """'12th' -> 'twelfth'."""
    return _inflect.number_to_words(m.group(0))


def _expand_number(m):
    """Spell out an integer; 1001-2999 are read year-style where natural."""
    num = int(m.group(0))
    if not 1000 < num < 3000:
        return _inflect.number_to_words(num, andword='')
    if num == 2000:
        return 'two thousand'
    if 2000 < num < 2010:
        return 'two thousand ' + _inflect.number_to_words(num % 100)
    if num % 100 == 0:
        return _inflect.number_to_words(num // 100) + ' hundred'
    # Read as digit pairs, e.g. 1984 -> 'nineteen eighty-four'.
    return _inflect.number_to_words(
        num, andword='', zero='oh', group=2).replace(', ', ' ')


def normalize_numbers(text):
    """Normalize numbers in English text (currency, decimals, ordinals,
    cardinals). Substitution order matters: commas and currency first, so
    the generic number rule only sees what is left.
    """
    substitutions = (
        (_comma_number_re, _remove_commas),
        (_pounds_re, r'\1 pounds'),
        (_dollars_re, _expand_dollars),
        (_decimal_number_re, _expand_decimal_point),
        (_ordinal_re, _expand_ordinal),
        (_number_re, _expand_number),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text
14 | 15 | 16 | def full2half_width(ustr): 17 | half = [] 18 | for u in ustr: 19 | num = ord(u) 20 | if num == 0x3000: # 全角空格变半角 21 | num = 32 22 | elif 0xFF01 <= num <= 0xFF5E: 23 | num -= 0xfee0 24 | u = chr(num) 25 | half.append(u) 26 | return ''.join(half) 27 | 28 | 29 | def half2full_width(ustr): 30 | full = [] 31 | for u in ustr: 32 | num = ord(u) 33 | if num == 32: # 半角空格变全角 34 | num = 0x3000 35 | elif 0x21 <= num <= 0x7E: 36 | num += 0xfee0 37 | u = chr(num) # to unicode 38 | full.append(u) 39 | 40 | return ''.join(full) 41 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/punctuation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
__all__ = ["get_punctuations"]

EN_PUNCT = [
    " ",
    "-",
    "...",
    ",",
    ".",
    "?",
    "!",
]

CN_PUNCT = ["、", ",", ";", ":", "。", "?", "!"]


def get_punctuations(lang):
    """Return the punctuation symbol list for a language.

    Args:
        lang (str): "en" or "cn".

    Returns:
        List[str]: the punctuation symbols.

    Raises:
        ValueError: if ``lang`` is not supported.
    """
    table = {"en": EN_PUNCT, "cn": CN_PUNCT}
    try:
        return table[lang]
    except KeyError:
        raise ValueError(f"language {lang} Not supported")
class Vocab(object):
    """Mapping between symbols and integer ids, with optional special symbols.

    Special symbols (padding/unk/start/end) are registered first, in that
    order; passing "" for a special symbol disables it.

    Args:
        symbols (Iterable[str]): Common symbols.
        padding_symbol (str, optional): Symbol for padding. Defaults to "".
        unk_symbol (str, optional): Symbol for unknown. Defaults to "".
        start_symbol (str, optional): Symbol for start. Defaults to "".
        end_symbol (str, optional): Symbol for end. Defaults to "".
    """

    def __init__(self,
                 symbols: Iterable[str],
                 padding_symbol="",
                 unk_symbol="",
                 start_symbol="",
                 end_symbol=""):
        # Specials occupy the lowest ids, in declaration order.
        self.special_symbols = OrderedDict()
        for special in (padding_symbol, unk_symbol, start_symbol, end_symbol):
            if special:
                self.special_symbols[special] = len(self.special_symbols)

        self.padding_symbol = padding_symbol
        self.unk_symbol = unk_symbol
        self.start_symbol = start_symbol
        self.end_symbol = end_symbol

        self.stoi = OrderedDict(self.special_symbols)
        for symbol in symbols:
            if symbol not in self.stoi:
                self.stoi[symbol] = len(self.stoi)
        self.itos = {index: symbol for symbol, index in self.stoi.items()}

    def __len__(self):
        return len(self.stoi)

    @property
    def num_specials(self):
        """The number of special symbols."""
        return len(self.special_symbols)

    # special tokens
    @property
    def padding_index(self):
        """The index of the padding symbol (-1 if unset)."""
        return self.stoi.get(self.padding_symbol, -1)

    @property
    def unk_index(self):
        """The index of the unknown symbol (-1 if unset)."""
        return self.stoi.get(self.unk_symbol, -1)

    @property
    def start_index(self):
        """The index of the start symbol (-1 if unset)."""
        return self.stoi.get(self.start_symbol, -1)

    @property
    def end_index(self):
        """The index of the end symbol (-1 if unset)."""
        return self.stoi.get(self.end_symbol, -1)

    def __repr__(self):
        return "Vocab(size: {},\nstoi:\n{})".format(len(self), self.stoi)

    def __str__(self):
        return self.__repr__()

    def lookup(self, symbol):
        """Return the index that ``symbol`` corresponds to."""
        return self.stoi[symbol]

    def reverse(self, index):
        """Return the symbol that ``index`` corresponds to."""
        return self.itos[index]

    def add_symbol(self, symbol):
        """Register ``symbol`` under the next free id; no-op if present."""
        if symbol in self.stoi:
            return
        index = len(self.stoi)
        self.stoi[symbol] = index
        self.itos[index] = symbol

    def add_symbols(self, symbols):
        """Register every symbol in ``symbols``."""
        for symbol in symbols:
            self.add_symbol(symbol)
我们班的最高总分为583分|这块黄金重达三百二十四点七五克
我们班的最高总分为五百八十三分| 7 | |numeric range |12\~23
-1.5\~2|十二到二十三
负一点五到二| 8 | |date|她出生于86年8月18日,她弟弟出生于1995年3月1日|她出生于八六年八月十八日, 她弟弟出生于一九九五年三月一日| 9 | |time|等会请在12:05请通知我|等会请在十二点零五分请通知我| 10 | |temperature|今天的最低气温达到-10°C|今天的最低气温达到零下十度| 11 | |fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票| 12 | |percentage|明天有62%的概率降雨|明天有百分之六十二的概率降雨| 13 | |money|随便来几个价格12块5,34.5元,20.1万|随便来几个价格十二块五,三十四点五元,二十点一万| 14 | |telephone|这是固话0421-33441122
这是手机+86 18544139121|这是固话零四二一三三四四一一二二
这是手机八六一八五四四一三九一二一| 15 | ## References 16 | [Pull requests #658 of DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech/pull/658/files) 17 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/zh_normalization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .text_normlization import * 15 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/zh_normalization/chronology.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import re

from .num import DIGITS
from .num import num2str
from .num import verbalize_cardinal
from .num import verbalize_digit


def _time_num2str(num_string: str) -> str:
    """Verbalize a number inside a time expression, keeping a leading zero.

    E.g. '05' -> '零五' rather than '五'.
    """
    result = num2str(num_string.lstrip('0'))
    if num_string.startswith('0'):
        result = DIGITS['0'] + result
    return result


# Time-of-day expression, e.g. 8:30 or 8:30:05 (4 groups).
RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])'
                     r':([0-5][0-9])'
                     r'(:([0-5][0-9]))?')

# Time range, e.g. 8:30-12:30 (9 groups).
RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?'
                           r'(~|-)'
                           r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?')


def replace_time(match) -> str:
    """Verbalize a time (or time range) match in Chinese.

    Args:
        match (re.Match): a match of RE_TIME (4 groups) or RE_TIME_RANGE
            (9 groups).
    Returns:
        str
    """
    # RE_TIME has 4 groups and RE_TIME_RANGE has 9; the group count tells
    # us which pattern produced this match.
    is_range = len(match.groups()) > 5

    hour = match.group(1)
    minute = match.group(2)
    second = match.group(4)

    if is_range:
        hour_2 = match.group(6)
        minute_2 = match.group(7)
        second_2 = match.group(9)

    result = f"{num2str(hour)}点"
    if minute.lstrip('0'):
        if int(minute) == 30:
            result += "半"
        else:
            result += f"{_time_num2str(minute)}分"
    if second and second.lstrip('0'):
        result += f"{_time_num2str(second)}秒"

    if is_range:
        result += "至"
        result += f"{num2str(hour_2)}点"
        if minute_2.lstrip('0'):
            # BUG FIX: this previously tested `int(minute)` (the FIRST
            # time's minutes), so e.g. 8:30-9:45 verbalized the 45 as "半".
            if int(minute_2) == 30:
                result += "半"
            else:
                result += f"{_time_num2str(minute_2)}分"
        if second_2 and second_2.lstrip('0'):
            result += f"{_time_num2str(second_2)}秒"

    return result


# Date with Chinese unit characters, e.g. 2021年8月18日 / 86年3月1号.
RE_DATE = re.compile(r'(\d{4}|\d{2})年'
                     r'((0?[1-9]|1[0-2])月)?'
                     r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?')


def replace_date(match) -> str:
    """Verbalize a Chinese-style date match.

    Args:
        match (re.Match)
    Returns:
        str
    """
    year = match.group(1)
    month = match.group(3)
    day = match.group(5)
    result = ""
    if year:
        # Years are read digit by digit ("二零二一年").
        result += f"{verbalize_digit(year)}年"
    if month:
        result += f"{verbalize_cardinal(month)}月"
    if day:
        # group(9) is the trailing 日/号 character.
        result += f"{verbalize_cardinal(day)}{match.group(9)}"
    return result


# YY/MM/DD or YY-MM-DD or YY.MM.DD dates (same separator enforced via \2).
RE_DATE2 = re.compile(
    r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])')


def replace_date2(match) -> str:
    """Verbalize a separator-style date match (e.g. 2021-08-18).

    Args:
        match (re.Match)
    Returns:
        str
    """
    year = match.group(1)
    month = match.group(3)
    day = match.group(4)
    result = ""
    if year:
        result += f"{verbalize_digit(year)}年"
    if month:
        result += f"{verbalize_cardinal(month)}月"
    if day:
        result += f"{verbalize_cardinal(day)}日"
    return result
import re
import string

from pypinyin.constants import SUPPORT_UCS4

# Full-width <-> half-width conversion tables.
# Full-width -> half-width map for ASCII letters (num: 52).
F2H_ASCII_LETTERS = {
    chr(ord(char) + 65248): char
    for char in string.ascii_letters
}

# Half-width -> full-width map for ASCII letters.
H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}

# Full-width -> half-width map for digits (num: 10).
F2H_DIGITS = {chr(ord(char) + 65248): char for char in string.digits}
# Half-width -> full-width map for digits.
H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}

# Full-width -> half-width map for ASCII punctuation (num: 32).
F2H_PUNCTUATIONS = {chr(ord(char) + 65248): char for char in string.punctuation}
# Half-width -> full-width map for punctuation.
H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}

# Space (num: 1): ideographic space U+3000 <-> ASCII space.
F2H_SPACE = {'\u3000': ' '}
H2F_SPACE = {' ': '\u3000'}

# Matches runs of characters that are NOT pinyin-able Chinese characters;
# usable for extracting NSW (non-standard word) spans.
if SUPPORT_UCS4:
    RE_NSW = re.compile(r'(?:[^'
                        r'\u3007'  # 〇
                        r'\u3400-\u4dbf'  # CJK Extension A: [3400-4DBF]
                        r'\u4e00-\u9fff'  # CJK Unified (basic): [4E00-9FFF]
                        r'\uf900-\ufaff'  # CJK Compatibility: [F900-FAFF]
                        r'\U00020000-\U0002A6DF'  # CJK Extension B: [20000-2A6DF]
                        r'\U0002A703-\U0002B73F'  # CJK Extension C: [2A700-2B73F] — NOTE(review): pattern starts at 2A703, not 2A700 as the range suggests; verify
                        r'\U0002B740-\U0002B81D'  # CJK Extension D: [2B740-2B81D]
                        r'\U0002F80A-\U0002FA1F'  # CJK Compatibility Supplement: [2F800-2FA1F] — NOTE(review): pattern starts at 2F80A; verify
                        r'])+')
else:
    # Narrow build: astral-plane ranges omitted.
    RE_NSW = re.compile(  # pragma: no cover
        r'(?:[^'
        r'\u3007'  # 〇
        r'\u3400-\u4dbf'  # CJK Extension A: [3400-4DBF]
        r'\u4e00-\u9fff'  # CJK Unified (basic): [4E00-9FFF]
        r'\uf900-\ufaff'  # CJK Compatibility: [F900-FAFF]
        r'])+')
def replace_frac(match) -> str:
    """Verbalize a fraction match, e.g. '3/4' -> '四分之三'.

    Args:
        match (re.Match)
    Returns:
        str
    """
    sign_mark = match.group(1)
    numerator = match.group(2)
    denominator = match.group(3)
    prefix = "负" if sign_mark else ""
    # Chinese reads fractions denominator-first: "<den>分之<num>".
    return f"{prefix}{num2str(denominator)}分之{num2str(numerator)}"
def replace_percentage(match) -> str: 59 | """ 60 | Args: 61 | match (re.Match) 62 | Returns: 63 | str 64 | """ 65 | sign = match.group(1) 66 | percent = match.group(2) 67 | sign: str = "负" if sign else "" 68 | percent: str = num2str(percent) 69 | result = f"{sign}百分之{percent}" 70 | return result 71 | 72 | 73 | # 整数表达式 74 | # 带负号的整数 -10 75 | RE_INTEGER = re.compile(r'(-)' r'(\d+)') 76 | 77 | 78 | def replace_negative_num(match) -> str: 79 | """ 80 | Args: 81 | match (re.Match) 82 | Returns: 83 | str 84 | """ 85 | sign = match.group(1) 86 | number = match.group(2) 87 | sign: str = "负" if sign else "" 88 | number: str = num2str(number) 89 | result = f"{sign}{number}" 90 | return result 91 | 92 | 93 | # 编号-无符号整形 94 | # 00078 95 | RE_DEFAULT_NUM = re.compile(r'\d{3}\d*') 96 | 97 | 98 | def replace_default_num(match): 99 | """ 100 | Args: 101 | match (re.Match) 102 | Returns: 103 | str 104 | """ 105 | number = match.group(0) 106 | return verbalize_digit(number) 107 | 108 | 109 | # 数字表达式 110 | # 纯小数 111 | RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' r'|(\.(\d+))') 112 | # 正整数 + 量词 113 | RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几\+])?" 
+ COM_QUANTIFIERS) 114 | RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))') 115 | 116 | 117 | def replace_positive_quantifier(match) -> str: 118 | """ 119 | Args: 120 | match (re.Match) 121 | Returns: 122 | str 123 | """ 124 | number = match.group(1) 125 | match_2 = match.group(2) 126 | if match_2 == "+": 127 | match_2 = "多" 128 | match_2: str = match_2 if match_2 else "" 129 | quantifiers: str = match.group(3) 130 | number: str = num2str(number) 131 | result = f"{number}{match_2}{quantifiers}" 132 | return result 133 | 134 | 135 | def replace_number(match) -> str: 136 | """ 137 | Args: 138 | match (re.Match) 139 | Returns: 140 | str 141 | """ 142 | sign = match.group(1) 143 | number = match.group(2) 144 | pure_decimal = match.group(5) 145 | if pure_decimal: 146 | result = num2str(pure_decimal) 147 | else: 148 | sign: str = "负" if sign else "" 149 | number: str = num2str(number) 150 | result = f"{sign}{number}" 151 | return result 152 | 153 | 154 | # 范围表达式 155 | # match.group(1) and match.group(8) are copy from RE_NUMBER 156 | 157 | RE_RANGE = re.compile( 158 | r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))[-~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))') 159 | 160 | 161 | def replace_range(match) -> str: 162 | """ 163 | Args: 164 | match (re.Match) 165 | Returns: 166 | str 167 | """ 168 | first, second = match.group(1), match.group(8) 169 | first = RE_NUMBER.sub(replace_number, first) 170 | second = RE_NUMBER.sub(replace_number, second) 171 | result = f"{first}到{second}" 172 | return result 173 | 174 | 175 | def _get_value(value_string: str, use_zero: bool=True) -> List[str]: 176 | stripped = value_string.lstrip('0') 177 | if len(stripped) == 0: 178 | return [] 179 | elif len(stripped) == 1: 180 | if use_zero and len(stripped) < len(value_string): 181 | return [DIGITS['0'], DIGITS[stripped]] 182 | else: 183 | return [DIGITS[stripped]] 184 | else: 185 | largest_unit = next( 186 | power for power in reversed(UNITS.keys()) if power < len(stripped)) 187 | first_part = 
value_string[:-largest_unit] 188 | second_part = value_string[-largest_unit:] 189 | return _get_value(first_part) + [UNITS[largest_unit]] + _get_value( 190 | second_part) 191 | 192 | 193 | def verbalize_cardinal(value_string: str) -> str: 194 | if not value_string: 195 | return '' 196 | 197 | # 000 -> '零' , 0 -> '零' 198 | value_string = value_string.lstrip('0') 199 | if len(value_string) == 0: 200 | return DIGITS['0'] 201 | 202 | result_symbols = _get_value(value_string) 203 | # verbalized number starting with '一十*' is abbreviated as `十*` 204 | if len(result_symbols) >= 2 and result_symbols[0] == DIGITS[ 205 | '1'] and result_symbols[1] == UNITS[1]: 206 | result_symbols = result_symbols[1:] 207 | return ''.join(result_symbols) 208 | 209 | 210 | def verbalize_digit(value_string: str, alt_one=False) -> str: 211 | result_symbols = [DIGITS[digit] for digit in value_string] 212 | result = ''.join(result_symbols) 213 | if alt_one: 214 | result = result.replace("一", "幺") 215 | return result 216 | 217 | 218 | def num2str(value_string: str) -> str: 219 | integer_decimal = value_string.split('.') 220 | if len(integer_decimal) == 1: 221 | integer = integer_decimal[0] 222 | decimal = '' 223 | elif len(integer_decimal) == 2: 224 | integer, decimal = integer_decimal 225 | else: 226 | raise ValueError( 227 | f"The value string: '${value_string}' has more than one point in it." 228 | ) 229 | 230 | result = verbalize_cardinal(integer) 231 | 232 | decimal = decimal.rstrip('0') 233 | if decimal: 234 | # '.22' is verbalized as '零点二二' 235 | # '3.20' is verbalized as '三点二 236 | result = result if result else "零" 237 | result += '点' + verbalize_digit(decimal) 238 | return result 239 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/zh_normalization/phonecode.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | 16 | from .num import verbalize_digit 17 | 18 | # 规范化固话/手机号码 19 | # 手机 20 | # http://www.jihaoba.com/news/show/13680 21 | # 移动:139、138、137、136、135、134、159、158、157、150、151、152、188、187、182、183、184、178、198 22 | # 联通:130、131、132、156、155、186、185、176 23 | # 电信:133、153、189、180、181、177 24 | RE_MOBILE_PHONE = re.compile( 25 | r"(? str: 34 | if mobile: 35 | sp_parts = phone_string.strip('+').split() 36 | result = ','.join( 37 | [verbalize_digit(part, alt_one=True) for part in sp_parts]) 38 | return result 39 | else: 40 | sil_parts = phone_string.split('-') 41 | result = ','.join( 42 | [verbalize_digit(part, alt_one=True) for part in sil_parts]) 43 | return result 44 | 45 | 46 | def replace_phone(match) -> str: 47 | """ 48 | Args: 49 | match (re.Match) 50 | Returns: 51 | str 52 | """ 53 | return phone2str(match.group(0), mobile=False) 54 | 55 | 56 | def replace_mobile(match) -> str: 57 | """ 58 | Args: 59 | match (re.Match) 60 | Returns: 61 | str 62 | """ 63 | return phone2str(match.group(0)) 64 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/zh_normalization/quantifier.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | 16 | from .num import num2str 17 | 18 | # 温度表达式,温度会影响负号的读法 19 | # -3°C 零下三度 20 | RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)') 21 | 22 | 23 | def replace_temperature(match) -> str: 24 | """ 25 | Args: 26 | match (re.Match) 27 | Returns: 28 | str 29 | """ 30 | sign = match.group(1) 31 | temperature = match.group(2) 32 | unit = match.group(3) 33 | sign: str = "零下" if sign else "" 34 | temperature: str = num2str(temperature) 35 | unit: str = "摄氏度" if unit == "摄氏度" else "度" 36 | result = f"{sign}{temperature}{unit}" 37 | return result 38 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/frontend/zh_normalization/text_normlization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | from typing import List 16 | 17 | from .char_convert import tranditional_to_simplified 18 | from .chronology import RE_DATE 19 | from .chronology import RE_DATE2 20 | from .chronology import RE_TIME 21 | from .chronology import RE_TIME_RANGE 22 | from .chronology import replace_date 23 | from .chronology import replace_date2 24 | from .chronology import replace_time 25 | from .constants import F2H_ASCII_LETTERS 26 | from .constants import F2H_DIGITS 27 | from .constants import F2H_SPACE 28 | from .num import RE_DECIMAL_NUM 29 | from .num import RE_DEFAULT_NUM 30 | from .num import RE_FRAC 31 | from .num import RE_INTEGER 32 | from .num import RE_NUMBER 33 | from .num import RE_PERCENTAGE 34 | from .num import RE_POSITIVE_QUANTIFIERS 35 | from .num import RE_RANGE 36 | from .num import replace_default_num 37 | from .num import replace_frac 38 | from .num import replace_negative_num 39 | from .num import replace_number 40 | from .num import replace_percentage 41 | from .num import replace_positive_quantifier 42 | from .num import replace_range 43 | from .phonecode import RE_MOBILE_PHONE 44 | from .phonecode import RE_NATIONAL_UNIFORM_NUMBER 45 | from .phonecode import RE_TELEPHONE 46 | from .phonecode import replace_mobile 47 | from .phonecode import replace_phone 48 | from .quantifier import RE_TEMPERATURE 49 | from .quantifier import replace_temperature 50 | 51 | 52 | class TextNormalizer(): 53 | def __init__(self): 54 | self.SENTENCE_SPLITOR = re.compile(r'([:、,;。?!,;?!][”’]?)') 55 | 56 | def _split(self, text: str, lang="zh") -> List[str]: 57 | """Split long text into sentences with sentence-splitting punctuations. 58 | Args: 59 | text (str): The input text. 60 | Returns: 61 | List[str]: Sentences. 
62 | """ 63 | # Only for pure Chinese here 64 | if lang == "zh": 65 | text = text.replace(" ", "") 66 | # 过滤掉特殊字符 67 | text = re.sub(r'[《》【】<=>{}()()#&@“”^_|…\\]', '', text) 68 | text = self.SENTENCE_SPLITOR.sub(r'\1\n', text) 69 | text = text.strip() 70 | sentences = [sentence.strip() for sentence in re.split(r'\n+', text)] 71 | return sentences 72 | 73 | def _post_replace(self, sentence: str) -> str: 74 | sentence = sentence.replace('/', '每') 75 | sentence = sentence.replace('~', '至') 76 | 77 | return sentence 78 | 79 | def normalize_sentence(self, sentence: str) -> str: 80 | # basic character conversions 81 | sentence = tranditional_to_simplified(sentence) 82 | sentence = sentence.translate(F2H_ASCII_LETTERS).translate( 83 | F2H_DIGITS).translate(F2H_SPACE) 84 | 85 | # number related NSW verbalization 86 | sentence = RE_DATE.sub(replace_date, sentence) 87 | sentence = RE_DATE2.sub(replace_date2, sentence) 88 | 89 | # range first 90 | sentence = RE_TIME_RANGE.sub(replace_time, sentence) 91 | sentence = RE_TIME.sub(replace_time, sentence) 92 | 93 | sentence = RE_TEMPERATURE.sub(replace_temperature, sentence) 94 | sentence = RE_FRAC.sub(replace_frac, sentence) 95 | sentence = RE_PERCENTAGE.sub(replace_percentage, sentence) 96 | sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence) 97 | 98 | sentence = RE_TELEPHONE.sub(replace_phone, sentence) 99 | sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence) 100 | 101 | sentence = RE_RANGE.sub(replace_range, sentence) 102 | sentence = RE_INTEGER.sub(replace_negative_num, sentence) 103 | sentence = RE_DECIMAL_NUM.sub(replace_number, sentence) 104 | sentence = RE_POSITIVE_QUANTIFIERS.sub(replace_positive_quantifier, 105 | sentence) 106 | sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence) 107 | sentence = RE_NUMBER.sub(replace_number, sentence) 108 | sentence = self._post_replace(sentence) 109 | 110 | return sentence 111 | 112 | def normalize(self, text: str) -> List[str]: 113 | sentences = 
self._split(text) 114 | 115 | sentences = [self.normalize_sentence(sent) for sent in sentences] 116 | return sentences 117 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/main.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | inference_dir="$PWD/resources" 4 | 5 | am="fastspeech2_ljspeech" 6 | am_onnx="fastspeech2_ljspeech/fastspeech2_ljspeech.onnx" 7 | 8 | voc="pwgan_ljspeech" 9 | voc_onnx="pwgan_ljspeech.onnx" 10 | 11 | output_dir="result" 12 | text="$PWD/sentences_en.txt" 13 | phones_dict="$PWD/resources/fastspeech2_ljspeech/phone_id_map.txt" 14 | 15 | python tts3.py \ 16 | --inference_dir=${inference_dir} \ 17 | --am=${am} \ 18 | --am_onnx=${am_onnx} \ 19 | --voc=${voc} \ 20 | --voc_onnx=${voc_onnx} \ 21 | --output_dir=${output_dir} \ 22 | --text=${text} \ 23 | --phones_dict=${phones_dict} \ 24 | --device=cpu \ 25 | --cpu_threads=2 26 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/requirements.txt: -------------------------------------------------------------------------------- 1 | g2p_en==2.1.0 2 | inflect==5.3.0 3 | jieba==0.42.1 4 | numpy>=1.19.3 5 | onnxruntime>=1.10.0 6 | pypinyin==0.44.0 7 | pypinyin_dict==0.2.0 8 | SoundFile==0.10.3.post1 9 | timer==0.2.2 10 | -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/sentences_en.txt: -------------------------------------------------------------------------------- 1 | 001 Life was like a box of chocolates, you never know what you're gonna get. 2 | 002 With great power there must come great responsibility. 3 | 003 To be or not to be, that’s a question. 4 | 004 A man can be destroyed but not defeated 5 | 005 Do not, for one repulse, give up the purpose that you resolved to effort. 
6 | 006 Death is just a part of life, something we're all destined to do. 7 | 007 I think it's hard winning a war with words. 8 | 008 Don’t argue with the people of strong determination, because they may change the fact! 9 | 009 Love you three thousand times. -------------------------------------------------------------------------------- /python/PaddleSpeech/ljspeech_tts3/syn_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# NOTE: the frontend imports are done lazily inside get_frontend (below) so
# that importing this module — e.g. to use get_sentences alone — does not
# pull in the heavy frontend package.


# input
def get_sentences(args):
    """Read (utt_id, sentence) pairs for evaluation from ``args.text``.

    Each non-empty line is "<utt_id> <text...>". For lang 'zh' the words are
    joined without spaces, for 'en' with single spaces.

    Args:
        args: argparse.Namespace with at least ``text`` and ``lang``.
    Returns:
        list of (utt_id, sentence) tuples.
    Raises:
        ValueError: if ``args.lang`` is neither 'zh' nor 'en'.
    """
    lang = getattr(args, 'lang', None)
    if lang not in ('zh', 'en'):
        # Previously an unsupported lang crashed later with NameError on
        # ``sentence``; fail early with a clear message instead.
        raise ValueError(f"lang should be 'zh' or 'en', got {lang!r}")
    sep = "" if lang == 'zh' else " "
    sentences = []
    with open(args.text, 'rt', encoding='utf-8') as f:
        for line in f:
            items = line.strip().split()
            if not items:
                # Skip blank lines instead of raising IndexError on items[0].
                continue
            utt_id = items[0]
            sentences.append((utt_id, sep.join(items[1:])))
    return sentences


# frontend
def get_frontend(args):
    """Build the text frontend matching ``args.lang``.

    Args:
        args: argparse.Namespace with ``lang``, ``phones_dict`` and,
            optionally for 'zh', ``tones_dict``.
    Returns:
        Frontend (zh) or English (en) instance.
    Raises:
        ValueError: if ``args.lang`` is neither 'zh' nor 'en'.
    """
    lang = getattr(args, 'lang', None)
    if lang == 'zh':
        from frontend.zh_frontend import Frontend
        # tts3.py's parser defines no --tones_dict, so fall back to None
        # instead of raising AttributeError.
        frontend = Frontend(phone_vocab_path=args.phones_dict,
                            tone_vocab_path=getattr(args, 'tones_dict', None))
    elif lang == 'en':
        from frontend import English
        frontend = English(phone_vocab_path=args.phones_dict)
    else:
        # Previously only printed "wrong lang!" and then crashed with
        # UnboundLocalError on ``frontend``; raise a clear error instead.
        raise ValueError(f"lang should be 'zh' or 'en', got {lang!r}")
    print("frontend done!")
    return frontend
# --------------------------------------------------------------------------------
# /python/PaddleSpeech/ljspeech_tts3/tts3.py:
# --------------------------------------------------------------------------------
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | import argparse 15 | from pathlib import Path 16 | 17 | import numpy as np 18 | import onnxruntime as ort 19 | import soundfile as sf 20 | from timer import timer 21 | 22 | from syn_utils import get_frontend, get_sentences 23 | 24 | root_dir = str(Path.cwd()) 25 | 26 | 27 | def str2bool(str): 28 | return True if str.lower() == 'true' else False 29 | 30 | 31 | def get_sess(args, filed='am'): 32 | full_name = '' 33 | if filed == 'am': 34 | full_name = args.am_onnx 35 | elif filed == 'voc': 36 | full_name = args.voc_onnx 37 | 38 | model_dir = str(Path(args.inference_dir) / full_name) 39 | 40 | sess_options = ort.SessionOptions() 41 | sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL 42 | sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL 43 | 44 | if args.device == "gpu": 45 | # fastspeech2/mb_melgan can't use trt now! 46 | if args.use_trt: 47 | providers = ['TensorrtExecutionProvider'] 48 | else: 49 | providers = ['CUDAExecutionProvider'] 50 | elif args.device == "cpu": 51 | providers = ['CPUExecutionProvider'] 52 | 53 | sess_options.intra_op_num_threads = args.cpu_threads 54 | sess = ort.InferenceSession(model_dir, 55 | providers=providers, 56 | sess_options=sess_options) 57 | return sess 58 | 59 | 60 | def ort_predict(args): 61 | 62 | # frontend 63 | frontend = get_frontend(args) 64 | 65 | output_dir = Path(args.output_dir) 66 | output_dir.mkdir(parents=True, exist_ok=True) 67 | sentences = get_sentences(args) 68 | 69 | am_name = args.am[:args.am.rindex('_')] 70 | am_dataset = args.am[args.am.rindex('_') + 1:] 71 | fs = 24000 if am_dataset != 'ljspeech' else 22050 72 | 73 | # am 74 | am_sess = get_sess(args, filed='am') 75 | 76 | # vocoder 77 | voc_sess = get_sess(args, filed='voc') 78 | 79 | # frontend warmup 80 | # Loading model cost 0.5+ seconds 81 | if args.lang == 'zh': 82 | frontend.get_input_ids("你好,欢迎使用飞桨框架进行深度学习研究!", 83 | merge_sentences=True) 84 | elif args.lang == 'en': 85 | frontend.get_input_ids("Love 
you three thousand times.", 86 | merge_sentences=False) 87 | else: 88 | print("lang should in be 'zh' or 'en' here!") 89 | 90 | N = 0 91 | T = 0 92 | merge_sentences = True 93 | for utt_id, sentence in sentences: 94 | with timer() as t: 95 | if args.lang == 'zh' or args.lang == 'en': 96 | input_ids = frontend.get_input_ids( 97 | sentence, merge_sentences=merge_sentences) 98 | 99 | phone_ids = input_ids["phone_ids"] 100 | else: 101 | print("lang should in be 'zh' here!") 102 | 103 | # merge_sentences=True here, so we only use the first item of phone_ids 104 | phone_ids = phone_ids[0] 105 | mel = am_sess.run(output_names=None, input_feed={ 106 | 'text': phone_ids}) 107 | mel = mel[0] 108 | wav = voc_sess.run(output_names=None, input_feed={'logmel': mel}) 109 | 110 | N += len(wav[0]) 111 | T += t.elapse 112 | speed = len(wav[0]) / t.elapse 113 | rtf = fs / speed 114 | sf.write( 115 | str(output_dir / (utt_id + ".wav")), 116 | np.array(wav)[0], 117 | samplerate=fs) 118 | print( 119 | f"{utt_id}, mel: {mel.shape}, wave: {len(wav[0])}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}." 
120 | ) 121 | print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }") 122 | 123 | 124 | def parse_args(): 125 | parser = argparse.ArgumentParser(description="Infernce with onnxruntime.") 126 | 127 | # acoustic model 128 | parser.add_argument('--am', type=str, 129 | default='fastspeech2_ljspeech', 130 | help='Choose acoustic model type of tts task.') 131 | parser.add_argument('--am_onnx', type=str, 132 | default='fastspeech2_ljspeech/fastspeech2_ljspeech.onnx') 133 | 134 | parser.add_argument("--phones_dict", type=str, 135 | default='resources/fastspeech2_ljspeech/phone_id_map.txt', 136 | help="phone vocabulary file.") 137 | 138 | # voc 139 | parser.add_argument('--voc', type=str, 140 | default='pwgan_ljspeech', 141 | help='Choose vocoder type of tts task.') 142 | 143 | parser.add_argument('--voc_onnx', type=str, 144 | default='pwgan_ljspeech.onnx') 145 | 146 | # other 147 | parser.add_argument("--inference_dir", type=str, 148 | default=f"{root_dir}/resources", 149 | help="dir to save inference models") 150 | 151 | parser.add_argument("--text", type=str, 152 | default='sentences_en.txt') 153 | 154 | parser.add_argument("--output_dir", type=str, 155 | default='infer_result') 156 | 157 | parser.add_argument('--lang', type=str, 158 | default='en', 159 | help='Choose model language. zh or en') 160 | 161 | # inference 162 | parser.add_argument("--use_trt", 163 | type=str2bool, 164 | default=False, 165 | help="Whether to use inference engin TensorRT.", ) 166 | 167 | parser.add_argument("--device", default="cpu", 168 | choices=["gpu", "cpu"]) 169 | 170 | parser.add_argument('--cpu_threads', type=int, default=1) 171 | 172 | args, _ = parser.parse_known_args() 173 | Path(args.output_dir).mkdir(parents=True, exist_ok=True) 174 | return args 175 | 176 | 177 | if __name__ == "__main__": 178 | args = parse_args() 179 | ort_predict(args) --------------------------------------------------------------------------------