├── README.md
├── hlp
├── __init__.py
├── chat
│ ├── README.md
│ ├── __init__.py
│ ├── chatter.py
│ ├── common
│ │ ├── data_utils.py
│ │ ├── pre_treat.py
│ │ └── utils.py
│ ├── config
│ │ ├── seq2seq.json
│ │ ├── smn.json
│ │ └── transformer.json
│ ├── data
│ │ ├── LCCC.json
│ │ ├── cross_woz.json
│ │ ├── douban.txt
│ │ ├── qin_yun.csv
│ │ ├── stc_weibo_train_post
│ │ ├── stc_weibo_train_response
│ │ ├── tieba.dialogues
│ │ ├── ubuntu_train.txt
│ │ ├── ubuntu_valid.txt
│ │ └── xiaohuangjie.txt
│ ├── model
│ │ ├── seq2seq.py
│ │ ├── smn.py
│ │ └── transformer.py
│ ├── seq2seq_chatter.py
│ ├── smn_chatter.py
│ └── transformer_chatter.py
├── mt
│ ├── README.md
│ ├── __init__.py
│ ├── common
│ │ ├── bleu.py
│ │ ├── load_dataset.py
│ │ ├── misc.py
│ │ ├── text_split.py
│ │ └── text_vectorize.py
│ ├── config
│ │ ├── config.json
│ │ └── get_config.py
│ ├── data
│ │ └── anki
│ │ │ ├── anki-cmn-eng.txt
│ │ │ ├── en-zh_eval.txt
│ │ │ └── en-zh_val.txt
│ ├── evaluate.py
│ ├── lm
│ │ ├── __init__.py
│ │ ├── language_model.py
│ │ ├── lm_preprocess.py
│ │ ├── lm_rescore.py
│ │ └── lm_train.py
│ ├── model
│ │ ├── checkpoint.py
│ │ ├── nmt_model.py
│ │ └── transformer.py
│ ├── preprocess.py
│ ├── train.py
│ ├── translate.py
│ └── translator.py
├── stt
│ ├── __init__.py
│ ├── data
│ │ ├── LibriSpeech
│ │ │ ├── dev-clean-2
│ │ │ │ ├── 84
│ │ │ │ │ └── 121550
│ │ │ │ │ │ ├── 84-121550-0000.flac
│ │ │ │ │ │ ├── 84-121550-0001.flac
│ │ │ │ │ │ └── 84-121550.trans.txt
│ │ │ │ └── 174
│ │ │ │ │ └── 168635
│ │ │ │ │ ├── 174-168635-0000.flac
│ │ │ │ │ ├── 174-168635-0001.flac
│ │ │ │ │ └── 174-168635.trans.txt
│ │ │ └── train-clean-5
│ │ │ │ ├── 1088
│ │ │ │ ├── 134315
│ │ │ │ │ ├── 1088-134315-0000.flac
│ │ │ │ │ ├── 1088-134315-0001.flac
│ │ │ │ │ ├── 1088-134315-0002.flac
│ │ │ │ │ └── 1088-134315.trans.txt
│ │ │ │ └── 134318
│ │ │ │ │ ├── 1088-134318-0000.flac
│ │ │ │ │ ├── 1088-134318-0001.flac
│ │ │ │ │ └── 1088-134318.trans.txt
│ │ │ │ └── 1737
│ │ │ │ └── 146161
│ │ │ │ ├── 1737-146161-0000.flac
│ │ │ │ ├── 1737-146161-0001.flac
│ │ │ │ └── 1737-146161.trans.txt
│ │ └── data_thchs30
│ │ │ ├── data
│ │ │ ├── .wav.scp
│ │ │ ├── A2_0.wav
│ │ │ ├── A2_0.wav.trn
│ │ │ ├── A2_1.wav
│ │ │ ├── A2_1.wav.trn
│ │ │ ├── A2_2.wav
│ │ │ ├── A2_2.wav.trn
│ │ │ ├── A2_3.wav
│ │ │ ├── A2_3.wav.trn
│ │ │ ├── A2_33.wav
│ │ │ ├── A2_33.wav.trn
│ │ │ ├── A2_4.wav
│ │ │ ├── A2_4.wav.trn
│ │ │ ├── A2_5.wav
│ │ │ ├── A2_5.wav.trn
│ │ │ ├── A2_58.wav
│ │ │ ├── A2_58.wav.trn
│ │ │ ├── A2_6.wav
│ │ │ ├── A2_6.wav.trn
│ │ │ ├── A2_7.wav
│ │ │ ├── A2_7.wav.trn
│ │ │ ├── D4_750.wav
│ │ │ ├── D4_750.wav.trn
│ │ │ ├── D4_751.wav
│ │ │ └── D4_751.wav.trn
│ │ │ ├── dev
│ │ │ ├── A2_33.wav
│ │ │ ├── A2_33.wav.trn
│ │ │ ├── A2_58.wav
│ │ │ └── A2_58.wav.trn
│ │ │ ├── test
│ │ │ ├── D4_750.wav
│ │ │ ├── D4_750.wav.trn
│ │ │ ├── D4_751.wav
│ │ │ └── D4_751.wav.trn
│ │ │ └── train
│ │ │ ├── A2_0.wav
│ │ │ ├── A2_0.wav.trn
│ │ │ ├── A2_1.wav
│ │ │ ├── A2_1.wav.trn
│ │ │ ├── A2_2.wav
│ │ │ ├── A2_2.wav.trn
│ │ │ ├── A2_3.wav
│ │ │ ├── A2_3.wav.trn
│ │ │ ├── A2_4.wav
│ │ │ ├── A2_4.wav.trn
│ │ │ ├── A2_5.wav
│ │ │ ├── A2_5.wav.trn
│ │ │ ├── A2_6.wav
│ │ │ ├── A2_6.wav.trn
│ │ │ ├── A2_7.wav
│ │ │ └── A2_7.wav.trn
│ ├── deepspeech2
│ │ ├── actuator.py
│ │ ├── model.py
│ │ └── module.py
│ ├── las
│ │ ├── actuator.py
│ │ ├── las.py
│ │ ├── module.py
│ │ └── plas.py
│ ├── rnnt
│ │ ├── __init__.py
│ │ └── model.py
│ ├── transformer
│ │ ├── actuator.py
│ │ ├── model.py
│ │ └── module.py
│ └── utils
│ │ ├── audio_process.py
│ │ ├── load_dataset.py
│ │ ├── pre_treat.py
│ │ ├── spec_augment.py
│ │ ├── text_process.py
│ │ └── utils.py
├── tts
│ ├── __init__.py
│ ├── data
│ │ ├── LJSpeech-1.1
│ │ │ ├── metadata.csv
│ │ │ └── wavs
│ │ │ │ ├── LJ001-0001.wav
│ │ │ │ ├── LJ001-0002.wav
│ │ │ │ ├── LJ001-0003.wav
│ │ │ │ ├── LJ001-0004.wav
│ │ │ │ ├── LJ001-0005.wav
│ │ │ │ └── LJ001-0006.wav
│ │ ├── cmudict-0.7b
│ │ └── number
│ │ │ ├── metadata.csv
│ │ │ ├── test
│ │ │ └── wavs
│ │ │ │ ├── 0_jackson_0.wav
│ │ │ │ ├── 0_jackson_1.wav
│ │ │ │ ├── 1_jackson_0.wav
│ │ │ │ ├── 1_jackson_1.wav
│ │ │ │ ├── 2_jackson_0.wav
│ │ │ │ ├── 2_jackson_1.wav
│ │ │ │ ├── 3_jackson_0.wav
│ │ │ │ ├── 3_jackson_1.wav
│ │ │ │ ├── 4_jackson_0.wav
│ │ │ │ ├── 4_jackson_1.wav
│ │ │ │ ├── 5_jackson_0.wav
│ │ │ │ ├── 5_jackson_1.wav
│ │ │ │ ├── 6_jackson_0.wav
│ │ │ │ ├── 6_jackson_1.wav
│ │ │ │ ├── 7_jackson_0.wav
│ │ │ │ ├── 7_jackson_1.wav
│ │ │ │ ├── 8_jackson_0.wav
│ │ │ │ ├── 8_jackson_1.wav
│ │ │ │ ├── 9_jackson_0.wav
│ │ │ │ └── 9_jackson_1.wav
│ │ │ └── train
│ │ │ └── wavs
│ │ │ ├── 0_jackson_10.wav
│ │ │ ├── 0_jackson_11.wav
│ │ │ ├── 0_jackson_2.wav
│ │ │ ├── 0_jackson_3.wav
│ │ │ ├── 0_jackson_4.wav
│ │ │ ├── 0_jackson_5.wav
│ │ │ ├── 0_jackson_6.wav
│ │ │ ├── 0_jackson_7.wav
│ │ │ ├── 0_jackson_8.wav
│ │ │ ├── 0_jackson_9.wav
│ │ │ ├── 1_jackson_0.wav
│ │ │ ├── 1_jackson_1.wav
│ │ │ ├── 1_jackson_2.wav
│ │ │ ├── 1_jackson_3.wav
│ │ │ ├── 1_jackson_4.wav
│ │ │ ├── 1_jackson_5.wav
│ │ │ ├── 1_jackson_6.wav
│ │ │ ├── 1_jackson_7.wav
│ │ │ ├── 1_jackson_8.wav
│ │ │ ├── 1_jackson_9.wav
│ │ │ ├── 2_jackson_10.wav
│ │ │ ├── 2_jackson_11.wav
│ │ │ ├── 2_jackson_2.wav
│ │ │ ├── 2_jackson_3.wav
│ │ │ ├── 2_jackson_4.wav
│ │ │ ├── 2_jackson_5.wav
│ │ │ ├── 2_jackson_6.wav
│ │ │ ├── 2_jackson_7.wav
│ │ │ ├── 2_jackson_8.wav
│ │ │ ├── 2_jackson_9.wav
│ │ │ ├── 3_jackson_0.wav
│ │ │ ├── 3_jackson_1.wav
│ │ │ ├── 3_jackson_2.wav
│ │ │ ├── 3_jackson_3.wav
│ │ │ ├── 3_jackson_4.wav
│ │ │ ├── 3_jackson_5.wav
│ │ │ ├── 3_jackson_6.wav
│ │ │ ├── 3_jackson_7.wav
│ │ │ ├── 3_jackson_8.wav
│ │ │ ├── 3_jackson_9.wav
│ │ │ ├── 4_jackson_0.wav
│ │ │ ├── 4_jackson_1.wav
│ │ │ ├── 4_jackson_2.wav
│ │ │ ├── 4_jackson_3.wav
│ │ │ ├── 4_jackson_4.wav
│ │ │ ├── 4_jackson_5.wav
│ │ │ ├── 4_jackson_6.wav
│ │ │ ├── 4_jackson_7.wav
│ │ │ ├── 4_jackson_8.wav
│ │ │ ├── 4_jackson_9.wav
│ │ │ ├── 5_jackson_0.wav
│ │ │ ├── 5_jackson_1.wav
│ │ │ ├── 5_jackson_2.wav
│ │ │ ├── 5_jackson_3.wav
│ │ │ ├── 5_jackson_4.wav
│ │ │ ├── 5_jackson_5.wav
│ │ │ ├── 5_jackson_6.wav
│ │ │ ├── 5_jackson_7.wav
│ │ │ ├── 5_jackson_8.wav
│ │ │ ├── 5_jackson_9.wav
│ │ │ ├── 6_jackson_0.wav
│ │ │ ├── 6_jackson_1.wav
│ │ │ ├── 6_jackson_2.wav
│ │ │ ├── 6_jackson_3.wav
│ │ │ ├── 6_jackson_4.wav
│ │ │ ├── 6_jackson_5.wav
│ │ │ ├── 6_jackson_6.wav
│ │ │ ├── 6_jackson_7.wav
│ │ │ ├── 6_jackson_8.wav
│ │ │ ├── 6_jackson_9.wav
│ │ │ ├── 7_jackson_0.wav
│ │ │ ├── 7_jackson_1.wav
│ │ │ ├── 7_jackson_2.wav
│ │ │ ├── 7_jackson_3.wav
│ │ │ ├── 7_jackson_4.wav
│ │ │ ├── 7_jackson_5.wav
│ │ │ ├── 7_jackson_6.wav
│ │ │ ├── 7_jackson_7.wav
│ │ │ ├── 7_jackson_8.wav
│ │ │ ├── 7_jackson_9.wav
│ │ │ ├── 8_jackson_0.wav
│ │ │ ├── 8_jackson_1.wav
│ │ │ ├── 8_jackson_2.wav
│ │ │ ├── 8_jackson_3.wav
│ │ │ ├── 8_jackson_4.wav
│ │ │ ├── 8_jackson_5.wav
│ │ │ ├── 8_jackson_6.wav
│ │ │ ├── 8_jackson_7.wav
│ │ │ ├── 8_jackson_8.wav
│ │ │ ├── 8_jackson_9.wav
│ │ │ ├── 9_jackson_0.wav
│ │ │ ├── 9_jackson_1.wav
│ │ │ ├── 9_jackson_2.wav
│ │ │ ├── 9_jackson_3.wav
│ │ │ ├── 9_jackson_4.wav
│ │ │ ├── 9_jackson_5.wav
│ │ │ ├── 9_jackson_6.wav
│ │ │ ├── 9_jackson_7.wav
│ │ │ ├── 9_jackson_8.wav
│ │ │ └── 9_jackson_9.wav
│ ├── tacotron2
│ │ ├── actuator.py
│ │ ├── model.py
│ │ └── module.py
│ ├── transformer
│ │ ├── actuator.py
│ │ ├── model.py
│ │ └── module.py
│ ├── utils
│ │ ├── layers.py
│ │ ├── load_dataset.py
│ │ ├── pre_treat.py
│ │ ├── spec.py
│ │ └── text_preprocess.py
│ └── wavernn
│ │ ├── generator.py
│ │ ├── preprocess.py
│ │ ├── train.py
│ │ ├── utils.py
│ │ └── wavernn.py
└── utils
│ ├── __init__.py
│ ├── beamsearch.py
│ ├── layers.py
│ ├── optimizers.py
│ ├── text_split.py
│ ├── train_history.py
│ └── utils.py
└── requirements.txt
/README.md:
--------------------------------------------------------------------------------
1 | # hlp
2 | Deep-learning-based dialogue systems, speech recognition, machine translation, and speech synthesis.
3 | # Directory Structure
4 | - hlp: top-level package directory
5 | - mt: machine translation package
6 | - stt: speech recognition package
7 | - tts: speech synthesis package
8 | - chat: dialogue system package
9 | - utils: shared utilities package
10 | 
11 | Each method, model, or implementation should live in its own sub-package under one of mt, stt, tts, or chat.
12 | For example, a Tacotron speech-synthesis implementation should go in a tacotron package under tts.
13 | # In Progress
14 | - Seq2Seq-based chit-chat system
15 | - DeepSpeech2-based speech recognition
16 | - Tacotron2-based speech synthesis
17 | - Transformer-based chit-chat system
18 | - Transformer-based machine translation
19 | - Transformer-based speech recognition
20 | - Transformer-based speech synthesis
21 | - Listen-Attend-Spell-based speech recognition
22 | - Retrieval-based multi-turn chit-chat system
23 | - RNN-T streaming speech recognition
24 | - WaveRNN vocoder
25 | 
--------------------------------------------------------------------------------
/hlp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/__init__.py
--------------------------------------------------------------------------------
/hlp/chat/README.md:
--------------------------------------------------------------------------------
1 | 
2 | # How to run
3 | + Entry points:
4 |     + seq2seq_chatter.py is the entry script for seq2seq; it must be run with command-line arguments
5 |     + transformer_chatter.py is the entry script for transformer; it must be run with command-line arguments
6 |     + smn_chatter.py is the entry script for smn; it must be run with command-line arguments
7 | + Command format:
8 |     + seq2seq: python seq2seq_chatter.py --act [mode]
9 |     + transformer: python transformer_chatter.py --act [mode]
10 |     + smn: python smn_chatter.py --act [mode]
11 |     + Modes: pre_treat (default) / train / evaluate / chat
12 | + Example commands:
13 |     + python seq2seq_chatter.py
14 |     + python seq2seq_chatter.py --act pre_treat
15 |     + python transformer_chatter.py
16 |     + python transformer_chatter.py --act pre_treat
17 |     + python smn_chatter.py
18 |     + python smn_chatter.py --act pre_treat
19 | + pre_treat is the text preprocessing mode; run it first if no tokenized result set exists yet
20 | + train is the training mode
21 | + evaluate is the metric evaluation mode
22 | + chat is the interactive dialogue mode; while chatting, type exit to quit the conversation
23 | 
24 | + The normal order of execution is pre_treat -> train -> evaluate -> chat
25 | 
26 | # Running the SMN model
27 | The SMN retrieval-based dialogue system needs a Solr environment before use. Linux is recommended as the deployment OS and containers (Docker) as the deployment tool, together with:
28 | + Solr (8.6.3)
29 | + pysolr (3.9.0)
30 | ## Solr environment
31 | To keep Solr running stably in production and to simplify later maintenance, deploy it with the Dockerfile from [docker-solr](https://github.com/docker-solr/docker-solr).
32 | 
33 | If you only want to test the model, the following minimal setup is sufficient:
34 | ```
35 | docker pull solr:8.6.3
36 | # then start solr
37 | docker run -itd --name solr -p 8983:8983 solr:8.6.3
38 | # then create a core, here named smn (optional)
39 | docker exec -it --user=solr solr bin/solr create_core -c smn
40 | ```
41 | 
42 | Solr offers tokenizers such as IK Analyzer, Smartcn, and pinyin analyzers; download the corresponding jar and register it in the core's managed-schema configuration file.
43 | 
44 | **Note**: if TF-IDF is used, the similarity configuration must also be enabled in managed-schema.
45 | ## Usage from Python
46 | Once Solr is deployed, connect to it from Python with pysolr:
47 | ```
48 | pip install pysolr
49 | ```
50 | 
51 | Index data is added as follows (usually after a health check). The reply data to be indexed, responses, is a JSON list of the form [{},{},{},...]; structure each object according to the needs of your replies:
52 | ```
53 | solr = pysolr.Solr(url=solr_server, always_commit=True, timeout=10)
54 | # health check
55 | solr.ping()
56 | solr.add(docs=responses)
57 | ```
58 | 
59 | Queries are issued as follows; a TF-IDF function query over all utterances looks like this:
60 | ```
61 | {!func}sum(product(idf(utterance,key1),tf(utterance,key1)),product(idf(utterance,key2),tf(utterance,key2)),...)
62 | ```
63 | 
64 | The data must be added to Solr before use; for this SMN model it is enough to run the pre_treat mode first. A minimal query sketch in Python follows below.
65 | 
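66 | A minimal query sketch in Python (illustrative only: the helper name build_tfidf_query and the local Solr URL are assumptions, not part of this repository; the field name utterance matches the query above):
67 | ```
68 | import pysolr
69 | 
70 | def build_tfidf_query(keys):
71 |     # one product(idf, tf) term per keyword, summed into a single function query
72 |     terms = ",".join("product(idf(utterance,%s),tf(utterance,%s))" % (k, k) for k in keys)
73 |     return "{!func}sum(%s)" % terms
74 | 
75 | solr = pysolr.Solr(url="http://localhost:8983/solr/smn/", always_commit=True, timeout=10)
76 | for doc in solr.search(build_tfidf_query(["hello", "world"]), rows=10):
77 |     print(doc)
78 | ```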
--------------------------------------------------------------------------------
/hlp/chat/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/chat/__init__.py
--------------------------------------------------------------------------------
/hlp/chat/common/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 |
4 |
5 | def log_operator(level: str, log_file: str = None,
6 | log_format: str = "[%(levelname)s] - [%(asctime)s] - [file: %(filename)s] - "
7 | "[function: %(funcName)s] - [%(message)s]") -> logging.Logger:
8 |     """
9 |     Build a file logger. Supported levels: 'CRITICAL', 'FATAL', 'ERROR', 'WARN', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'
10 |     CRITICAL = 50
11 |     FATAL = CRITICAL
12 |     ERROR = 40
13 |     WARNING = 30
14 |     WARN = WARNING
15 |     INFO = 20
16 |     DEBUG = 10
17 |     NOTSET = 0
18 |     Note: each call adds another FileHandler to the root logger.
19 |     :param level: log level
20 |     :param log_file: path of the log file; defaults to hlp/chat/data/runtime.log
21 |     :param log_format: format of each log record
22 |     :return: logger
23 |     """
24 | if log_file is None:
25 | log_file = os.path.abspath(__file__)[:os.path.abspath(__file__).rfind("\\hlp\\")] + '\\hlp\\chat\\data\\runtime.log'
26 |
27 | logger = logging.getLogger()
28 | logger.setLevel(level)
29 | file_handler = logging.FileHandler(log_file, encoding='utf-8')
30 | file_handler.setLevel(level=level)
31 | formatter = logging.Formatter(log_format)
32 | file_handler.setFormatter(formatter)
33 | logger.addHandler(file_handler)
34 |
35 | return logger
36 |
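37 | 
38 | # Minimal usage sketch (illustrative, with an assumed message): build an INFO-level logger
39 | # that writes to the default runtime.log and emit a single record.
40 | if __name__ == '__main__':
41 |     log_operator(level="INFO").info("preprocessing finished")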
--------------------------------------------------------------------------------
/hlp/chat/config/seq2seq.json:
--------------------------------------------------------------------------------
1 | {
2 | "num_layers": 2,
3 | "encoder_layers": 2,
4 | "decoder_layers": 2,
5 | "units": 1024,
6 | "vocab_size": 1000,
7 | "embedding_dim": 256,
8 | "max_train_data_size": 200,
9 | "max_valid_data_size": 100,
10 | "max_length": 40,
11 | "type": "pre_treat",
12 | "dict_file": "\\data\\seq2seq_dict.json",
13 | "checkpoint": "\\checkpoints\\seq2seq",
14 | "resource_data": "\\data\\LCCC.json",
15 | "tokenized_data": "\\data\\lccc_tokenized.txt",
16 | "qa_tokenized_data": "\\data\\tokenized.txt",
17 | "history_image_dir": "\\data\\history\\seq2seq\\",
18 | "valid_data_file": "",
19 | "valid_freq": 5,
20 | "checkpoint_save_freq": 2,
21 | "checkpoint_save_size": 1,
22 | "batch_size": 32,
23 | "buffer_size": 20000,
24 | "beam_size": 3,
25 | "valid_data_split": 0.2,
26 | "epochs": 5,
27 | "start_sign": "start",
28 | "end_sign": "end"
29 | }
--------------------------------------------------------------------------------
/hlp/chat/config/smn.json:
--------------------------------------------------------------------------------
1 | {
2 | "max_sentence": 50,
3 | "max_utterance": 10,
4 | "units": 200,
5 | "vocab_size": 2000,
6 | "embedding_dim": 200,
7 | "max_train_data_size": 36,
8 | "max_valid_data_size": 100,
9 | "max_database_size": 0,
10 | "learning_rate": 0.001,
11 | "type": "pre_treat",
12 | "dict_file": "\\data\\smn_dict_fn.json",
13 | "checkpoint": "\\checkpoints\\smn",
14 | "tokenized_train": "\\data\\ubuntu_train.txt",
15 | "tokenized_valid": "\\data\\ubuntu_valid.txt",
16 | "solr_server": "http://49.235.33.100:8983/solr/smn/",
17 | "candidate_database": "\\data\\candidate.json",
18 | "batch_size": 32,
19 | "buffer_size": 20000,
20 | "epochs": 5
21 | }
--------------------------------------------------------------------------------
/hlp/chat/config/transformer.json:
--------------------------------------------------------------------------------
1 | {
2 | "num_layers": 2,
3 | "d_model": 256,
4 | "num_heads": 8,
5 | "units": 512,
6 | "dropout": 0.1,
7 | "vocab_size": 1500,
8 | "embedding_dim": 256,
9 | "max_train_data_size": 200,
10 | "max_valid_data_size": 100,
11 | "max_length": 40,
12 | "type": "pre_treat",
13 | "dict_file": "\\data\\transformer_dict.json",
14 | "checkpoint": "\\checkpoints\\transformer",
15 | "resource_data": "\\data\\LCCC.json",
16 | "tokenized_data": "\\data\\lccc_tokenized.txt",
17 | "qa_tokenized_data": "\\data\\tokenized.txt",
18 | "history_image_dir": "\\data\\history\\transformer\\",
19 | "valid_data_file": "",
20 | "valid_freq": 5,
21 | "checkpoint_save_freq": 2,
22 | "checkpoint_save_size": 1,
23 | "batch_size": 32,
24 | "buffer_size": 20000,
25 | "beam_size": 3,
26 | "valid_data_split": 0.2,
27 | "epochs": 5,
28 | "start_sign": "start",
29 | "end_sign": "end"
30 | }
--------------------------------------------------------------------------------
/hlp/chat/model/seq2seq.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import hlp.utils.layers as layers
3 | import hlp.chat.common.utils as utils
4 |
5 |
6 | def rnn_layer(units: int, input_feature_dim: int, cell_type: str = 'lstm',
7 | if_bidirectional: bool = True) -> tf.keras.Model:
8 |     """
9 |     RNN layer; the cell type and whether it is bidirectional are configurable
10 |     :param units: number of cell units
11 |     :param input_feature_dim: dimensionality of the input features
12 |     :param cell_type: cell type, lstm/gru, default lstm
13 |     :param if_bidirectional: whether to use a bidirectional RNN
14 |     :return: Multi-layer RNN
15 |     """
16 | inputs = tf.keras.Input(shape=(None, input_feature_dim))
17 | if cell_type == 'lstm':
18 | rnn = tf.keras.layers.LSTM(units=units, return_sequences=True, return_state=True,
19 | recurrent_initializer='glorot_uniform')
20 | elif cell_type == 'gru':
21 | rnn = tf.keras.layers.GRU(units=units, return_sequences=True, return_state=True,
22 | recurrent_initializer='glorot_uniform')
23 |     else:
24 |         utils.log_operator(level=10).info("unsupported cell type: " + cell_type)
25 |         raise ValueError("cell_type must be 'lstm' or 'gru', got: " + cell_type)
26 |
27 | if if_bidirectional:
28 | rnn = tf.keras.layers.Bidirectional(rnn)
29 |
30 | rnn_outputs = rnn(inputs)
31 | outputs = rnn_outputs[0]
32 | states = outputs[:, -1, :]
33 |
34 | return tf.keras.Model(inputs=inputs, outputs=[outputs, states])
35 |
36 |
37 | def encoder(vocab_size: int, embedding_dim: int, enc_units: int, num_layers: int,
38 | cell_type: str, if_bidirectional: bool = True) -> tf.keras.Model:
39 |     """
40 |     Seq2Seq encoder: embeds the input tokens and encodes them with a
41 |     stack of (optionally bidirectional) RNN layers; the last time step
42 |     of the final layer is returned as the encoder state.
43 |     :param vocab_size: vocabulary size
44 |     :param embedding_dim: embedding dimension
45 |     :param enc_units: number of units per RNN layer
46 |     :param num_layers: number of RNN layers inside the encoder
47 |     :param cell_type: cell type, lstm/gru, default lstm
48 |     :param if_bidirectional: whether to use bidirectional RNNs
49 |     :return: Seq2Seq Encoder
50 |     """
51 | inputs = tf.keras.Input(shape=(None,))
52 | outputs = tf.keras.layers.Embedding(vocab_size, embedding_dim)(inputs)
53 |
54 | for i in range(num_layers):
55 | outputs, states = rnn_layer(units=enc_units, input_feature_dim=outputs.shape[-1],
56 | cell_type=cell_type, if_bidirectional=if_bidirectional)(outputs)
57 |
58 | return tf.keras.Model(inputs=inputs, outputs=[outputs, states])
59 |
60 |
61 | def decoder(vocab_size: int, embedding_dim: int, dec_units: int, enc_units: int,
62 | num_layers: int, cell_type: str) -> tf.keras.Model:
63 |     """
64 |     Seq2Seq decoder: Bahdanau attention over the encoder output and the decoder
65 |     hidden state produces a context vector, which is concatenated with the
66 |     embedded input x, run through the RNN stack, and projected to the vocabulary.
67 |     :param vocab_size: vocabulary size
68 |     :param embedding_dim: embedding dimension
69 |     :param dec_units: number of decoder units
70 |     :param enc_units: number of encoder units
71 |     :param num_layers: number of RNN layers inside the decoder
72 |     :param cell_type: cell type, lstm/gru, default lstm
73 |     :return: Seq2Seq Decoder
74 |     """
75 | inputs = tf.keras.Input(shape=(None,))
76 | enc_output = tf.keras.Input(shape=(None, enc_units))
77 | dec_hidden = tf.keras.Input(shape=(enc_units,))
78 |
79 | embeddings = tf.keras.layers.Embedding(vocab_size, embedding_dim)(inputs)
80 | context_vector, attention_weight = layers.BahdanauAttention(dec_units)(dec_hidden, enc_output)
81 | outputs = tf.concat([tf.expand_dims(context_vector, 1), embeddings], axis=-1)
82 |
83 | for i in range(num_layers):
84 |         # bidirectional RNNs are not allowed in the decoder
85 | outputs, states = rnn_layer(units=dec_units, input_feature_dim=outputs.shape[-1],
86 | cell_type=cell_type, if_bidirectional=False)(outputs)
87 |
88 | outputs = tf.reshape(outputs, (-1, outputs.shape[-1]))
89 | outputs = tf.keras.layers.Dense(vocab_size)(outputs)
90 |
91 | return tf.keras.Model(inputs=[inputs, enc_output, dec_hidden], outputs=[outputs, states, attention_weight])
92 |
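93 | 
94 | # Minimal build sketch (illustrative; the hyper-parameters are arbitrary): build the encoder
95 | # and run it on a dummy batch of token ids to inspect the output shapes.
96 | if __name__ == '__main__':
97 |     enc = encoder(vocab_size=1000, embedding_dim=256, enc_units=512, num_layers=1,
98 |                   cell_type='gru', if_bidirectional=True)
99 |     sample_output, sample_state = enc(tf.zeros((4, 16), dtype=tf.int32))
100 |     print(sample_output.shape, sample_state.shape)  # (4, 16, 1024) and (4, 1024)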
--------------------------------------------------------------------------------
/hlp/chat/model/smn.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | def accumulate(units: int, embedding_dim: int,
5 | max_utterance: int, max_sentence: int) -> tf.keras.Model:
6 |     """
7 |     SMN matching/accumulation layer: computes the two similarity matrices for each
8 |     utterance-response pair and returns the state of the final GRU, used for scoring.
9 |     :param units: number of GRU units
10 |     :param embedding_dim: embedding dimension
11 |     :param max_utterance: maximum number of utterances per context
12 |     :param max_sentence: maximum sentence length
13 |     :return: final GRU state
14 |     """
15 | utterance_inputs = tf.keras.Input(shape=(max_utterance, max_sentence, embedding_dim))
16 | response_inputs = tf.keras.Input(shape=(max_sentence, embedding_dim))
17 | a_matrix = tf.keras.initializers.GlorotNormal()(shape=(units, units), dtype=tf.float32)
18 |
19 |     # word-level GRU over the response; the kernel (input projection) is initialized with an orthogonal matrix
20 | response_gru = tf.keras.layers.GRU(units=units, return_sequences=True,
21 | kernel_initializer='orthogonal')(response_inputs)
22 | conv2d_layer = tf.keras.layers.Conv2D(filters=8, kernel_size=(3, 3), padding='valid',
23 | kernel_initializer='he_normal', activation='relu')
24 |     max_pooling2d_layer = tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(3, 3), padding='valid')
25 | dense_layer = tf.keras.layers.Dense(50, activation='tanh', kernel_initializer='glorot_normal')
26 |
27 |     # Some setup first: each utterance in the batch is processed separately, so the
28 |     # utterance dimension is unstacked into a list aligned with the batch order.
29 | utterance_embeddings = tf.unstack(utterance_inputs, num=max_utterance, axis=1)
30 | matching_vectors = []
31 | for utterance_input in utterance_embeddings:
32 |         # first similarity matrix (see the SMN paper for the formula)
33 | matrix1 = tf.matmul(utterance_input, response_inputs, transpose_b=True)
34 | utterance_gru = tf.keras.layers.GRU(units, return_sequences=True,
35 | kernel_initializer='orthogonal')(utterance_input)
36 | matrix2 = tf.einsum("aij,jk->aik", utterance_gru, a_matrix)
37 | # matrix2 = tf.matmul(utterance_gru, a_matrix)
38 |         # second similarity matrix
39 | matrix2 = tf.matmul(matrix2, response_gru, transpose_b=True)
40 | matrix = tf.stack([matrix1, matrix2], axis=3)
41 |
42 | conv_outputs = conv2d_layer(matrix)
43 |         pooling_outputs = max_pooling2d_layer(conv_outputs)
44 | flatten_outputs = tf.keras.layers.Flatten()(pooling_outputs)
45 |
46 | matching_vector = dense_layer(flatten_outputs)
47 | matching_vectors.append(matching_vector)
48 |
49 | vector = tf.stack(matching_vectors, axis=1)
50 | outputs = tf.keras.layers.GRU(units, kernel_initializer='orthogonal')(vector)
51 |
52 | return tf.keras.Model(inputs=[utterance_inputs, response_inputs], outputs=outputs)
53 |
54 |
55 | def smn(units: int, vocab_size: int, embedding_dim: int,
56 | max_utterance: int, max_sentence: int) -> tf.keras.Model:
57 |     """
58 |     SMN model: the inputs are passed through accumulate to obtain matching vectors,
59 |     from which the final classification logits are computed.
60 |     :param units: number of GRU units
61 |     :param vocab_size: embedding vocabulary size
62 |     :param embedding_dim: embedding dimension
63 |     :param max_utterance: maximum number of utterances per context
64 |     :param max_sentence: maximum sentence length
65 |     :return: matching scores for the pair
66 |     """
67 | utterances = tf.keras.Input(shape=(max_utterance, max_sentence))
68 | responses = tf.keras.Input(shape=(max_sentence,))
69 |
70 | embeddings = tf.keras.layers.Embedding(vocab_size, embedding_dim, name="encoder")
71 | utterances_embeddings = embeddings(utterances)
72 | responses_embeddings = embeddings(responses)
73 |
74 | accumulate_outputs = accumulate(units=units, embedding_dim=embedding_dim, max_utterance=max_utterance,
75 | max_sentence=max_sentence)(
76 | inputs=[utterances_embeddings, responses_embeddings])
77 |
78 | outputs = tf.keras.layers.Dense(2, kernel_initializer='glorot_normal')(accumulate_outputs)
79 |
80 | return tf.keras.Model(inputs=[utterances, responses], outputs=outputs)
81 |
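82 | 
83 | # Minimal build sketch (illustrative; the sizes are arbitrary): score a dummy batch of
84 | # context/response pairs with randomly initialized weights.
85 | if __name__ == '__main__':
86 |     model = smn(units=200, vocab_size=2000, embedding_dim=200, max_utterance=10, max_sentence=50)
87 |     utterances = tf.zeros((4, 10, 50), dtype=tf.int32)
88 |     responses = tf.zeros((4, 50), dtype=tf.int32)
89 |     print(model([utterances, responses]).shape)  # (4, 2)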
--------------------------------------------------------------------------------
/hlp/mt/README.md:
--------------------------------------------------------------------------------
1 | # Usage
2 | The project ships with small training and validation datasets, so every feature can be run without extra configuration.
3 | - Corpus preprocessing
4 |   - (optional) configure the corpus paths and the splitting method in mt/config/config.json
5 |   - run mt/preprocess.py
6 | 
7 | - Train the model
8 |   - (optional) configure corpus paths, splitting method, model parameters and training hyper-parameters in mt/config/config.json
9 |   - run mt/train.py
10 | 
11 | - Evaluate the model
12 |   - (optional) configure the validation corpus path in mt/config/config.json
13 |   - run mt/evaluate.py
14 | 
15 | - Interactive translation
16 |   - run mt/translate.py
--------------------------------------------------------------------------------
/hlp/mt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/mt/__init__.py
--------------------------------------------------------------------------------
/hlp/mt/common/bleu.py:
--------------------------------------------------------------------------------
1 | import re
2 | import nltk
3 |
4 |
5 | def bleu_nltk(candidate_sentence, reference_sentences, language):
6 |     """
7 |     :param candidate_sentence: machine-translated sentence
8 |     :param reference_sentences: list of reference sentences
9 |     :param language: language of the sentences
10 |     :return: sentence-level BLEU score (0-100)
11 |     """
12 |     # preprocess the sentences according to the chosen language
13 |     if language == "zh":
14 |         candidate_sentence = [w for w in candidate_sentence]
15 |         reference_sentences, raw_references = [], reference_sentences
16 |         for sentence in raw_references:
17 |             reference_sentences.append([w for w in sentence])
18 |     elif language == "en":
19 |         candidate_sentence = re.sub(r'([?.!,])', r' \1', candidate_sentence)  # add a space before ?.!,
20 |         candidate_sentence = re.sub(r'[" "]+', " ", candidate_sentence)  # collapse repeated spaces
21 |         candidate_sentence = candidate_sentence.split(' ')
22 |         reference_sentences, raw_references = [], reference_sentences
23 |         for sentence in raw_references:
24 |             sentence = re.sub(r'([?.!,])', r' \1', sentence)  # add a space before ?.!,
25 |             sentence = re.sub(r'[" "]+', " ", sentence)  # collapse repeated spaces
26 |             sentence = sentence.split(' ')
27 |             reference_sentences.append(sentence)
28 |
29 | smooth_function = nltk.translate.bleu_score.SmoothingFunction()
30 | score = nltk.translate.bleu_score.sentence_bleu(reference_sentences,
31 | candidate_sentence,
32 | smoothing_function=smooth_function.method1)
33 | return score * 100
34 |
35 |
36 | def main():
37 |     # Chinese test sentences
38 | candidate_sentence_zh = '今天的天气真好啊。'
39 | reference_sentence_zh = '今天可真是个好天气啊。'
40 | score = bleu_nltk(candidate_sentence_zh, [reference_sentence_zh], language='zh')
41 | print('NLTK_BLEU:%.2f' % score)
42 |
43 |     # English test sentences
44 | candidate_sentence_en = "It's a good day."
45 | reference_sentence_en = "It's really a good sunny day."
46 | score = bleu_nltk(candidate_sentence_en, [reference_sentence_en], language='en')
47 | print('NLTK_BLEU:%.2f' % score)
48 |
49 |
50 | if __name__ == '__main__':
51 | main()
52 |
--------------------------------------------------------------------------------
/hlp/mt/common/load_dataset.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | from sklearn.model_selection import train_test_split
3 | import tensorflow as tf
4 | from hlp.mt.config import get_config as _config
5 |
6 |
7 | def load_single_sentences(path, num_sentences, column):
8 |     """Load the text of the given column; column numbering starts at 1."""
9 | sentences = []
10 | with open(path, encoding='UTF-8') as file:
11 | for i in range(num_sentences):
12 | line = file.readline()
13 | sentences.append(line.split('\t')[column - 1])
14 | return sentences
15 |
16 |
17 | def load_sentences(path, num_sentences, reverse=_config.reverse):
18 |     """Load sentence pairs.
19 |     @param path: path of the text file to load
20 |     @param num_sentences: number of sentence pairs to load
21 |     @param reverse: whether to swap the two columns
22 |     @return: lists of sentences for the corresponding columns
23 |     """
24 | source_sentences = []
25 | target_sentences = []
26 | with open(path, encoding='UTF-8') as file:
27 | for i in range(num_sentences):
28 | line = file.readline()
29 | source_sentences.append(line.split('\t')[0])
30 | target_sentences.append(line.split('\t')[1])
31 | if reverse == 'True':
32 | return target_sentences, source_sentences
33 | else:
34 | return source_sentences, target_sentences
35 |
36 |
37 | def _generate_batch_from_ram(input_path, target_path, train_size=_config.train_size):
38 |     """Produce training and validation batches from data held in memory.
39 |     """
40 | input_tensor = numpy.loadtxt(input_path, dtype='int32')
41 | target_tensor = numpy.loadtxt(target_path, dtype='int32')
42 | x_train, x_test, y_train, y_test = train_test_split(input_tensor, target_tensor, train_size=train_size)
43 |
44 | train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
45 | train_dataset = train_dataset.shuffle(_config.BUFFER_SIZE).batch(_config.BATCH_SIZE, drop_remainder=True)
46 |
47 | val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
48 | val_dataset = val_dataset.shuffle(_config.BUFFER_SIZE).batch(_config.BATCH_SIZE, drop_remainder=True)
49 |
50 | return train_dataset, val_dataset
51 |
52 |
53 | def _generate_batch_from_file(input_path, target_path, num_steps, start_step, batch_size):
54 |     """
55 |     Read the dataset batch by batch from the encoded text files.
56 |     input_path / target_path: paths of the encoded source and target files
57 |     num_steps: total number of steps, i.e. how many batches the dataset contains
58 |     start_step: step at which to start reading batches
59 |     batch_size: batch size
60 | 
61 |     return: input_tensor shape=(batch_size, sentence_length), dtype=tf.int32,
62 |     target_tensor shape=(batch_size, sentence_length), dtype=tf.int32
63 |     """
64 |
65 | step = int(start_step)
66 | while step < num_steps:
67 |         # TODO: this is inefficient (the file is re-read from the beginning for every batch)
68 | input_tensor = numpy.loadtxt(input_path, dtype='int32', skiprows=0 + step * batch_size, max_rows=batch_size)
69 | target_tensor = numpy.loadtxt(target_path, dtype='int32', skiprows=0 + step * batch_size, max_rows=batch_size)
70 | step += 1
71 | yield tf.cast(input_tensor, tf.int32), tf.cast(target_tensor, tf.int32)
72 |
73 |
74 | def get_dataset(input_path, target_path, cache, train_size, steps=None):
75 |     """Build the dataset from the given paths.
76 | 
77 |     @param input_path: path of the encoded source text
78 |     @param target_path: path of the encoded target text
79 |     @param cache: whether to load everything into memory at once; if False a generator is used
80 |     @param train_size: proportion of data used for training
81 |     @param steps: number of batches in the training text; may be None when cache is True
82 |     """
83 | if cache:
84 | train_dataset, val_dataset = _generate_batch_from_ram(input_path, target_path, train_size)
85 | else:
86 | train_dataset = _generate_batch_from_file(input_path, target_path, steps * train_size, 0, _config.BATCH_SIZE)
87 | val_dataset = _generate_batch_from_file(input_path, target_path, steps, steps * train_size, _config.BATCH_SIZE)
88 | return train_dataset, val_dataset
89 |
90 |
91 |
--------------------------------------------------------------------------------
/hlp/mt/common/misc.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 |
4 |
5 | def check_and_create(checkpoint_dir):
6 | chkpt_path = Path(checkpoint_dir)
7 | if not chkpt_path.exists():
8 | os.makedirs(checkpoint_dir, exist_ok=True)
9 | return False
10 | else:
11 | return True
--------------------------------------------------------------------------------
/hlp/mt/common/text_split.py:
--------------------------------------------------------------------------------
1 | from hlp.mt.config import get_config as _config
2 | from hlp.utils import text_split
3 |
4 |
5 | def _preprocess_sentence_en_bpe(sentence, start_word=_config.start_word, end_word=_config.end_word):
6 | sentence = start_word + ' ' + sentence + ' ' + end_word
7 | return sentence
8 |
9 |
10 | def preprocess_sentences_en(sentences, mode=_config.en_tokenize_type,
11 | start_word=_config.start_word, end_word=_config.end_word):
12 |     """
13 |     Preprocess a list of English sentences with the given mode.
14 |     Returns the processed sentences as space-separated strings with start/end tokens added.
15 |     """
16 | if mode == 'BPE':
17 | sentences = [_preprocess_sentence_en_bpe(s, start_word, end_word) for s in sentences]
18 | return sentences
19 | elif mode == 'WORD':
20 | sentences = [text_split.split_en_word(s) for s in sentences]
21 | sentences = [start_word + ' ' + ' '.join(s) + ' ' + end_word for s in sentences]
22 | return sentences
23 | else:
24 | return ''
25 |
26 |
27 | def preprocess_sentences_zh(sentences, mode=_config.zh_tokenize_type,
28 | start_word=_config.start_word, end_word=_config.end_word):
29 |     """
30 |     Preprocess a list of Chinese sentences with the given mode.
31 |     Returns the processed sentences as space-separated strings with start/end tokens added.
32 |     """
33 | if mode == 'CHAR':
34 | sentences = [text_split.split_zh_char(s) for s in sentences]
35 | sentences = [start_word + ' ' + ' '.join(s) + ' ' + end_word for s in sentences]
36 | return sentences
37 | elif mode == 'WORD':
38 | sentences = [text_split.split_zh_word(s) for s in sentences]
39 | sentences = [start_word + ' ' + ' '.join(s) + ' ' + end_word for s in sentences]
40 | return sentences
41 |
42 |
43 | def preprocess_sentences(sentences, language, mode):
44 |     """
45 |     Preprocess raw sentences for the given language and mode.
46 |     :param sentences: list of raw sentence strings
47 |     :param language: language code, "en" or "zh"
48 |     :param mode: tokenization mode (BPE/WORD for en, CHAR/WORD for zh)
49 |     :return: list of space-separated sentence strings with start/end tokens added
50 |     """
51 | if language == "en":
52 | return preprocess_sentences_en(sentences, mode)
53 | elif language == "zh":
54 | return preprocess_sentences_zh(sentences, mode)
55 |
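56 | 
57 | # Minimal usage sketch (illustrative): the exact output depends on the start/end tokens in
58 | # config.json and on the WORD/CHAR split helpers provided by hlp.utils.text_split.
59 | if __name__ == '__main__':
60 |     print(preprocess_sentences(["How are you?"], language="en", mode="WORD"))
61 |     print(preprocess_sentences(["今天天气真好"], language="zh", mode="CHAR"))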
--------------------------------------------------------------------------------
/hlp/mt/config/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "source_lang": "en",
3 | "target_lang": "zh",
4 | "reverse": "False",
5 | "en_tokenize_type": "WORD",
6 | "zh_tokenize_type": "CHAR",
7 | "train_size": 0.9,
8 | "BEAM_SIZE": 1,
9 | "validation_data": "False",
10 | "checkpoint_ensembling": "True",
11 | "num_validate_sentences": 200,
12 | "validation_freq": 1,
13 | "max_checkpoints_num": 5,
14 | "checkpoints_save_freq": 5,
15 | "checkpoint_name": "4layers512units",
16 | "num_eval": 5,
17 | "BUFFER_SIZE": 20000,
18 | "EPOCHS": 4,
19 | "num_sentences": 1000,
20 | "BATCH_SIZE": 32,
21 | "num_layers": 4,
22 | "d_model": 256,
23 | "dff": 512,
24 | "num_heads": 8,
25 | "dropout_rate" : 0.1,
26 | "max_target_length": 200,
27 | "start_word": "",
28 | "end_word": "",
29 | "target_vocab_size": 8192,
30 | "result_save_dir": "./data/result/",
31 | "path_to_train_file": "./data/anki/anki-cmn-eng.txt",
32 | "path_to_eval_file": "./data/anki/en-zh_eval.txt",
33 | "path_to_val_file": "./data/anki/en-zh_val.txt",
34 | "encoded_sequences_path_prefix": "./data/encoded_corpus/encoded_sequences_",
35 | "tokenizer_path_prefix": "./data/tokenizer/tokenizer_",
36 | "checkpoint_path_dir": "./checkpoints",
37 |
38 | "language_model": {
39 | "path_to_train_file_lm": "../data/anki/anki-cmn-eng.txt",
40 | "language": "zh",
41 | "tokenize_type": "CHAR",
42 | "EPOCHS": 4,
43 | "num_sentences": 1000,
44 | "BATCH_SIZE": 32,
45 | "train_size": 0.8,
46 | "d_rnn": 200,
47 | "d_embedding": 256,
48 | "validation_freq": 1,
49 | "max_checkpoints_num": 5,
50 | "checkpoints_save_freq": 5
51 | }
52 | }
--------------------------------------------------------------------------------
/hlp/mt/config/get_config.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 |
4 | json_path = os.path.join(os.path.dirname(__file__), 'config.json')  # path of the configuration file
5 |
6 |
7 | def get_config_json(config_file='main.json'):
8 | with open(config_file, 'r') as file:
9 | return json.load(file)
10 |
11 |
12 | conf = get_config_json(json_path)
13 |
14 | # assign configuration values to module-level variables
15 | num_validate_sentences = conf["num_validate_sentences"]  # number of sentences used for validation
16 | path_to_val_file = conf["path_to_val_file"]  # path of the validation text
17 | validation_data = conf["validation_data"]  # whether to validate on the given text dataset
18 | checkpoint_name = conf['checkpoint_name']  # checkpoint name
19 | validation_freq = conf['validation_freq']  # validation frequency, i.e. validate every N training epochs
20 | checkpoints_save_freq = conf['checkpoints_save_freq']  # checkpoint saving frequency
21 | max_checkpoints_num = conf['max_checkpoints_num']  # maximum number of checkpoints to keep
22 | source_lang = conf['source_lang']  # source language
23 | target_lang = conf['target_lang']  # target language
24 | reverse = conf['reverse']  # whether to swap the language pair of the corpus
25 | en_tokenize_type = conf['en_tokenize_type']  # English tokenization type: BPE/WORD
26 | zh_tokenize_type = conf['zh_tokenize_type']  # Chinese tokenization type: CHAR/WORD
27 | tokenizer_path_prefix = conf["tokenizer_path_prefix"]  # prefix of the tokenizer save path
28 | encoded_sequences_path_prefix = conf['encoded_sequences_path_prefix']  # prefix of the encoded-sentence save path
29 | result_save_dir = conf['result_save_dir']  # directory for training-history plots
30 | path_to_train_file = conf['path_to_train_file']  # path of the training text
31 | path_to_eval_file = conf['path_to_eval_file']  # path of the text used for metric evaluation
32 | num_eval = conf['num_eval']  # number of sentence pairs used for metric evaluation
33 | checkpoint_path = os.path.join(conf["checkpoint_path_dir"], conf['source_lang']+'_'+conf['target_lang'])  # checkpoint path
34 | BUFFER_SIZE = conf['BUFFER_SIZE']
35 | BATCH_SIZE = conf['BATCH_SIZE']
36 | train_size = conf['train_size']  # proportion of the data used for training (the rest is validation)
37 | num_sentences = conf["num_sentences"]  # number of sentence pairs used for training
38 | num_layers = conf["num_layers"]  # number of encoder and decoder layers
39 | d_model = conf["d_model"]  # embedding dimension
40 | dff = conf["dff"]  # dimension of the first dense layer of the point-wise feed-forward network
41 | num_heads = conf["num_heads"]  # number of attention heads
42 | dropout_rate = conf["dropout_rate"]
43 | EPOCHS = conf["EPOCHS"]  # number of training epochs
44 | max_target_length = conf['max_target_length']  # maximum length of generated target sentences
45 | target_vocab_size = conf["target_vocab_size"]  # target_vocab_size for English subword tokenization
46 | start_word = conf["start_word"]  # start-of-sentence token
47 | end_word = conf["end_word"]  # end-of-sentence token
48 | BEAM_SIZE = conf["BEAM_SIZE"]  # BEAM_SIZE
49 | checkpoint_ensembling = conf["checkpoint_ensembling"]  # whether to use checkpoint ensembling
50 |
51 | lm_path_to_train_file = conf["language_model"]["path_to_train_file_lm"]  # path of the language-model training text
52 | lm_language = conf["language_model"]["language"]
53 | lm_tokenize_type = conf["language_model"]["tokenize_type"]
54 | lm_EPOCHS = conf["language_model"]["EPOCHS"]
55 | lm_num_sentences = conf["language_model"]["num_sentences"]
56 | lm_BATCH_SIZE = conf["language_model"]["BATCH_SIZE"]
57 | lm_train_size = conf["language_model"]["train_size"]
58 | lm_checkpoint_path = os.path.join(conf["checkpoint_path_dir"], 'lm')
59 | lm_d_embedding = conf["language_model"]["d_embedding"]
60 | lm_d_rnn = conf["language_model"]["d_rnn"]
61 | lm_max_checkpoints_num = conf["language_model"]["max_checkpoints_num"]
62 | lm_checkpoints_save_freq = conf["language_model"]["checkpoints_save_freq"]
63 | lm_validation_freq = conf["language_model"]["validation_freq"]
64 |
65 |
--------------------------------------------------------------------------------
/hlp/mt/evaluate.py:
--------------------------------------------------------------------------------
1 | from hlp.mt.common import bleu as _bleu
2 | from hlp.mt.common import load_dataset
3 | from hlp.mt.config import get_config as _config
4 | from hlp.mt.model import nmt_model
5 | from hlp.mt import translator
6 | from hlp.mt.common.misc import check_and_create
7 |
8 |
9 | # BLEU metric computation
10 | def _calc_bleu(path, model, tokenizer_source, tokenizer_target):
11 |     # load the evaluation texts
12 | source_sentences, target_sentences = load_dataset.load_sentences(path, _config.num_eval)
13 |
14 | print('开始计算BLEU指标...')
15 | bleu_sum = 0
16 | for i in range(_config.num_eval):
17 | candidate_sentence = translator.translate(source_sentences[i], model, tokenizer_source,
18 | tokenizer_target, beam_size=_config.BEAM_SIZE)[0]
19 | print('-' * 20)
20 | print('第%d/%d个句子:' % (i + 1, _config.num_eval))
21 | print('源句子:' + source_sentences[i].strip())
22 | print('机翻句子:' + candidate_sentence)
23 | print('参考句子:' + target_sentences[i])
24 | bleu_i = _bleu.bleu_nltk(candidate_sentence, [target_sentences[i]], language=_config.target_lang)
25 | print('此句子BLEU指标:%.2f' % bleu_i)
26 | bleu_sum += bleu_i
27 |
28 | bleu = bleu_sum / _config.num_eval
29 | print('-' * 20)
30 | print('平均BLEU指标为:%.2f' % bleu)
31 |
32 |
33 | def main():
34 |     if check_and_create(_config.checkpoint_path):  # check whether a checkpoint directory exists
35 |         # restore the saved configuration needed for evaluation
36 | transformer, _, tokenizer_source, tokenizer_target = nmt_model.load_model()
37 | _calc_bleu(_config.path_to_eval_file, transformer, tokenizer_source, tokenizer_target)
38 | else:
39 | print('没有发现训练好的模型,请先训练模型.')
40 |
41 |
42 | if __name__ == '__main__':
43 | main()
44 |
--------------------------------------------------------------------------------
/hlp/mt/lm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/mt/lm/__init__.py
--------------------------------------------------------------------------------
/hlp/mt/lm/language_model.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from pathlib import Path
4 | import tensorflow as tf
5 |
6 | from hlp.mt.config import get_config as _config
7 |
8 |
9 | class LanguageModel(tf.keras.Model):
10 |     """
11 |     Two-layer LSTM language model: embeds the input sequence and predicts the next token at every position.
12 |     """
13 |
14 | def __init__(self, vocab_size, d_embedding, batch_size, d_rnn):
15 | super(LanguageModel, self).__init__()
16 |         # initial parameters
17 | self.batch_size = batch_size
18 | self.d_rnn = d_rnn
19 | self.d_embedding = d_embedding
20 | self.embedding = tf.keras.layers.Embedding(vocab_size+1, d_embedding)
21 | self.state0 = [tf.zeros([batch_size, d_rnn]), tf.zeros([batch_size, d_rnn])]
22 | self.state1 = [tf.zeros([batch_size, d_rnn]), tf.zeros([batch_size, d_rnn])]
23 |
24 | self.cell0 = tf.keras.layers.LSTMCell(d_rnn)
25 | self.cell1 = tf.keras.layers.LSTMCell(d_rnn)
26 |
27 | self.output_layer = tf.keras.layers.Dense(vocab_size)
28 |
29 | def call(self, sequences):
30 |         """
31 |         Takes encoded sentences of shape (batch_size, seq_len)
32 |         and returns the predicted sequence of logits.
33 |         """
34 | output = []
35 |         sequences = self.embedding(sequences)  # shape ---> (batch_size, seq_len, d_embedding)
36 | sequences *= tf.math.sqrt(tf.cast(self.d_embedding, tf.float32))
37 |         # output is the list of per-step outputs; each element has shape (batch_size, vocab_size)
38 |         for sequences_t in tf.unstack(sequences, axis=1):  # sequences_t.shape --> (batch_size, d_embedding)
39 |             out0, self.state0 = self.cell0(sequences_t, self.state0)  # out0.shape --> (batch_size, d_rnn)
40 | out1, self.state1 = self.cell1(out0, self.state1)
41 | out1 = self.output_layer(out1)
42 | output.append(out1)
43 |
44 | predictions = tf.stack(output, axis=1) # prediction.shape --> (batch_size, seq_len, vocab_size)
45 | return predictions
46 |
47 | def reset_states(self):
48 | super(LanguageModel, self).reset_states()
49 | self.state0 = [tf.zeros([self.batch_size, self.d_rnn]), tf.zeros([self.batch_size, self.d_rnn])]
50 | self.state1 = [tf.zeros([self.batch_size, self.d_rnn]), tf.zeros([self.batch_size, self.d_rnn])]
51 |
52 |
53 | def check_point():
54 |     """
55 |     Check whether there are any files in the checkpoint directory.
56 |     """
57 |     # determine the checkpoint path of the language model
58 | checkpoint_dir = _config.lm_checkpoint_path
59 | is_exist = Path(checkpoint_dir)
60 | if not is_exist.exists():
61 | os.makedirs(checkpoint_dir, exist_ok=True)
62 | if_ckpt = tf.io.gfile.listdir(checkpoint_dir)
63 | return if_ckpt
64 |
65 |
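66 | 
67 | # Minimal usage sketch (illustrative; the sizes are arbitrary): run a forward pass on a
68 | # dummy batch of token ids and inspect the shape of the returned logits.
69 | if __name__ == '__main__':
70 |     lm = LanguageModel(vocab_size=100, d_embedding=32, batch_size=4, d_rnn=64)
71 |     print(lm(tf.zeros((4, 10), dtype=tf.int32)).shape)  # (4, 10, 100)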
--------------------------------------------------------------------------------
/hlp/mt/lm/lm_preprocess.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy
3 | from sklearn.model_selection import train_test_split
4 |
5 | from hlp.mt.common import load_dataset, text_vectorize, text_split
6 | from hlp.mt.config import get_config as _config
7 |
8 |
9 | def get_tokenizer_path(language, mode):
10 |     """Build the tokenizer save path.
11 | 
12 |     @param language: language
13 |     @param mode: encoding type
14 |     @return: tokenizer save path
15 |     """
16 | return _config.tokenizer_path_prefix + language + '_' + mode.lower()
17 |
18 |
19 | def get_encoded_sequences_path(language, postfix=''):
20 |     """Get the save path of the encoded sentences for a language.
21 | 
22 |     @param language: language
23 |     @param postfix: suffix appended to the save path
24 |     @return: save path of the encoded sentences
25 |     """
26 | return _config.encoded_sequences_path_prefix + language + postfix
27 |
28 |
29 | def train_preprocess():
30 | language = _config.lm_language
31 | mode = _config.lm_tokenize_type
32 | tokenizer_path = get_tokenizer_path(language, mode)
33 | encoded_sequences_path_train = get_encoded_sequences_path(language, postfix='_train')
34 | # encoded_sequences_path_val = get_encoded_sequences_path(language, postfix='_val')
35 |
36 |     # load and preprocess the text
37 | print('正在加载、预处理数据...')
38 | sentences = load_dataset.load_single_sentences(_config.lm_path_to_train_file, _config.lm_num_sentences, column=2)
39 | sentences = text_split.preprocess_sentences(sentences, language, mode)
40 | print('已加载句子数量:%d' % _config.lm_num_sentences)
41 | print('数据加载、预处理完毕!\n')
42 |
43 |     # build and save the tokenizer from the preprocessed text
44 | tokenizer, vocab_size = text_vectorize.create_and_save_tokenizer(sentences, tokenizer_path, language, mode)
45 | print('生成字典大小:%d' % vocab_size)
46 | print('字典生成、保存完毕!\n')
47 |
48 |     # encode the text with the tokenizer and save it
49 | print("正在编码训练集句子...")
50 | max_sequence_length = text_vectorize.encode_and_save(sentences, tokenizer, encoded_sequences_path_train, language,
51 | mode)
52 | print('最大句子长度:%d' % max_sequence_length)
53 | print("句子编码完毕!\n")
54 |
55 | return tokenizer, vocab_size, max_sequence_length
56 |
57 |
58 | def get_dataset(sequences_path, train_size=_config.lm_train_size):
59 |     """Load the encoded sequences and split them into training and validation datasets.
60 |     """
61 | tensor = numpy.loadtxt(sequences_path, dtype='int32')
62 |
63 | train_dataset, val_dataset = train_test_split(tensor, train_size=train_size)
64 | train_dataset = tf.data.Dataset.from_tensor_slices(train_dataset)
65 | train_dataset = train_dataset.shuffle(_config.lm_BATCH_SIZE).batch(_config.lm_BATCH_SIZE, drop_remainder=True)
66 | val_dataset = tf.data.Dataset.from_tensor_slices(val_dataset)
67 | val_dataset = val_dataset.shuffle(_config.lm_BATCH_SIZE).batch(_config.lm_BATCH_SIZE, drop_remainder=True)
68 |
69 | return train_dataset, val_dataset
70 |
71 |
--------------------------------------------------------------------------------
/hlp/mt/lm/lm_rescore.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import tensorflow as tf
4 | from pathlib import Path
5 |
6 | from hlp.mt.config import get_config as _config
7 | from hlp.mt.common import text_vectorize
8 | from hlp.mt.lm import language_model
9 |
10 |
11 | def sentence_rescore(sentences, model, tokenizer):
12 |     """Score a list of sentences with the language model.
13 | 
14 |     @param sentences: list of sentences to score
15 |     @param model: language model instance used for scoring
16 |     @param tokenizer: tokenizer
17 |     """
18 | language = _config.lm_language
19 | mode = _config.lm_tokenize_type
20 |
21 | scores_list = []
22 | for i, sentence in enumerate(sentences):
23 | score = 0
24 | sequence = text_vectorize.encode_sentences([sentence], tokenizer, language, mode)
25 | seq_input = sequence[:, :-1]
26 | seq_real = sequence[:, 1:]
27 |         prediction = model(seq_input)  # (1, seq_len, vocab_size)
28 |         for j in range(prediction.shape[1]):  # iterate over time steps
29 |             score += prediction[0][j][seq_real[0][j]]  # score of the true token at step j
30 | scores_list.append(score)
31 |
32 | return scores_list
33 |
34 |
35 | def load_checkpoint(model, checkpoint_path=None):
36 |     """Restore the model from a checkpoint.
37 | 
38 |     @param model: model
39 |     @param checkpoint_path: checkpoint path; if None, the latest saved checkpoint is used
40 |     """
41 | if checkpoint_path is None:
42 | checkpoint_dir = _config.lm_checkpoint_path
43 | is_exist = Path(checkpoint_dir)
44 | else:
45 | checkpoint_dir = os.path.dirname(checkpoint_path)
46 | is_exist = Path(checkpoint_path)
47 |
48 | ckpt = tf.train.Checkpoint(language_model=model, optimizer=tf.keras.optimizers.Adam())
49 | ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=_config.max_checkpoints_num)
50 | if not is_exist.exists():
51 |         raise ValueError("path %s does not exist" % checkpoint_path)
52 | elif checkpoint_path is None:
53 | if language_model.check_point():
54 | ckpt.restore(ckpt_manager.latest_checkpoint)
55 | print('已恢复至最新检查点!')
56 | else:
57 | ckpt.restore(checkpoint_path)
58 |
--------------------------------------------------------------------------------
/hlp/mt/lm/lm_train.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import tensorflow as tf
4 |
5 | from hlp.mt.config import get_config as _config
6 | from hlp.mt.lm import language_model, lm_preprocess
7 | from hlp.utils import optimizers
8 | from hlp.utils import train_history
9 |
10 |
11 | def _train_step(sequences, lm, optimizer, train_loss, train_accuracy):
12 |     """One training step.
13 |     @param sequences: one encoded batch of data, shape --> (batch_size, seq_length)
14 |     @param lm: language model instance
15 |     @param optimizer: optimizer
16 |     """
17 | seq_input = sequences[:, :-1]
18 | seq_real = sequences[:, 1:]
19 |
20 | with tf.GradientTape() as tape:
21 | predictions = lm(seq_input)
22 | loss = optimizers.loss_func_mask(seq_real, predictions)
23 |
24 | gradients = tape.gradient(loss, lm.trainable_variables)
25 | optimizer.apply_gradients(zip(gradients, lm.trainable_variables))
26 |
27 | train_loss(loss)
28 | train_accuracy(seq_real, predictions)
29 |
30 |
31 | def _train_epoch(dataset, model, optimizer, train_loss, train_accuracy, sample_sum):
32 |     """
33 |     Train on the dataset and print progress information.
34 |     """
35 | trained_seq_sum = 0
36 | for batch, sequences in enumerate(dataset):
37 | _train_step(sequences, model, optimizer, train_loss, train_accuracy)
38 | trained_seq_sum += _config.lm_BATCH_SIZE
39 | print('\r{}/{} [batch {} loss {:.4f} accuracy {:.4f}]'.format(trained_seq_sum,
40 | sample_sum,
41 | batch + 1,
42 | train_loss.result()
43 | , train_accuracy.result()), end='')
44 | print('\r{}/{} [==============================]'.format(sample_sum, sample_sum), end='')
45 |
46 |
47 | def train(epochs=_config.lm_EPOCHS, validation_split=0.0,
48 | min_delta=0.00003, patience=10, validation_freq=1):
49 |     """Train the language model with simple early stopping.
50 |     @param epochs: number of training epochs
51 |     @param validation_split: fraction of the data used for validation
52 |     @param min_delta: minimum relative improvement in val_accuracy that
53 |         counts as progress towards early stopping
54 |     @param patience: how many validations without improvement are tolerated
55 |     @param validation_freq: validation frequency (validate every N epochs)
56 |     @return: history, a dict containing all metrics collected during training
57 |     """
58 | max_acc = 0
59 | patience_num = 0
60 |
61 | optimizer = tf.keras.optimizers.Adam()
62 | train_loss = tf.keras.metrics.Mean(name='train_loss')
63 | train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
64 | history = {'accuracy': [], 'loss': [], 'val_accuracy': [], 'val_loss': []}
65 | encoded_sequences_path_train = lm_preprocess.get_encoded_sequences_path(_config.lm_language, postfix='_train')
66 |
67 | tokenizer, vocab_size, max_sequence_length = lm_preprocess.train_preprocess()
68 | train_dataset, val_dataset = lm_preprocess.get_dataset(encoded_sequences_path_train, train_size=validation_split)
69 |
70 | lm = language_model.LanguageModel(vocab_size, _config.lm_d_embedding, _config.lm_BATCH_SIZE, _config.lm_d_rnn)
71 |
72 |     # checkpoint setup: if a checkpoint exists, restore the latest one
73 | ckpt = tf.train.Checkpoint(language_model=lm, optimizer=optimizer)
74 | ckpt_manager = tf.train.CheckpointManager(ckpt, _config.lm_checkpoint_path, max_to_keep=_config.max_checkpoints_num)
75 | if language_model.check_point():
76 | ckpt.restore(ckpt_manager.latest_checkpoint)
77 | print('已恢复至最新检查点!')
78 |
79 | train_batch_sum = int((_config.lm_num_sentences*_config.lm_train_size)//_config.lm_BATCH_SIZE)
80 | val_batch_sum = int((_config.lm_num_sentences*(1-_config.lm_train_size))//_config.lm_BATCH_SIZE)
81 | train_seq_sum = int(train_batch_sum * _config.lm_BATCH_SIZE)
82 | val_seq_sum = int(val_batch_sum * _config.lm_BATCH_SIZE)
83 |
84 | print("开始训练...")
85 | for epoch in range(epochs):
86 | print('Epoch {}/{}'.format(epoch + 1, epochs))
87 | start = time.time()
88 | train_loss.reset_states()
89 | train_accuracy.reset_states()
90 |
91 | _train_epoch(train_dataset, lm, optimizer, train_loss, train_accuracy, train_seq_sum)
92 |
93 | history['accuracy'].append(train_accuracy.result().numpy())
94 | history['loss'].append(train_loss.result().numpy())
95 |
96 | epoch_time = (time.time() - start)
97 | step_time = epoch_time * _config.BATCH_SIZE / (_config.lm_num_sentences*_config.lm_train_size)
98 |
99 |         # validation
100 |         # validate on the validation set when the configured validation frequency is reached or on the last epoch
101 |         if (epoch + 1) % validation_freq == 0 or (epoch + 1) == epochs:
102 | temp_loss = train_loss.result()
103 | temp_acc = train_accuracy.result()
104 | train_loss.reset_states()
105 | train_accuracy.reset_states()
106 |
107 | _train_epoch(val_dataset, lm, optimizer, train_loss, train_accuracy, train_seq_sum+val_seq_sum)
108 |
109 | history['val_accuracy'].append(train_accuracy.result().numpy())
110 | history['val_loss'].append(train_loss.result().numpy())
111 | print(' - {:.0f}s - {:.0f}ms/step - loss: {:.4f} - accuracy {:.4f} - val_loss: {:.4f} - val_accuracy {:.4f}'
112 | .format(epoch_time, step_time * 1000, temp_loss, temp_acc, train_loss.result(),
113 | train_accuracy.result()))
114 |             # early-stopping check
115 | if train_accuracy.result().numpy() >= (max_acc * (1 + min_delta)):
116 | max_acc = train_accuracy.result().numpy()
117 | patience_num = 0
118 | else:
119 | patience_num += 1
120 |
121 | if (epoch + 1) % _config.checkpoints_save_freq == 0:
122 | ckpt_save_path = ckpt_manager.save()
123 | print('检查点已保存至:{}'.format(ckpt_save_path))
124 |
125 |             # stop training if val_accuracy has not improved for `patience` consecutive validations
126 | if patience_num == patience:
127 | print('检测到连续%d个验证集增长不达标,停止训练' % patience)
128 | break
129 |
130 | if (epoch + 1) % _config.checkpoints_save_freq != 0:
131 | ckpt_save_path = ckpt_manager.save()
132 | print('检查点已保存至:{}'.format(ckpt_save_path))
133 |
134 | train_history.show_and_save_history(history, _config.result_save_dir, _config.lm_validation_freq)
135 | return history
136 |
137 |
138 | def main():
139 | train(validation_split=1-_config.train_size)
140 |
141 |
142 | if __name__ == '__main__':
143 | main()
144 |
--------------------------------------------------------------------------------
/hlp/mt/model/checkpoint.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from tensorflow.python.training.tracking import graph_view
4 | import tensorflow as tf
5 | import numpy
6 |
7 | from hlp.mt.config import get_config as _config
8 | from hlp.mt.model import nmt_model, checkpoint
9 | from hlp.mt.model import transformer as _transformer
10 |
11 |
12 | def load_checkpoint(transformer, optimizer, checkpoint_path=_config.checkpoint_path):
13 |     """
14 |     Restore a checkpoint.
15 |     @param transformer: model instance
16 |     @param optimizer: optimizer
17 |     @param checkpoint_path: path of the checkpoint
18 |     """
19 |     # load the checkpoint
20 | checkpoint_dir = os.path.dirname(checkpoint_path)
21 | ckpt = tf.train.Checkpoint(transformer=transformer,
22 | optimizer=optimizer)
23 | ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=_config.max_checkpoints_num)
24 | if ckpt_manager.latest_checkpoint:
25 | # ckpt.restore(ckpt_manager.latest_checkpoint)
26 | ckpt.restore(checkpoint_path)
27 | # print('已恢复至最新的检查点!')
28 | print('正在使用检查点:'+checkpoint_path)
29 |
30 |
31 | def get_checkpoints_path(model_dir=_config.checkpoint_path):
32 |     """
33 |     Get the list of checkpoint paths.
34 |     @param model_dir: checkpoint directory
35 |     @return: list of all checkpoint paths in the directory
36 |     """
37 | checkpoint_state = tf.train.get_checkpoint_state(model_dir)
38 | if checkpoint_state is None:
39 | raise ValueError("未在目录:%s 中发现检查点!" % model_dir)
40 | return checkpoint_state.all_model_checkpoint_paths
41 |
42 |
43 | def average_checkpoints(model_dir,
44 | output_dir,
45 | trackables,
46 | max_count=8,
47 | model_key="model"):
48 |     """
49 |     Average several checkpoints and save the result as a new checkpoint.
50 |     @param model_dir: directory of the checkpoints to average
51 |     @param output_dir: directory into which the averaged checkpoint is written
52 |     @param trackables: dict of the objects stored in the checkpoint
53 |     @param max_count: maximum number of checkpoints used for averaging
54 |     @param model_key: key of the model inside the dict
55 |     @return: output_dir
56 |     """
57 | if model_dir == output_dir:
58 | raise ValueError("输入与输出需是不同文件夹")
59 | model = trackables.get(model_key)
60 | if model is None:
61 | raise ValueError("模型的key:%s 并没有在字典 %s 中找到" % (model_key, trackables))
62 |
63 |     # get the list of checkpoint paths
64 | checkpoint_state = tf.train.get_checkpoint_state(model_dir)
65 | if checkpoint_state is None:
66 | raise ValueError(" %s 文件夹中没有检查点" % model_dir)
67 | checkpoints_path = checkpoint_state.all_model_checkpoint_paths
68 | if len(checkpoints_path) > max_count:
69 | checkpoints_path = checkpoints_path[-max_count:]
70 |
71 | _average_checkpoints_into_layer(checkpoints_path, model, model_key)
72 |
73 | last_step = _get_step_from_checkpoint_prefix(checkpoints_path[-1])
74 | checkpoint = tf.train.Checkpoint(**trackables)
75 | new_checkpoint_manager = tf.train.CheckpointManager(checkpoint, output_dir, max_to_keep=None)
76 | new_checkpoint_manager.save(checkpoint_number=last_step)
77 | return output_dir
78 |
79 |
80 | def _average_checkpoints_into_layer(checkpoints, layer, layer_prefix):
81 |     """Average the checkpoints and load the averaged values into the model.
82 |     @param checkpoints: list of checkpoint paths
83 |     @param layer: model instance
84 |     @param layer_prefix: key of the model in the checkpoint
85 |     """
86 | if not checkpoints:
87 | raise ValueError("至少应有一个检查点")
88 | if not layer.built:
89 | raise ValueError("使用此方法前应对模型进行build")
90 |
91 |     # reset all model variables to zero
92 | for variable in layer.variables:
93 | variable.assign(tf.zeros_like(variable))
94 |
95 |     # build a mapping from checkpoint variable names to the layer's variables
96 | _, names_to_variables = _get_variables_name_mapping(layer, root_key=layer_prefix)
97 |
98 | num_checkpoints = len(checkpoints)
99 | tf.get_logger().info("正在平均 %d 个检查点...", num_checkpoints)
100 | for checkpoint_path in checkpoints:
101 | tf.get_logger().info("正在读取检查点 %s...", checkpoint_path)
102 | reader = tf.train.load_checkpoint(checkpoint_path)
103 | for path in reader.get_variable_to_shape_map().keys():
104 | if not path.startswith(layer_prefix) or ".OPTIMIZER_SLOT" in path:
105 | continue
106 | variable = names_to_variables[path]
107 | value = reader.get_tensor(path)
108 | variable.assign_add(value / num_checkpoints)
109 |
110 |
111 | def _get_step_from_checkpoint_prefix(prefix):
112 | """Extracts the training step from the checkpoint file prefix."""
113 | return int(prefix.split("-")[-1])
114 |
115 |
116 | def _get_variables_name_mapping(root, root_key=None):
117 |     """ Return mappings between checkpoint variable names and the layer's variables.
118 |     @param root: model (layer) instance
119 |     @param root_key: key of the model (layer) inside the checkpoint
120 |     @return: dicts mapping variables to names and names to variables
121 |     """
122 | named_variables, _, _ = graph_view.ObjectGraphView(root).serialize_object_graph()
123 | variables_to_names = {}
124 | names_to_variables = {}
125 | for saveable_object in named_variables:
126 | variable = saveable_object.op
127 |         # check whether it is a tensor; disabled for now
128 | # if not hasattr(variable, "ref"):
129 | # continue
130 | name = saveable_object.name
131 | if root_key is not None:
132 | name = "%s/%s" % (root_key, name)
133 | variables_to_names[variable.experimental_ref()] = name
134 | names_to_variables[name] = variable
135 | return variables_to_names, names_to_variables
136 |
137 |
138 | def _model_build(model, inp, tar):  # run one forward pass so that the model's variables are created
139 | tar_inp = tar[:, :-1]
140 | enc_padding_mask, combined_mask, dec_padding_mask = _transformer.create_masks(inp, tar_inp)
141 | predictions, _ = model(inp, tar_inp, True, enc_padding_mask, combined_mask, dec_padding_mask)
142 |
143 |
144 | def _get_sample_dataset():
145 | """从保存的文件中读取样例数据进行模型build"""
146 | input_path = _config.encoded_sequences_path_prefix + _config.source_lang
147 | target_path = _config.encoded_sequences_path_prefix + _config.target_lang
148 | input_tensor = tf.cast(numpy.loadtxt(input_path, dtype='int32', max_rows=_config.BATCH_SIZE), tf.int32)
149 | target_tensor = tf.cast(numpy.loadtxt(target_path, dtype='int32', max_rows=_config.BATCH_SIZE), tf.int32)
150 | return input_tensor, target_tensor
151 |
152 |
153 | def average_checkpoints_test():
154 | """
155 |     Example of averaging the checkpoints themselves.
156 |     Requires training first so that several checkpoints have been saved.
157 | """
158 |     # Model-related configuration
159 | transformer, optimizer, _, _ = nmt_model.load_model()
160 | trackables = {'transformer': transformer, 'optimizer': optimizer}
161 | model_key = 'transformer'
162 |
163 |     # Load one batch of data to build the model
164 | input_tensor, target_tensor = _get_sample_dataset()
165 | _model_build(transformer, input_tensor, target_tensor)
166 |
167 |     # Checkpoint directory and output directory for the averaged checkpoint
168 | model_dir = _config.checkpoint_path
169 | output_dir = model_dir + '_avg_ckpts'
170 | if not os.path.exists(output_dir):
171 | os.makedirs(output_dir, exist_ok=True)
172 |
173 | path = checkpoint.average_checkpoints(model_dir, output_dir, trackables, max_count=8, model_key=model_key)
174 | print(path)
175 |
176 |
177 | if __name__ == '__main__':
178 | average_checkpoints_test()
--------------------------------------------------------------------------------
/hlp/mt/model/nmt_model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | import hlp.mt.common.text_vectorize
4 | from hlp.mt.model import transformer as _transformer
5 | from hlp.mt.config import get_config as _config
6 | from hlp.mt.common import text_vectorize
7 | from hlp.utils import optimizers as _optimizers
8 |
9 |
10 | def create_model(vocab_size_source, vocab_size_target):
11 | """获取模型"""
12 | transformer = _transformer.Transformer(_config.num_layers,
13 | _config.d_model,
14 | _config.num_heads,
15 | _config.dff,
16 | vocab_size_source + 1,
17 | vocab_size_target + 1,
18 | pe_input=vocab_size_source + 1,
19 | pe_target=vocab_size_target + 1,
20 | rate=_config.dropout_rate)
21 | return transformer
22 |
23 |
24 | def load_model():
25 | """
26 |     Restore the data needed before translation or evaluation.
27 |     """
28 |     # Get the tokenizer save paths
29 | source_mode = hlp.mt.common.text_vectorize.get_tokenizer_mode(_config.source_lang)
30 | target_mode = hlp.mt.common.text_vectorize.get_tokenizer_mode(_config.target_lang)
31 | source_tokenizer_path = hlp.mt.common.text_vectorize.get_tokenizer_path(_config.source_lang, source_mode)
32 | target_tokenizer_path = hlp.mt.common.text_vectorize.get_tokenizer_path(_config.target_lang, target_mode)
33 |     # Load the source-language tokenizer
34 |     print("Loading the source language (%s) tokenizer..." % _config.source_lang)
35 | tokenizer_source, vocab_size_source = text_vectorize.load_tokenizer(source_tokenizer_path,
36 | _config.source_lang, source_mode)
37 |     print('Source vocabulary size: %d' % vocab_size_source)
38 |     print('Source language tokenizer loaded!\n')
39 |
40 |     # Load the target-language tokenizer
41 |     print("Loading the target language (%s) tokenizer..." % _config.target_lang)
42 | tokenizer_target, vocab_size_target = text_vectorize.load_tokenizer(target_tokenizer_path,
43 | _config.target_lang, target_mode)
44 |     print('Target vocabulary size: %d' % vocab_size_target)
45 |     print('Target language tokenizer loaded!\n')
46 |
47 |     # Create the model and related variables
48 | learning_rate = _optimizers.CustomSchedule(_config.d_model)
49 | optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
50 | transformer = create_model(vocab_size_source, vocab_size_target)
51 |
52 | return transformer, optimizer, tokenizer_source, tokenizer_target
53 |
--------------------------------------------------------------------------------
/hlp/mt/preprocess.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from hlp.mt.common.text_vectorize import get_encoded_sequences_path, get_tokenizer_path, get_tokenizer_mode
4 | from hlp.mt.config import get_config as _config
5 | from hlp.mt.common import text_vectorize
6 | from hlp.mt.common.text_split import preprocess_sentences
7 | from hlp.mt.common.load_dataset import load_sentences
8 |
9 |
10 | def _count_words(sentences):
11 | """输入句子列表,使用空格分隔返回单词数"""
12 | count = 0
13 | for s in sentences:
14 | s = re.split(r' +', s)
15 | count += len(s)
16 | return count
17 |
18 |
19 | def train_preprocess():
20 |     # Get the source/target tokenization modes, tokenizer save paths, and encoded-sequence save paths
21 | source_mode = get_tokenizer_mode(_config.source_lang)
22 | target_mode = get_tokenizer_mode(_config.target_lang)
23 |
24 | source_tokenizer_path = get_tokenizer_path(_config.source_lang, source_mode)
25 | target_tokenizer_path = get_tokenizer_path(_config.target_lang, target_mode)
26 |
27 | source_sequences_path_train = get_encoded_sequences_path(_config.source_lang, postfix='_train')
28 | target_sequences_path_train = get_encoded_sequences_path(_config.target_lang, postfix='_train')
29 | source_sequences_path_val = get_encoded_sequences_path(_config.source_lang, postfix='_val')
30 | target_sequences_path_val = get_encoded_sequences_path(_config.target_lang, postfix='_val')
31 |
32 |     # Load sentences
33 |     print('Loading the training dataset...')
34 | source_sentences, target_sentences = load_sentences(_config.path_to_train_file, _config.num_sentences)
35 |
36 |     # Load the validation set
37 |     if _config.validation_data == "True":
38 |         print('Loading the validation dataset...')
39 | source_sentences_val, target_sentences_val = load_sentences(_config.path_to_val_file,
40 | _config.num_validate_sentences)
41 |
42 |     print('Number of sentences loaded: %d' % _config.num_sentences)
43 |     # Count the words in the corpus
44 |     num_words = _count_words(source_sentences)
45 |     print('Number of words in the source corpus (%s): %d' % (_config.source_lang, num_words))
46 |
47 |     # Preprocess sentences
48 |     print('Preprocessing the training dataset...')
49 | source_sentences = preprocess_sentences(source_sentences, _config.source_lang, source_mode)
50 | target_sentences = preprocess_sentences(target_sentences, _config.target_lang, target_mode)
51 |
52 | if _config.validation_data == "True":
53 |         print('Preprocessing the validation dataset...')
54 | source_sentences_val = preprocess_sentences(source_sentences_val, _config.source_lang, source_mode)
55 | target_sentences_val = preprocess_sentences(target_sentences_val, _config.target_lang, target_mode)
56 |
57 |     # Create and save the tokenizers
58 |     print('Creating and saving the source language (%s) tokenizer (tokenization: %s)...' % (_config.source_lang, _config.en_tokenize_type))
59 | tokenizer_source, vocab_size_source = text_vectorize.create_and_save_tokenizer(source_sentences,
60 | source_tokenizer_path,
61 | _config.source_lang,
62 | source_mode)
63 |     print('Source vocabulary size: %d' % vocab_size_source)
64 |
65 |     print('Creating and saving the target language (%s) tokenizer (tokenization: %s)...' % (_config.target_lang, _config.zh_tokenize_type))
66 | tokenizer_target, vocab_size_target = text_vectorize.create_and_save_tokenizer(target_sentences,
67 | target_tokenizer_path,
68 | _config.target_lang,
69 | target_mode)
70 |     print('Target vocabulary size: %d' % vocab_size_target)
71 |
72 |     # Encode sentences
73 |     print("Encoding the training sentences...")
74 | max_sequence_length_source = text_vectorize.encode_and_save(sentences=source_sentences, tokenizer=tokenizer_source,
75 | save_path=source_sequences_path_train,
76 | language=_config.source_lang, mode=source_mode)
77 | max_sequence_length_target = text_vectorize.encode_and_save(sentences=target_sentences, tokenizer=tokenizer_target,
78 | save_path=target_sequences_path_train,
79 | language=_config.target_lang, mode=target_mode)
80 |     print('Maximum source (%s) sentence length: %d' % (_config.source_lang, max_sequence_length_source))
81 |     print('Maximum target (%s) sentence length: %d' % (_config.target_lang, max_sequence_length_target))
82 |
83 | if _config.validation_data == "True":
84 | print("正在编码验证集句子...")
85 | _ = text_vectorize.encode_and_save(sentences=source_sentences_val, tokenizer=tokenizer_source,
86 | save_path=source_sequences_path_val, language=_config.source_lang,
87 | mode=source_mode)
88 | _ = text_vectorize.encode_and_save(sentences=target_sentences_val, tokenizer=tokenizer_target,
89 | save_path=target_sequences_path_val, language=_config.target_lang,
90 | mode=target_mode)
91 | print("语料处理完成.\n")
92 |
93 | return vocab_size_source, vocab_size_target
94 |
95 |
96 | if __name__ == '__main__':
97 | train_preprocess()
98 |
--------------------------------------------------------------------------------
/hlp/mt/translate.py:
--------------------------------------------------------------------------------
1 | import hlp.mt.common.misc
2 | from hlp.mt.model import nmt_model
3 | from hlp.mt import translator
4 | from hlp.mt.config import get_config as _config
5 | from hlp.mt.common.misc import check_and_create
6 |
7 |
8 | def main():
9 |     if check_and_create(_config.checkpoint_path):  # check whether checkpoints exist
10 |         # Load the saved configuration that is needed
11 | transformer, _, tokenizer_source, tokenizer_target = nmt_model.load_model()
12 |
13 | # translate
14 | while True:
15 | print('-' * 30)
16 |             print('Enter 0 to exit')
17 |             sentence = input('Enter a sentence to translate: ')
18 | if sentence == '0':
19 | break
20 | else:
21 |                 print('Translation:', translator.translate(sentence, transformer,
22 | tokenizer_source, tokenizer_target,
23 | beam_size=_config.BEAM_SIZE))
24 | else:
25 |         print('Please train a model before using translation...')
26 |
27 |
28 | if __name__ == '__main__':
29 | main()
30 |
--------------------------------------------------------------------------------
/hlp/mt/translator.py:
--------------------------------------------------------------------------------
1 | """
2 | Translate sentences (inference).
3 | """
4 | import copy
5 |
6 | import tensorflow as tf
7 |
8 | import hlp.mt.common.text_vectorize
9 | from hlp.mt.common import text_split
10 | from hlp.mt.common import text_vectorize
11 | from hlp.mt.config import get_config as _config
12 | from hlp.mt.model import checkpoint
13 | from hlp.mt.model import transformer as _transformer
14 | from hlp.utils import beamsearch
15 |
16 |
17 | def _checkpoint_ensembling(checkpoints_path, model, inputs, decoder_input):
18 | """
19 | 使用路径中的检查点得到此步的predictions
20 | @param checkpoints_path: 使用的检查点路径列表
21 | @param model: 模型
22 | @param inputs: 输入
23 | @param decoder_input: 解码器输入
24 | @param enc_padding_mask: 编码器遮挡
25 | @param combined_mask: 遮挡
26 | @param dec_padding_mask: 解码器遮挡
27 | @return:使用多个检查点模型后的平均predictions
28 | """
29 |     # First get predictions from the first checkpoint model
30 | enc_padding_mask, combined_mask, dec_padding_mask = _transformer.create_masks(inputs, decoder_input)
31 | checkpoint_path = checkpoints_path[0]
32 | checkpoint.load_checkpoint(model, tf.keras.optimizers.Adam(), checkpoint_path=checkpoint_path)
33 | predictions, _ = model(inputs, decoder_input, False, enc_padding_mask, combined_mask, dec_padding_mask)
34 |     # Select the last word along the seq_len dimension
35 | predictions = tf.squeeze(predictions[:, -1:, :], axis=1) # (batch_size, vocab_size)
36 | predictions_sum = copy.deepcopy(predictions)
37 | if len(checkpoints_path) > 1:
38 |         for i in range(len(checkpoints_path) - 1):  # restore each remaining checkpoint, predict, and accumulate the predictions
39 | checkpoint_path = checkpoints_path[i + 1]
40 | checkpoint.load_checkpoint(model, tf.keras.optimizers.Adam(), checkpoint_path=checkpoint_path)
41 | predictions, _ = model(inputs, decoder_input, False, enc_padding_mask, combined_mask, dec_padding_mask)
42 | predictions = tf.squeeze(predictions[:, -1:, :], axis=1) # (batch_size, vocab_size)
43 | predictions_sum = tf.add(predictions_sum, predictions)
44 | predictions_avg = tf.divide(predictions_sum, len(checkpoints_path))
45 |
46 | return predictions_avg
47 |
48 |
49 | def _predict_index(checkpoints_path, inp_sentence, model, beam_search_container,
50 | input_tokenizer, target_tokenizer):
51 | """对输入句子进行翻译并返回编码的句子列表"""
52 | input_mode = text_vectorize.get_tokenizer_mode(_config.source_lang)
53 | target_mode = text_vectorize.get_tokenizer_mode(_config.target_lang)
54 |
55 | sentence = text_split.preprocess_sentences([inp_sentence], _config.source_lang, input_mode)
56 |
57 | inp_sequence, _ = text_vectorize.encode_sentences(sentence, input_tokenizer,
58 | language=_config.source_lang, mode=input_mode)
59 | inp_sequence = tf.squeeze(inp_sequence)
60 | inp_sequence = tf.expand_dims(inp_sequence, 0)
61 |
62 | # start_token shape:(1,)
63 | start_token = text_vectorize.encode_start_token(_config.start_word, target_tokenizer,
64 | language=_config.target_lang)
65 | end_token, _ = text_vectorize.encode_sentences([_config.end_word], target_tokenizer,
66 | language=_config.target_lang, mode=target_mode)
67 | end_token = tf.squeeze(end_token)
68 |
69 |     decoder_input = tf.expand_dims(start_token, 0)  # shape --> (1, 1), i.e. (batch_size, sentence_length)
70 |
71 | beam_search_container.reset(inputs=inp_sequence, dec_input=decoder_input)
72 | inputs, decoder_input = beam_search_container.get_search_inputs()
73 |     if len(checkpoints_path) == 1:  # if only one checkpoint is used, skip checkpoint ensembling
74 | checkpoint_path = checkpoints_path[0]
75 | checkpoint.load_checkpoint(model, tf.keras.optimizers.Adam(), checkpoint_path=checkpoint_path)
76 |
77 | for i in range(_config.max_target_length):
78 |         if len(checkpoints_path) == 1:  # if only one checkpoint is used, skip checkpoint ensembling
79 | enc_padding_mask, combined_mask, dec_padding_mask = _transformer.create_masks(inputs, decoder_input)
80 | predictions, _ = model(inputs, decoder_input, False, enc_padding_mask, combined_mask, dec_padding_mask)
81 | predictions = tf.squeeze(predictions[:, -1:, :], axis=1) # (batch_size, vocab_size)
82 | else:
83 | predictions = _checkpoint_ensembling(checkpoints_path, model, inputs, decoder_input)
84 |
85 | beam_search_container.expand(predictions=predictions, end_sign=end_token)
86 | if beam_search_container.beam_size == 0:
87 | break
88 | inputs, decoder_input = beam_search_container.get_search_inputs()
89 |
90 | beam_search_result = beam_search_container.get_result()
91 |
92 | return beam_search_result
93 |
94 |
95 | def translate(sentence, model, tokenizer_source, tokenizer_target, beam_size):
96 | """对句子(经过预处理未经过编码)进行翻译,未进行检查点的判断"""
97 | beam_search_container = beamsearch.BeamSearch(beam_size=beam_size,
98 | max_length=_config.max_target_length,
99 | worst_score=0)
100 |
101 |     # For checkpoint ensembling, get the list of checkpoint paths to use
102 | checkpoints_path = checkpoint.get_checkpoints_path()
103 | if _config.checkpoint_ensembling == "False":
104 | checkpoints_path = checkpoints_path[-1:]
105 |
106 | predict_idxes = _predict_index(checkpoints_path, sentence, model, beam_search_container,
107 | tokenizer_source, tokenizer_target)
108 |
109 | predicted_sentences = []
110 | target_mode = text_vectorize.get_tokenizer_mode(_config.target_lang)
111 |     # Extract the sequences from the container and build the final results
112 | for i in range(len(predict_idxes)):
113 | predict_idx = predict_idxes[i].numpy()
114 | predict_idx = tf.squeeze(predict_idx)
115 | predict_sentence = text_vectorize.decode_sentence(predict_idx, tokenizer_target,
116 | _config.target_lang, target_mode)
117 | predict_sentence = predict_sentence.replace(_config.start_word, '') \
118 | .replace(_config.end_word, '').strip()
119 | predicted_sentences.append(predict_sentence)
120 | return predicted_sentences
121 |
--------------------------------------------------------------------------------
/hlp/stt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/__init__.py
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/dev-clean-2/174/168635/174-168635-0000.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/dev-clean-2/174/168635/174-168635-0000.flac
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/dev-clean-2/174/168635/174-168635-0001.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/dev-clean-2/174/168635/174-168635-0001.flac
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/dev-clean-2/174/168635/174-168635.trans.txt:
--------------------------------------------------------------------------------
1 | 174-168635-0000 HE HAD NEVER BEEN FATHER LOVER HUSBAND FRIEND
2 | 174-168635-0001 THE HEART OF THAT EX CONVICT WAS FULL OF VIRGINITY
3 |
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/dev-clean-2/84/121550/84-121550-0000.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/dev-clean-2/84/121550/84-121550-0000.flac
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/dev-clean-2/84/121550/84-121550-0001.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/dev-clean-2/84/121550/84-121550-0001.flac
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/dev-clean-2/84/121550/84-121550.trans.txt:
--------------------------------------------------------------------------------
1 | 84-121550-0000 BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SINGING RECEIVED THEY IN THE MIDST OF LEAVES THAT EVER BORE A BURDEN TO THEIR RHYMES
2 | 84-121550-0001 ALL WATERS THAT ON EARTH MOST LIMPID ARE WOULD SEEM TO HAVE WITHIN THEMSELVES SOME MIXTURE COMPARED WITH THAT WHICH NOTHING DOTH CONCEAL
3 |
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0000.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0000.flac
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0001.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0001.flac
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0002.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0002.flac
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315.trans.txt:
--------------------------------------------------------------------------------
1 | 1088-134315-0000 AS YOU KNOW AND AS I HAVE GIVEN YOU PROOF I HAVE THE GREATEST ADMIRATION IN THE WORLD FOR ONE WHOSE WORK FOR HUMANITY HAS WON SUCH UNIVERSAL RECOGNITION I HOPE THAT WE SHALL BOTH FORGET THIS UNHAPPY MORNING AND THAT YOU WILL GIVE ME AN OPPORTUNITY OF RENDERING TO YOU IN PERSON
2 | 1088-134315-0001 THE APOLOGIES WHICH ARE DUE TO YOU I FEEL THAT ANYTHING LESS WILL NEITHER REHABILITATE ME IN YOUR ESTEEM NOR SECURE FOR ME THE REMNANTS OF MY SHATTERED SELF RESPECT I AM HOPING YOU WILL DINE WITH ME NEXT WEEK AND MEET A MOST INTERESTING MAN GEORGE GATHERCOLE
3 | 1088-134315-0002 TO DISTURB A RELATIONSHIP WHICH I HAVE ALWAYS HOPED WOULD BE MUTUALLY PLEASANT IF YOU WILL ALLOW GATHERCOLE WHO WILL BE UNCONSCIOUS OF THE PART HE IS PLAYING TO ACT AS PEACEMAKER BETWEEN YOURSELF AND MYSELF
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/train-clean-5/1088/134318/1088-134318-0000.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1088/134318/1088-134318-0000.flac
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/train-clean-5/1088/134318/1088-134318-0001.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1088/134318/1088-134318-0001.flac
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/train-clean-5/1088/134318/1088-134318.trans.txt:
--------------------------------------------------------------------------------
1 | 1088-134318-0000 CHAPTER TWELVE KARA LAY BACK ON HIS DOWN PILLOWS WITH A SNEER ON HIS FACE AND HIS BRAIN VERY BUSY WHAT STARTED THE TRAIN OF THOUGHT HE DID NOT KNOW BUT AT THAT MOMENT HIS MIND WAS VERY FAR AWAY
2 | 1088-134318-0001 IT CARRIED HIM BACK A DOZEN YEARS TO A DIRTY LITTLE PEASANT'S CABIN ON THE HILLSIDE OUTSIDE DURAZZO TO THE LIVID FACE OF A YOUNG ALBANIAN CHIEF WHO HAD LOST AT KARA'S WHIM ALL THAT LIFE HELD FOR A MAN
3 |
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/train-clean-5/1737/146161/1737-146161-0000.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1737/146161/1737-146161-0000.flac
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/train-clean-5/1737/146161/1737-146161-0001.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1737/146161/1737-146161-0001.flac
--------------------------------------------------------------------------------
/hlp/stt/data/LibriSpeech/train-clean-5/1737/146161/1737-146161.trans.txt:
--------------------------------------------------------------------------------
1 | 1737-146161-0000 KNIT TWO TOGETHER KNIT THREE MAKE ONE KNIT ONE MAKE ONE KNIT THREE KNIT TWO TOGETHER KNIT ONE MAKE ONE SECOND ROW
2 | 1737-146161-0001 SEAMED MAKING A STITCH AT THE BEGINNING THIRD ROW MAKE ONE KNIT ONE KNIT TWO TOGETHER KNIT TWO MAKE ONE KNIT THREE MAKE ONE KNIT TWO
3 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/.wav.scp:
--------------------------------------------------------------------------------
1 | A11_0 ./A11_0.wav
2 | A11_1 ./A11_1.wav
3 | A11_10 ./A11_10.wav
4 | A11_100 ./A11_100.wav
5 | A11_101 ./A11_101.wav
6 | A11_102 ./A11_102.wav
7 | A11_103 ./A11_103.wav
8 | A11_104 ./A11_104.wav
9 | A11_105 ./A11_105.wav
10 | A11_106 ./A11_106.wav
11 | A11_107 ./A11_107.wav
12 | A11_108 ./A11_108.wav
13 | A11_109 ./A11_109.wav
14 | A11_11 ./A11_11.wav
15 | A11_110 ./A11_110.wav
16 | A11_111 ./A11_111.wav
17 | A11_112 ./A11_112.wav
18 | A11_113 ./A11_113.wav
19 | A11_114 ./A11_114.wav
20 | A11_115 ./A11_115.wav
21 | A11_116 ./A11_116.wav
22 | A11_117 ./A11_117.wav
23 | A11_118 ./A11_118.wav
24 | A11_119 ./A11_119.wav
25 | A11_12 ./A11_12.wav
26 | A11_120 ./A11_120.wav
27 | A11_121 ./A11_121.wav
28 | A11_122 ./A11_122.wav
29 | A11_123 ./A11_123.wav
30 | A11_124 ./A11_124.wav
31 | A11_125 ./A11_125.wav
32 | A11_126 ./A11_126.wav
33 | A11_127 ./A11_127.wav
34 | A11_128 ./A11_128.wav
35 | A11_129 ./A11_129.wav
36 | A11_13 ./A11_13.wav
37 | A11_130 ./A11_130.wav
38 | A11_131 ./A11_131.wav
39 | A11_132 ./A11_132.wav
40 | A11_133 ./A11_133.wav
41 | A11_134 ./A11_134.wav
42 | A11_135 ./A11_135.wav
43 | A11_136 ./A11_136.wav
44 | A11_137 ./A11_137.wav
45 | A11_138 ./A11_138.wav
46 | A11_139 ./A11_139.wav
47 | A11_14 ./A11_14.wav
48 | A11_140 ./A11_140.wav
49 | A11_141 ./A11_141.wav
50 | A11_142 ./A11_142.wav
51 | A11_143 ./A11_143.wav
52 | A11_144 ./A11_144.wav
53 | A11_145 ./A11_145.wav
54 | A11_146 ./A11_146.wav
55 | A11_147 ./A11_147.wav
56 | A11_148 ./A11_148.wav
57 | A11_149 ./A11_149.wav
58 | A11_15 ./A11_15.wav
59 | A11_150 ./A11_150.wav
60 | A11_151 ./A11_151.wav
61 | A11_152 ./A11_152.wav
62 | A11_153 ./A11_153.wav
63 | A11_154 ./A11_154.wav
64 | A11_155 ./A11_155.wav
65 | A11_156 ./A11_156.wav
66 | A11_157 ./A11_157.wav
67 | A11_158 ./A11_158.wav
68 | A11_159 ./A11_159.wav
69 | A11_16 ./A11_16.wav
70 | A11_160 ./A11_160.wav
71 | A11_161 ./A11_161.wav
72 | A11_162 ./A11_162.wav
73 | A11_163 ./A11_163.wav
74 | A11_164 ./A11_164.wav
75 | A11_165 ./A11_165.wav
76 | A11_166 ./A11_166.wav
77 | A11_167 ./A11_167.wav
78 | A11_168 ./A11_168.wav
79 | A11_169 ./A11_169.wav
80 | A11_17 ./A11_17.wav
81 | A11_170 ./A11_170.wav
82 | A11_171 ./A11_171.wav
83 | A11_172 ./A11_172.wav
84 | A11_173 ./A11_173.wav
85 | A11_174 ./A11_174.wav
86 | A11_175 ./A11_175.wav
87 | A11_176 ./A11_176.wav
88 | A11_177 ./A11_177.wav
89 | A11_178 ./A11_178.wav
90 | A11_179 ./A11_179.wav
91 | A11_18 ./A11_18.wav
92 | A11_180 ./A11_180.wav
93 | A11_181 ./A11_181.wav
94 | A11_182 ./A11_182.wav
95 | A11_183 ./A11_183.wav
96 | A11_184 ./A11_184.wav
97 | A11_185 ./A11_185.wav
98 | A11_186 ./A11_186.wav
99 | A11_187 ./A11_187.wav
100 | A11_188 ./A11_188.wav
101 | A11_189 ./A11_189.wav
102 | A11_19 ./A11_19.wav
103 | A11_190 ./A11_190.wav
104 | A11_191 ./A11_191.wav
105 | A11_192 ./A11_192.wav
106 | A11_193 ./A11_193.wav
107 | A11_194 ./A11_194.wav
108 | A11_195 ./A11_195.wav
109 | A11_196 ./A11_196.wav
110 | A11_197 ./A11_197.wav
111 | A11_198 ./A11_198.wav
112 | A11_199 ./A11_199.wav
113 | A11_2 ./A11_2.wav
114 | A11_20 ./A11_20.wav
115 | A11_200 ./A11_200.wav
116 | A11_201 ./A11_201.wav
117 | A11_202 ./A11_202.wav
118 | A11_203 ./A11_203.wav
119 | A11_204 ./A11_204.wav
120 | A11_205 ./A11_205.wav
121 | A11_206 ./A11_206.wav
122 | A11_207 ./A11_207.wav
123 | A11_208 ./A11_208.wav
124 | A11_209 ./A11_209.wav
125 | A11_21 ./A11_21.wav
126 | A11_210 ./A11_210.wav
127 | A11_211 ./A11_211.wav
128 | A11_212 ./A11_212.wav
129 | A11_213 ./A11_213.wav
130 | A11_214 ./A11_214.wav
131 | A11_215 ./A11_215.wav
132 | A11_216 ./A11_216.wav
133 | A11_217 ./A11_217.wav
134 | A11_218 ./A11_218.wav
135 | A11_219 ./A11_219.wav
136 | A11_22 ./A11_22.wav
137 | A11_220 ./A11_220.wav
138 | A11_221 ./A11_221.wav
139 | A11_222 ./A11_222.wav
140 | A11_223 ./A11_223.wav
141 | A11_224 ./A11_224.wav
142 | A11_225 ./A11_225.wav
143 | A11_226 ./A11_226.wav
144 | A11_227 ./A11_227.wav
145 | A11_228 ./A11_228.wav
146 | A11_229 ./A11_229.wav
147 | A11_23 ./A11_23.wav
148 | A11_230 ./A11_230.wav
149 | A11_231 ./A11_231.wav
150 | A11_232 ./A11_232.wav
151 | A11_233 ./A11_233.wav
152 | A11_234 ./A11_234.wav
153 | A11_235 ./A11_235.wav
154 | A11_236 ./A11_236.wav
155 | A11_237 ./A11_237.wav
156 | A11_238 ./A11_238.wav
157 | A11_239 ./A11_239.wav
158 | A11_24 ./A11_24.wav
159 | A11_240 ./A11_240.wav
160 | A11_241 ./A11_241.wav
161 | A11_242 ./A11_242.wav
162 | A11_243 ./A11_243.wav
163 | A11_244 ./A11_244.wav
164 | A11_245 ./A11_245.wav
165 | A11_246 ./A11_246.wav
166 | A11_247 ./A11_247.wav
167 | A11_248 ./A11_248.wav
168 | A11_249 ./A11_249.wav
169 | A11_25 ./A11_25.wav
170 | A11_26 ./A11_26.wav
171 | A11_27 ./A11_27.wav
172 | A11_28 ./A11_28.wav
173 | A11_29 ./A11_29.wav
174 | A11_3 ./A11_3.wav
175 | A11_30 ./A11_30.wav
176 | A11_31 ./A11_31.wav
177 | A11_32 ./A11_32.wav
178 | A11_33 ./A11_33.wav
179 | A11_34 ./A11_34.wav
180 | A11_35 ./A11_35.wav
181 | A11_36 ./A11_36.wav
182 | A11_37 ./A11_37.wav
183 | A11_38 ./A11_38.wav
184 | A11_39 ./A11_39.wav
185 | A11_4 ./A11_4.wav
186 | A11_40 ./A11_40.wav
187 | A11_41 ./A11_41.wav
188 | A11_42 ./A11_42.wav
189 | A11_43 ./A11_43.wav
190 | A11_44 ./A11_44.wav
191 | A11_45 ./A11_45.wav
192 | A11_46 ./A11_46.wav
193 | A11_47 ./A11_47.wav
194 | A11_48 ./A11_48.wav
195 | A11_49 ./A11_49.wav
196 | A11_5 ./A11_5.wav
197 | A11_50 ./A11_50.wav
198 | A11_51 ./A11_51.wav
199 | A11_52 ./A11_52.wav
200 | A11_53 ./A11_53.wav
201 | A11_54 ./A11_54.wav
202 | A11_55 ./A11_55.wav
203 | A11_56 ./A11_56.wav
204 | A11_57 ./A11_57.wav
205 | A11_58 ./A11_58.wav
206 | A11_59 ./A11_59.wav
207 | A11_6 ./A11_6.wav
208 | A11_60 ./A11_60.wav
209 | A11_61 ./A11_61.wav
210 | A11_62 ./A11_62.wav
211 | A11_63 ./A11_63.wav
212 | A11_64 ./A11_64.wav
213 | A11_65 ./A11_65.wav
214 | A11_66 ./A11_66.wav
215 | A11_67 ./A11_67.wav
216 | A11_68 ./A11_68.wav
217 | A11_69 ./A11_69.wav
218 | A11_7 ./A11_7.wav
219 | A11_70 ./A11_70.wav
220 | A11_71 ./A11_71.wav
221 | A11_72 ./A11_72.wav
222 | A11_73 ./A11_73.wav
223 | A11_74 ./A11_74.wav
224 | A11_75 ./A11_75.wav
225 | A11_76 ./A11_76.wav
226 | A11_77 ./A11_77.wav
227 | A11_78 ./A11_78.wav
228 | A11_79 ./A11_79.wav
229 | A11_8 ./A11_8.wav
230 | A11_80 ./A11_80.wav
231 | A11_81 ./A11_81.wav
232 | A11_82 ./A11_82.wav
233 | A11_83 ./A11_83.wav
234 | A11_84 ./A11_84.wav
235 | A11_85 ./A11_85.wav
236 | A11_86 ./A11_86.wav
237 | A11_87 ./A11_87.wav
238 | A11_88 ./A11_88.wav
239 | A11_89 ./A11_89.wav
240 | A11_9 ./A11_9.wav
241 | A11_90 ./A11_90.wav
242 | A11_91 ./A11_91.wav
243 | A11_92 ./A11_92.wav
244 | A11_93 ./A11_93.wav
245 | A11_94 ./A11_94.wav
246 | A11_95 ./A11_95.wav
247 | A11_96 ./A11_96.wav
248 | A11_97 ./A11_97.wav
249 | A11_98 ./A11_98.wav
250 | A11_99 ./A11_99.wav
251 | A12_0 ./A12_0.wav
252 | A12_1 ./A12_1.wav
253 | A12_10 ./A12_10.wav
254 | A12_100 ./A12_100.wav
255 | A12_101 ./A12_101.wav
256 | A12_102 ./A12_102.wav
257 | A12_103 ./A12_103.wav
258 | A12_104 ./A12_104.wav
259 | A12_105 ./A12_105.wav
260 | A12_106 ./A12_106.wav
261 | A12_107 ./A12_107.wav
262 | A12_108 ./A12_108.wav
263 | A12_109 ./A12_109.wav
264 | A12_11 ./A12_11.wav
265 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_0.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_0.wav.trn:
--------------------------------------------------------------------------------
1 | 绿 是 阳春 烟 景 大块 文章 的 底色 四月 的 林 峦 更是 绿 得 鲜活 秀媚 诗意 盎然
2 | lv4 shi4 yang2 chun1 yan1 jing3 da4 kuai4 wen2 zhang1 de5 di3 se4 si4 yue4 de5 lin2 luan2 geng4 shi4 lv4 de5 xian1 huo2 xiu4 mei4 shi1 yi4 ang4 ran2
3 | l v4 sh ix4 ii iang2 ch un1 ii ian1 j ing3 d a4 k uai4 uu un2 zh ang1 d e5 d i3 s e4 s iy4 vv ve4 d e5 l in2 l uan2 g eng4 sh ix4 l v4 d e5 x ian1 h uo2 x iu4 m ei4 sh ix1 ii i4 aa ang4 r an2
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_1.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_1.wav.trn:
--------------------------------------------------------------------------------
1 | 他 仅 凭 腰部 的 力量 在 泳道 上下 翻腾 蛹 动 蛇行 状 如 海豚 一直 以 一头 的 优势 领先
2 | ta1 jin3 ping2 yao1 bu4 de5 li4 liang4 zai4 yong3 dao4 shang4 xia4 fan1 teng2 yong3 dong4 she2 xing2 zhuang4 ru2 hai3 tun2 yi4 zhi2 yi3 yi4 tou2 de5 you1 shi4 ling3 xian1
3 | t a1 j in3 p ing2 ii iao1 b u4 d e5 l i4 l iang4 z ai4 ii iong3 d ao4 sh ang4 x ia4 f an1 t eng2 ii iong3 d ong4 sh e2 x ing2 zh uang4 r u2 h ai3 t un2 ii i4 zh ix2 ii i3 ii i4 t ou2 d e5 ii iu1 sh ix4 l ing3 x ian1
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_2.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_2.wav.trn:
--------------------------------------------------------------------------------
1 | 企业 依靠 技术 挖潜 增效 他 负责 全厂 产品质量 与 技术培训 成了 厂里 的 大忙人
2 | qi3 ye4 yi1 kao4 ji4 shu4 wa1 qian2 zeng1 xiao4 ta1 fu4 ze2 quan2 chang3 chan2 pin3 zhi4 liang4 yu3 ji4 shu4 pei2 xun4 cheng2 le5 chang3 li3 de5 da4 mang2 ren2
3 | q i3 ii ie4 ii i1 k ao4 j i4 sh u4 uu ua1 q ian2 z eng1 x iao4 t a1 f u4 z e2 q van2 ch ang3 ch an2 p in3 zh ix4 l iang4 vv v3 j i4 sh u4 p ei2 x vn4 ch eng2 l e5 ch ang3 l i3 d e5 d a4 m ang2 r en2
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_3.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_3.wav.trn:
--------------------------------------------------------------------------------
1 | 菜 做好 了 一碗 清蒸 武昌鱼 一碗 蕃茄 炒鸡蛋 一碗 榨菜 干 子 炒肉丝
2 | cai4 zuo4 hao3 le5 yi4 wan3 qing1 zheng1 wu3 chang1 yu2 yi4 wan3 fan1 qie2 chao3 ji1 dan4 yi4 wan3 zha4 cai4 gan1 zi3 chao3 rou4 si1
3 | c ai4 z uo4 h ao3 l e5 ii i4 uu uan3 q ing1 zh eng1 uu u3 ch ang1 vv v2 ii i4 uu uan3 f an1 q ie2 ch ao3 j i1 d an4 ii i4 uu uan3 zh a4 c ai4 g an1 z iy3 ch ao3 r ou4 s iy1
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_33.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_33.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_33.wav.trn:
--------------------------------------------------------------------------------
1 | 与 王伟 四平市 货车 司机 杜 大平 岳 玉杰 装卸工 刘 春山 一同 追赶 逃跑 的 案犯
2 | yu3 wang2 wei3 si4 ping2 shi4 huo4 che1 si1 ji1 du4 da4 ping2 yue4 yu4 jie2 zhuang1 xie4 gong1 liu2 chun1 shan1 yi4 tong2 zhui1 gan3 tao2 pao3 de5 an4 fan4
3 | vv v3 uu uang2 uu ui3 s iy4 p ing2 sh ix4 h uo4 ch e1 s iy1 j i1 d u4 d a4 p ing2 vv ve4 vv v4 j ie2 zh uang1 x ie4 g ong1 l iu2 ch un1 sh an1 ii i4 t ong2 zh ui1 g an3 t ao2 p ao3 d e5 aa an4 f an4
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_4.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_4.wav.trn:
--------------------------------------------------------------------------------
1 | 她 看看 夜 己 很 深 白天 的 炎热 已 给 夜 凉 吹散 吩咐 大家 各自 安息 明天 继续 玩乐
2 | ta1 kan4 kan5 ye4 ji3 hen3 shen1 bai2 tian1 de5 yan2 re4 yi3 gei3 ye4 liang2 chui1 san4 fen1 fu4 da4 jia1 ge4 zi4 an1 xi1 ming2 tian1 ji4 xu4 wan2 le4
3 | t a1 k an4 k an5 ii ie4 j i3 h en3 sh en1 b ai2 t ian1 d e5 ii ian2 r e4 ii i3 g ei3 ii ie4 l iang2 ch ui1 s an4 f en1 f u4 d a4 j ia1 g e4 z iy4 aa an1 x i1 m ing2 t ian1 j i4 x v4 uu uan2 l e4
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_5.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_5.wav.trn:
--------------------------------------------------------------------------------
1 | 有 一家 个体 制品厂 本该 用 完整 的 型材 生产 门窗 却 用 半截 材 打结 凑合
2 | you3 yi4 jia1 ge4 ti3 zhi4 pin2 chang3 ben3 gai1 yong4 wan2 zheng3 de5 xing2 cai2 sheng1 chan3 men2 chuang1 que4 yong4 ban4 jie2 cai2 da3 jie2 cou4 he5
3 | ii iu3 ii i4 j ia1 g e4 t i3 zh ix4 p in2 ch ang3 b en3 g ai1 ii iong4 uu uan2 zh eng3 d e5 x ing2 c ai2 sh eng1 ch an3 m en2 ch uang1 q ve4 ii iong4 b an4 j ie2 c ai2 d a3 j ie2 c ou4 h e5
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_58.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_58.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_58.wav.trn:
--------------------------------------------------------------------------------
1 | 特 别是 跨 省区 电网 超 计划 用电 不仅 损害 自己 也 损害 别人 损害 电网 损害 国家
2 | te4 bie2 shi4 kua4 sheng3 qu1 dian4 wang3 chao1 ji4 hua4 yong4 dian4 bu4 jin3 sun3 hai4 zi4 ji3 ye3 sun3 hai4 bie2 ren2 sun3 hai4 dian4 wang3 sun3 hai4 guo2 jia1
3 | t e4 b ie2 sh ix4 k ua4 sh eng3 q v1 d ian4 uu uang3 ch ao1 j i4 h ua4 ii iong4 d ian4 b u4 j in3 s un3 h ai4 z iy4 j i3 ii ie3 s un3 h ai4 b ie2 r en2 s un3 h ai4 d ian4 uu uang3 s un3 h ai4 g uo2 j ia1
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_6.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_6.wav.trn:
--------------------------------------------------------------------------------
1 | 久居 闹市 常常 忘了 山 之 外水 之外 身 之外 还有 沃野 平 畴 还有 光 风 丽日
2 | jiu3 ju1 nao4 shi4 chang2 chang2 wang4 le5 shan1 zhi1 wai4 shui3 zhi1 wai4 shen1 zhi1 wai4 hai2 you3 wo4 ye3 ping2 chou2 hai2 you3 guang1 feng1 li4 ri4
3 | j iu3 j v1 n ao4 sh ix4 ch ang2 ch ang2 uu uang4 l e5 sh an1 zh ix1 uu uai4 sh ui3 zh ix1 uu uai4 sh en1 zh ix1 uu uai4 h ai2 ii iu3 uu uo4 ii ie3 p ing2 ch ou2 h ai2 ii iu3 g uang1 f eng1 l i4 r iz4
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_7.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/A2_7.wav.trn:
--------------------------------------------------------------------------------
1 | 旷野 的 风 要 往 这儿 刮 那儿 刮 你 能 命令 风 四面八方 全 刮 一点 吗
2 | kuang4 ye3 de5 feng1 yao4 wang3 zhe4 er5 gua1 na4 er5 gua1 ni3 neng2 ming4 ling4 feng1 si4 mian4 ba1 fang1 quan2 gua1 yi4 dian3 ma5
3 | k uang4 ii ie3 d e5 f eng1 ii iao4 uu uang3 zh e4 ee er5 g ua1 n a4 ee er5 g ua1 n i3 n eng2 m ing4 l ing4 f eng1 s iy4 m ian4 b a1 f ang1 q van2 g ua1 ii i4 d ian3 m a5
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/D4_750.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/D4_750.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/D4_750.wav.trn:
--------------------------------------------------------------------------------
1 | 东北军 的 一些 爱国 将士 马 占 山 李杜 唐 聚 伍 苏 炳 艾 邓 铁梅 等 也 奋起 抗战
2 | dong1 bei3 jun1 de5 yi4 xie1 ai4 guo2 jiang4 shi4 ma3 zhan4 shan1 li3 du4 tang2 ju4 wu3 su1 bing3 ai4 deng4 tie3 mei2 deng3 ye3 fen4 qi3 kang4 zhan4
3 | d ong1 b ei3 j vn1 d e5 ii i4 x ie1 aa ai4 g uo2 j iang4 sh ix4 m a3 zh an4 sh an1 l i3 d u4 t ang2 j v4 uu u3 s u1 b ing3 aa ai4 d eng4 t ie3 m ei2 d eng3 ii ie3 f en4 q i3 k ang4 zh an4
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/D4_751.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/D4_751.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/data/D4_751.wav.trn:
--------------------------------------------------------------------------------
1 | 王 英汉 被 枪毙 后 部分 余孽 深 藏起来 几次 围捕 均 未 抓获
2 | wang2 ying1 han4 bei4 qiang1 bi4 hou4 bu4 fen5 yu2 nie4 shen1 cang2 qi3 lai5 ji3 ci4 wei2 bu3 jun1 wei4 zhua1 huo4
3 | uu uang2 ii ing1 h an4 b ei4 q iang1 b i4 h ou4 b u4 f en5 vv v2 n ie4 sh en1 c ang2 q i3 l ai5 j i3 c iy4 uu ui2 b u3 j vn1 uu ui4 zh ua1 h uo4
4 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/dev/A2_33.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/dev/A2_33.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/dev/A2_33.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/A2_33.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/dev/A2_58.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/dev/A2_58.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/dev/A2_58.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/A2_58.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/test/D4_750.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/test/D4_750.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/test/D4_750.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/D4_750.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/test/D4_751.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/test/D4_751.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/test/D4_751.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/D4_751.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_0.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_0.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/A2_0.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_1.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_1.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/A2_1.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_2.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_2.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/A2_2.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_3.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_3.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/A2_3.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_4.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_4.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/A2_4.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_5.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_5.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/A2_5.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_6.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_6.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/A2_6.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_7.wav
--------------------------------------------------------------------------------
/hlp/stt/data/data_thchs30/train/A2_7.wav.trn:
--------------------------------------------------------------------------------
1 | ../data/A2_7.wav.trn
2 |
--------------------------------------------------------------------------------
/hlp/stt/deepspeech2/model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | def clipped_relu(x):
5 | return tf.keras.activations.relu(x, max_value=20)
6 |
7 |
8 | class DS2(tf.keras.Model):
9 | def __init__(self,
10 | conv_layers, filters, kernel_size, strides,
11 | bi_gru_layers, gru_units,
12 | fc_units,
13 | output_dim,
14 | **kwargs):
15 | super(DS2, self).__init__(**kwargs)
16 |
17 | self.bn1 = tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)
18 |
19 | self.conv_layers = conv_layers
20 | self.conv = []
21 | for i in range(conv_layers):
22 | self.conv.append(tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size,
23 | strides=strides, padding="valid",
24 | activation="relu", name="conv" + str(i)))
25 |
26 | self.bn2 = tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)
27 |
28 | self.bi_gru_layers = bi_gru_layers
29 | self.bi_gru = []
30 | for i in range(bi_gru_layers):
31 | self.bi_gru.append(tf.keras.layers.Bidirectional(
32 | tf.keras.layers.GRU(gru_units, activation="relu", return_sequences=True),
33 | merge_mode="sum", name="bi_gru" + str(i)))
34 |
35 | self.bn3 = tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)
36 |
37 | self.fc = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(fc_units, activation=clipped_relu))
38 | self.sm = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(output_dim, activation="softmax"))
39 |
40 | def call(self, inputs):
41 | x = inputs
42 | x = self.bn1(x)
43 | for i in range(self.conv_layers):
44 | x = self.conv[i](x)
45 | x = self.bn2(x)
46 | for i in range(self.bi_gru_layers):
47 | x = self.bi_gru[i](x)
48 | x = self.bn3(x)
49 | x = self.fc(x)
50 | x = self.sm(x)
51 | return x
52 |
53 |
54 | if __name__ == "__main__":
55 | pass
56 |
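57 |     # A minimal smoke-test sketch: it only checks that DS2 builds and produces the expected
58 |     # output shape. All sizes below are illustrative assumptions, not the project's configuration.
59 |     model = DS2(conv_layers=1, filters=256, kernel_size=11, strides=2,
60 |                 bi_gru_layers=1, gru_units=256, fc_units=512, output_dim=29)
61 |     out = model(tf.random.normal((2, 100, 39)))  # (batch, time, acoustic feature dim)
62 |     print(out.shape)  # (2, 45, 29): a per-frame softmax over the output symbols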
--------------------------------------------------------------------------------
/hlp/stt/las/las.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | from hlp.utils.layers import BahdanauAttention
4 |
5 |
6 | class Encoder(tf.keras.Model):
7 | def __init__(self, cnn1_filters, cnn1_kernel_size, cnn2_filters,
8 | cnn2_kernel_size, max_pool_strides, max_pool_size, d, w):
9 | """
10 |
11 |         :param cnn1_filters: number of filters in the first Conv1D layer
12 |         :param cnn1_kernel_size: kernel size of the first Conv1D layer
13 |         :param cnn2_filters: number of filters in the second Conv1D layer
14 |         :param cnn2_kernel_size: kernel size of the second Conv1D layer
15 |         :param max_pool_strides: strides of the max-pooling layer
16 |         :param max_pool_size: pool size of the max-pooling layer
17 |         :param d: number of BiLSTM layers
18 |         :param w: number of BiLSTM units per layer
19 | """
20 | super(Encoder, self).__init__()
21 | self.d = d
22 | self.w = w
23 | self.cnn1 = tf.keras.layers.Conv1D(filters=cnn1_filters, kernel_size=cnn1_kernel_size, activation='relu')
24 | self.cnn2 = tf.keras.layers.Conv1D(filters=cnn2_filters, kernel_size=cnn2_kernel_size, activation='relu')
25 | self.max_pool = tf.keras.layers.MaxPooling1D(strides=max_pool_strides, pool_size=max_pool_size)
26 |
27 | self.bi_lstm = []
28 | for i in range(self.d):
29 | self.bi_lstm.append(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(w, return_sequences=True)))
30 |
31 | def call(self, x):
32 | x = self.cnn1(x)
33 | x = self.cnn2(x)
34 | x = self.max_pool(x)
35 |
36 | for i in range(self.d):
37 | x = self.bi_lstm[i](x)
38 |
39 | return x
40 |
41 | def initialize_hidden_state(self):
42 |         return tf.zeros((self.batch_sz, self.w))  # NOTE: self.batch_sz is never set on Encoder; see LAS.initialize_hidden_state below
43 |
44 |
45 | class Decoder(tf.keras.Model):
46 | def __init__(self, vocab_size, embedding_dim, dec_units, w):
47 | super(Decoder, self).__init__()
48 | self.dec_units = dec_units
49 | self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
50 |
51 |         # TODO: make the number of LSTM layers configurable
52 | self.rnn1 = tf.keras.layers.LSTM(w, return_sequences=True)
53 | # self.rnn2 = tf.keras.layers.LSTM(w, return_sequences=True)
54 | self.fc = tf.keras.layers.Dense(vocab_size)
55 | self.attention = BahdanauAttention(self.dec_units)
56 |
57 | def call(self, x, hidden, enc_output):
58 | """解码
59 |
60 | :param x: 目标符号, (批大小,id)
61 | :param hidden: 解码器状态, (批大小,隐藏层大小)
62 | :param enc_output: 编码器输出, (批大小,最大长度,隐藏层大小)
63 | :return: token分布, 解码器专题, 注意力权重
64 | """
65 | context_vector, attention_weights = self.attention(hidden, enc_output)
66 |
67 |         # shape of x after the embedding layer == (batch_size, 1, embedding_dim)
68 | x = self.embedding(x)
69 |
70 |         # shape of x after concatenation == (batch_size, 1, embedding_dim + hidden_size)
71 | x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
72 |
73 | output = self.rnn1(x)
74 | # output = self.rnn2(x)
75 |         # output shape == (batch_size * 1, hidden_size)
76 | output = tf.reshape(output, (-1, output.shape[2]))
77 |
78 |         # output shape == (batch_size, vocab)
79 | tokens_prob = self.fc(output)
80 |
81 | return tokens_prob, attention_weights
82 |
83 |
84 | class LAS(tf.keras.Model):
85 | def __init__(self, vocab_tar_size, cnn1_filters, cnn1_kernel_size, cnn2_filters,
86 | cnn2_kernel_size, max_pool_strides, max_pool_size, d, w,
87 | embedding_dim, dec_units, batch_size):
88 | super(LAS, self).__init__()
89 | self.vocab_tar_size = vocab_tar_size
90 | self.d = d
91 | self.w = w
92 | self.batch_size = batch_size
93 | self.encoder = Encoder(cnn1_filters, cnn1_kernel_size,
94 | cnn2_filters, cnn2_kernel_size,
95 | max_pool_strides, max_pool_size, d, w)
96 | self.decoder = Decoder(vocab_tar_size, embedding_dim, dec_units, w)
97 |
98 | def call(self, inputx_1, enc_hidden, dec_input):
99 | enc_output = self.encoder(inputx_1)
100 |
101 |         dec_hidden = enc_hidden  # use the encoder state as the initial decoder state?
102 |         predictions, dec_hidden = self.decoder(dec_input, dec_hidden, enc_output)  # NOTE: Decoder.call returns (tokens_prob, attention_weights)
103 | return predictions, dec_hidden
104 |
105 | def initialize_hidden_state(self):
106 | return tf.zeros((self.batch_size, self.w))
107 |
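108 | 
109 | if __name__ == "__main__":
110 |     # A minimal construction sketch; every size below is an illustrative assumption, not the
111 |     # project's configuration. It only shows how the constructor arguments fit together.
112 |     las = LAS(vocab_tar_size=30, cnn1_filters=32, cnn1_kernel_size=3, cnn2_filters=64,
113 |               cnn2_kernel_size=3, max_pool_strides=2, max_pool_size=2, d=2, w=64,
114 |               embedding_dim=128, dec_units=64, batch_size=4)
115 |     print(las.initialize_hidden_state().shape)  # (4, 64)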
--------------------------------------------------------------------------------
/hlp/stt/las/plas.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from hlp.utils.layers import BahdanauAttention
3 |
4 |
5 | class PBLSTM(tf.keras.layers.Layer):
6 | """金字塔BiLSTM
7 |
8 | 逐层缩减序列长度
9 | """
10 | def __init__(self, dim):
11 | super(PBLSTM, self).__init__()
12 | self.dim = dim
13 | self.bidi_lstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(self.dim, return_sequences=True))
14 |
15 | @tf.function
16 | def call(self, inputs):
17 | y = self.bidi_lstm(inputs)
18 |
19 | if tf.shape(inputs)[1] % 2 == 1:
20 | y = tf.keras.layers.ZeroPadding1D(padding=(0, 1))(y)
21 |
22 | y = tf.keras.layers.Reshape(target_shape=(-1, int(self.dim * 4)))(y)
23 | return y
24 |
25 |
26 | class Encoder(tf.keras.Model):
27 | def __init__(self, dim, enc_units):
28 |         # TODO: make the number of pyramid layers configurable
29 | super(Encoder, self).__init__()
30 | self.enc_units = enc_units
31 | self.dim = dim
32 |         # Listen; lower the time resolution by 8x
33 | self.plstm1 = PBLSTM(self.dim // 2)
34 | self.plstm2 = PBLSTM(self.dim // 2)
35 | self.plstm3 = PBLSTM(self.dim // 2)
36 |
37 | def call(self, x):
38 | """声学特征序列编码
39 |
40 | :param x: 声学特征序列
41 | :return: 缩减后的编码特征序列
42 | """
43 | x = self.plstm1(x)
44 | x = self.plstm2(x)
45 | output = self.plstm3(x)
46 | return output
47 |
48 |
49 | class Decoder(tf.keras.Model):
50 | def __init__(self, vocab_size, embedding_dim, dec_units):
51 | super(Decoder, self).__init__()
52 | self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
53 | self.gru = tf.keras.layers.GRU(dec_units,
54 | return_sequences=True,
55 | return_state=True,
56 | recurrent_initializer='glorot_uniform')
57 | self.fc = tf.keras.layers.Dense(vocab_size)
58 | self.attention = BahdanauAttention(dec_units)
59 |
60 | def call(self, x, hidden, enc_output):
61 | """解码
62 |
63 | :param x: 目标符号, (批大小,id)
64 | :param hidden: 解码器状态, (批大小,隐藏层大小)
65 | :param enc_output: 编码器输出, (批大小,最大长度,隐藏层大小)
66 | :return: token分布, 解码器专题, 注意力权重
67 | """
68 | context_vector, attention_weights = self.attention(hidden, enc_output)
69 |
70 |         # shape of x after the embedding layer == (batch_size, 1, embedding_dim)
71 | x = self.embedding(x)
72 |
73 |         # shape of x after concatenation == (batch_size, 1, embedding_dim + hidden_size)
74 | x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
75 |
76 | output, state = self.gru(x)
77 |         # output shape == (batch_size * 1, hidden_size)
78 | output = tf.reshape(output, (-1, output.shape[2]))
79 |
80 |         # output shape == (batch_size, vocab)
81 | tokens_prob = self.fc(output)
82 |
83 | return tokens_prob, state, attention_weights
84 |
85 |
86 | class PLAS(tf.keras.Model):
87 | def __init__(self, vocab_tar_size, embedding_dim, units, batch_size):
88 | super(PLAS, self).__init__()
89 | self.units = units
90 | self.batch_size = batch_size
91 | # TODO: allow different numbers of units for the encoder and decoder
92 | self.encoder = Encoder(embedding_dim, units)
93 | self.decoder = Decoder(vocab_tar_size, embedding_dim, units)
94 |
95 | def call(self, x, enc_hidden, dec_input):
96 | """
97 |
98 | :param x: 编码器输入
99 | :param enc_hidden:
100 | :param dec_input: 解码器输入
101 | :return: 解码器预测, 解码器状态
102 | """
103 | enc_output = self.encoder(x)
104 | dec_hidden = enc_hidden  # use the encoder state as the decoder's initial state?
105 | predictions, dec_hidden, _ = self.decoder(dec_input, dec_hidden, enc_output)
106 | return predictions, dec_hidden
107 |
108 | def initialize_hidden_state(self):
109 | return tf.zeros((self.batch_size, self.units))
110 |
111 |
112 | if __name__ == "__main__":
113 | import numpy as np
114 | # a = np.arange(12).reshape((1, 4, 3)).astype(np.float32)
115 | a = np.arange(15).reshape((1, 5, 3)).astype(np.float32)
116 | p_lstm = PBLSTM(8)
117 | r = p_lstm(a)
118 | print(r.shape)
119 |
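Note: a minimal usage sketch (not part of the file above; the layer sizes are illustrative, and BahdanauAttention from hlp.utils.layers is assumed to return a context vector and attention weights) showing one teacher-forced step through PLAS, with the three pyramid layers cutting 80 input frames down to 10:

import tensorflow as tf
from hlp.stt.las.plas import PLAS

batch_size, frames, feature_dim = 4, 80, 64        # illustrative sizes
model = PLAS(vocab_tar_size=30, embedding_dim=64, units=128, batch_size=batch_size)

features = tf.random.normal((batch_size, frames, feature_dim))   # acoustic features
dec_input = tf.ones((batch_size, 1), dtype=tf.int32)             # previous target token ids
enc_hidden = model.initialize_hidden_state()                     # (batch_size, units)

predictions, dec_hidden = model(features, enc_hidden, dec_input)
print(predictions.shape)   # (batch_size, vocab_tar_size)
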
--------------------------------------------------------------------------------
/hlp/stt/rnnt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/rnnt/__init__.py
--------------------------------------------------------------------------------
/hlp/stt/rnnt/model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | # Time reduction layer
5 | class TimeReduction(tf.keras.layers.Layer):
6 | def __init__(self,
7 | reduction_factor,
8 | **kwargs):
9 | super(TimeReduction, self).__init__(**kwargs)
10 |
11 | self.reduction_factor = reduction_factor
12 |
13 | def call(self, inputs):
14 | batch_size = inputs.shape[0]
15 |
16 | max_time = inputs.shape[1]
17 | num_units = inputs.shape[-1]
18 |
19 | paddings = [[0, 0], [0, tf.math.floormod(-max_time, self.reduction_factor)], [0, 0]]  # pad the time axis up to a multiple of reduction_factor
20 | outputs = tf.pad(inputs, paddings)
21 |
22 | return tf.reshape(outputs, (batch_size, -1, num_units * self.reduction_factor))
23 |
24 |
25 | # Encoder
26 | class Encoder(tf.keras.layers.Layer):
27 | def __init__(self, encoder_layers, encoder_lstm_units,
28 | proj_size, dropout, reduction_factor, **kwargs):
29 | super(Encoder, self).__init__(**kwargs)
30 |
31 | self.bn = tf.keras.layers.BatchNormalization(axis=-1,
32 | momentum=0.99,
33 | epsilon=0.001)
34 |
35 | self.encoder_layers = encoder_layers
36 | self.lstm = []
37 | self.dense = []
38 | self.dropout = []
39 | self.ln = []
40 | for i in range(self.encoder_layers):
41 | self.lstm.append(tf.keras.layers.LSTM(
42 | encoder_lstm_units, return_sequences=True))
43 | self.dense.append(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(proj_size)))
44 | self.dropout.append(tf.keras.layers.Dropout(dropout))
45 | self.ln.append(tf.keras.layers.LayerNormalization())
46 | self.reduction_factor = reduction_factor
47 | self.tr = TimeReduction(self.reduction_factor)
48 |
49 | def call(self, inputs):
50 | x = self.bn(inputs)
51 | for i in range(self.encoder_layers):
52 | x = self.lstm[i](x)
53 | x = self.dense[i](x)
54 | x = self.dropout[i](x)
55 | x = self.ln[i](x)
56 |
57 | if i == self.reduction_factor:  # time reduction is applied after the layer whose index equals reduction_factor
58 | x = self.tr(x)
59 |
60 | return x
61 |
62 |
63 | # Prediction network
64 | class PredictionNetwork(tf.keras.layers.Layer):
65 | def __init__(self, vocab_size, embedding_size,
66 | prediction_network_layers, prediction_network_lstm_units,
67 | proj_size, dropout, **kwargs):
68 | super(PredictionNetwork, self).__init__(**kwargs)
69 |
70 | self.embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_size)
71 |
72 | self.prediction_network_layers = prediction_network_layers
73 | self.lstm = []
74 | self.dense = []
75 | self.dropout = []
76 | self.ln = []
77 | for i in range(self.prediction_network_layers):
78 | self.lstm.append(
79 | tf.keras.layers.LSTM(prediction_network_lstm_units, return_sequences=True))
80 | self.dense.append(
81 | tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(proj_size)))
82 | self.dropout.append(tf.keras.layers.Dropout(dropout))
83 | self.ln.append(tf.keras.layers.LayerNormalization())
84 |
85 | def call(self, inputs):
86 | x = self.embedding_layer(inputs)
87 | for i in range(self.prediction_network_layers):
88 | x = self.lstm[i](x)
89 | x = self.dense[i](x)
90 | x = self.dropout[i](x)
91 | x = self.ln[i](x)
92 |
93 | return x
94 |
95 |
96 | # RNN-T: combines the encoder and the prediction network through a joint network
97 | class RNNT(tf.keras.Model):
98 | def __init__(self, encoder_layers, encoder_lstm_units,
99 | encoder_proj_size, encoder_dropout, reduction_factor,
100 | joint_dense_units, vocab_size,
101 | embedding_size,
102 | prediction_network_layers, prediction_network_lstm_units,
103 | pred_proj_size, pred_dropout, **kwargs):
104 | super(RNNT, self).__init__(**kwargs)
105 |
106 | self.encoder = Encoder(encoder_layers, encoder_lstm_units,
107 | encoder_proj_size, encoder_dropout, reduction_factor)
108 | self.prediction_network = PredictionNetwork(vocab_size,
109 | embedding_size,
110 | prediction_network_layers,
111 | prediction_network_lstm_units,
112 | pred_proj_size, pred_dropout)
113 | self.ds1 = tf.keras.layers.TimeDistributed(
114 | tf.keras.layers.Dense(joint_dense_units, activation="tanh"))
115 | self.ds2 = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(vocab_size))
116 |
117 | def call(self, encoder_inputs, pre_inputs):
118 | encoder_outputs = self.encoder(encoder_inputs)
119 | pred_outputs = self.prediction_network(pre_inputs)
120 |
121 | # [B, T, V] => [B, T, 1, V]
122 | encoder_outputs = tf.expand_dims(encoder_outputs, axis=2)
123 |
124 | # [B, U, V] => [B, 1, U, V]
125 | pred_outputs = tf.expand_dims(pred_outputs, axis=1)
126 |
127 | # joint combination: [B, T, U, V]
128 | # TODO: is element-wise addition the right joint operation here?
129 | joint_inputs = encoder_outputs + pred_outputs
130 |
131 | joint_outputs = self.ds1(joint_inputs)
132 | outputs = self.ds2(joint_outputs)
133 |
134 | return outputs
135 |
136 |
137 | if __name__ == "__main__":
138 | pass
139 |
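Note: a minimal shape-check sketch (not part of the file above; all hyper-parameter values are illustrative) for the RNN-T model: the joint output has shape [batch, T', label_length, vocab_size]. The encoder projection size and the prediction-network projection size must match so the two can be added in the joint step:

import tensorflow as tf
from hlp.stt.rnnt.model import RNNT

model = RNNT(encoder_layers=2, encoder_lstm_units=64, encoder_proj_size=64,
             encoder_dropout=0.1, reduction_factor=2, joint_dense_units=64,
             vocab_size=30, embedding_size=32, prediction_network_layers=1,
             prediction_network_lstm_units=64, pred_proj_size=64, pred_dropout=0.1)

features = tf.random.normal((2, 20, 39))    # (batch, frames, feature_dim), e.g. 39-dim MFCC
labels = tf.ones((2, 5), dtype=tf.int32)    # (batch, label_length)

outputs = model(features, labels)
print(outputs.shape)                         # (batch, T', label_length, vocab_size)
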
--------------------------------------------------------------------------------
/hlp/stt/transformer/model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from hlp.utils.layers import positional_encoding
3 | from hlp.utils.layers import create_padding_mask
4 | from hlp.utils.layers import create_look_ahead_mask
5 | from hlp.utils.layers import transformer_encoder_layer
6 | from hlp.utils.layers import transformer_decoder_layer
7 |
8 |
9 | def encoder(vocab_size: int, embedding_dim: int, num_layers: int, feature_dim: int,
10 | encoder_units: int, num_heads: int, dropout: float = 0.1) -> tf.keras.Model:
11 | """
12 | Encoder of the speech transformer
13 | :param vocab_size: vocabulary size (also used as the maximum positional-encoding length)
14 | :param embedding_dim: embedding dimension
15 | :param num_layers: number of encoder layers
16 | :param feature_dim: acoustic feature dimension
17 | :param encoder_units: number of units in each encoder layer
18 | :param dropout: encoder dropout rate
19 | :param num_heads: number of attention heads
20 | """
21 | inputs = tf.keras.Input(shape=(None, feature_dim))
22 | padding_mask = tf.keras.layers.Lambda(_create_padding_mask,
23 | output_shape=(1, 1, None))(inputs)
24 | outputs = tf.keras.layers.Dense(embedding_dim)(inputs)
25 | outputs = tf.keras.layers.LayerNormalization(epsilon=1e-6)(outputs)
26 |
27 | outputs = outputs * tf.math.sqrt(tf.cast(embedding_dim, tf.float32))
28 | pos_encoding = positional_encoding(vocab_size, embedding_dim)
29 | outputs = outputs + pos_encoding[:, :tf.shape(outputs)[1], :]
30 |
31 | outputs = tf.keras.layers.Dropout(rate=dropout)(outputs)
32 |
33 | for i in range(num_layers):
34 | outputs = transformer_encoder_layer(
35 | units=encoder_units,
36 | d_model=embedding_dim,
37 | num_heads=num_heads,
38 | dropout=dropout,
39 | name="transformer_encoder_layer_{}".format(i),
40 | )([outputs, padding_mask])
41 |
42 | return tf.keras.Model(inputs=inputs, outputs=[outputs, padding_mask])
43 |
44 |
45 | def decoder(vocab_size: int, embedding_dim: int, num_layers: int,
46 | decoder_units: int, num_heads: int, dropout: float = 0.1) -> tf.keras.Model:
47 | """
48 | :param vocab_size: vocabulary size
49 | :param embedding_dim: embedding dimension
50 | :param num_layers: number of decoder layers
51 | :param decoder_units: number of units in each decoder layer
52 | :param num_heads: number of attention heads
53 | :param dropout: decoder dropout rate
54 | """
55 | enc_outputs = tf.keras.Input(shape=(None, None))
56 | dec_inputs = tf.keras.Input(shape=(None,))
57 | padding_mask = tf.keras.Input(shape=(1, 1, None))
58 | pos_encoding = positional_encoding(vocab_size, embedding_dim)
59 | look_ahead_mask = tf.keras.layers.Lambda(_combine_mask,
60 | output_shape=(1, None, None))(dec_inputs)
61 |
62 | embeddings = tf.keras.layers.Embedding(vocab_size, embedding_dim)(dec_inputs)
63 | embeddings *= tf.math.sqrt(tf.cast(embedding_dim, tf.float32))
64 | embeddings = embeddings + pos_encoding[:, :tf.shape(embeddings)[1], :]
65 |
66 | outputs = tf.keras.layers.Dropout(rate=dropout)(embeddings)
67 |
68 | for i in range(num_layers):
69 | outputs = transformer_decoder_layer(
70 | units=decoder_units, d_model=embedding_dim, num_heads=num_heads,
71 | dropout=dropout, name="transformer_decoder_layer_{}".format(i),
72 | )(inputs=[outputs, enc_outputs, look_ahead_mask, padding_mask])
73 |
74 | outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(outputs)
75 |
76 | return tf.keras.Model(inputs=[dec_inputs, enc_outputs, padding_mask], outputs=outputs)
77 |
78 |
79 | def _combine_mask(seq: tf.Tensor):
80 | """
81 | Mask the positions of the input that must not be attended to (look-ahead plus padding)
82 | :param seq: input sequence
83 | :return: mask
84 | """
85 | look_ahead_mask = create_look_ahead_mask(seq)
86 | padding_mask = create_padding_mask(seq)
87 | return tf.maximum(look_ahead_mask, padding_mask)
88 |
89 |
90 | def _create_padding_mask(seq: tf.Tensor):
91 | """
92 | Create a mask for the padded part of the input sequence (specifically for mel/feature sequences)
93 | :param seq: input sequence
94 | :return: mask
95 | """
96 | seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
97 | seq = seq[:, :, 0]
98 | return seq[:, tf.newaxis, tf.newaxis, :]
99 |
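Note: a minimal sketch (not part of the file above; the dimensions are illustrative and the layer helpers imported from hlp.utils.layers are assumed to be available) wiring the functional encoder and decoder into one forward pass. Since vocab_size also bounds the positional-encoding length here, the 50-frame input stays below it:

import tensorflow as tf
from hlp.stt.transformer.model import encoder, decoder

vocab_size, embedding_dim, feature_dim = 100, 64, 80
enc = encoder(vocab_size=vocab_size, embedding_dim=embedding_dim, num_layers=2,
              feature_dim=feature_dim, encoder_units=128, num_heads=4)
dec = decoder(vocab_size=vocab_size, embedding_dim=embedding_dim, num_layers=2,
              decoder_units=128, num_heads=4)

audio_features = tf.random.normal((2, 50, feature_dim))   # (batch, frames, feature_dim)
dec_inputs = tf.ones((2, 10), dtype=tf.int32)             # target token ids so far

enc_outputs, padding_mask = enc(audio_features)
logits = dec([dec_inputs, enc_outputs, padding_mask])
print(logits.shape)                                        # (batch, 10, vocab_size)
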
--------------------------------------------------------------------------------
/hlp/stt/utils/audio_process.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import soundfile as sf
3 | import tensorflow as tf
4 | from python_speech_features import mfcc, logfbank, delta
5 |
6 |
7 | def wav_to_feature(wav_path, audio_feature_type):
8 | """
9 | Extract audio features from a speech file
10 | :param wav_path: path to the audio file
11 | :param audio_feature_type: feature type ("mfcc" or "fbank")
12 | :return: audio features of shape (timestep, dim)
13 | """
14 | sig, sr = sf.read(wav_path)
15 |
16 | if audio_feature_type == "mfcc":
17 | return get_mfcc_(sig, sr)
18 | elif audio_feature_type == "fbank":
19 | return get_fbank(sig, sr)
20 |
21 |
22 | def get_mfcc_(wav_signal, sr):
23 | """
24 | :param wav_signal: audio signal
25 | :param sr: sample rate
26 | Takes the audio signal and its sample rate and returns the MFCC features (13-dimensional by default) plus first- and second-order deltas
27 | """
28 | feat_mfcc = mfcc(wav_signal, sr)
29 | feat_mfcc_d = delta(feat_mfcc, 2)
30 | feat_mfcc_dd = delta(feat_mfcc_d, 2)
31 |
32 | # (timestep, 39)
33 | wav_feature = np.column_stack((feat_mfcc, feat_mfcc_d, feat_mfcc_dd))
34 | return wav_feature.astype(np.float32)
35 |
36 |
37 | def get_fbank(wav_signal, sr):
38 | """
39 | :param wav_signal: audio signal
40 | :param sr: sample rate
41 | Takes the audio signal and its sample rate and returns the log-FBANK features (80 filters)
42 | """
43 | feat_fbank = logfbank(wav_signal, sr, nfilt=80)
44 |
45 | return feat_fbank.astype(np.float32)
46 |
47 |
48 | def get_input_and_length(audio_path_list, audio_feature_type, max_len):
49 | """
50 | Get the padded features and frame counts for a list of speech files
51 | :param audio_path_list: list of audio file paths
52 | :param audio_feature_type: audio feature type
53 | :param max_len: maximum padded length
54 | :return: padded audio feature array, number of frames in each audio file
55 | """
56 | audio_feature_list = []
57 | input_length_list = []
58 | for audio_path in audio_path_list:
59 | audio_feature = wav_to_feature(audio_path, audio_feature_type)
60 | audio_feature_list.append(audio_feature)
61 | input_length_list.append([audio_feature.shape[0]])
62 |
63 | input_tensor = tf.keras.preprocessing.sequence.pad_sequences(audio_feature_list, maxlen=max_len,
64 | dtype='float32', padding='post')
65 | input_length = tf.convert_to_tensor(input_length_list)
66 |
67 | return input_tensor, input_length
68 |
69 |
70 | def max_audio_length(audio_path_list, audio_feature_type):
71 | """
72 | Get the maximum number of feature frames over a list of speech files
73 | Note: this method reads every audio file and extracts its features.
74 | :param audio_path_list: list of audio file paths
75 | :param audio_feature_type: audio feature type
76 | :return: maximum number of frames
77 | """
78 | return max(wav_to_feature(audio_path, audio_feature_type).shape[0] for audio_path in audio_path_list)
79 |
80 |
81 | if __name__ == "__main__":
82 | pass
83 |
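Note: a minimal usage sketch (not part of the file above; the .wav path is hypothetical) showing the two feature types and their dimensions:

from hlp.stt.utils.audio_process import wav_to_feature

mfcc_feature = wav_to_feature("A2_0.wav", "mfcc")     # (timestep, 39): 13 MFCCs + deltas + delta-deltas
fbank_feature = wav_to_feature("A2_0.wav", "fbank")   # (timestep, 80): log-fbank with 80 filters
print(mfcc_feature.shape, fbank_feature.shape)
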
--------------------------------------------------------------------------------
/hlp/stt/utils/load_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import tensorflow as tf
4 |
5 |
6 | def load_data(train_data_path: str, batch_size: int, buffer_size: int, valid_data_split: float = 0.0,
7 | valid_data_path: str = "", train_length_path: str = "", valid_length_path: str = "",
8 | max_train_data_size: int = 0, max_valid_data_size: int = 0):
9 | """
10 | Load training and validation data; validation-data priority: explicit validation file > split from the training set
11 | :param train_data_path: path to the training data file
12 | :param buffer_size: Dataset shuffle buffer size
13 | :param batch_size: Dataset batch size
14 | :param valid_data_split: fraction of the training data to split off as validation data
15 | :param valid_data_path: path to the validation data file
16 | :param train_length_path: path to the saved training sample lengths
17 | :param valid_length_path: path to the saved validation sample lengths
18 | :param max_train_data_size: maximum number of training samples
19 | :param max_valid_data_size: maximum number of validation samples
20 | :return: train_dataset, valid_dataset, steps_per_epoch, valid_steps_per_epoch
21 | """
22 | if not os.path.exists(train_data_path):
23 | print("The training/validation data file does not exist. Run the pre_treat mode first and try again.")
24 | exit(0)
25 |
26 | print("Loading training data...")
27 | train_audio_data_path, train_sentence_data_path, train_length_data = \
28 | read_data(data_path=train_data_path, length_path=train_length_path, num_examples=max_train_data_size)
29 |
30 | valid_flag = True  # whether validation is enabled
31 | valid_steps_per_epoch = 0
32 |
33 | # split off validation data, depending on whether a validation data file was provided
34 | if valid_data_path != "":
35 | print("Loading validation data...")
36 | valid_audio_data_path, valid_sentence_data_path, valid_length_data = \
37 | read_data(data_path=valid_data_path, length_path=valid_length_path, num_examples=max_valid_data_size)
38 | elif valid_data_split != 0.0:
39 | print("Splitting validation data from the training data...")
40 | train_size = int(len(train_audio_data_path) * (1.0 - valid_data_split))
41 | valid_audio_data_path = train_audio_data_path[train_size:]
42 | valid_sentence_data_path = train_sentence_data_path[train_size:]
43 | valid_length_data = train_length_data[train_size:]
44 | train_audio_data_path = train_audio_data_path[:train_size]
45 | train_sentence_data_path = train_sentence_data_path[:train_size]
46 | train_length_data = train_length_data[:train_size]
47 | else:
48 | valid_flag = False
49 |
50 | train_dataset = _to_dataset(data=(train_audio_data_path, train_sentence_data_path, train_length_data),
51 | batch_size=batch_size, buffer_size=buffer_size)
52 | steps_per_epoch = len(train_sentence_data_path) // batch_size
53 |
54 | if valid_flag:
55 | valid_dataset = _to_dataset(data=(valid_audio_data_path, valid_sentence_data_path, valid_length_data),
56 | batch_size=batch_size, buffer_size=buffer_size)
57 | valid_steps_per_epoch = len(valid_sentence_data_path) // batch_size
58 | else:
59 | valid_dataset = None
60 |
61 | print("Training and validation data loaded")
62 | return train_dataset, valid_dataset, steps_per_epoch, valid_steps_per_epoch
63 |
64 |
65 | def _to_dataset(data: tuple, batch_size: int, buffer_size: int):
66 | """
67 | Wrap data into a tf.data.Dataset
68 | :param data: tuple of data to wrap
69 | :param buffer_size: Dataset shuffle buffer size
70 | :param batch_size: Dataset batch size
71 | :return: dataset
72 | """
73 | dataset = tf.data.Dataset.from_tensor_slices(data). \
74 | cache().shuffle(buffer_size).prefetch(tf.data.experimental.AUTOTUNE)
75 | dataset = dataset.map(_process_audio_sentence_pairs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
76 | dataset = dataset.batch(batch_size, drop_remainder=True)
77 |
78 | return dataset
79 |
80 |
81 | def read_data(data_path: str, length_path: str, num_examples: int):
82 | """
83 | :param data_path: path of the data file to read
84 | :param length_path: path of the saved sample lengths
85 | :param num_examples: number of examples to read (0 means all)
86 | :return: lists of audio feature file paths and sentence file paths, plus the length data
87 | """
88 | audio_data_path = []
89 | sentence_data_path = []
90 | with open(data_path, 'r', encoding="utf-8") as data_file:
91 | lines = data_file.read().strip().split('\n')
92 | if num_examples != 0:
93 | lines = lines[:num_examples]
94 |
95 | for line in lines:
96 | line = line.strip().strip("\n").replace("/", " ").split("\t")
97 | audio_data_path.append(line[0])
98 | sentence_data_path.append(line[1])
99 |
100 | length_data = np.load(length_path)
101 |
102 | return audio_data_path, sentence_data_path, length_data
103 |
104 |
105 | def read_npy_file(filename):
106 | """
107 | Helper for reading a .npy file inside dataset.map (via tf.py_function)
108 | :param filename: filename tensor
109 | :return: the loaded data
110 | """
111 | return np.load(filename.numpy().decode())
112 |
113 |
114 | def _process_audio_sentence_pairs(audio_data_path: tf.Tensor, sentence_data_path: tf.Tensor, length: tf.Tensor):
115 | """
116 | Convert an audio-sentence pair into tensors
117 | :param audio_data_path: file holding the audio feature data
118 | :param sentence_data_path: file holding the sentence token ids
119 | :param length: sample length
120 | :return: audio_feature, sentence, length
121 | """
122 | [audio_feature] = tf.py_function(read_npy_file, [audio_data_path], [tf.float32])
123 | [sentence] = tf.py_function(read_npy_file, [sentence_data_path], [tf.int32])
124 |
125 | return audio_feature, sentence, length
126 |
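Note: a minimal usage sketch (not part of the file above; all paths are hypothetical) of how load_data is driven. Each line of the manifest file is expected to hold an audio-feature .npy path and a sentence-id .npy path separated by a tab, and the length file is a NumPy array saved with np.save:

from hlp.stt.utils.load_dataset import load_data

train_dataset, valid_dataset, steps, valid_steps = load_data(
    train_data_path="train_manifest.txt",    # hypothetical manifest produced by the pre_treat step
    batch_size=4,
    buffer_size=1000,
    valid_data_split=0.1,                    # split 10% of the training data off for validation
    train_length_path="train_lengths.npy")   # hypothetical per-sample length file

for audio_feature, sentence, length in train_dataset.take(1):
    print(audio_feature.shape, sentence.shape, length.shape)
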
--------------------------------------------------------------------------------
/hlp/stt/utils/spec_augment.py:
--------------------------------------------------------------------------------
1 | import librosa.display
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | import tensorflow as tf
5 | from tensorflow_addons.image import sparse_image_warp
6 |
7 | def sparse_warp(mel_spectrogram, time_warping_para=80):
8 | fbank_size = tf.shape(mel_spectrogram)
9 | n, v = fbank_size[1], fbank_size[2]
10 |
11 | # Image warping control point setting.
12 | # Source
13 | pt = tf.random.uniform([], time_warping_para, n - time_warping_para, tf.int32) # random point along the time axis
14 | src_ctr_pt_freq = tf.range(v // 2) # control points on freq-axis
15 | src_ctr_pt_time = tf.ones_like(src_ctr_pt_freq) * pt # control points on time-axis
16 | src_ctr_pts = tf.stack((src_ctr_pt_time, src_ctr_pt_freq), -1)
17 | src_ctr_pts = tf.cast(src_ctr_pts, dtype=tf.float32)
18 |
19 | # Destination
20 | w = tf.random.uniform([], -time_warping_para, time_warping_para, tf.int32) # distance
21 | dest_ctr_pt_freq = src_ctr_pt_freq
22 | dest_ctr_pt_time = src_ctr_pt_time + w
23 | dest_ctr_pts = tf.stack((dest_ctr_pt_time, dest_ctr_pt_freq), -1)
24 | dest_ctr_pts = tf.cast(dest_ctr_pts, dtype=tf.float32)
25 |
26 | # warp
27 | source_control_point_locations = tf.expand_dims(src_ctr_pts, 0) # (1, v//2, 2)
28 | dest_control_point_locations = tf.expand_dims(dest_ctr_pts, 0) # (1, v//2, 2)
29 |
30 | warped_image, _ = sparse_image_warp(mel_spectrogram,
31 | source_control_point_locations,
32 | dest_control_point_locations)
33 | return warped_image
34 |
35 |
36 | def frequency_masking(mel_spectrogram, v, frequency_masking_para=27, frequency_mask_num=2):
37 | fbank_size = tf.shape(mel_spectrogram)
38 | n, v = fbank_size[1], fbank_size[2]
39 |
40 | for i in range(frequency_mask_num):
41 | f = tf.random.uniform([], minval=0, maxval=frequency_masking_para, dtype=tf.int32)
42 | v = tf.cast(v, dtype=tf.int32)
43 | f0 = tf.random.uniform([], minval=0, maxval=v - f, dtype=tf.int32)
44 |
45 | # warped_mel_spectrogram[f0:f0 + f, :] = 0
46 | mask = tf.concat((tf.ones(shape=(1, n, v - f0 - f, 1)),
47 | tf.zeros(shape=(1, n, f, 1)),
48 | tf.ones(shape=(1, n, f0, 1)),
49 | ), 2)
50 | mel_spectrogram = mel_spectrogram * mask
51 |
52 | return tf.cast(mel_spectrogram, dtype=tf.float32)
53 |
54 |
55 | def time_masking(mel_spectrogram, tau, time_masking_para=100, time_mask_num=2):
56 | fbank_size = tf.shape(mel_spectrogram)
57 | n, v = fbank_size[1], fbank_size[2]
58 |
59 | for i in range(time_mask_num):
60 | t = tf.random.uniform([], minval=0, maxval=time_masking_para, dtype=tf.int32)
61 | t0 = tf.random.uniform([], minval=0, maxval=tau - t, dtype=tf.int32)
62 |
63 | # mel_spectrogram[:, t0:t0+t] = 0
64 | mask = tf.concat((tf.ones(shape=(1, n - t0 - t, v, 1)),
65 | tf.zeros(shape=(1, t, v, 1)),
66 | tf.ones(shape=(1, t0, v, 1)),), 1)
67 | mel_spectrogram = mel_spectrogram * mask
68 |
69 | return tf.cast(mel_spectrogram, dtype=tf.float32)
70 |
71 |
72 | def spec_augment(mel_spectrogram):
73 | v = mel_spectrogram.shape[0]
74 | tau = mel_spectrogram.shape[1]
75 |
76 | warped_mel_spectrogram = sparse_warp(mel_spectrogram)
77 |
78 | warped_frequency_spectrogram = frequency_masking(warped_mel_spectrogram, v=v)
79 |
80 | warped_frequency_time_sepctrogram = time_masking(warped_frequency_spectrogram, tau=tau)
81 |
82 | return warped_frequency_time_sepctrogram
83 |
84 |
85 | def _plot_spectrogram(mel_spectrogram, title):
86 | plt.figure(figsize=(10, 4))
87 | librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :, 0], ref=np.max),
88 | y_axis='mel', fmax=8000,
89 | x_axis='time')
90 | # plt.colorbar(format='%+2.0f dB')
91 | plt.title(title)
92 | plt.tight_layout()
93 | plt.show()
94 |
95 |
96 | if __name__ == "__main__":
97 | import argparse
98 | import numpy as np
99 |
100 | parser = argparse.ArgumentParser(description='Spec Augment')
101 | parser.add_argument('--audio-path', default='../data/data_thchs30/data/A2_0.wav',
102 | help='The audio file.')
103 | parser.add_argument('--time-warp-para', default=80,
104 | help='time warp parameter W')
105 | parser.add_argument('--frequency-mask-para', default=27,
106 | help='frequency mask parameter F')
107 | parser.add_argument('--time-mask-para', default=100,
108 | help='time mask parameter T')
109 | parser.add_argument('--masking-line-number', default=1,
110 | help='masking line number')
111 |
112 | args = parser.parse_args()
113 | audio_path = args.audio_path
114 | time_warping_para = args.time_warp_para
115 | time_masking_para = args.time_mask_para
116 | frequency_masking_para = args.frequency_mask_para
117 | masking_line_number = args.masking_line_number
118 |
119 | audio, sampling_rate = librosa.load(audio_path)
120 | mel_spectrogram = librosa.feature.melspectrogram(y=audio,
121 | sr=sampling_rate,
122 | n_mels=256,
123 | hop_length=128,
124 | fmax=8000)
125 |
126 | # reshape spectrogram shape to [batch_size, time, frequency, 1]
127 | shape = mel_spectrogram.shape
128 | mel_spectrogram = np.reshape(mel_spectrogram, (-1, shape[0], shape[1], 1))
129 |
130 | _plot_spectrogram(mel_spectrogram=mel_spectrogram,
131 | title="Raw Mel Spectrogram")
132 |
133 | _plot_spectrogram(
134 | mel_spectrogram=spec_augment(mel_spectrogram),
135 | title="tensorflow Warped & Masked Mel Spectrogram")
136 |
--------------------------------------------------------------------------------
/hlp/stt/utils/text_process.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from hlp.utils import text_split
3 |
4 |
5 | def tokenize_and_encode(texts: list, dict_path: str, max_len: int,
6 | num_words: int, unk_token: str = "<unk>"):
7 | """
8 | Convert a collection of text sequences into padded token-id sequences
9 | :param texts: list of text sequences
10 | :param dict_path: path where the tokenizer dictionary is saved
11 | :param max_len: maximum text length
12 | :param num_words: maximum number of words to keep
13 | :param unk_token: out-of-vocabulary (unknown) token
14 | :return texts: processed token-id sequences
15 | :return tokenizer: tokenizer
16 | """
17 | tokenizer = tf.keras.preprocessing.text.Tokenizer(filters="", oov_token=unk_token, num_words=num_words)
18 | tokenizer.fit_on_texts(texts)
19 | texts = tokenizer.texts_to_sequences(texts)
20 | texts = tf.keras.preprocessing.sequence.pad_sequences(texts, maxlen=max_len, padding="post")
21 |
22 | with open(dict_path, 'w', encoding="utf-8") as dict_file:
23 | dict_file.write(tokenizer.to_json())
24 |
25 | return texts, tokenizer
26 |
27 |
28 | def split_and_encode(sentences, mode, word_index):
29 | """对文本进行切分和编码
30 |
31 | :param sentences: 文本列表
32 | :param mode: 切分模式
33 | :param word_index: 词典
34 | :return: 文本编码序列
35 | """
36 | splitted_sentences = split_sentences(sentences, mode)
37 | text_int_sequences_list = encode_texts(splitted_sentences, word_index)
38 | return text_int_sequences_list
39 |
40 |
41 | # convert tokens to ids
42 | def encode_texts(splitted_sentences, word_index):
43 | text_int_sequences = []
44 | for splitted_sentence in splitted_sentences:
45 | text_int_sequences.append(encode_text(splitted_sentence, word_index))
46 | return text_int_sequences
47 |
48 |
49 | # convert the tokens of one sentence to ids
50 | def encode_text(splitted_sentence, word_index):
51 | int_sequence = []
52 | for c in splitted_sentence.split(" "):
53 | int_sequence.append(int(word_index[c]))
54 | return int_sequence
55 |
56 |
57 | def split_sentence(line, mode):
58 | """对转写文本进行切分
59 |
60 | :param line: 转写文本
61 | :param mode: 语料文本的切分方法
62 | :return: 切分后的文本,以空格分隔的字符串
63 | """
64 | if mode.lower() == "cn":
65 | return _split_sentence_cn(line)
66 | elif mode.lower() == "en_word":
67 | return _split_sentence_en_word(line)
68 | elif mode.lower() == "en_char":
69 | return _split_sentence_en_char(line)
70 | elif mode.lower() == "las_cn":
71 | return _split_sentence_las_cn_char(line)
72 | elif mode.lower() == "las_en_word":
73 | return _split_sentence_las_en_word(line)
74 | elif mode.lower() == "las_en_char":
75 | return _split_sentence_las_en_char(line)
76 |
77 |
78 | def split_sentences(sentences, mode):
79 | """对文本进行切换
80 |
81 | :param sentences: 待切分文本序列
82 | :param mode: 切分模式
83 | :return: 空格分隔的token串的列表
84 | """
85 | text_list = []
86 | for text in sentences:
87 | text_list.append(split_sentence(text, mode))
88 | return text_list
89 |
90 |
91 | def _split_sentence_en_word(s):
92 | result = text_split.split_en_word(s)
93 | return result
94 |
95 |
96 | def _split_sentence_en_char(s):
97 | result = text_split.split_en_char(s)
98 | return result
99 |
100 |
101 | def _split_sentence_las_en_char(s):
102 | s = text_split.split_en_char(s)
103 |
104 | # add start and end markers to the sentence
105 | # so the model knows when to start and stop predicting
106 | s.insert(0, '<start>')
107 | s.append('<end>')
108 |
109 | return s
110 |
111 |
112 | def _split_sentence_las_en_word(s):
113 | s = text_split.split_en_word(s)
114 |
115 | # add start and end markers to the sentence
116 | # so the model knows when to start and stop predicting
117 | s.insert(0, '<start>')
118 | s.append('<end>')
119 |
120 | return s
121 |
122 |
123 | def _split_sentence_cn(s):
124 | result = text_split.split_zh_char(s)
125 | return result
126 |
127 |
128 | def _split_sentence_las_cn_char(s):
129 | s = text_split.split_zh_char(s)
130 |
131 | # add start and end markers to the sentence
132 | # so the model knows when to start and stop predicting
133 | s.insert(0, '<start>')
134 | s.append('<end>')
135 |
136 | return s
137 |
138 |
139 | # get the longest label_length
140 | def get_max_label_length(text_int_sequences):
141 | return max(len(seq) for seq in text_int_sequences)
142 |
143 |
144 | def get_label_and_length(text_int_sequences_list, max_label_length):
145 | target_length_list = []
146 | for text_int_sequence in text_int_sequences_list:
147 | target_length_list.append([len(text_int_sequence)])
148 | target_tensor_numpy = tf.keras.preprocessing.sequence.pad_sequences(text_int_sequences_list,
149 | maxlen=max_label_length,
150 | padding='post'
151 | )
152 | target_length = tf.convert_to_tensor(target_length_list)
153 | return target_tensor_numpy, target_length
154 |
155 |
156 | # decode an output sequence of token ids back into a token sequence
157 | def int_to_text_sequence(seq, index_word, mode):
158 | if mode.lower() == "cn":
159 | return int_to_text_sequence_cn(seq, index_word)
160 | elif mode.lower() == "en_word":
161 | return int_to_text_sequence_en_word(seq, index_word)
162 | elif mode.lower() == "en_char":
163 | return int_to_text_sequence_en_char(seq, index_word)
164 |
165 |
166 | def int_to_text_sequence_cn(ids, index_word):
167 | result = []
168 | for i in ids:
169 | if 1 <= i <= len(index_word):
170 | word = index_word[str(i)]
171 | result.append(word)
172 | return "".join(result).strip()
173 |
174 |
175 | def int_to_text_sequence_en_word(ids, index_word):
176 | result = []
177 | for i in ids:
178 | if 1 <= i <= (len(index_word)):
179 | word = index_word[str(i)]
180 | result.append(word)
181 | result.append(" ")
182 | return "".join(result).strip()
183 |
184 |
185 | def int_to_text_sequence_en_char(ids, index_word):
186 | result = []
187 | for i in ids:
188 | if 1 <= i <= (len(index_word)):
189 | word = index_word[str(i)]
190 | if word != "<space>":
191 | result.append(word)
192 | else:
193 | result.append(" ")
194 | return "".join(result).strip()
195 |
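Note: a minimal usage sketch (not part of the file above; the dictionary path and sentences are illustrative, and text_split.split_en_char is assumed to behave as the docstrings above describe) for character-level splitting and encoding:

from hlp.stt.utils.text_process import split_sentences, tokenize_and_encode

sentences = ["hello world", "good morning"]
splitted = split_sentences(sentences, mode="en_char")       # space-separated characters
texts, tokenizer = tokenize_and_encode(splitted, dict_path="dict.json",
                                       max_len=20, num_words=500, unk_token="<unk>")
print(texts.shape)           # (2, 20) padded id sequences
print(tokenizer.word_index)  # learned character vocabulary
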
--------------------------------------------------------------------------------
/hlp/tts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/__init__.py
--------------------------------------------------------------------------------
/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0001.wav
--------------------------------------------------------------------------------
/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0002.wav
--------------------------------------------------------------------------------
/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0003.wav
--------------------------------------------------------------------------------
/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0004.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0004.wav
--------------------------------------------------------------------------------
/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0005.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0005.wav
--------------------------------------------------------------------------------
/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0006.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0006.wav
--------------------------------------------------------------------------------
/hlp/tts/data/cmudict-0.7b:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/cmudict-0.7b
--------------------------------------------------------------------------------
/hlp/tts/data/number/metadata.csv:
--------------------------------------------------------------------------------
1 | 0_jackson_0|zero
2 | 0_jackson_1|zero
3 | 0_jackson_2|zero
4 | 0_jackson_3|zero
5 | 0_jackson_4|zero
6 | 0_jackson_5|zero
7 | 0_jackson_6|zero
8 | 0_jackson_7|zero
9 | 0_jackson_8|zero
10 | 0_jackson_9|zero
11 | 0_jackson_10|zero
12 | 0_jackson_11|zero
13 | 0_jackson_12|zero
14 | 0_jackson_13|zero
15 | 0_jackson_14|zero
16 | 0_jackson_15|zero
17 | 0_jackson_16|zero
18 | 0_jackson_17|zero
19 | 0_jackson_18|zero
20 | 0_jackson_19|zero
21 | 0_jackson_20|zero
22 | 0_jackson_21|zero
23 | 0_jackson_22|zero
24 | 0_jackson_23|zero
25 | 0_jackson_24|zero
26 | 0_jackson_25|zero
27 | 0_jackson_26|zero
28 | 0_jackson_27|zero
29 | 0_jackson_28|zero
30 | 0_jackson_29|zero
31 | 0_jackson_30|zero
32 | 0_jackson_31|zero
33 | 0_jackson_32|zero
34 | 0_jackson_33|zero
35 | 0_jackson_34|zero
36 | 0_jackson_35|zero
37 | 0_jackson_36|zero
38 | 0_jackson_37|zero
39 | 0_jackson_38|zero
40 | 0_jackson_39|zero
41 | 0_jackson_40|zero
42 | 0_jackson_41|zero
43 | 0_jackson_42|zero
44 | 0_jackson_43|zero
45 | 0_jackson_44|zero
46 | 0_jackson_45|zero
47 | 0_jackson_46|zero
48 | 0_jackson_47|zero
49 | 0_jackson_48|zero
50 | 0_jackson_49|zero
51 | 1_jackson_0|one
52 | 1_jackson_1|one
53 | 1_jackson_2|one
54 | 1_jackson_3|one
55 | 1_jackson_4|one
56 | 1_jackson_5|one
57 | 1_jackson_6|one
58 | 1_jackson_7|one
59 | 1_jackson_8|one
60 | 1_jackson_9|one
61 | 1_jackson_10|one
62 | 1_jackson_11|one
63 | 1_jackson_12|one
64 | 1_jackson_13|one
65 | 1_jackson_14|one
66 | 1_jackson_15|one
67 | 1_jackson_16|one
68 | 1_jackson_17|one
69 | 1_jackson_18|one
70 | 1_jackson_19|one
71 | 1_jackson_20|one
72 | 1_jackson_21|one
73 | 1_jackson_22|one
74 | 1_jackson_23|one
75 | 1_jackson_24|one
76 | 1_jackson_25|one
77 | 1_jackson_26|one
78 | 1_jackson_27|one
79 | 1_jackson_28|one
80 | 1_jackson_29|one
81 | 1_jackson_30|one
82 | 1_jackson_31|one
83 | 1_jackson_32|one
84 | 1_jackson_33|one
85 | 1_jackson_34|one
86 | 1_jackson_35|one
87 | 1_jackson_36|one
88 | 1_jackson_37|one
89 | 1_jackson_38|one
90 | 1_jackson_39|one
91 | 1_jackson_40|one
92 | 1_jackson_41|one
93 | 1_jackson_42|one
94 | 1_jackson_43|one
95 | 1_jackson_44|one
96 | 1_jackson_45|one
97 | 1_jackson_46|one
98 | 1_jackson_47|one
99 | 1_jackson_48|one
100 | 1_jackson_49|one
101 | 2_jackson_0|two
102 | 2_jackson_1|two
103 | 2_jackson_2|two
104 | 2_jackson_3|two
105 | 2_jackson_4|two
106 | 2_jackson_5|two
107 | 2_jackson_6|two
108 | 2_jackson_7|two
109 | 2_jackson_8|two
110 | 2_jackson_9|two
111 | 2_jackson_10|two
112 | 2_jackson_11|two
113 | 2_jackson_12|two
114 | 2_jackson_13|two
115 | 2_jackson_14|two
116 | 2_jackson_15|two
117 | 2_jackson_16|two
118 | 2_jackson_17|two
119 | 2_jackson_18|two
120 | 2_jackson_19|two
121 | 2_jackson_20|two
122 | 2_jackson_21|two
123 | 2_jackson_22|two
124 | 2_jackson_23|two
125 | 2_jackson_24|two
126 | 2_jackson_25|two
127 | 2_jackson_26|two
128 | 2_jackson_27|two
129 | 2_jackson_28|two
130 | 2_jackson_29|two
131 | 2_jackson_30|two
132 | 2_jackson_31|two
133 | 2_jackson_32|two
134 | 2_jackson_33|two
135 | 2_jackson_34|two
136 | 2_jackson_35|two
137 | 2_jackson_36|two
138 | 2_jackson_37|two
139 | 2_jackson_38|two
140 | 2_jackson_39|two
141 | 2_jackson_40|two
142 | 2_jackson_41|two
143 | 2_jackson_42|two
144 | 2_jackson_43|two
145 | 2_jackson_44|two
146 | 2_jackson_45|two
147 | 2_jackson_46|two
148 | 2_jackson_47|two
149 | 2_jackson_48|two
150 | 2_jackson_49|two
151 | 3_jackson_0|three
152 | 3_jackson_1|three
153 | 3_jackson_2|three
154 | 3_jackson_3|three
155 | 3_jackson_4|three
156 | 3_jackson_5|three
157 | 3_jackson_6|three
158 | 3_jackson_7|three
159 | 3_jackson_8|three
160 | 3_jackson_9|three
161 | 3_jackson_10|three
162 | 3_jackson_11|three
163 | 3_jackson_12|three
164 | 3_jackson_13|three
165 | 3_jackson_14|three
166 | 3_jackson_15|three
167 | 3_jackson_16|three
168 | 3_jackson_17|three
169 | 3_jackson_18|three
170 | 3_jackson_19|three
171 | 3_jackson_20|three
172 | 3_jackson_21|three
173 | 3_jackson_22|three
174 | 3_jackson_23|three
175 | 3_jackson_24|three
176 | 3_jackson_25|three
177 | 3_jackson_26|three
178 | 3_jackson_27|three
179 | 3_jackson_28|three
180 | 3_jackson_29|three
181 | 3_jackson_30|three
182 | 3_jackson_31|three
183 | 3_jackson_32|three
184 | 3_jackson_33|three
185 | 3_jackson_34|three
186 | 3_jackson_35|three
187 | 3_jackson_36|three
188 | 3_jackson_37|three
189 | 3_jackson_38|three
190 | 3_jackson_39|three
191 | 3_jackson_40|three
192 | 3_jackson_41|three
193 | 3_jackson_42|three
194 | 3_jackson_43|three
195 | 3_jackson_44|three
196 | 3_jackson_45|three
197 | 3_jackson_46|three
198 | 3_jackson_47|three
199 | 3_jackson_48|three
200 | 3_jackson_49|three
201 | 4_jackson_0|four
202 | 4_jackson_1|four
203 | 4_jackson_2|four
204 | 4_jackson_3|four
205 | 4_jackson_4|four
206 | 4_jackson_5|four
207 | 4_jackson_6|four
208 | 4_jackson_7|four
209 | 4_jackson_8|four
210 | 4_jackson_9|four
211 | 4_jackson_10|four
212 | 4_jackson_11|four
213 | 4_jackson_12|four
214 | 4_jackson_13|four
215 | 4_jackson_14|four
216 | 4_jackson_15|four
217 | 4_jackson_16|four
218 | 4_jackson_17|four
219 | 4_jackson_18|four
220 | 4_jackson_19|four
221 | 4_jackson_20|four
222 | 4_jackson_21|four
223 | 4_jackson_22|four
224 | 4_jackson_23|four
225 | 4_jackson_24|four
226 | 4_jackson_25|four
227 | 4_jackson_26|four
228 | 4_jackson_27|four
229 | 4_jackson_28|four
230 | 4_jackson_29|four
231 | 4_jackson_30|four
232 | 4_jackson_31|four
233 | 4_jackson_32|four
234 | 4_jackson_33|four
235 | 4_jackson_34|four
236 | 4_jackson_35|four
237 | 4_jackson_36|four
238 | 4_jackson_37|four
239 | 4_jackson_38|four
240 | 4_jackson_39|four
241 | 4_jackson_40|four
242 | 4_jackson_41|four
243 | 4_jackson_42|four
244 | 4_jackson_43|four
245 | 4_jackson_44|four
246 | 4_jackson_45|four
247 | 4_jackson_46|four
248 | 4_jackson_47|four
249 | 4_jackson_48|four
250 | 4_jackson_49|four
251 | 5_jackson_0|five
252 | 5_jackson_1|five
253 | 5_jackson_2|five
254 | 5_jackson_3|five
255 | 5_jackson_4|five
256 | 5_jackson_5|five
257 | 5_jackson_6|five
258 | 5_jackson_7|five
259 | 5_jackson_8|five
260 | 5_jackson_9|five
261 | 5_jackson_10|five
262 | 5_jackson_11|five
263 | 5_jackson_12|five
264 | 6_jackson_0|six
265 | 6_jackson_1|six
266 | 6_jackson_2|six
267 | 6_jackson_3|six
268 | 6_jackson_4|six
269 | 6_jackson_5|six
270 | 6_jackson_6|six
271 | 6_jackson_7|six
272 | 6_jackson_8|six
273 | 6_jackson_9|six
274 | 6_jackson_10|six
275 | 6_jackson_11|six
276 | 6_jackson_12|six
277 | 7_jackson_0|seven
278 | 7_jackson_1|seven
279 | 7_jackson_2|seven
280 | 7_jackson_3|seven
281 | 7_jackson_4|seven
282 | 7_jackson_5|seven
283 | 7_jackson_6|seven
284 | 7_jackson_7|seven
285 | 7_jackson_8|seven
286 | 7_jackson_9|seven
287 | 7_jackson_10|seven
288 | 7_jackson_11|seven
289 | 7_jackson_12|seven
290 | 8_jackson_0|eight
291 | 8_jackson_1|eight
292 | 8_jackson_2|eight
293 | 8_jackson_3|eight
294 | 8_jackson_4|eight
295 | 8_jackson_5|eight
296 | 8_jackson_6|eight
297 | 8_jackson_7|eight
298 | 8_jackson_8|eight
299 | 8_jackson_9|eight
300 | 8_jackson_10|eight
301 | 8_jackson_11|eight
302 | 8_jackson_12|eight
303 | 9_jackson_0|nine
304 | 9_jackson_1|nine
305 | 9_jackson_2|nine
306 | 9_jackson_3|nine
307 | 9_jackson_4|nine
308 | 9_jackson_5|nine
309 | 9_jackson_6|nine
310 | 9_jackson_7|nine
311 | 9_jackson_8|nine
312 | 9_jackson_9|nine
313 | 9_jackson_10|nine
314 | 9_jackson_11|nine
315 | 9_jackson_12|nine
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/0_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/0_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/0_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/0_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/1_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/1_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/1_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/1_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/2_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/2_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/2_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/2_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/3_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/3_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/3_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/3_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/4_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/4_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/4_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/4_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/5_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/5_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/5_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/5_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/6_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/6_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/6_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/6_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/7_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/7_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/7_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/7_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/8_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/8_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/8_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/8_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/9_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/9_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/test/wavs/9_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/9_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/0_jackson_10.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_10.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/0_jackson_11.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_11.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/0_jackson_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_2.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/0_jackson_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_3.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/0_jackson_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_4.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/0_jackson_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_5.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/0_jackson_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_6.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/0_jackson_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_7.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/0_jackson_8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_8.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/0_jackson_9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_9.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/1_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/1_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/1_jackson_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_2.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/1_jackson_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_3.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/1_jackson_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_4.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/1_jackson_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_5.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/1_jackson_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_6.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/1_jackson_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_7.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/1_jackson_8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_8.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/1_jackson_9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_9.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/2_jackson_10.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_10.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/2_jackson_11.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_11.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/2_jackson_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_2.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/2_jackson_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_3.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/2_jackson_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_4.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/2_jackson_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_5.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/2_jackson_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_6.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/2_jackson_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_7.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/2_jackson_8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_8.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/2_jackson_9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_9.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/3_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/3_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/3_jackson_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_2.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/3_jackson_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_3.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/3_jackson_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_4.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/3_jackson_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_5.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/3_jackson_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_6.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/3_jackson_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_7.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/3_jackson_8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_8.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/3_jackson_9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_9.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/4_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/4_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/4_jackson_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_2.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/4_jackson_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_3.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/4_jackson_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_4.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/4_jackson_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_5.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/4_jackson_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_6.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/4_jackson_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_7.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/4_jackson_8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_8.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/4_jackson_9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_9.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/5_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/5_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/5_jackson_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_2.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/5_jackson_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_3.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/5_jackson_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_4.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/5_jackson_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_5.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/5_jackson_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_6.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/5_jackson_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_7.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/5_jackson_8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_8.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/5_jackson_9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_9.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/6_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/6_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/6_jackson_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_2.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/6_jackson_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_3.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/6_jackson_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_4.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/6_jackson_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_5.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/6_jackson_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_6.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/6_jackson_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_7.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/6_jackson_8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_8.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/6_jackson_9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_9.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/7_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/7_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/7_jackson_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_2.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/7_jackson_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_3.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/7_jackson_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_4.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/7_jackson_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_5.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/7_jackson_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_6.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/7_jackson_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_7.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/7_jackson_8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_8.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/7_jackson_9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_9.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/8_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/8_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/8_jackson_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_2.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/8_jackson_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_3.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/8_jackson_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_4.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/8_jackson_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_5.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/8_jackson_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_6.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/8_jackson_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_7.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/8_jackson_8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_8.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/8_jackson_9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_9.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/9_jackson_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_0.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/9_jackson_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_1.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/9_jackson_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_2.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/9_jackson_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_3.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/9_jackson_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_4.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/9_jackson_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_5.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/9_jackson_6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_6.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/9_jackson_7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_7.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/9_jackson_8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_8.wav
--------------------------------------------------------------------------------
/hlp/tts/data/number/train/wavs/9_jackson_9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_9.wav
--------------------------------------------------------------------------------
/hlp/tts/utils/layers.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | class ConvDropBN(tf.keras.layers.Layer):
5 | """
6 | 卷积-Dropout-BatchNormalization块
7 | """
8 |
9 | def __init__(self, filters, kernel_size, activation, dropout_rate):
10 | """
11 | :param filters: 输出空间维数
12 | :param kernel_size: 卷积核大小
13 | :param activation: 激活方法
14 | :param dropout_rate: dropout采样率
15 | """
16 | super(ConvDropBN, self).__init__()
17 | self.conv1d = tf.keras.layers.Conv1D(filters, kernel_size,
18 | padding="same", activation=activation)
19 | self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
20 | self.norm = tf.keras.layers.BatchNormalization()
21 |
22 | def call(self, inputs):
23 | outputs = self.conv1d(inputs)
24 | outputs = self.dropout(outputs)
25 | outputs = self.norm(outputs)
26 | return outputs
27 |
28 |
29 | class DecoderPreNet(tf.keras.layers.Layer):
30 | """
31 | Decoder的pre_net,用于映射频谱样本的空间
32 | """
33 |
34 | def __init__(self, pre_net_units, pre_net_layers_num, pre_net_dropout_rate):
35 | """
36 | :param pre_net_units: 全连接层单元数
37 | :param pre_net_layers_num: pre_net层数
38 | :param pre_net_dropout_rate: dropout采样率
39 | """
40 | super().__init__()
41 | self.pre_net_units = pre_net_units
42 | self.pre_net_layers_num = pre_net_layers_num
43 | self.pre_net_dropout_rate = pre_net_dropout_rate
44 | self.pre_net_dense = [
45 | tf.keras.layers.Dense(units=self.pre_net_units, activation='relu')
46 | for i in range(self.pre_net_layers_num)
47 | ]
48 | self.dropout = tf.keras.layers.Dropout(rate=self.pre_net_dropout_rate)
49 |
50 | def call(self, inputs):
51 | outputs = inputs
52 | for layer in self.pre_net_dense:
53 | outputs = layer(outputs)
54 | outputs = self.dropout(outputs)
55 | return outputs
56 |
57 |
58 | class PostNet(tf.keras.layers.Layer):
59 | """
60 | Tacotron2的PostNet,包含n_conv_encoder数量的卷积层
61 | """
62 |
63 | def __init__(self, encoder_conv_num: int, post_net_conv_num: int, post_net_filters: int,
64 | post_net_kernel_sizes: int, post_net_dropout: float,
65 | post_net_activation: str, num_mel: int):
66 | """
67 | :param encoder_conv_num: encoder卷积层数量
68 | :param post_net_conv_num: post_net的卷积层数量
69 | :param post_net_filters: post_net卷积输出空间维数
70 | :param post_net_kernel_sizes: post_net卷积核大小
71 | :param post_net_dropout: post_net的dropout采样率
72 | :param post_net_activation: post_net卷积激活函数
73 | :param n_mels: 梅尔带数
74 | """
75 | super().__init__()
76 | self.conv_batch_norm = []
77 | for i in range(post_net_conv_num):
78 | if i == post_net_conv_num - 1:
79 | conv = ConvDropBN(filters=post_net_filters, kernel_size=post_net_kernel_sizes,
80 | activation=None, dropout_rate=post_net_dropout)
81 | else:
82 | conv = ConvDropBN(filters=post_net_filters, kernel_size=post_net_kernel_sizes,
83 | activation=post_net_activation, dropout_rate=post_net_dropout)
84 | self.conv_batch_norm.append(conv)
85 |
86 | self.fc = tf.keras.layers.Dense(units=num_mel, activation=None, name="frame_projection1")
87 |
88 | def call(self, inputs):
89 | x = tf.transpose(inputs, [0, 2, 1])
90 | for _, conv in enumerate(self.conv_batch_norm):
91 | x = conv(x)
92 | x = self.fc(x)
93 | x = tf.transpose(x, [0, 2, 1])
94 | return x
95 |
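A minimal usage sketch for these layers; the hyper-parameter values and shapes below are illustrative assumptions, not values taken from the project config. PostNet expects input in (batch, n_mels, frames) layout, while DecoderPreNet operates on (batch, frames, n_mels):

import tensorflow as tf

from hlp.tts.utils.layers import DecoderPreNet, PostNet

batch, frames, n_mels = 2, 100, 80  # illustrative sizes

pre_net = DecoderPreNet(pre_net_units=256, pre_net_layers_num=2, pre_net_dropout_rate=0.5)
post_net = PostNet(encoder_conv_num=5, post_net_conv_num=5, post_net_filters=512,
                   post_net_kernel_sizes=5, post_net_dropout=0.1,
                   post_net_activation="tanh", num_mel=n_mels)

decoder_frames = tf.random.normal((batch, frames, n_mels))
print(pre_net(decoder_frames).shape)   # (2, 100, 256)

coarse_mel = tf.random.normal((batch, n_mels, frames))
print(post_net(coarse_mel).shape)      # (2, 80, 100), a residual correction to the mel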
--------------------------------------------------------------------------------
/hlp/tts/utils/load_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import tensorflow as tf
4 | from hlp.tts.utils.text_preprocess import text_to_sequence_phoneme
5 |
6 |
7 | def load_data(train_data_path: str, max_len: int, vocab_size: int, batch_size: int, buffer_size: int,
8 | tokenized_type: str = "phoneme", dict_path: str = "", valid_data_split: float = 0.0,
9 | valid_data_path: str = "", max_train_data_size: int = 0, max_valid_data_size: int = 0):
10 | """
11 | 加载训练验证数据方法,非phoneme的方法将会保存字典
12 | 验证数据的优先级为:验证数据文件>从训练集划分验证集
13 | :param train_data_path: 文本数据路径
14 | :param max_len: 文本序列最大长度
15 | :param vocab_size: 词汇大小
16 | :param tokenized_type: 分词类型,默认按音素分词,模式:phoneme(音素)/word(单词)/char(字符)
17 | :param dict_path: 字典路径,若使用phoneme则不用传
18 | :param buffer_size: Dataset加载缓存大小
19 | :param batch_size: Dataset加载批大小
20 | :param valid_data_split: 用于从训练数据中划分验证数据
21 | :param valid_data_path: 验证数据文本路径
22 | :param max_train_data_size: 最大训练数据量
23 | :param max_valid_data_size: 最大验证数据量
24 | :return: 返回train_dataset, valid_dataset, steps_per_epoch, valid_steps_per_epoch
25 | """
26 | if not os.path.exists(train_data_path):
27 | print("加载的训练验证数据文件不存在,请先执行pre_treat模式后重试")
28 | exit(0)
29 |
30 | print("正在加载训练数据...")
31 | train_audio_data_pair, train_sentence_data = read_data(data_path=train_data_path, num_examples=max_train_data_size)
32 |
33 | valid_flag = True # whether validation is enabled
34 | valid_steps_per_epoch = 0
35 |
36 | # Split off validation data, depending on whether a validation data file was provided
37 | if valid_data_path != "":
38 | print("正在加载验证数据...")
39 | valid_audio_data_pair, valid_sentence_data = read_data(data_path=valid_data_path,
40 | num_examples=max_valid_data_size)
41 | elif valid_data_split != 0.0:
42 | print("从训练数据中划分验证数据...")
43 | train_size = int(len(train_audio_data_pair) * (1.0 - valid_data_split))
44 | valid_audio_data_pair = train_audio_data_pair[train_size:]
45 | valid_sentence_data = train_sentence_data[train_size:]
46 | train_audio_data_pair = train_audio_data_pair[:train_size]
47 | train_sentence_data = train_sentence_data[:train_size]
48 | else:
49 | print("没有验证数据.")
50 | valid_flag = False
51 |
52 | # Convert the text to sequences according to the tokenization type
53 | if tokenized_type == "phoneme":
54 | train_sentence_sequences = text_to_sequence_phoneme(texts=train_sentence_data, max_len=max_len)
55 | if valid_flag:
56 | valid_sentence_sequences = text_to_sequence_phoneme(texts=valid_sentence_data, max_len=max_len)
57 | else:
58 | if dict_path == "":
59 | print("请在加载数据时,传入字典保存路径")
60 | exit(0)
61 | tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', oov_token="", num_words=vocab_size)
62 | tokenizer.fit_on_texts(train_sentence_data)
63 | train_sentence_sequences = tokenizer.texts_to_sequences(train_sentence_data)
64 | train_sentence_sequences = tf.keras.preprocessing.sequence.pad_sequences(train_sentence_sequences,
65 | maxlen=max_len, padding="post")
66 | with open(dict_path, 'w', encoding="utf-8") as dict_file:
67 | dict_file.write(tokenizer.to_json())
68 |
69 | if valid_flag:
70 | valid_sentence_sequences = tokenizer.texts_to_sequences(valid_sentence_data)
71 | valid_sentence_sequences = tf.keras.preprocessing.sequence.pad_sequences(valid_sentence_sequences,
72 | maxlen=max_len, padding="post")
73 |
74 | train_dataset = _to_dataset(data=(train_audio_data_pair, train_sentence_sequences),
75 | batch_size=batch_size, buffer_size=buffer_size)
76 | if valid_flag:
77 | valid_dataset = _to_dataset(data=(valid_audio_data_pair, valid_sentence_sequences),
78 | batch_size=batch_size, buffer_size=buffer_size)
79 | valid_steps_per_epoch = len(valid_sentence_sequences) // batch_size
80 | else:
81 | valid_dataset = None
82 |
83 | steps_per_epoch = len(train_sentence_sequences) // batch_size
84 |
85 | print("训练验证数据加载完毕")
86 | return train_dataset, valid_dataset, steps_per_epoch, valid_steps_per_epoch
87 |
88 |
89 | def _to_dataset(data: tuple, batch_size: int, buffer_size: int):
90 | """
91 | 将data封装成tf.data.Dataset
92 | :param data: 要封装的数据元组
93 | :param buffer_size: Dataset加载缓存大小
94 | :param batch_size: Dataset加载批大小
95 | :return: dataset
96 | """
97 | dataset = tf.data.Dataset.from_tensor_slices(data). \
98 | cache().shuffle(buffer_size).prefetch(tf.data.experimental.AUTOTUNE)
99 | dataset = dataset.map(_process_audio_sentence_pairs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
100 | dataset = dataset.batch(batch_size, drop_remainder=True)
101 |
102 | return dataset
103 |
104 |
105 | def read_data(data_path: str, num_examples: int):
106 | """
107 | :param data_path: 需要读取整理的数据文件路径
108 | :param num_examples: 读取的数据量大小
109 | :return: 返回读取的音频数据对和句子数据
110 | """
111 | audio_data_pair = []
112 | sentence_data = []
113 | with open(data_path, 'r', encoding="utf-8") as data_file:
114 | lines = data_file.read().strip().split('\n')
115 | if num_examples != 0:
116 | lines = lines[:num_examples]
117 |
118 | for line in lines:
119 | line = line.strip().strip("\n").replace("/", " ").split("\t")
120 | sentence_data.append(line[-1])
121 | line.pop(-1)
122 | audio_data_pair.append(line)
123 |
124 | return audio_data_pair, sentence_data
125 |
126 |
127 | def read_npy_file(filename):
128 | """
129 | 专门用于匹配dataset的map读取文件的方法
130 | :param filename: 传入的文件名张量
131 | :return: 返回读取的数据
132 | """
133 | data = np.load(filename.numpy().decode())
134 | return data.astype(np.float32)
135 |
136 |
137 | def _process_audio_sentence_pairs(audio_data_pair: tf.Tensor, sentence: tf.Tensor):
138 | """
139 | 用于处理音频句子对,将其转化为张量
140 | :param audio_data_pair: 音频相关数据对,mel、mag、stop_token保存文件
141 | :param sentence: 音频句子对
142 | :return: mel, mag, stop_token, sentence
143 | """
144 | [mel, ] = tf.py_function(read_npy_file, [audio_data_pair[0]], [tf.float32, ])
145 | [stop_token, ] = tf.py_function(read_npy_file, [audio_data_pair[2]], [tf.float32, ])
146 |
147 | return mel, stop_token, sentence
148 |
149 |
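A minimal call sketch for load_data; the data file path is a placeholder for the dataset-info file written by the pre_treat step, and the numeric values are illustrative only:

from hlp.tts.utils.load_dataset import load_data

train_dataset, valid_dataset, steps_per_epoch, valid_steps = load_data(
    train_data_path="data/LJSpeech/processed/train.txt",  # hypothetical path
    max_len=100, vocab_size=1000, batch_size=8, buffer_size=1000,
    tokenized_type="phoneme", valid_data_split=0.1)

# Each batch follows _process_audio_sentence_pairs: (mel, stop_token, sentence)
for mel, stop_token, sentence in train_dataset.take(1):
    print(mel.shape, stop_token.shape, sentence.shape)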
--------------------------------------------------------------------------------
/hlp/tts/utils/pre_treat.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import tensorflow as tf
4 | from hlp.tts.utils import text_preprocess
5 | from hlp.tts.utils.spec import get_spectrograms
6 |
7 |
8 | def preprocess_lj_speech_raw_data(metadata_path: str, audio_dir: str, dataset_infos_file: str, max_length: int,
9 | pre_emphasis: float, n_fft: int, n_mels: int, hop_length: int,
10 | win_length: int, max_db: int, ref_db: int, top_db: int,
11 | spectrum_data_dir: str, audio_suffix: str = ".wav",
12 | tokenized_type: str = "phoneme", cmu_dict_path: str = ""):
13 | """
14 | 用于处理LJSpeech数据集的方法,将数据整理为<音频地址, 句子>的
15 | 形式,这样方便后续进行分批读取
16 | :param metadata_path: 元数据CSV文件路径
17 | :param audio_dir: 音频目录路径
18 | :param dataset_infos_file: 保存处理之后的数据路径
19 | :param max_length: 最大序列长度
20 | :param audio_suffix: 音频的类型后缀
21 | :param tokenized_type: 分词类型,默认按音素分词,模式:phoneme(音素)/word(单词)/char(字符)
22 | :param cmu_dict_path: cmu音素字典路径,使用phoneme时必传
23 | :param spectrum_data_dir: 保存mel和mag数据目录
24 | :param pre_emphasis: 预加重
25 | :param n_fft: FFT窗口大小
26 | :param n_mels: 产生的梅尔带数
27 | :param hop_length: 帧移
28 | :param win_length: 每一帧音频都由window()加窗,窗长win_length,然后用零填充以匹配N_FFT
29 | :param max_db: 峰值分贝值
30 | :param ref_db: 参考分贝值
31 | :param top_db: 峰值以下的阈值分贝值
32 | :return: 无返回值
33 | """
34 | audios_list = os.listdir(audio_dir)
35 | if not os.path.exists(metadata_path):
36 | print("元数据CSV文件路径不存在,请检查重试")
37 | exit(0)
38 |
39 | if not os.path.exists(spectrum_data_dir):
40 | os.makedirs(spectrum_data_dir)
41 |
42 | count = 0
43 | with open(metadata_path, 'r', encoding='utf-8') as raw_file, \
44 | open(dataset_infos_file, 'w', encoding='utf-8') as ds_infos_file:
45 | for line in raw_file:
46 | line = line.strip('\n').replace('/', '')
47 | pair = line.split('|')
48 | audio_file = pair[0] + audio_suffix
49 | mel_file = spectrum_data_dir + pair[0] + ".mel.npy"
50 | mag_file = spectrum_data_dir + pair[0] + ".mag.npy"
51 | stop_token_file = spectrum_data_dir + pair[0] + ".stop.npy"
52 |
53 | if audios_list.count(audio_file) < 1:
54 | continue
55 |
56 | text = dispatch_tokenized_func(text=pair[1], tokenized_type=tokenized_type,
57 | cmu_dict_path=cmu_dict_path)
58 | mel, mag = get_spectrograms(audio_path=audio_dir + audio_file, pre_emphasis=pre_emphasis,
59 | n_fft=n_fft, n_mels=n_mels, hop_length=hop_length,
60 | win_length=win_length, max_db=max_db, ref_db=ref_db, top_db=top_db)
61 | stop_token = np.zeros(shape=max_length)
62 | stop_token[len(mel) - 1:] = 1
63 |
64 | mel = tf.keras.preprocessing.sequence.pad_sequences(tf.expand_dims(mel, axis=0),
65 | maxlen=max_length, dtype="float32", padding="post")
66 | mel = tf.squeeze(mel, axis=0)
67 | mel = tf.transpose(mel, [1, 0])
68 |
69 | np.save(file=mel_file, arr=mel)
70 | np.save(file=mag_file, arr=mag)
71 | np.save(file=stop_token_file, arr=stop_token)
72 |
73 | ds_infos_file.write(mel_file + "\t" + mag_file + "\t" + stop_token_file + "\t" + text + "\n")
74 |
75 | count += 1
76 | print('\r已处理音频句子对数:{}'.format(count), flush=True, end='')
77 |
78 | print("\n数据处理完毕,共计{}条语音数据".format(count))
79 |
80 |
81 | def dispatch_tokenized_func(text: str, tokenized_type: str = "phoneme", cmu_dict_path: str = ""):
82 | """
83 | 用来整合目前所有分词处理方法,通过字典匹配进行调用,默认使用phoneme分词
84 | :param text: 句子文本
85 | :param tokenized_type: 分词类型,默认按音素分词,模式:phoneme(音素)/word(单词)/char(字符)
86 | :param cmu_dict_path: cmu音素字典路径,使用phoneme时必传
87 | :return: 按照对应方法处理好的文本序列
88 | """
89 | operation = {
90 | "phoneme": lambda: text_preprocess.text_to_phonemes(text=text,
91 | cmu_dict_path=cmu_dict_path),
92 | "word": lambda: text_preprocess.text_to_words(text=text),
93 | "char": lambda: text_preprocess.text_to_chars(text=text)
94 | }
95 |
96 | return operation.get(tokenized_type, operation["phoneme"])()
97 |
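A small sketch of how the dispatcher can be called, assuming the text_preprocess helpers referenced above are available; the CMU dictionary path is a placeholder:

from hlp.tts.utils.pre_treat import dispatch_tokenized_func

# Word- and char-level tokenization need no dictionary
print(dispatch_tokenized_func("Printing in the only sense", tokenized_type="word"))
print(dispatch_tokenized_func("hello", tokenized_type="char"))

# Phoneme tokenization requires the CMU dictionary (hypothetical path)
print(dispatch_tokenized_func("hello world", tokenized_type="phoneme",
                              cmu_dict_path="data/cmudict-0.7b"))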
--------------------------------------------------------------------------------
/hlp/tts/utils/spec.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | import librosa
4 | import numpy as np
5 | import scipy
6 | import tensorflow as tf
7 |
8 |
9 | def get_spectrograms(audio_path: str, pre_emphasis: float, n_fft: int, n_mels: int,
10 | hop_length: int, win_length: int, max_db: int, ref_db: int, top_db: int):
11 | """
12 | 处理音频文件,转换成梅尔频谱和线性谱
13 | :param audio_path: 音频路径
14 | :param pre_emphasis: 预加重
15 | :param n_fft: FFT窗口大小
16 | :param n_mels: 产生的梅尔带数
17 | :param hop_length: 帧移
18 | :param win_length: 每一帧音频都由window()加窗,窗长win_length,然后用零填充以匹配N_FFT
19 | :param max_db: 峰值分贝值
20 | :param ref_db: 参考分贝值
21 | :param top_db: 峰值以下的阈值分贝值
22 | :return: 返回归一化后的梅尔频谱和线性谱,形状分别为(T, n_mels)和(T, 1+n_fft//2)
23 | """
24 | y, sr = librosa.load(audio_path, sr=None)
25 | y, _ = librosa.effects.trim(y, top_db=top_db)
26 | y = np.append(y[0], y[1:] - pre_emphasis * y[:-1])
27 | # Short-time Fourier transform
28 | linear = librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
29 |
30 | # Magnitude spectrogram
31 | mag = np.abs(linear) # (1+n_fft//2, T)
32 | # Mel spectrogram
33 | mel_basis = librosa.filters.mel(sr, n_fft, n_mels) # (n_mels, 1+n_fft//2)
34 | mel = np.dot(mel_basis, mag) # (n_mels, t)
35 | mel = 20 * np.log10(np.maximum(1e-5, mel))
36 | mag = 20 * np.log10(np.maximum(1e-5, mag))
37 | mel = np.clip((mel - ref_db + max_db) / max_db, 1e-8, 1)
38 | mag = np.clip((mag - ref_db + max_db) / max_db, 1e-8, 1)
39 | mel = mel.T.astype(np.float32) # (T, n_mels)
40 | mag = mag.T.astype(np.float32) # (T, 1+n_fft//2)
41 | return mel, mag
42 |
43 |
44 | def melspectrogram2wav(mel, max_db, ref_db, sr, n_fft, n_mels, preemphasis, n_iter, hop_length, win_length):
45 | """
46 | 从线性幅度谱图生成wav文件
47 | :param mel: 梅尔谱
48 | :param sr: 采样率
49 | :param preemphasis: 预加重
50 | :param n_fft: FFT窗口大小
51 | :param n_mels: 产生的梅尔带数
52 | :param hop_length: 帧移
53 | :param win_length: 每一帧音频都由window()加窗,窗长win_length,然后用零填充以匹配N_FFT
54 | :param max_db: 峰值分贝值
55 | :param ref_db: 参考分贝值
56 | :param n_iter: 迭代指针
57 | """
58 | mel = (np.clip(mel, 0, 1) * max_db) - max_db + ref_db
59 | # Convert back to a magnitude spectrogram
60 | mel = np.power(10.0, mel * 0.05)
61 | m = _mel_to_linear_matrix(sr, n_fft, n_mels)
62 | mag = np.dot(m, mel)
63 | # Waveform reconstruction
64 | wav = griffin_lim(mag, n_iter, n_fft, hop_length, win_length)
65 | wav = scipy.signal.lfilter([1], [1, -preemphasis], wav)
66 | # Trim silence
67 | wav, _ = librosa.effects.trim(wav)
68 | return wav.astype(np.float32)
69 |
70 |
71 | def _mel_to_linear_matrix(sr, n_fft, n_mels):
72 | m = librosa.filters.mel(sr, n_fft, n_mels)
73 | m_t = np.transpose(m)
74 | p = np.matmul(m, m_t)
75 | d = [1.0 / x if np.abs(x) > 1.0e-8 else x for x in np.sum(p, axis=0)]
76 | return np.matmul(m_t, np.diag(d))
77 |
78 |
79 | def griffin_lim(spectrogram, n_iter, n_fft, hop_length, win_length):
80 | """
81 | 已知幅度谱,未知相位谱,通过迭代生成相位谱,并用已
82 | 知的幅度谱和计算得出的相位谱,重建语音波形的方法
83 | :param spectrogram: 幅度谱
84 | :param n_iter: 迭代指针
85 | :param n_fft: FFT窗口大小
86 | :param hop_length: 帧移
87 | :param win_length: 窗长win_length
88 | :return:
89 | """
90 | x_best = copy.deepcopy(spectrogram)
91 | for i in range(n_iter):
92 | x_t = invert_spectrogram(x_best, hop_length, win_length)
93 | est = librosa.stft(x_t, n_fft, hop_length, win_length=win_length)
94 | phase = est / np.maximum(1e-8, np.abs(est))
95 | x_best = spectrogram * phase
96 | x_t = invert_spectrogram(x_best, hop_length, win_length)
97 | y = np.real(x_t)
98 | return y
99 |
100 |
101 | def invert_spectrogram(spectrogram, hop_length, win_length):
102 | """
103 | spectrogram: [f, t]
104 | :param spectrogram: 幅度谱
105 | :param hop_length: 帧移
106 | :param win_length: 窗长win_length
107 | """
108 | return librosa.istft(spectrogram, hop_length, win_length=win_length, window="hann")
109 |
110 |
111 | def spec_distance(mel1, mel2):
112 | """
113 | 计算mel谱之间的欧式距离
114 | :param mel1: 预测mel
115 | :param mel2: ground-true mel
116 | :return 两者之间的欧氏距离
117 | """
118 | mel1 = tf.transpose(mel1, [0, 2, 1])
119 | score = np.sqrt(np.sum((mel1 - mel2) ** 2))
120 | return score
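A round-trip sketch tying these functions together: extract a mel spectrogram and resynthesize a waveform with Griffin-Lim. The audio path and parameter values are illustrative, the code assumes the older positional librosa API these utilities were written against, and soundfile is used here only to write the result:

import soundfile as sf

from hlp.tts.utils.spec import get_spectrograms, melspectrogram2wav

mel, mag = get_spectrograms(audio_path="LJ001-0001.wav", pre_emphasis=0.97,
                            n_fft=1024, n_mels=80, hop_length=256, win_length=1024,
                            max_db=100, ref_db=20, top_db=15)

# melspectrogram2wav expects the mel in (n_mels, T) layout, so transpose it back
wav = melspectrogram2wav(mel.T, max_db=100, ref_db=20, sr=22050, n_fft=1024,
                         n_mels=80, preemphasis=0.97, n_iter=50,
                         hop_length=256, win_length=1024)
sf.write("reconstructed.wav", wav, 22050)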
--------------------------------------------------------------------------------
/hlp/tts/wavernn/generator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from hlp.tts.wavernn.preprocess import read_data, label_2_float
5 |
6 |
7 | # Data generator
8 | def generator(wav_name_list, batch_size, sample_rate, peak_norm, voc_mode, bits, mu_law, wave_path, voc_pad, hop_length,
9 | voc_seq_len, preemphasis, n_fft, n_mels, win_length, max_db, ref_db, top_db):
10 | # A generator can only be iterated once, so while True is needed to keep producing data across epochs
11 | while True:
12 | # Shuffle all the data once per epoch
13 | # order = np.random.choice(len(wav_name_list), len(wav_name_list), replace=False)
14 | # audio_data_path_list = [wav_name_list[i] for i in order]
15 | audio_data_path_list = wav_name_list
16 | batchs = len(wav_name_list) // batch_size
17 | for idx in range(batchs):
18 | # Take a batch of audio names
19 | wav_name_list2 = audio_data_path_list[idx * batch_size: (idx + 1) * batch_size]
20 |
21 | # Load the audio data
22 | input_mel, input_sig = read_data(
23 | wave_path, sample_rate, peak_norm, voc_mode, bits, mu_law, wav_name_list2, preemphasis, n_fft, n_mels,
24 | hop_length, win_length, max_db, ref_db, top_db
25 | )
26 |
27 | dataset = collate_vocoder(input_mel, input_sig, voc_seq_len, hop_length, voc_pad, voc_mode, bits)
28 | # input_mel = tf.convert_to_tensor(input_mel[0])
29 | # input_sig = tf.convert_to_tensor(input_sig[0])
30 | yield dataset
31 |
32 |
33 | def collate_vocoder(input_mel: tf.Tensor, input_sig: tf.Tensor, voc_seq_len, hop_length, voc_pad, voc_mode, bits):
34 | # print(tf.shape(input_mel[0]))
35 | mel_win = voc_seq_len // hop_length + 2 * voc_pad
36 |
37 | max_offsets = [x.shape[-1] - 2 - (mel_win + 2 * voc_pad) for x in input_mel]
38 | mel_offsets = [np.random.randint(0, offset) for offset in max_offsets]
39 | sig_offsets = [(offset + voc_pad) * hop_length for offset in mel_offsets]
40 |
41 | mels = [x[:, mel_offsets[i]:mel_offsets[i] + mel_win] for i, x in enumerate(input_mel)]
42 | # mels = [x[:, mel_offsets[i]:mel_offsets[i] + mel_win] for i, x in enumerate(input_mel)]
43 |
44 | labels = [x[sig_offsets[i]:sig_offsets[i] + voc_seq_len + 1] for i, x in enumerate(input_sig)]
45 |
46 | mels = np.stack(mels).astype(np.float32)
47 | labels = np.stack(labels).astype(np.int64)
48 |
49 | mels = tf.convert_to_tensor(mels)
50 | labels = tf.convert_to_tensor(labels)
51 |
52 | x = labels[:, :voc_seq_len]
53 | y = labels[:, 1:]
54 | bits = 16 if voc_mode == 'MOL' else bits
55 |
56 | x = label_2_float(tf.cast(x, dtype=float), bits)
57 |
58 | if voc_mode == 'MOL':
59 | y = label_2_float(tf.cast(y, dtype=float), bits)
60 |
61 | dataset = [x, y, mels]
62 | return dataset
63 |
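A self-contained sketch of collate_vocoder on synthetic data (all sizes below are illustrative), showing the shapes of the network inputs x, targets y and mel windows it produces in RAW mode:

import numpy as np

from hlp.tts.wavernn.generator import collate_vocoder

hop_length, voc_pad, voc_seq_len, bits = 256, 2, 1024, 9
n_mels, frames = 80, 60

# One synthetic (n_mels, frames) mel / quantized-signal pair
mels = [np.random.rand(n_mels, frames).astype(np.float32)]
sigs = [np.random.randint(0, 2 ** bits, size=frames * hop_length).astype(np.int64)]

x, y, mel_windows = collate_vocoder(mels, sigs, voc_seq_len, hop_length, voc_pad, "RAW", bits)
print(x.shape, y.shape, mel_windows.shape)  # (1, 1024) (1, 1024) (1, 80, 8)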
--------------------------------------------------------------------------------
/hlp/tts/wavernn/preprocess.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import librosa
5 |
6 | sys.path.append(os.path.abspath(__file__)[:os.path.abspath(__file__).rfind("\\hlp\\")])
7 | from hlp.tts.utils.spec import get_spectrograms
8 |
9 | import numpy as np
10 | import tensorflow as tf
11 |
12 |
13 | # Process an audio file
14 | def load_wav(path, sample_rate):
15 | y = librosa.load(path, sr=sample_rate)[0]
16 | return y
17 |
18 |
19 | def process_wav(path, sample_rate, peak_norm, voc_mode, bits, mu_law, preemphasis, n_fft, n_mels, hop_length, win_length
20 | , max_db, ref_db, top_db):
21 | y = load_wav(path, sample_rate)
22 | peak = np.abs(y).max()
23 | if peak_norm or peak > 1.0:
24 | y /= peak
25 |
26 | mel, _ = get_spectrograms(path, preemphasis, n_fft, n_mels, hop_length, win_length, max_db, ref_db, top_db)
27 | mel = tf.transpose(mel, (1, 0)).numpy()
28 | # mel = melspectrogram(y)
29 | if voc_mode == 'RAW':
30 | quant = encode_mu_law(y, mu=2 ** bits) if mu_law else float_2_label(y, bits=bits)
31 | elif voc_mode == 'MOL':
32 | quant = float_2_label(y, bits=16)
33 |
34 | return mel.astype(np.float32), quant.astype(np.int64)
35 |
36 |
37 | def read_data(path, sample_rate, peak_norm, voc_mode, bits, mu_law, wav_name_list2, preemphasis, n_fft, n_mels,
38 | hop_length, win_length, max_db, ref_db, top_db):
39 | mel_list = []
40 | sig_list = []
41 | for file in wav_name_list2:
42 | m, x = process_wav(path + file + '.wav', sample_rate, peak_norm, voc_mode, bits, mu_law, preemphasis, n_fft,
43 | n_mels, hop_length, win_length, max_db, ref_db, top_db)
44 |
45 | mel_list.append(m)
46 | sig_list.append(x)
47 |
48 | return mel_list, sig_list
49 |
50 |
51 | def encode_mu_law(x, mu):
52 | mu = mu - 1
53 | fx = np.sign(x) * np.log(1 + mu * np.abs(x)) / np.log(1 + mu)
54 | return np.floor((fx + 1) / 2 * mu + 0.5)
55 |
56 |
57 | def float_2_label(x, bits):
58 | assert abs(x).max() <= 1.0
59 | x = (x + 1.) * (2 ** bits - 1) / 2
60 | return x.clip(0, 2 ** bits - 1)
61 |
62 |
63 | # Extract audio file names
64 | def process_wav_name(wav_path):
65 | datanames = os.listdir(wav_path)
66 | wav_name_list = []
67 | for i in datanames:
68 | wav_name_list.append(i[:10])
69 | return wav_name_list
70 |
71 |
72 | def label_2_float(x, bits):
73 | return 2 * x / (2 ** bits - 1.) - 1.
74 |
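A quick numeric sketch of the quantization helpers above: float_2_label and label_2_float are inverses of each other, and encode_mu_law compresses the signal before quantization:

import numpy as np

from hlp.tts.wavernn.preprocess import encode_mu_law, float_2_label, label_2_float

bits = 9
y = np.linspace(-1.0, 1.0, 5)

labels = float_2_label(y, bits)          # values in [0, 2 ** bits - 1]
restored = label_2_float(labels, bits)   # back in [-1, 1]
print(np.allclose(y, restored))          # True

print(encode_mu_law(y, mu=2 ** bits))    # mu-law companded labels in [0, 511]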
--------------------------------------------------------------------------------
/hlp/tts/wavernn/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import tensorflow as tf
5 |
6 |
7 | def load_checkpoint(model: tf.keras.Model, checkpoint_dir: str, checkpoint_save_size: int):
8 | """
9 | 恢复检查点
10 | """
11 | # 如果检查点存在就恢复,如果不存在就重新创建一个
12 | checkpoint = tf.train.Checkpoint(wavernn=model)
13 | ckpt_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=checkpoint_save_size)
14 |
15 | if os.path.exists(checkpoint_dir):
16 | if ckpt_manager.latest_checkpoint:
17 | checkpoint.restore(ckpt_manager.latest_checkpoint).expect_partial()
18 | else:
19 | os.makedirs(checkpoint_dir, exist_ok=True)
20 | # if execute_type == "generate":
21 | # print("没有检查点,请先执行train模式")
22 | # exit(0)
23 |
24 | return ckpt_manager
25 |
26 |
27 | def log_sum_exp(x):
28 | """ numerically stable log_sum_exp implementation that prevents overflow """
29 | # TF ordering
30 | dim = len(x.shape) - 1
31 | m = tf.reduce_max(x, axis=dim)
32 | m2 = tf.reduce_max(x, axis=dim, keepdims=True)
33 | return m + tf.math.log(tf.reduce_sum(tf.exp(x - m2), axis=dim))
34 |
35 |
36 | # It is adapted from https://github.com/r9y9/wavenet_vocoder/blob/master/wavenet_vocoder/mixture.py
37 | def Discretized_Mix_Logistic_Loss(y_hat, y, num_classes=65536,
38 | log_scale_min=None, reduce=True):
39 | if log_scale_min is None:
40 | log_scale_min = float(np.log(1e-14))
41 | y_hat = tf.transpose(y_hat, (0, 2, 1))
42 |
43 | # assert y_hat.dim() == 3
44 | assert y_hat.shape[1] % 3 == 0
45 | nr_mix = y_hat.shape[1] // 3
46 |
47 | # (B x T x C)
48 | y_hat = tf.transpose(y_hat, (0, 2, 1))
49 |
50 | # unpack parameters. (B, T, num_mixtures) x 3
51 | logit_probs = y_hat[:, :, :nr_mix]
52 | means = y_hat[:, :, nr_mix:2 * nr_mix]
53 | log_scales = tf.clip_by_value(y_hat[:, :, 2 * nr_mix:3 * nr_mix], clip_value_min=log_scale_min,
54 | clip_value_max=10000000)
55 |
56 | # B x T x 1 -> B x T x num_mixtures
57 | y = tf.tile(y, (1, 1, means.shape[-1]))
58 | centered_y = y - means
59 | inv_stdv = tf.exp(-log_scales)
60 | plus_in = inv_stdv * (centered_y + 1. / (num_classes - 1))
61 | cdf_plus = tf.sigmoid(plus_in)
62 | min_in = inv_stdv * (centered_y - 1. / (num_classes - 1))
63 | cdf_min = tf.sigmoid(min_in)
64 |
65 | # log probability for edge case of 0 (before scaling)
66 | # equivalent: torch.log(F.sigmoid(plus_in))
67 | log_cdf_plus = plus_in - tf.nn.softplus(plus_in)
68 |
69 | # log probability for edge case of 255 (before scaling)
70 | # equivalent: (1 - F.sigmoid(min_in)).log()
71 | log_one_minus_cdf_min = -tf.nn.softplus(min_in)
72 |
73 | # probability for all other cases
74 | cdf_delta = cdf_plus - cdf_min
75 |
76 | mid_in = inv_stdv * centered_y
77 | # log probability in the center of the bin, to be used in extreme cases
78 | # (not actually used in our code)
79 | log_pdf_mid = mid_in - log_scales - 2. * tf.nn.softplus(mid_in)
80 |
81 | # tf equivalent
82 | """
83 | log_probs = tf.where(x < -0.999, log_cdf_plus,
84 | tf.where(x > 0.999, log_one_minus_cdf_min,
85 | tf.where(cdf_delta > 1e-5,
86 | tf.log(tf.maximum(cdf_delta, 1e-12)),
87 | log_pdf_mid - np.log(127.5))))
88 | """
89 | # TODO: cdf_delta <= 1e-5 actually can happen. How can we choose the value
90 | # for num_classes=65536 case? 1e-7? not sure..
91 | inner_inner_cond = tf.cast((cdf_delta > 1e-5), dtype=float)
92 |
93 | inner_inner_out = inner_inner_cond * \
94 | tf.math.log(tf.clip_by_value(cdf_delta, clip_value_min=1e-12, clip_value_max=100000000)) + \
95 | (1. - inner_inner_cond) * (log_pdf_mid - np.log((num_classes - 1) / 2))
96 | inner_cond = tf.cast((y > 0.999), dtype=float)
97 | inner_out = inner_cond * log_one_minus_cdf_min + (1. - inner_cond) * inner_inner_out
98 | cond = tf.cast((y < -0.999), dtype=float)
99 | log_probs = cond * log_cdf_plus + (1. - cond) * inner_out
100 |
101 | log_probs = log_probs + tf.nn.log_softmax(logit_probs, -1)
102 |
103 | if reduce:
104 | return -tf.reduce_mean(log_sum_exp(log_probs))
105 | else:
106 | return -tf.expand_dims(log_sum_exp(log_probs), axis=-1)
107 |
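A tiny sanity check for log_sum_exp, comparing it against TensorFlow's built-in tf.reduce_logsumexp over the last axis, which is what the implementation above computes:

import tensorflow as tf

from hlp.tts.wavernn.utils import log_sum_exp

x = tf.random.normal((4, 10))
print(float(tf.reduce_max(tf.abs(log_sum_exp(x) - tf.reduce_logsumexp(x, axis=-1)))))  # ~0.0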
--------------------------------------------------------------------------------
/hlp/tts/wavernn/wavernn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 |
5 | class ResBlock(tf.keras.layers.Layer):
6 | def __init__(self, dims):
7 | super().__init__()
8 | self.conv1 = tf.keras.layers.Conv1D(filters=dims, kernel_size=1, use_bias=False)
9 | self.conv2 = tf.keras.layers.Conv1D(filters=dims, kernel_size=1, use_bias=False)
10 | self.batch_norm1 = tf.keras.layers.BatchNormalization()
11 | self.batch_norm2 = tf.keras.layers.BatchNormalization()
12 |
13 | def call(self, x):
14 | residual = x
15 | x = self.conv1(x)
16 | x = self.batch_norm1(x)
17 | x = tf.nn.relu(x)
18 | x = self.conv2(x)
19 | x = self.batch_norm2(x)
20 | return x + residual
21 |
22 |
23 | class MelResNet(tf.keras.Model):
24 | def __init__(self, res_blocks, in_dims, compute_dims, res_out_dims, pad):
25 | super().__init__()
26 | k_size = pad * 2 + 1
27 | self.conv_in = tf.keras.layers.Conv1D(compute_dims, kernel_size=k_size, use_bias=False)
28 | self.batch_norm = tf.keras.layers.BatchNormalization()
29 |
30 | self.layer = []
31 | # self.layer = tf.keras.Sequential()
32 | for i in range(res_blocks):
33 | self.layer.append(ResBlock(compute_dims))
34 | self.conv_out = tf.keras.layers.Conv1D(res_out_dims, kernel_size=1)
35 |
36 | def call(self, x):
37 | x = tf.transpose(x, [0, 2, 1])
38 | x = self.conv_in(x)
39 | x = self.batch_norm(x)
40 | x = tf.nn.relu(x)
41 | for f in self.layer:
42 | x = f(x)
43 |
44 | x = self.conv_out(x)
45 | x = tf.transpose(x, (0, 2, 1))
46 | return x
47 |
48 |
49 | class Stretch2d(tf.keras.layers.Layer):
50 | def __init__(self, x_scale, y_scale):
51 | super().__init__()
52 | self.x_scale = x_scale
53 | self.y_scale = y_scale
54 |
55 | def call(self, x):
56 | b, c, h, w = x.shape
57 |
58 | x = tf.expand_dims(x, axis=-1)
59 | x = tf.expand_dims(x, axis=3)
60 |
61 | x = tf.tile(x, [1, 1, 1, self.y_scale, 1, self.x_scale])
62 | return tf.reshape(x, (b, c, h * self.y_scale, w * self.x_scale))
63 |
64 |
65 | class UpsampleNetwork(tf.keras.layers.Layer):
66 | def __init__(self, feat_dims, upsample_scales, compute_dims,
67 | res_blocks, res_out_dims, pad):
68 | super().__init__()
69 | total_scale = np.cumproduct(upsample_scales)[-1]
70 | self.indent = pad * total_scale
71 | self.resnet = MelResNet(res_blocks, feat_dims, compute_dims, res_out_dims, pad)
72 | self.resnet_stretch = Stretch2d(total_scale, 1)
73 | self.up_layers1 = []
74 | self.up_layers2 = []
75 | for scale in upsample_scales:
76 | k_size = (1, scale * 2 + 1)
77 | padding = (0, scale)
78 | stretch = Stretch2d(scale, 1)
79 | conv = tf.keras.layers.Conv2D(filters=1, kernel_size=k_size,
80 | kernel_initializer=tf.constant_initializer(1. / k_size[1]),
81 | padding="same", use_bias=False)
82 | self.up_layers1.append(stretch)
83 | self.up_layers2.append(conv)
84 |
85 | def call(self, m):
86 | aux = self.resnet(m)
87 | aux = tf.expand_dims(aux, axis=1)
88 | aux = self.resnet_stretch(aux)
89 | aux = tf.squeeze(aux, axis=1)
90 | m = tf.expand_dims(m, axis=1)
91 |
92 | for f1, f2 in zip(self.up_layers1, self.up_layers2):
93 | m = f1(m)
94 | m = tf.transpose(m, (0, 3, 2, 1))
95 |
96 | m = f2(m)
97 | m = tf.transpose(m, (0, 3, 2, 1))
98 |
99 | m = tf.squeeze(m, axis=1)[:, :, self.indent:-self.indent]
100 | return tf.transpose(m, (0, 2, 1)), tf.transpose(aux, (0, 2, 1))
101 |
102 |
103 | class WaveRNN(tf.keras.Model):
104 | def __init__(self, rnn_dims, fc_dims, bits, pad, upsample_factors,
105 | feat_dims, compute_dims, res_out_dims, res_blocks,
106 | hop_length, sample_rate, mode='RAW'):
107 | super().__init__()
108 | self.mode = mode
109 | self.pad = pad
110 | if self.mode == 'RAW':
111 | self.n_classes = 2 ** bits
112 | elif self.mode == 'MOL':
113 | self.n_classes = 30
114 | else:
115 | raise RuntimeError("Unknown model mode value - ", self.mode)
116 |
117 | # RNN layers collected in a list (flatten_parameters() is a leftover from the PyTorch reference implementation)
118 | self._to_flatten = []
119 | self.rnn_dims = rnn_dims
120 | self.aux_dims = res_out_dims // 4
121 | self.hop_length = hop_length
122 | self.sample_rate = sample_rate
123 |
124 | self.upsample = UpsampleNetwork(feat_dims, upsample_factors, compute_dims, res_blocks, res_out_dims, pad)
125 |
126 | self.I = tf.keras.layers.Dense(rnn_dims, activation=None)
127 |
128 | self.rnn1 = tf.keras.layers.GRU(rnn_dims, return_sequences=True, return_state=True)
129 | self.rnn2 = tf.keras.layers.GRU(rnn_dims, return_sequences=True, return_state=True)
130 | self._to_flatten += [self.rnn1, self.rnn2]
131 |
132 | self.fc1 = tf.keras.layers.Dense(fc_dims, activation=None)
133 | self.fc2 = tf.keras.layers.Dense(fc_dims, activation=None)
134 | self.fc3 = tf.keras.layers.Dense(self.n_classes, activation=None)
135 |
136 | def call(self, x, mels):
137 | bsize = x.shape[0]
138 | h1 = tf.zeros(shape=(bsize, self.rnn_dims))
139 |
140 | h2 = tf.zeros(shape=(bsize, self.rnn_dims))
141 | mels, aux = self.upsample(mels)
142 |
143 | aux_idx = [self.aux_dims * i for i in range(5)]
144 | a1 = aux[:, :, aux_idx[0]:aux_idx[1]]
145 | a2 = aux[:, :, aux_idx[1]:aux_idx[2]]
146 | a3 = aux[:, :, aux_idx[2]:aux_idx[3]]
147 | a4 = aux[:, :, aux_idx[3]:aux_idx[4]]
148 |
149 | x = tf.concat([tf.expand_dims(x, axis=-1), mels, a1], axis=2)
150 | x = self.I(x)
151 | res = x
152 | x, _ = self.rnn1(x, h1)
153 |
154 | x = x + res
155 | res = x
156 | x = tf.concat([x, a2], axis=2)
157 | x, _ = self.rnn2(x, h2)
158 |
159 | x = x + res
160 | x = tf.concat([x, a3], axis=2)
161 | x = tf.nn.relu(self.fc1(x))
162 |
163 | x = tf.concat([x, a4], axis=2)
164 | x = tf.nn.relu(self.fc2(x))
165 | return self.fc3(x)
166 |
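A forward-pass shape sketch for WaveRNN; the hyper-parameter values are illustrative assumptions, mels follows the (batch, n_mels, frames) layout that UpsampleNetwork expects, and the upsample factors must multiply to hop_length:

import tensorflow as tf

from hlp.tts.wavernn.wavernn import WaveRNN

hop_length, pad, n_mels, frames = 256, 2, 80, 8
model = WaveRNN(rnn_dims=128, fc_dims=128, bits=9, pad=pad,
                upsample_factors=(4, 8, 8), feat_dims=n_mels, compute_dims=64,
                res_out_dims=128, res_blocks=3, hop_length=hop_length,
                sample_rate=22050, mode='RAW')

mels = tf.random.normal((2, n_mels, frames))
# MelResNet consumes pad frames on each side, so the usable waveform length is (frames - 2 * pad) * hop_length
x = tf.random.uniform((2, (frames - 2 * pad) * hop_length))
logits = model(x, mels)
print(logits.shape)  # (2, 1024, 2 ** 9)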
--------------------------------------------------------------------------------
/hlp/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/utils/__init__.py
--------------------------------------------------------------------------------
/hlp/utils/optimizers.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
5 | def __init__(self, d_model, warmup_steps=4000):
6 | super(CustomSchedule, self).__init__()
7 | self.d_model = d_model
8 | self.d_model = tf.cast(self.d_model, tf.float32)
9 | self.warmup_steps = warmup_steps
10 |
11 | def __call__(self, step):
12 | arg1 = tf.math.rsqrt(step)
13 | arg2 = step * (self.warmup_steps ** -1.5)
14 | return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)
15 |
16 |
17 | def loss_func_mask(real, pred, weights=None):
18 |     """ SparseCategoricalCrossentropy loss that masks out padding
19 |
20 |     The true labels in real contain 0-padding; those positions are not counted in the loss
21 |
22 |     :param weights: sample weights
23 |     :param real: tensor of true labels
24 |     :param pred: tensor of logits
25 |     :return: mean loss
26 | """
27 | loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
28 | from_logits=True, reduction='none')
29 |     mask = tf.math.logical_not(tf.math.equal(real, 0))  # positions equal to 0 are padding and are masked out
30 | loss_ = loss_object(real, pred, sample_weight=weights)
31 | mask = tf.cast(mask, dtype=loss_.dtype)
32 | loss_ *= mask
33 | return tf.reduce_mean(loss_)
34 |
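A minimal usage sketch for this module, assuming `hlp` is importable from the project root; the d_model, warmup and Adam hyperparameters below are illustrative, not values taken from the repository configs:

import tensorflow as tf

from hlp.utils.optimizers import CustomSchedule, loss_func_mask

# Transformer-style warmup schedule plugged into Adam; the beta/epsilon values
# follow the usual "Attention Is All You Need" settings and are assumptions here.
learning_rate = CustomSchedule(d_model=256, warmup_steps=4000)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

# loss_func_mask ignores every position whose target id is 0 (padding).
targets = tf.constant([[5, 3, 0, 0]])    # (batch, seq_len), 0 marks padding
logits = tf.random.uniform((1, 4, 100))  # (batch, seq_len, vocab_size)
print(loss_func_mask(targets, logits))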
--------------------------------------------------------------------------------
/hlp/utils/text_split.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import jieba
4 |
5 |
6 | def split_en_word(s):
7 |     """ Lowercase an English text and split it into words
8 |
9 |     :param s: English text to split
10 |     :return: list of tokens
11 | """
12 | s = s.lower().strip()
13 |     # insert a space between a word and the punctuation that follows it
14 |     # e.g.: "he is a boy." => "he is a boy ."
15 |     s = re.sub(r"([?.!,])", r" \1 ", s)  # split off sentence punctuation
16 |
17 |     # replace every character except (a-z, A-Z, ".", "?", "!", ",") with a space
18 | s = re.sub(r"[^a-zA-Z?.!,]+", " ", s)
19 |
20 |     s = re.sub(r'[" "]+', " ", s)  # collapse multiple spaces
21 |
22 | return s.strip().split()
23 |
24 |
25 | def split_en_char(s):
26 |     """ Lowercase an English text and split it into characters
27 |
28 |     :param s: English text to split
29 |     :return: list of tokens
30 | """
31 | s = s.lower().strip()
32 |     s = re.sub(r"([?.!,])", r" \1 ", s)  # split off sentence punctuation
33 |
34 |     # replace every character except (a-z, A-Z, ".", "?", "!", ",") with a space
35 | s = re.sub(r"[^a-zA-Z?.!,]+", " ", s)
36 |
37 |     s = re.sub(r'[" "]+', " ", s)  # collapse multiple spaces
38 |
39 | return [c for c in s.strip()]
40 |
41 |
42 | def split_zh_char(s):
43 |     """ Split a Chinese text into individual characters
44 |
45 |     :param s: Chinese text to split
46 |     :return: string of characters separated by single spaces
47 | """
48 | s = s.lower().strip()
49 |
50 | s = [c for c in s]
51 | s = ' '.join(s)
52 |     s = re.sub(r'[" "]+', " ", s)  # collapse multiple spaces
53 |
54 | return s.strip()
55 |
56 |
57 | def split_zh_word(s):
58 |     """ Split a Chinese text (which may contain English) into words
59 |
60 |     :param s: Chinese text to split
61 |     :return: list of tokens
62 | """
63 | return list(jieba.cut(s.lower().strip()))
64 |
65 |
66 | if __name__ == "__main__":
67 | en_txt1 = "I like NLP."
68 | print(split_en_word(en_txt1))
69 | print(split_en_char(en_txt1))
70 |
71 | en_txt2 = " I like NLP. "
72 | print(split_en_word(en_txt2))
73 | print(split_en_char(en_txt2))
74 |
75 | cn_txt1 = "我喜欢深度学习。"
76 | print(split_zh_char(cn_txt1))
77 | print(split_zh_word(cn_txt1))
78 |
79 | cn_txt2 = " 我喜欢深度学习。 "
80 | print(split_zh_char(cn_txt2))
81 | print(split_zh_word(cn_txt2))
82 |
83 | cn_txt3 = "我喜欢book."
84 | print(split_zh_word(cn_txt3))
85 |
--------------------------------------------------------------------------------
/hlp/utils/train_history.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import matplotlib.pyplot as plt
4 | import matplotlib.ticker as ticker
5 |
6 |
7 | # TODO: metric names should not be hard-coded in this function
8 | def show_and_save_history(history, save_dir, valid_freq=1):
9 | """
10 |     Display the trend of the training-history metrics and save the chart as an image
11 |     :param history: dictionary of history metrics
12 |     :param save_dir: directory in which the metric chart image is saved
13 |     :param valid_freq: validation frequency (in epochs)
14 |     :return: no return value
15 | """
16 | train_x_axis = [i + 1 for i in range(len(history['loss']))]
17 | valid_x_axis = [(i + 1) * valid_freq for i in range(len(history['val_loss']))]
18 |
19 | figure, axis = plt.subplots(1, 1)
20 | tick_spacing = 1
21 | if len(history['loss']) > 20:
22 | tick_spacing = len(history['loss']) // 20
23 |
24 | plt.plot(train_x_axis, history['loss'], label='loss', marker='.')
25 | plt.plot(train_x_axis, history['accuracy'], label='accuracy', marker='.')
26 | plt.plot(valid_x_axis, history['val_loss'], label='val_loss', marker='.', linestyle='--')
27 | plt.plot(valid_x_axis, history['val_accuracy'], label='val_accuracy', marker='.', linestyle='--')
28 |
29 | plt.xticks(valid_x_axis)
30 | plt.xlabel('epoch')
31 | plt.legend()
32 | axis.xaxis.set_major_locator(ticker.MultipleLocator(tick_spacing))
33 |
34 | save_path = save_dir + time.strftime("%Y_%m_%d_%H_%M_%S_", time.localtime(time.time()))
35 | if not os.path.exists(save_dir):
36 | os.makedirs(save_dir, exist_ok=True)
37 | plt.savefig(save_path)
38 | plt.show()
39 |
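A small usage sketch, assuming a Keras-style history dictionary with the four keys the function reads; the metric values and the './history/' directory are purely illustrative (note that save_dir is concatenated with the file name, so it should end with a path separator):

from hlp.utils.train_history import show_and_save_history

# four epochs of training, validated every second epoch
history = {
    'loss': [2.1, 1.6, 1.2, 0.9],
    'accuracy': [0.35, 0.52, 0.63, 0.71],
    'val_loss': [1.8, 1.1],
    'val_accuracy': [0.48, 0.66],
}

show_and_save_history(history, save_dir='./history/', valid_freq=2)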
--------------------------------------------------------------------------------
/hlp/utils/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tensorflow as tf
3 |
4 |
5 | def load_tokenizer(dict_path: str):
6 | """
7 |     Load a tokenizer from a dictionary file
8 |     :param dict_path: path to the dictionary file
9 |     :return tokenizer: the restored tokenizer
10 | """
11 | if not os.path.exists(dict_path):
12 |         print("Dictionary file does not exist, please check and try again")
13 |         exit(1)
14 | with open(dict_path, 'r', encoding='utf-8') as dict_file:
15 | json_string = dict_file.read().strip().strip("\n")
16 | tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(json_string)
17 |
18 | return tokenizer
19 |
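A minimal round-trip sketch, assuming the dictionary file holds the JSON produced by Keras Tokenizer.to_json() (the format tokenizer_from_json expects); the file name and the sample sentences are illustrative:

import tensorflow as tf

from hlp.utils.utils import load_tokenizer

# Fit a tokenizer and write its JSON dictionary next to the script.
tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', oov_token='<unk>')
tokenizer.fit_on_texts(["i like nlp .", "he is a boy ."])
with open("tokenizer_dict.json", 'w', encoding='utf-8') as dict_file:
    dict_file.write(tokenizer.to_json())

# Later, restore the identical tokenizer from the saved dictionary.
tokenizer = load_tokenizer("tokenizer_dict.json")
print(tokenizer.texts_to_sequences(["i like nlp ."]))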
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | jieba==0.42.1
2 | librosa==0.8.0
3 | python-speech-features==0.6
4 | matplotlib==3.3.2
5 | nltk==3.5
6 | numpy==1.18.5
7 | playsound==1.2.2
8 | pyaudio==0.2.11
9 | scikit-learn==0.23.2
10 | scipy==1.5.3
11 | tensorflow==2.3.1
12 | tensorflow-datasets==3.2.1
--------------------------------------------------------------------------------