├── README.md ├── hlp ├── __init__.py ├── chat │ ├── README.md │ ├── __init__.py │ ├── chatter.py │ ├── common │ │ ├── data_utils.py │ │ ├── pre_treat.py │ │ └── utils.py │ ├── config │ │ ├── seq2seq.json │ │ ├── smn.json │ │ └── transformer.json │ ├── data │ │ ├── LCCC.json │ │ ├── cross_woz.json │ │ ├── douban.txt │ │ ├── qin_yun.csv │ │ ├── stc_weibo_train_post │ │ ├── stc_weibo_train_response │ │ ├── tieba.dialogues │ │ ├── ubuntu_train.txt │ │ ├── ubuntu_valid.txt │ │ └── xiaohuangjie.txt │ ├── model │ │ ├── seq2seq.py │ │ ├── smn.py │ │ └── transformer.py │ ├── seq2seq_chatter.py │ ├── smn_chatter.py │ └── transformer_chatter.py ├── mt │ ├── README.md │ ├── __init__.py │ ├── common │ │ ├── bleu.py │ │ ├── load_dataset.py │ │ ├── misc.py │ │ ├── text_split.py │ │ └── text_vectorize.py │ ├── config │ │ ├── config.json │ │ └── get_config.py │ ├── data │ │ └── anki │ │ │ ├── anki-cmn-eng.txt │ │ │ ├── en-zh_eval.txt │ │ │ └── en-zh_val.txt │ ├── evaluate.py │ ├── lm │ │ ├── __init__.py │ │ ├── language_model.py │ │ ├── lm_preprocess.py │ │ ├── lm_rescore.py │ │ └── lm_train.py │ ├── model │ │ ├── checkpoint.py │ │ ├── nmt_model.py │ │ └── transformer.py │ ├── preprocess.py │ ├── train.py │ ├── translate.py │ └── translator.py ├── stt │ ├── __init__.py │ ├── data │ │ ├── LibriSpeech │ │ │ ├── dev-clean-2 │ │ │ │ ├── 84 │ │ │ │ │ └── 121550 │ │ │ │ │ │ ├── 84-121550-0000.flac │ │ │ │ │ │ ├── 84-121550-0001.flac │ │ │ │ │ │ └── 84-121550.trans.txt │ │ │ │ └── 174 │ │ │ │ │ └── 168635 │ │ │ │ │ ├── 174-168635-0000.flac │ │ │ │ │ ├── 174-168635-0001.flac │ │ │ │ │ └── 174-168635.trans.txt │ │ │ └── train-clean-5 │ │ │ │ ├── 1088 │ │ │ │ ├── 134315 │ │ │ │ │ ├── 1088-134315-0000.flac │ │ │ │ │ ├── 1088-134315-0001.flac │ │ │ │ │ ├── 1088-134315-0002.flac │ │ │ │ │ └── 1088-134315.trans.txt │ │ │ │ └── 134318 │ │ │ │ │ ├── 1088-134318-0000.flac │ │ │ │ │ ├── 1088-134318-0001.flac │ │ │ │ │ └── 1088-134318.trans.txt │ │ │ │ └── 1737 │ │ │ │ └── 146161 │ │ │ │ ├── 1737-146161-0000.flac │ │ │ │ ├── 1737-146161-0001.flac │ │ │ │ └── 1737-146161.trans.txt │ │ └── data_thchs30 │ │ │ ├── data │ │ │ ├── .wav.scp │ │ │ ├── A2_0.wav │ │ │ ├── A2_0.wav.trn │ │ │ ├── A2_1.wav │ │ │ ├── A2_1.wav.trn │ │ │ ├── A2_2.wav │ │ │ ├── A2_2.wav.trn │ │ │ ├── A2_3.wav │ │ │ ├── A2_3.wav.trn │ │ │ ├── A2_33.wav │ │ │ ├── A2_33.wav.trn │ │ │ ├── A2_4.wav │ │ │ ├── A2_4.wav.trn │ │ │ ├── A2_5.wav │ │ │ ├── A2_5.wav.trn │ │ │ ├── A2_58.wav │ │ │ ├── A2_58.wav.trn │ │ │ ├── A2_6.wav │ │ │ ├── A2_6.wav.trn │ │ │ ├── A2_7.wav │ │ │ ├── A2_7.wav.trn │ │ │ ├── D4_750.wav │ │ │ ├── D4_750.wav.trn │ │ │ ├── D4_751.wav │ │ │ └── D4_751.wav.trn │ │ │ ├── dev │ │ │ ├── A2_33.wav │ │ │ ├── A2_33.wav.trn │ │ │ ├── A2_58.wav │ │ │ └── A2_58.wav.trn │ │ │ ├── test │ │ │ ├── D4_750.wav │ │ │ ├── D4_750.wav.trn │ │ │ ├── D4_751.wav │ │ │ └── D4_751.wav.trn │ │ │ └── train │ │ │ ├── A2_0.wav │ │ │ ├── A2_0.wav.trn │ │ │ ├── A2_1.wav │ │ │ ├── A2_1.wav.trn │ │ │ ├── A2_2.wav │ │ │ ├── A2_2.wav.trn │ │ │ ├── A2_3.wav │ │ │ ├── A2_3.wav.trn │ │ │ ├── A2_4.wav │ │ │ ├── A2_4.wav.trn │ │ │ ├── A2_5.wav │ │ │ ├── A2_5.wav.trn │ │ │ ├── A2_6.wav │ │ │ ├── A2_6.wav.trn │ │ │ ├── A2_7.wav │ │ │ └── A2_7.wav.trn │ ├── deepspeech2 │ │ ├── actuator.py │ │ ├── model.py │ │ └── module.py │ ├── las │ │ ├── actuator.py │ │ ├── las.py │ │ ├── module.py │ │ └── plas.py │ ├── rnnt │ │ ├── __init__.py │ │ └── model.py │ ├── transformer │ │ ├── actuator.py │ │ ├── model.py │ │ └── module.py │ └── utils │ │ ├── audio_process.py │ │ ├── load_dataset.py │ │ 
├── pre_treat.py │ │ ├── spec_augment.py │ │ ├── text_process.py │ │ └── utils.py ├── tts │ ├── __init__.py │ ├── data │ │ ├── LJSpeech-1.1 │ │ │ ├── metadata.csv │ │ │ └── wavs │ │ │ │ ├── LJ001-0001.wav │ │ │ │ ├── LJ001-0002.wav │ │ │ │ ├── LJ001-0003.wav │ │ │ │ ├── LJ001-0004.wav │ │ │ │ ├── LJ001-0005.wav │ │ │ │ └── LJ001-0006.wav │ │ ├── cmudict-0.7b │ │ └── number │ │ │ ├── metadata.csv │ │ │ ├── test │ │ │ └── wavs │ │ │ │ ├── 0_jackson_0.wav │ │ │ │ ├── 0_jackson_1.wav │ │ │ │ ├── 1_jackson_0.wav │ │ │ │ ├── 1_jackson_1.wav │ │ │ │ ├── 2_jackson_0.wav │ │ │ │ ├── 2_jackson_1.wav │ │ │ │ ├── 3_jackson_0.wav │ │ │ │ ├── 3_jackson_1.wav │ │ │ │ ├── 4_jackson_0.wav │ │ │ │ ├── 4_jackson_1.wav │ │ │ │ ├── 5_jackson_0.wav │ │ │ │ ├── 5_jackson_1.wav │ │ │ │ ├── 6_jackson_0.wav │ │ │ │ ├── 6_jackson_1.wav │ │ │ │ ├── 7_jackson_0.wav │ │ │ │ ├── 7_jackson_1.wav │ │ │ │ ├── 8_jackson_0.wav │ │ │ │ ├── 8_jackson_1.wav │ │ │ │ ├── 9_jackson_0.wav │ │ │ │ └── 9_jackson_1.wav │ │ │ └── train │ │ │ └── wavs │ │ │ ├── 0_jackson_10.wav │ │ │ ├── 0_jackson_11.wav │ │ │ ├── 0_jackson_2.wav │ │ │ ├── 0_jackson_3.wav │ │ │ ├── 0_jackson_4.wav │ │ │ ├── 0_jackson_5.wav │ │ │ ├── 0_jackson_6.wav │ │ │ ├── 0_jackson_7.wav │ │ │ ├── 0_jackson_8.wav │ │ │ ├── 0_jackson_9.wav │ │ │ ├── 1_jackson_0.wav │ │ │ ├── 1_jackson_1.wav │ │ │ ├── 1_jackson_2.wav │ │ │ ├── 1_jackson_3.wav │ │ │ ├── 1_jackson_4.wav │ │ │ ├── 1_jackson_5.wav │ │ │ ├── 1_jackson_6.wav │ │ │ ├── 1_jackson_7.wav │ │ │ ├── 1_jackson_8.wav │ │ │ ├── 1_jackson_9.wav │ │ │ ├── 2_jackson_10.wav │ │ │ ├── 2_jackson_11.wav │ │ │ ├── 2_jackson_2.wav │ │ │ ├── 2_jackson_3.wav │ │ │ ├── 2_jackson_4.wav │ │ │ ├── 2_jackson_5.wav │ │ │ ├── 2_jackson_6.wav │ │ │ ├── 2_jackson_7.wav │ │ │ ├── 2_jackson_8.wav │ │ │ ├── 2_jackson_9.wav │ │ │ ├── 3_jackson_0.wav │ │ │ ├── 3_jackson_1.wav │ │ │ ├── 3_jackson_2.wav │ │ │ ├── 3_jackson_3.wav │ │ │ ├── 3_jackson_4.wav │ │ │ ├── 3_jackson_5.wav │ │ │ ├── 3_jackson_6.wav │ │ │ ├── 3_jackson_7.wav │ │ │ ├── 3_jackson_8.wav │ │ │ ├── 3_jackson_9.wav │ │ │ ├── 4_jackson_0.wav │ │ │ ├── 4_jackson_1.wav │ │ │ ├── 4_jackson_2.wav │ │ │ ├── 4_jackson_3.wav │ │ │ ├── 4_jackson_4.wav │ │ │ ├── 4_jackson_5.wav │ │ │ ├── 4_jackson_6.wav │ │ │ ├── 4_jackson_7.wav │ │ │ ├── 4_jackson_8.wav │ │ │ ├── 4_jackson_9.wav │ │ │ ├── 5_jackson_0.wav │ │ │ ├── 5_jackson_1.wav │ │ │ ├── 5_jackson_2.wav │ │ │ ├── 5_jackson_3.wav │ │ │ ├── 5_jackson_4.wav │ │ │ ├── 5_jackson_5.wav │ │ │ ├── 5_jackson_6.wav │ │ │ ├── 5_jackson_7.wav │ │ │ ├── 5_jackson_8.wav │ │ │ ├── 5_jackson_9.wav │ │ │ ├── 6_jackson_0.wav │ │ │ ├── 6_jackson_1.wav │ │ │ ├── 6_jackson_2.wav │ │ │ ├── 6_jackson_3.wav │ │ │ ├── 6_jackson_4.wav │ │ │ ├── 6_jackson_5.wav │ │ │ ├── 6_jackson_6.wav │ │ │ ├── 6_jackson_7.wav │ │ │ ├── 6_jackson_8.wav │ │ │ ├── 6_jackson_9.wav │ │ │ ├── 7_jackson_0.wav │ │ │ ├── 7_jackson_1.wav │ │ │ ├── 7_jackson_2.wav │ │ │ ├── 7_jackson_3.wav │ │ │ ├── 7_jackson_4.wav │ │ │ ├── 7_jackson_5.wav │ │ │ ├── 7_jackson_6.wav │ │ │ ├── 7_jackson_7.wav │ │ │ ├── 7_jackson_8.wav │ │ │ ├── 7_jackson_9.wav │ │ │ ├── 8_jackson_0.wav │ │ │ ├── 8_jackson_1.wav │ │ │ ├── 8_jackson_2.wav │ │ │ ├── 8_jackson_3.wav │ │ │ ├── 8_jackson_4.wav │ │ │ ├── 8_jackson_5.wav │ │ │ ├── 8_jackson_6.wav │ │ │ ├── 8_jackson_7.wav │ │ │ ├── 8_jackson_8.wav │ │ │ ├── 8_jackson_9.wav │ │ │ ├── 9_jackson_0.wav │ │ │ ├── 9_jackson_1.wav │ │ │ ├── 9_jackson_2.wav │ │ │ ├── 9_jackson_3.wav │ │ │ ├── 9_jackson_4.wav │ │ │ ├── 9_jackson_5.wav │ │ │ ├── 9_jackson_6.wav │ │ │ 
├── 9_jackson_7.wav │ │ │ ├── 9_jackson_8.wav │ │ │ └── 9_jackson_9.wav │ ├── tacotron2 │ │ ├── actuator.py │ │ ├── model.py │ │ └── module.py │ ├── transformer │ │ ├── actuator.py │ │ ├── model.py │ │ └── module.py │ ├── utils │ │ ├── layers.py │ │ ├── load_dataset.py │ │ ├── pre_treat.py │ │ ├── spec.py │ │ └── text_preprocess.py │ └── wavernn │ │ ├── generator.py │ │ ├── preprocess.py │ │ ├── train.py │ │ ├── utils.py │ │ └── wavernn.py └── utils │ ├── __init__.py │ ├── beamsearch.py │ ├── layers.py │ ├── optimizers.py │ ├── text_split.py │ ├── train_history.py │ └── utils.py └── requirements.txt /README.md: -------------------------------------------------------------------------------- 1 | # hlp 2 | Deep-learning-based dialogue systems, speech recognition, machine translation, speech synthesis, and more. 3 | # Directory structure 4 | - hlp: top-level package directory 5 | - mt: machine translation package directory 6 | - stt: speech recognition package directory 7 | - tts: speech synthesis package directory 8 | - chat: dialogue system package directory 9 | - utils: shared utilities package directory 10 | 11 | Different methods, models, and implementations for each component should each get their own sub-package directory under one of mt, stt, tts, or chat.
12 | For example, a Tacotron implementation of speech synthesis should be placed in a tacotron package under tts. 13 | # In Progress 14 | - Seq2Seq-based chit-chat system 15 | - DeepSpeech2-based speech recognition 16 | - Tacotron2-based speech synthesis 17 | - Transformer-based chit-chat system 18 | - Transformer-based machine translation 19 | - Transformer-based speech recognition 20 | - Transformer-based speech synthesis 21 | - Listen-Attend-Spell-based speech recognition 22 | - Retrieval-based multi-turn chit-chat system 23 | - RNN-T streaming speech recognition 24 | - WaveRNN vocoder 25 | -------------------------------------------------------------------------------- /hlp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/__init__.py -------------------------------------------------------------------------------- /hlp/chat/README.md: -------------------------------------------------------------------------------- 1 | 2 | # How to run 3 | + Entry points: 4 | + seq2seq_chatter.py is the entry script for seq2seq: the command must be given run arguments 5 | + transformer_chatter.py is the entry script for transformer: the command must be given run arguments 6 | + smn_chatter.py is the entry script for smn: the command must be given run arguments 7 | + Command format: 8 | + seq2seq: python seq2seq_chatter.py --act [mode] 9 | + transformer: python transformer_chatter.py --act [mode] 10 | + smn: python smn_chatter.py --act [mode: pre_treat/train/evaluate/chat] 11 | + Modes: pre_treat (default)/train/evaluate/chat 12 | + Example commands: 13 | + python seq2seq_chatter.py 14 | + python seq2seq_chatter.py --act pre_treat 15 | + python transformer_chatter.py 16 | + python transformer_chatter.py --act pre_treat 17 | + python smn_chatter.py 18 | + python smn_chatter.py --act pre_treat 19 | + pre_treat is the text preprocessing mode; if there is no tokenized result set yet, run pre_treat first 20 | + train is the training mode 21 | + evaluate is the metric evaluation mode 22 | + chat is the dialogue mode; while running in chat mode, type exit to end the conversation. 23 | 24 | + The normal execution order is pre_treat->train->evaluate->chat 25 | 26 | # Running the SMN model 27 | The SMN retrieval-based dialogue system requires a Solr environment before use. Linux is the recommended OS for the Solr deployment, and container-based deployment (Docker recommended) is advised. Prepare: 28 | + Solr (8.6.3) 29 | + pysolr (3.9.0) 30 | ## Solr environment 31 | To keep Solr running stably in production and easy to maintain, deploy it with the Dockerfile available at [docker-solr](https://github.com/docker-solr/docker-solr) 32 | 33 | If you only need to test the model, the following minimal setup is enough: 34 | ``` 35 | docker pull solr:8.6.3 36 | # then start solr 37 | docker run -itd --name solr -p 8983:8983 solr:8.6.3 38 | # then create a core, named smn here (optional) 39 | docker exec -it --user=solr solr bin/solr create_core -c smn 40 | ``` 41 | 42 | Tokenizers for Solr include IK Analyzer, Smartcn, the pinyin tokenizer, and others; download the corresponding jar and add it to the Solr core configuration file managed-schema. 43 | 44 | **Note**: if TF-IDF is used, the similarity configuration must also be enabled in managed-schema. 45 | ## Usage from Python 46 | After Solr is deployed, connect to it from Python with pysolr: 47 | ``` 48 | pip install pysolr 49 | ``` 50 | 51 | Indexing data (usually after a health check) works as follows. Index the response data; responses is a JSON list of the form [{},{},{},...], where each object is built according to your response needs: 52 | ``` 53 | solr = pysolr.Solr(url=solr_server, always_commit=True, timeout=10) 54 | # health check 55 | solr.ping() 56 | solr.add(docs=responses) 57 | ``` 58 | 59 | Querying works as follows; a TF-IDF query over all utterances for a query sentence looks like: 60 | ``` 61 | {!func}sum(product(idf(utterance,key1),tf(utterance,key1),product(idf(utterance,key2),tf(utterance,key2),...)
62 | ``` 63 | 64 | The data must be added to Solr before use; for this SMN model, simply running the pre_treat mode first is enough. 65 | -------------------------------------------------------------------------------- /hlp/chat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/chat/__init__.py -------------------------------------------------------------------------------- /hlp/chat/common/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | 5 | def log_operator(level: str, log_file: str = None, 6 | log_format: str = "[%(levelname)s] - [%(asctime)s] - [file: %(filename)s] - " 7 | "[function: %(funcName)s] - [%(message)s]") -> logging.Logger: 8 | """ 9 | Logging helper; valid levels are 'CRITICAL','FATAL','ERROR','WARN','WARNING','INFO','DEBUG','NOTSET' 10 | CRITICAL = 50 11 | FATAL = CRITICAL 12 | ERROR = 40 13 | WARNING = 30 14 | WARN = WARNING 15 | INFO = 20 16 | DEBUG = 10 17 | NOTSET = 0 18 | :param log_file: log file path 19 | :param level: log level 20 | :param log_format: log message format 21 | :return: logger 22 | """ 23 | if log_file is None: 24 | log_file = os.path.abspath(__file__)[:os.path.abspath(__file__).rfind("\\hlp\\")] + '\\hlp\\chat\\data\\runtime.log' 25 | 26 | logger = logging.getLogger() 27 | logger.setLevel(level) 28 | file_handler = logging.FileHandler(log_file, encoding='utf-8') 29 | file_handler.setLevel(level=level) 30 | formatter = logging.Formatter(log_format) 31 | file_handler.setFormatter(formatter) 32 | logger.addHandler(file_handler) 33 | 34 | return logger 35 | -------------------------------------------------------------------------------- /hlp/chat/config/seq2seq.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_layers": 2, 3 | "encoder_layers": 2, 4 | "decoder_layers": 2, 5 | "units": 1024, 6 | "vocab_size": 1000, 7 | "embedding_dim": 256, 8 | "max_train_data_size": 200, 9 | "max_valid_data_size": 100, 10 | "max_length": 40, 11 | "type": "pre_treat", 12 | "dict_file": "\\data\\seq2seq_dict.json", 13 | "checkpoint": "\\checkpoints\\seq2seq", 14 | "resource_data": "\\data\\LCCC.json", 15 | "tokenized_data": "\\data\\lccc_tokenized.txt", 16 | "qa_tokenized_data": "\\data\\tokenized.txt", 17 | "history_image_dir": "\\data\\history\\seq2seq\\", 18 | "valid_data_file": "", 19 | "valid_freq": 5, 20 | "checkpoint_save_freq": 2, 21 | "checkpoint_save_size": 1, 22 | "batch_size": 32, 23 | "buffer_size": 20000, 24 | "beam_size": 3, 25 | "valid_data_split": 0.2, 26 | "epochs": 5, 27 | "start_sign": "start", 28 | "end_sign": "end" 29 | } -------------------------------------------------------------------------------- /hlp/chat/config/smn.json: -------------------------------------------------------------------------------- 1 | { 2 | "max_sentence": 50, 3 | "max_utterance": 10, 4 | "units": 200, 5 | "vocab_size": 2000, 6 | "embedding_dim": 200, 7 | "max_train_data_size": 36, 8 | "max_valid_data_size": 100, 9 | "max_database_size": 0, 10 | "learning_rate": 0.001, 11 | "type": "pre_treat", 12 | "dict_file": "\\data\\smn_dict_fn.json", 13 | "checkpoint": "\\checkpoints\\smn", 14 | "tokenized_train": "\\data\\ubuntu_train.txt", 15 | "tokenized_valid": "\\data\\ubuntu_valid.txt", 16 | "solr_server": "http://49.235.33.100:8983/solr/smn/", 17 | "candidate_database": "\\data\\candidate.json", 18 | "batch_size": 32, 19 | "buffer_size": 20000, 20 | "epochs": 5 21 | }
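The chat configuration files above are plain JSON, and smn.json additionally records the `solr_server` address that the retrieval pipeline queries. A minimal sketch of reading such a config and exercising the Solr core with pysolr, along the lines of hlp/chat/README.md (the chatter entry scripts themselves are not reproduced here, so the relative config path and the `utterance` field below are illustrative assumptions):

```
import json

import pysolr

# Read the SMN config; the relative path assumes we run from the hlp/chat package directory.
with open("config/smn.json", "r", encoding="utf-8") as file:
    config = json.load(file)

# Connect to the Solr core named in smn.json and run a health check first.
solr = pysolr.Solr(url=config["solr_server"], always_commit=True, timeout=10)
solr.ping()

# Index candidate responses; each document's fields are up to the caller,
# here a single "utterance" field is assumed.
responses = [{"utterance": "hello there"}, {"utterance": "how are you"}]
solr.add(docs=responses)

# Retrieve the top candidates matching a query term.
for doc in solr.search("utterance:hello", rows=10):
    print(doc["utterance"])
```

In the repository itself this indexing is handled by the pre_treat mode, so the snippet only illustrates the pysolr calls involved.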
-------------------------------------------------------------------------------- /hlp/chat/config/transformer.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_layers": 2, 3 | "d_model": 256, 4 | "num_heads": 8, 5 | "units": 512, 6 | "dropout": 0.1, 7 | "vocab_size": 1500, 8 | "embedding_dim": 256, 9 | "max_train_data_size": 200, 10 | "max_valid_data_size": 100, 11 | "max_length": 40, 12 | "type": "pre_treat", 13 | "dict_file": "\\data\\transformer_dict.json", 14 | "checkpoint": "\\checkpoints\\transformer", 15 | "resource_data": "\\data\\LCCC.json", 16 | "tokenized_data": "\\data\\lccc_tokenized.txt", 17 | "qa_tokenized_data": "\\data\\tokenized.txt", 18 | "history_image_dir": "\\data\\history\\transformer\\", 19 | "valid_data_file": "", 20 | "valid_freq": 5, 21 | "checkpoint_save_freq": 2, 22 | "checkpoint_save_size": 1, 23 | "batch_size": 32, 24 | "buffer_size": 20000, 25 | "beam_size": 3, 26 | "valid_data_split": 0.2, 27 | "epochs": 5, 28 | "start_sign": "start", 29 | "end_sign": "end" 30 | } -------------------------------------------------------------------------------- /hlp/chat/model/seq2seq.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import hlp.utils.layers as layers 3 | import hlp.chat.common.utils as utils 4 | 5 | 6 | def rnn_layer(units: int, input_feature_dim: int, cell_type: str = 'lstm', 7 | if_bidirectional: bool = True) -> tf.keras.Model: 8 | """ 9 | RNNCell层,其中可定义cell类型,是否双向 10 | :param units: cell单元数 11 | :param input_feature_dim: 输入的特征维大小 12 | :param cell_type: cell类型,lstm/gru, 默认lstm 13 | :param if_bidirectional: 是否双向 14 | :return: Multi-layer RNN 15 | """ 16 | inputs = tf.keras.Input(shape=(None, input_feature_dim)) 17 | if cell_type == 'lstm': 18 | rnn = tf.keras.layers.LSTM(units=units, return_sequences=True, return_state=True, 19 | recurrent_initializer='glorot_uniform') 20 | elif cell_type == 'gru': 21 | rnn = tf.keras.layers.GRU(units=units, return_sequences=True, return_state=True, 22 | recurrent_initializer='glorot_uniform') 23 | else: 24 | print('cell执行了类型执行出错,定位细节参见log') 25 | utils.log_operator(level=10).info("cell执行了类型执行出错") 26 | 27 | if if_bidirectional: 28 | rnn = tf.keras.layers.Bidirectional(rnn) 29 | 30 | rnn_outputs = rnn(inputs) 31 | outputs = rnn_outputs[0] 32 | states = outputs[:, -1, :] 33 | 34 | return tf.keras.Model(inputs=inputs, outputs=[outputs, states]) 35 | 36 | 37 | def encoder(vocab_size: int, embedding_dim: int, enc_units: int, num_layers: int, 38 | cell_type: str, if_bidirectional: bool = True) -> tf.keras.Model: 39 | """ 40 | seq2seq的encoder,主要就是使用Embedding和GRU对输入进行编码, 41 | 这里需要注意传入一个初始化的隐藏层,随机也可以,但是我这里就 42 | 直接写了一个隐藏层方法。 43 | :param vocab_size: 词汇量大小 44 | :param embedding_dim: 词嵌入维度 45 | :param enc_units: 单元大小 46 | :param num_layers: encoder中内部RNN层数 47 | :param cell_type: cell类型,lstm/gru, 默认lstm 48 | :param if_bidirectional: 是否双向 49 | :return: Seq2Seq的Encoder 50 | """ 51 | inputs = tf.keras.Input(shape=(None,)) 52 | outputs = tf.keras.layers.Embedding(vocab_size, embedding_dim)(inputs) 53 | 54 | for i in range(num_layers): 55 | outputs, states = rnn_layer(units=enc_units, input_feature_dim=outputs.shape[-1], 56 | cell_type=cell_type, if_bidirectional=if_bidirectional)(outputs) 57 | 58 | return tf.keras.Model(inputs=inputs, outputs=[outputs, states]) 59 | 60 | 61 | def decoder(vocab_size: int, embedding_dim: int, dec_units: int, enc_units: int, 62 | num_layers: int, cell_type: str) -> tf.keras.Model: 63 | """ 64 | 
seq2seq的decoder,将初始化的x、隐藏层和encoder的输出作为 65 | 输入,encoder的输入用来和隐藏层进行attention,得到的上下文 66 | 向量和x进行整合然后丢到gru里去,最后Dense输出一下 67 | :param vocab_size: 词汇量大小 68 | :param embedding_dim: 词嵌入维度 69 | :param dec_units: decoder单元大小 70 | :param enc_units: encoder单元大小 71 | :param num_layers: encoder中内部RNN层数 72 | :param cell_type: cell类型,lstm/gru, 默认lstm 73 | :return: Seq2Seq的Decoder 74 | """ 75 | inputs = tf.keras.Input(shape=(None,)) 76 | enc_output = tf.keras.Input(shape=(None, enc_units)) 77 | dec_hidden = tf.keras.Input(shape=(enc_units,)) 78 | 79 | embeddings = tf.keras.layers.Embedding(vocab_size, embedding_dim)(inputs) 80 | context_vector, attention_weight = layers.BahdanauAttention(dec_units)(dec_hidden, enc_output) 81 | outputs = tf.concat([tf.expand_dims(context_vector, 1), embeddings], axis=-1) 82 | 83 | for i in range(num_layers): 84 | # Decoder中不允许使用双向 85 | outputs, states = rnn_layer(units=dec_units, input_feature_dim=outputs.shape[-1], 86 | cell_type=cell_type, if_bidirectional=False)(outputs) 87 | 88 | outputs = tf.reshape(outputs, (-1, outputs.shape[-1])) 89 | outputs = tf.keras.layers.Dense(vocab_size)(outputs) 90 | 91 | return tf.keras.Model(inputs=[inputs, enc_output, dec_hidden], outputs=[outputs, states, attention_weight]) 92 | -------------------------------------------------------------------------------- /hlp/chat/model/smn.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def accumulate(units: int, embedding_dim: int, 5 | max_utterance: int, max_sentence: int) -> tf.keras.Model: 6 | """ 7 | SMN的语义抽取层,主要是对匹配对的两个相似度矩阵进行计 8 | 算,并返回最终的最后一层GRU的状态,用于计算分数 9 | :param units: GRU单元数 10 | :param embedding_dim: embedding维度 11 | :param max_utterance: 每轮最大语句数 12 | :param max_sentence: 句子最大长度 13 | :return: GRU的状态 14 | """ 15 | utterance_inputs = tf.keras.Input(shape=(max_utterance, max_sentence, embedding_dim)) 16 | response_inputs = tf.keras.Input(shape=(max_sentence, embedding_dim)) 17 | a_matrix = tf.keras.initializers.GlorotNormal()(shape=(units, units), dtype=tf.float32) 18 | 19 | # 这里对response进行GRU的Word级关系建模,这里用正交矩阵初始化内核权重矩阵,用于输入的线性变换。 20 | response_gru = tf.keras.layers.GRU(units=units, return_sequences=True, 21 | kernel_initializer='orthogonal')(response_inputs) 22 | conv2d_layer = tf.keras.layers.Conv2D(filters=8, kernel_size=(3, 3), padding='valid', 23 | kernel_initializer='he_normal', activation='relu') 24 | max_polling2d_layer = tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(3, 3), padding='valid') 25 | dense_layer = tf.keras.layers.Dense(50, activation='tanh', kernel_initializer='glorot_normal') 26 | 27 | # 这里需要做一些前提工作,因为我们要针对每个batch中的每个utterance进行运算,所 28 | # 以我们需要将batch中的utterance序列进行拆分,使得batch中的序列顺序一一匹配 29 | utterance_embeddings = tf.unstack(utterance_inputs, num=max_utterance, axis=1) 30 | matching_vectors = [] 31 | for utterance_input in utterance_embeddings: 32 | # 求解第一个相似度矩阵,公式见论文 33 | matrix1 = tf.matmul(utterance_input, response_inputs, transpose_b=True) 34 | utterance_gru = tf.keras.layers.GRU(units, return_sequences=True, 35 | kernel_initializer='orthogonal')(utterance_input) 36 | matrix2 = tf.einsum("aij,jk->aik", utterance_gru, a_matrix) 37 | # matrix2 = tf.matmul(utterance_gru, a_matrix) 38 | # 求解第二个相似度矩阵 39 | matrix2 = tf.matmul(matrix2, response_gru, transpose_b=True) 40 | matrix = tf.stack([matrix1, matrix2], axis=3) 41 | 42 | conv_outputs = conv2d_layer(matrix) 43 | pooling_outputs = max_polling2d_layer(conv_outputs) 44 | flatten_outputs = tf.keras.layers.Flatten()(pooling_outputs) 45 
| 46 | matching_vector = dense_layer(flatten_outputs) 47 | matching_vectors.append(matching_vector) 48 | 49 | vector = tf.stack(matching_vectors, axis=1) 50 | outputs = tf.keras.layers.GRU(units, kernel_initializer='orthogonal')(vector) 51 | 52 | return tf.keras.Model(inputs=[utterance_inputs, response_inputs], outputs=outputs) 53 | 54 | 55 | def smn(units: int, vocab_size: int, embedding_dim: int, 56 | max_utterance: int, max_sentence: int) -> tf.keras.Model: 57 | """ 58 | SMN的模型,在这里将输入进行accumulate之后,得 59 | 到匹配对的向量,然后通过这些向量计算最终的分类概率 60 | :param units: GRU单元数 61 | :param vocab_size: embedding词汇量 62 | :param embedding_dim: embedding维度 63 | :param max_utterance: 每轮最大语句数 64 | :param max_sentence: 句子最大长度 65 | :return: 匹配对打分 66 | """ 67 | utterances = tf.keras.Input(shape=(max_utterance, max_sentence)) 68 | responses = tf.keras.Input(shape=(max_sentence,)) 69 | 70 | embeddings = tf.keras.layers.Embedding(vocab_size, embedding_dim, name="encoder") 71 | utterances_embeddings = embeddings(utterances) 72 | responses_embeddings = embeddings(responses) 73 | 74 | accumulate_outputs = accumulate(units=units, embedding_dim=embedding_dim, max_utterance=max_utterance, 75 | max_sentence=max_sentence)( 76 | inputs=[utterances_embeddings, responses_embeddings]) 77 | 78 | outputs = tf.keras.layers.Dense(2, kernel_initializer='glorot_normal')(accumulate_outputs) 79 | 80 | return tf.keras.Model(inputs=[utterances, responses], outputs=outputs) 81 | -------------------------------------------------------------------------------- /hlp/mt/README.md: -------------------------------------------------------------------------------- 1 | # 使用说明 2 | 项目自带较小的训练和验证数据集,可以无需配置运行所有功能。 3 | - 语料预处理 4 | - (可选步骤)在mt/config/config.json中配置语料路径和切分方法 5 | - 运行mt/preprocess.py 6 | 7 | - 训练模型 8 | - (可选步骤)在mt/config/config.json中配置语料路径、切分方法、模型参数和训练超参数等 9 | - 运行mt/train.py 10 | 11 | - 评价模型 12 | - (可选步骤)在mt/config/config.json中配置验证语料路径 13 | - 运行mt/evaluate.py 14 | 15 | - 交互式翻译 16 | - 运行mt/translate.py -------------------------------------------------------------------------------- /hlp/mt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/mt/__init__.py -------------------------------------------------------------------------------- /hlp/mt/common/bleu.py: -------------------------------------------------------------------------------- 1 | import re 2 | import nltk 3 | 4 | 5 | def bleu_nltk(candidate_sentence, reference_sentences, language): 6 | """ 7 | :param candidate_sentence:机翻句子 8 | :param reference_sentences:参考句子列表 9 | :param language:句子的语言 10 | 11 | """ 12 | # 根据所选择的语言对句子进行预处理 13 | if language == "zh": 14 | candidate_sentence = [w for w in candidate_sentence] 15 | reference_sentences = [] 16 | for sentence in reference_sentences: 17 | reference_sentences.append([w for w in sentence]) 18 | elif language == "en": 19 | candidate_sentence = re.sub(r'([?.!,])', r' \1', candidate_sentence) # 在?.!,前添加空格 20 | candidate_sentence = re.sub(r'[" "]+', " ", candidate_sentence) # 合并连续的空格 21 | candidate_sentence = candidate_sentence.split(' ') 22 | reference_sentences = [] 23 | for sentence in reference_sentences: 24 | sentence = re.sub(r'([?.!,])', r' \1', sentence) # 在?.!,前添加空格 25 | sentence = re.sub(r'[" "]+', " ", sentence) # 合并连续的空格 26 | sentence = sentence.split(' ') 27 | reference_sentences.append(sentence) 28 | 29 | smooth_function = nltk.translate.bleu_score.SmoothingFunction() 30 | score = 
nltk.translate.bleu_score.sentence_bleu(reference_sentences, 31 | candidate_sentence, 32 | smoothing_function=smooth_function.method1) 33 | return score * 100 34 | 35 | 36 | def main(): 37 | # 测试语句 38 | candidate_sentence_zh = '今天的天气真好啊。' 39 | reference_sentence_zh = '今天可真是个好天气啊。' 40 | score = bleu_nltk(candidate_sentence_zh, [reference_sentence_zh], language='zh') 41 | print('NLTK_BLEU:%.2f' % score) 42 | 43 | # 测试英语语句 44 | candidate_sentence_en = "It's a good day." 45 | reference_sentence_en = "It's really a good sunny day." 46 | score = bleu_nltk(candidate_sentence_en, [reference_sentence_en], language='en') 47 | print('NLTK_BLEU:%.2f' % score) 48 | 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /hlp/mt/common/load_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from sklearn.model_selection import train_test_split 3 | import tensorflow as tf 4 | from hlp.mt.config import get_config as _config 5 | 6 | 7 | def load_single_sentences(path, num_sentences, column): 8 | """加载指定列文本,列计数从1开始""" 9 | sentences = [] 10 | with open(path, encoding='UTF-8') as file: 11 | for i in range(num_sentences): 12 | line = file.readline() 13 | sentences.append(line.split('\t')[column - 1]) 14 | return sentences 15 | 16 | 17 | def load_sentences(path, num_sentences, reverse=_config.reverse): 18 | """加载句子对, 19 | @param path:加载文本的路径 20 | @param num_sentences:加载句子数量 21 | @param reverse:是否交换列的顺序 22 | @return:相应列句子的列表 23 | """ 24 | source_sentences = [] 25 | target_sentences = [] 26 | with open(path, encoding='UTF-8') as file: 27 | for i in range(num_sentences): 28 | line = file.readline() 29 | source_sentences.append(line.split('\t')[0]) 30 | target_sentences.append(line.split('\t')[1]) 31 | if reverse == 'True': 32 | return target_sentences, source_sentences 33 | else: 34 | return source_sentences, target_sentences 35 | 36 | 37 | def _generate_batch_from_ram(input_path, target_path, train_size=_config.train_size): 38 | """从内存中产生训练和验证批数据 39 | """ 40 | input_tensor = numpy.loadtxt(input_path, dtype='int32') 41 | target_tensor = numpy.loadtxt(target_path, dtype='int32') 42 | x_train, x_test, y_train, y_test = train_test_split(input_tensor, target_tensor, train_size=train_size) 43 | 44 | train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) 45 | train_dataset = train_dataset.shuffle(_config.BUFFER_SIZE).batch(_config.BATCH_SIZE, drop_remainder=True) 46 | 47 | val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)) 48 | val_dataset = val_dataset.shuffle(_config.BUFFER_SIZE).batch(_config.BATCH_SIZE, drop_remainder=True) 49 | 50 | return train_dataset, val_dataset 51 | 52 | 53 | def _generate_batch_from_file(input_path, target_path, num_steps, start_step, batch_size): 54 | """ 55 | 从编码文件中分batch读入数据集 56 | 自动从配置文件设置确定input_path、target_path 57 | num_steps:整个训练集的step数,即数据集中包含多少个batch 58 | start_step:从哪个step开始读batch 59 | batch_size:batch大小 60 | 61 | return:input_tensor shape=(batch_size, sentence_length), dtype=tf.int32 62 | , target_tensor shape=(batch_size, sentence_length), dtype=tf.int32 63 | """ 64 | 65 | step = int(start_step) 66 | while step < num_steps: 67 | # TODO: 这个效率有问题 68 | input_tensor = numpy.loadtxt(input_path, dtype='int32', skiprows=0 + step * batch_size, max_rows=batch_size) 69 | target_tensor = numpy.loadtxt(target_path, dtype='int32', skiprows=0 + step * batch_size, max_rows=batch_size) 70 | step += 1 71 | yield 
tf.cast(input_tensor, tf.int32), tf.cast(target_tensor, tf.int32) 72 | 73 | 74 | def get_dataset(input_path, target_path, cache, train_size, steps=None): 75 | """从指定的路径中获取数据集 76 | 77 | @param input_path: 输入已编码文本路径 78 | @param target_path: 目标已编码文本路径 79 | @param cache: 是否一次性加载入内存,即是采用generator 80 | @param train_size: 训练集比例 81 | @param steps: 训练集文本共含多少个batch,cache为 False时可为None 82 | """ 83 | if cache: 84 | train_dataset, val_dataset = _generate_batch_from_ram(input_path, target_path, train_size) 85 | else: 86 | train_dataset = _generate_batch_from_file(input_path, target_path, steps * train_size, 0, _config.BATCH_SIZE) 87 | val_dataset = _generate_batch_from_file(input_path, target_path, steps, steps * train_size, _config.BATCH_SIZE) 88 | return train_dataset, val_dataset 89 | 90 | 91 | -------------------------------------------------------------------------------- /hlp/mt/common/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | 5 | def check_and_create(checkpoint_dir): 6 | chkpt_path = Path(checkpoint_dir) 7 | if not chkpt_path.exists(): 8 | os.makedirs(checkpoint_dir, exist_ok=True) 9 | return False 10 | else: 11 | return True -------------------------------------------------------------------------------- /hlp/mt/common/text_split.py: -------------------------------------------------------------------------------- 1 | from hlp.mt.config import get_config as _config 2 | from hlp.utils import text_split 3 | 4 | 5 | def _preprocess_sentence_en_bpe(sentence, start_word=_config.start_word, end_word=_config.end_word): 6 | sentence = start_word + ' ' + sentence + ' ' + end_word 7 | return sentence 8 | 9 | 10 | def preprocess_sentences_en(sentences, mode=_config.en_tokenize_type, 11 | start_word=_config.start_word, end_word=_config.end_word): 12 | """ 13 | 对英文句子列表进行指定mode的预处理 14 | 返回处理好的句子列表,句子为添加开始介绍符的空格分隔的字符串 15 | """ 16 | if mode == 'BPE': 17 | sentences = [_preprocess_sentence_en_bpe(s, start_word, end_word) for s in sentences] 18 | return sentences 19 | elif mode == 'WORD': 20 | sentences = [text_split.split_en_word(s) for s in sentences] 21 | sentences = [start_word + ' ' + ' '.join(s) + ' ' + end_word for s in sentences] 22 | return sentences 23 | else: 24 | return '' 25 | 26 | 27 | def preprocess_sentences_zh(sentences, mode=_config.zh_tokenize_type, 28 | start_word=_config.start_word, end_word=_config.end_word): 29 | """ 30 | 对中文句子列表进行指定mode的预处理 31 | 返回处理好的句子列表,句子为添加开始介绍符的空格分隔的字符串 32 | """ 33 | if mode == 'CHAR': 34 | sentences = [text_split.split_zh_char(s) for s in sentences] 35 | sentences = [start_word + ' ' + ' '.join(s) + ' ' + end_word for s in sentences] 36 | return sentences 37 | elif mode == 'WORD': 38 | sentences = [text_split.split_zh_word(s) for s in sentences] 39 | sentences = [start_word + ' ' + ' '.join(s) + ' ' + end_word for s in sentences] 40 | return sentences 41 | 42 | 43 | def preprocess_sentences(sentences, language, mode): 44 | """ 45 | 46 | :param sentences: 原始句子字符串列表 47 | :param language: 48 | :param mode: 49 | :return: 添加开始结束符的空格分隔的句子字符串构成的列表 50 | """ 51 | if language == "en": 52 | return preprocess_sentences_en(sentences, mode) 53 | elif language == "zh": 54 | return preprocess_sentences_zh(sentences, mode) 55 | -------------------------------------------------------------------------------- /hlp/mt/config/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "source_lang": "en", 3 | "target_lang": "zh", 4 | "reverse": 
"False", 5 | "en_tokenize_type": "WORD", 6 | "zh_tokenize_type": "CHAR", 7 | "train_size": 0.9, 8 | "BEAM_SIZE": 1, 9 | "validation_data": "False", 10 | "checkpoint_ensembling": "True", 11 | "num_validate_sentences": 200, 12 | "validation_freq": 1, 13 | "max_checkpoints_num": 5, 14 | "checkpoints_save_freq": 5, 15 | "checkpoint_name": "4layers512units", 16 | "num_eval": 5, 17 | "BUFFER_SIZE": 20000, 18 | "EPOCHS": 4, 19 | "num_sentences": 1000, 20 | "BATCH_SIZE": 32, 21 | "num_layers": 4, 22 | "d_model": 256, 23 | "dff": 512, 24 | "num_heads": 8, 25 | "dropout_rate" : 0.1, 26 | "max_target_length": 200, 27 | "start_word": "", 28 | "end_word": "", 29 | "target_vocab_size": 8192, 30 | "result_save_dir": "./data/result/", 31 | "path_to_train_file": "./data/anki/anki-cmn-eng.txt", 32 | "path_to_eval_file": "./data/anki/en-zh_eval.txt", 33 | "path_to_val_file": "./data/anki/en-zh_val.txt", 34 | "encoded_sequences_path_prefix": "./data/encoded_corpus/encoded_sequences_", 35 | "tokenizer_path_prefix": "./data/tokenizer/tokenizer_", 36 | "checkpoint_path_dir": "./checkpoints", 37 | 38 | "language_model": { 39 | "path_to_train_file_lm": "../data/anki/anki-cmn-eng.txt", 40 | "language": "zh", 41 | "tokenize_type": "CHAR", 42 | "EPOCHS": 4, 43 | "num_sentences": 1000, 44 | "BATCH_SIZE": 32, 45 | "train_size": 0.8, 46 | "d_rnn": 200, 47 | "d_embedding": 256, 48 | "validation_freq": 1, 49 | "max_checkpoints_num": 5, 50 | "checkpoints_save_freq": 5 51 | } 52 | } -------------------------------------------------------------------------------- /hlp/mt/config/get_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | json_path = os.path.join(os.path.dirname(__file__), 'config.json') # 配置文件路径 5 | 6 | 7 | def get_config_json(config_file='main.json'): 8 | with open(config_file, 'r') as file: 9 | return json.load(file) 10 | 11 | 12 | conf = get_config_json(json_path) 13 | 14 | # 对各变量赋值 15 | num_validate_sentences = conf["num_validate_sentences"] # 用来验证的句子数量 16 | path_to_val_file = conf["path_to_val_file"] # 验证集文本路径 17 | validation_data = conf["validation_data"] # 是否从给定文本数据集用来验证 18 | checkpoint_name = conf['checkpoint_name'] # 检查点名字 19 | validation_freq = conf['validation_freq'] # 验证频率,即每训练几个epoch进行验证 20 | checkpoints_save_freq = conf['checkpoints_save_freq'] # 检查点保存频率 21 | max_checkpoints_num = conf['max_checkpoints_num'] # 保存最大检查点数量 22 | source_lang = conf['source_lang'] # 源语言 23 | target_lang = conf['target_lang'] # 目标语言 24 | reverse = conf['reverse'] # 是否对语料语言对翻转 25 | en_tokenize_type = conf['en_tokenize_type'] # 英文分词类型,可选:BPE/TOKENIZE 26 | zh_tokenize_type = conf['zh_tokenize_type'] # 中文分词类型,可选:TOKENIZE 27 | tokenizer_path_prefix = conf["tokenizer_path_prefix"] # 字典保存路径前缀 28 | encoded_sequences_path_prefix = conf['encoded_sequences_path_prefix'] # 编码句子保存路径前缀 29 | result_save_dir = conf['result_save_dir'] # 训练过程指标变化图保存路径 30 | path_to_train_file = conf['path_to_train_file'] # 用于训练的文本路径 31 | path_to_eval_file = conf['path_to_eval_file'] # 用于评估计算指标的文本路径 32 | num_eval = conf['num_eval'] # 用于计算指标的句子对数量 33 | checkpoint_path = os.path.join(conf["checkpoint_path_dir"], conf['source_lang']+'_'+conf['target_lang']) # 检查点路径 34 | BUFFER_SIZE = conf['BUFFER_SIZE'] 35 | BATCH_SIZE = conf['BATCH_SIZE'] 36 | train_size = conf['train_size'] # 训练数据中test数据占比 37 | num_sentences = conf["num_sentences"] # 用于训练的句子对数量 38 | num_layers = conf["num_layers"] # encoder 与 decoder 中包含的 encoder 与 decoder 层数 39 | d_model = conf["d_model"] # embedding 的维度 40 | dff = 
conf["dff"] # 点式前馈网络(Point wise feed forward network)第一层dense的维度 41 | num_heads = conf["num_heads"] # 多头注意力的头数 42 | dropout_rate = conf["dropout_rate"] 43 | EPOCHS = conf["EPOCHS"] # 训练轮次 44 | max_target_length = conf['max_target_length'] # 最大生成目标句子长度 45 | target_vocab_size = conf["target_vocab_size"] # 英语分词target_vocab_size 46 | start_word = conf["start_word"] # 句子开始标志 47 | end_word = conf["end_word"] # 句子结束标志 48 | BEAM_SIZE = conf["BEAM_SIZE"] # BEAM_SIZE 49 | checkpoint_ensembling = conf["checkpoint_ensembling"] # 是否采用checkpoint_ensembling 50 | 51 | lm_path_to_train_file = conf["language_model"]["path_to_train_file_lm"] # 语言模型训练文本路径 52 | lm_language = conf["language_model"]["language"] 53 | lm_tokenize_type = conf["language_model"]["tokenize_type"] 54 | lm_EPOCHS = conf["language_model"]["EPOCHS"] 55 | lm_num_sentences = conf["language_model"]["num_sentences"] 56 | lm_BATCH_SIZE = conf["language_model"]["BATCH_SIZE"] 57 | lm_train_size = conf["language_model"]["train_size"] 58 | lm_checkpoint_path = os.path.join(conf["checkpoint_path_dir"], 'lm') 59 | lm_d_embedding = conf["language_model"]["d_embedding"] 60 | lm_d_rnn = conf["language_model"]["d_rnn"] 61 | lm_max_checkpoints_num = conf["language_model"]["max_checkpoints_num"] 62 | lm_checkpoints_save_freq = conf["language_model"]["checkpoints_save_freq"] 63 | lm_validation_freq = conf["language_model"]["validation_freq"] 64 | 65 | -------------------------------------------------------------------------------- /hlp/mt/evaluate.py: -------------------------------------------------------------------------------- 1 | from hlp.mt.common import bleu as _bleu 2 | from hlp.mt.common import load_dataset 3 | from hlp.mt.config import get_config as _config 4 | from hlp.mt.model import nmt_model 5 | from hlp.mt import translator 6 | from hlp.mt.common.misc import check_and_create 7 | 8 | 9 | # BLEU指标计算 10 | def _calc_bleu(path, model, tokenizer_source, tokenizer_target): 11 | # 读入文本 12 | source_sentences, target_sentences = load_dataset.load_sentences(path, _config.num_eval) 13 | 14 | print('开始计算BLEU指标...') 15 | bleu_sum = 0 16 | for i in range(_config.num_eval): 17 | candidate_sentence = translator.translate(source_sentences[i], model, tokenizer_source, 18 | tokenizer_target, beam_size=_config.BEAM_SIZE)[0] 19 | print('-' * 20) 20 | print('第%d/%d个句子:' % (i + 1, _config.num_eval)) 21 | print('源句子:' + source_sentences[i].strip()) 22 | print('机翻句子:' + candidate_sentence) 23 | print('参考句子:' + target_sentences[i]) 24 | bleu_i = _bleu.bleu_nltk(candidate_sentence, [target_sentences[i]], language=_config.target_lang) 25 | print('此句子BLEU指标:%.2f' % bleu_i) 26 | bleu_sum += bleu_i 27 | 28 | bleu = bleu_sum / _config.num_eval 29 | print('-' * 20) 30 | print('平均BLEU指标为:%.2f' % bleu) 31 | 32 | 33 | def main(): 34 | if check_and_create(_config.checkpoint_path): # 检测是否有检查点 35 | # 读取保存的需要的配置 36 | transformer, _, tokenizer_source, tokenizer_target = nmt_model.load_model() 37 | _calc_bleu(_config.path_to_eval_file, transformer, tokenizer_source, tokenizer_target) 38 | else: 39 | print('没有发现训练好的模型,请先训练模型.') 40 | 41 | 42 | if __name__ == '__main__': 43 | main() 44 | -------------------------------------------------------------------------------- /hlp/mt/lm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/mt/lm/__init__.py -------------------------------------------------------------------------------- 
/hlp/mt/lm/language_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from pathlib import Path 4 | import tensorflow as tf 5 | 6 | from hlp.mt.config import get_config as _config 7 | 8 | 9 | class LanguageModel(tf.keras.Model): 10 | """ 11 | 语言模型,将input输入 12 | """ 13 | 14 | def __init__(self, vocab_size, d_embedding, batch_size, d_rnn): 15 | super(LanguageModel, self).__init__() 16 | # 初始参数 17 | self.batch_size = batch_size 18 | self.d_rnn = d_rnn 19 | self.d_embedding = d_embedding 20 | self.embedding = tf.keras.layers.Embedding(vocab_size+1, d_embedding) 21 | self.state0 = [tf.zeros([batch_size, d_rnn]), tf.zeros([batch_size, d_rnn])] 22 | self.state1 = [tf.zeros([batch_size, d_rnn]), tf.zeros([batch_size, d_rnn])] 23 | 24 | self.cell0 = tf.keras.layers.LSTMCell(d_rnn) 25 | self.cell1 = tf.keras.layers.LSTMCell(d_rnn) 26 | 27 | self.output_layer = tf.keras.layers.Dense(vocab_size) 28 | 29 | def call(self, sequences): 30 | """ 31 | 传入已编码的句子,shape ---> (batch_size, seq_len) 32 | 返回预测序列 33 | """ 34 | output = [] 35 | sequences = self.embedding(sequences) # shape ---> (batch_size, seq_len, vocab_size) 36 | sequences *= tf.math.sqrt(tf.cast(self.d_embedding, tf.float32)) 37 | # output 为输出的字符列表,每个列表元素shape --> (batch_size, vocab_size) 38 | for sequences_t in tf.unstack(sequences, axis=1): # sequences_t.shape --> (batch_size, vocab_size) 39 | out0, self.state0 = self.cell0(sequences_t, self.state0) # out0.shape --> (batch_size, vocab_size) 40 | out1, self.state1 = self.cell1(out0, self.state1) 41 | out1 = self.output_layer(out1) 42 | output.append(out1) 43 | 44 | predictions = tf.stack(output, axis=1) # prediction.shape --> (batch_size, seq_len, vocab_size) 45 | return predictions 46 | 47 | def reset_states(self): 48 | super(LanguageModel, self).reset_states() 49 | self.state0 = [tf.zeros([self.batch_size, self.d_rnn]), tf.zeros([self.batch_size, self.d_rnn])] 50 | self.state1 = [tf.zeros([self.batch_size, self.d_rnn]), tf.zeros([self.batch_size, self.d_rnn])] 51 | 52 | 53 | def check_point(): 54 | """ 55 | 检测检查点目录下是否有文件 56 | """ 57 | # 进行语言对判断从而确定检查点路径 58 | checkpoint_dir = _config.lm_checkpoint_path 59 | is_exist = Path(checkpoint_dir) 60 | if not is_exist.exists(): 61 | os.makedirs(checkpoint_dir, exist_ok=True) 62 | if_ckpt = tf.io.gfile.listdir(checkpoint_dir) 63 | return if_ckpt 64 | 65 | -------------------------------------------------------------------------------- /hlp/mt/lm/lm_preprocess.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy 3 | from sklearn.model_selection import train_test_split 4 | 5 | from hlp.mt.common import load_dataset, text_vectorize, text_split 6 | from hlp.mt.config import get_config as _config 7 | 8 | 9 | def get_tokenizer_path(language, mode): 10 | """合成字典保存路径 11 | 12 | @param language:语言 13 | @param mode:编码类型 14 | @return:字典保存路径 15 | """ 16 | return _config.tokenizer_path_prefix + language + '_' + mode.lower() 17 | 18 | 19 | def get_encoded_sequences_path(language, postfix=''): 20 | """根据语言获取已编码句子的保存路径 21 | 22 | @param language: 语言 23 | @param postfix: 保存路径的后缀 24 | @return:已编码句子的保存路径 25 | """ 26 | return _config.encoded_sequences_path_prefix + language + postfix 27 | 28 | 29 | def train_preprocess(): 30 | language = _config.lm_language 31 | mode = _config.lm_tokenize_type 32 | tokenizer_path = get_tokenizer_path(language, mode) 33 | encoded_sequences_path_train = get_encoded_sequences_path(language, postfix='_train') 
34 | # encoded_sequences_path_val = get_encoded_sequences_path(language, postfix='_val') 35 | 36 | # 文本加载及预处理 37 | print('正在加载、预处理数据...') 38 | sentences = load_dataset.load_single_sentences(_config.lm_path_to_train_file, _config.lm_num_sentences, column=2) 39 | sentences = text_split.preprocess_sentences(sentences, language, mode) 40 | print('已加载句子数量:%d' % _config.lm_num_sentences) 41 | print('数据加载、预处理完毕!\n') 42 | 43 | # 使用预处理的文本生成及保存字典 44 | tokenizer, vocab_size = text_vectorize.create_and_save_tokenizer(sentences, tokenizer_path, language, mode) 45 | print('生成字典大小:%d' % vocab_size) 46 | print('字典生成、保存完毕!\n') 47 | 48 | # 使用字典对文本进行编码并保存 49 | print("正在编码训练集句子...") 50 | max_sequence_length = text_vectorize.encode_and_save(sentences, tokenizer, encoded_sequences_path_train, language, 51 | mode) 52 | print('最大句子长度:%d' % max_sequence_length) 53 | print("句子编码完毕!\n") 54 | 55 | return tokenizer, vocab_size, max_sequence_length 56 | 57 | 58 | def get_dataset(sequences_path, train_size=_config.lm_train_size): 59 | """加载并划分数据集 60 | """ 61 | tensor = numpy.loadtxt(sequences_path, dtype='int32') 62 | 63 | train_dataset, val_dataset = train_test_split(tensor, train_size=train_size) 64 | train_dataset = tf.data.Dataset.from_tensor_slices(train_dataset) 65 | train_dataset = train_dataset.shuffle(_config.lm_BATCH_SIZE).batch(_config.lm_BATCH_SIZE, drop_remainder=True) 66 | val_dataset = tf.data.Dataset.from_tensor_slices(val_dataset) 67 | val_dataset = val_dataset.shuffle(_config.lm_BATCH_SIZE).batch(_config.lm_BATCH_SIZE, drop_remainder=True) 68 | 69 | return train_dataset, val_dataset 70 | 71 | -------------------------------------------------------------------------------- /hlp/mt/lm/lm_rescore.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import tensorflow as tf 4 | from pathlib import Path 5 | 6 | from hlp.mt.config import get_config as _config 7 | from hlp.mt.common import text_vectorize 8 | from hlp.mt.lm import language_model 9 | 10 | 11 | def sentence_rescore(sentences, model, tokenizer): 12 | """给句子列表打分 13 | 14 | @param sentences: 需要进行打分的句子列表 15 | @param model: 打分使用的语言模型实例 16 | @param tokenizer: 字典 17 | """ 18 | language = _config.lm_language 19 | mode = _config.lm_tokenize_type 20 | 21 | scores_list = [] 22 | for i, sentence in enumerate(sentences): 23 | score = 0 24 | sequence = text_vectorize.encode_sentences([sentence], tokenizer, language, mode) 25 | seq_input = sequence[:, :-1] 26 | seq_real = sequence[:, 1:] 27 | prediction = model(seq_input) # (1, seq_len, vocab_size) 28 | for j in range(prediction.shape[0]): 29 | score += prediction[seq_real[0][j]] 30 | scores_list.append(score) 31 | 32 | return scores_list 33 | 34 | 35 | def load_checkpoint(model, checkpoint_path=None): 36 | """从检查点加载模型 37 | 38 | @param model: 模型 39 | @param checkpoint_path:检查点路径,若为None,则默认使用保存的最新的检查点 40 | """ 41 | if checkpoint_path is None: 42 | checkpoint_dir = _config.lm_checkpoint_path 43 | is_exist = Path(checkpoint_dir) 44 | else: 45 | checkpoint_dir = os.path.dirname(checkpoint_path) 46 | is_exist = Path(checkpoint_path) 47 | 48 | ckpt = tf.train.Checkpoint(language_model=model, optimizer=tf.keras.optimizers.Adam()) 49 | ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=_config.max_checkpoints_num) 50 | if not is_exist.exists(): 51 | ValueError("路径 %s 不存在" % checkpoint_path) 52 | elif checkpoint_path is None: 53 | if language_model.check_point(): 54 | ckpt.restore(ckpt_manager.latest_checkpoint) 55 | print('已恢复至最新检查点!') 56 | 
else: 57 | ckpt.restore(checkpoint_path) 58 | -------------------------------------------------------------------------------- /hlp/mt/lm/lm_train.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import tensorflow as tf 4 | 5 | from hlp.mt.config import get_config as _config 6 | from hlp.mt.lm import language_model, lm_preprocess 7 | from hlp.utils import optimizers 8 | from hlp.utils import train_history 9 | 10 | 11 | def _train_step(sequences, lm, optimizer, train_loss, train_accuracy): 12 | """一个训练步 13 | @param sequences: 已编码的一个batch的数据集 shape --> (batch_size, seq_length) 14 | @param lm: 语言模型实例 15 | @param optimizer: 优化器 16 | """ 17 | seq_input = sequences[:, :-1] 18 | seq_real = sequences[:, 1:] 19 | 20 | with tf.GradientTape() as tape: 21 | predictions = lm(seq_input) 22 | loss = optimizers.loss_func_mask(seq_real, predictions) 23 | 24 | gradients = tape.gradient(loss, lm.trainable_variables) 25 | optimizer.apply_gradients(zip(gradients, lm.trainable_variables)) 26 | 27 | train_loss(loss) 28 | train_accuracy(seq_real, predictions) 29 | 30 | 31 | def _train_epoch(dataset, model, optimizer, train_loss, train_accuracy, sample_sum): 32 | """ 33 | 对dataset进行训练并打印相关信息 34 | """ 35 | trained_seq_sum = 0 36 | for batch, sequences in enumerate(dataset): 37 | _train_step(sequences, model, optimizer, train_loss, train_accuracy) 38 | trained_seq_sum += _config.lm_BATCH_SIZE 39 | print('\r{}/{} [batch {} loss {:.4f} accuracy {:.4f}]'.format(trained_seq_sum, 40 | sample_sum, 41 | batch + 1, 42 | train_loss.result() 43 | , train_accuracy.result()), end='') 44 | print('\r{}/{} [==============================]'.format(sample_sum, sample_sum), end='') 45 | 46 | 47 | def train(epochs=_config.lm_EPOCHS, validation_split=0.0, 48 | min_delta=0.00003, patience=10, validation_freq=1): 49 | """训练 50 | @param epochs: 训练轮次 51 | @return: 训练过程history 52 | @param validation_split: 验证集划分比例 53 | @param min_delta: 增大或减小的阈值,只有大于这个部分才算作improvement 54 | @param patience: 能够容忍多少个val_accuracy都没有improvement 55 | @param validation_freq: 验证频率 56 | @return: history,包含训练过程中所有的指标 57 | """ 58 | max_acc = 0 59 | patience_num = 0 60 | 61 | optimizer = tf.keras.optimizers.Adam() 62 | train_loss = tf.keras.metrics.Mean(name='train_loss') 63 | train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy') 64 | history = {'accuracy': [], 'loss': [], 'val_accuracy': [], 'val_loss': []} 65 | encoded_sequences_path_train = lm_preprocess.get_encoded_sequences_path(_config.lm_language, postfix='_train') 66 | 67 | tokenizer, vocab_size, max_sequence_length = lm_preprocess.train_preprocess() 68 | train_dataset, val_dataset = lm_preprocess.get_dataset(encoded_sequences_path_train, train_size=validation_split) 69 | 70 | lm = language_model.LanguageModel(vocab_size, _config.lm_d_embedding, _config.lm_BATCH_SIZE, _config.lm_d_rnn) 71 | 72 | # 检查点设置,如果检查点存在,则恢复最新的检查点。 73 | ckpt = tf.train.Checkpoint(language_model=lm, optimizer=optimizer) 74 | ckpt_manager = tf.train.CheckpointManager(ckpt, _config.lm_checkpoint_path, max_to_keep=_config.max_checkpoints_num) 75 | if language_model.check_point(): 76 | ckpt.restore(ckpt_manager.latest_checkpoint) 77 | print('已恢复至最新检查点!') 78 | 79 | train_batch_sum = int((_config.lm_num_sentences*_config.lm_train_size)//_config.lm_BATCH_SIZE) 80 | val_batch_sum = int((_config.lm_num_sentences*(1-_config.lm_train_size))//_config.lm_BATCH_SIZE) 81 | train_seq_sum = int(train_batch_sum * _config.lm_BATCH_SIZE) 82 | val_seq_sum = 
int(val_batch_sum * _config.lm_BATCH_SIZE) 83 | 84 | print("开始训练...") 85 | for epoch in range(epochs): 86 | print('Epoch {}/{}'.format(epoch + 1, epochs)) 87 | start = time.time() 88 | train_loss.reset_states() 89 | train_accuracy.reset_states() 90 | 91 | _train_epoch(train_dataset, lm, optimizer, train_loss, train_accuracy, train_seq_sum) 92 | 93 | history['accuracy'].append(train_accuracy.result().numpy()) 94 | history['loss'].append(train_loss.result().numpy()) 95 | 96 | epoch_time = (time.time() - start) 97 | step_time = epoch_time * _config.BATCH_SIZE / (_config.lm_num_sentences*_config.lm_train_size) 98 | 99 | # 验证部分 100 | # 若到达所设置验证频率或最后一个epoch,并且validate_from_txt为False和train_size不同时满足时使用验证集验证 101 | if (epoch + 1) % validation_freq == 0 or (epoch + 1) == _config.EPOCHS: 102 | temp_loss = train_loss.result() 103 | temp_acc = train_accuracy.result() 104 | train_loss.reset_states() 105 | train_accuracy.reset_states() 106 | 107 | _train_epoch(val_dataset, lm, optimizer, train_loss, train_accuracy, train_seq_sum+val_seq_sum) 108 | 109 | history['val_accuracy'].append(train_accuracy.result().numpy()) 110 | history['val_loss'].append(train_loss.result().numpy()) 111 | print(' - {:.0f}s - {:.0f}ms/step - loss: {:.4f} - accuracy {:.4f} - val_loss: {:.4f} - val_accuracy {:.4f}' 112 | .format(epoch_time, step_time * 1000, temp_loss, temp_acc, train_loss.result(), 113 | train_accuracy.result())) 114 | # stop-early判断 115 | if train_accuracy.result().numpy() >= (max_acc * (1 + min_delta)): 116 | max_acc = train_accuracy.result().numpy() 117 | patience_num = 0 118 | else: 119 | patience_num += 1 120 | 121 | if (epoch + 1) % _config.checkpoints_save_freq == 0: 122 | ckpt_save_path = ckpt_manager.save() 123 | print('检查点已保存至:{}'.format(ckpt_save_path)) 124 | 125 | # 若连续patience个val_accuracy不达标,则停止训练 126 | if patience_num == patience: 127 | print('检测到连续%d个验证集增长不达标,停止训练' % patience) 128 | break 129 | 130 | if (epoch + 1) % _config.checkpoints_save_freq != 0: 131 | ckpt_save_path = ckpt_manager.save() 132 | print('检查点已保存至:{}'.format(ckpt_save_path)) 133 | 134 | train_history.show_and_save_history(history, _config.result_save_dir, _config.lm_validation_freq) 135 | return history 136 | 137 | 138 | def main(): 139 | train(validation_split=1-_config.train_size) 140 | 141 | 142 | if __name__ == '__main__': 143 | main() 144 | -------------------------------------------------------------------------------- /hlp/mt/model/checkpoint.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from tensorflow.python.training.tracking import graph_view 4 | import tensorflow as tf 5 | import numpy 6 | 7 | from hlp.mt.config import get_config as _config 8 | from hlp.mt.model import nmt_model, checkpoint 9 | from hlp.mt.model import transformer as _transformer 10 | 11 | 12 | def load_checkpoint(transformer, optimizer, checkpoint_path=_config.checkpoint_path): 13 | """ 14 | 获取检查点 15 | @param transformer: 模型实例 16 | @param optimizer: 优化器 17 | @param checkpoint_path:检查点的路径 18 | """ 19 | # 加载检查点 20 | checkpoint_dir = os.path.dirname(checkpoint_path) 21 | ckpt = tf.train.Checkpoint(transformer=transformer, 22 | optimizer=optimizer) 23 | ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=_config.max_checkpoints_num) 24 | if ckpt_manager.latest_checkpoint: 25 | # ckpt.restore(ckpt_manager.latest_checkpoint) 26 | ckpt.restore(checkpoint_path) 27 | # print('已恢复至最新的检查点!') 28 | print('正在使用检查点:'+checkpoint_path) 29 | 30 | 31 | def 
get_checkpoints_path(model_dir=_config.checkpoint_path): 32 | """ 33 | 获取检查点路径列表 34 | @param model_dir: 35 | @return: 36 | """ 37 | checkpoint_state = tf.train.get_checkpoint_state(model_dir) 38 | if checkpoint_state is None: 39 | raise ValueError("未在目录:%s 中发现检查点!" % model_dir) 40 | return checkpoint_state.all_model_checkpoint_paths 41 | 42 | 43 | def average_checkpoints(model_dir, 44 | output_dir, 45 | trackables, 46 | max_count=8, 47 | model_key="model"): 48 | """ 49 | 50 | @param model_dir: 需要平均的检查点的文件夹路径 51 | @param output_dir: 将得到的检查点输出的文件夹路径 52 | @param trackables: 检查点所保存的对象的字典 53 | @param max_count: 最多使用几个检查点进行平均 54 | @param model_key: 字典中模型对应的key 55 | @return: 56 | """ 57 | if model_dir == output_dir: 58 | raise ValueError("输入与输出需是不同文件夹") 59 | model = trackables.get(model_key) 60 | if model is None: 61 | raise ValueError("模型的key:%s 并没有在字典 %s 中找到" % (model_key, trackables)) 62 | 63 | # 取检查点路径列表 64 | checkpoint_state = tf.train.get_checkpoint_state(model_dir) 65 | if checkpoint_state is None: 66 | raise ValueError(" %s 文件夹中没有检查点" % model_dir) 67 | checkpoints_path = checkpoint_state.all_model_checkpoint_paths 68 | if len(checkpoints_path) > max_count: 69 | checkpoints_path = checkpoints_path[-max_count:] 70 | 71 | _average_checkpoints_into_layer(checkpoints_path, model, model_key) 72 | 73 | last_step = _get_step_from_checkpoint_prefix(checkpoints_path[-1]) 74 | checkpoint = tf.train.Checkpoint(**trackables) 75 | new_checkpoint_manager = tf.train.CheckpointManager(checkpoint, output_dir, max_to_keep=None) 76 | new_checkpoint_manager.save(checkpoint_number=last_step) 77 | return output_dir 78 | 79 | 80 | def _average_checkpoints_into_layer(checkpoints, layer, layer_prefix): 81 | """将检查点平均并将平均值放到模型中 82 | @param checkpoints: 检查点路径的列表 83 | @param layer: 模型实例 84 | @param layer_prefix:模型的key 85 | """ 86 | if not checkpoints: 87 | raise ValueError("至少应有一个检查点") 88 | if not layer.built: 89 | raise ValueError("使用此方法前应对模型进行build") 90 | 91 | # 将模型的变量都重置为0 92 | for variable in layer.variables: 93 | variable.assign(tf.zeros_like(variable)) 94 | 95 | # 得到一个检查点中变量名到层中变量的字典 96 | _, names_to_variables = _get_variables_name_mapping(layer, root_key=layer_prefix) 97 | 98 | num_checkpoints = len(checkpoints) 99 | tf.get_logger().info("正在平均 %d 个检查点...", num_checkpoints) 100 | for checkpoint_path in checkpoints: 101 | tf.get_logger().info("正在读取检查点 %s...", checkpoint_path) 102 | reader = tf.train.load_checkpoint(checkpoint_path) 103 | for path in reader.get_variable_to_shape_map().keys(): 104 | if not path.startswith(layer_prefix) or ".OPTIMIZER_SLOT" in path: 105 | continue 106 | variable = names_to_variables[path] 107 | value = reader.get_tensor(path) 108 | variable.assign_add(value / num_checkpoints) 109 | 110 | 111 | def _get_step_from_checkpoint_prefix(prefix): 112 | """Extracts the training step from the checkpoint file prefix.""" 113 | return int(prefix.split("-")[-1]) 114 | 115 | 116 | def _get_variables_name_mapping(root, root_key=None): 117 | """ 返回一个检查点中变量名到层中变量的字典 118 | @param root: 模型(层)实例 119 | @param root_key: 模型(层)的key,即在检查点中的key 120 | @return: 返回一个检查点中变量名到层中变量的字典 121 | """ 122 | named_variables, _, _ = graph_view.ObjectGraphView(root).serialize_object_graph() 123 | variables_to_names = {} 124 | names_to_variables = {} 125 | for saveable_object in named_variables: 126 | variable = saveable_object.op 127 | # 判断是否是张量,暂时去掉 128 | # if not hasattr(variable, "ref"): 129 | # continue 130 | name = saveable_object.name 131 | if root_key is not None: 132 | name = "%s/%s" % (root_key, name) 133 | 
variables_to_names[variable.experimental_ref()] = name 134 | names_to_variables[name] = variable 135 | return variables_to_names, names_to_variables 136 | 137 | 138 | def _model_build(model, inp, tar): 139 | tar_inp = tar[:, :-1] 140 | enc_padding_mask, combined_mask, dec_padding_mask = _transformer.create_masks(inp, tar_inp) 141 | predictions, _ = model(inp, tar_inp, True, enc_padding_mask, combined_mask, dec_padding_mask) 142 | 143 | 144 | def _get_sample_dataset(): 145 | """从保存的文件中读取样例数据进行模型build""" 146 | input_path = _config.encoded_sequences_path_prefix + _config.source_lang 147 | target_path = _config.encoded_sequences_path_prefix + _config.target_lang 148 | input_tensor = tf.cast(numpy.loadtxt(input_path, dtype='int32', max_rows=_config.BATCH_SIZE), tf.int32) 149 | target_tensor = tf.cast(numpy.loadtxt(target_path, dtype='int32', max_rows=_config.BATCH_SIZE), tf.int32) 150 | return input_tensor, target_tensor 151 | 152 | 153 | def average_checkpoints_test(): 154 | """ 155 | 对检查点本身进行平均的示例 156 | 需要先进行训练保存几个检查点 157 | """ 158 | # 模型相关配置 159 | transformer, optimizer, _, _ = nmt_model.load_model() 160 | trackables = {'transformer': transformer, 'optimizer': optimizer} 161 | model_key = 'transformer' 162 | 163 | # 模型build加载一个batch数据 164 | input_tensor, target_tensor = _get_sample_dataset() 165 | _model_build(transformer, input_tensor, target_tensor) 166 | 167 | # 检查点路径及输出平均检查点路径 168 | model_dir = _config.checkpoint_path 169 | output_dir = model_dir + '_avg_ckpts' 170 | if not os.path.exists(output_dir): 171 | os.makedirs(output_dir, exist_ok=True) 172 | 173 | path = checkpoint.average_checkpoints(model_dir, output_dir, trackables, max_count=8, model_key=model_key) 174 | print(path) 175 | 176 | 177 | if __name__ == '__main__': 178 | average_checkpoints_test() -------------------------------------------------------------------------------- /hlp/mt/model/nmt_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | import hlp.mt.common.text_vectorize 4 | from hlp.mt.model import transformer as _transformer 5 | from hlp.mt.config import get_config as _config 6 | from hlp.mt.common import text_vectorize 7 | from hlp.utils import optimizers as _optimizers 8 | 9 | 10 | def create_model(vocab_size_source, vocab_size_target): 11 | """获取模型""" 12 | transformer = _transformer.Transformer(_config.num_layers, 13 | _config.d_model, 14 | _config.num_heads, 15 | _config.dff, 16 | vocab_size_source + 1, 17 | vocab_size_target + 1, 18 | pe_input=vocab_size_source + 1, 19 | pe_target=vocab_size_target + 1, 20 | rate=_config.dropout_rate) 21 | return transformer 22 | 23 | 24 | def load_model(): 25 | """ 26 | 进行翻译或评估前数据恢复工作 27 | """ 28 | # 获取字典保存路径 29 | source_mode = hlp.mt.common.text_vectorize.get_tokenizer_mode(_config.source_lang) 30 | target_mode = hlp.mt.common.text_vectorize.get_tokenizer_mode(_config.target_lang) 31 | source_tokenizer_path = hlp.mt.common.text_vectorize.get_tokenizer_path(_config.source_lang, source_mode) 32 | target_tokenizer_path = hlp.mt.common.text_vectorize.get_tokenizer_path(_config.target_lang, target_mode) 33 | # 加载源语言字典 34 | print("正在加载源语言(%s)字典..." % _config.source_lang) 35 | tokenizer_source, vocab_size_source = text_vectorize.load_tokenizer(source_tokenizer_path, 36 | _config.source_lang, source_mode) 37 | print('源语言字典大小:%d' % vocab_size_source) 38 | print('源语言字典加载完毕!\n') 39 | 40 | # 加载目标语言字典 41 | print("正在加载目标语言(%s)字典..." 
% _config.target_lang) 42 | tokenizer_target, vocab_size_target = text_vectorize.load_tokenizer(target_tokenizer_path, 43 | _config.target_lang, target_mode) 44 | print('目标语言字典大小:%d' % vocab_size_target) 45 | print('目标语言字典加载完毕!\n') 46 | 47 | # 创建模型及相关变量 48 | learning_rate = _optimizers.CustomSchedule(_config.d_model) 49 | optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9) 50 | transformer = create_model(vocab_size_source, vocab_size_target) 51 | 52 | return transformer, optimizer, tokenizer_source, tokenizer_target 53 | -------------------------------------------------------------------------------- /hlp/mt/preprocess.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from hlp.mt.common.text_vectorize import get_encoded_sequences_path, get_tokenizer_path, get_tokenizer_mode 4 | from hlp.mt.config import get_config as _config 5 | from hlp.mt.common import text_vectorize 6 | from hlp.mt.common.text_split import preprocess_sentences 7 | from hlp.mt.common.load_dataset import load_sentences 8 | 9 | 10 | def _count_words(sentences): 11 | """输入句子列表,使用空格分隔返回单词数""" 12 | count = 0 13 | for s in sentences: 14 | s = re.split(r' +', s) 15 | count += len(s) 16 | return count 17 | 18 | 19 | def train_preprocess(): 20 | # 获取source、target编码模式,字典保存路径,编码句子保存路径 21 | source_mode = get_tokenizer_mode(_config.source_lang) 22 | target_mode = get_tokenizer_mode(_config.target_lang) 23 | 24 | source_tokenizer_path = get_tokenizer_path(_config.source_lang, source_mode) 25 | target_tokenizer_path = get_tokenizer_path(_config.target_lang, target_mode) 26 | 27 | source_sequences_path_train = get_encoded_sequences_path(_config.source_lang, postfix='_train') 28 | target_sequences_path_train = get_encoded_sequences_path(_config.target_lang, postfix='_train') 29 | source_sequences_path_val = get_encoded_sequences_path(_config.source_lang, postfix='_val') 30 | target_sequences_path_val = get_encoded_sequences_path(_config.target_lang, postfix='_val') 31 | 32 | # 加载句子 33 | print('加载训练数据集...') 34 | source_sentences, target_sentences = load_sentences(_config.path_to_train_file, _config.num_sentences) 35 | 36 | # 加载验证集 37 | if _config.validation_data == "True": 38 | print('加载验证数据集...') 39 | source_sentences_val, target_sentences_val = load_sentences(_config.path_to_val_file, 40 | _config.num_validate_sentences) 41 | 42 | print('已加载句子数量:%d' % _config.num_sentences) 43 | # 计算语料词数 44 | num_words = _count_words(source_sentences) 45 | print('源语料(%s)词数:%d' % (_config.source_lang, num_words)) 46 | 47 | # 预处理句子 48 | print('预处理训练数据集...') 49 | source_sentences = preprocess_sentences(source_sentences, _config.source_lang, source_mode) 50 | target_sentences = preprocess_sentences(target_sentences, _config.target_lang, target_mode) 51 | 52 | if _config.validation_data == "True": 53 | print('预处理验证数据集...') 54 | source_sentences_val = preprocess_sentences(source_sentences_val, _config.source_lang, source_mode) 55 | target_sentences_val = preprocess_sentences(target_sentences_val, _config.target_lang, target_mode) 56 | 57 | # 生成及保存字典 58 | print('正在生成、保存源语言(%s)字典(分词方式:%s)...' % (_config.source_lang, _config.en_tokenize_type)) 59 | tokenizer_source, vocab_size_source = text_vectorize.create_and_save_tokenizer(source_sentences, 60 | source_tokenizer_path, 61 | _config.source_lang, 62 | source_mode) 63 | print('源语言字典大小:%d' % vocab_size_source) 64 | 65 | print('正在生成、保存目标语言(%s)字典(分词方式:%s)...' 
% (_config.target_lang, _config.zh_tokenize_type)) 66 | tokenizer_target, vocab_size_target = text_vectorize.create_and_save_tokenizer(target_sentences, 67 | target_tokenizer_path, 68 | _config.target_lang, 69 | target_mode) 70 | print('目标语言字典大小:%d' % vocab_size_target) 71 | 72 | # 编码句子 73 | print("正在编码训练集句子...") 74 | max_sequence_length_source = text_vectorize.encode_and_save(sentences=source_sentences, tokenizer=tokenizer_source, 75 | save_path=source_sequences_path_train, 76 | language=_config.source_lang, mode=source_mode) 77 | max_sequence_length_target = text_vectorize.encode_and_save(sentences=target_sentences, tokenizer=tokenizer_target, 78 | save_path=target_sequences_path_train, 79 | language=_config.target_lang, mode=target_mode) 80 | print('最大源语言(%s)句子长度:%d' % (_config.source_lang, max_sequence_length_source)) 81 | print('最大目标语言(%s)句子长度:%d' % (_config.target_lang, max_sequence_length_target)) 82 | 83 | if _config.validation_data == "True": 84 | print("正在编码验证集句子...") 85 | _ = text_vectorize.encode_and_save(sentences=source_sentences_val, tokenizer=tokenizer_source, 86 | save_path=source_sequences_path_val, language=_config.source_lang, 87 | mode=source_mode) 88 | _ = text_vectorize.encode_and_save(sentences=target_sentences_val, tokenizer=tokenizer_target, 89 | save_path=target_sequences_path_val, language=_config.target_lang, 90 | mode=target_mode) 91 | print("语料处理完成.\n") 92 | 93 | return vocab_size_source, vocab_size_target 94 | 95 | 96 | if __name__ == '__main__': 97 | train_preprocess() 98 | -------------------------------------------------------------------------------- /hlp/mt/translate.py: -------------------------------------------------------------------------------- 1 | import hlp.mt.common.misc 2 | from hlp.mt.model import nmt_model 3 | from hlp.mt import translator 4 | from hlp.mt.config import get_config as _config 5 | from hlp.mt.common.misc import check_and_create 6 | 7 | 8 | def main(): 9 | if check_and_create(_config.checkpoint_path): # 检测是否有检查点 10 | # 读取保存的需要的配置 11 | transformer, _, tokenizer_source, tokenizer_target = nmt_model.load_model() 12 | 13 | # translate 14 | while True: 15 | print('-' * 30) 16 | print('输入0可退出程序') 17 | sentence = input('请输入要翻译的句子 :') 18 | if sentence == '0': 19 | break 20 | else: 21 | print('翻译结果:', translator.translate(sentence, transformer, 22 | tokenizer_source, tokenizer_target, 23 | beam_size=_config.BEAM_SIZE)) 24 | else: 25 | print('请先训练才可使用翻译功能...') 26 | 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /hlp/mt/translator.py: -------------------------------------------------------------------------------- 1 | """ 2 | 对输出的句子进行翻译 3 | """ 4 | import copy 5 | 6 | import tensorflow as tf 7 | 8 | import hlp.mt.common.text_vectorize 9 | from hlp.mt.common import text_split 10 | from hlp.mt.common import text_vectorize 11 | from hlp.mt.config import get_config as _config 12 | from hlp.mt.model import checkpoint 13 | from hlp.mt.model import transformer as _transformer 14 | from hlp.utils import beamsearch 15 | 16 | 17 | def _checkpoint_ensembling(checkpoints_path, model, inputs, decoder_input): 18 | """ 19 | 使用路径中的检查点得到此步的predictions 20 | @param checkpoints_path: 使用的检查点路径列表 21 | @param model: 模型 22 | @param inputs: 输入 23 | @param decoder_input: 解码器输入 24 | @param enc_padding_mask: 编码器遮挡 25 | @param combined_mask: 遮挡 26 | @param dec_padding_mask: 解码器遮挡 27 | @return:使用多个检查点模型后的平均predictions 28 | """ 29 | # 首先使用首个检查点模型得到结果 30 | enc_padding_mask, 
combined_mask, dec_padding_mask = _transformer.create_masks(inputs, decoder_input) 31 | checkpoint_path = checkpoints_path[0] 32 | checkpoint.load_checkpoint(model, tf.keras.optimizers.Adam(), checkpoint_path=checkpoint_path) 33 | predictions, _ = model(inputs, decoder_input, False, enc_padding_mask, combined_mask, dec_padding_mask) 34 | # 从 seq_len 维度选择最后一个词 35 | predictions = tf.squeeze(predictions[:, -1:, :], axis=1) # (batch_size, vocab_size) 36 | predictions_sum = copy.deepcopy(predictions) 37 | if len(checkpoints_path) > 1: 38 | for i in range(len(checkpoints_path) - 1): # 分别读取n个检查点模型并预测得到predictions进行累加 39 | checkpoint_path = checkpoints_path[i + 1] 40 | checkpoint.load_checkpoint(model, tf.keras.optimizers.Adam(), checkpoint_path=checkpoint_path) 41 | predictions, _ = model(inputs, decoder_input, False, enc_padding_mask, combined_mask, dec_padding_mask) 42 | predictions = tf.squeeze(predictions[:, -1:, :], axis=1) # (batch_size, vocab_size) 43 | predictions_sum = tf.add(predictions_sum, predictions) 44 | predictions_avg = tf.divide(predictions_sum, len(checkpoints_path)) 45 | 46 | return predictions_avg 47 | 48 | 49 | def _predict_index(checkpoints_path, inp_sentence, model, beam_search_container, 50 | input_tokenizer, target_tokenizer): 51 | """对输入句子进行翻译并返回编码的句子列表""" 52 | input_mode = text_vectorize.get_tokenizer_mode(_config.source_lang) 53 | target_mode = text_vectorize.get_tokenizer_mode(_config.target_lang) 54 | 55 | sentence = text_split.preprocess_sentences([inp_sentence], _config.source_lang, input_mode) 56 | 57 | inp_sequence, _ = text_vectorize.encode_sentences(sentence, input_tokenizer, 58 | language=_config.source_lang, mode=input_mode) 59 | inp_sequence = tf.squeeze(inp_sequence) 60 | inp_sequence = tf.expand_dims(inp_sequence, 0) 61 | 62 | # start_token shape:(1,) 63 | start_token = text_vectorize.encode_start_token(_config.start_word, target_tokenizer, 64 | language=_config.target_lang) 65 | end_token, _ = text_vectorize.encode_sentences([_config.end_word], target_tokenizer, 66 | language=_config.target_lang, mode=target_mode) 67 | end_token = tf.squeeze(end_token) 68 | 69 | decoder_input = tf.expand_dims(start_token, 0) # shape --> (1,1) 即(batch_size,sentence_length) 70 | 71 | beam_search_container.reset(inputs=inp_sequence, dec_input=decoder_input) 72 | inputs, decoder_input = beam_search_container.get_search_inputs() 73 | if len(checkpoints_path) == 1: # 如果只使用一个检查点,则不使用checkpoint_ensembling 74 | checkpoint_path = checkpoints_path[0] 75 | checkpoint.load_checkpoint(model, tf.keras.optimizers.Adam(), checkpoint_path=checkpoint_path) 76 | 77 | for i in range(_config.max_target_length): 78 | if len(checkpoints_path) == 1: # 如果只使用一个检查点,则不使用checkpoint_ensembling 79 | enc_padding_mask, combined_mask, dec_padding_mask = _transformer.create_masks(inputs, decoder_input) 80 | predictions, _ = model(inputs, decoder_input, False, enc_padding_mask, combined_mask, dec_padding_mask) 81 | predictions = tf.squeeze(predictions[:, -1:, :], axis=1) # (batch_size, vocab_size) 82 | else: 83 | predictions = _checkpoint_ensembling(checkpoints_path, model, inputs, decoder_input) 84 | 85 | beam_search_container.expand(predictions=predictions, end_sign=end_token) 86 | if beam_search_container.beam_size == 0: 87 | break 88 | inputs, decoder_input = beam_search_container.get_search_inputs() 89 | 90 | beam_search_result = beam_search_container.get_result() 91 | 92 | return beam_search_result 93 | 94 | 95 | def translate(sentence, model, tokenizer_source, tokenizer_target, beam_size): 96 | 
"""对句子(经过预处理未经过编码)进行翻译,未进行检查点的判断""" 97 | beam_search_container = beamsearch.BeamSearch(beam_size=beam_size, 98 | max_length=_config.max_target_length, 99 | worst_score=0) 100 | 101 | # 采用checkpoint_ensembling,获取需要使用的检查点路径列表 102 | checkpoints_path = checkpoint.get_checkpoints_path() 103 | if _config.checkpoint_ensembling == "False": 104 | checkpoints_path = checkpoints_path[-1:] 105 | 106 | predict_idxes = _predict_index(checkpoints_path, sentence, model, beam_search_container, 107 | tokenizer_source, tokenizer_target) 108 | 109 | predicted_sentences = [] 110 | target_mode = text_vectorize.get_tokenizer_mode(_config.target_lang) 111 | # 从容器中抽取序列,生成最终结果 112 | for i in range(len(predict_idxes)): 113 | predict_idx = predict_idxes[i].numpy() 114 | predict_idx = tf.squeeze(predict_idx) 115 | predict_sentence = text_vectorize.decode_sentence(predict_idx, tokenizer_target, 116 | _config.target_lang, target_mode) 117 | predict_sentence = predict_sentence.replace(_config.start_word, '') \ 118 | .replace(_config.end_word, '').strip() 119 | predicted_sentences.append(predict_sentence) 120 | return predicted_sentences 121 | -------------------------------------------------------------------------------- /hlp/stt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/__init__.py -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/dev-clean-2/174/168635/174-168635-0000.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/dev-clean-2/174/168635/174-168635-0000.flac -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/dev-clean-2/174/168635/174-168635-0001.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/dev-clean-2/174/168635/174-168635-0001.flac -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/dev-clean-2/174/168635/174-168635.trans.txt: -------------------------------------------------------------------------------- 1 | 174-168635-0000 HE HAD NEVER BEEN FATHER LOVER HUSBAND FRIEND 2 | 174-168635-0001 THE HEART OF THAT EX CONVICT WAS FULL OF VIRGINITY 3 | -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/dev-clean-2/84/121550/84-121550-0000.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/dev-clean-2/84/121550/84-121550-0000.flac -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/dev-clean-2/84/121550/84-121550-0001.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/dev-clean-2/84/121550/84-121550-0001.flac -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/dev-clean-2/84/121550/84-121550.trans.txt: 
-------------------------------------------------------------------------------- 1 | 84-121550-0000 BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SINGING RECEIVED THEY IN THE MIDST OF LEAVES THAT EVER BORE A BURDEN TO THEIR RHYMES 2 | 84-121550-0001 ALL WATERS THAT ON EARTH MOST LIMPID ARE WOULD SEEM TO HAVE WITHIN THEMSELVES SOME MIXTURE COMPARED WITH THAT WHICH NOTHING DOTH CONCEAL 3 | -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0000.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0000.flac -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0001.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0001.flac -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0002.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315-0002.flac -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/train-clean-5/1088/134315/1088-134315.trans.txt: -------------------------------------------------------------------------------- 1 | 1088-134315-0000 AS YOU KNOW AND AS I HAVE GIVEN YOU PROOF I HAVE THE GREATEST ADMIRATION IN THE WORLD FOR ONE WHOSE WORK FOR HUMANITY HAS WON SUCH UNIVERSAL RECOGNITION I HOPE THAT WE SHALL BOTH FORGET THIS UNHAPPY MORNING AND THAT YOU WILL GIVE ME AN OPPORTUNITY OF RENDERING TO YOU IN PERSON 2 | 1088-134315-0001 THE APOLOGIES WHICH ARE DUE TO YOU I FEEL THAT ANYTHING LESS WILL NEITHER REHABILITATE ME IN YOUR ESTEEM NOR SECURE FOR ME THE REMNANTS OF MY SHATTERED SELF RESPECT I AM HOPING YOU WILL DINE WITH ME NEXT WEEK AND MEET A MOST INTERESTING MAN GEORGE GATHERCOLE 3 | 1088-134315-0002 TO DISTURB A RELATIONSHIP WHICH I HAVE ALWAYS HOPED WOULD BE MUTUALLY PLEASANT IF YOU WILL ALLOW GATHERCOLE WHO WILL BE UNCONSCIOUS OF THE PART HE IS PLAYING TO ACT AS PEACEMAKER BETWEEN YOURSELF AND MYSELF 4 | -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/train-clean-5/1088/134318/1088-134318-0000.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1088/134318/1088-134318-0000.flac -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/train-clean-5/1088/134318/1088-134318-0001.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1088/134318/1088-134318-0001.flac -------------------------------------------------------------------------------- 
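The .trans.txt files above follow the standard LibriSpeech layout: every line pairs an utterance ID (speaker-chapter-index) with its uppercase transcript, and the matching audio sits next to it as <utt-id>.flac. The repository's own loading code lives under hlp/stt/utils/; the helper below is only an illustrative sketch of the format (parse_trans_file is a hypothetical name, not a function from this repo):

import os


def parse_trans_file(trans_path):
    """Illustrative parser for a LibriSpeech *.trans.txt file.

    Each line is "<utt-id> <UPPERCASE TRANSCRIPT>"; the corresponding audio is
    "<utt-id>.flac" in the same directory.
    """
    samples = []
    directory = os.path.dirname(trans_path)
    with open(trans_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            utt_id, text = line.split(" ", 1)
            samples.append((os.path.join(directory, utt_id + ".flac"), text))
    return samples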
/hlp/stt/data/LibriSpeech/train-clean-5/1088/134318/1088-134318.trans.txt: -------------------------------------------------------------------------------- 1 | 1088-134318-0000 CHAPTER TWELVE KARA LAY BACK ON HIS DOWN PILLOWS WITH A SNEER ON HIS FACE AND HIS BRAIN VERY BUSY WHAT STARTED THE TRAIN OF THOUGHT HE DID NOT KNOW BUT AT THAT MOMENT HIS MIND WAS VERY FAR AWAY 2 | 1088-134318-0001 IT CARRIED HIM BACK A DOZEN YEARS TO A DIRTY LITTLE PEASANT'S CABIN ON THE HILLSIDE OUTSIDE DURAZZO TO THE LIVID FACE OF A YOUNG ALBANIAN CHIEF WHO HAD LOST AT KARA'S WHIM ALL THAT LIFE HELD FOR A MAN 3 | -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/train-clean-5/1737/146161/1737-146161-0000.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1737/146161/1737-146161-0000.flac -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/train-clean-5/1737/146161/1737-146161-0001.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/LibriSpeech/train-clean-5/1737/146161/1737-146161-0001.flac -------------------------------------------------------------------------------- /hlp/stt/data/LibriSpeech/train-clean-5/1737/146161/1737-146161.trans.txt: -------------------------------------------------------------------------------- 1 | 1737-146161-0000 KNIT TWO TOGETHER KNIT THREE MAKE ONE KNIT ONE MAKE ONE KNIT THREE KNIT TWO TOGETHER KNIT ONE MAKE ONE SECOND ROW 2 | 1737-146161-0001 SEAMED MAKING A STITCH AT THE BEGINNING THIRD ROW MAKE ONE KNIT ONE KNIT TWO TOGETHER KNIT TWO MAKE ONE KNIT THREE MAKE ONE KNIT TWO 3 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/.wav.scp: -------------------------------------------------------------------------------- 1 | A11_0 ./A11_0.wav 2 | A11_1 ./A11_1.wav 3 | A11_10 ./A11_10.wav 4 | A11_100 ./A11_100.wav 5 | A11_101 ./A11_101.wav 6 | A11_102 ./A11_102.wav 7 | A11_103 ./A11_103.wav 8 | A11_104 ./A11_104.wav 9 | A11_105 ./A11_105.wav 10 | A11_106 ./A11_106.wav 11 | A11_107 ./A11_107.wav 12 | A11_108 ./A11_108.wav 13 | A11_109 ./A11_109.wav 14 | A11_11 ./A11_11.wav 15 | A11_110 ./A11_110.wav 16 | A11_111 ./A11_111.wav 17 | A11_112 ./A11_112.wav 18 | A11_113 ./A11_113.wav 19 | A11_114 ./A11_114.wav 20 | A11_115 ./A11_115.wav 21 | A11_116 ./A11_116.wav 22 | A11_117 ./A11_117.wav 23 | A11_118 ./A11_118.wav 24 | A11_119 ./A11_119.wav 25 | A11_12 ./A11_12.wav 26 | A11_120 ./A11_120.wav 27 | A11_121 ./A11_121.wav 28 | A11_122 ./A11_122.wav 29 | A11_123 ./A11_123.wav 30 | A11_124 ./A11_124.wav 31 | A11_125 ./A11_125.wav 32 | A11_126 ./A11_126.wav 33 | A11_127 ./A11_127.wav 34 | A11_128 ./A11_128.wav 35 | A11_129 ./A11_129.wav 36 | A11_13 ./A11_13.wav 37 | A11_130 ./A11_130.wav 38 | A11_131 ./A11_131.wav 39 | A11_132 ./A11_132.wav 40 | A11_133 ./A11_133.wav 41 | A11_134 ./A11_134.wav 42 | A11_135 ./A11_135.wav 43 | A11_136 ./A11_136.wav 44 | A11_137 ./A11_137.wav 45 | A11_138 ./A11_138.wav 46 | A11_139 ./A11_139.wav 47 | A11_14 ./A11_14.wav 48 | A11_140 ./A11_140.wav 49 | A11_141 ./A11_141.wav 50 | A11_142 ./A11_142.wav 51 | A11_143 ./A11_143.wav 52 | A11_144 ./A11_144.wav 53 | A11_145 ./A11_145.wav 54 | A11_146 
./A11_146.wav 55 | A11_147 ./A11_147.wav 56 | A11_148 ./A11_148.wav 57 | A11_149 ./A11_149.wav 58 | A11_15 ./A11_15.wav 59 | A11_150 ./A11_150.wav 60 | A11_151 ./A11_151.wav 61 | A11_152 ./A11_152.wav 62 | A11_153 ./A11_153.wav 63 | A11_154 ./A11_154.wav 64 | A11_155 ./A11_155.wav 65 | A11_156 ./A11_156.wav 66 | A11_157 ./A11_157.wav 67 | A11_158 ./A11_158.wav 68 | A11_159 ./A11_159.wav 69 | A11_16 ./A11_16.wav 70 | A11_160 ./A11_160.wav 71 | A11_161 ./A11_161.wav 72 | A11_162 ./A11_162.wav 73 | A11_163 ./A11_163.wav 74 | A11_164 ./A11_164.wav 75 | A11_165 ./A11_165.wav 76 | A11_166 ./A11_166.wav 77 | A11_167 ./A11_167.wav 78 | A11_168 ./A11_168.wav 79 | A11_169 ./A11_169.wav 80 | A11_17 ./A11_17.wav 81 | A11_170 ./A11_170.wav 82 | A11_171 ./A11_171.wav 83 | A11_172 ./A11_172.wav 84 | A11_173 ./A11_173.wav 85 | A11_174 ./A11_174.wav 86 | A11_175 ./A11_175.wav 87 | A11_176 ./A11_176.wav 88 | A11_177 ./A11_177.wav 89 | A11_178 ./A11_178.wav 90 | A11_179 ./A11_179.wav 91 | A11_18 ./A11_18.wav 92 | A11_180 ./A11_180.wav 93 | A11_181 ./A11_181.wav 94 | A11_182 ./A11_182.wav 95 | A11_183 ./A11_183.wav 96 | A11_184 ./A11_184.wav 97 | A11_185 ./A11_185.wav 98 | A11_186 ./A11_186.wav 99 | A11_187 ./A11_187.wav 100 | A11_188 ./A11_188.wav 101 | A11_189 ./A11_189.wav 102 | A11_19 ./A11_19.wav 103 | A11_190 ./A11_190.wav 104 | A11_191 ./A11_191.wav 105 | A11_192 ./A11_192.wav 106 | A11_193 ./A11_193.wav 107 | A11_194 ./A11_194.wav 108 | A11_195 ./A11_195.wav 109 | A11_196 ./A11_196.wav 110 | A11_197 ./A11_197.wav 111 | A11_198 ./A11_198.wav 112 | A11_199 ./A11_199.wav 113 | A11_2 ./A11_2.wav 114 | A11_20 ./A11_20.wav 115 | A11_200 ./A11_200.wav 116 | A11_201 ./A11_201.wav 117 | A11_202 ./A11_202.wav 118 | A11_203 ./A11_203.wav 119 | A11_204 ./A11_204.wav 120 | A11_205 ./A11_205.wav 121 | A11_206 ./A11_206.wav 122 | A11_207 ./A11_207.wav 123 | A11_208 ./A11_208.wav 124 | A11_209 ./A11_209.wav 125 | A11_21 ./A11_21.wav 126 | A11_210 ./A11_210.wav 127 | A11_211 ./A11_211.wav 128 | A11_212 ./A11_212.wav 129 | A11_213 ./A11_213.wav 130 | A11_214 ./A11_214.wav 131 | A11_215 ./A11_215.wav 132 | A11_216 ./A11_216.wav 133 | A11_217 ./A11_217.wav 134 | A11_218 ./A11_218.wav 135 | A11_219 ./A11_219.wav 136 | A11_22 ./A11_22.wav 137 | A11_220 ./A11_220.wav 138 | A11_221 ./A11_221.wav 139 | A11_222 ./A11_222.wav 140 | A11_223 ./A11_223.wav 141 | A11_224 ./A11_224.wav 142 | A11_225 ./A11_225.wav 143 | A11_226 ./A11_226.wav 144 | A11_227 ./A11_227.wav 145 | A11_228 ./A11_228.wav 146 | A11_229 ./A11_229.wav 147 | A11_23 ./A11_23.wav 148 | A11_230 ./A11_230.wav 149 | A11_231 ./A11_231.wav 150 | A11_232 ./A11_232.wav 151 | A11_233 ./A11_233.wav 152 | A11_234 ./A11_234.wav 153 | A11_235 ./A11_235.wav 154 | A11_236 ./A11_236.wav 155 | A11_237 ./A11_237.wav 156 | A11_238 ./A11_238.wav 157 | A11_239 ./A11_239.wav 158 | A11_24 ./A11_24.wav 159 | A11_240 ./A11_240.wav 160 | A11_241 ./A11_241.wav 161 | A11_242 ./A11_242.wav 162 | A11_243 ./A11_243.wav 163 | A11_244 ./A11_244.wav 164 | A11_245 ./A11_245.wav 165 | A11_246 ./A11_246.wav 166 | A11_247 ./A11_247.wav 167 | A11_248 ./A11_248.wav 168 | A11_249 ./A11_249.wav 169 | A11_25 ./A11_25.wav 170 | A11_26 ./A11_26.wav 171 | A11_27 ./A11_27.wav 172 | A11_28 ./A11_28.wav 173 | A11_29 ./A11_29.wav 174 | A11_3 ./A11_3.wav 175 | A11_30 ./A11_30.wav 176 | A11_31 ./A11_31.wav 177 | A11_32 ./A11_32.wav 178 | A11_33 ./A11_33.wav 179 | A11_34 ./A11_34.wav 180 | A11_35 ./A11_35.wav 181 | A11_36 ./A11_36.wav 182 | A11_37 ./A11_37.wav 183 | A11_38 ./A11_38.wav 184 | A11_39 ./A11_39.wav 185 
| A11_4 ./A11_4.wav 186 | A11_40 ./A11_40.wav 187 | A11_41 ./A11_41.wav 188 | A11_42 ./A11_42.wav 189 | A11_43 ./A11_43.wav 190 | A11_44 ./A11_44.wav 191 | A11_45 ./A11_45.wav 192 | A11_46 ./A11_46.wav 193 | A11_47 ./A11_47.wav 194 | A11_48 ./A11_48.wav 195 | A11_49 ./A11_49.wav 196 | A11_5 ./A11_5.wav 197 | A11_50 ./A11_50.wav 198 | A11_51 ./A11_51.wav 199 | A11_52 ./A11_52.wav 200 | A11_53 ./A11_53.wav 201 | A11_54 ./A11_54.wav 202 | A11_55 ./A11_55.wav 203 | A11_56 ./A11_56.wav 204 | A11_57 ./A11_57.wav 205 | A11_58 ./A11_58.wav 206 | A11_59 ./A11_59.wav 207 | A11_6 ./A11_6.wav 208 | A11_60 ./A11_60.wav 209 | A11_61 ./A11_61.wav 210 | A11_62 ./A11_62.wav 211 | A11_63 ./A11_63.wav 212 | A11_64 ./A11_64.wav 213 | A11_65 ./A11_65.wav 214 | A11_66 ./A11_66.wav 215 | A11_67 ./A11_67.wav 216 | A11_68 ./A11_68.wav 217 | A11_69 ./A11_69.wav 218 | A11_7 ./A11_7.wav 219 | A11_70 ./A11_70.wav 220 | A11_71 ./A11_71.wav 221 | A11_72 ./A11_72.wav 222 | A11_73 ./A11_73.wav 223 | A11_74 ./A11_74.wav 224 | A11_75 ./A11_75.wav 225 | A11_76 ./A11_76.wav 226 | A11_77 ./A11_77.wav 227 | A11_78 ./A11_78.wav 228 | A11_79 ./A11_79.wav 229 | A11_8 ./A11_8.wav 230 | A11_80 ./A11_80.wav 231 | A11_81 ./A11_81.wav 232 | A11_82 ./A11_82.wav 233 | A11_83 ./A11_83.wav 234 | A11_84 ./A11_84.wav 235 | A11_85 ./A11_85.wav 236 | A11_86 ./A11_86.wav 237 | A11_87 ./A11_87.wav 238 | A11_88 ./A11_88.wav 239 | A11_89 ./A11_89.wav 240 | A11_9 ./A11_9.wav 241 | A11_90 ./A11_90.wav 242 | A11_91 ./A11_91.wav 243 | A11_92 ./A11_92.wav 244 | A11_93 ./A11_93.wav 245 | A11_94 ./A11_94.wav 246 | A11_95 ./A11_95.wav 247 | A11_96 ./A11_96.wav 248 | A11_97 ./A11_97.wav 249 | A11_98 ./A11_98.wav 250 | A11_99 ./A11_99.wav 251 | A12_0 ./A12_0.wav 252 | A12_1 ./A12_1.wav 253 | A12_10 ./A12_10.wav 254 | A12_100 ./A12_100.wav 255 | A12_101 ./A12_101.wav 256 | A12_102 ./A12_102.wav 257 | A12_103 ./A12_103.wav 258 | A12_104 ./A12_104.wav 259 | A12_105 ./A12_105.wav 260 | A12_106 ./A12_106.wav 261 | A12_107 ./A12_107.wav 262 | A12_108 ./A12_108.wav 263 | A12_109 ./A12_109.wav 264 | A12_11 ./A12_11.wav 265 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_0.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_0.wav.trn: -------------------------------------------------------------------------------- 1 | 绿 是 阳春 烟 景 大块 文章 的 底色 四月 的 林 峦 更是 绿 得 鲜活 秀媚 诗意 盎然 2 | lv4 shi4 yang2 chun1 yan1 jing3 da4 kuai4 wen2 zhang1 de5 di3 se4 si4 yue4 de5 lin2 luan2 geng4 shi4 lv4 de5 xian1 huo2 xiu4 mei4 shi1 yi4 ang4 ran2 3 | l v4 sh ix4 ii iang2 ch un1 ii ian1 j ing3 d a4 k uai4 uu un2 zh ang1 d e5 d i3 s e4 s iy4 vv ve4 d e5 l in2 l uan2 g eng4 sh ix4 l v4 d e5 x ian1 h uo2 x iu4 m ei4 sh ix1 ii i4 aa ang4 r an2 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_1.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_1.wav.trn: 
-------------------------------------------------------------------------------- 1 | 他 仅 凭 腰部 的 力量 在 泳道 上下 翻腾 蛹 动 蛇行 状 如 海豚 一直 以 一头 的 优势 领先 2 | ta1 jin3 ping2 yao1 bu4 de5 li4 liang4 zai4 yong3 dao4 shang4 xia4 fan1 teng2 yong3 dong4 she2 xing2 zhuang4 ru2 hai3 tun2 yi4 zhi2 yi3 yi4 tou2 de5 you1 shi4 ling3 xian1 3 | t a1 j in3 p ing2 ii iao1 b u4 d e5 l i4 l iang4 z ai4 ii iong3 d ao4 sh ang4 x ia4 f an1 t eng2 ii iong3 d ong4 sh e2 x ing2 zh uang4 r u2 h ai3 t un2 ii i4 zh ix2 ii i3 ii i4 t ou2 d e5 ii iu1 sh ix4 l ing3 x ian1 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_2.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_2.wav.trn: -------------------------------------------------------------------------------- 1 | 企业 依靠 技术 挖潜 增效 他 负责 全厂 产品质量 与 技术培训 成了 厂里 的 大忙人 2 | qi3 ye4 yi1 kao4 ji4 shu4 wa1 qian2 zeng1 xiao4 ta1 fu4 ze2 quan2 chang3 chan2 pin3 zhi4 liang4 yu3 ji4 shu4 pei2 xun4 cheng2 le5 chang3 li3 de5 da4 mang2 ren2 3 | q i3 ii ie4 ii i1 k ao4 j i4 sh u4 uu ua1 q ian2 z eng1 x iao4 t a1 f u4 z e2 q van2 ch ang3 ch an2 p in3 zh ix4 l iang4 vv v3 j i4 sh u4 p ei2 x vn4 ch eng2 l e5 ch ang3 l i3 d e5 d a4 m ang2 r en2 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_3.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_3.wav.trn: -------------------------------------------------------------------------------- 1 | 菜 做好 了 一碗 清蒸 武昌鱼 一碗 蕃茄 炒鸡蛋 一碗 榨菜 干 子 炒肉丝 2 | cai4 zuo4 hao3 le5 yi4 wan3 qing1 zheng1 wu3 chang1 yu2 yi4 wan3 fan1 qie2 chao3 ji1 dan4 yi4 wan3 zha4 cai4 gan1 zi3 chao3 rou4 si1 3 | c ai4 z uo4 h ao3 l e5 ii i4 uu uan3 q ing1 zh eng1 uu u3 ch ang1 vv v2 ii i4 uu uan3 f an1 q ie2 ch ao3 j i1 d an4 ii i4 uu uan3 zh a4 c ai4 g an1 z iy3 ch ao3 r ou4 s iy1 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_33.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_33.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_33.wav.trn: -------------------------------------------------------------------------------- 1 | 与 王伟 四平市 货车 司机 杜 大平 岳 玉杰 装卸工 刘 春山 一同 追赶 逃跑 的 案犯 2 | yu3 wang2 wei3 si4 ping2 shi4 huo4 che1 si1 ji1 du4 da4 ping2 yue4 yu4 jie2 zhuang1 xie4 gong1 liu2 chun1 shan1 yi4 tong2 zhui1 gan3 tao2 pao3 de5 an4 fan4 3 | vv v3 uu uang2 uu ui3 s iy4 p ing2 sh ix4 h uo4 ch e1 s iy1 j i1 d u4 d a4 p ing2 vv ve4 vv v4 j ie2 zh uang1 x ie4 g ong1 l iu2 ch un1 sh an1 ii i4 t ong2 zh ui1 g an3 t ao2 p ao3 d e5 aa an4 f an4 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_4.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_4.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_4.wav.trn: -------------------------------------------------------------------------------- 1 | 她 看看 夜 己 很 深 白天 的 炎热 已 给 夜 凉 吹散 吩咐 大家 各自 安息 明天 继续 玩乐 2 | ta1 kan4 kan5 ye4 ji3 hen3 shen1 bai2 tian1 de5 yan2 re4 yi3 gei3 ye4 liang2 chui1 san4 fen1 fu4 da4 jia1 ge4 zi4 an1 xi1 ming2 tian1 ji4 xu4 wan2 le4 3 | t a1 k an4 k an5 ii ie4 j i3 h en3 sh en1 b ai2 t ian1 d e5 ii ian2 r e4 ii i3 g ei3 ii ie4 l iang2 ch ui1 s an4 f en1 f u4 d a4 j ia1 g e4 z iy4 aa an1 x i1 m ing2 t ian1 j i4 x v4 uu uan2 l e4 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_5.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_5.wav.trn: -------------------------------------------------------------------------------- 1 | 有 一家 个体 制品厂 本该 用 完整 的 型材 生产 门窗 却 用 半截 材 打结 凑合 2 | you3 yi4 jia1 ge4 ti3 zhi4 pin2 chang3 ben3 gai1 yong4 wan2 zheng3 de5 xing2 cai2 sheng1 chan3 men2 chuang1 que4 yong4 ban4 jie2 cai2 da3 jie2 cou4 he5 3 | ii iu3 ii i4 j ia1 g e4 t i3 zh ix4 p in2 ch ang3 b en3 g ai1 ii iong4 uu uan2 zh eng3 d e5 x ing2 c ai2 sh eng1 ch an3 m en2 ch uang1 q ve4 ii iong4 b an4 j ie2 c ai2 d a3 j ie2 c ou4 h e5 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_58.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_58.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_58.wav.trn: -------------------------------------------------------------------------------- 1 | 特 别是 跨 省区 电网 超 计划 用电 不仅 损害 自己 也 损害 别人 损害 电网 损害 国家 2 | te4 bie2 shi4 kua4 sheng3 qu1 dian4 wang3 chao1 ji4 hua4 yong4 dian4 bu4 jin3 sun3 hai4 zi4 ji3 ye3 sun3 hai4 bie2 ren2 sun3 hai4 dian4 wang3 sun3 hai4 guo2 jia1 3 | t e4 b ie2 sh ix4 k ua4 sh eng3 q v1 d ian4 uu uang3 ch ao1 j i4 h ua4 ii iong4 d ian4 b u4 j in3 s un3 h ai4 z iy4 j i3 ii ie3 s un3 h ai4 b ie2 r en2 s un3 h ai4 d ian4 uu uang3 s un3 h ai4 g uo2 j ia1 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_6.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_6.wav.trn: -------------------------------------------------------------------------------- 1 | 久居 闹市 常常 忘了 山 之 外水 之外 身 之外 还有 沃野 平 畴 还有 光 风 丽日 2 | jiu3 ju1 nao4 shi4 chang2 chang2 wang4 le5 shan1 zhi1 wai4 shui3 zhi1 wai4 shen1 zhi1 wai4 hai2 you3 wo4 ye3 ping2 chou2 hai2 you3 guang1 feng1 li4 ri4 3 | j iu3 j v1 n ao4 sh ix4 ch ang2 ch ang2 uu 
uang4 l e5 sh an1 zh ix1 uu uai4 sh ui3 zh ix1 uu uai4 sh en1 zh ix1 uu uai4 h ai2 ii iu3 uu uo4 ii ie3 p ing2 ch ou2 h ai2 ii iu3 g uang1 f eng1 l i4 r iz4 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/A2_7.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/A2_7.wav.trn: -------------------------------------------------------------------------------- 1 | 旷野 的 风 要 往 这儿 刮 那儿 刮 你 能 命令 风 四面八方 全 刮 一点 吗 2 | kuang4 ye3 de5 feng1 yao4 wang3 zhe4 er5 gua1 na4 er5 gua1 ni3 neng2 ming4 ling4 feng1 si4 mian4 ba1 fang1 quan2 gua1 yi4 dian3 ma5 3 | k uang4 ii ie3 d e5 f eng1 ii iao4 uu uang3 zh e4 ee er5 g ua1 n a4 ee er5 g ua1 n i3 n eng2 m ing4 l ing4 f eng1 s iy4 m ian4 b a1 f ang1 q van2 g ua1 ii i4 d ian3 m a5 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/D4_750.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/D4_750.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/D4_750.wav.trn: -------------------------------------------------------------------------------- 1 | 东北军 的 一些 爱国 将士 马 占 山 李杜 唐 聚 伍 苏 炳 艾 邓 铁梅 等 也 奋起 抗战 2 | dong1 bei3 jun1 de5 yi4 xie1 ai4 guo2 jiang4 shi4 ma3 zhan4 shan1 li3 du4 tang2 ju4 wu3 su1 bing3 ai4 deng4 tie3 mei2 deng3 ye3 fen4 qi3 kang4 zhan4 3 | d ong1 b ei3 j vn1 d e5 ii i4 x ie1 aa ai4 g uo2 j iang4 sh ix4 m a3 zh an4 sh an1 l i3 d u4 t ang2 j v4 uu u3 s u1 b ing3 aa ai4 d eng4 t ie3 m ei2 d eng3 ii ie3 f en4 q i3 k ang4 zh an4 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/D4_751.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/data/D4_751.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/data/D4_751.wav.trn: -------------------------------------------------------------------------------- 1 | 王 英汉 被 枪毙 后 部分 余孽 深 藏起来 几次 围捕 均 未 抓获 2 | wang2 ying1 han4 bei4 qiang1 bi4 hou4 bu4 fen5 yu2 nie4 shen1 cang2 qi3 lai5 ji3 ci4 wei2 bu3 jun1 wei4 zhua1 huo4 3 | uu uang2 ii ing1 h an4 b ei4 q iang1 b i4 h ou4 b u4 f en5 vv v2 n ie4 sh en1 c ang2 q i3 l ai5 j i3 c iy4 uu ui2 b u3 j vn1 uu ui4 zh ua1 h uo4 4 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/dev/A2_33.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/dev/A2_33.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/dev/A2_33.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/A2_33.wav.trn 2 | -------------------------------------------------------------------------------- 
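Each THCHS-30 annotation above carries three tiers for one utterance: the word-segmented Chinese text, the pinyin sequence with tone numbers, and the initial/final (phone-level) sequence. The .wav.trn files under dev/, test/ and train/ contain only a relative path such as ../data/A2_33.wav.trn pointing back to the real annotation in data/. The sketch below (read_thchs30_trn is a hypothetical helper, not code from this repository) shows how that indirection and the three tiers can be read:

import os


def read_thchs30_trn(trn_path):
    """Illustrative reader for a THCHS-30 .wav.trn annotation.

    Split-level files (dev/test/train) hold a single relative path to the real
    annotation under data/; the annotation itself has three lines:
    characters, pinyin with tones, and initials/finals.
    """
    with open(trn_path, "r", encoding="utf-8") as f:
        lines = [line.strip() for line in f if line.strip()]
    if len(lines) == 1:  # indirection: follow the relative path back to data/
        target = os.path.join(os.path.dirname(trn_path), lines[0])
        return read_thchs30_trn(target)
    return lines[0], lines[1], lines[2]  # characters, pinyin, phones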
/hlp/stt/data/data_thchs30/dev/A2_58.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/dev/A2_58.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/dev/A2_58.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/A2_58.wav.trn 2 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/test/D4_750.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/test/D4_750.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/test/D4_750.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/D4_750.wav.trn 2 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/test/D4_751.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/test/D4_751.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/test/D4_751.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/D4_751.wav.trn 2 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_0.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_0.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/A2_0.wav.trn 2 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_1.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_1.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/A2_1.wav.trn 2 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_2.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_2.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/A2_2.wav.trn 2 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_3.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_3.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_3.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/A2_3.wav.trn 2 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_4.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_4.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/A2_4.wav.trn 2 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_5.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_5.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/A2_5.wav.trn 2 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_6.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_6.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/A2_6.wav.trn 2 | -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/data/data_thchs30/train/A2_7.wav -------------------------------------------------------------------------------- /hlp/stt/data/data_thchs30/train/A2_7.wav.trn: -------------------------------------------------------------------------------- 1 | ../data/A2_7.wav.trn 2 | -------------------------------------------------------------------------------- /hlp/stt/deepspeech2/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def clipped_relu(x): 5 | return tf.keras.activations.relu(x, max_value=20) 6 | 7 | 8 | class DS2(tf.keras.Model): 9 | def __init__(self, 10 | conv_layers, filters, kernel_size, strides, 11 | bi_gru_layers, gru_units, 12 | fc_units, 13 | output_dim, 14 | **kwargs): 15 | super(DS2, self).__init__(**kwargs) 16 | 17 | self.bn1 = tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001) 18 | 19 | self.conv_layers = conv_layers 20 | self.conv = [] 21 | for i in range(conv_layers): 22 | self.conv.append(tf.keras.layers.Conv1D(filters=filters, 
kernel_size=kernel_size, 23 | strides=strides, padding="valid", 24 | activation="relu", name="conv" + str(i))) 25 | 26 | self.bn2 = tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001) 27 | 28 | self.bi_gru_layers = bi_gru_layers 29 | self.bi_gru = [] 30 | for i in range(bi_gru_layers): 31 | self.bi_gru.append(tf.keras.layers.Bidirectional( 32 | tf.keras.layers.GRU(gru_units, activation="relu", return_sequences=True), 33 | merge_mode="sum", name="bi_gru" + str(i))) 34 | 35 | self.bn3 = tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001) 36 | 37 | self.fc = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(fc_units, activation=clipped_relu)) 38 | self.sm = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(output_dim, activation="softmax")) 39 | 40 | def call(self, inputs): 41 | x = inputs 42 | x = self.bn1(x) 43 | for i in range(self.conv_layers): 44 | x = self.conv[i](x) 45 | x = self.bn2(x) 46 | for i in range(self.bi_gru_layers): 47 | x = self.bi_gru[i](x) 48 | x = self.bn3(x) 49 | x = self.fc(x) 50 | x = self.sm(x) 51 | return x 52 | 53 | 54 | if __name__ == "__main__": 55 | pass 56 | -------------------------------------------------------------------------------- /hlp/stt/las/las.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from hlp.utils.layers import BahdanauAttention 4 | 5 | 6 | class Encoder(tf.keras.Model): 7 | def __init__(self, cnn1_filters, cnn1_kernel_size, cnn2_filters, 8 | cnn2_kernel_size, max_pool_strides, max_pool_size, d, w): 9 | """ 10 | 11 | :param cnn1_filters: 12 | :param cnn1_kernel_size: 13 | :param cnn2_filters: 14 | :param cnn2_kernel_size: 15 | :param max_pool_strides: 16 | :param max_pool_size: 17 | :param d: BiLSTM层数 18 | :param w: BiLSTM单元数 19 | """ 20 | super(Encoder, self).__init__() 21 | self.d = d 22 | self.w = w 23 | self.cnn1 = tf.keras.layers.Conv1D(filters=cnn1_filters, kernel_size=cnn1_kernel_size, activation='relu') 24 | self.cnn2 = tf.keras.layers.Conv1D(filters=cnn2_filters, kernel_size=cnn2_kernel_size, activation='relu') 25 | self.max_pool = tf.keras.layers.MaxPooling1D(strides=max_pool_strides, pool_size=max_pool_size) 26 | 27 | self.bi_lstm = [] 28 | for i in range(self.d): 29 | self.bi_lstm.append(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(w, return_sequences=True))) 30 | 31 | def call(self, x): 32 | x = self.cnn1(x) 33 | x = self.cnn2(x) 34 | x = self.max_pool(x) 35 | 36 | for i in range(self.d): 37 | x = self.bi_lstm[i](x) 38 | 39 | return x 40 | 41 | def initialize_hidden_state(self): 42 | return tf.zeros((self.batch_sz, self.w)) 43 | 44 | 45 | class Decoder(tf.keras.Model): 46 | def __init__(self, vocab_size, embedding_dim, dec_units, w): 47 | super(Decoder, self).__init__() 48 | self.dec_units = dec_units 49 | self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim) 50 | 51 | # TODO: LSTM层数可变 52 | self.rnn1 = tf.keras.layers.LSTM(w, return_sequences=True) 53 | # self.rnn2 = tf.keras.layers.LSTM(w, return_sequences=True) 54 | self.fc = tf.keras.layers.Dense(vocab_size) 55 | self.attention = BahdanauAttention(self.dec_units) 56 | 57 | def call(self, x, hidden, enc_output): 58 | """解码 59 | 60 | :param x: 目标符号, (批大小,id) 61 | :param hidden: 解码器状态, (批大小,隐藏层大小) 62 | :param enc_output: 编码器输出, (批大小,最大长度,隐藏层大小) 63 | :return: token分布, 解码器专题, 注意力权重 64 | """ 65 | context_vector, attention_weights = self.attention(hidden, enc_output) 66 | 67 | # x 在通过嵌入层后的形状 == (批大小,1,嵌入维度) 68 | x = self.embedding(x) 69 | 
70 | # x 在拼接 (concatenation) 后的形状 == (批大小,1,嵌入维度 + 隐藏层大小) 71 | x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1) 72 | 73 | output = self.rnn1(x) 74 | # output = self.rnn2(x) 75 | # 输出的形状 == (批大小 * 1,隐藏层大小) 76 | output = tf.reshape(output, (-1, output.shape[2])) 77 | 78 | # 输出的形状 == (批大小,vocab) 79 | tokens_prob = self.fc(output) 80 | 81 | return tokens_prob, attention_weights 82 | 83 | 84 | class LAS(tf.keras.Model): 85 | def __init__(self, vocab_tar_size, cnn1_filters, cnn1_kernel_size, cnn2_filters, 86 | cnn2_kernel_size, max_pool_strides, max_pool_size, d, w, 87 | embedding_dim, dec_units, batch_size): 88 | super(LAS, self).__init__() 89 | self.vocab_tar_size = vocab_tar_size 90 | self.d = d 91 | self.w = w 92 | self.batch_size = batch_size 93 | self.encoder = Encoder(cnn1_filters, cnn1_kernel_size, 94 | cnn2_filters, cnn2_kernel_size, 95 | max_pool_strides, max_pool_size, d, w) 96 | self.decoder = Decoder(vocab_tar_size, embedding_dim, dec_units, w) 97 | 98 | def call(self, inputx_1, enc_hidden, dec_input): 99 | enc_output = self.encoder(inputx_1) 100 | 101 | dec_hidden = enc_hidden # 编码器状态作为解码器初始状态? 102 | predictions, dec_hidden = self.decoder(dec_input, dec_hidden, enc_output) 103 | return predictions, dec_hidden 104 | 105 | def initialize_hidden_state(self): 106 | return tf.zeros((self.batch_size, self.w)) 107 | -------------------------------------------------------------------------------- /hlp/stt/las/plas.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from hlp.utils.layers import BahdanauAttention 3 | 4 | 5 | class PBLSTM(tf.keras.layers.Layer): 6 | """金字塔BiLSTM 7 | 8 | 逐层缩减序列长度 9 | """ 10 | def __init__(self, dim): 11 | super(PBLSTM, self).__init__() 12 | self.dim = dim 13 | self.bidi_lstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(self.dim, return_sequences=True)) 14 | 15 | @tf.function 16 | def call(self, inputs): 17 | y = self.bidi_lstm(inputs) 18 | 19 | if tf.shape(inputs)[1] % 2 == 1: 20 | y = tf.keras.layers.ZeroPadding1D(padding=(0, 1))(y) 21 | 22 | y = tf.keras.layers.Reshape(target_shape=(-1, int(self.dim * 4)))(y) 23 | return y 24 | 25 | 26 | class Encoder(tf.keras.Model): 27 | def __init__(self, dim, enc_units): 28 | # TODO: 金字塔层数可变 29 | super(Encoder, self).__init__() 30 | self.enc_units = enc_units 31 | self.dim = dim 32 | # Listen; Lower resoultion by 8x 33 | self.plstm1 = PBLSTM(self.dim // 2) 34 | self.plstm2 = PBLSTM(self.dim // 2) 35 | self.plstm3 = PBLSTM(self.dim // 2) 36 | 37 | def call(self, x): 38 | """声学特征序列编码 39 | 40 | :param x: 声学特征序列 41 | :return: 缩减后的编码特征序列 42 | """ 43 | x = self.plstm1(x) 44 | x = self.plstm2(x) 45 | output = self.plstm3(x) 46 | return output 47 | 48 | 49 | class Decoder(tf.keras.Model): 50 | def __init__(self, vocab_size, embedding_dim, dec_units): 51 | super(Decoder, self).__init__() 52 | self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim) 53 | self.gru = tf.keras.layers.GRU(dec_units, 54 | return_sequences=True, 55 | return_state=True, 56 | recurrent_initializer='glorot_uniform') 57 | self.fc = tf.keras.layers.Dense(vocab_size) 58 | self.attention = BahdanauAttention(dec_units) 59 | 60 | def call(self, x, hidden, enc_output): 61 | """解码 62 | 63 | :param x: 目标符号, (批大小,id) 64 | :param hidden: 解码器状态, (批大小,隐藏层大小) 65 | :param enc_output: 编码器输出, (批大小,最大长度,隐藏层大小) 66 | :return: token分布, 解码器专题, 注意力权重 67 | """ 68 | context_vector, attention_weights = self.attention(hidden, enc_output) 69 | 70 | # x 在通过嵌入层后的形状 == (批大小,1,嵌入维度) 71 | x 
= self.embedding(x) 72 | 73 | # x 在拼接 (concatenation) 后的形状 == (批大小,1,嵌入维度 + 隐藏层大小) 74 | x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1) 75 | 76 | output, state = self.gru(x) 77 | # 输出的形状 == (批大小 * 1,隐藏层大小) 78 | output = tf.reshape(output, (-1, output.shape[2])) 79 | 80 | # 输出的形状 == (批大小,vocab) 81 | tokens_prob = self.fc(output) 82 | 83 | return tokens_prob, state, attention_weights 84 | 85 | 86 | class PLAS(tf.keras.Model): 87 | def __init__(self, vocab_tar_size, embedding_dim, units, batch_size): 88 | super(PLAS, self).__init__() 89 | self.units = units 90 | self.batch_size = batch_size 91 | # TODO: 编码器和解码器使用不同的单元数 92 | self.encoder = Encoder(embedding_dim, units) 93 | self.decoder = Decoder(vocab_tar_size, embedding_dim, units) 94 | 95 | def call(self, x, enc_hidden, dec_input): 96 | """ 97 | 98 | :param x: 编码器输入 99 | :param enc_hidden: 100 | :param dec_input: 解码器输入 101 | :return: 解码器预测, 解码器状态 102 | """ 103 | enc_output = self.encoder(x) 104 | dec_hidden = enc_hidden # 编码器状态作为解码器初始状态? 105 | predictions, dec_hidden, _ = self.decoder(dec_input, dec_hidden, enc_output) 106 | return predictions, dec_hidden 107 | 108 | def initialize_hidden_state(self): 109 | return tf.zeros((self.batch_size, self.units)) 110 | 111 | 112 | if __name__ == "__main__": 113 | import numpy as np 114 | # a = np.arange(12).reshape((1, 4, 3)).astype(np.float) 115 | a = np.arange(15).reshape((1, 5, 3)).astype(np.float) 116 | p_lstm = PBLSTM(8) 117 | r = p_lstm(a) 118 | print(r.shape) 119 | -------------------------------------------------------------------------------- /hlp/stt/rnnt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/stt/rnnt/__init__.py -------------------------------------------------------------------------------- /hlp/stt/rnnt/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | # 时间缩减层 5 | class TimeReduction(tf.keras.layers.Layer): 6 | def __init__(self, 7 | reduction_factor, 8 | **kwargs): 9 | super(TimeReduction, self).__init__(**kwargs) 10 | 11 | self.reduction_factor = reduction_factor 12 | 13 | def call(self, inputs): 14 | batch_size = inputs.shape[0] 15 | 16 | max_time = inputs.shape[1] 17 | num_units = inputs.shape[-1] 18 | 19 | paddings = [[0, 0], [0, tf.floormod(max_time, self.reduction_factor)], [0, 0]] 20 | outputs = tf.pad(inputs, paddings) 21 | 22 | return tf.reshape(outputs, (batch_size, -1, num_units * self.reduction_factor)) 23 | 24 | 25 | # 编码器 26 | class Encoder(tf.keras.layers.Layer): 27 | def __init__(self, encoder_layers, encoder_lstm_units, 28 | proj_size, dropout, reduction_factor, **kwargs): 29 | super(Encoder, self).__init__(**kwargs) 30 | 31 | self.bn = tf.keras.layers.BatchNormalization(axis=-1, 32 | momentum=0.99, 33 | epsilon=0.001) 34 | 35 | self.encoder_layers = encoder_layers 36 | self.lstm = [] 37 | self.dense = [] 38 | self.dropout = [] 39 | self.ln = [] 40 | for i in range(self.encoder_layers): 41 | self.lstm.append(tf.keras.layers.LSTM( 42 | encoder_lstm_units, return_sequences=True)) 43 | self.dense.append(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(proj_size))) 44 | self.dropout.append(tf.keras.layers.Dropout(dropout)) 45 | self.ln.append(tf.keras.layers.LayerNormalization()) 46 | self.reduction_factor = reduction_factor 47 | self.tr = TimeReduction(self.reduction_factor) 48 | 49 | def call(self, inputs): 50 | x = 
self.bn(inputs) 51 | for i in range(self.encoder_layers): 52 | x = self.lstm[i](x) 53 | x = self.dense[i](x) 54 | x = self.dropout[i](x) 55 | x = self.ln[i](x) 56 | 57 | if i == self.reduction_factor: 58 | x = self.tr(x) 59 | 60 | return x 61 | 62 | 63 | # 预测网络 64 | class PredictionNetwork(tf.keras.layers.Layer): 65 | def __init__(self, vocab_size, embedding_size, 66 | prediction_network_layers, prediction_network_lstm_units, 67 | proj_size, dropout, **kwargs): 68 | super(PredictionNetwork, self).__init__(**kwargs) 69 | 70 | self.embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_size) 71 | 72 | self.prediction_network_layers = prediction_network_layers 73 | self.lstm = [] 74 | self.dense = [] 75 | self.dropout = [] 76 | self.ln = [] 77 | for i in range(self.prediction_network_layers): 78 | self.lstm.append( 79 | tf.keras.layers.LSTM(prediction_network_lstm_units, return_sequences=True)) 80 | self.dense.append( 81 | tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(proj_size))) 82 | self.dropout.append(tf.keras.layers.Dropout(dropout)) 83 | self.ln.append(tf.keras.layers.LayerNormalization()) 84 | 85 | def call(self, inputs): 86 | x = self.embedding_layer(inputs) 87 | for i in range(self.prediction_network_layers): 88 | x = self.lstm[i](x) 89 | x = self.dense[i](x) 90 | x = self.dropout[i](x) 91 | x = self.ln[i](x) 92 | 93 | return x 94 | 95 | 96 | # RNNT,将Encoder和预测网络拼接 97 | class RNNT(tf.keras.Model): 98 | def __init__(self, encoder_layers, encoder_lstm_units, 99 | encoder_proj_size, encoder_dropout, reduction_factor, 100 | joint_dense_units, vocab_size, 101 | embedding_size, 102 | prediction_network_layers, prediction_network_lstm_units, 103 | pred_proj_size, pred_dropout, **kwargs): 104 | super(RNNT, self).__init__(**kwargs) 105 | 106 | self.encoder = Encoder(encoder_layers, encoder_lstm_units, 107 | encoder_proj_size, encoder_dropout, reduction_factor) 108 | self.prediction_network = PredictionNetwork(vocab_size, 109 | embedding_size, 110 | prediction_network_layers, 111 | prediction_network_lstm_units, 112 | pred_proj_size, pred_dropout) 113 | self.ds1 = tf.keras.layers.TimeDistributed( 114 | tf.keras.layers.Dense(joint_dense_units, activation="tanh")) 115 | self.ds2 = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(vocab_size)) 116 | 117 | def call(self, encoder_inputs, pre_inputs): 118 | encoder_outputs = self.encoder(encoder_inputs) 119 | pred_outputs = self.prediction_network(pre_inputs) 120 | 121 | # [B, T, V] => [B, T, 1, V] 122 | encoder_outputs = tf.expand_dims(encoder_outputs, axis=2) 123 | 124 | # [B, U, V] => [B, 1, U, V] 125 | pred_outputs = tf.expand_dims(pred_outputs, axis=1) 126 | 127 | # 拼接(joint):[B, T, U, V] 128 | # TODO: 加合适吗? 
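# For reference: in the original RNN-T formulation (Graves, 2012) the joint network
# combines the encoder output f_t and the prediction-network output g_u by addition
# (optionally after separate projections, or by concatenation) before a tanh dense
# layer, so broadcasting the two tensors to [B, T, U, V] and summing them, as done
# below, is a standard choice; switching to tf.concat would require widening the
# input expected by self.ds1.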
129 | joint_inputs = encoder_outputs + pred_outputs 130 | 131 | joint_outputs = self.ds1(joint_inputs) 132 | outputs = self.ds2(joint_outputs) 133 | 134 | return outputs 135 | 136 | 137 | if __name__ == "__main__": 138 | pass 139 | -------------------------------------------------------------------------------- /hlp/stt/transformer/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from hlp.utils.layers import positional_encoding 3 | from hlp.utils.layers import create_padding_mask 4 | from hlp.utils.layers import create_look_ahead_mask 5 | from hlp.utils.layers import transformer_encoder_layer 6 | from hlp.utils.layers import transformer_decoder_layer 7 | 8 | 9 | def encoder(vocab_size: int, embedding_dim: int, num_layers: int, feature_dim: int, 10 | encoder_units: int, num_heads: int, dropout: float = 0.1) -> tf.keras.Model: 11 | """ 12 | transformer tts的encoder层 13 | :param vocab_size: 词汇大小 14 | :param embedding_dim: 嵌入层维度 15 | :param num_layers: encoder层数量 16 | :param feature_dim: 特征维度 17 | :param encoder_units: 单元大小 18 | :param dropout: encoder的dropout采样率 19 | :param num_heads: 头注意力数量 20 | """ 21 | inputs = tf.keras.Input(shape=(None, feature_dim)) 22 | padding_mask = tf.keras.layers.Lambda(_create_padding_mask, 23 | output_shape=(1, 1, None))(inputs) 24 | outputs = tf.keras.layers.Dense(embedding_dim)(inputs) 25 | outputs = tf.keras.layers.LayerNormalization(epsilon=1e-6)(outputs) 26 | 27 | outputs = outputs * tf.math.sqrt(tf.cast(embedding_dim, tf.float32)) 28 | pos_encoding = positional_encoding(vocab_size, embedding_dim) 29 | outputs = outputs + pos_encoding[:, :tf.shape(outputs)[1], :] 30 | 31 | outputs = tf.keras.layers.Dropout(rate=dropout)(outputs) 32 | 33 | for i in range(num_layers): 34 | outputs = transformer_encoder_layer( 35 | units=encoder_units, 36 | d_model=embedding_dim, 37 | num_heads=num_heads, 38 | dropout=dropout, 39 | name="transformer_encoder_layer_{}".format(i), 40 | )([outputs, padding_mask]) 41 | 42 | return tf.keras.Model(inputs=inputs, outputs=[outputs, padding_mask]) 43 | 44 | 45 | def decoder(vocab_size: int, embedding_dim: int, num_layers: int, 46 | decoder_units: int, num_heads: int, dropout: float = 0.1) -> tf.keras.Model: 47 | """ 48 | :param vocab_size: 词汇大小 49 | :param embedding_dim: 嵌入层维度 50 | :param num_layers: encoder层数量 51 | :param decoder_units: 单元大小 52 | :param num_heads: 头注意力数量 53 | :param dropout: decoder的dropout采样率 54 | """ 55 | enc_outputs = tf.keras.Input(shape=(None, None)) 56 | dec_inputs = tf.keras.Input(shape=(None,)) 57 | padding_mask = tf.keras.Input(shape=(1, 1, None)) 58 | pos_encoding = positional_encoding(vocab_size, embedding_dim) 59 | look_ahead_mask = tf.keras.layers.Lambda(_combine_mask, 60 | output_shape=(1, None, None))(dec_inputs) 61 | 62 | embeddings = tf.keras.layers.Embedding(vocab_size, embedding_dim)(dec_inputs) 63 | embeddings *= tf.math.sqrt(tf.cast(embedding_dim, tf.float32)) 64 | embeddings = embeddings + pos_encoding[:, :tf.shape(embeddings)[1], :] 65 | 66 | outputs = tf.keras.layers.Dropout(rate=dropout)(embeddings) 67 | 68 | for i in range(num_layers): 69 | outputs = transformer_decoder_layer( 70 | units=decoder_units, d_model=embedding_dim, num_heads=num_heads, 71 | dropout=dropout, name="transformer_decoder_layer_{}".format(i), 72 | )(inputs=[outputs, enc_outputs, look_ahead_mask, padding_mask]) 73 | 74 | outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(outputs) 75 | 76 | return tf.keras.Model(inputs=[dec_inputs, 
enc_outputs, padding_mask], outputs=outputs) 77 | 78 | 79 | def _combine_mask(seq: tf.Tensor): 80 | """ 81 | 对input中的不能见单位进行mask 82 | :param seq: 输入序列 83 | :return: mask 84 | """ 85 | look_ahead_mask = create_look_ahead_mask(seq) 86 | padding_mask = create_padding_mask(seq) 87 | return tf.maximum(look_ahead_mask, padding_mask) 88 | 89 | 90 | def _create_padding_mask(seq: tf.Tensor): 91 | """ 92 | 用于创建输入序列的扩充部分的mask,专用于mel序列 93 | :param seq: 输入序列 94 | :return: mask 95 | """ 96 | seq = tf.cast(tf.math.equal(seq, 0), tf.float32) 97 | seq = seq[:, :, 0] 98 | return seq[:, tf.newaxis, tf.newaxis, :] 99 | -------------------------------------------------------------------------------- /hlp/stt/utils/audio_process.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import soundfile as sf 3 | import tensorflow as tf 4 | from python_speech_features import mfcc, logfbank, delta 5 | 6 | 7 | def wav_to_feature(wav_path, audio_feature_type): 8 | """ 9 | 提取语音文件语音特征 10 | :param wav_path: 音频文件路径 11 | :param audio_feature_type: 特征类型 12 | :return: shape为(timestep, dim)的音频特征 13 | """ 14 | sig, sr = sf.read(wav_path) 15 | 16 | if audio_feature_type == "mfcc": 17 | return get_mfcc_(sig, sr) 18 | elif audio_feature_type == "fbank": 19 | return get_fbank(sig, sr) 20 | 21 | 22 | def get_mfcc_(wav_signal, sr): 23 | """ 24 | :param wav_signal: 音频数字信号 25 | :param sr: 采样率 26 | 输入为语音文件数学表示和采样频率,输出为语音的MFCC特征(默认13维)+一阶差分+二阶差分; 27 | """ 28 | feat_mfcc = mfcc(wav_signal, sr) 29 | feat_mfcc_d = delta(feat_mfcc, 2) 30 | feat_mfcc_dd = delta(feat_mfcc_d, 2) 31 | 32 | # (timestep, 39) 33 | wav_feature = np.column_stack((feat_mfcc, feat_mfcc_d, feat_mfcc_dd)) 34 | return wav_feature.astype(np.float32) 35 | 36 | 37 | def get_fbank(wav_signal, sr): 38 | """ 39 | :param wav_signal: 音频数字信号 40 | :param sr: 采样率 41 | 输入为语音文件数学表示和采样频率,输出为语音的FBANK特征 42 | """ 43 | feat_fbank = logfbank(wav_signal, sr, nfilt=80) 44 | 45 | return feat_fbank.astype(np.float32) 46 | 47 | 48 | def get_input_and_length(audio_path_list, audio_feature_type, max_len): 49 | """ 50 | 获得语音文件的特征和长度 51 | :param audio_path_list: 语音文件列表 52 | :param audio_feature_type: 语音特征类型 53 | :param max_len: 最大补齐长度 54 | :return: 补齐后的语音特征数组,每个语音文件的帧数 55 | """ 56 | audio_feature_list = [] 57 | input_length_list = [] 58 | for audio_path in audio_path_list: 59 | audio_feature = wav_to_feature(audio_path, audio_feature_type) 60 | audio_feature_list.append(audio_feature) 61 | input_length_list.append([audio_feature.shape[0]]) 62 | 63 | input_tensor = tf.keras.preprocessing.sequence.pad_sequences(audio_feature_list, maxlen=max_len, 64 | dtype='float32', padding='post') 65 | input_length = tf.convert_to_tensor(input_length_list) 66 | 67 | return input_tensor, input_length 68 | 69 | 70 | def max_audio_length(audio_path_list, audio_feature_type): 71 | """ 72 | 获得语音特征帧最大长度 73 | 注意:这个方法会读取所有语音文件,并提取特征. 
74 | :param audio_path_list: 语音文件列表 75 | :param audio_feature_type: 语音特征类型 76 | :return: 最大帧数 77 | """ 78 | return max(wav_to_feature(audio_path, audio_feature_type).shape[0] for audio_path in audio_path_list) 79 | 80 | 81 | if __name__ == "__main__": 82 | pass 83 | -------------------------------------------------------------------------------- /hlp/stt/utils/load_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def load_data(train_data_path: str, batch_size: int, buffer_size: int, valid_data_split: float = 0.0, 7 | valid_data_path: str = "", train_length_path: str = "", valid_length_path: str = "", 8 | max_train_data_size: int = 0, max_valid_data_size: int = 0): 9 | """ 10 | 加载训练验证数据方法,验证数据的优先级为:验证数据文件>从训练集划分验证集 11 | :param train_data_path: 文本数据路径 12 | :param buffer_size: Dataset加载缓存大小 13 | :param batch_size: Dataset加载批大小 14 | :param valid_data_split: 用于从训练数据中划分验证数据 15 | :param valid_data_path: 验证数据文本路径 16 | :param train_length_path: 训练样本长度保存路径 17 | :param valid_length_path: 验证样本长度保存路径 18 | :param max_train_data_size: 最大训练数据量 19 | :param max_valid_data_size: 最大验证数据量 20 | :return: 返回train_dataset, valid_dataset, steps_per_epoch, valid_steps_per_epoch 21 | """ 22 | if not os.path.exists(train_data_path): 23 | print("加载的训练验证数据文件不存在,请先执行pre_treat模式后重试") 24 | exit(0) 25 | 26 | print("正在加载训练数据...") 27 | train_audio_data_path, train_sentence_data_path, train_length_data = \ 28 | read_data(data_path=train_data_path, length_path=train_length_path, num_examples=max_train_data_size) 29 | 30 | valid_flag = True # 是否开启验证标记 31 | valid_steps_per_epoch = 0 32 | 33 | # 根据是否传入验证数据文件,切分验证数据 34 | if valid_data_path != "": 35 | print("正在加载验证数据...") 36 | valid_audio_data_path, valid_sentence_data_path, valid_length_data = \ 37 | read_data(data_path=valid_data_path, length_path=valid_length_path, num_examples=max_valid_data_size) 38 | elif valid_data_split != 0.0: 39 | print("从训练数据中划分验证数据...") 40 | train_size = int(len(train_audio_data_path) * (1.0 - valid_data_split)) 41 | valid_audio_data_path = train_audio_data_path[train_size:] 42 | valid_sentence_data_path = train_sentence_data_path[train_size:] 43 | valid_length_data = train_length_data[train_size:] 44 | train_audio_data_path = train_audio_data_path[:train_size] 45 | train_sentence_data_path = train_sentence_data_path[:train_size] 46 | train_length_data = train_length_data[:train_size] 47 | else: 48 | valid_flag = False 49 | 50 | train_dataset = _to_dataset(data=(train_audio_data_path, train_sentence_data_path, train_length_data), 51 | batch_size=batch_size, buffer_size=buffer_size) 52 | steps_per_epoch = len(train_sentence_data_path) // batch_size 53 | 54 | if valid_flag: 55 | valid_dataset = _to_dataset(data=(valid_audio_data_path, valid_sentence_data_path, valid_length_data), 56 | batch_size=batch_size, buffer_size=buffer_size) 57 | valid_steps_per_epoch = len(valid_sentence_data_path) // batch_size 58 | else: 59 | valid_dataset = None 60 | 61 | print("训练验证数据加载完毕") 62 | return train_dataset, valid_dataset, steps_per_epoch, valid_steps_per_epoch 63 | 64 | 65 | def _to_dataset(data: tuple, batch_size: int, buffer_size: int): 66 | """ 67 | 将data封装成tf.data.Dataset 68 | :param data: 要封装的数据元组 69 | :param buffer_size: Dataset加载缓存大小 70 | :param batch_size: Dataset加载批大小 71 | :return: dataset 72 | """ 73 | dataset = tf.data.Dataset.from_tensor_slices(data). 
\ 74 | cache().shuffle(buffer_size).prefetch(tf.data.experimental.AUTOTUNE) 75 | dataset = dataset.map(_process_audio_sentence_pairs, num_parallel_calls=tf.data.experimental.AUTOTUNE) 76 | dataset = dataset.batch(batch_size, drop_remainder=True) 77 | 78 | return dataset 79 | 80 | 81 | def read_data(data_path: str, length_path: str, num_examples: int): 82 | """ 83 | :param data_path: 需要读取整理的数据文件路径 84 | :param length_path: 样本长度保存路径 85 | :param num_examples: 读取的数据量大小 86 | :return: 返回读取的音频特征数据路径和句子数据 87 | """ 88 | audio_data_path = [] 89 | sentence_data_path = [] 90 | with open(data_path, 'r', encoding="utf-8") as data_file: 91 | lines = data_file.read().strip().split('\n') 92 | if num_examples != 0: 93 | lines = lines[:num_examples] 94 | 95 | for line in lines: 96 | line = line.strip().strip("\n").replace("/", " ").split("\t") 97 | audio_data_path.append(line[0]) 98 | sentence_data_path.append(line[1]) 99 | 100 | length_data = np.load(length_path) 101 | 102 | return audio_data_path, sentence_data_path, length_data 103 | 104 | 105 | def read_npy_file(filename): 106 | """ 107 | 专门用于匹配dataset的map读取文件的方法 108 | :param filename: 传入的文件名张量 109 | :return: 返回读取的数据 110 | """ 111 | return np.load(filename.numpy().decode()) 112 | 113 | 114 | def _process_audio_sentence_pairs(audio_data_path: tf.Tensor, sentence_data_path: tf.Tensor, length: tf.Tensor): 115 | """ 116 | 用于处理音频句子对,将其转化为张量 117 | :param audio_data_path: 音频特征数据保存文件 118 | :param sentence_data_path: 音频句子 119 | :param length: 样本长度 120 | :return: audio_feature, sentence 121 | """ 122 | [audio_feature] = tf.py_function(read_npy_file, [audio_data_path], [tf.float32]) 123 | [sentence] = tf.py_function(read_npy_file, [sentence_data_path], [tf.int32]) 124 | 125 | return audio_feature, sentence, length 126 | -------------------------------------------------------------------------------- /hlp/stt/utils/spec_augment.py: -------------------------------------------------------------------------------- 1 | import librosa.display 2 | import matplotlib.pyplot as plt 3 | import tensorflow as tf 4 | from tensorflow_addons.image import sparse_image_warp 5 | 6 | 7 | def sparse_warp(mel_spectrogram, time_warping_para=80): 8 | fbank_size = tf.shape(mel_spectrogram) 9 | n, v = fbank_size[1], fbank_size[2] 10 | 11 | # Image warping control point setting. 
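# Time warping as in SpecAugment (Park et al., 2019): a random anchor point on the
# time axis, drawn from [W, n - W] with W = time_warping_para, is shifted left or
# right by a random distance w in [-W, W]; the source/destination control points
# built below (a single column of points along the frequency axis) describe this
# warp for tensorflow_addons' sparse_image_warp.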
12 | # Source 13 | pt = tf.random.uniform([], time_warping_para, n - time_warping_para, tf.int32) # radnom point along the time axis 14 | src_ctr_pt_freq = tf.range(v // 2) # control points on freq-axis 15 | src_ctr_pt_time = tf.ones_like(src_ctr_pt_freq) * pt # control points on time-axis 16 | src_ctr_pts = tf.stack((src_ctr_pt_time, src_ctr_pt_freq), -1) 17 | src_ctr_pts = tf.cast(src_ctr_pts, dtype=tf.float32) 18 | 19 | # Destination 20 | w = tf.random.uniform([], -time_warping_para, time_warping_para, tf.int32) # distance 21 | dest_ctr_pt_freq = src_ctr_pt_freq 22 | dest_ctr_pt_time = src_ctr_pt_time + w 23 | dest_ctr_pts = tf.stack((dest_ctr_pt_time, dest_ctr_pt_freq), -1) 24 | dest_ctr_pts = tf.cast(dest_ctr_pts, dtype=tf.float32) 25 | 26 | # warp 27 | source_control_point_locations = tf.expand_dims(src_ctr_pts, 0) # (1, v//2, 2) 28 | dest_control_point_locations = tf.expand_dims(dest_ctr_pts, 0) # (1, v//2, 2) 29 | 30 | warped_image, _ = sparse_image_warp(mel_spectrogram, 31 | source_control_point_locations, 32 | dest_control_point_locations) 33 | return warped_image 34 | 35 | 36 | def frequency_masking(mel_spectrogram, v, frequency_masking_para=27, frequency_mask_num=2): 37 | fbank_size = tf.shape(mel_spectrogram) 38 | n, v = fbank_size[1], fbank_size[2] 39 | 40 | for i in range(frequency_mask_num): 41 | f = tf.random.uniform([], minval=0, maxval=frequency_masking_para, dtype=tf.int32) 42 | v = tf.cast(v, dtype=tf.int32) 43 | f0 = tf.random.uniform([], minval=0, maxval=v - f, dtype=tf.int32) 44 | 45 | # warped_mel_spectrogram[f0:f0 + f, :] = 0 46 | mask = tf.concat((tf.ones(shape=(1, n, v - f0 - f, 1)), 47 | tf.zeros(shape=(1, n, f, 1)), 48 | tf.ones(shape=(1, n, f0, 1)), 49 | ), 2) 50 | mel_spectrogram = mel_spectrogram * mask 51 | 52 | return tf.cast(mel_spectrogram, dtype=tf.float32) 53 | 54 | 55 | def time_masking(mel_spectrogram, tau, time_masking_para=100, time_mask_num=2): 56 | fbank_size = tf.shape(mel_spectrogram) 57 | n, v = fbank_size[1], fbank_size[2] 58 | 59 | for i in range(time_mask_num): 60 | t = tf.random.uniform([], minval=0, maxval=time_masking_para, dtype=tf.int32) 61 | t0 = tf.random.uniform([], minval=0, maxval=tau - t, dtype=tf.int32) 62 | 63 | # mel_spectrogram[:, t0:t0+t] = 0 64 | mask = tf.concat((tf.ones(shape=(1, n - t0 - t, v, 1)), 65 | tf.zeros(shape=(1, t, v, 1)), 66 | tf.ones(shape=(1, t0, v, 1)),), 1) 67 | mel_spectrogram = mel_spectrogram * mask 68 | 69 | return tf.cast(mel_spectrogram, dtype=tf.float32) 70 | 71 | 72 | def spec_augment(mel_spectrogram): 73 | v = mel_spectrogram.shape[0] 74 | tau = mel_spectrogram.shape[1] 75 | 76 | warped_mel_spectrogram = sparse_warp(mel_spectrogram) 77 | 78 | warped_frequency_spectrogram = frequency_masking(warped_mel_spectrogram, v=v) 79 | 80 | warped_frequency_time_sepctrogram = time_masking(warped_frequency_spectrogram, tau=tau) 81 | 82 | return warped_frequency_time_sepctrogram 83 | 84 | 85 | def _plot_spectrogram(mel_spectrogram, title): 86 | plt.figure(figsize=(10, 4)) 87 | librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :, 0], ref=np.max), 88 | y_axis='mel', fmax=8000, 89 | x_axis='time') 90 | # plt.colorbar(format='%+2.0f dB') 91 | plt.title(title) 92 | plt.tight_layout() 93 | plt.show() 94 | 95 | 96 | if __name__ == "__main__": 97 | import argparse 98 | import numpy as np 99 | 100 | parser = argparse.ArgumentParser(description='Spec Augment') 101 | parser.add_argument('--audio-path', default='../data/data_thchs30/data/A2_0.wav', 102 | help='The audio file.') 103 | 
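# The SpecAugment "LD" policy for LibriSpeech uses roughly W=80, F=27, mF=2, T=100,
# mT=2 (Park et al., 2019). Note that the argument defaults below assign 100 to
# --frequency-mask-para and 27 to --time-mask-para, i.e. the two values are swapped
# relative to their help text; the assignments after parsing swap them back, and the
# parsed values are not actually passed to spec_augment(), which uses the masking
# functions' own defaults.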
parser.add_argument('--time-warp-para', default=80, 104 | help='time warp parameter W') 105 | parser.add_argument('--frequency-mask-para', default=100, 106 | help='frequency mask parameter F') 107 | parser.add_argument('--time-mask-para', default=27, 108 | help='time mask parameter T') 109 | parser.add_argument('--masking-line-number', default=1, 110 | help='masking line number') 111 | 112 | args = parser.parse_args() 113 | audio_path = args.audio_path 114 | time_warping_para = args.time_warp_para 115 | time_masking_para = args.frequency_mask_para 116 | frequency_masking_para = args.time_mask_para 117 | masking_line_number = args.masking_line_number 118 | 119 | audio, sampling_rate = librosa.load(audio_path) 120 | mel_spectrogram = librosa.feature.melspectrogram(y=audio, 121 | sr=sampling_rate, 122 | n_mels=256, 123 | hop_length=128, 124 | fmax=8000) 125 | 126 | # reshape spectrogram shape to [batch_size, time, frequency, 1] 127 | shape = mel_spectrogram.shape 128 | mel_spectrogram = np.reshape(mel_spectrogram, (-1, shape[0], shape[1], 1)) 129 | 130 | _plot_spectrogram(mel_spectrogram=mel_spectrogram, 131 | title="Raw Mel Spectrogram") 132 | 133 | _plot_spectrogram( 134 | mel_spectrogram=spec_augment(mel_spectrogram), 135 | title="tensorflow Warped & Masked Mel Spectrogram") 136 | -------------------------------------------------------------------------------- /hlp/stt/utils/text_process.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from hlp.utils import text_split 3 | 4 | 5 | def tokenize_and_encode(texts: list, dict_path: str, max_len: int, 6 | num_words: int, unk_token: str = ""): 7 | """ 8 | 用于将文本序列集合转化为token序列 9 | :param texts: 文本序列列表 10 | :param dict_path: 字典保存路径 11 | :param max_len: 文本最大长度 12 | :param num_words:最多保存词汇数量 13 | :param unk_token: 未登录词 14 | :return texts: 处理好的文本token序列 15 | :return tokenizer: tokenizer 16 | """ 17 | tokenizer = tf.keras.preprocessing.text.Tokenizer(filters="", oov_token=unk_token, num_words=num_words) 18 | tokenizer.fit_on_texts(texts) 19 | texts = tokenizer.texts_to_sequences(texts) 20 | texts = tf.keras.preprocessing.sequence.pad_sequences(texts, maxlen=max_len, padding="post") 21 | 22 | with open(dict_path, 'w', encoding="utf-8") as dict_file: 23 | dict_file.write(tokenizer.to_json()) 24 | 25 | return texts, tokenizer 26 | 27 | 28 | def split_and_encode(sentences, mode, word_index): 29 | """对文本进行切分和编码 30 | 31 | :param sentences: 文本列表 32 | :param mode: 切分模式 33 | :param word_index: 词典 34 | :return: 文本编码序列 35 | """ 36 | splitted_sentences = split_sentences(sentences, mode) 37 | text_int_sequences_list = encode_texts(splitted_sentences, word_index) 38 | return text_int_sequences_list 39 | 40 | 41 | # token转换成id 42 | def encode_texts(splitted_sentences, word_index): 43 | text_int_sequences = [] 44 | for splitted_sentence in splitted_sentences: 45 | text_int_sequences.append(encode_text(splitted_sentence, word_index)) 46 | return text_int_sequences 47 | 48 | 49 | # token转换成id 50 | def encode_text(splitted_sentence, word_index): 51 | int_sequence = [] 52 | for c in splitted_sentence.split(" "): 53 | int_sequence.append(int(word_index[c])) 54 | return int_sequence 55 | 56 | 57 | def split_sentence(line, mode): 58 | """对转写文本进行切分 59 | 60 | :param line: 转写文本 61 | :param mode: 语料文本的切分方法 62 | :return: 切分后的文本,以空格分隔的字符串 63 | """ 64 | if mode.lower() == "cn": 65 | return _split_sentence_cn(line) 66 | elif mode.lower() == "en_word": 67 | return _split_sentence_en_word(line) 68 | elif mode.lower() 
== "en_char": 69 | return _split_sentence_en_char(line) 70 | elif mode.lower() == "las_cn": 71 | return _split_sentence_las_cn_char(line) 72 | elif mode.lower() == "las_en_word": 73 | return _split_sentence_las_en_word(line) 74 | elif mode.lower() == "las_en_char": 75 | return _split_sentence_las_en_char(line) 76 | 77 | 78 | def split_sentences(sentences, mode): 79 | """对文本进行切换 80 | 81 | :param sentences: 待切分文本序列 82 | :param mode: 切分模式 83 | :return: 空格分隔的token串的列表 84 | """ 85 | text_list = [] 86 | for text in sentences: 87 | text_list.append(split_sentence(text, mode)) 88 | return text_list 89 | 90 | 91 | def _split_sentence_en_word(s): 92 | result = text_split.split_en_word(s) 93 | return result 94 | 95 | 96 | def _split_sentence_en_char(s): 97 | result = text_split.split_en_char(s) 98 | return result 99 | 100 | 101 | def _split_sentence_las_en_char(s): 102 | s = text_split.split_en_char(s) 103 | 104 | # 给句子加上开始和结束标记 105 | # 以便模型知道何时开始和结束预测 106 | s.insert(0, '') 107 | s.append('') 108 | 109 | return s 110 | 111 | 112 | def _split_sentence_las_en_word(s): 113 | s = text_split.split_en_word(s) 114 | 115 | # 给句子加上开始和结束标记 116 | # 以便模型知道何时开始和结束预测 117 | s.insert(0, '') 118 | s.append('') 119 | 120 | return s 121 | 122 | 123 | def _split_sentence_cn(s): 124 | result = text_split.split_zh_char(s) 125 | return result 126 | 127 | 128 | def _split_sentence_las_cn_char(s): 129 | s = text_split.split_zh_char(s) 130 | 131 | # 给句子加上开始和结束标记 132 | # 以便模型知道何时开始和结束预测 133 | s.insert(0, '') 134 | s.append('') 135 | 136 | return s 137 | 138 | 139 | # 获取最长的label_length 140 | def get_max_label_length(text_int_sequences): 141 | return max(len(seq) for seq in text_int_sequences) 142 | 143 | 144 | def get_label_and_length(text_int_sequences_list, max_label_length): 145 | target_length_list = [] 146 | for text_int_sequence in text_int_sequences_list: 147 | target_length_list.append([len(text_int_sequence)]) 148 | target_tensor_numpy = tf.keras.preprocessing.sequence.pad_sequences(text_int_sequences_list, 149 | maxlen=max_label_length, 150 | padding='post' 151 | ) 152 | target_length = tf.convert_to_tensor(target_length_list) 153 | return target_tensor_numpy, target_length 154 | 155 | 156 | # 将输出token id序列解码为token序列 157 | def int_to_text_sequence(seq, index_word, mode): 158 | if mode.lower() == "cn": 159 | return int_to_text_sequence_cn(seq, index_word) 160 | elif mode.lower() == "en_word": 161 | return int_to_text_sequence_en_word(seq, index_word) 162 | elif mode.lower() == "en_char": 163 | return int_to_text_sequence_en_char(seq, index_word) 164 | 165 | 166 | def int_to_text_sequence_cn(ids, index_word): 167 | result = [] 168 | for i in ids: 169 | if 1 <= i <= len(index_word): 170 | word = index_word[str(i)] 171 | result.append(word) 172 | return "".join(result).strip() 173 | 174 | 175 | def int_to_text_sequence_en_word(ids, index_word): 176 | result = [] 177 | for i in ids: 178 | if 1 <= i <= (len(index_word)): 179 | word = index_word[str(i)] 180 | result.append(word) 181 | result.append(" ") 182 | return "".join(result).strip() 183 | 184 | 185 | def int_to_text_sequence_en_char(ids, index_word): 186 | result = [] 187 | for i in ids: 188 | if 1 <= i <= (len(index_word)): 189 | word = index_word[str(i)] 190 | if word != "": 191 | result.append(word) 192 | else: 193 | result.append(" ") 194 | return "".join(result).strip() 195 | -------------------------------------------------------------------------------- /hlp/tts/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/__init__.py -------------------------------------------------------------------------------- /hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0001.wav -------------------------------------------------------------------------------- /hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0002.wav -------------------------------------------------------------------------------- /hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0003.wav -------------------------------------------------------------------------------- /hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0004.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0004.wav -------------------------------------------------------------------------------- /hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0005.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0005.wav -------------------------------------------------------------------------------- /hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0006.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/LJSpeech-1.1/wavs/LJ001-0006.wav -------------------------------------------------------------------------------- /hlp/tts/data/cmudict-0.7b: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/cmudict-0.7b -------------------------------------------------------------------------------- /hlp/tts/data/number/metadata.csv: -------------------------------------------------------------------------------- 1 | 0_jackson_0|zero 2 | 0_jackson_1|zero 3 | 0_jackson_2|zero 4 | 0_jackson_3|zero 5 | 0_jackson_4|zero 6 | 0_jackson_5|zero 7 | 0_jackson_6|zero 8 | 0_jackson_7|zero 9 | 0_jackson_8|zero 10 | 0_jackson_9|zero 11 | 0_jackson_10|zero 12 | 0_jackson_11|zero 13 | 0_jackson_12|zero 14 | 0_jackson_13|zero 15 | 0_jackson_14|zero 16 | 0_jackson_15|zero 17 | 0_jackson_16|zero 18 | 0_jackson_17|zero 19 | 0_jackson_18|zero 20 | 0_jackson_19|zero 21 | 0_jackson_20|zero 22 | 0_jackson_21|zero 23 | 0_jackson_22|zero 24 | 0_jackson_23|zero 25 | 0_jackson_24|zero 26 | 0_jackson_25|zero 27 | 0_jackson_26|zero 28 | 0_jackson_27|zero 29 | 0_jackson_28|zero 30 | 0_jackson_29|zero 31 | 0_jackson_30|zero 32 | 0_jackson_31|zero 33 | 0_jackson_32|zero 34 | 0_jackson_33|zero 35 | 0_jackson_34|zero 36 | 0_jackson_35|zero 37 | 0_jackson_36|zero 38 | 0_jackson_37|zero 39 | 
0_jackson_38|zero 40 | 0_jackson_39|zero 41 | 0_jackson_40|zero 42 | 0_jackson_41|zero 43 | 0_jackson_42|zero 44 | 0_jackson_43|zero 45 | 0_jackson_44|zero 46 | 0_jackson_45|zero 47 | 0_jackson_46|zero 48 | 0_jackson_47|zero 49 | 0_jackson_48|zero 50 | 0_jackson_49|zero 51 | 1_jackson_0|one 52 | 1_jackson_1|one 53 | 1_jackson_2|one 54 | 1_jackson_3|one 55 | 1_jackson_4|one 56 | 1_jackson_5|one 57 | 1_jackson_6|one 58 | 1_jackson_7|one 59 | 1_jackson_8|one 60 | 1_jackson_9|one 61 | 1_jackson_10|one 62 | 1_jackson_11|one 63 | 1_jackson_12|one 64 | 1_jackson_13|one 65 | 1_jackson_14|one 66 | 1_jackson_15|one 67 | 1_jackson_16|one 68 | 1_jackson_17|one 69 | 1_jackson_18|one 70 | 1_jackson_19|one 71 | 1_jackson_20|one 72 | 1_jackson_21|one 73 | 1_jackson_22|one 74 | 1_jackson_23|one 75 | 1_jackson_24|one 76 | 1_jackson_25|one 77 | 1_jackson_26|one 78 | 1_jackson_27|one 79 | 1_jackson_28|one 80 | 1_jackson_29|one 81 | 1_jackson_30|one 82 | 1_jackson_31|one 83 | 1_jackson_32|one 84 | 1_jackson_33|one 85 | 1_jackson_34|one 86 | 1_jackson_35|one 87 | 1_jackson_36|one 88 | 1_jackson_37|one 89 | 1_jackson_38|one 90 | 1_jackson_39|one 91 | 1_jackson_40|one 92 | 1_jackson_41|one 93 | 1_jackson_42|one 94 | 1_jackson_43|one 95 | 1_jackson_44|one 96 | 1_jackson_45|one 97 | 1_jackson_46|one 98 | 1_jackson_47|one 99 | 1_jackson_48|one 100 | 1_jackson_49|one 101 | 2_jackson_0|two 102 | 2_jackson_1|two 103 | 2_jackson_2|two 104 | 2_jackson_3|two 105 | 2_jackson_4|two 106 | 2_jackson_5|two 107 | 2_jackson_6|two 108 | 2_jackson_7|two 109 | 2_jackson_8|two 110 | 2_jackson_9|two 111 | 2_jackson_10|two 112 | 2_jackson_11|two 113 | 2_jackson_12|two 114 | 2_jackson_13|two 115 | 2_jackson_14|two 116 | 2_jackson_15|two 117 | 2_jackson_16|two 118 | 2_jackson_17|two 119 | 2_jackson_18|two 120 | 2_jackson_19|two 121 | 2_jackson_20|two 122 | 2_jackson_21|two 123 | 2_jackson_22|two 124 | 2_jackson_23|two 125 | 2_jackson_24|two 126 | 2_jackson_25|two 127 | 2_jackson_26|two 128 | 2_jackson_27|two 129 | 2_jackson_28|two 130 | 2_jackson_29|two 131 | 2_jackson_30|two 132 | 2_jackson_31|two 133 | 2_jackson_32|two 134 | 2_jackson_33|two 135 | 2_jackson_34|two 136 | 2_jackson_35|two 137 | 2_jackson_36|two 138 | 2_jackson_37|two 139 | 2_jackson_38|two 140 | 2_jackson_39|two 141 | 2_jackson_40|two 142 | 2_jackson_41|two 143 | 2_jackson_42|two 144 | 2_jackson_43|two 145 | 2_jackson_44|two 146 | 2_jackson_45|two 147 | 2_jackson_46|two 148 | 2_jackson_47|two 149 | 2_jackson_48|two 150 | 2_jackson_49|two 151 | 3_jackson_0|three 152 | 3_jackson_1|three 153 | 3_jackson_2|three 154 | 3_jackson_3|three 155 | 3_jackson_4|three 156 | 3_jackson_5|three 157 | 3_jackson_6|three 158 | 3_jackson_7|three 159 | 3_jackson_8|three 160 | 3_jackson_9|three 161 | 3_jackson_10|three 162 | 3_jackson_11|three 163 | 3_jackson_12|three 164 | 3_jackson_13|three 165 | 3_jackson_14|three 166 | 3_jackson_15|three 167 | 3_jackson_16|three 168 | 3_jackson_17|three 169 | 3_jackson_18|three 170 | 3_jackson_19|three 171 | 3_jackson_20|three 172 | 3_jackson_21|three 173 | 3_jackson_22|three 174 | 3_jackson_23|three 175 | 3_jackson_24|three 176 | 3_jackson_25|three 177 | 3_jackson_26|three 178 | 3_jackson_27|three 179 | 3_jackson_28|three 180 | 3_jackson_29|three 181 | 3_jackson_30|three 182 | 3_jackson_31|three 183 | 3_jackson_32|three 184 | 3_jackson_33|three 185 | 3_jackson_34|three 186 | 3_jackson_35|three 187 | 3_jackson_36|three 188 | 3_jackson_37|three 189 | 3_jackson_38|three 190 | 3_jackson_39|three 191 | 3_jackson_40|three 192 | 3_jackson_41|three 193 | 
3_jackson_42|three 194 | 3_jackson_43|three 195 | 3_jackson_44|three 196 | 3_jackson_45|three 197 | 3_jackson_46|three 198 | 3_jackson_47|three 199 | 3_jackson_48|three 200 | 3_jackson_49|three 201 | 4_jackson_0|four 202 | 4_jackson_1|four 203 | 4_jackson_2|four 204 | 4_jackson_3|four 205 | 4_jackson_4|four 206 | 4_jackson_5|four 207 | 4_jackson_6|four 208 | 4_jackson_7|four 209 | 4_jackson_8|four 210 | 4_jackson_9|four 211 | 4_jackson_10|four 212 | 4_jackson_11|four 213 | 4_jackson_12|four 214 | 4_jackson_13|four 215 | 4_jackson_14|four 216 | 4_jackson_15|four 217 | 4_jackson_16|four 218 | 4_jackson_17|four 219 | 4_jackson_18|four 220 | 4_jackson_19|four 221 | 4_jackson_20|four 222 | 4_jackson_21|four 223 | 4_jackson_22|four 224 | 4_jackson_23|four 225 | 4_jackson_24|four 226 | 4_jackson_25|four 227 | 4_jackson_26|four 228 | 4_jackson_27|four 229 | 4_jackson_28|four 230 | 4_jackson_29|four 231 | 4_jackson_30|four 232 | 4_jackson_31|four 233 | 4_jackson_32|four 234 | 4_jackson_33|four 235 | 4_jackson_34|four 236 | 4_jackson_35|four 237 | 4_jackson_36|four 238 | 4_jackson_37|four 239 | 4_jackson_38|four 240 | 4_jackson_39|four 241 | 4_jackson_40|four 242 | 4_jackson_41|four 243 | 4_jackson_42|four 244 | 4_jackson_43|four 245 | 4_jackson_44|four 246 | 4_jackson_45|four 247 | 4_jackson_46|four 248 | 4_jackson_47|four 249 | 4_jackson_48|four 250 | 4_jackson_49|four 251 | 5_jackson_0|five 252 | 5_jackson_1|five 253 | 5_jackson_2|five 254 | 5_jackson_3|five 255 | 5_jackson_4|five 256 | 5_jackson_5|five 257 | 5_jackson_6|five 258 | 5_jackson_7|five 259 | 5_jackson_8|five 260 | 5_jackson_9|five 261 | 5_jackson_10|five 262 | 5_jackson_11|five 263 | 5_jackson_12|five 264 | 6_jackson_0|six 265 | 6_jackson_1|six 266 | 6_jackson_2|six 267 | 6_jackson_3|six 268 | 6_jackson_4|six 269 | 6_jackson_5|six 270 | 6_jackson_6|six 271 | 6_jackson_7|six 272 | 6_jackson_8|six 273 | 6_jackson_9|six 274 | 6_jackson_10|six 275 | 6_jackson_11|six 276 | 6_jackson_12|six 277 | 7_jackson_0|seven 278 | 7_jackson_1|seven 279 | 7_jackson_2|seven 280 | 7_jackson_3|seven 281 | 7_jackson_4|seven 282 | 7_jackson_5|seven 283 | 7_jackson_6|seven 284 | 7_jackson_7|seven 285 | 7_jackson_8|seven 286 | 7_jackson_9|seven 287 | 7_jackson_10|seven 288 | 7_jackson_11|seven 289 | 7_jackson_12|seven 290 | 8_jackson_0|eight 291 | 8_jackson_1|eight 292 | 8_jackson_2|eight 293 | 8_jackson_3|eight 294 | 8_jackson_4|eight 295 | 8_jackson_5|eight 296 | 8_jackson_6|eight 297 | 8_jackson_7|eight 298 | 8_jackson_8|eight 299 | 8_jackson_9|eight 300 | 8_jackson_10|eight 301 | 8_jackson_11|eight 302 | 8_jackson_12|eight 303 | 9_jackson_0|nine 304 | 9_jackson_1|nine 305 | 9_jackson_2|nine 306 | 9_jackson_3|nine 307 | 9_jackson_4|nine 308 | 9_jackson_5|nine 309 | 9_jackson_6|nine 310 | 9_jackson_7|nine 311 | 9_jackson_8|nine 312 | 9_jackson_9|nine 313 | 9_jackson_10|nine 314 | 9_jackson_11|nine 315 | 9_jackson_12|nine -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/0_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/0_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/0_jackson_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/0_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/1_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/1_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/1_jackson_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/1_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/2_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/2_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/2_jackson_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/2_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/3_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/3_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/3_jackson_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/3_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/4_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/4_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/4_jackson_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/4_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/5_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/5_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/5_jackson_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/5_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/6_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/6_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/6_jackson_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/6_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/7_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/7_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/7_jackson_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/7_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/8_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/8_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/8_jackson_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/8_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/9_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/9_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/test/wavs/9_jackson_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/test/wavs/9_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/0_jackson_10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_10.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/0_jackson_11.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_11.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/0_jackson_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_2.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/0_jackson_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_3.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/0_jackson_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_4.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/0_jackson_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_5.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/0_jackson_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_6.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/0_jackson_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_7.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/0_jackson_8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_8.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/0_jackson_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/0_jackson_9.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/1_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/1_jackson_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/1_jackson_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_2.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/1_jackson_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_3.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/1_jackson_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_4.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/1_jackson_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_5.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/1_jackson_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_6.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/1_jackson_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_7.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/1_jackson_8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_8.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/1_jackson_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/1_jackson_9.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/2_jackson_10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_10.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/2_jackson_11.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_11.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/2_jackson_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_2.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/2_jackson_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_3.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/2_jackson_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_4.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/2_jackson_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_5.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/2_jackson_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_6.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/2_jackson_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_7.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/2_jackson_8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_8.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/2_jackson_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/2_jackson_9.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/3_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/3_jackson_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/3_jackson_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_2.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/3_jackson_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_3.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/3_jackson_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_4.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/3_jackson_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_5.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/3_jackson_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_6.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/3_jackson_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_7.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/3_jackson_8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_8.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/3_jackson_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/3_jackson_9.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/4_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/4_jackson_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/4_jackson_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_2.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/4_jackson_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_3.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/4_jackson_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_4.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/4_jackson_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_5.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/4_jackson_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_6.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/4_jackson_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_7.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/4_jackson_8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_8.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/4_jackson_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/4_jackson_9.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/5_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/5_jackson_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/5_jackson_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_2.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/5_jackson_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_3.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/5_jackson_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_4.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/5_jackson_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_5.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/5_jackson_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_6.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/5_jackson_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_7.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/5_jackson_8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_8.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/5_jackson_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/5_jackson_9.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/6_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/6_jackson_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/6_jackson_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_2.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/6_jackson_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_3.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/6_jackson_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_4.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/6_jackson_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_5.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/6_jackson_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_6.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/6_jackson_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_7.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/6_jackson_8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_8.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/6_jackson_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/6_jackson_9.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/7_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/7_jackson_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/7_jackson_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_2.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/7_jackson_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_3.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/7_jackson_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_4.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/7_jackson_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_5.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/7_jackson_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_6.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/7_jackson_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_7.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/7_jackson_8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_8.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/7_jackson_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/7_jackson_9.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/8_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/8_jackson_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/8_jackson_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_2.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/8_jackson_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_3.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/8_jackson_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_4.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/8_jackson_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_5.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/8_jackson_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_6.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/8_jackson_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_7.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/8_jackson_8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_8.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/8_jackson_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/8_jackson_9.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/9_jackson_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_0.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/9_jackson_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_1.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/9_jackson_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_2.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/9_jackson_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_3.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/9_jackson_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_4.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/9_jackson_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_5.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/9_jackson_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_6.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/9_jackson_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_7.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/9_jackson_8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_8.wav -------------------------------------------------------------------------------- /hlp/tts/data/number/train/wavs/9_jackson_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/tts/data/number/train/wavs/9_jackson_9.wav -------------------------------------------------------------------------------- /hlp/tts/utils/layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class ConvDropBN(tf.keras.layers.Layer): 5 | """ 6 | 卷积-Dropout-BatchNormalization块 7 | """ 8 | 9 | def __init__(self, filters, kernel_size, activation, dropout_rate): 10 | """ 11 | :param filters: 输出空间维数 12 | :param kernel_size: 卷积核大小 13 | :param activation: 激活方法 14 | :param dropout_rate: dropout采样率 15 | """ 16 | super(ConvDropBN, self).__init__() 17 | self.conv1d = tf.keras.layers.Conv1D(filters, 
kernel_size, 18 | padding="same", activation=activation) 19 | self.dropout = tf.keras.layers.Dropout(rate=dropout_rate) 20 | self.norm = tf.keras.layers.BatchNormalization() 21 | 22 | def call(self, inputs): 23 | outputs = self.conv1d(inputs) 24 | outputs = self.dropout(outputs) 25 | outputs = self.norm(outputs) 26 | return outputs 27 | 28 | 29 | class DecoderPreNet(tf.keras.layers.Layer): 30 | """ 31 | Decoder的pre_net,用于映射频谱样本的空间 32 | """ 33 | 34 | def __init__(self, pre_net_units, pre_net_layers_num, pre_net_dropout_rate): 35 | """ 36 | :param pre_net_units: 全连接层单元数 37 | :param pre_net_layers_num: pre_net层数 38 | :param pre_net_dropout_rate: dropout采样率 39 | """ 40 | super().__init__() 41 | self.pre_net_units = pre_net_units 42 | self.pre_net_layers_num = pre_net_layers_num 43 | self.pre_net_dropout_rate = pre_net_dropout_rate 44 | self.pre_net_dense = [ 45 | tf.keras.layers.Dense(units=self.pre_net_units, activation='relu') 46 | for i in range(self.pre_net_layers_num) 47 | ] 48 | self.dropout = tf.keras.layers.Dropout(rate=self.pre_net_dropout_rate) 49 | 50 | def call(self, inputs): 51 | outputs = inputs 52 | for layer in self.pre_net_dense: 53 | outputs = layer(outputs) 54 | outputs = self.dropout(outputs) 55 | return outputs 56 | 57 | 58 | class PostNet(tf.keras.layers.Layer): 59 | """ 60 | Tacotron2的PostNet,包含n_conv_encoder数量的卷积层 61 | """ 62 | 63 | def __init__(self, encoder_conv_num: int, post_net_conv_num: int, post_net_filters: int, 64 | post_net_kernel_sizes: int, post_net_dropout: float, 65 | post_net_activation: str, num_mel: int): 66 | """ 67 | :param encoder_conv_num: encoder卷积层数量 68 | :param post_net_conv_num: post_net的卷积层数量 69 | :param post_net_filters: post_net卷积输出空间维数 70 | :param post_net_kernel_sizes: post_net卷积核大小 71 | :param post_net_dropout: post_net的dropout采样率 72 | :param post_net_activation: post_net卷积激活函数 73 | :param n_mels: 梅尔带数 74 | """ 75 | super().__init__() 76 | self.conv_batch_norm = [] 77 | for i in range(encoder_conv_num): 78 | if i == post_net_conv_num - 1: 79 | conv = ConvDropBN(filters=post_net_filters, kernel_size=post_net_kernel_sizes, 80 | activation=None, dropout_rate=post_net_dropout) 81 | else: 82 | conv = ConvDropBN(filters=post_net_filters, kernel_size=post_net_kernel_sizes, 83 | activation=post_net_activation, dropout_rate=post_net_dropout) 84 | self.conv_batch_norm.append(conv) 85 | 86 | self.fc = tf.keras.layers.Dense(units=num_mel, activation=None, name="frame_projection1") 87 | 88 | def call(self, inputs): 89 | x = tf.transpose(inputs, [0, 2, 1]) 90 | for _, conv in enumerate(self.conv_batch_norm): 91 | x = conv(x) 92 | x = self.fc(x) 93 | x = tf.transpose(x, [0, 2, 1]) 94 | return x 95 | -------------------------------------------------------------------------------- /hlp/tts/utils/load_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tensorflow as tf 4 | from hlp.tts.utils.text_preprocess import text_to_sequence_phoneme 5 | 6 | 7 | def load_data(train_data_path: str, max_len: int, vocab_size: int, batch_size: int, buffer_size: int, 8 | tokenized_type: str = "phoneme", dict_path: str = "", valid_data_split: float = 0.0, 9 | valid_data_path: str = "", max_train_data_size: int = 0, max_valid_data_size: int = 0): 10 | """ 11 | 加载训练验证数据方法,非phoneme的方法将会保存字典 12 | 验证数据的优先级为:验证数据文件>从训练集划分验证集 13 | :param train_data_path: 文本数据路径 14 | :param max_len: 文本序列最大长度 15 | :param vocab_size: 词汇大小 16 | :param tokenized_type: 分词类型,默认按音素分词,模式:phoneme(音素)/word(单词)/char(字符) 17 
| :param dict_path: 字典路径,若使用phoneme则不用传 18 | :param buffer_size: Dataset加载缓存大小 19 | :param batch_size: Dataset加载批大小 20 | :param valid_data_split: 用于从训练数据中划分验证数据 21 | :param valid_data_path: 验证数据文本路径 22 | :param max_train_data_size: 最大训练数据量 23 | :param max_valid_data_size: 最大验证数据量 24 | :return: 返回train_dataset, valid_dataset, steps_per_epoch, valid_steps_per_epoch 25 | """ 26 | if not os.path.exists(train_data_path): 27 | print("加载的训练验证数据文件不存在,请先执行pre_treat模式后重试") 28 | exit(0) 29 | 30 | print("正在加载训练数据...") 31 | train_audio_data_pair, train_sentence_data = read_data(data_path=train_data_path, num_examples=max_train_data_size) 32 | 33 | valid_flag = True # 是否开启验证标记 34 | valid_steps_per_epoch = 0 35 | 36 | # 根据是否传入验证数据文件,切分验证数据 37 | if valid_data_path != "": 38 | print("正在加载验证数据...") 39 | valid_audio_data_pair, valid_sentence_data = read_data(data_path=valid_data_path, 40 | num_examples=max_valid_data_size) 41 | elif valid_data_split != 0.0: 42 | print("从训练数据中划分验证数据...") 43 | train_size = int(len(train_audio_data_pair) * (1.0 - valid_data_split)) 44 | valid_audio_data_pair = train_audio_data_pair[train_size:] 45 | valid_sentence_data = train_sentence_data[train_size:] 46 | train_audio_data_pair = train_audio_data_pair[:train_size] 47 | train_sentence_data = train_sentence_data[:train_size] 48 | else: 49 | print("没有验证数据.") 50 | valid_flag = False 51 | 52 | # 根据分词类型进行序列转换 53 | if tokenized_type == "phoneme": 54 | train_sentence_sequences = text_to_sequence_phoneme(texts=train_sentence_data, max_len=max_len) 55 | if valid_flag: 56 | valid_sentence_sequences = text_to_sequence_phoneme(texts=valid_sentence_data, max_len=max_len) 57 | else: 58 | if dict_path == "": 59 | print("请在加载数据时,传入字典保存路径") 60 | exit(0) 61 | tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', oov_token="", num_words=vocab_size) 62 | tokenizer.fit_on_texts(train_sentence_data) 63 | train_sentence_sequences = tokenizer.texts_to_sequences(train_sentence_data) 64 | train_sentence_sequences = tf.keras.preprocessing.sequence.pad_sequences(train_sentence_sequences, 65 | max_len=max_len, padding="post") 66 | with open(dict_path, 'w', encoding="utf-8") as dict_file: 67 | dict_file.write(tokenizer.to_json()) 68 | 69 | if valid_flag: 70 | valid_sentence_sequences = tokenizer.texts_to_sequences(valid_sentence_data) 71 | valid_sentence_sequences = tf.keras.preprocessing.sequence.pad_sequences(valid_sentence_sequences, 72 | max_len=max_len, padding="post") 73 | 74 | train_dataset = _to_dataset(data=(train_audio_data_pair, train_sentence_sequences), 75 | batch_size=batch_size, buffer_size=buffer_size) 76 | if valid_flag: 77 | valid_dataset = _to_dataset(data=(valid_audio_data_pair, valid_sentence_sequences), 78 | batch_size=batch_size, buffer_size=buffer_size) 79 | valid_steps_per_epoch = len(valid_sentence_sequences) // batch_size 80 | else: 81 | valid_dataset = None 82 | 83 | steps_per_epoch = len(train_sentence_sequences) // batch_size 84 | 85 | print("训练验证数据加载完毕") 86 | return train_dataset, valid_dataset, steps_per_epoch, valid_steps_per_epoch 87 | 88 | 89 | def _to_dataset(data: tuple, batch_size: int, buffer_size: int): 90 | """ 91 | 将data封装成tf.data.Dataset 92 | :param data: 要封装的数据元组 93 | :param buffer_size: Dataset加载缓存大小 94 | :param batch_size: Dataset加载批大小 95 | :return: dataset 96 | """ 97 | dataset = tf.data.Dataset.from_tensor_slices(data). 
\ 98 | cache().shuffle(buffer_size).prefetch(tf.data.experimental.AUTOTUNE) 99 | dataset = dataset.map(_process_audio_sentence_pairs, num_parallel_calls=tf.data.experimental.AUTOTUNE) 100 | dataset = dataset.batch(batch_size, drop_remainder=True) 101 | 102 | return dataset 103 | 104 | 105 | def read_data(data_path: str, num_examples: int): 106 | """ 107 | :param data_path: 需要读取整理的数据文件路径 108 | :param num_examples: 读取的数据量大小 109 | :return: 返回读取的音频数据对和句子数据 110 | """ 111 | audio_data_pair = [] 112 | sentence_data = [] 113 | with open(data_path, 'r', encoding="utf-8") as data_file: 114 | lines = data_file.read().strip().split('\n') 115 | if num_examples != 0: 116 | lines = lines[:num_examples] 117 | 118 | for line in lines: 119 | line = line.strip().strip("\n").replace("/", " ").split("\t") 120 | sentence_data.append(line[-1]) 121 | line.pop(-1) 122 | audio_data_pair.append(line) 123 | 124 | return audio_data_pair, sentence_data 125 | 126 | 127 | def read_npy_file(filename): 128 | """ 129 | 专门用于匹配dataset的map读取文件的方法 130 | :param filename: 传入的文件名张量 131 | :return: 返回读取的数据 132 | """ 133 | data = np.load(filename.numpy().decode()) 134 | return data.astype(np.float32) 135 | 136 | 137 | def _process_audio_sentence_pairs(audio_data_pair: tf.Tensor, sentence: tf.Tensor): 138 | """ 139 | 用于处理音频句子对,将其转化为张量 140 | :param audio_data_pair: 音频相关数据对,mel、mag、stop_token保存文件 141 | :param sentence: 音频句子对 142 | :return: mel, mag, stop_token, sentence 143 | """ 144 | [mel, ] = tf.py_function(read_npy_file, [audio_data_pair[0]], [tf.float32, ]) 145 | [stop_token, ] = tf.py_function(read_npy_file, [audio_data_pair[2]], [tf.float32, ]) 146 | 147 | return mel, stop_token, sentence 148 | 149 | -------------------------------------------------------------------------------- /hlp/tts/utils/pre_treat.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tensorflow as tf 4 | from hlp.tts.utils import text_preprocess 5 | from hlp.tts.utils.spec import get_spectrograms 6 | 7 | 8 | def preprocess_lj_speech_raw_data(metadata_path: str, audio_dir: str, dataset_infos_file: str, max_length: int, 9 | pre_emphasis: float, n_fft: int, n_mels: int, hop_length: int, 10 | win_length: int, max_db: int, ref_db: int, top_db: int, 11 | spectrum_data_dir: str, audio_suffix: str = ".wav", 12 | tokenized_type: str = "phoneme", cmu_dict_path: str = ""): 13 | """ 14 | 用于处理LJSpeech数据集的方法,将数据整理为<音频地址, 句子>的 15 | 形式,这样方便后续进行分批读取 16 | :param metadata_path: 元数据CSV文件路径 17 | :param audio_dir: 音频目录路径 18 | :param dataset_infos_file: 保存处理之后的数据路径 19 | :param max_length: 最大序列长度 20 | :param audio_suffix: 音频的类型后缀 21 | :param tokenized_type: 分词类型,默认按音素分词,模式:phoneme(音素)/word(单词)/char(字符) 22 | :param cmu_dict_path: cmu音素字典路径,使用phoneme时必传 23 | :param spectrum_data_dir: 保存mel和mag数据目录 24 | :param pre_emphasis: 预加重 25 | :param n_fft: FFT窗口大小 26 | :param n_mels: 产生的梅尔带数 27 | :param hop_length: 帧移 28 | :param win_length: 每一帧音频都由window()加窗,窗长win_length,然后用零填充以匹配N_FFT 29 | :param max_db: 峰值分贝值 30 | :param ref_db: 参考分贝值 31 | :param top_db: 峰值以下的阈值分贝值 32 | :return: 无返回值 33 | """ 34 | audios_list = os.listdir(audio_dir) 35 | if not os.path.exists(metadata_path): 36 | print("元数据CSV文件路径不存在,请检查重试") 37 | exit(0) 38 | 39 | if not os.path.exists(spectrum_data_dir): 40 | os.makedirs(spectrum_data_dir) 41 | 42 | count = 0 43 | with open(metadata_path, 'r', encoding='utf-8') as raw_file, \ 44 | open(dataset_infos_file, 'w', encoding='utf-8') as ds_infos_file: 45 | for line in raw_file: 46 | line = 
line.strip('\n').replace('/', '') 47 | pair = line.split('|') 48 | audio_file = pair[0] + audio_suffix 49 | mel_file = spectrum_data_dir + pair[0] + ".mel.npy" 50 | mag_file = spectrum_data_dir + pair[0] + ".mag.npy" 51 | stop_token_file = spectrum_data_dir + pair[0] + ".stop.npy" 52 | 53 | if audios_list.count(audio_file) < 1: 54 | continue 55 | 56 | text = dispatch_tokenized_func(text=pair[1], tokenized_type=tokenized_type, 57 | cmu_dict_path=cmu_dict_path) 58 | mel, mag = get_spectrograms(audio_path=audio_dir + audio_file, pre_emphasis=pre_emphasis, 59 | n_fft=n_fft, n_mels=n_mels, hop_length=hop_length, 60 | win_length=win_length, max_db=max_db, ref_db=ref_db, top_db=top_db) 61 | stop_token = np.zeros(shape=max_length) 62 | stop_token[len(mel) - 1:] = 1 63 | 64 | mel = tf.keras.preprocessing.sequence.pad_sequences(tf.expand_dims(mel, axis=0), 65 | maxlen=max_length, dtype="float32", padding="post") 66 | mel = tf.squeeze(mel, axis=0) 67 | mel = tf.transpose(mel, [1, 0]) 68 | 69 | np.save(file=mel_file, arr=mel) 70 | np.save(file=mag_file, arr=mag) 71 | np.save(file=stop_token_file, arr=stop_token) 72 | 73 | ds_infos_file.write(mel_file + "\t" + mag_file + "\t" + stop_token_file + "\t" + text + "\n") 74 | 75 | count += 1 76 | print('\r已处理音频句子对数:{}'.format(count), flush=True, end='') 77 | 78 | print("\n数据处理完毕,共计{}条语音数据".format(count)) 79 | 80 | 81 | def dispatch_tokenized_func(text: str, tokenized_type: str = "phoneme", cmu_dict_path: str = ""): 82 | """ 83 | 用来整合目前所有分词处理方法,通过字典匹配进行调用,默认使用phoneme分词 84 | :param text: 句子文本 85 | :param tokenized_type: 分词类型,默认按音素分词,模式:phoneme(音素)/word(单词)/char(字符) 86 | :param cmu_dict_path: cmu音素字典路径,使用phoneme时必传 87 | :return: 按照对应方法处理好的文本序列 88 | """ 89 | operation = { 90 | "phoneme": lambda: text_preprocess.text_to_phonemes(text=text, 91 | cmu_dict_path=cmu_dict_path), 92 | "word": lambda: text_preprocess.text_to_words(text=text), 93 | "char": lambda: text_preprocess.text_to_chars(text=text) 94 | } 95 | 96 | return operation.get(tokenized_type, "phoneme")() 97 | -------------------------------------------------------------------------------- /hlp/tts/utils/spec.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import librosa 4 | import numpy as np 5 | import scipy 6 | import tensorflow as tf 7 | 8 | 9 | def get_spectrograms(audio_path: str, pre_emphasis: float, n_fft: int, n_mels: int, 10 | hop_length: int, win_length: int, max_db: int, ref_db: int, top_db: int): 11 | """ 12 | 处理音频文件,转换成梅尔频谱和线性谱 13 | :param audio_path: 音频路径 14 | :param pre_emphasis: 预加重 15 | :param n_fft: FFT窗口大小 16 | :param n_mels: 产生的梅尔带数 17 | :param hop_length: 帧移 18 | :param win_length: 每一帧音频都由window()加窗,窗长win_length,然后用零填充以匹配N_FFT 19 | :param max_db: 峰值分贝值 20 | :param ref_db: 参考分贝值 21 | :param top_db: 峰值以下的阈值分贝值 22 | :return: 返回归一化后的梅尔频谱和线性谱,形状分别为(T, n_mels)和(T, 1+n_fft//2) 23 | """ 24 | y, sr = librosa.load(audio_path, sr=None) 25 | y, _ = librosa.effects.trim(y, top_db=top_db) 26 | y = np.append(y[0], y[1:] - pre_emphasis * y[:-1]) 27 | # 短时傅里叶变换 28 | linear = librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length) 29 | 30 | # 幅度谱 31 | mag = np.abs(linear) # (1+n_fft//2, T) 32 | # mel频谱 33 | mel_basis = librosa.filters.mel(sr, n_fft, n_mels) # (n_mels, 1+n_fft//2) 34 | mel = np.dot(mel_basis, mag) # (n_mels, t) 35 | mel = 20 * np.log10(np.maximum(1e-5, mel)) 36 | mag = 20 * np.log10(np.maximum(1e-5, mag)) 37 | mel = np.clip((mel - ref_db + max_db) / max_db, 1e-8, 1) 38 | mag = np.clip((mag - ref_db + 
max_db) / max_db, 1e-8, 1) 39 | mel = mel.T.astype(np.float32) # (T, n_mels) 40 | mag = mag.T.astype(np.float32) # (T, 1+n_fft//2) 41 | return mel, mag 42 | 43 | 44 | def melspectrogram2wav(mel, max_db, ref_db, sr, n_fft, n_mels, preemphasis, n_iter, hop_length, win_length): 45 | """ 46 | 从线性幅度谱图生成wav文件 47 | :param mel: 梅尔谱 48 | :param sr: 采样率 49 | :param preemphasis: 预加重 50 | :param n_fft: FFT窗口大小 51 | :param n_mels: 产生的梅尔带数 52 | :param hop_length: 帧移 53 | :param win_length: 每一帧音频都由window()加窗,窗长win_length,然后用零填充以匹配N_FFT 54 | :param max_db: 峰值分贝值 55 | :param ref_db: 参考分贝值 56 | :param n_iter: 迭代指针 57 | """ 58 | mel = (np.clip(mel, 0, 1) * max_db) - max_db + ref_db 59 | # 转为幅度谱 60 | mel = np.power(10.0, mel * 0.05) 61 | m = _mel_to_linear_matrix(sr, n_fft, n_mels) 62 | mag = np.dot(m, mel) 63 | # 波形重构 64 | wav = griffin_lim(mag, n_iter, n_fft, hop_length, win_length) 65 | wav = scipy.signal.lfilter([1], [1, -preemphasis], wav) 66 | # 剪裁 67 | wav, _ = librosa.effects.trim(wav) 68 | return wav.astype(np.float32) 69 | 70 | 71 | def _mel_to_linear_matrix(sr, n_fft, n_mels): 72 | m = librosa.filters.mel(sr, n_fft, n_mels) 73 | m_t = np.transpose(m) 74 | p = np.matmul(m, m_t) 75 | d = [1.0 / x if np.abs(x) > 1.0e-8 else x for x in np.sum(p, axis=0)] 76 | return np.matmul(m_t, np.diag(d)) 77 | 78 | 79 | def griffin_lim(spectrogram, n_iter, n_fft, hop_length, win_length): 80 | """ 81 | 已知幅度谱,未知相位谱,通过迭代生成相位谱,并用已 82 | 知的幅度谱和计算得出的相位谱,重建语音波形的方法 83 | :param spectrogram: 幅度谱 84 | :param n_iter: 迭代指针 85 | :param n_fft: FFT窗口大小 86 | :param hop_length: 帧移 87 | :param win_length: 窗长win_length 88 | :return: 89 | """ 90 | x_best = copy.deepcopy(spectrogram) 91 | for i in range(n_iter): 92 | x_t = invert_spectrogram(x_best, hop_length, win_length) 93 | est = librosa.stft(x_t, n_fft, hop_length, win_length=win_length) 94 | phase = est / np.maximum(1e-8, np.abs(est)) 95 | x_best = spectrogram * phase 96 | x_t = invert_spectrogram(x_best, hop_length, win_length) 97 | y = np.real(x_t) 98 | return y 99 | 100 | 101 | def invert_spectrogram(spectrogram, hop_length, win_length): 102 | """ 103 | spectrogram: [f, t] 104 | :param spectrogram: 幅度谱 105 | :param hop_length: 帧移 106 | :param win_length: 窗长win_length 107 | """ 108 | return librosa.istft(spectrogram, hop_length, win_length=win_length, window="hann") 109 | 110 | 111 | def spec_distance(mel1, mel2): 112 | """ 113 | 计算mel谱之间的欧式距离 114 | :param mel1: 预测mel 115 | :param mel2: ground-true mel 116 | :return 两者之间的欧氏距离 117 | """ 118 | mel1 = tf.transpose(mel1, [0, 2, 1]) 119 | score = np.sqrt(np.sum((mel1 - mel2) ** 2)) 120 | return score -------------------------------------------------------------------------------- /hlp/tts/wavernn/generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from hlp.tts.wavernn.preprocess import read_data, label_2_float 5 | 6 | 7 | # 数据生成器 8 | def generator(wav_name_list, batch_size, sample_rate, peak_norm, voc_mode, bits, mu_law, wave_path, voc_pad, hop_length, 9 | voc_seq_len, preemphasis, n_fft, n_mels, win_length, max_db, ref_db, top_db): 10 | # generator只能进行一次生成,故需要while True来进行多个epoch的数据生成 11 | while True: 12 | # 每epoch将所有数据进行一次shuffle 13 | # order = np.random.choice(len(wav_name_list), len(wav_name_list), replace=False) 14 | # audio_data_path_list = [wav_name_list[i] for i in order] 15 | audio_data_path_list = wav_name_list 16 | batchs = len(wav_name_list) // batch_size 17 | for idx in range(batchs): 18 | # 逐步取音频名 19 | wav_name_list2 = 
audio_data_path_list[idx * batch_size: (idx + 1) * batch_size] 20 | 21 | # 取音频数据 22 | input_mel, input_sig = read_data( 23 | wave_path, sample_rate, peak_norm, voc_mode, bits, mu_law, wav_name_list2, preemphasis, n_fft, n_mels, 24 | hop_length, win_length, max_db, ref_db, top_db 25 | ) 26 | 27 | dataset = collate_vocoder(input_mel, input_sig, voc_seq_len, hop_length, voc_pad, voc_mode, bits) 28 | # input_mel = tf.convert_to_tensor(input_mel[0]) 29 | # input_sig = tf.convert_to_tensor(input_sig[0]) 30 | yield dataset 31 | 32 | 33 | def collate_vocoder(input_mel: tf.Tensor, input_sig: tf.Tensor, voc_seq_len, hop_length, voc_pad, voc_mode, bits): 34 | # print(tf.shape(input_mel[0])) 35 | mel_win = voc_seq_len // hop_length + 2 * voc_pad 36 | 37 | max_offsets = [x.shape[-1] - 2 - (mel_win + 2 * voc_pad) for x in input_mel] 38 | mel_offsets = [np.random.randint(0, offset) for offset in max_offsets] 39 | sig_offsets = [(offset + voc_pad) * hop_length for offset in mel_offsets] 40 | 41 | mels = [x[:, mel_offsets[i]:mel_offsets[i] + mel_win] for i, x in enumerate(input_mel)] 42 | # mels = [x[:, mel_offsets[i]:mel_offsets[i] + mel_win] for i, x in enumerate(input_mel)] 43 | 44 | labels = [x[sig_offsets[i]:sig_offsets[i] + voc_seq_len + 1] for i, x in enumerate(input_sig)] 45 | 46 | mels = np.stack(mels).astype(np.float32) 47 | labels = np.stack(labels).astype(np.int64) 48 | 49 | mels = tf.convert_to_tensor(mels) 50 | labels = tf.convert_to_tensor(labels) 51 | 52 | x = labels[:, :voc_seq_len] 53 | y = labels[:, 1:] 54 | bits = 16 if voc_mode == 'MOL' else bits 55 | 56 | x = label_2_float(tf.cast(x, dtype=float), bits) 57 | 58 | if voc_mode == 'MOL': 59 | y = label_2_float(tf.cast(y, dtype=float), bits) 60 | 61 | dataset = [x, y, mels] 62 | return dataset 63 | -------------------------------------------------------------------------------- /hlp/tts/wavernn/preprocess.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import librosa 5 | 6 | sys.path.append(os.path.abspath(__file__)[:os.path.abspath(__file__).rfind("\\hlp\\")]) 7 | from hlp.tts.utils.spec import get_spectrograms 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | 13 | # 处理语音文件 14 | def load_wav(path, sample_rate): 15 | y = librosa.load(path, sr=sample_rate)[0] 16 | return y 17 | 18 | 19 | def process_wav(path, sample_rate, peak_norm, voc_mode, bits, mu_law, preemphasis, n_fft, n_mels, hop_length, win_length 20 | , max_db, ref_db, top_db): 21 | y = load_wav(path, sample_rate) 22 | peak = np.abs(y).max() 23 | if peak_norm or peak > 1.0: 24 | y /= peak 25 | 26 | mel, _ = get_spectrograms(path, preemphasis, n_fft, n_mels, hop_length, win_length, max_db, ref_db, top_db) 27 | mel = tf.transpose(mel, (1, 0)).numpy() 28 | # mel = melspectrogram(y) 29 | if voc_mode == 'RAW': 30 | quant = encode_mu_law(y, mu=2 ** bits) if mu_law else float_2_label(y, bits=bits) 31 | elif voc_mode == 'MOL': 32 | quant = float_2_label(y, bits=16) 33 | 34 | return mel.astype(np.float32), quant.astype(np.int64) 35 | 36 | 37 | def read_data(path, sample_rate, peak_norm, voc_mode, bits, mu_law, wav_name_list2, preemphasis, n_fft, n_mels, 38 | hop_length, win_length, max_db, ref_db, top_db): 39 | mel_list = [] 40 | sig_list = [] 41 | for file in wav_name_list2: 42 | m, x = process_wav(path + file + '.wav', sample_rate, peak_norm, voc_mode, bits, mu_law, preemphasis, n_fft, 43 | n_mels, hop_length, win_length, max_db, ref_db, top_db) 44 | 45 | mel_list.append(m) 46 | sig_list.append(x) 
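# Each element of mel_list is a (n_mels, T) float32 spectrogram and the matching
# element of sig_list is the quantized waveform (mu-law codes or fixed-point labels,
# depending on voc_mode/mu_law); utterance lengths differ, so both are returned as
# plain Python lists rather than stacked into a single tensor.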
47 | 48 | return mel_list, sig_list 49 | 50 | 51 | def encode_mu_law(x, mu): 52 | mu = mu - 1 53 | fx = np.sign(x) * np.log(1 + mu * np.abs(x)) / np.log(1 + mu) 54 | return np.floor((fx + 1) / 2 * mu + 0.5) 55 | 56 | 57 | def float_2_label(x, bits): 58 | assert abs(x).max() <= 1.0 59 | x = (x + 1.) * (2 ** bits - 1) / 2 60 | return x.clip(0, 2 ** bits - 1) 61 | 62 | 63 | # 提取语音文件名 64 | def process_wav_name(wav_path): 65 | datanames = os.listdir(wav_path) 66 | wav_name_list = [] 67 | for i in datanames: 68 | wav_name_list.append(i[:10]) 69 | return wav_name_list 70 | 71 | 72 | def label_2_float(x, bits): 73 | return 2 * x / (2 ** bits - 1.) - 1. 74 | -------------------------------------------------------------------------------- /hlp/tts/wavernn/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | 7 | def load_checkpoint(model: tf.keras.Model, checkpoint_dir: str, checkpoint_save_size: int): 8 | """ 9 | 恢复检查点 10 | """ 11 | # 如果检查点存在就恢复,如果不存在就重新创建一个 12 | checkpoint = tf.train.Checkpoint(wavernn=model) 13 | ckpt_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=checkpoint_save_size) 14 | 15 | if os.path.exists(checkpoint_dir): 16 | if ckpt_manager.latest_checkpoint: 17 | checkpoint.restore(ckpt_manager.latest_checkpoint).expect_partial() 18 | else: 19 | os.makedirs(checkpoint_dir, exist_ok=True) 20 | # if execute_type == "generate": 21 | # print("没有检查点,请先执行train模式") 22 | # exit(0) 23 | 24 | return ckpt_manager 25 | 26 | 27 | def log_sum_exp(x): 28 | """ numerically stable log_sum_exp implementation that prevents overflow """ 29 | # TF ordering 30 | dim = len(x.shape) - 1 31 | m = tf.reduce_max(x, axis=dim) 32 | m2, _ = tf.reduce_max(x, axis=dim, keepdims=True) 33 | return m + tf.math.log(tf.reduce_sum(tf.exp(x - m2), axis=dim)) 34 | 35 | 36 | # It is adapted from https://github.com/r9y9/wavenet_vocoder/blob/master/wavenet_vocoder/mixture.py 37 | def Discretized_Mix_Logistic_Loss(y_hat, y, num_classes=65536, 38 | log_scale_min=None, reduce=True): 39 | if log_scale_min is None: 40 | log_scale_min = float(np.log(1e-14)) 41 | y_hat = tf.transpose(y_hat, (0, 2, 1)) 42 | 43 | # assert y_hat.dim() == 3 44 | assert y_hat.shape[1] % 3 == 0 45 | nr_mix = y_hat.shape[1] // 3 46 | 47 | # (B x T x C) 48 | y_hat = tf.transpose(y_hat, (0, 2, 1)) 49 | 50 | # unpack parameters. (B, T, num_mixtures) x 3 51 | logit_probs = y_hat[:, :, :nr_mix] 52 | means = y_hat[:, :, nr_mix:2 * nr_mix] 53 | log_scales = tf.clip_by_value(y_hat[:, :, 2 * nr_mix:3 * nr_mix], clip_value_min=log_scale_min, 54 | clip_value_max=10000000) 55 | 56 | # B x T x 1 -> B x T x num_mixtures 57 | y = tf.tile(y, (1, 1, means.shape[-1])) 58 | centered_y = y - means 59 | inv_stdv = tf.exp(-log_scales) 60 | plus_in = inv_stdv * (centered_y + 1. / (num_classes - 1)) 61 | cdf_plus = tf.sigmoid(plus_in) 62 | min_in = inv_stdv * (centered_y - 1. 
/ (num_classes - 1)) 63 | cdf_min = tf.sigmoid(min_in) 64 | 65 | # log probability for edge case of 0 (before scaling) 66 | # equivalent: torch.log(F.sigmoid(plus_in)) 67 | log_cdf_plus = plus_in - tf.nn.softplus(plus_in) 68 | 69 | # log probability for edge case of 255 (before scaling) 70 | # equivalent: (1 - F.sigmoid(min_in)).log() 71 | log_one_minus_cdf_min = -tf.nn.softplus(min_in) 72 | 73 | # probability for all other cases 74 | cdf_delta = cdf_plus - cdf_min 75 | 76 | mid_in = inv_stdv * centered_y 77 | # log probability in the center of the bin, to be used in extreme cases 78 | # (not actually used in our code) 79 | log_pdf_mid = mid_in - log_scales - 2. * tf.nn.softplus(mid_in) 80 | 81 | # tf equivalent 82 | """ 83 | log_probs = tf.where(x < -0.999, log_cdf_plus, 84 | tf.where(x > 0.999, log_one_minus_cdf_min, 85 | tf.where(cdf_delta > 1e-5, 86 | tf.log(tf.maximum(cdf_delta, 1e-12)), 87 | log_pdf_mid - np.log(127.5)))) 88 | """ 89 | # TODO: cdf_delta <= 1e-5 actually can happen. How can we choose the value 90 | # for num_classes=65536 case? 1e-7? not sure.. 91 | inner_inner_cond = tf.cast((cdf_delta > 1e-5), dtype=float) 92 | 93 | inner_inner_out = inner_inner_cond * \ 94 | tf.math.log(tf.clip_by_value(cdf_delta, clip_value_min=1e-12, clip_value_max=100000000)) + \ 95 | (1. - inner_inner_cond) * (log_pdf_mid - np.log((num_classes - 1) / 2)) 96 | inner_cond = tf.cast((y > 0.999), dtype=float) 97 | inner_out = inner_cond * log_one_minus_cdf_min + (1. - inner_cond) * inner_inner_out 98 | cond = tf.cast((y < -0.999), dtype=float) 99 | log_probs = cond * log_cdf_plus + (1. - cond) * inner_out 100 | 101 | log_probs = log_probs + tf.nn.log_softmax(logit_probs, -1) 102 | 103 | if reduce: 104 | return -tf.reduce_mean(log_sum_exp(log_probs)) 105 | else: 106 | return -tf.expand_dims(log_sum_exp(log_probs), axis=-1) 107 | -------------------------------------------------------------------------------- /hlp/tts/wavernn/wavernn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | class ResBlock(tf.keras.layers.Layer): 6 | def __init__(self, dims): 7 | super().__init__() 8 | self.conv1 = tf.keras.layers.Conv1D(filters=dims, kernel_size=1, use_bias=False) 9 | self.conv2 = tf.keras.layers.Conv1D(filters=dims, kernel_size=1, use_bias=False) 10 | self.batch_norm1 = tf.keras.layers.BatchNormalization() 11 | self.batch_norm2 = tf.keras.layers.BatchNormalization() 12 | 13 | def call(self, x): 14 | residual = x 15 | x = self.conv1(x) 16 | x = self.batch_norm1(x) 17 | x = tf.nn.relu(x) 18 | x = self.conv2(x) 19 | x = self.batch_norm2(x) 20 | return x + residual 21 | 22 | 23 | class MelResNet(tf.keras.Model): 24 | def __init__(self, res_blocks, in_dims, compute_dims, res_out_dims, pad): 25 | super().__init__() 26 | k_size = pad * 2 + 1 27 | self.conv_in = tf.keras.layers.Conv1D(compute_dims, kernel_size=k_size, use_bias=False) 28 | self.batch_norm = tf.keras.layers.BatchNormalization() 29 | 30 | self.layer = [] 31 | # self.layer = tf.keras.Sequential() 32 | for i in range(res_blocks): 33 | self.layer.append(ResBlock(compute_dims)) 34 | self.conv_out = tf.keras.layers.Conv1D(res_out_dims, kernel_size=1) 35 | 36 | def call(self, x): 37 | x = tf.transpose(x, [0, 2, 1]) 38 | x = self.conv_in(x) 39 | x = self.batch_norm(x) 40 | x = tf.nn.relu(x) 41 | for f in self.layer: 42 | x = f(x) 43 | 44 | x = self.conv_out(x) 45 | x = tf.transpose(x, (0, 2, 1)) 46 | return x 47 | 48 | 49 | class 
Stretch2d(tf.keras.layers.Layer): 50 | def __init__(self, x_scale, y_scale): 51 | super().__init__() 52 | self.x_scale = x_scale 53 | self.y_scale = y_scale 54 | 55 | def call(self, x): 56 | b, c, h, w = x.shape 57 | 58 | x = tf.expand_dims(x, axis=-1) 59 | x = tf.expand_dims(x, axis=3) 60 | 61 | x = tf.tile(x, [1, 1, 1, self.y_scale, 1, self.x_scale]) 62 | return tf.reshape(x, (b, c, h * self.y_scale, w * self.x_scale)) 63 | 64 | 65 | class UpsampleNetwork(tf.keras.layers.Layer): 66 | def __init__(self, feat_dims, upsample_scales, compute_dims, 67 | res_blocks, res_out_dims, pad): 68 | super().__init__() 69 | total_scale = np.cumproduct(upsample_scales)[-1] 70 | self.indent = pad * total_scale 71 | self.resnet = MelResNet(res_blocks, feat_dims, compute_dims, res_out_dims, pad) 72 | self.resnet_stretch = Stretch2d(total_scale, 1) 73 | self.up_layers1 = [] 74 | self.up_layers2 = [] 75 | for scale in upsample_scales: 76 | k_size = (1, scale * 2 + 1) 77 | padding = (0, scale) 78 | stretch = Stretch2d(scale, 1) 79 | conv = tf.keras.layers.Conv2D(filters=1, kernel_size=k_size, 80 | kernel_initializer=tf.constant_initializer(1. / k_size[1]), 81 | padding="same", use_bias=False) 82 | self.up_layers1.append(stretch) 83 | self.up_layers2.append(conv) 84 | 85 | def call(self, m): 86 | aux = self.resnet(m) 87 | aux = tf.expand_dims(aux, axis=1) 88 | aux = self.resnet_stretch(aux) 89 | aux = tf.squeeze(aux, axis=1) 90 | m = tf.expand_dims(m, axis=1) 91 | 92 | for f1, f2 in zip(self.up_layers1, self.up_layers2): 93 | m = f1(m) 94 | m = tf.transpose(m, (0, 3, 2, 1)) 95 | 96 | m = f2(m) 97 | m = tf.transpose(m, (0, 3, 2, 1)) 98 | 99 | m = tf.squeeze(m, axis=1)[:, :, self.indent:-self.indent] 100 | return tf.transpose(m, (0, 2, 1)), tf.transpose(aux, (0, 2, 1)) 101 | 102 | 103 | class WaveRNN(tf.keras.Model): 104 | def __init__(self, rnn_dims, fc_dims, bits, pad, upsample_factors, 105 | feat_dims, compute_dims, res_out_dims, res_blocks, 106 | hop_length, sample_rate, mode='RAW'): 107 | super().__init__() 108 | self.mode = mode 109 | self.pad = pad 110 | if self.mode == 'RAW': 111 | self.n_classes = 2 ** bits 112 | elif self.mode == 'MOL': 113 | self.n_classes = 30 114 | else: 115 | RuntimeError("Unknown model mode value - ", self.mode) 116 | 117 | # List of rnns to call `flatten_parameters()` on 118 | self._to_flatten = [] 119 | self.rnn_dims = rnn_dims 120 | self.aux_dims = res_out_dims // 4 121 | self.hop_length = hop_length 122 | self.sample_rate = sample_rate 123 | 124 | self.upsample = UpsampleNetwork(feat_dims, upsample_factors, compute_dims, res_blocks, res_out_dims, pad) 125 | 126 | self.I = tf.keras.layers.Dense(rnn_dims, activation=None) 127 | 128 | self.rnn1 = tf.keras.layers.GRU(rnn_dims) 129 | self.rnn2 = tf.keras.layers.GRU(rnn_dims) 130 | self._to_flatten += [self.rnn1, self.rnn2] 131 | 132 | self.fc1 = tf.keras.layers.Dense(fc_dims, activation=None) 133 | self.fc2 = tf.keras.layers.Dense(fc_dims, activation=None) 134 | self.fc3 = tf.keras.layers.Dense(self.n_classes, activation=None) 135 | 136 | def call(self, x, mels): 137 | bsize = x.shape[0] 138 | h1 = tf.zeros(shape=(bsize, self.rnn_dims)) 139 | 140 | h2 = tf.zeros(shape=(bsize, self.rnn_dims)) 141 | mels, aux = self.upsample(mels) 142 | 143 | aux_idx = [self.aux_dims * i for i in range(5)] 144 | a1 = aux[:, :, aux_idx[0]:aux_idx[1]] 145 | a2 = aux[:, :, aux_idx[1]:aux_idx[2]] 146 | a3 = aux[:, :, aux_idx[2]:aux_idx[3]] 147 | a4 = aux[:, :, aux_idx[3]:aux_idx[4]] 148 | 149 | x = tf.concat([tf.expand_dims(x, axis=-1), mels, a1], axis=2) 
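# The quantized sample sequence (expanded to a single feature channel) is joined
# with the upsampled mel frames and the first auxiliary split a1 before the input
# projection; the remaining splits a2-a4 are concatenated back in before the second
# GRU and the two fully connected layers below.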
150 | x = self.I(x) 151 | res = x 152 | x, _ = self.rnn1(x, h1) 153 | 154 | x = x + res 155 | res = x 156 | x = tf.concat([x, a2], axis=2) 157 | x, _ = self.rnn2(x, h2) 158 | 159 | x = x + res 160 | x = tf.concat([x, a3], axis=2) 161 | x = tf.nn.relu(self.fc1(x)) 162 | 163 | x = tf.concat([x, a4], axis=2) 164 | x = tf.nn.relu(self.fc2(x)) 165 | return self.fc3(x) 166 | -------------------------------------------------------------------------------- /hlp/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DengBoCong/hlp/1cf596e870bc1ebd017fc39c4c2e5ba7d74c367e/hlp/utils/__init__.py -------------------------------------------------------------------------------- /hlp/utils/optimizers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule): 5 | def __init__(self, d_model, warmup_steps=4000): 6 | super(CustomSchedule, self).__init__() 7 | self.d_model = d_model 8 | self.d_model = tf.cast(self.d_model, tf.float32) 9 | self.warmup_steps = warmup_steps 10 | 11 | def __call__(self, step): 12 | arg1 = tf.math.rsqrt(step) 13 | arg2 = step * (self.warmup_steps ** -1.5) 14 | return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2) 15 | 16 | 17 | def loss_func_mask(real, pred, weights=None): 18 | """ 屏蔽填充的SparseCategoricalCrossentropy损失 19 | 20 | 真实标签real中有0填充部分,这部分不记入预测损失 21 | 22 | :param weights: 样本权重 23 | :param real: 真实标签张量 24 | :param pred: logits张量 25 | :return: 损失平均值 26 | """ 27 | loss_object = tf.keras.losses.SparseCategoricalCrossentropy( 28 | from_logits=True, reduction='none') 29 | mask = tf.math.logical_not(tf.math.equal(real, 0)) # 填充位为0,掩蔽 30 | loss_ = loss_object(real, pred, sample_weight=weights) 31 | mask = tf.cast(mask, dtype=loss_.dtype) 32 | loss_ *= mask 33 | return tf.reduce_mean(loss_) 34 | -------------------------------------------------------------------------------- /hlp/utils/text_split.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import jieba 4 | 5 | 6 | def split_en_word(s): 7 | """ 对英文文本小写并按词进行切分 8 | 9 | :param s: 待切分的英文文本 10 | :return: token列表 11 | """ 12 | s = s.lower().strip() 13 | # 在单词与跟在其后的标点符号之间插入一个空格 14 | # 例如: "he is a boy." => "he is a boy ." 15 | s = re.sub(r"([?.!,])", r" \1 ", s) # 切分断句的标点符号 16 | 17 | # 除了 (a-z, A-Z, ".", "?", "!", ","),将所有字符替换为空格 18 | s = re.sub(r"[^a-zA-Z?.!,]+", " ", s) 19 | 20 | s = re.sub(r'[" "]+', " ", s) # 合并多个空格 21 | 22 | return s.strip().split() 23 | 24 | 25 | def split_en_char(s): 26 | """ 对英文文本小写并按字符进行切分 27 | 28 | :param s: 待切分的英文文本 29 | :return: token列表 30 | """ 31 | s = s.lower().strip() 32 | s = re.sub(r"([?.!,])", r" \1 ", s) # 切分断句的标点符号 33 | 34 | # 除了 (a-z, A-Z, ".", "?", "!", ",")外的所有字符替换为空格 35 | s = re.sub(r"[^a-zA-Z?.!,]+", " ", s) 36 | 37 | s = re.sub(r'[" "]+', " ", s) # 合并多个空格 38 | 39 | return [c for c in s.strip()] 40 | 41 | 42 | def split_zh_char(s): 43 | """ 对中文按字进行切分 44 | 45 | :param s: 待切分的中文 46 | :return: token列表 47 | """ 48 | s = s.lower().strip() 49 | 50 | s = [c for c in s] 51 | s = ' '.join(s) 52 | s = re.sub(r'[" "]+', " ", s) # 合并多个空格 53 | 54 | return s.strip() 55 | 56 | 57 | def split_zh_word(s): 58 | """ 对中文(含英文)按词进行切切分 59 | 60 | :param s: 待切分的中文 61 | :return: token列表 62 | """ 63 | return list(jieba.cut(s.lower().strip())) 64 | 65 | 66 | if __name__ == "__main__": 67 | en_txt1 = "I like NLP." 
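# Expected behaviour: split_en_word lowercases the text and separates sentence
# punctuation, e.g. "I like NLP." -> ['i', 'like', 'nlp', '.'];
# split_en_char returns the individual characters of the cleaned string (spaces included).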
68 | print(split_en_word(en_txt1)) 69 | print(split_en_char(en_txt1)) 70 | 71 | en_txt2 = " I like NLP. " 72 | print(split_en_word(en_txt2)) 73 | print(split_en_char(en_txt2)) 74 | 75 | cn_txt1 = "我喜欢深度学习。" 76 | print(split_zh_char(cn_txt1)) 77 | print(split_zh_word(cn_txt1)) 78 | 79 | cn_txt2 = " 我喜欢深度学习。 " 80 | print(split_zh_char(cn_txt2)) 81 | print(split_zh_word(cn_txt2)) 82 | 83 | cn_txt3 = "我喜欢book." 84 | print(split_zh_word(cn_txt3)) 85 | -------------------------------------------------------------------------------- /hlp/utils/train_history.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import matplotlib.pyplot as plt 4 | import matplotlib.ticker as ticker 5 | 6 | 7 | # TODO: 指标名不在代码中指定 8 | def show_and_save_history(history, save_dir, valid_freq=1): 9 | """ 10 | 用于显示历史指标趋势以及保存历史指标图表图 11 | :param history: 历史指标字典 12 | :param save_dir: 历史指标显示图片保存位置 13 | :param valid_freq: 验证频率 14 | :return: 无返回值 15 | """ 16 | train_x_axis = [i + 1 for i in range(len(history['loss']))] 17 | valid_x_axis = [(i + 1) * valid_freq for i in range(len(history['val_loss']))] 18 | 19 | figure, axis = plt.subplots(1, 1) 20 | tick_spacing = 1 21 | if len(history['loss']) > 20: 22 | tick_spacing = len(history['loss']) // 20 23 | 24 | plt.plot(train_x_axis, history['loss'], label='loss', marker='.') 25 | plt.plot(train_x_axis, history['accuracy'], label='accuracy', marker='.') 26 | plt.plot(valid_x_axis, history['val_loss'], label='val_loss', marker='.', linestyle='--') 27 | plt.plot(valid_x_axis, history['val_accuracy'], label='val_accuracy', marker='.', linestyle='--') 28 | 29 | plt.xticks(valid_x_axis) 30 | plt.xlabel('epoch') 31 | plt.legend() 32 | axis.xaxis.set_major_locator(ticker.MultipleLocator(tick_spacing)) 33 | 34 | save_path = save_dir + time.strftime("%Y_%m_%d_%H_%M_%S_", time.localtime(time.time())) 35 | if not os.path.exists(save_dir): 36 | os.makedirs(save_dir, exist_ok=True) 37 | plt.savefig(save_path) 38 | plt.show() 39 | -------------------------------------------------------------------------------- /hlp/utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | 4 | 5 | def load_tokenizer(dict_path: str): 6 | """ 7 | 通过字典加载tokenizer 8 | :param dict_path: 字典路径 9 | :return tokenizer: 分词器 10 | """ 11 | if not os.path.exists(dict_path): 12 | print("字典不存在,请检查之后重试") 13 | exit(0) 14 | with open(dict_path, 'r', encoding='utf-8') as dict_file: 15 | json_string = dict_file.read().strip().strip("\n") 16 | tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(json_string) 17 | 18 | return tokenizer 19 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jieba==0.42.1 2 | librosa==0.8.0 3 | python-speech-features==0.6 4 | matplotlib==3.3.2 5 | nltk==3.5 6 | numpy==1.18.5 7 | playsound==1.2.2 8 | pyaudio==0.2.11 9 | scikit-learn==0.23.2 10 | scipy==1.5.3 11 | tensorflow==2.3.1 12 | tensorflow-datasets==3.2.1 --------------------------------------------------------------------------------
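A minimal usage sketch for hlp/utils/optimizers.py (not a file in the repository): it shows how the warmup schedule and the masked loss are typically combined; the d_model value, the Adam hyper-parameters and the dummy tensors below are assumptions chosen only for illustration.

import tensorflow as tf
from hlp.utils.optimizers import CustomSchedule, loss_func_mask

# Transformer-style warmup schedule driving an Adam optimizer (assumed d_model=256).
learning_rate = CustomSchedule(d_model=256, warmup_steps=4000)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

# Masked loss: losses at padded target positions (id 0) are zeroed out before averaging.
real = tf.constant([[5, 7, 0, 0]], dtype=tf.int32)  # dummy padded target ids
pred = tf.random.uniform((1, 4, 1000))              # dummy logits over a 1000-token vocabulary
loss = loss_func_mask(real, pred)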