├── AR ├── __init__.py ├── __pycache__ │ └── __init__.cpython-39.pyc ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── bucket_sampler.cpython-39.pyc │ │ ├── data_module.cpython-39.pyc │ │ └── dataset.cpython-39.pyc │ ├── bucket_sampler.py │ ├── data_module.py │ └── dataset.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── t2s_lightning_module.cpython-39.pyc │ │ ├── t2s_model.cpython-39.pyc │ │ └── utils.cpython-39.pyc │ ├── t2s_lightning_module.py │ ├── t2s_model.py │ └── utils.py ├── modules │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── activation.cpython-39.pyc │ │ ├── embedding.cpython-39.pyc │ │ ├── lr_schedulers.cpython-39.pyc │ │ ├── optim.cpython-39.pyc │ │ ├── patched_mha_with_cache.cpython-39.pyc │ │ ├── scaling.cpython-39.pyc │ │ └── transformer.cpython-39.pyc │ ├── activation.py │ ├── embedding.py │ ├── lr_schedulers.py │ ├── optim.py │ ├── patched_mha_with_cache.py │ ├── scaling.py │ └── transformer.py ├── text_processing │ ├── __init__.py │ ├── phonemizer.py │ └── symbols.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-39.pyc │ └── io.cpython-39.pyc │ ├── initialize.py │ └── io.py ├── README.md ├── __pycache__ ├── my_utils.cpython-39.pyc ├── process_ckpt.cpython-39.pyc └── utils.cpython-39.pyc ├── configs ├── s1longer.yaml └── s2.json ├── datasets ├── processed │ └── DailyTalk │ │ ├── 2-cnhubert-len.txt │ │ ├── 2-name2text.txt │ │ ├── 4-cnhubert │ │ ├── 0_0_d1.wav.pt │ │ ├── 0_0_d2.wav.pt │ │ ├── 0_0_d4.wav.pt │ │ ├── 0_1_d0.wav.pt │ │ ├── 0_1_d3.wav.pt │ │ ├── 10_0_d2.wav.pt │ │ ├── 10_0_d4.wav.pt │ │ ├── 10_1_d0.wav.pt │ │ ├── 11_0_d0.wav.pt │ │ ├── 11_1_d2.wav.pt │ │ ├── 11_1_d4.wav.pt │ │ ├── 12_0_d2.wav.pt │ │ ├── 1_0_d0.wav.pt │ │ ├── 1_0_d3.wav.pt │ │ ├── 1_1_d1.wav.pt │ │ ├── 1_1_d2.wav.pt │ │ ├── 1_1_d4.wav.pt │ │ ├── 2_0_d1.wav.pt │ │ ├── 2_0_d2.wav.pt │ │ ├── 2_0_d4.wav.pt │ │ ├── 2_1_d0.wav.pt │ │ ├── 2_1_d3.wav.pt │ │ ├── 3_0_d0.wav.pt │ │ ├── 3_0_d3.wav.pt │ │ ├── 3_1_d1.wav.pt │ │ ├── 3_1_d2.wav.pt │ │ ├── 3_1_d4.wav.pt │ │ ├── 4_0_d1.wav.pt │ │ ├── 4_0_d2.wav.pt │ │ ├── 4_0_d4.wav.pt │ │ ├── 4_1_d0.wav.pt │ │ ├── 4_1_d3.wav.pt │ │ ├── 5_0_d0.wav.pt │ │ ├── 5_1_d1.wav.pt │ │ ├── 5_1_d2.wav.pt │ │ ├── 5_1_d4.wav.pt │ │ ├── 6_0_d1.wav.pt │ │ ├── 6_0_d2.wav.pt │ │ ├── 6_0_d4.wav.pt │ │ ├── 6_1_d0.wav.pt │ │ ├── 7_0_d0.wav.pt │ │ ├── 7_1_d1.wav.pt │ │ ├── 7_1_d2.wav.pt │ │ ├── 7_1_d4.wav.pt │ │ ├── 8_0_d2.wav.pt │ │ ├── 8_0_d4.wav.pt │ │ ├── 8_1_d0.wav.pt │ │ ├── 9_0_d0.wav.pt │ │ ├── 9_1_d2.wav.pt │ │ └── 9_1_d4.wav.pt │ │ ├── 5-wav32k │ │ ├── 0_0_d1.wav │ │ ├── 0_0_d2.wav │ │ ├── 0_0_d4.wav │ │ ├── 0_1_d0.wav │ │ ├── 0_1_d3.wav │ │ ├── 10_0_d2.wav │ │ ├── 10_0_d4.wav │ │ ├── 10_1_d0.wav │ │ ├── 11_0_d0.wav │ │ ├── 11_1_d2.wav │ │ ├── 11_1_d4.wav │ │ ├── 12_0_d2.wav │ │ ├── 1_0_d0.wav │ │ ├── 1_0_d3.wav │ │ ├── 1_1_d1.wav │ │ ├── 1_1_d2.wav │ │ ├── 1_1_d4.wav │ │ ├── 2_0_d1.wav │ │ ├── 2_0_d2.wav │ │ ├── 2_0_d4.wav │ │ ├── 2_1_d0.wav │ │ ├── 2_1_d3.wav │ │ ├── 3_0_d0.wav │ │ ├── 3_0_d3.wav │ │ ├── 3_1_d1.wav │ │ ├── 3_1_d2.wav │ │ ├── 3_1_d4.wav │ │ ├── 4_0_d1.wav │ │ ├── 4_0_d2.wav │ │ ├── 4_0_d4.wav │ │ ├── 4_1_d0.wav │ │ ├── 4_1_d3.wav │ │ ├── 5_0_d0.wav │ │ ├── 5_1_d1.wav │ │ ├── 5_1_d2.wav │ │ ├── 5_1_d4.wav │ │ ├── 6_0_d1.wav │ │ ├── 6_0_d2.wav │ │ ├── 6_0_d4.wav │ │ ├── 6_1_d0.wav │ │ ├── 7_0_d0.wav │ │ ├── 7_1_d1.wav │ │ ├── 7_1_d2.wav │ │ ├── 7_1_d4.wav │ │ ├── 8_0_d2.wav │ │ ├── 8_0_d4.wav │ │ ├── 8_1_d0.wav │ │ ├── 9_0_d0.wav │ │ ├── 9_1_d2.wav │ │ └── 9_1_d4.wav │ │ ├── 6-name2semantic.tsv │ │ ├── slicer_opt.list │ │ ├── train-len-speaker │ │ ├── 0_0_d1.wav.npy │ │ ├── 0_0_d2.wav.npy │ │ ├── 0_0_d4.wav.npy │ │ ├── 0_1_d0.wav.npy │ │ ├── 0_1_d3.wav.npy │ │ ├── 10_0_d2.wav.npy │ │ ├── 10_0_d4.wav.npy │ │ ├── 10_1_d0.wav.npy │ │ ├── 11_0_d0.wav.npy │ │ ├── 11_1_d2.wav.npy │ │ ├── 11_1_d4.wav.npy │ │ ├── 12_0_d2.wav.npy │ │ ├── 1_0_d0.wav.npy │ │ ├── 1_0_d3.wav.npy │ │ ├── 1_1_d1.wav.npy │ │ ├── 1_1_d2.wav.npy │ │ ├── 1_1_d4.wav.npy │ │ ├── 2_0_d1.wav.npy │ │ ├── 2_0_d2.wav.npy │ │ ├── 2_0_d4.wav.npy │ │ ├── 2_1_d0.wav.npy │ │ ├── 2_1_d3.wav.npy │ │ ├── 3_0_d0.wav.npy │ │ ├── 3_0_d3.wav.npy │ │ ├── 3_1_d1.wav.npy │ │ ├── 3_1_d2.wav.npy │ │ ├── 3_1_d4.wav.npy │ │ ├── 4_0_d1.wav.npy │ │ ├── 4_0_d2.wav.npy │ │ ├── 4_0_d4.wav.npy │ │ ├── 4_1_d0.wav.npy │ │ ├── 4_1_d3.wav.npy │ │ ├── 5_0_d0.wav.npy │ │ ├── 5_1_d1.wav.npy │ │ ├── 5_1_d2.wav.npy │ │ ├── 5_1_d4.wav.npy │ │ ├── 6_0_d1.wav.npy │ │ ├── 6_0_d2.wav.npy │ │ ├── 6_0_d4.wav.npy │ │ ├── 6_1_d0.wav.npy │ │ ├── 7_0_d0.wav.npy │ │ ├── 7_1_d1.wav.npy │ │ ├── 7_1_d2.wav.npy │ │ ├── 7_1_d4.wav.npy │ │ ├── 8_0_d2.wav.npy │ │ ├── 8_0_d4.wav.npy │ │ ├── 8_1_d0.wav.npy │ │ ├── 9_0_d0.wav.npy │ │ ├── 9_1_d2.wav.npy │ │ └── 9_1_d4.wav.npy │ │ ├── train-semantic-phoneme │ │ ├── 0_0_d1.wav.npy │ │ ├── 0_0_d2.wav.npy │ │ ├── 0_0_d4.wav.npy │ │ ├── 0_1_d0.wav.npy │ │ ├── 0_1_d3.wav.npy │ │ ├── 10_0_d2.wav.npy │ │ ├── 10_0_d4.wav.npy │ │ ├── 10_1_d0.wav.npy │ │ ├── 11_0_d0.wav.npy │ │ ├── 11_1_d2.wav.npy │ │ ├── 11_1_d4.wav.npy │ │ ├── 12_0_d2.wav.npy │ │ ├── 1_0_d0.wav.npy │ │ ├── 1_0_d3.wav.npy │ │ ├── 1_1_d1.wav.npy │ │ ├── 1_1_d2.wav.npy │ │ ├── 1_1_d4.wav.npy │ │ ├── 2_0_d1.wav.npy │ │ ├── 2_0_d2.wav.npy │ │ ├── 2_0_d4.wav.npy │ │ ├── 2_1_d0.wav.npy │ │ ├── 2_1_d3.wav.npy │ │ ├── 3_0_d0.wav.npy │ │ ├── 3_0_d3.wav.npy │ │ ├── 3_1_d1.wav.npy │ │ ├── 3_1_d2.wav.npy │ │ ├── 3_1_d4.wav.npy │ │ ├── 4_0_d1.wav.npy │ │ ├── 4_0_d2.wav.npy │ │ ├── 4_0_d4.wav.npy │ │ ├── 4_1_d0.wav.npy │ │ ├── 4_1_d3.wav.npy │ │ ├── 5_0_d0.wav.npy │ │ ├── 5_1_d1.wav.npy │ │ ├── 5_1_d2.wav.npy │ │ ├── 5_1_d4.wav.npy │ │ ├── 6_0_d1.wav.npy │ │ ├── 6_0_d2.wav.npy │ │ ├── 6_0_d4.wav.npy │ │ ├── 6_1_d0.wav.npy │ │ ├── 7_0_d0.wav.npy │ │ ├── 7_1_d1.wav.npy │ │ ├── 7_1_d2.wav.npy │ │ ├── 7_1_d4.wav.npy │ │ ├── 8_0_d2.wav.npy │ │ ├── 8_0_d4.wav.npy │ │ ├── 8_1_d0.wav.npy │ │ ├── 9_0_d0.wav.npy │ │ ├── 9_1_d2.wav.npy │ │ └── 9_1_d4.wav.npy │ │ ├── train.list │ │ └── train.log └── raw │ └── DailyTalk │ ├── 0 │ ├── 0_1_d0.txt │ ├── 0_1_d0.wav │ ├── 0_1_d0.wav.pt │ ├── 10_1_d0.txt │ ├── 10_1_d0.wav │ ├── 10_1_d0.wav.pt │ ├── 11_0_d0.txt │ ├── 11_0_d0.wav │ ├── 11_0_d0.wav.pt │ ├── 1_0_d0.txt │ ├── 1_0_d0.wav │ ├── 1_0_d0.wav.pt │ ├── 2_1_d0.txt │ ├── 2_1_d0.wav │ ├── 2_1_d0.wav.pt │ ├── 3_0_d0.txt │ ├── 3_0_d0.wav │ ├── 3_0_d0.wav.pt │ ├── 4_1_d0.txt │ ├── 4_1_d0.wav │ ├── 4_1_d0.wav.pt │ ├── 5_0_d0.txt │ ├── 5_0_d0.wav │ ├── 5_0_d0.wav.pt │ ├── 6_1_d0.txt │ ├── 6_1_d0.wav │ ├── 6_1_d0.wav.pt │ ├── 7_0_d0.txt │ ├── 7_0_d0.wav │ ├── 7_0_d0.wav.pt │ ├── 8_1_d0.txt │ ├── 8_1_d0.wav │ ├── 8_1_d0.wav.pt │ ├── 9_0_d0.txt │ ├── 9_0_d0.wav │ └── 9_0_d0.wav.pt │ ├── 1 │ ├── 0_0_d1.txt │ ├── 0_0_d1.wav │ ├── 0_0_d1.wav.pt │ ├── 1_1_d1.txt │ ├── 1_1_d1.wav │ ├── 1_1_d1.wav.pt │ ├── 2_0_d1.txt │ ├── 2_0_d1.wav │ ├── 2_0_d1.wav.pt │ ├── 3_1_d1.txt │ ├── 3_1_d1.wav │ ├── 3_1_d1.wav.pt │ ├── 4_0_d1.txt │ ├── 4_0_d1.wav │ ├── 4_0_d1.wav.pt │ ├── 5_1_d1.txt │ ├── 5_1_d1.wav │ ├── 5_1_d1.wav.pt │ ├── 6_0_d1.txt │ ├── 6_0_d1.wav │ ├── 6_0_d1.wav.pt │ ├── 7_1_d1.txt │ ├── 7_1_d1.wav │ └── 7_1_d1.wav.pt │ ├── 2 │ ├── 0_0_d2.txt │ ├── 0_0_d2.wav │ ├── 0_0_d2.wav.pt │ ├── 10_0_d2.txt │ ├── 10_0_d2.wav │ ├── 10_0_d2.wav.pt │ ├── 11_1_d2.txt │ ├── 11_1_d2.wav │ ├── 11_1_d2.wav.pt │ ├── 12_0_d2.txt │ ├── 12_0_d2.wav │ ├── 12_0_d2.wav.pt │ ├── 1_1_d2.txt │ ├── 1_1_d2.wav │ ├── 1_1_d2.wav.pt │ ├── 2_0_d2.txt │ ├── 2_0_d2.wav │ ├── 2_0_d2.wav.pt │ ├── 3_1_d2.txt │ ├── 3_1_d2.wav │ ├── 3_1_d2.wav.pt │ ├── 4_0_d2.txt │ ├── 4_0_d2.wav │ ├── 4_0_d2.wav.pt │ ├── 5_1_d2.txt │ ├── 5_1_d2.wav │ ├── 5_1_d2.wav.pt │ ├── 6_0_d2.txt │ ├── 6_0_d2.wav │ ├── 6_0_d2.wav.pt │ ├── 7_1_d2.txt │ ├── 7_1_d2.wav │ ├── 7_1_d2.wav.pt │ ├── 8_0_d2.txt │ ├── 8_0_d2.wav │ ├── 8_0_d2.wav.pt │ ├── 9_1_d2.txt │ ├── 9_1_d2.wav │ └── 9_1_d2.wav.pt │ ├── 3 │ ├── 0_1_d3.txt │ ├── 0_1_d3.wav │ ├── 0_1_d3.wav.pt │ ├── 1_0_d3.txt │ ├── 1_0_d3.wav │ ├── 1_0_d3.wav.pt │ ├── 2_1_d3.txt │ ├── 2_1_d3.wav │ ├── 2_1_d3.wav.pt │ ├── 3_0_d3.txt │ ├── 3_0_d3.wav │ ├── 3_0_d3.wav.pt │ ├── 4_1_d3.txt │ ├── 4_1_d3.wav │ └── 4_1_d3.wav.pt │ └── 4 │ ├── 0_0_d4.txt │ ├── 0_0_d4.wav │ ├── 0_0_d4.wav.pt │ ├── 10_0_d4.txt │ ├── 10_0_d4.wav │ ├── 10_0_d4.wav.pt │ ├── 11_1_d4.txt │ ├── 11_1_d4.wav │ ├── 11_1_d4.wav.pt │ ├── 1_1_d4.txt │ ├── 1_1_d4.wav │ ├── 1_1_d4.wav.pt │ ├── 2_0_d4.txt │ ├── 2_0_d4.wav │ ├── 2_0_d4.wav.pt │ ├── 3_1_d4.txt │ ├── 3_1_d4.wav │ ├── 3_1_d4.wav.pt │ ├── 4_0_d4.txt │ ├── 4_0_d4.wav │ ├── 4_0_d4.wav.pt │ ├── 5_1_d4.txt │ ├── 5_1_d4.wav │ ├── 5_1_d4.wav.pt │ ├── 6_0_d4.txt │ ├── 6_0_d4.wav │ ├── 6_0_d4.wav.pt │ ├── 7_1_d4.txt │ ├── 7_1_d4.wav │ ├── 7_1_d4.wav.pt │ ├── 8_0_d4.txt │ ├── 8_0_d4.wav │ ├── 8_0_d4.wav.pt │ ├── 9_1_d4.txt │ ├── 9_1_d4.wav │ └── 9_1_d4.wav.pt ├── feature_extractor ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-39.pyc │ ├── cnhubert.cpython-39.pyc │ └── whisper_enc.cpython-39.pyc └── cnhubert.py ├── i18n ├── __pycache__ │ └── i18n.cpython-39.pyc ├── i18n.py ├── locale │ ├── en_US.json │ ├── es_ES.json │ ├── fr_FR.json │ ├── it_IT.json │ ├── ja_JP.json │ ├── ru_RU.json │ ├── tr_TR.json │ ├── zh_CN.json │ ├── zh_HK.json │ ├── zh_SG.json │ └── zh_TW.json ├── locale_diff.py └── scan_i18n.py ├── inference.py ├── module ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-39.pyc │ ├── attentions.cpython-39.pyc │ ├── commons.cpython-39.pyc │ ├── core_vq.cpython-39.pyc │ ├── data_utils.cpython-39.pyc │ ├── losses.cpython-39.pyc │ ├── mel_processing.cpython-39.pyc │ ├── models.cpython-39.pyc │ ├── modules.cpython-39.pyc │ ├── mrte_model.cpython-39.pyc │ ├── quantize.cpython-39.pyc │ └── transforms.cpython-39.pyc ├── attentions.py ├── commons.py ├── core_vq.py ├── data_utils.py ├── losses.py ├── mel_processing.py ├── models.py ├── modules.py ├── mrte_model.py ├── quantize.py └── transforms.py ├── prepare_datasets ├── step-five.py ├── step-four.py ├── step-one.py ├── step-two.py └── step_three.py ├── pretrained_models ├── chinese-hubert-base │ ├── config.json │ └── preprocessor_config.json └── chinese-roberta-wwm-ext-large │ ├── config.json │ └── tokenizer.json ├── process_ckpt.py ├── s1_train.py ├── s2_train.py ├── text ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-39.pyc │ ├── chinese.cpython-39.pyc │ ├── cleaner.cpython-39.pyc │ ├── english.cpython-39.pyc │ ├── japanese.cpython-39.pyc │ ├── symbols.cpython-39.pyc │ └── tone_sandhi.cpython-39.pyc ├── chinese.py ├── cleaner.py ├── cmudict.rep ├── cmudict_cache.pickle ├── english.py ├── opencpop-strict.txt ├── symbols.py └── tone_sandhi.py ├── tools ├── my_utils.py ├── slice_audio.py └── slicer2.py └── utils.py /AR/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/__init__.py -------------------------------------------------------------------------------- /AR/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /AR/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/data/__init__.py -------------------------------------------------------------------------------- /AR/data/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/data/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /AR/data/__pycache__/bucket_sampler.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/data/__pycache__/bucket_sampler.cpython-39.pyc -------------------------------------------------------------------------------- /AR/data/__pycache__/data_module.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/data/__pycache__/data_module.cpython-39.pyc -------------------------------------------------------------------------------- /AR/data/__pycache__/dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/data/__pycache__/dataset.cpython-39.pyc -------------------------------------------------------------------------------- /AR/data/data_module.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/data_module.py 2 | from pytorch_lightning import LightningDataModule 3 | from AR.data.bucket_sampler import DistributedBucketSampler 4 | from AR.data.dataset import Text2SemanticDataset 5 | from torch.utils.data import DataLoader 6 | 7 | 8 | class Text2SemanticDataModule(LightningDataModule): 9 | def __init__(self, config, train_semantic_path, train_phoneme_path,dev_semantic_path=None, dev_phoneme_path=None): 10 | super().__init__() 11 | self.config = config 12 | self.train_semantic_path = train_semantic_path 13 | self.train_phoneme_path = train_phoneme_path 14 | self.dev_semantic_path = dev_semantic_path 15 | self.dev_phoneme_path = dev_phoneme_path 16 | self.num_workers = self.config['data']['num_workers'] 17 | 18 | def prepare_data(self): 19 | pass 20 | 21 | def setup(self, stage=None, output_logs=False): 22 | self._train_dataset = Text2SemanticDataset( 23 | phoneme_path=self.train_phoneme_path, 24 | semantic_path=self.train_semantic_path, 25 | max_sec=self.config['data']['max_sec'], 26 | pad_val=self.config['data']['pad_val']) 27 | 28 | self._dev_dataset = self._train_dataset 29 | 30 | def train_dataloader(self): 31 | batch_size = self.config['train']['batch_size'] 32 | sampler = DistributedBucketSampler( 33 | self._train_dataset, batch_size=batch_size) 34 | return DataLoader( 35 | self._train_dataset, 36 | batch_size=batch_size, 37 | sampler=sampler, 38 | collate_fn=self._train_dataset.collate, 39 | num_workers=self.num_workers, 40 | persistent_workers=True, 41 | prefetch_factor=16 42 | ) 43 | 44 | def val_dataloader(self): 45 | return DataLoader( 46 | self._dev_dataset, 47 | batch_size=1, 48 | shuffle=False, 49 | collate_fn=self._train_dataset.collate, 50 | num_workers=max(self.num_workers,12), 51 | persistent_workers=True, 52 | prefetch_factor=16 53 | ) 54 | 55 | def test_dataloader(self): 56 | return DataLoader( 57 | self._dev_dataset, 58 | batch_size=1, 59 | shuffle=False, 60 | collate_fn=self._train_dataset.collate) 61 | -------------------------------------------------------------------------------- /AR/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/models/__init__.py -------------------------------------------------------------------------------- /AR/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /AR/models/__pycache__/t2s_lightning_module.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/models/__pycache__/t2s_lightning_module.cpython-39.pyc -------------------------------------------------------------------------------- /AR/models/__pycache__/t2s_model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/models/__pycache__/t2s_model.cpython-39.pyc -------------------------------------------------------------------------------- /AR/models/__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/models/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /AR/models/t2s_lightning_module.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/model/t2s_lightning_module.py 2 | import os,sys 3 | now_dir = os.getcwd() 4 | sys.path.append(now_dir) 5 | from typing import Dict 6 | 7 | import torch 8 | from pytorch_lightning import LightningModule 9 | from AR.models.t2s_model import Text2SemanticDecoder 10 | from AR.modules.lr_schedulers import WarmupCosineLRSchedule 11 | from AR.modules.optim import ScaledAdam 12 | 13 | 14 | class Text2SemanticLightningModule(LightningModule): 15 | def __init__(self, config, output_dir,is_train=True): 16 | super().__init__() 17 | self.config = config 18 | self.top_k = 3 19 | self.model = Text2SemanticDecoder(config=config, top_k=self.top_k) 20 | pretrained_s1 = config.get("pretrained_s1") 21 | 22 | if pretrained_s1 and is_train: 23 | print( 24 | self.load_state_dict( 25 | torch.load(pretrained_s1, map_location="cpu")["weight"] 26 | ) 27 | ) 28 | 29 | if is_train: 30 | self.automatic_optimization = False 31 | self.save_hyperparameters() 32 | self.eval_dir = output_dir / 'eval' 33 | self.eval_dir.mkdir(parents=True, exist_ok=True) 34 | 35 | def training_step(self, batch: Dict, batch_idx: int): 36 | 37 | opt = self.optimizers() 38 | scheduler = self.lr_schedulers() 39 | loss, acc = self.model.forward( 40 | batch['phoneme_ids'], batch['phoneme_ids_len'], 41 | batch['semantic_ids'], batch['semantic_ids_len'], 42 | batch['bert_feature'], batch['phone_len'], 43 | batch['bert_feature_len'], batch['speaker_list'], 44 | batch['semantic_len']) 45 | 46 | self.manual_backward(loss) 47 | if batch_idx > 0 and batch_idx % 4 == 0: 48 | opt.step() 49 | opt.zero_grad() 50 | scheduler.step() 51 | 52 | self.log( 53 | "total_loss", 54 | loss, 55 | on_step=True, 56 | on_epoch=True, 57 | prog_bar=True, 58 | sync_dist=True) 59 | 60 | self.log( 61 | "lr", 62 | scheduler.get_last_lr()[0], 63 | on_epoch=True, 64 | prog_bar=True, 65 | sync_dist=True) 66 | 67 | self.log( 68 | f"top_{self.top_k}_acc", 69 | acc, 70 | on_step=True, 71 | on_epoch=True, 72 | prog_bar=True, 73 | sync_dist=True) 74 | 75 | def validation_step(self, batch: Dict, batch_idx: int):return 76 | 77 | def configure_optimizers(self): 78 | model_parameters = self.model.parameters() 79 | parameters_names = [] 80 | parameters_names.append([ 81 | name_param_pair[0] 82 | for name_param_pair in self.model.named_parameters() 83 | ]) 84 | lm_opt = ScaledAdam( 85 | model_parameters, 86 | lr=0.01, 87 | betas=(0.9, 0.95), 88 | clipping_scale=2.0, 89 | parameters_names=parameters_names, 90 | show_dominant_parameters=False, 91 | clipping_update_period=1000, ) 92 | 93 | return { 94 | "optimizer": lm_opt, 95 | "lr_scheduler": { 96 | "scheduler": 97 | WarmupCosineLRSchedule( 98 | lm_opt, 99 | init_lr=self.config['optimizer']['lr_init'], 100 | peak_lr=self.config['optimizer']['lr'], 101 | end_lr=self.config['optimizer']['lr_end'], 102 | warmup_steps=self.config['optimizer']['warmup_steps'], 103 | total_steps=self.config['optimizer']['decay_steps']) 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /AR/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/modules/__init__.py -------------------------------------------------------------------------------- /AR/modules/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/modules/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /AR/modules/__pycache__/activation.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/modules/__pycache__/activation.cpython-39.pyc -------------------------------------------------------------------------------- /AR/modules/__pycache__/embedding.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/modules/__pycache__/embedding.cpython-39.pyc -------------------------------------------------------------------------------- /AR/modules/__pycache__/lr_schedulers.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/modules/__pycache__/lr_schedulers.cpython-39.pyc -------------------------------------------------------------------------------- /AR/modules/__pycache__/optim.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/modules/__pycache__/optim.cpython-39.pyc -------------------------------------------------------------------------------- /AR/modules/__pycache__/patched_mha_with_cache.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/modules/__pycache__/patched_mha_with_cache.cpython-39.pyc -------------------------------------------------------------------------------- /AR/modules/__pycache__/scaling.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/modules/__pycache__/scaling.cpython-39.pyc -------------------------------------------------------------------------------- /AR/modules/__pycache__/transformer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/modules/__pycache__/transformer.cpython-39.pyc -------------------------------------------------------------------------------- /AR/modules/embedding.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/lifeiteng/vall-e/blob/main/valle/modules/embedding.py 2 | import math 3 | 4 | import torch 5 | from torch import nn 6 | 7 | 8 | class TokenEmbedding(nn.Module): 9 | def __init__( 10 | self, 11 | embedding_dim: int, 12 | vocab_size: int, 13 | dropout: float=0.0, ): 14 | super().__init__() 15 | 16 | self.vocab_size = vocab_size 17 | self.embedding_dim = embedding_dim 18 | 19 | self.dropout = torch.nn.Dropout(p=dropout) 20 | self.word_embeddings = nn.Embedding(self.vocab_size, self.embedding_dim) 21 | 22 | @property 23 | def weight(self) -> torch.Tensor: 24 | return self.word_embeddings.weight 25 | 26 | def embedding(self, index: int) -> torch.Tensor: 27 | return self.word_embeddings.weight[index:index + 1] 28 | 29 | def forward(self, x: torch.Tensor): 30 | x = self.word_embeddings(x) 31 | x = self.dropout(x) 32 | return x 33 | 34 | 35 | class SinePositionalEmbedding(nn.Module): 36 | def __init__( 37 | self, 38 | embedding_dim: int, 39 | dropout: float=0.0, 40 | scale: bool=False, 41 | alpha: bool=False, ): 42 | super().__init__() 43 | self.embedding_dim = embedding_dim 44 | self.x_scale = math.sqrt(embedding_dim) if scale else 1.0 45 | self.alpha = nn.Parameter(torch.ones(1), requires_grad=alpha) 46 | self.dropout = torch.nn.Dropout(p=dropout) 47 | 48 | self.reverse = False 49 | self.pe = None 50 | self.extend_pe(torch.tensor(0.0).expand(1, 4000)) 51 | 52 | def extend_pe(self, x): 53 | """Reset the positional encodings.""" 54 | if self.pe is not None: 55 | if self.pe.size(1) >= x.size(1): 56 | if self.pe.dtype != x.dtype or self.pe.device != x.device: 57 | self.pe = self.pe.to(dtype=x.dtype, device=x.device) 58 | return 59 | pe = torch.zeros(x.size(1), self.embedding_dim) 60 | if self.reverse: 61 | position = torch.arange( 62 | x.size(1) - 1, -1, -1.0, dtype=torch.float32).unsqueeze(1) 63 | else: 64 | position = torch.arange( 65 | 0, x.size(1), dtype=torch.float32).unsqueeze(1) 66 | div_term = torch.exp( 67 | torch.arange(0, self.embedding_dim, 2, dtype=torch.float32) * 68 | -(math.log(10000.0) / self.embedding_dim)) 69 | pe[:, 0::2] = torch.sin(position * div_term) 70 | pe[:, 1::2] = torch.cos(position * div_term) 71 | pe = pe.unsqueeze(0) 72 | self.pe = pe.to(device=x.device, dtype=x.dtype).detach() 73 | 74 | def forward(self, x: torch.Tensor) -> torch.Tensor: 75 | self.extend_pe(x) 76 | output = x.unsqueeze(-1) if x.ndim == 2 else x 77 | output = output * self.x_scale + self.alpha * self.pe[:, :x.size(1)] 78 | return self.dropout(output) 79 | -------------------------------------------------------------------------------- /AR/modules/lr_schedulers.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/model/lr_schedulers.py 2 | import math 3 | 4 | import torch 5 | from matplotlib import pyplot as plt 6 | from torch import nn 7 | from torch.optim import Adam 8 | 9 | 10 | class WarmupCosineLRSchedule(torch.optim.lr_scheduler._LRScheduler): 11 | """ 12 | Implements Warmup learning rate schedule until 'warmup_steps', going from 'init_lr' to 'peak_lr' for multiple optimizers. 13 | """ 14 | 15 | def __init__(self, 16 | optimizer, 17 | init_lr, 18 | peak_lr, 19 | end_lr, 20 | warmup_steps=10000, 21 | total_steps=400000, 22 | current_step=0): 23 | self.init_lr = init_lr 24 | self.peak_lr = peak_lr 25 | self.end_lr = end_lr 26 | self.optimizer = optimizer 27 | self._warmup_rate = (peak_lr - init_lr) / warmup_steps 28 | self._decay_rate = (end_lr - peak_lr) / (total_steps - warmup_steps) 29 | self._current_step = current_step 30 | self.lr = init_lr 31 | self.warmup_steps = warmup_steps 32 | self.total_steps = total_steps 33 | self._last_lr = [self.lr] 34 | 35 | def set_lr(self, lr): 36 | self._last_lr = [g['lr'] for g in self.optimizer.param_groups] 37 | for g in self.optimizer.param_groups: 38 | # g['lr'] = lr 39 | g['lr'] = self.end_lr 40 | 41 | def step(self): 42 | if self._current_step < self.warmup_steps: 43 | lr = self.init_lr + self._warmup_rate * self._current_step 44 | 45 | elif self._current_step > self.total_steps: 46 | lr = self.end_lr 47 | 48 | else: 49 | decay_ratio = (self._current_step - self.warmup_steps) / ( 50 | self.total_steps - self.warmup_steps) 51 | if decay_ratio < 0.0 or decay_ratio > 1.0: 52 | raise RuntimeError( 53 | "Decay ratio must be in [0.0, 1.0]. Fix LR scheduler settings." 54 | ) 55 | coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio)) 56 | lr = self.end_lr + coeff * (self.peak_lr - self.end_lr) 57 | 58 | self.lr=lr=self.end_lr=0.002 59 | self.set_lr(lr) 60 | self.lr = lr 61 | self._current_step += 1 62 | return self.lr 63 | 64 | 65 | 66 | if __name__ == '__main__': 67 | m = nn.Linear(10, 10) 68 | opt = Adam(m.parameters(), lr=1e-4) 69 | s = WarmupCosineLRSchedule( 70 | opt, 71 | 1e-6, 72 | 2e-4, 73 | 1e-6, 74 | warmup_steps=2000, 75 | total_steps=20000, 76 | current_step=0) 77 | lrs = [] 78 | for i in range(25000): 79 | s.step() 80 | lrs.append(s.lr) 81 | print(s.lr) 82 | 83 | plt.plot(lrs) 84 | plt.plot(range(0, 25000), lrs) 85 | plt.show() 86 | -------------------------------------------------------------------------------- /AR/text_processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/text_processing/__init__.py -------------------------------------------------------------------------------- /AR/text_processing/phonemizer.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/text_processing/phonemizer.py 2 | import itertools 3 | import re 4 | from typing import Dict 5 | from typing import List 6 | 7 | import regex 8 | from gruut import sentences 9 | from gruut.const import Sentence 10 | from gruut.const import Word 11 | from AR.text_processing.symbols import SYMBOL_TO_ID 12 | 13 | 14 | class GruutPhonemizer: 15 | def __init__(self, language: str): 16 | self._phonemizer = sentences 17 | self.lang = language 18 | self.symbol_to_id = SYMBOL_TO_ID 19 | self._special_cases_dict: Dict[str] = { 20 | r"\.\.\.": "... ", 21 | ";": "; ", 22 | ":": ": ", 23 | ",": ", ", 24 | r"\.": ". ", 25 | "!": "! ", 26 | r"\?": "? ", 27 | "—": "—", 28 | "…": "… ", 29 | "«": "«", 30 | "»": "»" 31 | } 32 | self._punctuation_regexp: str = rf"([{''.join(self._special_cases_dict.keys())}])" 33 | 34 | def _normalize_punctuation(self, text: str) -> str: 35 | text = regex.sub(fr"\pZ+{self._punctuation_regexp}", r"\1", text) 36 | text = regex.sub(fr"{self._punctuation_regexp}(\pL)", r"\1 \2", text) 37 | text = regex.sub(r"\pZ+", r" ", text) 38 | return text.strip() 39 | 40 | def _convert_punctuation(self, word: Word) -> str: 41 | if not word.phonemes: 42 | return '' 43 | if word.phonemes[0] in ['‖', '|']: 44 | return word.text.strip() 45 | 46 | phonemes = ''.join(word.phonemes) 47 | # remove modifier characters ˈˌː with regex 48 | phonemes = re.sub(r'[ˈˌː͡]', '', phonemes) 49 | return phonemes.strip() 50 | 51 | def phonemize(self, text: str, espeak: bool=False) -> str: 52 | text_to_phonemize: str = self._normalize_punctuation(text) 53 | sents: List[Sentence] = [ 54 | sent 55 | for sent in self._phonemizer( 56 | text_to_phonemize, lang="en-us", espeak=espeak) 57 | ] 58 | words: List[str] = [ 59 | self._convert_punctuation(word) for word in itertools.chain(*sents) 60 | ] 61 | return ' '.join(words) 62 | 63 | def transform(self, phonemes): 64 | # convert phonemes to ids 65 | # dictionary is in symbols.py 66 | return [ 67 | self.symbol_to_id[p] for p in phonemes 68 | if p in self.symbol_to_id.keys() 69 | ] 70 | 71 | 72 | if __name__ == "__main__": 73 | phonemizer = GruutPhonemizer("en-us") 74 | # text -> IPA 75 | phonemes = phonemizer.phonemize("Hello, wor-ld ?") 76 | print("phonemes:", phonemes) 77 | print("len(phonemes):", len(phonemes)) 78 | phoneme_ids = phonemizer.transform(phonemes) 79 | print("phoneme_ids:", phoneme_ids) 80 | print("len(phoneme_ids):", len(phoneme_ids)) 81 | -------------------------------------------------------------------------------- /AR/text_processing/symbols.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/text_processing/symbols.py 2 | PAD = '_' 3 | PUNCTUATION = ';:,.!?¡¿—…"«»“” ' 4 | LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' 5 | IPA_LETTERS = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ" 6 | SYMBOLS = [PAD] + list(PUNCTUATION) + list(LETTERS) + list(IPA_LETTERS) 7 | SPACE_ID = SYMBOLS.index(" ") 8 | SYMBOL_TO_ID = {s: i for i, s in enumerate(SYMBOLS)} 9 | ID_TO_SYMBOL = {i: s for i, s in enumerate(SYMBOLS)} 10 | -------------------------------------------------------------------------------- /AR/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def str2bool(str): 5 | return True if str.lower() == 'true' else False 6 | 7 | 8 | def get_newest_ckpt(string_list): 9 | # 定义一个正则表达式模式,用于匹配字符串中的数字 10 | pattern = r'epoch=(\d+)-step=(\d+)\.ckpt' 11 | 12 | # 使用正则表达式提取每个字符串中的数字信息,并创建一个包含元组的列表 13 | extracted_info = [] 14 | for string in string_list: 15 | match = re.match(pattern, string) 16 | if match: 17 | epoch = int(match.group(1)) 18 | step = int(match.group(2)) 19 | extracted_info.append((epoch, step, string)) 20 | # 按照 epoch 后面的数字和 step 后面的数字进行排序 21 | sorted_info = sorted( 22 | extracted_info, key=lambda x: (x[0], x[1]), reverse=True) 23 | # 获取最新的 ckpt 文件名 24 | newest_ckpt = sorted_info[0][2] 25 | return newest_ckpt 26 | 27 | 28 | # 文本存在且不为空时 return True 29 | def check_txt_file(file_path): 30 | try: 31 | with open(file_path, 'r') as file: 32 | text = file.readline().strip() 33 | assert text.strip() != '' 34 | return text 35 | except Exception: 36 | return False 37 | return False 38 | -------------------------------------------------------------------------------- /AR/utils/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/utils/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /AR/utils/__pycache__/io.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/AR/utils/__pycache__/io.cpython-39.pyc -------------------------------------------------------------------------------- /AR/utils/initialize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Initialize modules for espnet2 neural networks.""" 3 | import torch 4 | from typeguard import check_argument_types 5 | 6 | 7 | def initialize(model: torch.nn.Module, init: str): 8 | """Initialize weights of a neural network module. 9 | 10 | Parameters are initialized using the given method or distribution. 11 | 12 | Custom initialization routines can be implemented into submodules 13 | as function `espnet_initialization_fn` within the custom module. 14 | 15 | Args: 16 | model: Target. 17 | init: Method of initialization. 18 | """ 19 | assert check_argument_types() 20 | print("init with", init) 21 | 22 | # weight init 23 | for p in model.parameters(): 24 | if p.dim() > 1: 25 | if init == "xavier_uniform": 26 | torch.nn.init.xavier_uniform_(p.data) 27 | elif init == "xavier_normal": 28 | torch.nn.init.xavier_normal_(p.data) 29 | elif init == "kaiming_uniform": 30 | torch.nn.init.kaiming_uniform_(p.data, nonlinearity="relu") 31 | elif init == "kaiming_normal": 32 | torch.nn.init.kaiming_normal_(p.data, nonlinearity="relu") 33 | else: 34 | raise ValueError("Unknown initialization: " + init) 35 | # bias init 36 | for name, p in model.named_parameters(): 37 | if ".bias" in name and p.dim() == 1: 38 | p.data.zero_() 39 | -------------------------------------------------------------------------------- /AR/utils/io.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | import yaml 5 | 6 | 7 | def load_yaml_config(path): 8 | with open(path) as f: 9 | config = yaml.full_load(f) 10 | return config 11 | 12 | 13 | def save_config_to_yaml(config, path): 14 | assert path.endswith('.yaml') 15 | with open(path, 'w') as f: 16 | f.write(yaml.dump(config)) 17 | f.close() 18 | 19 | 20 | def write_args(args, path): 21 | args_dict = dict((name, getattr(args, name)) for name in dir(args) 22 | if not name.startswith('_')) 23 | with open(path, 'a') as args_file: 24 | args_file.write('==> torch version: {}\n'.format(torch.__version__)) 25 | args_file.write( 26 | '==> cudnn version: {}\n'.format(torch.backends.cudnn.version())) 27 | args_file.write('==> Cmd:\n') 28 | args_file.write(str(sys.argv)) 29 | args_file.write('\n==> args:\n') 30 | for k, v in sorted(args_dict.items()): 31 | args_file.write(' %s: %s\n' % (str(k), str(v))) 32 | args_file.close() 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPT-Talker 2 | 3 | ## Introduction 4 | This is an implementation of the following paper. 5 | [《Generative Expressive Conversational Speech Synthesis》](https://arxiv.org/pdf/2407.21491) 6 | (Accepted by MM'2024) 7 | 8 | [Rui Liu *](https://ttslr.github.io/), Yifan Hu, [Yi Ren](https://rayeren.github.io/), Xiang Yin, [Haizhou Li](https://colips.org/~eleliha/). 9 | 10 | ## Demo Page 11 | [Speech Demo](https://walker-hyf.github.io/GPT-Talker/) 12 | 13 | ## Dependencies 14 | * For details about the operating environment dependency. Please refer to [GPT-SoVITS'requirements.txt](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/requirements.txt) 15 | * Please ```conda install ffmpeg``` 16 | * Tested environment: Ubuntu=22.04.2, python=3.9.18, torch=2.0.1+cu118 17 | 18 | ## NCSSD 19 | The large-scale conversational speech synthesis dataset we constructed, including those collected over the Internet as well as those recorded by sound recorders, consists of approximately 236 hours and over 776 speakers. 20 | 21 | Please refer to [NCSSD'repo](https://github.com/walker-hyf/NCSSD) 22 | 23 | ## Prepare Datasets 24 | Execute the five steps in the [./prepare_datastes](./prepare_datasets/) directory to build the training data for GPT-Talker. 25 | 26 | ## Train 27 | * Conversational VITS 28 | 29 | ```python train_s2.py``` 30 | 31 | The corresponding configuration file is in ./configs/s2.json 32 | 33 | * Conversational GPT 34 | 35 | ```python train_s1.py``` 36 | 37 | The corresponding configuration file is in ./configs/s1longer.yaml 38 | 39 | ## Fine-tuning 40 | Fine-tunable base models in the [./pretrained_models](./pretrained_models/), from [GPT-SoVITS](https://drive.google.com/drive/folders/15rap3Z_-w0mYgxz66pDcx2abhDRb17dk?usp=sharing) (Single Speech). 41 | 42 | ## Citations 43 | 44 | ```bibtex 45 | @inproceedings{10.1145/3664647.3681697, 46 | author = {Liu, Rui and Hu, Yifan and Ren, Yi and Yin, Xiang and Li, Haizhou}, 47 | title = {Generative Expressive Conversational Speech Synthesis}, 48 | year = {2024}, 49 | isbn = {9798400706868}, 50 | publisher = {Association for Computing Machinery}, 51 | address = {New York, NY, USA}, 52 | url = {https://doi.org/10.1145/3664647.3681697}, 53 | doi = {10.1145/3664647.3681697}, 54 | abstract = {Conversational Speech Synthesis (CSS) aims to express a target utterance with the proper speaking style in a user-agent conversation setting. Existing CSS methods employ effective multi-modal context modeling techniques to achieve empathy understanding and expression. However, they often need to design complex network architectures and meticulously optimize the modules within them. In addition, due to the limitations of small-scale datasets containing scripted recording styles, they often fail to simulate real natural conversational styles. To address the above issues, we propose a novel generative expressive CSS system, termed GPT-Talker.We transform the multimodal information of the multi-turn dialogue history into discrete token sequences and seamlessly integrate them to form a comprehensive user-agent dialogue context. Leveraging the power of GPT, we predict the token sequence, that includes both semantic and style knowledge, of response for the agent. After that, the expressive conversational speech is synthesized by the conversation-enriched VITS to deliver feedback to the user.Furthermore, we propose a large-scale Natural CSS Dataset called NCSSD, that includes both naturally recorded conversational speech in improvised styles and dialogues extracted from TV shows. It encompasses both Chinese and English languages, with a total duration of 236 hours. We conducted comprehensive experiments on the reliability of the NCSSD and the effectiveness of our GPT-Talker. Both subjective and objective evaluations demonstrate that our model outperforms other state-of-the-art CSS systems significantly in terms of naturalness and expressiveness. The Code, Dataset, and Pre-trained Model are available at: https://github.com/AI-S2-Lab/GPT-Talker.}, 55 | booktitle = {Proceedings of the 32nd ACM International Conference on Multimedia}, 56 | pages = {4187–4196}, 57 | numpages = {10}, 58 | keywords = {conversational speech synthesis (css), expressiveness, gpt, user-agent conversation}, 59 | location = {Melbourne VIC, Australia}, 60 | series = {MM '24} 61 | } 62 | ``` 63 | -------------------------------------------------------------------------------- /__pycache__/my_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/__pycache__/my_utils.cpython-39.pyc -------------------------------------------------------------------------------- /__pycache__/process_ckpt.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/__pycache__/process_ckpt.cpython-39.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /configs/s1longer.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | seed: 1234 3 | epochs: 1500 4 | batch_size: 6 5 | save_every_n_epoch: 2 6 | precision: 16-mixed 7 | gradient_clip: 1.0 8 | if_save_every_weights: true 9 | if_save_latest: false 10 | half_weights_save_dir: "./GPT_weights/" 11 | exp_name: "DailyTalk" 12 | optimizer: 13 | lr: 0.01 14 | lr_init: 0.00001 15 | lr_end: 0.0001 16 | warmup_steps: 2000 17 | decay_steps: 40000 18 | data: 19 | max_eval_sample: 8 20 | max_sec: 54 21 | num_workers: 2 22 | pad_val: 1024 23 | model: 24 | vocab_size: 1025 25 | phoneme_vocab_size: 512 26 | embedding_dim: 512 27 | hidden_dim: 512 28 | head: 16 29 | linear_units: 2048 30 | n_layer: 24 31 | dropout: 0 32 | EOS: 1024 33 | random_bert: 0 34 | inference: 35 | top_k: 5 36 | 37 | train_semantic_path: "I:\\GPT-Talker\\datasets\\processed\\DailyTalk\\6-name2semantic.tsv" 38 | train_phoneme_path: "I:\\GPT-Talker\\datasets\\processed\\DailyTalk\\2-name2text.txt" 39 | output_dir: "I:\\GPT-Talker\\log\\DailyTalk\\logs_s1" 40 | pretrained_s1: "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" -------------------------------------------------------------------------------- /configs/s2.json: -------------------------------------------------------------------------------- 1 | { 2 | "train": { 3 | "log_interval": 100, 4 | "eval_interval": 500, 5 | "seed": 1234, 6 | "epochs": 100, 7 | "learning_rate": 0.0001, 8 | "betas": [ 9 | 0.8, 10 | 0.99 11 | ], 12 | "eps": 1e-09, 13 | "batch_size": 2, 14 | "fp16_run": true, 15 | "lr_decay": 0.999875, 16 | "segment_size": 20480, 17 | "init_lr_ratio": 1, 18 | "warmup_epochs": 0, 19 | "c_mel": 45, 20 | "c_kl": 1.0, 21 | "text_low_lr_rate": 0.4, 22 | "save_every_epoch": 5, 23 | "if_save_latest": false, 24 | "if_save_every_weights": true, 25 | "save_weight_dir": "ConVITS_weights/", 26 | "pretrained_s2G": "pretrained_models/s2G488k.pth", 27 | "pretrained_s2D": "pretrained_models/s2D488k.pth", 28 | "pretrained_s1": "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt", 29 | "exp_name": "DailyTalk" 30 | }, 31 | "data": { 32 | "max_wav_value": 32768.0, 33 | "sampling_rate": 32000, 34 | "filter_length": 2048, 35 | "hop_length": 640, 36 | "win_length": 2048, 37 | "n_mel_channels": 128, 38 | "mel_fmin": 0.0, 39 | "mel_fmax": null, 40 | "add_blank": true, 41 | "n_speakers": 300, 42 | "cleaned_text": true, 43 | "exp_dir": "I:\\GPT-Talker\\datasets\\processed\\DailyTalk" 44 | }, 45 | "model": { 46 | "inter_channels": 192, 47 | "hidden_channels": 192, 48 | "filter_channels": 768, 49 | "n_heads": 2, 50 | "n_layers": 6, 51 | "kernel_size": 3, 52 | "p_dropout": 0.1, 53 | "resblock": "1", 54 | "resblock_kernel_sizes": [ 55 | 3, 56 | 7, 57 | 11 58 | ], 59 | "resblock_dilation_sizes": [ 60 | [ 61 | 1, 62 | 3, 63 | 5 64 | ], 65 | [ 66 | 1, 67 | 3, 68 | 5 69 | ], 70 | [ 71 | 1, 72 | 3, 73 | 5 74 | ] 75 | ], 76 | "upsample_rates": [ 77 | 10, 78 | 8, 79 | 2, 80 | 2, 81 | 2 82 | ], 83 | "upsample_initial_channel": 512, 84 | "upsample_kernel_sizes": [ 85 | 16, 86 | 16, 87 | 8, 88 | 2, 89 | 2 90 | ], 91 | "n_layers_q": 3, 92 | "use_spectral_norm": false, 93 | "gin_channels": 512, 94 | "semantic_frame_rate": "25hz", 95 | "freeze_quantizer": true 96 | }, 97 | "s2_ckpt_dir": "I:\\GPT-Talker\\log\\DailyTalk\\logs-s2", 98 | "content_modul": "cnhubert" 99 | 100 | } -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/2-cnhubert-len.txt: -------------------------------------------------------------------------------- 1 | 0_1_d0.wav [77] 2 | 10_1_d0.wav [119] 3 | 11_0_d0.wav [147] 4 | 1_0_d0.wav [96] 5 | 2_1_d0.wav [92] 6 | 3_0_d0.wav [180] 7 | 4_1_d0.wav [97] 8 | 5_0_d0.wav [113] 9 | 6_1_d0.wav [100] 10 | 7_0_d0.wav [83] 11 | 8_1_d0.wav [42] 12 | 9_0_d0.wav [72] 13 | 0_0_d1.wav [98] 14 | 1_1_d1.wav [80] 15 | 2_0_d1.wav [72] 16 | 3_1_d1.wav [129] 17 | 4_0_d1.wav [119] 18 | 5_1_d1.wav [162] 19 | 6_0_d1.wav [122] 20 | 7_1_d1.wav [128] 21 | 0_0_d2.wav [136] 22 | 10_0_d2.wav [88] 23 | 11_1_d2.wav [485] 24 | 12_0_d2.wav [199] 25 | 1_1_d2.wav [147] 26 | 2_0_d2.wav [54] 27 | 3_1_d2.wav [70] 28 | 4_0_d2.wav [97] 29 | 5_1_d2.wav [127] 30 | 6_0_d2.wav [74] 31 | 7_1_d2.wav [58] 32 | 8_0_d2.wav [144] 33 | 9_1_d2.wav [279] 34 | 0_1_d3.wav [95] 35 | 1_0_d3.wav [75] 36 | 2_1_d3.wav [90] 37 | 3_0_d3.wav [163] 38 | 4_1_d3.wav [127] 39 | 0_0_d4.wav [176] 40 | 10_0_d4.wav [135] 41 | 11_1_d4.wav [191] 42 | 1_1_d4.wav [119] 43 | 2_0_d4.wav [99] 44 | 3_1_d4.wav [195] 45 | 4_0_d4.wav [116] 46 | 5_1_d4.wav [202] 47 | 6_0_d4.wav [197] 48 | 7_1_d4.wav [206] 49 | 8_0_d4.wav [189] 50 | 9_1_d4.wav [242] 51 | -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/0_0_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/0_0_d1.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/0_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/0_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/0_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/0_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/0_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/0_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/0_1_d3.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/0_1_d3.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/10_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/10_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/10_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/10_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/10_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/10_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/11_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/11_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/11_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/11_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/11_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/11_1_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/12_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/12_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/1_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/1_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/1_0_d3.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/1_0_d3.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/1_1_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/1_1_d1.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/1_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/1_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/1_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/1_1_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/2_0_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/2_0_d1.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/2_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/2_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/2_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/2_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/2_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/2_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/2_1_d3.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/2_1_d3.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/3_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/3_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/3_0_d3.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/3_0_d3.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/3_1_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/3_1_d1.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/3_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/3_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/3_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/3_1_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/4_0_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/4_0_d1.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/4_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/4_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/4_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/4_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/4_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/4_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/4_1_d3.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/4_1_d3.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/5_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/5_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/5_1_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/5_1_d1.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/5_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/5_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/5_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/5_1_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/6_0_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/6_0_d1.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/6_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/6_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/6_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/6_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/6_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/6_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/7_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/7_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/7_1_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/7_1_d1.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/7_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/7_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/7_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/7_1_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/8_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/8_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/8_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/8_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/8_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/8_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/9_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/9_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/9_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/9_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/4-cnhubert/9_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/4-cnhubert/9_1_d4.wav.pt -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/0_0_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/0_0_d1.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/0_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/0_0_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/0_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/0_0_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/0_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/0_1_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/0_1_d3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/0_1_d3.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/10_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/10_0_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/10_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/10_0_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/10_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/10_1_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/11_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/11_0_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/11_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/11_1_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/11_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/11_1_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/12_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/12_0_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/1_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/1_0_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/1_0_d3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/1_0_d3.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/1_1_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/1_1_d1.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/1_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/1_1_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/1_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/1_1_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/2_0_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/2_0_d1.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/2_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/2_0_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/2_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/2_0_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/2_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/2_1_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/2_1_d3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/2_1_d3.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/3_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/3_0_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/3_0_d3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/3_0_d3.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/3_1_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/3_1_d1.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/3_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/3_1_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/3_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/3_1_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/4_0_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/4_0_d1.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/4_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/4_0_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/4_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/4_0_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/4_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/4_1_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/4_1_d3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/4_1_d3.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/5_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/5_0_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/5_1_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/5_1_d1.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/5_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/5_1_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/5_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/5_1_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/6_0_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/6_0_d1.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/6_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/6_0_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/6_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/6_0_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/6_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/6_1_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/7_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/7_0_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/7_1_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/7_1_d1.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/7_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/7_1_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/7_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/7_1_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/8_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/8_0_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/8_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/8_0_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/8_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/8_1_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/9_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/9_0_d0.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/9_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/9_1_d2.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/5-wav32k/9_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/5-wav32k/9_1_d4.wav -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/slicer_opt.list: -------------------------------------------------------------------------------- 1 | I:\GPT-Talker\datasets\raw\DailyTalk\0\0_1_d0.wav|DailyTalk_EN_1|EN|What are you working on? 2 | I:\GPT-Talker\datasets\raw\DailyTalk\0\10_1_d0.wav|DailyTalk_EN_1|EN|Your budget is a good idea. 3 | I:\GPT-Talker\datasets\raw\DailyTalk\0\11_0_d0.wav|DailyTalk_EN_0|EN|I know. It's going to save me a lot of money, I hope. 4 | I:\GPT-Talker\datasets\raw\DailyTalk\0\1_0_d0.wav|DailyTalk_EN_0|EN|I'm figuring out my budget. 5 | I:\GPT-Talker\datasets\raw\DailyTalk\0\2_1_d0.wav|DailyTalk_EN_1|EN|Umm…. What budget? 6 | I:\GPT-Talker\datasets\raw\DailyTalk\0\3_0_d0.wav|DailyTalk_EN_0|EN|I'm making a shopping budget, so that I don't spend too much money. 7 | I:\GPT-Talker\datasets\raw\DailyTalk\0\4_1_d0.wav|DailyTalk_EN_1|EN|How much money can you spend? 8 | I:\GPT-Talker\datasets\raw\DailyTalk\0\5_0_d0.wav|DailyTalk_EN_0|EN|I can only spend three hundred dollars a month. 9 | I:\GPT-Talker\datasets\raw\DailyTalk\0\6_1_d0.wav|DailyTalk_EN_1|EN|Why only three hundred dollars? 10 | I:\GPT-Talker\datasets\raw\DailyTalk\0\7_0_d0.wav|DailyTalk_EN_0|EN|I need to save the rest. 11 | I:\GPT-Talker\datasets\raw\DailyTalk\0\8_1_d0.wav|DailyTalk_EN_1|EN|For what? 12 | I:\GPT-Talker\datasets\raw\DailyTalk\0\9_0_d0.wav|DailyTalk_EN_0|EN|I need to pay my bills. 13 | I:\GPT-Talker\datasets\raw\DailyTalk\1\0_0_d1.wav|DailyTalk_EN_0|EN|Well, how does it look? 14 | I:\GPT-Talker\datasets\raw\DailyTalk\1\1_1_d1.wav|DailyTalk_EN_1|EN|It's a perfect fit. 15 | I:\GPT-Talker\datasets\raw\DailyTalk\1\2_0_d1.wav|DailyTalk_EN_0|EN|Let me pay for it now. 16 | I:\GPT-Talker\datasets\raw\DailyTalk\1\3_1_d1.wav|DailyTalk_EN_1|EN|Cash, credit card, or debit card? 17 | I:\GPT-Talker\datasets\raw\DailyTalk\1\4_0_d1.wav|DailyTalk_EN_0|EN|Umm I think I'll use my credit card. 18 | I:\GPT-Talker\datasets\raw\DailyTalk\1\5_1_d1.wav|DailyTalk_EN_1|EN|Your signature here, please, and we're finished. 19 | I:\GPT-Talker\datasets\raw\DailyTalk\1\6_0_d1.wav|DailyTalk_EN_0|EN|Here's my John Hancock. 20 | I:\GPT-Talker\datasets\raw\DailyTalk\1\7_1_d1.wav|DailyTalk_EN_1|EN|Bye-bye. Visit us again soon! 21 | I:\GPT-Talker\datasets\raw\DailyTalk\2\0_0_d2.wav|DailyTalk_EN_0|EN|Hello. Overseas operator. May I help you? 22 | I:\GPT-Talker\datasets\raw\DailyTalk\2\10_0_d2.wav|DailyTalk_EN_0|EN|And the number in Japan, please? 23 | I:\GPT-Talker\datasets\raw\DailyTalk\2\11_1_d2.wav|DailyTalk_EN_1|EN|Country code is eighty one, area code on ethirty eight, and the number is eight four six eight nine seven two. 24 | I:\GPT-Talker\datasets\raw\DailyTalk\2\12_0_d2.wav|DailyTalk_EN_0|EN|Hold the line, please. I'll put your call through. 25 | I:\GPT-Talker\datasets\raw\DailyTalk\2\1_1_d2.wav|DailyTalk_EN_1|EN|Yes. I'd like to make a collect call to Japan. 26 | I:\GPT-Talker\datasets\raw\DailyTalk\2\2_0_d2.wav|DailyTalk_EN_0|EN|Your name, please? 27 | I:\GPT-Talker\datasets\raw\DailyTalk\2\3_1_d2.wav|DailyTalk_EN_1|EN|It's Helen Kent. 28 | I:\GPT-Talker\datasets\raw\DailyTalk\2\4_0_d2.wav|DailyTalk_EN_0|EN|Who would you like to talk to? 29 | I:\GPT-Talker\datasets\raw\DailyTalk\2\5_1_d2.wav|DailyTalk_EN_1|EN|I'd like to talk to Mr. Edna Kent. 30 | I:\GPT-Talker\datasets\raw\DailyTalk\2\6_0_d2.wav|DailyTalk_EN_0|EN|Is that Mr. Edna Kent? 31 | I:\GPT-Talker\datasets\raw\DailyTalk\2\7_1_d2.wav|DailyTalk_EN_1|EN|That's right. 32 | I:\GPT-Talker\datasets\raw\DailyTalk\2\8_0_d2.wav|DailyTalk_EN_0|EN|What number are you calling from, please? 33 | I:\GPT-Talker\datasets\raw\DailyTalk\2\9_1_d2.wav|DailyTalk_EN_1|EN|From code five one three four three two six seven four eight. 34 | I:\GPT-Talker\datasets\raw\DailyTalk\3\0_1_d3.wav|DailyTalk_EN_1|EN|Did you bring some lunch with you? 35 | I:\GPT-Talker\datasets\raw\DailyTalk\3\1_0_d3.wav|DailyTalk_EN_0|EN|Yes, I packed it myself. 36 | I:\GPT-Talker\datasets\raw\DailyTalk\3\2_1_d3.wav|DailyTalk_EN_1|EN|Wow, that looks beautiful. 37 | I:\GPT-Talker\datasets\raw\DailyTalk\3\3_0_d3.wav|DailyTalk_EN_0|EN|It's my health-conscious lunch, good for my health and beauty. 38 | I:\GPT-Talker\datasets\raw\DailyTalk\3\4_1_d3.wav|DailyTalk_EN_1|EN|Umm, I might try it myself. 39 | I:\GPT-Talker\datasets\raw\DailyTalk\4\0_0_d4.wav|DailyTalk_EN_0|EN|Good morning, Miss Wang. How beautiful you look today! 40 | I:\GPT-Talker\datasets\raw\DailyTalk\4\10_0_d4.wav|DailyTalk_EN_0|EN|How did you grow such long nails? 41 | I:\GPT-Talker\datasets\raw\DailyTalk\4\11_1_d4.wav|DailyTalk_EN_1|EN|You have to pay attention to trimming them from time to time. 42 | I:\GPT-Talker\datasets\raw\DailyTalk\4\1_1_d4.wav|DailyTalk_EN_1|EN|Thank you. I'm wearing make-up. 43 | I:\GPT-Talker\datasets\raw\DailyTalk\4\2_0_d4.wav|DailyTalk_EN_0|EN|Who taught you to put on make-up? 44 | I:\GPT-Talker\datasets\raw\DailyTalk\4\3_1_d4.wav|DailyTalk_EN_1|EN|It's me. I have studied make-up at a beauty shop. 45 | I:\GPT-Talker\datasets\raw\DailyTalk\4\4_0_d4.wav|DailyTalk_EN_0|EN|Can you teach me how to do make-up? 46 | I:\GPT-Talker\datasets\raw\DailyTalk\4\5_1_d4.wav|DailyTalk_EN_1|EN|Of course. First, use eye shadow to heighten your eyes. 47 | I:\GPT-Talker\datasets\raw\DailyTalk\4\6_0_d4.wav|DailyTalk_EN_0|EN|What eye shadow do you think is the most fit for me? 48 | I:\GPT-Talker\datasets\raw\DailyTalk\4\7_1_d4.wav|DailyTalk_EN_1|EN|I think pink eye shadow is popular among Chinese people. 49 | I:\GPT-Talker\datasets\raw\DailyTalk\4\8_0_d4.wav|DailyTalk_EN_0|EN|How do you protect yourself from chapped lips? 50 | I:\GPT-Talker\datasets\raw\DailyTalk\4\9_1_d4.wav|DailyTalk_EN_1|EN|I suggest you use lipstick, which also accentuates your lips. 51 | -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/0_0_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/0_0_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/0_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/0_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/0_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/0_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/0_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/0_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/0_1_d3.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/0_1_d3.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/10_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/10_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/10_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/10_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/10_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/10_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/11_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/11_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/11_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/11_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/11_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/11_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/12_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/12_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/1_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/1_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/1_0_d3.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/1_0_d3.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/1_1_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/1_1_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/1_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/1_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/1_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/1_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/2_0_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/2_0_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/2_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/2_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/2_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/2_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/2_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/2_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/2_1_d3.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/2_1_d3.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/3_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/3_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/3_0_d3.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/3_0_d3.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/3_1_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/3_1_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/3_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/3_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/3_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/3_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/4_0_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/4_0_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/4_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/4_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/4_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/4_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/4_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/4_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/4_1_d3.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/4_1_d3.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/5_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/5_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/5_1_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/5_1_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/5_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/5_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/5_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/5_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/6_0_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/6_0_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/6_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/6_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/6_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/6_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/6_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/6_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/7_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/7_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/7_1_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/7_1_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/7_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/7_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/7_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/7_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/8_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/8_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/8_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/8_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/8_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/8_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/9_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/9_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/9_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/9_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-len-speaker/9_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-len-speaker/9_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/0_0_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/0_0_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/0_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/0_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/0_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/0_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/0_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/0_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/0_1_d3.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/0_1_d3.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/10_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/10_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/10_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/10_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/10_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/10_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/11_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/11_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/11_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/11_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/11_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/11_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/12_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/12_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/1_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/1_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/1_0_d3.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/1_0_d3.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/1_1_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/1_1_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/1_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/1_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/1_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/1_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/2_0_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/2_0_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/2_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/2_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/2_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/2_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/2_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/2_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/2_1_d3.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/2_1_d3.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/3_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/3_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/3_0_d3.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/3_0_d3.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/3_1_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/3_1_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/3_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/3_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/3_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/3_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/4_0_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/4_0_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/4_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/4_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/4_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/4_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/4_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/4_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/4_1_d3.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/4_1_d3.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/5_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/5_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/5_1_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/5_1_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/5_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/5_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/5_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/5_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/6_0_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/6_0_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/6_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/6_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/6_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/6_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/6_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/6_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/7_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/7_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/7_1_d1.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/7_1_d1.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/7_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/7_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/7_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/7_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/8_0_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/8_0_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/8_0_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/8_0_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/8_1_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/8_1_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/9_0_d0.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/9_0_d0.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/9_1_d2.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/9_1_d2.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train-semantic-phoneme/9_1_d4.wav.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/processed/DailyTalk/train-semantic-phoneme/9_1_d4.wav.npy -------------------------------------------------------------------------------- /datasets/processed/DailyTalk/train.list: -------------------------------------------------------------------------------- 1 | ['0_1_d0.wav', 38, 15] 2 | ['10_1_d0.wav', 116, 41] 3 | ['11_0_d0.wav', 168, 68] 4 | ['1_0_d0.wav', 86, 34] 5 | ['2_1_d0.wav', 132, 46] 6 | ['3_0_d0.wav', 184, 75] 7 | ['4_1_d0.wav', 184, 76] 8 | ['5_0_d0.wav', 194, 98] 9 | ['6_1_d0.wav', 154, 76] 10 | ['7_0_d0.wav', 147, 72] 11 | ['8_1_d0.wav', 112, 45] 12 | ['9_0_d0.wav', 98, 38] 13 | ['0_0_d1.wav', 49, 15] 14 | ['1_1_d1.wav', 89, 29] 15 | ['2_0_d1.wav', 125, 44] 16 | ['3_1_d1.wav', 140, 56] 17 | ['4_0_d1.wav', 159, 67] 18 | ['5_1_d1.wav', 204, 84] 19 | ['6_0_d1.wav', 201, 73] 20 | ['7_1_d1.wav', 206, 69] 21 | ['0_0_d2.wav', 68, 28] 22 | ['10_0_d2.wav', 255, 89] 23 | ['11_1_d2.wav', 425, 131] 24 | ['12_0_d2.wav', 385, 122] 25 | ['1_1_d2.wav', 141, 60] 26 | ['2_0_d2.wav', 168, 72] 27 | ['3_1_d2.wav', 135, 57] 28 | ['4_0_d2.wav', 110, 43] 29 | ['5_1_d2.wav', 146, 58] 30 | ['6_0_d2.wav', 148, 65] 31 | ['7_1_d2.wav', 129, 55] 32 | ['8_0_d2.wav', 138, 55] 33 | ['9_1_d2.wav', 240, 74] 34 | ['0_1_d3.wav', 47, 22] 35 | ['1_0_d3.wav', 84, 40] 36 | ['2_1_d3.wav', 129, 59] 37 | ['3_0_d3.wav', 163, 79] 38 | ['4_1_d3.wav', 189, 80] 39 | ['0_0_d4.wav', 88, 37] 40 | ['10_0_d4.wav', 282, 99] 41 | ['11_1_d4.wav', 283, 107] 42 | ['1_1_d4.wav', 147, 58] 43 | ['2_0_d4.wav', 196, 79] 44 | ['3_1_d4.wav', 205, 76] 45 | ['4_0_d4.wav', 204, 78] 46 | ['5_1_d4.wav', 256, 90] 47 | ['6_0_d4.wav', 257, 89] 48 | ['7_1_d4.wav', 302, 104] 49 | ['8_0_d4.wav', 295, 103] 50 | ['9_1_d4.wav', 318, 116] 51 | -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/0_1_d0.txt: -------------------------------------------------------------------------------- 1 | What are you working on? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/0_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/0_1_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/0_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/0_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/10_1_d0.txt: -------------------------------------------------------------------------------- 1 | Your budget is a good idea. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/10_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/10_1_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/10_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/10_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/11_0_d0.txt: -------------------------------------------------------------------------------- 1 | I know. It's going to save me a lot of money, I hope. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/11_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/11_0_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/11_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/11_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/1_0_d0.txt: -------------------------------------------------------------------------------- 1 | I'm figuring out my budget. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/1_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/1_0_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/1_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/1_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/2_1_d0.txt: -------------------------------------------------------------------------------- 1 | Umm…. What budget? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/2_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/2_1_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/2_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/2_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/3_0_d0.txt: -------------------------------------------------------------------------------- 1 | I'm making a shopping budget, so that I don't spend too much money. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/3_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/3_0_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/3_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/3_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/4_1_d0.txt: -------------------------------------------------------------------------------- 1 | How much money can you spend? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/4_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/4_1_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/4_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/4_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/5_0_d0.txt: -------------------------------------------------------------------------------- 1 | I can only spend three hundred dollars a month. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/5_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/5_0_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/5_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/5_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/6_1_d0.txt: -------------------------------------------------------------------------------- 1 | Why only three hundred dollars? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/6_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/6_1_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/6_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/6_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/7_0_d0.txt: -------------------------------------------------------------------------------- 1 | I need to save the rest. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/7_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/7_0_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/7_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/7_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/8_1_d0.txt: -------------------------------------------------------------------------------- 1 | For what? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/8_1_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/8_1_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/8_1_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/8_1_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/9_0_d0.txt: -------------------------------------------------------------------------------- 1 | I need to pay my bills. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/9_0_d0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/9_0_d0.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/0/9_0_d0.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/0/9_0_d0.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/0_0_d1.txt: -------------------------------------------------------------------------------- 1 | Well, how does it look? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/0_0_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/0_0_d1.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/0_0_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/0_0_d1.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/1_1_d1.txt: -------------------------------------------------------------------------------- 1 | It's a perfect fit. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/1_1_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/1_1_d1.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/1_1_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/1_1_d1.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/2_0_d1.txt: -------------------------------------------------------------------------------- 1 | Let me pay for it now. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/2_0_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/2_0_d1.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/2_0_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/2_0_d1.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/3_1_d1.txt: -------------------------------------------------------------------------------- 1 | Cash, credit card, or debit card? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/3_1_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/3_1_d1.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/3_1_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/3_1_d1.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/4_0_d1.txt: -------------------------------------------------------------------------------- 1 | Umm I think I'll use my credit card. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/4_0_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/4_0_d1.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/4_0_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/4_0_d1.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/5_1_d1.txt: -------------------------------------------------------------------------------- 1 | Your signature here, please, and we're finished. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/5_1_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/5_1_d1.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/5_1_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/5_1_d1.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/6_0_d1.txt: -------------------------------------------------------------------------------- 1 | Here's my John Hancock. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/6_0_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/6_0_d1.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/6_0_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/6_0_d1.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/7_1_d1.txt: -------------------------------------------------------------------------------- 1 | Bye-bye. Visit us again soon! -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/7_1_d1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/7_1_d1.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/1/7_1_d1.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/1/7_1_d1.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/0_0_d2.txt: -------------------------------------------------------------------------------- 1 | Hello. Overseas operator. May I help you? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/0_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/0_0_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/0_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/0_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/10_0_d2.txt: -------------------------------------------------------------------------------- 1 | And the number in Japan, please? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/10_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/10_0_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/10_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/10_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/11_1_d2.txt: -------------------------------------------------------------------------------- 1 | Country code is eighty one, area code on ethirty eight, and the number is eight four six eight nine seven two. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/11_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/11_1_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/11_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/11_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/12_0_d2.txt: -------------------------------------------------------------------------------- 1 | Hold the line, please. I'll put your call through. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/12_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/12_0_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/12_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/12_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/1_1_d2.txt: -------------------------------------------------------------------------------- 1 | Yes. I'd like to make a collect call to Japan. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/1_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/1_1_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/1_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/1_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/2_0_d2.txt: -------------------------------------------------------------------------------- 1 | Your name, please? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/2_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/2_0_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/2_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/2_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/3_1_d2.txt: -------------------------------------------------------------------------------- 1 | It's Helen Kent. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/3_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/3_1_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/3_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/3_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/4_0_d2.txt: -------------------------------------------------------------------------------- 1 | Who would you like to talk to? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/4_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/4_0_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/4_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/4_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/5_1_d2.txt: -------------------------------------------------------------------------------- 1 | I'd like to talk to Mr. Edna Kent. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/5_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/5_1_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/5_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/5_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/6_0_d2.txt: -------------------------------------------------------------------------------- 1 | Is that Mr. Edna Kent? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/6_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/6_0_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/6_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/6_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/7_1_d2.txt: -------------------------------------------------------------------------------- 1 | That's right. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/7_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/7_1_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/7_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/7_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/8_0_d2.txt: -------------------------------------------------------------------------------- 1 | What number are you calling from, please? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/8_0_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/8_0_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/8_0_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/8_0_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/9_1_d2.txt: -------------------------------------------------------------------------------- 1 | From code five one three four three two six seven four eight. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/9_1_d2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/9_1_d2.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/2/9_1_d2.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/2/9_1_d2.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/0_1_d3.txt: -------------------------------------------------------------------------------- 1 | Did you bring some lunch with you? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/0_1_d3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/3/0_1_d3.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/0_1_d3.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/3/0_1_d3.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/1_0_d3.txt: -------------------------------------------------------------------------------- 1 | Yes, I packed it myself. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/1_0_d3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/3/1_0_d3.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/1_0_d3.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/3/1_0_d3.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/2_1_d3.txt: -------------------------------------------------------------------------------- 1 | Wow, that looks beautiful. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/2_1_d3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/3/2_1_d3.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/2_1_d3.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/3/2_1_d3.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/3_0_d3.txt: -------------------------------------------------------------------------------- 1 | It's my health-conscious lunch, good for my health and beauty. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/3_0_d3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/3/3_0_d3.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/3_0_d3.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/3/3_0_d3.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/4_1_d3.txt: -------------------------------------------------------------------------------- 1 | Umm, I might try it myself. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/4_1_d3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/3/4_1_d3.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/3/4_1_d3.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/3/4_1_d3.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/0_0_d4.txt: -------------------------------------------------------------------------------- 1 | Good morning, Miss Wang. How beautiful you look today! -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/0_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/0_0_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/0_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/0_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/10_0_d4.txt: -------------------------------------------------------------------------------- 1 | How did you grow such long nails? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/10_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/10_0_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/10_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/10_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/11_1_d4.txt: -------------------------------------------------------------------------------- 1 | You have to pay attention to trimming them from time to time. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/11_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/11_1_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/11_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/11_1_d4.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/1_1_d4.txt: -------------------------------------------------------------------------------- 1 | Thank you. I'm wearing make-up. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/1_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/1_1_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/1_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/1_1_d4.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/2_0_d4.txt: -------------------------------------------------------------------------------- 1 | Who taught you to put on make-up? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/2_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/2_0_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/2_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/2_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/3_1_d4.txt: -------------------------------------------------------------------------------- 1 | It's me. I have studied make-up at a beauty shop. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/3_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/3_1_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/3_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/3_1_d4.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/4_0_d4.txt: -------------------------------------------------------------------------------- 1 | Can you teach me how to do make-up? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/4_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/4_0_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/4_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/4_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/5_1_d4.txt: -------------------------------------------------------------------------------- 1 | Of course. First, use eye shadow to heighten your eyes. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/5_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/5_1_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/5_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/5_1_d4.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/6_0_d4.txt: -------------------------------------------------------------------------------- 1 | What eye shadow do you think is the most fit for me? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/6_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/6_0_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/6_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/6_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/7_1_d4.txt: -------------------------------------------------------------------------------- 1 | I think pink eye shadow is popular among Chinese people. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/7_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/7_1_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/7_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/7_1_d4.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/8_0_d4.txt: -------------------------------------------------------------------------------- 1 | How do you protect yourself from chapped lips? -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/8_0_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/8_0_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/8_0_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/8_0_d4.wav.pt -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/9_1_d4.txt: -------------------------------------------------------------------------------- 1 | I suggest you use lipstick, which also accentuates your lips. -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/9_1_d4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/9_1_d4.wav -------------------------------------------------------------------------------- /datasets/raw/DailyTalk/4/9_1_d4.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/datasets/raw/DailyTalk/4/9_1_d4.wav.pt -------------------------------------------------------------------------------- /feature_extractor/__init__.py: -------------------------------------------------------------------------------- 1 | from . import cnhubert 2 | 3 | content_module_map = { 4 | 'cnhubert': cnhubert 5 | } -------------------------------------------------------------------------------- /feature_extractor/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/feature_extractor/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /feature_extractor/__pycache__/cnhubert.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/feature_extractor/__pycache__/cnhubert.cpython-39.pyc -------------------------------------------------------------------------------- /feature_extractor/__pycache__/whisper_enc.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/feature_extractor/__pycache__/whisper_enc.cpython-39.pyc -------------------------------------------------------------------------------- /feature_extractor/cnhubert.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import librosa 4 | import torch 5 | import torch.nn.functional as F 6 | import soundfile as sf 7 | import logging 8 | 9 | logging.getLogger("numba").setLevel(logging.WARNING) 10 | 11 | from transformers import ( 12 | Wav2Vec2FeatureExtractor, 13 | HubertModel, 14 | Wav2Vec2Model, 15 | ) 16 | 17 | import utils 18 | import torch.nn as nn 19 | 20 | cnhubert_base_path=None 21 | class CNHubert(nn.Module): 22 | def __init__(self): 23 | super().__init__() 24 | self.model = HubertModel.from_pretrained(cnhubert_base_path) 25 | self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(cnhubert_base_path) 26 | def forward(self, x): 27 | input_values = self.feature_extractor(x, return_tensors="pt", sampling_rate=16000).input_values.to(x.device) 28 | feats = self.model(input_values)["last_hidden_state"] 29 | return feats 30 | 31 | # class CNHubertLarge(nn.Module): 32 | # def __init__(self): 33 | # super().__init__() 34 | # self.model = HubertModel.from_pretrained("/data/docker/liujing04/gpt-vits/chinese-hubert-large") 35 | # self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("/data/docker/liujing04/gpt-vits/chinese-hubert-large") 36 | # def forward(self, x): 37 | # input_values = self.feature_extractor(x, return_tensors="pt", sampling_rate=16000).input_values.to(x.device) 38 | # feats = self.model(input_values)["last_hidden_state"] 39 | # return feats 40 | # 41 | # class CVec(nn.Module): 42 | # def __init__(self): 43 | # super().__init__() 44 | # self.model = HubertModel.from_pretrained("/data/docker/liujing04/vc-webui-big/hubert_base") 45 | # self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("/data/docker/liujing04/vc-webui-big/hubert_base") 46 | # def forward(self, x): 47 | # input_values = self.feature_extractor(x, return_tensors="pt", sampling_rate=16000).input_values.to(x.device) 48 | # feats = self.model(input_values)["last_hidden_state"] 49 | # return feats 50 | # 51 | # class cnw2v2base(nn.Module): 52 | # def __init__(self): 53 | # super().__init__() 54 | # self.model = Wav2Vec2Model.from_pretrained("/data/docker/liujing04/gpt-vits/chinese-wav2vec2-base") 55 | # self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("/data/docker/liujing04/gpt-vits/chinese-wav2vec2-base") 56 | # def forward(self, x): 57 | # input_values = self.feature_extractor(x, return_tensors="pt", sampling_rate=16000).input_values.to(x.device) 58 | # feats = self.model(input_values)["last_hidden_state"] 59 | # return feats 60 | 61 | 62 | 63 | def get_model(): 64 | model = CNHubert() 65 | model.eval() 66 | return model 67 | 68 | # def get_large_model(): 69 | # model = CNHubertLarge() 70 | # model.eval() 71 | # return model 72 | # 73 | # def get_model_cvec(): 74 | # model = CVec() 75 | # model.eval() 76 | # return model 77 | # 78 | # def get_model_cnw2v2base(): 79 | # model = cnw2v2base() 80 | # model.eval() 81 | # return model 82 | 83 | def get_content(hmodel, wav_16k_tensor): 84 | with torch.no_grad(): 85 | feats = hmodel(wav_16k_tensor) 86 | return feats.transpose(1,2) 87 | 88 | 89 | if __name__ == '__main__': 90 | model = get_model() 91 | src_path = "xx.wav" 92 | wav_16k_tensor = utils.load_wav_to_torch_and_resample(src_path, 16000) 93 | model = model 94 | wav_16k_tensor = wav_16k_tensor 95 | feats = get_content(model,wav_16k_tensor) 96 | print(feats.shape) 97 | 98 | -------------------------------------------------------------------------------- /i18n/__pycache__/i18n.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/i18n/__pycache__/i18n.cpython-39.pyc -------------------------------------------------------------------------------- /i18n/i18n.py: -------------------------------------------------------------------------------- 1 | import json 2 | import locale 3 | import os 4 | 5 | 6 | def load_language_list(language): 7 | with open(f"./i18n/locale/{language}.json", "r", encoding="utf-8") as f: 8 | language_list = json.load(f) 9 | return language_list 10 | 11 | 12 | class I18nAuto: 13 | def __init__(self, language=None): 14 | if language in ["Auto", None]: 15 | language = locale.getdefaultlocale()[ 16 | 0 17 | ] # getlocale can't identify the system's language ((None, None)) 18 | if not os.path.exists(f"./i18n/locale/{language}.json"): 19 | language = "en_US" 20 | self.language = language 21 | self.language_map = load_language_list(language) 22 | 23 | def __call__(self, key): 24 | return self.language_map.get(key, key) 25 | 26 | def __repr__(self): 27 | return "Use Language: " + self.language 28 | -------------------------------------------------------------------------------- /i18n/locale_diff.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from collections import OrderedDict 4 | 5 | # Define the standard file name 6 | standard_file = "locale/zh_CN.json" 7 | 8 | # Find all JSON files in the directory 9 | dir_path = "locale/" 10 | languages = [ 11 | os.path.join(dir_path, f) 12 | for f in os.listdir(dir_path) 13 | if f.endswith(".json") and f != standard_file 14 | ] 15 | 16 | # Load the standard file 17 | with open(standard_file, "r", encoding="utf-8") as f: 18 | standard_data = json.load(f, object_pairs_hook=OrderedDict) 19 | 20 | # Loop through each language file 21 | for lang_file in languages: 22 | # Load the language file 23 | with open(lang_file, "r", encoding="utf-8") as f: 24 | lang_data = json.load(f, object_pairs_hook=OrderedDict) 25 | 26 | # Find the difference between the language file and the standard file 27 | diff = set(standard_data.keys()) - set(lang_data.keys()) 28 | 29 | miss = set(lang_data.keys()) - set(standard_data.keys()) 30 | 31 | # Add any missing keys to the language file 32 | for key in diff: 33 | lang_data[key] = key 34 | 35 | # Del any extra keys to the language file 36 | for key in miss: 37 | del lang_data[key] 38 | 39 | # Sort the keys of the language file to match the order of the standard file 40 | lang_data = OrderedDict( 41 | sorted(lang_data.items(), key=lambda x: list(standard_data.keys()).index(x[0])) 42 | ) 43 | 44 | # Save the updated language file 45 | with open(lang_file, "w", encoding="utf-8") as f: 46 | json.dump(lang_data, f, ensure_ascii=False, indent=4, sort_keys=True) 47 | f.write("\n") 48 | -------------------------------------------------------------------------------- /i18n/scan_i18n.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import glob 3 | import json 4 | from collections import OrderedDict 5 | 6 | 7 | def extract_i18n_strings(node): 8 | i18n_strings = [] 9 | 10 | if ( 11 | isinstance(node, ast.Call) 12 | and isinstance(node.func, ast.Name) 13 | and node.func.id == "i18n" 14 | ): 15 | for arg in node.args: 16 | if isinstance(arg, ast.Str): 17 | i18n_strings.append(arg.s) 18 | 19 | for child_node in ast.iter_child_nodes(node): 20 | i18n_strings.extend(extract_i18n_strings(child_node)) 21 | 22 | return i18n_strings 23 | 24 | 25 | # scan the directory for all .py files (recursively) 26 | # for each file, parse the code into an AST 27 | # for each AST, extract the i18n strings 28 | 29 | strings = [] 30 | for filename in glob.iglob("**/*.py", recursive=True): 31 | with open(filename, "r") as f: 32 | code = f.read() 33 | if "I18nAuto" in code: 34 | tree = ast.parse(code) 35 | i18n_strings = extract_i18n_strings(tree) 36 | print(filename, len(i18n_strings)) 37 | strings.extend(i18n_strings) 38 | code_keys = set(strings) 39 | """ 40 | n_i18n.py 41 | gui_v1.py 26 42 | app.py 16 43 | infer-web.py 147 44 | scan_i18n.py 0 45 | i18n.py 0 46 | lib/train/process_ckpt.py 1 47 | """ 48 | print() 49 | print("Total unique:", len(code_keys)) 50 | 51 | 52 | standard_file = "i18n/locale/zh_CN.json" 53 | with open(standard_file, "r", encoding="utf-8") as f: 54 | standard_data = json.load(f, object_pairs_hook=OrderedDict) 55 | standard_keys = set(standard_data.keys()) 56 | 57 | # Define the standard file name 58 | unused_keys = standard_keys - code_keys 59 | print("Unused keys:", len(unused_keys)) 60 | for unused_key in unused_keys: 61 | print("\t", unused_key) 62 | 63 | missing_keys = code_keys - standard_keys 64 | print("Missing keys:", len(missing_keys)) 65 | for missing_key in missing_keys: 66 | print("\t", missing_key) 67 | 68 | code_keys_dict = OrderedDict() 69 | for s in strings: 70 | code_keys_dict[s] = s 71 | 72 | # write back 73 | with open(standard_file, "w", encoding="utf-8") as f: 74 | json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True) 75 | f.write("\n") 76 | -------------------------------------------------------------------------------- /module/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__init__.py -------------------------------------------------------------------------------- /module/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /module/__pycache__/attentions.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/attentions.cpython-39.pyc -------------------------------------------------------------------------------- /module/__pycache__/commons.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/commons.cpython-39.pyc -------------------------------------------------------------------------------- /module/__pycache__/core_vq.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/core_vq.cpython-39.pyc -------------------------------------------------------------------------------- /module/__pycache__/data_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/data_utils.cpython-39.pyc -------------------------------------------------------------------------------- /module/__pycache__/losses.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/losses.cpython-39.pyc -------------------------------------------------------------------------------- /module/__pycache__/mel_processing.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/mel_processing.cpython-39.pyc -------------------------------------------------------------------------------- /module/__pycache__/models.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/models.cpython-39.pyc -------------------------------------------------------------------------------- /module/__pycache__/modules.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/modules.cpython-39.pyc -------------------------------------------------------------------------------- /module/__pycache__/mrte_model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/mrte_model.cpython-39.pyc -------------------------------------------------------------------------------- /module/__pycache__/quantize.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/quantize.cpython-39.pyc -------------------------------------------------------------------------------- /module/__pycache__/transforms.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/module/__pycache__/transforms.cpython-39.pyc -------------------------------------------------------------------------------- /module/losses.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.nn import functional as F 5 | 6 | 7 | def feature_loss(fmap_r, fmap_g): 8 | loss = 0 9 | for dr, dg in zip(fmap_r, fmap_g): 10 | for rl, gl in zip(dr, dg): 11 | rl = rl.float().detach() 12 | gl = gl.float() 13 | loss += torch.mean(torch.abs(rl - gl)) 14 | 15 | return loss * 2 16 | 17 | 18 | def discriminator_loss(disc_real_outputs, disc_generated_outputs): 19 | loss = 0 20 | r_losses = [] 21 | g_losses = [] 22 | for dr, dg in zip(disc_real_outputs, disc_generated_outputs): 23 | dr = dr.float() 24 | dg = dg.float() 25 | r_loss = torch.mean((1-dr)**2) 26 | g_loss = torch.mean(dg**2) 27 | loss += (r_loss + g_loss) 28 | r_losses.append(r_loss.item()) 29 | g_losses.append(g_loss.item()) 30 | 31 | return loss, r_losses, g_losses 32 | 33 | 34 | def generator_loss(disc_outputs): 35 | loss = 0 36 | gen_losses = [] 37 | for dg in disc_outputs: 38 | dg = dg.float() 39 | l = torch.mean((1-dg)**2) 40 | gen_losses.append(l) 41 | loss += l 42 | 43 | return loss, gen_losses 44 | 45 | 46 | def kl_loss(z_p, logs_q, m_p, logs_p, z_mask): 47 | """ 48 | z_p, logs_q: [b, h, t_t] 49 | m_p, logs_p: [b, h, t_t] 50 | """ 51 | z_p = z_p.float() 52 | logs_q = logs_q.float() 53 | m_p = m_p.float() 54 | logs_p = logs_p.float() 55 | z_mask = z_mask.float() 56 | 57 | kl = logs_p - logs_q - 0.5 58 | kl += 0.5 * ((z_p - m_p)**2) * torch.exp(-2. * logs_p) 59 | kl = torch.sum(kl * z_mask) 60 | l = kl / torch.sum(z_mask) 61 | return l 62 | 63 | def mle_loss(z, m, logs, logdet, mask): 64 | l = torch.sum(logs) + 0.5 * torch.sum(torch.exp(-2 * logs) * ((z - m)**2)) # neg normal likelihood w/o the constant term 65 | l = l - torch.sum(logdet) # log jacobian determinant 66 | l = l / torch.sum(torch.ones_like(z) * mask) # averaging across batch, channel and time axes 67 | l = l + 0.5 * math.log(2 * math.pi) # add the remaining constant term 68 | return l -------------------------------------------------------------------------------- /module/mel_processing.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import random 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | import torch.utils.data 8 | import numpy as np 9 | import librosa 10 | import librosa.util as librosa_util 11 | from librosa.util import normalize, pad_center, tiny 12 | from scipy.signal import get_window 13 | from scipy.io.wavfile import read 14 | from librosa.filters import mel as librosa_mel_fn 15 | 16 | MAX_WAV_VALUE = 32768.0 17 | 18 | 19 | def dynamic_range_compression_torch(x, C=1, clip_val=1e-5): 20 | """ 21 | PARAMS 22 | ------ 23 | C: compression factor 24 | """ 25 | return torch.log(torch.clamp(x, min=clip_val) * C) 26 | 27 | 28 | def dynamic_range_decompression_torch(x, C=1): 29 | """ 30 | PARAMS 31 | ------ 32 | C: compression factor used to compress 33 | """ 34 | return torch.exp(x) / C 35 | 36 | 37 | def spectral_normalize_torch(magnitudes): 38 | output = dynamic_range_compression_torch(magnitudes) 39 | return output 40 | 41 | 42 | def spectral_de_normalize_torch(magnitudes): 43 | output = dynamic_range_decompression_torch(magnitudes) 44 | return output 45 | 46 | 47 | mel_basis = {} 48 | hann_window = {} 49 | 50 | 51 | def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False): 52 | if torch.min(y) < -1.: 53 | print('min value is ', torch.min(y)) 54 | if torch.max(y) > 1.: 55 | print('max value is ', torch.max(y)) 56 | 57 | global hann_window 58 | dtype_device = str(y.dtype) + '_' + str(y.device) 59 | wnsize_dtype_device = str(win_size) + '_' + dtype_device 60 | if wnsize_dtype_device not in hann_window: 61 | hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) 62 | 63 | y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect') 64 | y = y.squeeze(1) 65 | spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device], 66 | center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=False) 67 | 68 | spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) 69 | return spec 70 | 71 | 72 | def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax): 73 | global mel_basis 74 | dtype_device = str(spec.dtype) + '_' + str(spec.device) 75 | fmax_dtype_device = str(fmax) + '_' + dtype_device 76 | if fmax_dtype_device not in mel_basis: 77 | mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) 78 | mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device) 79 | spec = torch.matmul(mel_basis[fmax_dtype_device], spec) 80 | spec = spectral_normalize_torch(spec) 81 | return spec 82 | 83 | 84 | def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False): 85 | if torch.min(y) < -1.: 86 | print('min value is ', torch.min(y)) 87 | if torch.max(y) > 1.: 88 | print('max value is ', torch.max(y)) 89 | 90 | global mel_basis, hann_window 91 | dtype_device = str(y.dtype) + '_' + str(y.device) 92 | fmax_dtype_device = str(fmax) + '_' + dtype_device 93 | wnsize_dtype_device = str(win_size) + '_' + dtype_device 94 | if fmax_dtype_device not in mel_basis: 95 | mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) 96 | mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device) 97 | if wnsize_dtype_device not in hann_window: 98 | hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) 99 | 100 | y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect') 101 | y = y.squeeze(1) 102 | 103 | spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device], 104 | center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=False) 105 | 106 | spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) 107 | 108 | spec = torch.matmul(mel_basis[fmax_dtype_device], spec) 109 | spec = spectral_normalize_torch(spec) 110 | 111 | return spec 112 | -------------------------------------------------------------------------------- /module/quantize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """Residual vector quantizer implementation.""" 8 | 9 | from dataclasses import dataclass, field 10 | import math 11 | import typing as tp 12 | 13 | import torch 14 | from torch import nn 15 | 16 | from module.core_vq import ResidualVectorQuantization 17 | 18 | 19 | @dataclass 20 | class QuantizedResult: 21 | quantized: torch.Tensor 22 | codes: torch.Tensor 23 | bandwidth: torch.Tensor # bandwidth in kb/s used, per batch item. 24 | penalty: tp.Optional[torch.Tensor] = None 25 | metrics: dict = field(default_factory=dict) 26 | 27 | 28 | class ResidualVectorQuantizer(nn.Module): 29 | """Residual Vector Quantizer. 30 | Args: 31 | dimension (int): Dimension of the codebooks. 32 | n_q (int): Number of residual vector quantizers used. 33 | bins (int): Codebook size. 34 | decay (float): Decay for exponential moving average over the codebooks. 35 | kmeans_init (bool): Whether to use kmeans to initialize the codebooks. 36 | kmeans_iters (int): Number of iterations used for kmeans initialization. 37 | threshold_ema_dead_code (int): Threshold for dead code expiration. Replace any codes 38 | that have an exponential moving average cluster size less than the specified threshold with 39 | randomly selected vector from the current batch. 40 | """ 41 | def __init__( 42 | self, 43 | dimension: int = 256, 44 | n_q: int = 8, 45 | bins: int = 1024, 46 | decay: float = 0.99, 47 | kmeans_init: bool = True, 48 | kmeans_iters: int = 50, 49 | threshold_ema_dead_code: int = 2, 50 | ): 51 | super().__init__() 52 | self.n_q = n_q 53 | self.dimension = dimension 54 | self.bins = bins 55 | self.decay = decay 56 | self.kmeans_init = kmeans_init 57 | self.kmeans_iters = kmeans_iters 58 | self.threshold_ema_dead_code = threshold_ema_dead_code 59 | self.vq = ResidualVectorQuantization( 60 | dim=self.dimension, 61 | codebook_size=self.bins, 62 | num_quantizers=self.n_q, 63 | decay=self.decay, 64 | kmeans_init=self.kmeans_init, 65 | kmeans_iters=self.kmeans_iters, 66 | threshold_ema_dead_code=self.threshold_ema_dead_code, 67 | ) 68 | 69 | def forward(self, x: torch.Tensor, n_q: tp.Optional[int] = None, layers: tp.Optional[list] = None) -> QuantizedResult: 70 | """Residual vector quantization on the given input tensor. 71 | Args: 72 | x (torch.Tensor): Input tensor. 73 | n_q (int): Number of quantizer used to quantize. Default: All quantizers. 74 | layers (list): Layer that need to return quantized. Defalt: None. 75 | Returns: 76 | QuantizedResult: 77 | The quantized (or approximately quantized) representation with 78 | the associated numbert quantizers and layer quantized required to return. 79 | """ 80 | n_q = n_q if n_q else self.n_q 81 | if layers and max(layers) >= n_q: 82 | raise ValueError(f'Last layer index in layers: A {max(layers)}. Number of quantizers in RVQ: B {self.n_q}. A must less than B.') 83 | quantized, codes, commit_loss, quantized_list = self.vq(x, n_q=n_q, layers=layers) 84 | return quantized, codes, torch.mean(commit_loss), quantized_list 85 | 86 | 87 | def encode(self, x: torch.Tensor, n_q: tp.Optional[int] = None, st: tp.Optional[int] = None) -> torch.Tensor: 88 | """Encode a given input tensor with the specified sample rate at the given bandwidth. 89 | The RVQ encode method sets the appropriate number of quantizer to use 90 | and returns indices for each quantizer. 91 | Args: 92 | x (torch.Tensor): Input tensor. 93 | n_q (int): Number of quantizer used to quantize. Default: All quantizers. 94 | st (int): Start to encode input from which layers. Default: 0. 95 | """ 96 | n_q = n_q if n_q else self.n_q 97 | st = st or 0 98 | codes = self.vq.encode(x, n_q=n_q, st=st) 99 | return codes 100 | 101 | def decode(self, codes: torch.Tensor, st: int = 0) -> torch.Tensor: 102 | """Decode the given codes to the quantized representation. 103 | Args: 104 | codes (torch.Tensor): Input indices for each quantizer. 105 | st (int): Start to decode input codes from which layers. Default: 0. 106 | """ 107 | quantized = self.vq.decode(codes, st=st) 108 | return quantized -------------------------------------------------------------------------------- /prepare_datasets/step-four.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math,traceback 3 | import multiprocessing 4 | import sys,pdb 5 | now_dir = os.getcwd() 6 | sys.path.append(now_dir) 7 | from random import shuffle 8 | import torch.multiprocessing as mp 9 | import glob 10 | from tqdm import tqdm 11 | import logging,librosa,utils,torch 12 | from module.models import SynthesizerTrn 13 | from my_utils import load_audio 14 | 15 | 16 | logging.getLogger("numba").setLevel(logging.WARNING) 17 | 18 | 19 | if __name__ == "__main__": 20 | 21 | # //////////////////////////////////////////// 22 | 23 | source_path = "I:\\GPT-Talker\\" 24 | exp_name = "DailyTalk" 25 | h_turn = 2 # Dialogue history rounds 26 | 27 | # //////////////////////////////////////////// 28 | 29 | inp_text = source_path+"\\datasets\\processed\\"+exp_name+"\\slicer_opt.list" 30 | inp_wav_dir = source_path+"\\datasets\\raw\\"+exp_name 31 | opt_dir = source_path+"\\datasets\\processed\\"+exp_name+"\\" 32 | bert_pretrained_dir = source_path+"pretrained_models\\chinese-hubert-base\\" 33 | txt_path="%s/2-name2text.txt"%(opt_dir) 34 | hubert_dir="%s/4-cnhubert"%(opt_dir) 35 | wav32dir="%s/5-wav32k"%(opt_dir) 36 | semantic_path = "%s/6-name2semantic.tsv"%(opt_dir) 37 | 38 | pretrained_s2G = "pretrained_models\\s2G488k.pth" 39 | s2config_path = "configs\\s2.json" 40 | 41 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 42 | is_half = eval(os.environ.get("is_half", "True")) 43 | 44 | 45 | if(os.path.exists(semantic_path)==False): 46 | os.makedirs(opt_dir, exist_ok=True) 47 | 48 | device="cuda:0" 49 | hps = utils.get_hparams_from_file(s2config_path) 50 | vq_model = SynthesizerTrn( 51 | hps.data.filter_length // 2 + 1, 52 | hps.train.segment_size // hps.data.hop_length, 53 | n_speakers=hps.data.n_speakers, 54 | **hps.model) 55 | 56 | if(is_half==True): 57 | vq_model=vq_model.half().to(device) 58 | else: 59 | vq_model = vq_model.to(device) 60 | 61 | vq_model.eval() 62 | 63 | print(vq_model.load_state_dict(torch.load(pretrained_s2G,map_location="cpu")["weight"], strict=False)) 64 | 65 | def name2go(wav_name, lines): 66 | semantic = "" 67 | c_dialogue = wav_name.split("_")[2].strip("d").strip(".wav") 68 | source_folder = "%s/%s/"%(inp_wav_dir,c_dialogue) 69 | semantic_len = [] 70 | c_index = wav_name.split("_")[0] 71 | matching_wav_files = [] 72 | for current_index in range(0, int(c_index)): 73 | 74 | if(current_index < int(c_index) and current_index>=(int(c_index) - h_turn)): 75 | 76 | h_hubert_paths = glob.glob(hubert_dir+"\\"+str(current_index)+"_*_d"+c_dialogue+".wav.pt") 77 | if(len(h_hubert_paths) == 0): 78 | continue 79 | else: 80 | h_hubert_path = h_hubert_paths[0] 81 | if(os.path.exists(h_hubert_path) == False):return 82 | 83 | h_ssl_content = torch.load(h_hubert_path, map_location="cpu") 84 | 85 | if(is_half == True): 86 | h_ssl_content = h_ssl_content.half().to(device) 87 | else: 88 | h_ssl_content = h_ssl_content.to(device) 89 | 90 | h_codes = vq_model.extract_latent(h_ssl_content) 91 | 92 | if(semantic == ""): 93 | semantic = " ".join([str(i) for i in h_codes[0, 0, :].tolist()]) 94 | semantic = semantic + " " 95 | semantic_len.append(len(h_codes[0, 0, :].tolist())) 96 | else: 97 | semantic += " ".join([str(i) for i in h_codes[0, 0, :].tolist()]) 98 | semantic = semantic + " " 99 | semantic_len.append(len(h_codes[0, 0, :].tolist())) 100 | 101 | c_hubert_path = "%s/%s.pt" % (hubert_dir, wav_name) 102 | if(os.path.exists(c_hubert_path) == False):return 103 | 104 | c_ssl_content = torch.load(c_hubert_path, map_location="cpu") 105 | 106 | if(is_half == True): 107 | c_ssl_content = c_ssl_content.half().to(device) 108 | else: 109 | c_ssl_content = c_ssl_content.to(device) 110 | 111 | c_codes = vq_model.extract_latent(c_ssl_content) 112 | 113 | if(semantic == ""): 114 | semantic = " ".join([str(i) for i in c_codes[0, 0, :].tolist()]) 115 | semantic_len.append(len(c_codes[0, 0, :].tolist())) 116 | else: 117 | semantic += " ".join([str(i) for i in c_codes[0, 0, :].tolist()]) 118 | semantic_len.append(len(c_codes[0, 0, :].tolist())) 119 | 120 | lines.append("%s\t%s\t%s"%(wav_name, semantic, semantic_len)) 121 | 122 | with open(inp_text,"r", encoding="utf8") as f: 123 | lines = f.read().strip("\n").split("\n") 124 | 125 | lines1 = [] 126 | count = 0 127 | for line in lines: 128 | print(line) 129 | try: 130 | wav_name, spk_name, language, text = line.split("|") 131 | wav_name = os.path.basename(wav_name) 132 | name2go(wav_name, lines1) 133 | except: 134 | print(line, traceback.format_exc()) 135 | 136 | header = "item_name\tsemantic_audio\tsemantic_len\n" 137 | with open(semantic_path, "w", encoding="utf8") as f: 138 | f.write(header+"\n".join(lines1)) -------------------------------------------------------------------------------- /prepare_datasets/step-one.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | import wave 5 | 6 | def write_to_list_file(file_path, content_list): 7 | parent_dir = os.path.dirname(file_path) 8 | 9 | if not os.path.exists(parent_dir): 10 | os.makedirs(parent_dir) 11 | 12 | with open(file_path, 'w', encoding="utf8") as file: 13 | for line in content_list: 14 | file.write(line + '\n') 15 | 16 | def get_all_folders(path): 17 | folder_paths = [] 18 | for root, dirs, files in os.walk(path): 19 | for dir in dirs: 20 | folder_path = os.path.join(root, dir) 21 | folder_paths.append(folder_path) 22 | return folder_paths 23 | 24 | def get_all_wav_in_dir(dir_path): 25 | wav_files = [] 26 | for root, dirs, files in os.walk(dir_path): 27 | for file in files: 28 | if file.endswith('.wav'): 29 | wav_files.append(os.path.join(root, file)) 30 | return wav_files 31 | 32 | if __name__ == "__main__": 33 | 34 | # ///////////////////////////////////////////// 35 | 36 | target_dir = "I:\\GPT-Talker\\datasets\\processed\\DailyTalk\\" 37 | lang = "EN" # ZH 38 | root_dir = "I:\\GPT-Talker\\datasets\\raw\\DailyTalk\\" 39 | data_name = "DailyTalk" # Dialogue Turn = 2 40 | 41 | # //////////////////////////////////////////// 42 | 43 | # Generate a .list file (slicer_opt.list) 44 | # Data format: [address of voice|speaker identity|language|corresponding text] 45 | 46 | txt_files = glob.glob(os.path.join(target_dir, '*.txt')) # .lab 47 | if not txt_files: 48 | txt_files = glob.glob(os.path.join(target_dir, '*.lab')) 49 | 50 | folders = get_all_folders(root_dir) 51 | data = {} 52 | content_list = [] 53 | 54 | for folder in folders: 55 | print(folder) 56 | wav_files = get_all_wav_in_dir(folder) 57 | 58 | for wav in wav_files: 59 | print("------new dialogue-------") 60 | print(wav) 61 | wav_basename = os.path.basename(wav) 62 | index = wav_basename.split("_")[0] 63 | speaker = wav_basename.split("_")[1] 64 | dialogue = wav_basename.split("_")[2].strip("d").strip(".wav") 65 | 66 | txt_path = os.path.join(folder,wav_basename.replace(".wav",".txt")) 67 | if not txt_path: 68 | txt_path = glob.glob(os.path.join(target_dir, '*.lab')) 69 | 70 | with open(txt_path, "r", encoding="utf8") as file: 71 | content = file.read() 72 | 73 | wav_path = wav 74 | speaker = data_name+"_"+lang+"_"+speaker 75 | language = lang 76 | text = content.strip() 77 | 78 | content_list.append(wav_path+"|"+speaker+"|"+language+"|"+text) 79 | 80 | # Calling the function to write to a .list file 81 | write_to_list_file(os.path.join(target_dir,"slicer_opt.list"), content_list) -------------------------------------------------------------------------------- /prepare_datasets/step_three.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys, os 3 | from feature_extractor import cnhubert 4 | import pdb, traceback, numpy as np, logging 5 | from scipy.io import wavfile 6 | import librosa, torch 7 | from time import time as ttime 8 | import shutil 9 | import glob 10 | from my_utils import load_audio 11 | now_dir = os.getcwd() 12 | sys.path.append(now_dir) 13 | 14 | 15 | def my_save(fea,path): 16 | dir = os.path.dirname(path) 17 | name = os.path.basename(path) 18 | tmp_path = "%s/%s.pth"%(dir,ttime()) 19 | torch.save(fea, tmp_path) 20 | shutil.move(tmp_path, "%s/%s"%(dir,name)) 21 | 22 | def name2go(wav_name,hubert_dir): 23 | wav_basename = os.path.basename(wav_name) 24 | hubert_path = "%s/%s.pt"%(hubert_dir,wav_basename) 25 | if(os.path.exists(hubert_path)):return 26 | 27 | c_dialogue = wav_basename.split("_")[2].strip("d").strip(".wav") 28 | wav_path = wav_name 29 | print(wav_path) 30 | 31 | ssl_len = [] 32 | 33 | c_index = wav_basename.split("_")[0] 34 | source_folder = "%s/%s/"%(inp_wav_dir,c_dialogue) 35 | matching_wav_files = [] 36 | 37 | ssl = None 38 | 39 | tmp_audio = load_audio(wav_path, 32000) 40 | tmp_max = np.abs(tmp_audio).max() 41 | 42 | tmp_audio32 = (tmp_audio / tmp_max * (maxx * alpha*32768)) + ((1 - alpha)*32768) * tmp_audio 43 | tmp_audio = librosa.resample( 44 | tmp_audio32, orig_sr=32000, target_sr=16000 45 | ) 46 | 47 | tensor_wav16 = torch.from_numpy(tmp_audio) 48 | 49 | if (is_half == True): 50 | tensor_wav16=tensor_wav16.half().to(device) 51 | else: 52 | tensor_wav16 = tensor_wav16.to(device) 53 | 54 | c_ssl = model.model(tensor_wav16.unsqueeze(0))["last_hidden_state"].transpose(1,2).cpu() 55 | 56 | if(ssl == None): 57 | ssl = c_ssl 58 | ssl_len.append(c_ssl.size(-1)) 59 | else: 60 | ssl = torch.cat([ssl, c_ssl], dim=-1) 61 | ssl_len.append(c_ssl.size(-1)) 62 | 63 | if np.isnan(ssl.detach().numpy()).sum() != 0: 64 | print("--") 65 | return 66 | 67 | wavfile.write( 68 | "%s/%s"%(wav32dir, wav_basename), 69 | 32000, 70 | tmp_audio32.astype("int16"), 71 | ) 72 | my_save(ssl, hubert_path) 73 | opt.append("%s\t%s"%(wav_basename, ssl_len)) 74 | 75 | 76 | if __name__ == "__main__": 77 | # //////////////////////////////////////////// 78 | 79 | source_path = "I:\\GPT-Talker\\" 80 | exp_name = "DailyTalk" 81 | h_turn = 2 # Dialogue history rounds 82 | 83 | # //////////////////////////////////////////// 84 | 85 | opt_dir = source_path+"\\datasets\\processed\\"+exp_name+"\\" 86 | inp_text= "%s/slicer_opt.list"%(opt_dir) 87 | inp_wav_dir = source_path+"\\datasets\\raw\\"+exp_name 88 | txt_path = "%s/2-cnhubert-len.txt"%(opt_dir) 89 | 90 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 91 | 92 | bert_pretrained_dir = source_path+"pretrained_models\\chinese-hubert-base\\" 93 | cnhubert.cnhubert_base_path = bert_pretrained_dir 94 | is_half = eval(os.environ.get("is_half", "True")) 95 | 96 | hubert_dir="%s/4-cnhubert"%(opt_dir) 97 | wav32dir="%s/5-wav32k"%(opt_dir) 98 | os.makedirs(opt_dir,exist_ok=True) 99 | os.makedirs(hubert_dir,exist_ok=True) 100 | os.makedirs(wav32dir,exist_ok=True) 101 | 102 | maxx = 0.95 103 | alpha = 0.5 104 | device = "cuda:0" 105 | model = cnhubert.get_model() 106 | if(is_half==True): 107 | model=model.half().to(device) 108 | else: 109 | model = model.to(device) 110 | 111 | opt=[] 112 | 113 | 114 | with open(inp_text, "r", encoding="utf8")as f: 115 | lines = f.read().strip("\n").split("\n") 116 | 117 | for line in lines: 118 | try: 119 | wav_name, spk_name, language, text = line.split("|") 120 | name2go(wav_name,hubert_dir) 121 | 122 | except: 123 | print(line,traceback.format_exc()) 124 | 125 | with open(txt_path, "w", encoding="utf8") as f: 126 | f.write("\n".join(opt)+"\n") 127 | -------------------------------------------------------------------------------- /pretrained_models/chinese-hubert-base/config.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c3e5060a1277e0f078cc6be9da4528a605dba6ece93018981fe2c820e5c7b103 3 | size 1449 4 | -------------------------------------------------------------------------------- /pretrained_models/chinese-hubert-base/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:dcd684124d06722947939d41ea6ae58dbf10968c60a11a29f23ddc602c64a29b 3 | size 212 4 | -------------------------------------------------------------------------------- /pretrained_models/chinese-roberta-wwm-ext-large/config.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3d57de2fd7e80d0e5c8ff194f0bbb6baa10df7e43fc262a0cc71298a78b0a3e5 3 | size 963 4 | -------------------------------------------------------------------------------- /pretrained_models/chinese-roberta-wwm-ext-large/tokenizer.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:173796956820ea27bd14f76bf28162607ff4254807e2948253eb5b46f5bb643b 3 | size 268962 4 | -------------------------------------------------------------------------------- /process_ckpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import traceback 4 | from collections import OrderedDict 5 | 6 | import torch 7 | from i18n.i18n import I18nAuto 8 | i18n = I18nAuto() 9 | def savee(ckpt, name, epoch, steps, hps): 10 | try: 11 | opt = OrderedDict() 12 | opt["weight"] = {} 13 | for key in ckpt.keys(): 14 | if "enc_q" in key: 15 | continue 16 | opt["weight"][key] = ckpt[key].half() 17 | opt["config"] = hps 18 | opt["info"] = "%sepoch_%siteration" % (epoch,steps) 19 | torch.save(opt, "%s/%s.pth" % (hps.save_weight_dir,name)) 20 | return "Success." 21 | except: 22 | return traceback.format_exc() 23 | -------------------------------------------------------------------------------- /s1_train.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/train_t2s.py 2 | import os 3 | import pdb 4 | 5 | if("_CUDA_VISIBLE_DEVICES"in os.environ): 6 | os.environ["CUDA_VISIBLE_DEVICES"]=os.environ["_CUDA_VISIBLE_DEVICES"] 7 | import argparse 8 | import logging 9 | from pathlib import Path 10 | 11 | import torch,platform 12 | from pytorch_lightning import seed_everything 13 | from pytorch_lightning import Trainer 14 | from pytorch_lightning.callbacks import ModelCheckpoint 15 | from pytorch_lightning.loggers import TensorBoardLogger # WandbLogger 16 | from pytorch_lightning.strategies import DDPStrategy 17 | from AR.data.data_module import Text2SemanticDataModule 18 | from AR.models.t2s_lightning_module import Text2SemanticLightningModule 19 | from AR.utils.io import load_yaml_config 20 | logging.getLogger('numba').setLevel(logging.WARNING) 21 | logging.getLogger('matplotlib').setLevel(logging.WARNING) 22 | torch.set_float32_matmul_precision('high') 23 | from AR.utils import get_newest_ckpt 24 | import pathlib 25 | from collections import OrderedDict 26 | 27 | class my_model_ckpt(ModelCheckpoint): 28 | def __init__(self,config,if_save_latest,if_save_every_weights,half_weights_save_dir,exp_name,**kwargs): 29 | super().__init__(**kwargs) 30 | self.if_save_latest=if_save_latest 31 | self.if_save_every_weights=if_save_every_weights 32 | self.half_weights_save_dir=half_weights_save_dir 33 | self.exp_name=exp_name 34 | self.config=config 35 | 36 | def on_train_epoch_end(self, trainer, pl_module): 37 | if not self._should_skip_saving_checkpoint(trainer) and self._should_save_on_train_epoch_end(trainer): 38 | monitor_candidates = self._monitor_candidates(trainer) 39 | 40 | if self._every_n_epochs >= 1 and (trainer.current_epoch + 1) % self._every_n_epochs == 0: 41 | if(self.if_save_latest==True): 42 | to_clean=list(os.listdir(self.dirpath)) 43 | 44 | self._save_topk_checkpoint(trainer, monitor_candidates) 45 | if (self.if_save_latest == True): 46 | for name in to_clean: 47 | try: 48 | os.remove("%s/%s"%(self.dirpath,name)) 49 | except:pass 50 | if(self.if_save_every_weights==True): 51 | to_save_od=OrderedDict() 52 | to_save_od["weight"]=OrderedDict() 53 | dictt=trainer.strategy._lightning_module.state_dict() 54 | for key in dictt:to_save_od["weight"][key]=dictt[key].half() 55 | to_save_od["config"]=self.config 56 | to_save_od["info"]="GPT-e%s"%(trainer.current_epoch+1) 57 | torch.save(to_save_od,"%s/%s-e%s.ckpt"%(self.half_weights_save_dir,self.exp_name,trainer.current_epoch+1)) 58 | 59 | self._save_last_checkpoint(trainer, monitor_candidates) 60 | 61 | 62 | def main(args): 63 | config = load_yaml_config(args.config_file) 64 | 65 | output_dir = Path(config["output_dir"]) 66 | output_dir.mkdir(parents=True, exist_ok=True) 67 | 68 | ckpt_dir = output_dir / 'ckpt' 69 | ckpt_dir.mkdir(parents=True, exist_ok=True) 70 | 71 | seed_everything(config["train"]["seed"], workers=True) 72 | ckpt_callback: ModelCheckpoint = my_model_ckpt( 73 | config=config, 74 | if_save_latest=config["train"]["if_save_latest"], if_save_every_weights=config["train"]["if_save_every_weights"], half_weights_save_dir=config["train"]["half_weights_save_dir"], exp_name=config["train"]["exp_name"], 75 | save_top_k=-1, 76 | monitor='top_3_acc', 77 | mode='max', 78 | save_on_train_epoch_end=True, 79 | every_n_epochs=config["train"]["save_every_n_epoch"], 80 | dirpath=ckpt_dir, 81 | ) 82 | 83 | logger = TensorBoardLogger( 84 | name=output_dir.stem, 85 | save_dir=output_dir 86 | ) 87 | 88 | trainer: Trainer = Trainer( 89 | max_epochs=config["train"]["epochs"], 90 | accelerator='gpu', 91 | limit_val_batches=0, 92 | devices=-1, 93 | benchmark=False, 94 | fast_dev_run=False, 95 | strategy=DDPStrategy(process_group_backend="nccl"if platform.system()!="Windows"else "gloo"), 96 | precision=config["train"]["precision"], 97 | logger=logger,num_sanity_val_steps=0, 98 | callbacks=[ckpt_callback]) 99 | 100 | model: Text2SemanticLightningModule = Text2SemanticLightningModule( 101 | config, output_dir) 102 | 103 | data_module: Text2SemanticDataModule = Text2SemanticDataModule( 104 | config, 105 | train_semantic_path=config["train_semantic_path"], 106 | train_phoneme_path=config["train_phoneme_path"], 107 | ) 108 | 109 | print("Loading the existing model...") 110 | try: 111 | newest_ckpt_name = get_newest_ckpt(os.listdir(ckpt_dir)) 112 | ckpt_path = ckpt_dir / newest_ckpt_name 113 | except Exception: 114 | ckpt_path = None 115 | print("ckpt_path:", ckpt_path) 116 | 117 | 118 | temp = pathlib.PosixPath 119 | pathlib.PosixPath = pathlib.WindowsPath 120 | 121 | print("1:{}".format(torch.cuda.memory_allocated(0))) 122 | trainer.fit(model, data_module, ckpt_path=ckpt_path) 123 | print("2:{}".format(torch.cuda.memory_allocated(0))) 124 | 125 | 126 | # srun --gpus-per-node=1 --ntasks-per-node=1 python train.py --path-to-configuration configurations/default.yaml 127 | if __name__ == '__main__': 128 | parser = argparse.ArgumentParser() 129 | parser.add_argument( 130 | '-c', 131 | '--config_file', 132 | type=str, 133 | default='configs/s1longer.yaml', 134 | help='path of config file') 135 | 136 | args = parser.parse_args() 137 | logging.info(str(args)) 138 | main(args) 139 | -------------------------------------------------------------------------------- /text/__init__.py: -------------------------------------------------------------------------------- 1 | from text.symbols import * 2 | 3 | 4 | _symbol_to_id = {s: i for i, s in enumerate(symbols)} 5 | 6 | def cleaned_text_to_sequence(cleaned_text): 7 | '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text. 8 | Args: 9 | text: string to convert to a sequence 10 | Returns: 11 | List of integers corresponding to the symbols in the text 12 | ''' 13 | phones = [_symbol_to_id[symbol] for symbol in cleaned_text] 14 | return phones 15 | 16 | -------------------------------------------------------------------------------- /text/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/text/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text/__pycache__/chinese.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/text/__pycache__/chinese.cpython-39.pyc -------------------------------------------------------------------------------- /text/__pycache__/cleaner.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/text/__pycache__/cleaner.cpython-39.pyc -------------------------------------------------------------------------------- /text/__pycache__/english.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/text/__pycache__/english.cpython-39.pyc -------------------------------------------------------------------------------- /text/__pycache__/japanese.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/text/__pycache__/japanese.cpython-39.pyc -------------------------------------------------------------------------------- /text/__pycache__/symbols.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/text/__pycache__/symbols.cpython-39.pyc -------------------------------------------------------------------------------- /text/__pycache__/tone_sandhi.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/text/__pycache__/tone_sandhi.cpython-39.pyc -------------------------------------------------------------------------------- /text/chinese.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pdb 3 | import re 4 | 5 | import cn2an 6 | from pypinyin import lazy_pinyin, Style 7 | from text.symbols import punctuation 8 | from text.tone_sandhi import ToneSandhi 9 | 10 | current_file_path = os.path.dirname(__file__) 11 | pinyin_to_symbol_map = {line.split("\t")[0]: line.strip().split("\t")[1] for line in 12 | open(os.path.join(current_file_path, 'opencpop-strict.txt')).readlines()} 13 | 14 | import jieba.posseg as psg 15 | 16 | 17 | rep_map = { 18 | ':': ',', 19 | ';': ',', 20 | ',': ',', 21 | '。': '.', 22 | '!': '!', 23 | '?': '?', 24 | '\n': '.', 25 | "·": ",", 26 | '、': ",", 27 | '...': '…', 28 | '$': '.', 29 | '/': ',', 30 | '—': "-" 31 | } 32 | 33 | tone_modifier = ToneSandhi() 34 | 35 | def replace_punctuation(text): 36 | text = text.replace("嗯", "恩").replace("呣","母") 37 | pattern = re.compile('|'.join(re.escape(p) for p in rep_map.keys())) 38 | 39 | replaced_text = pattern.sub(lambda x: rep_map[x.group()], text) 40 | 41 | replaced_text = re.sub(r'[^\u4e00-\u9fa5'+"".join(punctuation)+r']+', '', replaced_text) 42 | 43 | return replaced_text 44 | 45 | def g2p(text): 46 | pattern = r'(?<=[{0}])\s*'.format(''.join(punctuation)) 47 | sentences = [i for i in re.split(pattern, text) if i.strip()!=''] 48 | phones, word2ph = _g2p(sentences) 49 | return phones, word2ph 50 | 51 | 52 | def _get_initials_finals(word): 53 | initials = [] 54 | finals = [] 55 | orig_initials = lazy_pinyin( 56 | word, neutral_tone_with_five=True, style=Style.INITIALS) 57 | orig_finals = lazy_pinyin( 58 | word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) 59 | for c, v in zip(orig_initials, orig_finals): 60 | initials.append(c) 61 | finals.append(v) 62 | return initials, finals 63 | 64 | 65 | def _g2p(segments): 66 | phones_list = [] 67 | word2ph = [] 68 | for seg in segments: 69 | pinyins = [] 70 | # Replace all English words in the sentence 71 | seg = re.sub('[a-zA-Z]+', '', seg) 72 | seg_cut = psg.lcut(seg) 73 | initials = [] 74 | finals = [] 75 | seg_cut = tone_modifier.pre_merge_for_modify(seg_cut) 76 | for word, pos in seg_cut: 77 | if pos == 'eng': 78 | continue 79 | sub_initials, sub_finals = _get_initials_finals(word) 80 | sub_finals = tone_modifier.modified_tone(word, pos, 81 | sub_finals) 82 | initials.append(sub_initials) 83 | finals.append(sub_finals) 84 | 85 | # assert len(sub_initials) == len(sub_finals) == len(word) 86 | initials = sum(initials, []) 87 | finals = sum(finals, []) 88 | # 89 | for c, v in zip(initials, finals): 90 | raw_pinyin = c+v 91 | # NOTE: post process for pypinyin outputs 92 | # we discriminate i, ii and iii 93 | if c == v: 94 | assert c in punctuation 95 | phone = [c] 96 | word2ph.append(1) 97 | else: 98 | v_without_tone = v[:-1] 99 | tone = v[-1] 100 | 101 | pinyin = c+v_without_tone 102 | assert tone in '12345' 103 | 104 | if c: 105 | v_rep_map = { 106 | "uei": 'ui', 107 | 'iou': 'iu', 108 | 'uen': 'un', 109 | } 110 | if v_without_tone in v_rep_map.keys(): 111 | pinyin = c+v_rep_map[v_without_tone] 112 | else: 113 | pinyin_rep_map = { 114 | 'ing': 'ying', 115 | 'i': 'yi', 116 | 'in': 'yin', 117 | 'u': 'wu', 118 | } 119 | if pinyin in pinyin_rep_map.keys(): 120 | pinyin = pinyin_rep_map[pinyin] 121 | else: 122 | single_rep_map = { 123 | 'v': 'yu', 124 | 'e': 'e', 125 | 'i': 'y', 126 | 'u': 'w', 127 | } 128 | if pinyin[0] in single_rep_map.keys(): 129 | pinyin = single_rep_map[pinyin[0]]+pinyin[1:] 130 | 131 | assert pinyin in pinyin_to_symbol_map.keys(), (pinyin, seg, raw_pinyin) 132 | new_c, new_v = pinyin_to_symbol_map[pinyin].split(' ') 133 | new_v = new_v + tone 134 | phone = [new_c, new_v] 135 | word2ph.append(len(phone)) 136 | 137 | phones_list += phone 138 | return phones_list, word2ph 139 | 140 | 141 | 142 | def text_normalize(text): 143 | numbers = re.findall(r'\d+(?:\.?\d+)?', text) 144 | for number in numbers: 145 | text = text.replace(number, cn2an.an2cn(number), 1) 146 | text = replace_punctuation(text) 147 | 148 | return text 149 | 150 | 151 | if __name__ == '__main__': 152 | text = "测试" 153 | text = text_normalize(text) 154 | print(g2p(text)) 155 | 156 | -------------------------------------------------------------------------------- /text/cleaner.py: -------------------------------------------------------------------------------- 1 | from text import chinese, cleaned_text_to_sequence, symbols, english 2 | from text.minbpe.minbpe import GPT4Tokenizer 3 | 4 | language_module_map = { 5 | 'zh': chinese, 6 | 'en': english 7 | } 8 | special = [ 9 | ('%', 'zh', "SP"), 10 | ('¥', 'zh', "SP2"), 11 | ('^', 'zh', "SP3"), 12 | ] 13 | def clean_text(text, language): 14 | for special_s, special_l, target_symbol in special: 15 | if special_s in text and language == special_l: 16 | return clean_special(text, language, special_s, target_symbol) 17 | language_module = language_module_map[language] 18 | norm_text = language_module.text_normalize(text) 19 | if(language=="zh"): 20 | phones, word2ph = language_module.g2p(norm_text) 21 | assert len(phones) == sum(word2ph) 22 | assert len(norm_text) == len(word2ph) 23 | else: 24 | phones = language_module.g2p(norm_text) 25 | word2ph=None 26 | 27 | for ph in phones: 28 | assert ph in symbols 29 | return phones, word2ph, norm_text 30 | 31 | 32 | def clean_special(text, language, special_s, target_symbol): 33 | """ 34 | Special mute segment sp symbol handling 35 | """ 36 | text = text.replace(special_s, ",") 37 | language_module = language_module_map[language] 38 | norm_text = language_module.text_normalize(text) 39 | phones = language_module.g2p(norm_text) 40 | new_ph = [] 41 | for ph in phones: 42 | assert ph in symbols 43 | if ph == ',': 44 | new_ph.append(target_symbol) 45 | else: 46 | new_ph.append(ph) 47 | return new_ph 48 | 49 | def text_to_sequence(text, language): 50 | phones = clean_text(text) 51 | return cleaned_text_to_sequence(phones) 52 | 53 | tokenizer = GPT4Tokenizer() 54 | def clean_text_BPE(text, language): 55 | 56 | language_module = language_module_map[language] 57 | norm_text = language_module.text_normalize(text) 58 | 59 | phone_ids = tokenizer.encode(norm_text, allowed_special="all") 60 | word2ph = None 61 | 62 | return phone_ids, word2ph, norm_text 63 | 64 | 65 | if __name__ == '__main__': 66 | print(clean_text_BPE("测试", 'zh')) 67 | 68 | 69 | -------------------------------------------------------------------------------- /text/cmudict_cache.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walker-hyf/GPT-Talker/a18704a9e102ef2d216f8b7e3ee1d2c313bd5c26/text/cmudict_cache.pickle -------------------------------------------------------------------------------- /text/english.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import os 3 | import re 4 | from g2p_en import G2p 5 | 6 | from string import punctuation 7 | 8 | from text import symbols 9 | 10 | current_file_path = os.path.dirname(__file__) 11 | CMU_DICT_PATH = os.path.join(current_file_path, 'cmudict.rep') 12 | CACHE_PATH = os.path.join(current_file_path, 'cmudict_cache.pickle') 13 | _g2p = G2p() 14 | 15 | arpa = {'AH0', 'S', 'AH1', 'EY2', 'AE2', 'EH0', 'OW2', 'UH0', 'NG', 'B', 'G', 'AY0', 'M', 'AA0', 'F', 'AO0', 'ER2', 'UH1', 'IY1', 'AH2', 'DH', 'IY0', 'EY1', 'IH0', 'K', 'N', 'W', 'IY2', 'T', 'AA1', 'ER1', 'EH2', 'OY0', 'UH2', 'UW1', 'Z', 'AW2', 'AW1', 'V', 'UW2', 'AA2', 'ER', 'AW0', 'UW0', 'R', 'OW1', 'EH1', 'ZH', 'AE0', 'IH2', 'IH', 'Y', 'JH', 'P', 'AY1', 'EY0', 'OY2', 'TH', 'HH', 'D', 'ER0', 'CH', 'AO1', 'AE1', 'AO2', 'OY1', 'AY2', 'IH1', 'OW0', 'L', 'SH'} 16 | 17 | 18 | def replace_phs(phs): 19 | rep_map = { 20 | ';': ',', 21 | ':': ',', 22 | '\'': '-', 23 | '"': '-' 24 | } 25 | phs_new = [] 26 | for ph in phs: 27 | if ph in symbols: 28 | phs_new.append(ph) 29 | elif ph in rep_map.keys(): 30 | phs_new.append(rep_map[ph]) 31 | else: 32 | print('ph not in symbols: ', ph) 33 | return phs_new 34 | 35 | def read_dict(): 36 | g2p_dict = {} 37 | start_line = 49 38 | with open(CMU_DICT_PATH) as f: 39 | line = f.readline() 40 | line_index = 1 41 | while line: 42 | if line_index >= start_line: 43 | line = line.strip() 44 | word_split = line.split(' ') 45 | word = word_split[0] 46 | 47 | syllable_split = word_split[1].split(' - ') 48 | g2p_dict[word] = [] 49 | for syllable in syllable_split: 50 | phone_split = syllable.split(' ') 51 | g2p_dict[word].append(phone_split) 52 | 53 | line_index = line_index + 1 54 | line = f.readline() 55 | 56 | return g2p_dict 57 | 58 | 59 | def cache_dict(g2p_dict, file_path): 60 | with open(file_path, 'wb') as pickle_file: 61 | pickle.dump(g2p_dict, pickle_file) 62 | 63 | 64 | def get_dict(): 65 | if os.path.exists(CACHE_PATH): 66 | with open(CACHE_PATH, 'rb') as pickle_file: 67 | g2p_dict = pickle.load(pickle_file) 68 | else: 69 | g2p_dict = read_dict() 70 | cache_dict(g2p_dict, CACHE_PATH) 71 | 72 | return g2p_dict 73 | 74 | eng_dict = get_dict() 75 | 76 | 77 | def text_normalize(text): 78 | # todo: eng text normalize 79 | return text.replace(";", ",") 80 | 81 | def g2p(text): 82 | 83 | phones = [] 84 | words = re.split(r"([,;.\-\?\!\s+])", text) 85 | for w in words: 86 | if w.upper() in eng_dict: 87 | phns = eng_dict[w.upper()] 88 | for ph in phns: 89 | phones += ph 90 | else: 91 | phone_list = list(filter(lambda p: p != " ", _g2p(w))) 92 | for ph in phone_list: 93 | if ph in arpa: 94 | phones.append(ph) 95 | else: 96 | phones.append(ph) 97 | 98 | return replace_phs(phones) 99 | 100 | if __name__ == "__main__": 101 | # print(get_dict()) 102 | print(g2p("hello")) -------------------------------------------------------------------------------- /text/symbols.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | punctuation = ['!', '?', '…', ",", "."] #@ It's an SP pause. 4 | punctuation.append("-") 5 | pu_symbols = punctuation + ["SP", 'SP2', 'SP3', "UNK"] 6 | # pu_symbols = punctuation + ["SP", 'SP2', 'SP3','SP4', "UNK"] 7 | pad = '_' 8 | 9 | c = ['AA', 'EE', 'OO', 'b', 'c', 'ch', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 'sh', 't', 'w', 'x', 'y', 'z', 'zh'] 10 | v = ['E1', 'En1', 'a1', 'ai1', 'an1', 'ang1', 'ao1', 'e1', 'ei1', 'en1', 'eng1', 'er1', 'i1', 'i01', 'ia1', 'ian1', 'iang1', 'iao1', 'ie1', 'in1', 'ing1', 'iong1', 'ir1', 'iu1', 'o1', 'ong1', 'ou1', 'u1', 'ua1', 'uai1', 'uan1', 'uang1', 'ui1', 'un1', 'uo1', 'v1', 'van1', 've1', 'vn1', 'E2', 'En2', 'a2', 'ai2', 'an2', 'ang2', 'ao2', 'e2', 'ei2', 'en2', 'eng2', 'er2', 'i2', 'i02', 'ia2', 'ian2', 'iang2', 'iao2', 'ie2', 'in2', 'ing2', 'iong2', 'ir2', 'iu2', 'o2', 'ong2', 'ou2', 'u2', 'ua2', 'uai2', 'uan2', 'uang2', 'ui2', 'un2', 'uo2', 'v2', 'van2', 've2', 'vn2', 'E3', 'En3', 'a3', 'ai3', 'an3', 'ang3', 'ao3', 'e3', 'ei3', 'en3', 'eng3', 'er3', 'i3', 'i03', 'ia3', 'ian3', 'iang3', 'iao3', 'ie3', 'in3', 'ing3', 'iong3', 'ir3', 'iu3', 'o3', 'ong3', 'ou3', 'u3', 'ua3', 'uai3', 'uan3', 'uang3', 'ui3', 'un3', 'uo3', 'v3', 'van3', 've3', 'vn3', 'E4', 'En4', 'a4', 'ai4', 'an4', 'ang4', 'ao4', 'e4', 'ei4', 'en4', 'eng4', 'er4', 'i4', 'i04', 'ia4', 'ian4', 'iang4', 'iao4', 'ie4', 'in4', 'ing4', 'iong4', 'ir4', 'iu4', 'o4', 'ong4', 'ou4', 'u4', 'ua4', 'uai4', 'uan4', 'uang4', 'ui4', 'un4', 'uo4', 'v4', 'van4', 've4', 'vn4', 'E5', 'En5', 'a5', 'ai5', 'an5', 'ang5', 'ao5', 'e5', 'ei5', 'en5', 'eng5', 'er5', 'i5', 'i05', 'ia5', 'ian5', 'iang5', 'iao5', 'ie5', 'in5', 'ing5', 'iong5', 'ir5', 'iu5', 'o5', 'ong5', 'ou5', 'u5', 'ua5', 'uai5', 'uan5', 'uang5', 'ui5', 'un5', 'uo5', 'v5', 'van5', 've5', 'vn5'] 11 | 12 | v_without_tone = ['E', 'En', 'a', 'ai', 'an', 'ang', 'ao', 'e', 'ei', 'en', 'eng', 'er', 'i', 'i0', 'ia', 'ian', 'iang', 'iao', 'ie', 'in', 'ing', 'iong', 'ir', 'iu', 'o', 'ong', 'ou', 'u', 'ua', 'uai', 'uan', 'uang', 'ui', 'un', 'uo', 'v', 'van', 've', 'vn'] 13 | 14 | # japanese 15 | ja_symbols = ['I', 'N', 'U', 'a', 'b', 'by', 'ch', 'cl', 'd', 'dy', 'e', 'f', 'g', 'gy', 'h', 'hy', 'i', 'j', 'k', 'ky', 16 | 'm', 'my', 'n', 'ny', 'o', 'p', 'py', 'r', 'ry', 's', 'sh', 't', 'ts', 'u', 'v', 'w', 'y', 'z'] 17 | 18 | arpa = {'AH0', 'S', 'AH1', 'EY2', 'AE2', 'EH0', 'OW2', 'UH0', 'NG', 'B', 'G', 'AY0', 'M', 'AA0', 'F', 'AO0', 'ER2', 'UH1', 'IY1', 'AH2', 'DH', 'IY0', 'EY1', 'IH0', 'K', 'N', 'W', 'IY2', 'T', 'AA1', 'ER1', 'EH2', 'OY0', 'UH2', 'UW1', 'Z', 'AW2', 'AW1', 'V', 'UW2', 'AA2', 'ER', 'AW0', 'UW0', 'R', 'OW1', 'EH1', 'ZH', 'AE0', 'IH2', 'IH', 'Y', 'JH', 'P', 'AY1', 'EY0', 'OY2', 'TH', 'HH', 'D', 'ER0', 'CH', 'AO1', 'AE1', 'AO2', 'OY1', 'AY2', 'IH1', 'OW0', 'L', 'SH'} 19 | 20 | symbols = [pad] + c + v + ja_symbols + pu_symbols + list(arpa) 21 | symbols = sorted(set(symbols)) 22 | if __name__ == '__main__': 23 | print(len(symbols)) -------------------------------------------------------------------------------- /tools/my_utils.py: -------------------------------------------------------------------------------- 1 | import ffmpeg 2 | import numpy as np 3 | 4 | def load_audio(file, sr): 5 | try: 6 | # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26 7 | # This launches a subprocess to decode audio while down-mixing and resampling as necessary. 8 | # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed. 9 | file = ( 10 | file.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 11 | ) 12 | out, _ = ( 13 | ffmpeg.input(file, threads=0) 14 | .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr) 15 | .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True) 16 | ) 17 | except Exception as e: 18 | raise RuntimeError(f"Failed to load audio: {e}") 19 | 20 | return np.frombuffer(out, np.float32).flatten() 21 | 22 | class DictToAttrRecursive(dict): 23 | def __init__(self, input_dict): 24 | super().__init__(input_dict) 25 | for key, value in input_dict.items(): 26 | if isinstance(value, dict): 27 | value = DictToAttrRecursive(value) 28 | self[key] = value 29 | setattr(self, key, value) 30 | 31 | def __getattr__(self, item): 32 | try: 33 | return self[item] 34 | except KeyError: 35 | raise AttributeError(f"Attribute {item} not found") 36 | 37 | def __setattr__(self, key, value): 38 | if isinstance(value, dict): 39 | value = DictToAttrRecursive(value) 40 | super(DictToAttrRecursive, self).__setitem__(key, value) 41 | super().__setattr__(key, value) 42 | 43 | def __delattr__(self, item): 44 | try: 45 | del self[item] 46 | except KeyError: 47 | raise AttributeError(f"Attribute {item} not found") 48 | 49 | 50 | splits = { 51 | ",", 52 | "。", 53 | "?", 54 | "!", 55 | ",", 56 | ".", 57 | "?", 58 | "!", 59 | "~", 60 | ":", 61 | ":", 62 | "—", 63 | "…", 64 | } 65 | 66 | def split(todo_text): 67 | todo_text = todo_text.replace("……", "。").replace("——", ",") 68 | if todo_text[-1] not in splits: 69 | todo_text += "。" 70 | i_split_head = i_split_tail = 0 71 | len_text = len(todo_text) 72 | todo_texts = [] 73 | while 1: 74 | if i_split_head >= len_text: 75 | break 76 | if todo_text[i_split_head] in splits: 77 | i_split_head += 1 78 | todo_texts.append(todo_text[i_split_tail:i_split_head]) 79 | i_split_tail = i_split_head 80 | else: 81 | i_split_head += 1 82 | return todo_texts 83 | -------------------------------------------------------------------------------- /tools/slice_audio.py: -------------------------------------------------------------------------------- 1 | import os,sys,numpy as np 2 | import traceback 3 | from scipy.io import wavfile 4 | from my_utils import load_audio 5 | from slicer2 import Slicer 6 | 7 | def slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,i_part,all_part): 8 | os.makedirs(opt_root,exist_ok=True) 9 | if os.path.isfile(inp): 10 | input=[inp] 11 | elif os.path.isdir(inp): 12 | input=["%s/%s"%(inp,name)for name in sorted(list(os.listdir(inp)))] 13 | else: 14 | return "Input path exists but is neither a file nor a folder" 15 | slicer = Slicer( 16 | sr=32000, 17 | threshold= int(threshold), 18 | min_length= int(min_length), 19 | min_interval= int(min_interval), 20 | hop_size= int(hop_size), 21 | max_sil_kept= int(max_sil_kept), 22 | ) 23 | _max=float(_max) 24 | alpha=float(alpha) 25 | for inp_path in input[int(i_part)::int(all_part)]: 26 | try: 27 | name = os.path.basename(inp_path) 28 | audio = load_audio(inp_path, 32000) 29 | for chunk, start, end in slicer.slice(audio): 30 | tmp_max = np.abs(chunk).max() 31 | if(tmp_max>1):chunk/=tmp_max 32 | chunk = (chunk / tmp_max * (_max * alpha)) + (1 - alpha) * chunk 33 | wavfile.write( 34 | "%s/%s_%s_%s.wav" % (opt_root, name, start, end), 35 | 32000, 36 | (chunk * 32767).astype(np.int16), 37 | ) 38 | except: 39 | print(inp_path,"->fail->",traceback.format_exc()) 40 | return "When execution is complete, check the output file" 41 | 42 | print(slice(*sys.argv[1:])) --------------------------------------------------------------------------------