├── LICENSE ├── README.md ├── attentions.py ├── commons.py ├── configs ├── cross-lingual-emotional-freezefinetune-en.json ├── cross-lingual-emotional.json ├── cross-lingual-freezefinetune-en.json └── cross-lingual.json ├── data_utils_whisper_hier_multi_pure.py ├── dataset ├── ESD │ └── 0001 │ │ └── Angry │ │ ├── 0001_000351.wav │ │ ├── 0001_000352.wav │ │ ├── 0001_000353.wav │ │ └── 0001_000354.wav ├── ESD_largev2 │ └── 0001 │ │ └── Angry │ │ ├── 0001_000351_largev2ppg.npy │ │ ├── 0001_000352_largev2ppg.npy │ │ ├── 0001_000353_largev2ppg.npy │ │ └── 0001_000354_largev2ppg.npy ├── aishell3 │ └── SSB0005 │ │ ├── SSB00050001.wav │ │ ├── SSB00050002.wav │ │ └── SSB00050003.wav ├── downsample.py ├── jvs │ └── jvs001 │ │ ├── BASIC5000_0025.wav │ │ ├── BASIC5000_0235.wav │ │ └── BASIC5000_0408.wav ├── vctk │ └── p225 │ │ ├── p225_001.wav │ │ ├── p225_002.wav │ │ ├── p225_003.wav │ │ ├── p225_004.wav │ │ └── p225_005.wav └── vctk_largev2 │ └── p225 │ ├── p225_001_largev2ppg.npy │ ├── p225_002_largev2ppg.npy │ ├── p225_003_largev2ppg.npy │ ├── p225_004_largev2ppg.npy │ └── p225_005_largev2ppg.npy ├── filelist ├── aishell3-jieba-test.txt ├── aishell3-jieba-train.txt ├── aishell3-jieba.txt ├── aishell3-vctk-jvs-audio-phone-test.txt ├── aishell3-vctk-jvs-audio-phone-train.txt ├── aishell3-vctk-jvs-audio-phone.txt ├── esd_audio_phone-test.txt ├── esd_audio_phone-train.txt ├── esd_audio_phone.txt ├── esd_audio_phone_en-test.txt ├── esd_audio_phone_en-train.txt ├── esd_audio_phone_en.txt ├── esd_audio_phone_jieba-test.txt ├── esd_audio_phone_jieba-train.txt ├── esd_audio_phone_jieba.txt ├── train_test_split.py ├── vctk_audio_phone-test.txt ├── vctk_audio_phone-train.txt └── vctk_audio_phone.txt ├── inference-cross-lingual-TTS-cn.py ├── inference-cross-lingual-TTS-en.py ├── inference-cross-lingual-emotional-TTS-en.py ├── inference-cross-lingual-emotional-VC.py ├── jieba_all.py ├── losses.py ├── mel_processing.py ├── models_whisper_hier_multi_pure.py ├── modules.py ├── monotonic_align.py ├── monotonic_align ├── __init__.py ├── core.pyx └── setup.py ├── preprocess_cn.py ├── preprocess_en.py ├── preprocess_jvs.py ├── preprocess_weo.py ├── requirement.txt ├── text_cn ├── LICENSE ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-37.pyc │ ├── cleaners.cpython-310.pyc │ ├── cleaners.cpython-37.pyc │ └── symbols.cpython-37.pyc ├── cleaners.py └── symbols.py ├── train_whisper_hier_multi_pure_3.py ├── train_whisper_hier_multi_pure_3_freeze.py ├── train_whisper_hier_multi_pure_esd.py ├── train_whisper_hier_multi_pure_esd_freeze.py ├── transforms.py ├── utils.py ├── whisper ├── LICENSE ├── README.md ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── audio.cpython-37.pyc │ ├── decoding.cpython-37.pyc │ ├── model.cpython-37.pyc │ ├── tokenizer.cpython-37.pyc │ └── utils.cpython-37.pyc ├── audio.py ├── decoding.py ├── inference.py ├── model.py ├── tokenizer.py └── utils.py ├── whisper_pretrain └── README.md └── x-speech-biger.png /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/README.md -------------------------------------------------------------------------------- /attentions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/attentions.py -------------------------------------------------------------------------------- /commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/commons.py -------------------------------------------------------------------------------- /configs/cross-lingual-emotional-freezefinetune-en.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/configs/cross-lingual-emotional-freezefinetune-en.json -------------------------------------------------------------------------------- /configs/cross-lingual-emotional.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/configs/cross-lingual-emotional.json -------------------------------------------------------------------------------- /configs/cross-lingual-freezefinetune-en.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/configs/cross-lingual-freezefinetune-en.json -------------------------------------------------------------------------------- /configs/cross-lingual.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/configs/cross-lingual.json -------------------------------------------------------------------------------- /data_utils_whisper_hier_multi_pure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/data_utils_whisper_hier_multi_pure.py -------------------------------------------------------------------------------- /dataset/ESD/0001/Angry/0001_000351.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/ESD/0001/Angry/0001_000351.wav -------------------------------------------------------------------------------- /dataset/ESD/0001/Angry/0001_000352.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/ESD/0001/Angry/0001_000352.wav -------------------------------------------------------------------------------- /dataset/ESD/0001/Angry/0001_000353.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/ESD/0001/Angry/0001_000353.wav -------------------------------------------------------------------------------- /dataset/ESD/0001/Angry/0001_000354.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/ESD/0001/Angry/0001_000354.wav -------------------------------------------------------------------------------- /dataset/ESD_largev2/0001/Angry/0001_000351_largev2ppg.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/ESD_largev2/0001/Angry/0001_000351_largev2ppg.npy -------------------------------------------------------------------------------- /dataset/ESD_largev2/0001/Angry/0001_000352_largev2ppg.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/ESD_largev2/0001/Angry/0001_000352_largev2ppg.npy -------------------------------------------------------------------------------- /dataset/ESD_largev2/0001/Angry/0001_000353_largev2ppg.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/ESD_largev2/0001/Angry/0001_000353_largev2ppg.npy -------------------------------------------------------------------------------- /dataset/ESD_largev2/0001/Angry/0001_000354_largev2ppg.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/ESD_largev2/0001/Angry/0001_000354_largev2ppg.npy -------------------------------------------------------------------------------- /dataset/aishell3/SSB0005/SSB00050001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/aishell3/SSB0005/SSB00050001.wav -------------------------------------------------------------------------------- /dataset/aishell3/SSB0005/SSB00050002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/aishell3/SSB0005/SSB00050002.wav -------------------------------------------------------------------------------- /dataset/aishell3/SSB0005/SSB00050003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/aishell3/SSB0005/SSB00050003.wav -------------------------------------------------------------------------------- /dataset/downsample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/downsample.py -------------------------------------------------------------------------------- /dataset/jvs/jvs001/BASIC5000_0025.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/jvs/jvs001/BASIC5000_0025.wav -------------------------------------------------------------------------------- /dataset/jvs/jvs001/BASIC5000_0235.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/jvs/jvs001/BASIC5000_0235.wav -------------------------------------------------------------------------------- /dataset/jvs/jvs001/BASIC5000_0408.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/jvs/jvs001/BASIC5000_0408.wav -------------------------------------------------------------------------------- /dataset/vctk/p225/p225_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/vctk/p225/p225_001.wav -------------------------------------------------------------------------------- /dataset/vctk/p225/p225_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/vctk/p225/p225_002.wav -------------------------------------------------------------------------------- /dataset/vctk/p225/p225_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/vctk/p225/p225_003.wav -------------------------------------------------------------------------------- /dataset/vctk/p225/p225_004.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/vctk/p225/p225_004.wav -------------------------------------------------------------------------------- /dataset/vctk/p225/p225_005.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/vctk/p225/p225_005.wav -------------------------------------------------------------------------------- /dataset/vctk_largev2/p225/p225_001_largev2ppg.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/vctk_largev2/p225/p225_001_largev2ppg.npy -------------------------------------------------------------------------------- /dataset/vctk_largev2/p225/p225_002_largev2ppg.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/vctk_largev2/p225/p225_002_largev2ppg.npy -------------------------------------------------------------------------------- /dataset/vctk_largev2/p225/p225_003_largev2ppg.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/vctk_largev2/p225/p225_003_largev2ppg.npy -------------------------------------------------------------------------------- /dataset/vctk_largev2/p225/p225_004_largev2ppg.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/vctk_largev2/p225/p225_004_largev2ppg.npy -------------------------------------------------------------------------------- /dataset/vctk_largev2/p225/p225_005_largev2ppg.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/dataset/vctk_largev2/p225/p225_005_largev2ppg.npy -------------------------------------------------------------------------------- /filelist/aishell3-jieba-test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/aishell3-jieba-test.txt -------------------------------------------------------------------------------- /filelist/aishell3-jieba-train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/aishell3-jieba-train.txt -------------------------------------------------------------------------------- /filelist/aishell3-jieba.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/aishell3-jieba.txt -------------------------------------------------------------------------------- /filelist/aishell3-vctk-jvs-audio-phone-test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/aishell3-vctk-jvs-audio-phone-test.txt -------------------------------------------------------------------------------- /filelist/aishell3-vctk-jvs-audio-phone-train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/aishell3-vctk-jvs-audio-phone-train.txt -------------------------------------------------------------------------------- /filelist/aishell3-vctk-jvs-audio-phone.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/aishell3-vctk-jvs-audio-phone.txt -------------------------------------------------------------------------------- /filelist/esd_audio_phone-test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/esd_audio_phone-test.txt -------------------------------------------------------------------------------- /filelist/esd_audio_phone-train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/esd_audio_phone-train.txt -------------------------------------------------------------------------------- /filelist/esd_audio_phone.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/esd_audio_phone.txt -------------------------------------------------------------------------------- /filelist/esd_audio_phone_en-test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/esd_audio_phone_en-test.txt -------------------------------------------------------------------------------- /filelist/esd_audio_phone_en-train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/esd_audio_phone_en-train.txt -------------------------------------------------------------------------------- /filelist/esd_audio_phone_en.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/esd_audio_phone_en.txt -------------------------------------------------------------------------------- /filelist/esd_audio_phone_jieba-test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/esd_audio_phone_jieba-test.txt -------------------------------------------------------------------------------- /filelist/esd_audio_phone_jieba-train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/esd_audio_phone_jieba-train.txt -------------------------------------------------------------------------------- /filelist/esd_audio_phone_jieba.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/esd_audio_phone_jieba.txt -------------------------------------------------------------------------------- /filelist/train_test_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/train_test_split.py -------------------------------------------------------------------------------- /filelist/vctk_audio_phone-test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/vctk_audio_phone-test.txt -------------------------------------------------------------------------------- /filelist/vctk_audio_phone-train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/vctk_audio_phone-train.txt -------------------------------------------------------------------------------- /filelist/vctk_audio_phone.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/filelist/vctk_audio_phone.txt -------------------------------------------------------------------------------- /inference-cross-lingual-TTS-cn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/inference-cross-lingual-TTS-cn.py -------------------------------------------------------------------------------- /inference-cross-lingual-TTS-en.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/inference-cross-lingual-TTS-en.py -------------------------------------------------------------------------------- /inference-cross-lingual-emotional-TTS-en.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/inference-cross-lingual-emotional-TTS-en.py -------------------------------------------------------------------------------- /inference-cross-lingual-emotional-VC.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/inference-cross-lingual-emotional-VC.py -------------------------------------------------------------------------------- /jieba_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/jieba_all.py -------------------------------------------------------------------------------- /losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/losses.py -------------------------------------------------------------------------------- /mel_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/mel_processing.py -------------------------------------------------------------------------------- /models_whisper_hier_multi_pure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/models_whisper_hier_multi_pure.py -------------------------------------------------------------------------------- /modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/modules.py -------------------------------------------------------------------------------- /monotonic_align.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/monotonic_align.py -------------------------------------------------------------------------------- /monotonic_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/monotonic_align/__init__.py -------------------------------------------------------------------------------- /monotonic_align/core.pyx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/monotonic_align/core.pyx -------------------------------------------------------------------------------- /monotonic_align/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/monotonic_align/setup.py -------------------------------------------------------------------------------- /preprocess_cn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/preprocess_cn.py -------------------------------------------------------------------------------- /preprocess_en.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/preprocess_en.py -------------------------------------------------------------------------------- /preprocess_jvs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/preprocess_jvs.py -------------------------------------------------------------------------------- /preprocess_weo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/preprocess_weo.py -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/requirement.txt -------------------------------------------------------------------------------- /text_cn/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/text_cn/LICENSE -------------------------------------------------------------------------------- /text_cn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/text_cn/__init__.py -------------------------------------------------------------------------------- /text_cn/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/text_cn/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /text_cn/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/text_cn/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /text_cn/__pycache__/cleaners.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/text_cn/__pycache__/cleaners.cpython-310.pyc -------------------------------------------------------------------------------- /text_cn/__pycache__/cleaners.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/text_cn/__pycache__/cleaners.cpython-37.pyc -------------------------------------------------------------------------------- /text_cn/__pycache__/symbols.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/text_cn/__pycache__/symbols.cpython-37.pyc -------------------------------------------------------------------------------- /text_cn/cleaners.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/text_cn/cleaners.py -------------------------------------------------------------------------------- /text_cn/symbols.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/text_cn/symbols.py -------------------------------------------------------------------------------- /train_whisper_hier_multi_pure_3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/train_whisper_hier_multi_pure_3.py -------------------------------------------------------------------------------- /train_whisper_hier_multi_pure_3_freeze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/train_whisper_hier_multi_pure_3_freeze.py -------------------------------------------------------------------------------- /train_whisper_hier_multi_pure_esd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/train_whisper_hier_multi_pure_esd.py -------------------------------------------------------------------------------- /train_whisper_hier_multi_pure_esd_freeze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/train_whisper_hier_multi_pure_esd_freeze.py -------------------------------------------------------------------------------- /transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/transforms.py -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/utils.py -------------------------------------------------------------------------------- /whisper/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/LICENSE -------------------------------------------------------------------------------- /whisper/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/README.md -------------------------------------------------------------------------------- /whisper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /whisper/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /whisper/__pycache__/audio.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/__pycache__/audio.cpython-37.pyc -------------------------------------------------------------------------------- /whisper/__pycache__/decoding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/__pycache__/decoding.cpython-37.pyc -------------------------------------------------------------------------------- /whisper/__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /whisper/__pycache__/tokenizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/__pycache__/tokenizer.cpython-37.pyc -------------------------------------------------------------------------------- /whisper/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /whisper/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/audio.py -------------------------------------------------------------------------------- /whisper/decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/decoding.py -------------------------------------------------------------------------------- /whisper/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/inference.py -------------------------------------------------------------------------------- /whisper/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/model.py -------------------------------------------------------------------------------- /whisper/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/tokenizer.py -------------------------------------------------------------------------------- /whisper/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/whisper/utils.py -------------------------------------------------------------------------------- /whisper_pretrain/README.md: -------------------------------------------------------------------------------- 1 | Path for: 2 | 3 | large-v2.pt -------------------------------------------------------------------------------- /x-speech-biger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-E-Speech/X-E-Speech-code/HEAD/x-speech-biger.png --------------------------------------------------------------------------------