├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── assets ├── Chinese_prompt.npy ├── Chinese_prompt.wav ├── English_prompt.npy ├── English_prompt.wav └── fig │ ├── Hi.gif │ ├── table_tts.png │ └── table_wavvae.png ├── readme.md ├── requirements.txt └── tts ├── frontend_function.py ├── gradio_api.py ├── infer_cli.py ├── modules ├── aligner │ └── whisper_small.py ├── ar_dur │ ├── ar_dur_predictor.py │ └── commons │ │ ├── layers.py │ │ ├── nar_tts_modules.py │ │ ├── rel_transformer.py │ │ ├── rot_transformer.py │ │ ├── seq_utils.py │ │ └── transformer.py ├── llm_dit │ ├── cfm.py │ ├── dit.py │ ├── time_embedding.py │ └── transformer.py └── wavvae │ ├── decoder │ ├── diag_gaussian.py │ ├── hifigan_modules.py │ ├── seanet_encoder.py │ └── wavvae_v3.py │ └── encoder │ └── common_modules │ ├── conv.py │ ├── lstm.py │ └── seanet.py └── utils ├── audio_utils ├── align.py ├── io.py └── plot.py ├── commons ├── ckpt_utils.py └── hparams.py └── text_utils ├── dict.json ├── ph_tone_convert.py ├── split_text.py └── text_encoder.py /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/LICENSE -------------------------------------------------------------------------------- /assets/Chinese_prompt.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/assets/Chinese_prompt.npy -------------------------------------------------------------------------------- /assets/Chinese_prompt.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/assets/Chinese_prompt.wav -------------------------------------------------------------------------------- /assets/English_prompt.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/assets/English_prompt.npy -------------------------------------------------------------------------------- /assets/English_prompt.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/assets/English_prompt.wav -------------------------------------------------------------------------------- /assets/fig/Hi.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/assets/fig/Hi.gif -------------------------------------------------------------------------------- /assets/fig/table_tts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/assets/fig/table_tts.png -------------------------------------------------------------------------------- /assets/fig/table_wavvae.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/assets/fig/table_wavvae.png -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/readme.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/requirements.txt -------------------------------------------------------------------------------- /tts/frontend_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/frontend_function.py -------------------------------------------------------------------------------- /tts/gradio_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/gradio_api.py -------------------------------------------------------------------------------- /tts/infer_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/infer_cli.py -------------------------------------------------------------------------------- /tts/modules/aligner/whisper_small.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/aligner/whisper_small.py -------------------------------------------------------------------------------- /tts/modules/ar_dur/ar_dur_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/ar_dur/ar_dur_predictor.py -------------------------------------------------------------------------------- /tts/modules/ar_dur/commons/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/ar_dur/commons/layers.py -------------------------------------------------------------------------------- /tts/modules/ar_dur/commons/nar_tts_modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/ar_dur/commons/nar_tts_modules.py -------------------------------------------------------------------------------- /tts/modules/ar_dur/commons/rel_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/ar_dur/commons/rel_transformer.py -------------------------------------------------------------------------------- /tts/modules/ar_dur/commons/rot_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/ar_dur/commons/rot_transformer.py -------------------------------------------------------------------------------- /tts/modules/ar_dur/commons/seq_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/ar_dur/commons/seq_utils.py -------------------------------------------------------------------------------- /tts/modules/ar_dur/commons/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/ar_dur/commons/transformer.py -------------------------------------------------------------------------------- /tts/modules/llm_dit/cfm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/llm_dit/cfm.py -------------------------------------------------------------------------------- /tts/modules/llm_dit/dit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/llm_dit/dit.py -------------------------------------------------------------------------------- /tts/modules/llm_dit/time_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/llm_dit/time_embedding.py -------------------------------------------------------------------------------- /tts/modules/llm_dit/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/llm_dit/transformer.py -------------------------------------------------------------------------------- /tts/modules/wavvae/decoder/diag_gaussian.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/wavvae/decoder/diag_gaussian.py -------------------------------------------------------------------------------- /tts/modules/wavvae/decoder/hifigan_modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/wavvae/decoder/hifigan_modules.py -------------------------------------------------------------------------------- /tts/modules/wavvae/decoder/seanet_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/wavvae/decoder/seanet_encoder.py -------------------------------------------------------------------------------- /tts/modules/wavvae/decoder/wavvae_v3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/wavvae/decoder/wavvae_v3.py -------------------------------------------------------------------------------- /tts/modules/wavvae/encoder/common_modules/conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/wavvae/encoder/common_modules/conv.py -------------------------------------------------------------------------------- /tts/modules/wavvae/encoder/common_modules/lstm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/wavvae/encoder/common_modules/lstm.py -------------------------------------------------------------------------------- /tts/modules/wavvae/encoder/common_modules/seanet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/modules/wavvae/encoder/common_modules/seanet.py -------------------------------------------------------------------------------- /tts/utils/audio_utils/align.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/utils/audio_utils/align.py -------------------------------------------------------------------------------- /tts/utils/audio_utils/io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/utils/audio_utils/io.py -------------------------------------------------------------------------------- /tts/utils/audio_utils/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/utils/audio_utils/plot.py -------------------------------------------------------------------------------- /tts/utils/commons/ckpt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/utils/commons/ckpt_utils.py -------------------------------------------------------------------------------- /tts/utils/commons/hparams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/utils/commons/hparams.py -------------------------------------------------------------------------------- /tts/utils/text_utils/dict.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/utils/text_utils/dict.json -------------------------------------------------------------------------------- /tts/utils/text_utils/ph_tone_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/utils/text_utils/ph_tone_convert.py -------------------------------------------------------------------------------- /tts/utils/text_utils/split_text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/utils/text_utils/split_text.py -------------------------------------------------------------------------------- /tts/utils/text_utils/text_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/MegaTTS3/HEAD/tts/utils/text_utils/text_encoder.py --------------------------------------------------------------------------------