├── .gitignore ├── LICENSE ├── README.md ├── cli ├── SparkTTS.py └── inference.py ├── example ├── infer.sh ├── prompt_audio.wav └── results │ └── 20250225113521.wav ├── requirements.txt ├── runtime └── triton_trtllm │ ├── Dockerfile.server │ ├── README.md │ ├── client_grpc.py │ ├── client_http.py │ ├── docker-compose.yml │ ├── model_repo │ ├── audio_tokenizer │ │ ├── 1 │ │ │ └── model.py │ │ └── config.pbtxt │ ├── spark_tts │ │ ├── 1 │ │ │ └── model.py │ │ └── config.pbtxt │ ├── tensorrt_llm │ │ ├── 1 │ │ │ └── .gitkeep │ │ └── config.pbtxt │ └── vocoder │ │ ├── 1 │ │ └── model.py │ │ └── config.pbtxt │ ├── run.sh │ └── scripts │ ├── convert_checkpoint.py │ └── fill_template.py ├── sparktts ├── models │ ├── audio_tokenizer.py │ └── bicodec.py ├── modules │ ├── blocks │ │ ├── layers.py │ │ ├── samper.py │ │ └── vocos.py │ ├── encoder_decoder │ │ ├── feat_decoder.py │ │ ├── feat_encoder.py │ │ └── wave_generator.py │ ├── fsq │ │ ├── finite_scalar_quantization.py │ │ └── residual_fsq.py │ ├── speaker │ │ ├── ecapa_tdnn.py │ │ ├── perceiver_encoder.py │ │ ├── pooling_layers.py │ │ └── speaker_encoder.py │ └── vq │ │ └── factorized_vector_quantize.py └── utils │ ├── __init__.py │ ├── audio.py │ ├── file.py │ ├── parse_options.sh │ └── token_parser.py ├── src ├── demos │ ├── trump │ │ └── trump_en.wav │ ├── zhongli │ │ └── zhongli_en.wav │ ├── 余承东 │ │ └── yuchengdong_zh.wav │ ├── 刘德华 │ │ └── dehua_zh.wav │ ├── 哪吒 │ │ └── nezha_zh.wav │ ├── 徐志胜 │ │ └── zhisheng_zh.wav │ ├── 李靖 │ │ └── lijing_zh.wav │ ├── 杨澜 │ │ └── yanglan_zh.wav │ ├── 马云 │ │ └── mayun_zh.wav │ └── 鲁豫 │ │ └── luyu_zh.wav ├── figures │ ├── gradio_TTS.png │ ├── gradio_control.png │ ├── infer_control.png │ └── infer_voice_cloning.png └── logo │ ├── HKUST.jpg │ ├── NPU.jpg │ ├── NTU.jpg │ ├── SJU.jpg │ ├── SparkAudio.jpg │ ├── SparkAudio2.jpg │ ├── SparkTTS.jpg │ ├── SparkTTS.png │ ├── mobvoi.jpg │ └── mobvoi.png └── webui.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/README.md -------------------------------------------------------------------------------- /cli/SparkTTS.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/cli/SparkTTS.py -------------------------------------------------------------------------------- /cli/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/cli/inference.py -------------------------------------------------------------------------------- /example/infer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/example/infer.sh -------------------------------------------------------------------------------- /example/prompt_audio.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/example/prompt_audio.wav -------------------------------------------------------------------------------- /example/results/20250225113521.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/example/results/20250225113521.wav -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/requirements.txt -------------------------------------------------------------------------------- /runtime/triton_trtllm/Dockerfile.server: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/Dockerfile.server -------------------------------------------------------------------------------- /runtime/triton_trtllm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/README.md -------------------------------------------------------------------------------- /runtime/triton_trtllm/client_grpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/client_grpc.py -------------------------------------------------------------------------------- /runtime/triton_trtllm/client_http.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/client_http.py -------------------------------------------------------------------------------- /runtime/triton_trtllm/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/docker-compose.yml -------------------------------------------------------------------------------- /runtime/triton_trtllm/model_repo/audio_tokenizer/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/model_repo/audio_tokenizer/1/model.py -------------------------------------------------------------------------------- /runtime/triton_trtllm/model_repo/audio_tokenizer/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/model_repo/audio_tokenizer/config.pbtxt -------------------------------------------------------------------------------- /runtime/triton_trtllm/model_repo/spark_tts/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/model_repo/spark_tts/1/model.py -------------------------------------------------------------------------------- /runtime/triton_trtllm/model_repo/spark_tts/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/model_repo/spark_tts/config.pbtxt -------------------------------------------------------------------------------- /runtime/triton_trtllm/model_repo/tensorrt_llm/1/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/triton_trtllm/model_repo/tensorrt_llm/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/model_repo/tensorrt_llm/config.pbtxt -------------------------------------------------------------------------------- /runtime/triton_trtllm/model_repo/vocoder/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/model_repo/vocoder/1/model.py -------------------------------------------------------------------------------- /runtime/triton_trtllm/model_repo/vocoder/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/model_repo/vocoder/config.pbtxt -------------------------------------------------------------------------------- /runtime/triton_trtllm/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/run.sh -------------------------------------------------------------------------------- /runtime/triton_trtllm/scripts/convert_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/scripts/convert_checkpoint.py -------------------------------------------------------------------------------- /runtime/triton_trtllm/scripts/fill_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/runtime/triton_trtllm/scripts/fill_template.py -------------------------------------------------------------------------------- /sparktts/models/audio_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/models/audio_tokenizer.py -------------------------------------------------------------------------------- /sparktts/models/bicodec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/models/bicodec.py -------------------------------------------------------------------------------- /sparktts/modules/blocks/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/blocks/layers.py -------------------------------------------------------------------------------- /sparktts/modules/blocks/samper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/blocks/samper.py -------------------------------------------------------------------------------- /sparktts/modules/blocks/vocos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/blocks/vocos.py -------------------------------------------------------------------------------- /sparktts/modules/encoder_decoder/feat_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/encoder_decoder/feat_decoder.py -------------------------------------------------------------------------------- /sparktts/modules/encoder_decoder/feat_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/encoder_decoder/feat_encoder.py -------------------------------------------------------------------------------- /sparktts/modules/encoder_decoder/wave_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/encoder_decoder/wave_generator.py -------------------------------------------------------------------------------- /sparktts/modules/fsq/finite_scalar_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/fsq/finite_scalar_quantization.py -------------------------------------------------------------------------------- /sparktts/modules/fsq/residual_fsq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/fsq/residual_fsq.py -------------------------------------------------------------------------------- /sparktts/modules/speaker/ecapa_tdnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/speaker/ecapa_tdnn.py -------------------------------------------------------------------------------- /sparktts/modules/speaker/perceiver_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/speaker/perceiver_encoder.py -------------------------------------------------------------------------------- /sparktts/modules/speaker/pooling_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/speaker/pooling_layers.py -------------------------------------------------------------------------------- /sparktts/modules/speaker/speaker_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/speaker/speaker_encoder.py -------------------------------------------------------------------------------- /sparktts/modules/vq/factorized_vector_quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/modules/vq/factorized_vector_quantize.py -------------------------------------------------------------------------------- /sparktts/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparktts/utils/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/utils/audio.py -------------------------------------------------------------------------------- /sparktts/utils/file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/utils/file.py -------------------------------------------------------------------------------- /sparktts/utils/parse_options.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/utils/parse_options.sh -------------------------------------------------------------------------------- /sparktts/utils/token_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/sparktts/utils/token_parser.py -------------------------------------------------------------------------------- /src/demos/trump/trump_en.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/demos/trump/trump_en.wav -------------------------------------------------------------------------------- /src/demos/zhongli/zhongli_en.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/demos/zhongli/zhongli_en.wav -------------------------------------------------------------------------------- /src/demos/余承东/yuchengdong_zh.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/demos/余承东/yuchengdong_zh.wav -------------------------------------------------------------------------------- /src/demos/刘德华/dehua_zh.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/demos/刘德华/dehua_zh.wav -------------------------------------------------------------------------------- /src/demos/哪吒/nezha_zh.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/demos/哪吒/nezha_zh.wav -------------------------------------------------------------------------------- /src/demos/徐志胜/zhisheng_zh.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/demos/徐志胜/zhisheng_zh.wav -------------------------------------------------------------------------------- /src/demos/李靖/lijing_zh.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/demos/李靖/lijing_zh.wav -------------------------------------------------------------------------------- /src/demos/杨澜/yanglan_zh.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/demos/杨澜/yanglan_zh.wav -------------------------------------------------------------------------------- /src/demos/马云/mayun_zh.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/demos/马云/mayun_zh.wav -------------------------------------------------------------------------------- /src/demos/鲁豫/luyu_zh.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/demos/鲁豫/luyu_zh.wav -------------------------------------------------------------------------------- /src/figures/gradio_TTS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/figures/gradio_TTS.png -------------------------------------------------------------------------------- /src/figures/gradio_control.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/figures/gradio_control.png -------------------------------------------------------------------------------- /src/figures/infer_control.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/figures/infer_control.png -------------------------------------------------------------------------------- /src/figures/infer_voice_cloning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/figures/infer_voice_cloning.png -------------------------------------------------------------------------------- /src/logo/HKUST.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/logo/HKUST.jpg -------------------------------------------------------------------------------- /src/logo/NPU.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/logo/NPU.jpg -------------------------------------------------------------------------------- /src/logo/NTU.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/logo/NTU.jpg -------------------------------------------------------------------------------- /src/logo/SJU.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/logo/SJU.jpg -------------------------------------------------------------------------------- /src/logo/SparkAudio.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/logo/SparkAudio.jpg -------------------------------------------------------------------------------- /src/logo/SparkAudio2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/logo/SparkAudio2.jpg -------------------------------------------------------------------------------- /src/logo/SparkTTS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/logo/SparkTTS.jpg -------------------------------------------------------------------------------- /src/logo/SparkTTS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/logo/SparkTTS.png -------------------------------------------------------------------------------- /src/logo/mobvoi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/logo/mobvoi.jpg -------------------------------------------------------------------------------- /src/logo/mobvoi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/src/logo/mobvoi.png -------------------------------------------------------------------------------- /webui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkAudio/Spark-TTS/HEAD/webui.py --------------------------------------------------------------------------------