├── .gitignore ├── .latest-version.generated.txt ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── benchmark-multilingual-fr.py ├── benchmark-multilingual-zh.py ├── benchmark.py ├── docs ├── audio-sample-01.mp3 ├── audio-sample-02.mp3 ├── audio-sample-03.mp3 ├── benchmark-text-1.txt ├── benchmark-text-2.txt ├── benchmark-text-fr-1.txt ├── benchmark-text-zh-1.txt ├── chatterbox-architecture.svg ├── de_f1.flac ├── fr_f1.flac ├── vllm-cfg-impl.svg ├── zh_f2.flac └── zh_m1.mp3 ├── example-tts-min-vram.py ├── example-tts-multilingual.py ├── example-tts.py ├── gradio_tts_app.py ├── pyproject.toml ├── src └── chatterbox_vllm │ ├── __init__.py │ ├── models │ ├── s3gen │ │ ├── __init__.py │ │ ├── configs.py │ │ ├── const.py │ │ ├── decoder.py │ │ ├── f0_predictor.py │ │ ├── flow.py │ │ ├── flow_matching.py │ │ ├── hifigan.py │ │ ├── matcha │ │ │ ├── decoder.py │ │ │ ├── flow_matching.py │ │ │ ├── text_encoder.py │ │ │ └── transformer.py │ │ ├── s3gen.py │ │ ├── transformer │ │ │ ├── __init__.py │ │ │ ├── activation.py │ │ │ ├── attention.py │ │ │ ├── convolution.py │ │ │ ├── embedding.py │ │ │ ├── encoder_layer.py │ │ │ ├── positionwise_feed_forward.py │ │ │ ├── subsampling.py │ │ │ └── upsample_encoder.py │ │ ├── utils │ │ │ ├── class_utils.py │ │ │ ├── mask.py │ │ │ └── mel.py │ │ └── xvector.py │ ├── s3tokenizer │ │ ├── __init__.py │ │ └── s3tokenizer.py │ ├── t3 │ │ ├── __init__.py │ │ ├── entokenizer.py │ │ ├── grapheme_mtl_merged_expanded_v1.json │ │ ├── modules │ │ │ ├── cond_enc.py │ │ │ ├── learned_pos_emb.py │ │ │ ├── perceiver.py │ │ │ └── t3_config.py │ │ ├── mtltokenizer.py │ │ ├── t3.py │ │ └── tokenizer.json │ └── voice_encoder │ │ ├── __init__.py │ │ ├── config.py │ │ ├── melspec.py │ │ └── voice_encoder.py │ ├── text_utils.py │ └── tts.py ├── t3-model-multilingual └── config.json ├── t3-model └── config.json ├── upload-package.sh └── uv.lock /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/.gitignore -------------------------------------------------------------------------------- /.latest-version.generated.txt: -------------------------------------------------------------------------------- 1 | 0.2.1 2 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/README.md -------------------------------------------------------------------------------- /benchmark-multilingual-fr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/benchmark-multilingual-fr.py -------------------------------------------------------------------------------- /benchmark-multilingual-zh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/benchmark-multilingual-zh.py -------------------------------------------------------------------------------- /benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/benchmark.py -------------------------------------------------------------------------------- /docs/audio-sample-01.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/audio-sample-01.mp3 -------------------------------------------------------------------------------- /docs/audio-sample-02.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/audio-sample-02.mp3 -------------------------------------------------------------------------------- /docs/audio-sample-03.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/audio-sample-03.mp3 -------------------------------------------------------------------------------- /docs/benchmark-text-1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/benchmark-text-1.txt -------------------------------------------------------------------------------- /docs/benchmark-text-2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/benchmark-text-2.txt -------------------------------------------------------------------------------- /docs/benchmark-text-fr-1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/benchmark-text-fr-1.txt -------------------------------------------------------------------------------- /docs/benchmark-text-zh-1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/benchmark-text-zh-1.txt -------------------------------------------------------------------------------- /docs/chatterbox-architecture.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/chatterbox-architecture.svg -------------------------------------------------------------------------------- /docs/de_f1.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/de_f1.flac -------------------------------------------------------------------------------- /docs/fr_f1.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/fr_f1.flac -------------------------------------------------------------------------------- /docs/vllm-cfg-impl.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/vllm-cfg-impl.svg -------------------------------------------------------------------------------- /docs/zh_f2.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/zh_f2.flac -------------------------------------------------------------------------------- /docs/zh_m1.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/docs/zh_m1.mp3 -------------------------------------------------------------------------------- /example-tts-min-vram.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/example-tts-min-vram.py -------------------------------------------------------------------------------- /example-tts-multilingual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/example-tts-multilingual.py -------------------------------------------------------------------------------- /example-tts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/example-tts.py -------------------------------------------------------------------------------- /gradio_tts_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/gradio_tts_app.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/pyproject.toml -------------------------------------------------------------------------------- /src/chatterbox_vllm/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/__init__.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/configs.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/const.py: -------------------------------------------------------------------------------- 1 | S3GEN_SR = 24000 2 | -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/decoder.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/f0_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/f0_predictor.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/flow.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/flow_matching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/flow_matching.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/hifigan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/hifigan.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/matcha/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/matcha/decoder.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/matcha/flow_matching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/matcha/flow_matching.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/matcha/text_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/matcha/text_encoder.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/matcha/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/matcha/transformer.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/s3gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/s3gen.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/transformer/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/transformer/activation.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/transformer/attention.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/transformer/convolution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/transformer/convolution.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/transformer/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/transformer/embedding.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/transformer/encoder_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/transformer/encoder_layer.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/transformer/positionwise_feed_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/transformer/positionwise_feed_forward.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/transformer/subsampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/transformer/subsampling.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/transformer/upsample_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/transformer/upsample_encoder.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/utils/class_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/utils/class_utils.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/utils/mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/utils/mask.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/utils/mel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/utils/mel.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3gen/xvector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3gen/xvector.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3tokenizer/__init__.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/s3tokenizer/s3tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/s3tokenizer/s3tokenizer.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/t3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/t3/__init__.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/t3/entokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/t3/entokenizer.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/t3/grapheme_mtl_merged_expanded_v1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/t3/grapheme_mtl_merged_expanded_v1.json -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/t3/modules/cond_enc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/t3/modules/cond_enc.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/t3/modules/learned_pos_emb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/t3/modules/learned_pos_emb.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/t3/modules/perceiver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/t3/modules/perceiver.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/t3/modules/t3_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/t3/modules/t3_config.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/t3/mtltokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/t3/mtltokenizer.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/t3/t3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/t3/t3.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/t3/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/t3/tokenizer.json -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/voice_encoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/voice_encoder/__init__.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/voice_encoder/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/voice_encoder/config.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/voice_encoder/melspec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/voice_encoder/melspec.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/models/voice_encoder/voice_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/models/voice_encoder/voice_encoder.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/text_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/text_utils.py -------------------------------------------------------------------------------- /src/chatterbox_vllm/tts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/src/chatterbox_vllm/tts.py -------------------------------------------------------------------------------- /t3-model-multilingual/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/t3-model-multilingual/config.json -------------------------------------------------------------------------------- /t3-model/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/t3-model/config.json -------------------------------------------------------------------------------- /upload-package.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/upload-package.sh -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randombk/chatterbox-vllm/HEAD/uv.lock --------------------------------------------------------------------------------