├── .gitignore ├── LICENSE ├── README.md ├── example_for_mac.py ├── example_tts.py ├── example_vc.py ├── gradio_local.py ├── gradio_tts_app.py ├── gradio_vc_app.py ├── pyproject.toml ├── src ├── chatterbox │ ├── __init__.py │ ├── models │ │ ├── s3gen │ │ │ ├── __init__.py │ │ │ ├── const.py │ │ │ ├── decoder.py │ │ │ ├── f0_predictor.py │ │ │ ├── flow.py │ │ │ ├── flow_matching.py │ │ │ ├── hifigan.py │ │ │ ├── matcha │ │ │ │ ├── decoder.py │ │ │ │ ├── flow_matching.py │ │ │ │ ├── text_encoder.py │ │ │ │ └── transformer.py │ │ │ ├── s3gen.py │ │ │ ├── transformer │ │ │ │ ├── __init__.py │ │ │ │ ├── activation.py │ │ │ │ ├── attention.py │ │ │ │ ├── convolution.py │ │ │ │ ├── embedding.py │ │ │ │ ├── encoder_layer.py │ │ │ │ ├── positionwise_feed_forward.py │ │ │ │ ├── subsampling.py │ │ │ │ └── upsample_encoder.py │ │ │ ├── utils │ │ │ │ ├── class_utils.py │ │ │ │ ├── mask.py │ │ │ │ └── mel.py │ │ │ └── xvector.py │ │ ├── s3tokenizer │ │ │ ├── __init__.py │ │ │ └── s3tokenizer.py │ │ ├── t3 │ │ │ ├── __init__.py │ │ │ ├── inference │ │ │ │ ├── alignment_stream_analyzer.py │ │ │ │ └── t3_hf_backend.py │ │ │ ├── llama_configs.py │ │ │ ├── modules │ │ │ │ ├── cond_enc.py │ │ │ │ ├── learned_pos_emb.py │ │ │ │ ├── perceiver.py │ │ │ │ └── t3_config.py │ │ │ └── t3.py │ │ ├── tokenizers │ │ │ ├── __init__.py │ │ │ └── tokenizer.py │ │ └── voice_encoder │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── melspec.py │ │ │ └── voice_encoder.py │ ├── nbs │ │ └── strip_weight_prefix.ipynb │ ├── tts.py │ ├── utils │ │ ├── t3data_arguments.py │ │ └── t3dataset.py │ └── vc.py ├── finetune_s3gen.py └── finetune_t3.py └── voice_conversion.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/README.md -------------------------------------------------------------------------------- /example_for_mac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/example_for_mac.py -------------------------------------------------------------------------------- /example_tts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/example_tts.py -------------------------------------------------------------------------------- /example_vc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/example_vc.py -------------------------------------------------------------------------------- /gradio_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/gradio_local.py -------------------------------------------------------------------------------- /gradio_tts_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/gradio_tts_app.py -------------------------------------------------------------------------------- /gradio_vc_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/gradio_vc_app.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/pyproject.toml -------------------------------------------------------------------------------- /src/chatterbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/__init__.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/__init__.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/const.py: -------------------------------------------------------------------------------- 1 | S3GEN_SR = 24000 2 | -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/decoder.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/f0_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/f0_predictor.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/flow.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/flow_matching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/flow_matching.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/hifigan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/hifigan.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/matcha/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/matcha/decoder.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/matcha/flow_matching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/matcha/flow_matching.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/matcha/text_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/matcha/text_encoder.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/matcha/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/matcha/transformer.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/s3gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/s3gen.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/transformer/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/transformer/activation.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/transformer/attention.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/transformer/convolution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/transformer/convolution.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/transformer/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/transformer/embedding.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/transformer/encoder_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/transformer/encoder_layer.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/transformer/positionwise_feed_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/transformer/positionwise_feed_forward.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/transformer/subsampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/transformer/subsampling.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/transformer/upsample_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/transformer/upsample_encoder.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/utils/class_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/utils/class_utils.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/utils/mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/utils/mask.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/utils/mel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/utils/mel.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3gen/xvector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3gen/xvector.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3tokenizer/__init__.py -------------------------------------------------------------------------------- /src/chatterbox/models/s3tokenizer/s3tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/s3tokenizer/s3tokenizer.py -------------------------------------------------------------------------------- /src/chatterbox/models/t3/__init__.py: -------------------------------------------------------------------------------- 1 | from .t3 import T3 2 | -------------------------------------------------------------------------------- /src/chatterbox/models/t3/inference/alignment_stream_analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/t3/inference/alignment_stream_analyzer.py -------------------------------------------------------------------------------- /src/chatterbox/models/t3/inference/t3_hf_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/t3/inference/t3_hf_backend.py -------------------------------------------------------------------------------- /src/chatterbox/models/t3/llama_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/t3/llama_configs.py -------------------------------------------------------------------------------- /src/chatterbox/models/t3/modules/cond_enc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/t3/modules/cond_enc.py -------------------------------------------------------------------------------- /src/chatterbox/models/t3/modules/learned_pos_emb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/t3/modules/learned_pos_emb.py -------------------------------------------------------------------------------- /src/chatterbox/models/t3/modules/perceiver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/t3/modules/perceiver.py -------------------------------------------------------------------------------- /src/chatterbox/models/t3/modules/t3_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/t3/modules/t3_config.py -------------------------------------------------------------------------------- /src/chatterbox/models/t3/t3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/t3/t3.py -------------------------------------------------------------------------------- /src/chatterbox/models/tokenizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/tokenizers/__init__.py -------------------------------------------------------------------------------- /src/chatterbox/models/tokenizers/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/tokenizers/tokenizer.py -------------------------------------------------------------------------------- /src/chatterbox/models/voice_encoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/voice_encoder/__init__.py -------------------------------------------------------------------------------- /src/chatterbox/models/voice_encoder/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/voice_encoder/config.py -------------------------------------------------------------------------------- /src/chatterbox/models/voice_encoder/melspec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/voice_encoder/melspec.py -------------------------------------------------------------------------------- /src/chatterbox/models/voice_encoder/voice_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/models/voice_encoder/voice_encoder.py -------------------------------------------------------------------------------- /src/chatterbox/nbs/strip_weight_prefix.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/nbs/strip_weight_prefix.ipynb -------------------------------------------------------------------------------- /src/chatterbox/tts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/tts.py -------------------------------------------------------------------------------- /src/chatterbox/utils/t3data_arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/utils/t3data_arguments.py -------------------------------------------------------------------------------- /src/chatterbox/utils/t3dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/utils/t3dataset.py -------------------------------------------------------------------------------- /src/chatterbox/vc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/chatterbox/vc.py -------------------------------------------------------------------------------- /src/finetune_s3gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/finetune_s3gen.py -------------------------------------------------------------------------------- /src/finetune_t3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/src/finetune_t3.py -------------------------------------------------------------------------------- /voice_conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stlohrey/chatterbox-finetuning/HEAD/voice_conversion.py --------------------------------------------------------------------------------