├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── README_ASCEND.md ├── README_ZH.md ├── assets ├── arch.png ├── compare.png ├── gradio.jpg ├── logo-large.png ├── logo-small.png ├── prompt_cn.wav └── prompt_en.wav ├── cosyvoice ├── __init__.py ├── bin │ ├── average_model.py │ ├── export_jit.py │ ├── export_onnx.py │ ├── generate.py │ ├── inference_deprecated.py │ └── train.py ├── cli │ ├── __init__.py │ ├── cosyvoice.py │ ├── frontend.py │ └── model.py ├── dataset │ ├── __init__.py │ ├── dataset.py │ └── processor.py ├── flow │ ├── decoder.py │ ├── flow.py │ ├── flow_matching.py │ └── length_regulator.py ├── hifigan │ ├── discriminator.py │ ├── f0_predictor.py │ ├── generator.py │ └── hifigan.py ├── llm │ └── llm.py ├── tokenizer │ ├── assets │ │ └── multilingual_zh_ja_yue_char_del.tiktoken │ └── tokenizer.py ├── transformer │ ├── __init__.py │ ├── activation.py │ ├── attention.py │ ├── convolution.py │ ├── decoder.py │ ├── decoder_layer.py │ ├── embedding.py │ ├── encoder.py │ ├── encoder_layer.py │ ├── label_smoothing_loss.py │ ├── positionwise_feed_forward.py │ ├── subsampling.py │ └── upsample_encoder.py ├── utils │ ├── __init__.py │ ├── class_utils.py │ ├── common.py │ ├── executor.py │ ├── file_utils.py │ ├── frontend_utils.py │ ├── losses.py │ ├── mask.py │ ├── scheduler.py │ └── train_utils.py └── vllm │ └── cosyvoice2.py ├── gradio_demo.py ├── papers └── MOSS-Speech Technical Report.pdf ├── requirements.txt └── utils └── interface.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/.gitmodules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/README.md -------------------------------------------------------------------------------- /README_ASCEND.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/README_ASCEND.md -------------------------------------------------------------------------------- /README_ZH.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/README_ZH.md -------------------------------------------------------------------------------- /assets/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/assets/arch.png -------------------------------------------------------------------------------- /assets/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/assets/compare.png -------------------------------------------------------------------------------- /assets/gradio.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/assets/gradio.jpg -------------------------------------------------------------------------------- /assets/logo-large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/assets/logo-large.png -------------------------------------------------------------------------------- /assets/logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/assets/logo-small.png -------------------------------------------------------------------------------- /assets/prompt_cn.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/assets/prompt_cn.wav -------------------------------------------------------------------------------- /assets/prompt_en.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/assets/prompt_en.wav -------------------------------------------------------------------------------- /cosyvoice/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cosyvoice/bin/average_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/bin/average_model.py -------------------------------------------------------------------------------- /cosyvoice/bin/export_jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/bin/export_jit.py -------------------------------------------------------------------------------- /cosyvoice/bin/export_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/bin/export_onnx.py -------------------------------------------------------------------------------- /cosyvoice/bin/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/bin/generate.py -------------------------------------------------------------------------------- /cosyvoice/bin/inference_deprecated.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/bin/inference_deprecated.py -------------------------------------------------------------------------------- /cosyvoice/bin/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/bin/train.py -------------------------------------------------------------------------------- /cosyvoice/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cosyvoice/cli/cosyvoice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/cli/cosyvoice.py -------------------------------------------------------------------------------- /cosyvoice/cli/frontend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/cli/frontend.py -------------------------------------------------------------------------------- /cosyvoice/cli/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/cli/model.py -------------------------------------------------------------------------------- /cosyvoice/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cosyvoice/dataset/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/dataset/dataset.py -------------------------------------------------------------------------------- /cosyvoice/dataset/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/dataset/processor.py -------------------------------------------------------------------------------- /cosyvoice/flow/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/flow/decoder.py -------------------------------------------------------------------------------- /cosyvoice/flow/flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/flow/flow.py -------------------------------------------------------------------------------- /cosyvoice/flow/flow_matching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/flow/flow_matching.py -------------------------------------------------------------------------------- /cosyvoice/flow/length_regulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/flow/length_regulator.py -------------------------------------------------------------------------------- /cosyvoice/hifigan/discriminator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/hifigan/discriminator.py -------------------------------------------------------------------------------- /cosyvoice/hifigan/f0_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/hifigan/f0_predictor.py -------------------------------------------------------------------------------- /cosyvoice/hifigan/generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/hifigan/generator.py -------------------------------------------------------------------------------- /cosyvoice/hifigan/hifigan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/hifigan/hifigan.py -------------------------------------------------------------------------------- /cosyvoice/llm/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/llm/llm.py -------------------------------------------------------------------------------- /cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken -------------------------------------------------------------------------------- /cosyvoice/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /cosyvoice/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cosyvoice/transformer/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/activation.py -------------------------------------------------------------------------------- /cosyvoice/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/attention.py -------------------------------------------------------------------------------- /cosyvoice/transformer/convolution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/convolution.py -------------------------------------------------------------------------------- /cosyvoice/transformer/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/decoder.py -------------------------------------------------------------------------------- /cosyvoice/transformer/decoder_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/decoder_layer.py -------------------------------------------------------------------------------- /cosyvoice/transformer/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/embedding.py -------------------------------------------------------------------------------- /cosyvoice/transformer/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/encoder.py -------------------------------------------------------------------------------- /cosyvoice/transformer/encoder_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/encoder_layer.py -------------------------------------------------------------------------------- /cosyvoice/transformer/label_smoothing_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/label_smoothing_loss.py -------------------------------------------------------------------------------- /cosyvoice/transformer/positionwise_feed_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/positionwise_feed_forward.py -------------------------------------------------------------------------------- /cosyvoice/transformer/subsampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/subsampling.py -------------------------------------------------------------------------------- /cosyvoice/transformer/upsample_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/transformer/upsample_encoder.py -------------------------------------------------------------------------------- /cosyvoice/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cosyvoice/utils/class_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/utils/class_utils.py -------------------------------------------------------------------------------- /cosyvoice/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/utils/common.py -------------------------------------------------------------------------------- /cosyvoice/utils/executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/utils/executor.py -------------------------------------------------------------------------------- /cosyvoice/utils/file_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/utils/file_utils.py -------------------------------------------------------------------------------- /cosyvoice/utils/frontend_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/utils/frontend_utils.py -------------------------------------------------------------------------------- /cosyvoice/utils/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/utils/losses.py -------------------------------------------------------------------------------- /cosyvoice/utils/mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/utils/mask.py -------------------------------------------------------------------------------- /cosyvoice/utils/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/utils/scheduler.py -------------------------------------------------------------------------------- /cosyvoice/utils/train_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/utils/train_utils.py -------------------------------------------------------------------------------- /cosyvoice/vllm/cosyvoice2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/cosyvoice/vllm/cosyvoice2.py -------------------------------------------------------------------------------- /gradio_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/gradio_demo.py -------------------------------------------------------------------------------- /papers/MOSS-Speech Technical Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/papers/MOSS-Speech Technical Report.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/requirements.txt -------------------------------------------------------------------------------- /utils/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/MOSS-Speech/HEAD/utils/interface.py --------------------------------------------------------------------------------