├── .gitignore ├── LICENSE ├── README.md ├── assets └── silence.wav ├── egs ├── zipvoice │ ├── README.md │ ├── conf │ │ └── zipvoice_base.json │ ├── local │ │ ├── pinyin.txt │ │ ├── prepare_emilia.sh │ │ ├── prepare_libritts.sh │ │ ├── prepare_token_file_char.py │ │ ├── prepare_token_file_emilia.py │ │ ├── prepare_tokens_emilia.py │ │ └── preprocess_emilia.py │ ├── run_custom.sh │ ├── run_emilia.sh │ ├── run_eval.sh │ ├── run_finetune.sh │ ├── run_libritts.sh │ └── utils │ │ ├── parse_options.sh │ │ └── validate_manifest.py └── zipvoice_dialog │ ├── README.md │ ├── local │ └── prepare_opendialog.py │ ├── run_custom.sh │ ├── run_eval.sh │ ├── run_finetune.sh │ └── run_opendialog.sh ├── pyproject.toml ├── requirements.txt ├── requirements_eval.txt ├── runtime └── nvidia_triton │ ├── Dockerfile.server │ ├── README.md │ ├── client_grpc.py │ ├── client_http.py │ ├── docker-compose.yml │ ├── model_repo │ └── zipvoice │ │ ├── 1 │ │ └── model.py │ │ └── config.pbtxt │ ├── pytriton_server.py │ ├── run.sh │ └── scripts │ └── fill_template.py └── zipvoice ├── __init__.py ├── bin ├── compute_fbank.py ├── generate_averaged_model.py ├── infer_zipvoice.py ├── infer_zipvoice_dialog.py ├── infer_zipvoice_onnx.py ├── onnx_export.py ├── prepare_dataset.py ├── prepare_tokens.py ├── tensorrt_export.py ├── train_zipvoice.py ├── train_zipvoice_dialog.py ├── train_zipvoice_dialog_stereo.py └── train_zipvoice_distill.py ├── dataset ├── datamodule.py └── dataset.py ├── eval ├── models │ ├── ecapa_tdnn_wavllm.py │ ├── ecapa_tdnn_wavlm.py │ └── utmos.py ├── mos │ └── utmos.py ├── speaker_similarity │ ├── cpsim.py │ └── sim.py ├── utils.py └── wer │ ├── dialog.py │ ├── hubert.py │ └── seedtts.py ├── models ├── modules │ ├── scaling.py │ ├── solver.py │ ├── zipformer.py │ └── zipformer_two_stream.py ├── zipvoice.py ├── zipvoice_dialog.py └── zipvoice_distill.py ├── tokenizer ├── normalizer.py └── tokenizer.py └── utils ├── checkpoint.py ├── common.py ├── diagnostics.py ├── feature.py ├── hooks.py ├── infer.py ├── lr_scheduler.py ├── optim.py ├── scaling_converter.py └── tensorrt.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/README.md -------------------------------------------------------------------------------- /assets/silence.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/assets/silence.wav -------------------------------------------------------------------------------- /egs/zipvoice/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/README.md -------------------------------------------------------------------------------- /egs/zipvoice/conf/zipvoice_base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/conf/zipvoice_base.json -------------------------------------------------------------------------------- /egs/zipvoice/local/pinyin.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/local/pinyin.txt -------------------------------------------------------------------------------- /egs/zipvoice/local/prepare_emilia.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/local/prepare_emilia.sh -------------------------------------------------------------------------------- /egs/zipvoice/local/prepare_libritts.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/local/prepare_libritts.sh -------------------------------------------------------------------------------- /egs/zipvoice/local/prepare_token_file_char.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/local/prepare_token_file_char.py -------------------------------------------------------------------------------- /egs/zipvoice/local/prepare_token_file_emilia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/local/prepare_token_file_emilia.py -------------------------------------------------------------------------------- /egs/zipvoice/local/prepare_tokens_emilia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/local/prepare_tokens_emilia.py -------------------------------------------------------------------------------- /egs/zipvoice/local/preprocess_emilia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/local/preprocess_emilia.py -------------------------------------------------------------------------------- /egs/zipvoice/run_custom.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/run_custom.sh -------------------------------------------------------------------------------- /egs/zipvoice/run_emilia.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/run_emilia.sh -------------------------------------------------------------------------------- /egs/zipvoice/run_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/run_eval.sh -------------------------------------------------------------------------------- /egs/zipvoice/run_finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/run_finetune.sh -------------------------------------------------------------------------------- /egs/zipvoice/run_libritts.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/run_libritts.sh -------------------------------------------------------------------------------- /egs/zipvoice/utils/parse_options.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/utils/parse_options.sh -------------------------------------------------------------------------------- /egs/zipvoice/utils/validate_manifest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice/utils/validate_manifest.py -------------------------------------------------------------------------------- /egs/zipvoice_dialog/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice_dialog/README.md -------------------------------------------------------------------------------- /egs/zipvoice_dialog/local/prepare_opendialog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice_dialog/local/prepare_opendialog.py -------------------------------------------------------------------------------- /egs/zipvoice_dialog/run_custom.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice_dialog/run_custom.sh -------------------------------------------------------------------------------- /egs/zipvoice_dialog/run_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice_dialog/run_eval.sh -------------------------------------------------------------------------------- /egs/zipvoice_dialog/run_finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice_dialog/run_finetune.sh -------------------------------------------------------------------------------- /egs/zipvoice_dialog/run_opendialog.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/egs/zipvoice_dialog/run_opendialog.sh -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/requirements.txt -------------------------------------------------------------------------------- /requirements_eval.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/requirements_eval.txt -------------------------------------------------------------------------------- /runtime/nvidia_triton/Dockerfile.server: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/runtime/nvidia_triton/Dockerfile.server -------------------------------------------------------------------------------- /runtime/nvidia_triton/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/runtime/nvidia_triton/README.md -------------------------------------------------------------------------------- /runtime/nvidia_triton/client_grpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/runtime/nvidia_triton/client_grpc.py -------------------------------------------------------------------------------- /runtime/nvidia_triton/client_http.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/runtime/nvidia_triton/client_http.py -------------------------------------------------------------------------------- /runtime/nvidia_triton/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/runtime/nvidia_triton/docker-compose.yml -------------------------------------------------------------------------------- /runtime/nvidia_triton/model_repo/zipvoice/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/runtime/nvidia_triton/model_repo/zipvoice/1/model.py -------------------------------------------------------------------------------- /runtime/nvidia_triton/model_repo/zipvoice/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/runtime/nvidia_triton/model_repo/zipvoice/config.pbtxt -------------------------------------------------------------------------------- /runtime/nvidia_triton/pytriton_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/runtime/nvidia_triton/pytriton_server.py -------------------------------------------------------------------------------- /runtime/nvidia_triton/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/runtime/nvidia_triton/run.sh -------------------------------------------------------------------------------- /runtime/nvidia_triton/scripts/fill_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/runtime/nvidia_triton/scripts/fill_template.py -------------------------------------------------------------------------------- /zipvoice/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/__init__.py -------------------------------------------------------------------------------- /zipvoice/bin/compute_fbank.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/compute_fbank.py -------------------------------------------------------------------------------- /zipvoice/bin/generate_averaged_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/generate_averaged_model.py -------------------------------------------------------------------------------- /zipvoice/bin/infer_zipvoice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/infer_zipvoice.py -------------------------------------------------------------------------------- /zipvoice/bin/infer_zipvoice_dialog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/infer_zipvoice_dialog.py -------------------------------------------------------------------------------- /zipvoice/bin/infer_zipvoice_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/infer_zipvoice_onnx.py -------------------------------------------------------------------------------- /zipvoice/bin/onnx_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/onnx_export.py -------------------------------------------------------------------------------- /zipvoice/bin/prepare_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/prepare_dataset.py -------------------------------------------------------------------------------- /zipvoice/bin/prepare_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/prepare_tokens.py -------------------------------------------------------------------------------- /zipvoice/bin/tensorrt_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/tensorrt_export.py -------------------------------------------------------------------------------- /zipvoice/bin/train_zipvoice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/train_zipvoice.py -------------------------------------------------------------------------------- /zipvoice/bin/train_zipvoice_dialog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/train_zipvoice_dialog.py -------------------------------------------------------------------------------- /zipvoice/bin/train_zipvoice_dialog_stereo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/train_zipvoice_dialog_stereo.py -------------------------------------------------------------------------------- /zipvoice/bin/train_zipvoice_distill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/bin/train_zipvoice_distill.py -------------------------------------------------------------------------------- /zipvoice/dataset/datamodule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/dataset/datamodule.py -------------------------------------------------------------------------------- /zipvoice/dataset/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/dataset/dataset.py -------------------------------------------------------------------------------- /zipvoice/eval/models/ecapa_tdnn_wavllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/eval/models/ecapa_tdnn_wavllm.py -------------------------------------------------------------------------------- /zipvoice/eval/models/ecapa_tdnn_wavlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/eval/models/ecapa_tdnn_wavlm.py -------------------------------------------------------------------------------- /zipvoice/eval/models/utmos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/eval/models/utmos.py -------------------------------------------------------------------------------- /zipvoice/eval/mos/utmos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/eval/mos/utmos.py -------------------------------------------------------------------------------- /zipvoice/eval/speaker_similarity/cpsim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/eval/speaker_similarity/cpsim.py -------------------------------------------------------------------------------- /zipvoice/eval/speaker_similarity/sim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/eval/speaker_similarity/sim.py -------------------------------------------------------------------------------- /zipvoice/eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/eval/utils.py -------------------------------------------------------------------------------- /zipvoice/eval/wer/dialog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/eval/wer/dialog.py -------------------------------------------------------------------------------- /zipvoice/eval/wer/hubert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/eval/wer/hubert.py -------------------------------------------------------------------------------- /zipvoice/eval/wer/seedtts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/eval/wer/seedtts.py -------------------------------------------------------------------------------- /zipvoice/models/modules/scaling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/models/modules/scaling.py -------------------------------------------------------------------------------- /zipvoice/models/modules/solver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/models/modules/solver.py -------------------------------------------------------------------------------- /zipvoice/models/modules/zipformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/models/modules/zipformer.py -------------------------------------------------------------------------------- /zipvoice/models/modules/zipformer_two_stream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/models/modules/zipformer_two_stream.py -------------------------------------------------------------------------------- /zipvoice/models/zipvoice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/models/zipvoice.py -------------------------------------------------------------------------------- /zipvoice/models/zipvoice_dialog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/models/zipvoice_dialog.py -------------------------------------------------------------------------------- /zipvoice/models/zipvoice_distill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/models/zipvoice_distill.py -------------------------------------------------------------------------------- /zipvoice/tokenizer/normalizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/tokenizer/normalizer.py -------------------------------------------------------------------------------- /zipvoice/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /zipvoice/utils/checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/utils/checkpoint.py -------------------------------------------------------------------------------- /zipvoice/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/utils/common.py -------------------------------------------------------------------------------- /zipvoice/utils/diagnostics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/utils/diagnostics.py -------------------------------------------------------------------------------- /zipvoice/utils/feature.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/utils/feature.py -------------------------------------------------------------------------------- /zipvoice/utils/hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/utils/hooks.py -------------------------------------------------------------------------------- /zipvoice/utils/infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/utils/infer.py -------------------------------------------------------------------------------- /zipvoice/utils/lr_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/utils/lr_scheduler.py -------------------------------------------------------------------------------- /zipvoice/utils/optim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/utils/optim.py -------------------------------------------------------------------------------- /zipvoice/utils/scaling_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/utils/scaling_converter.py -------------------------------------------------------------------------------- /zipvoice/utils/tensorrt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2-fsa/ZipVoice/HEAD/zipvoice/utils/tensorrt.py --------------------------------------------------------------------------------