├── .gitignore ├── Figures ├── MOS-preference.png ├── VibeVoice.jpg ├── VibeVoice_Realtime.png ├── VibeVoice_logo.png └── VibeVoice_logo_white.png ├── LICENSE ├── README.md ├── SECURITY.md ├── demo ├── realtime_model_inference_from_file.py ├── text_examples │ ├── 1p_abs.txt │ └── 1p_vibevoice.txt ├── vibevoice_realtime_colab.ipynb ├── vibevoice_realtime_demo.py ├── voices │ └── streaming_model │ │ ├── de-Spk0_man.pt │ │ ├── de-Spk1_woman.pt │ │ ├── en-Carter_man.pt │ │ ├── en-Davis_man.pt │ │ ├── en-Emma_woman.pt │ │ ├── en-Frank_man.pt │ │ ├── en-Grace_woman.pt │ │ ├── en-Mike_man.pt │ │ ├── fr-Spk0_man.pt │ │ ├── fr-Spk1_woman.pt │ │ ├── in-Samuel_man.pt │ │ ├── it-Spk0_woman.pt │ │ ├── it-Spk1_man.pt │ │ ├── jp-Spk0_man.pt │ │ ├── jp-Spk1_woman.pt │ │ ├── kr-Spk0_woman.pt │ │ ├── kr-Spk1_man.pt │ │ ├── nl-Spk0_man.pt │ │ ├── nl-Spk1_woman.pt │ │ ├── pl-Spk0_man.pt │ │ ├── pl-Spk1_woman.pt │ │ ├── pt-Spk0_woman.pt │ │ ├── pt-Spk1_man.pt │ │ ├── sp-Spk0_woman.pt │ │ └── sp-Spk1_man.pt └── web │ ├── app.py │ └── index.html ├── docs └── vibevoice-realtime-0.5b.md ├── pyproject.toml └── vibevoice ├── __init__.py ├── configs ├── qwen2.5_1.5b_64k.json └── qwen2.5_7b_32k.json ├── modular ├── __init__.py ├── configuration_vibevoice.py ├── configuration_vibevoice_streaming.py ├── modeling_vibevoice_streaming.py ├── modeling_vibevoice_streaming_inference.py ├── modular_vibevoice_diffusion_head.py ├── modular_vibevoice_text_tokenizer.py ├── modular_vibevoice_tokenizer.py └── streamer.py ├── processor ├── __init__.py ├── vibevoice_processor.py ├── vibevoice_streaming_processor.py └── vibevoice_tokenizer_processor.py ├── schedule ├── __init__.py ├── dpm_solver.py └── timestep_sampler.py └── scripts ├── __init__.py └── convert_nnscaler_checkpoint_to_transformers.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/.gitignore -------------------------------------------------------------------------------- /Figures/MOS-preference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/Figures/MOS-preference.png -------------------------------------------------------------------------------- /Figures/VibeVoice.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/Figures/VibeVoice.jpg -------------------------------------------------------------------------------- /Figures/VibeVoice_Realtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/Figures/VibeVoice_Realtime.png -------------------------------------------------------------------------------- /Figures/VibeVoice_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/Figures/VibeVoice_logo.png -------------------------------------------------------------------------------- /Figures/VibeVoice_logo_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/Figures/VibeVoice_logo_white.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/SECURITY.md -------------------------------------------------------------------------------- /demo/realtime_model_inference_from_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/realtime_model_inference_from_file.py -------------------------------------------------------------------------------- /demo/text_examples/1p_abs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/text_examples/1p_abs.txt -------------------------------------------------------------------------------- /demo/text_examples/1p_vibevoice.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/text_examples/1p_vibevoice.txt -------------------------------------------------------------------------------- /demo/vibevoice_realtime_colab.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/vibevoice_realtime_colab.ipynb -------------------------------------------------------------------------------- /demo/vibevoice_realtime_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/vibevoice_realtime_demo.py -------------------------------------------------------------------------------- /demo/voices/streaming_model/de-Spk0_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/de-Spk0_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/de-Spk1_woman.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/de-Spk1_woman.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/en-Carter_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/en-Carter_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/en-Davis_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/en-Davis_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/en-Emma_woman.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/en-Emma_woman.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/en-Frank_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/en-Frank_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/en-Grace_woman.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/en-Grace_woman.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/en-Mike_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/en-Mike_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/fr-Spk0_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/fr-Spk0_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/fr-Spk1_woman.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/fr-Spk1_woman.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/in-Samuel_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/in-Samuel_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/it-Spk0_woman.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/it-Spk0_woman.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/it-Spk1_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/it-Spk1_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/jp-Spk0_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/jp-Spk0_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/jp-Spk1_woman.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/jp-Spk1_woman.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/kr-Spk0_woman.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/kr-Spk0_woman.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/kr-Spk1_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/kr-Spk1_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/nl-Spk0_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/nl-Spk0_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/nl-Spk1_woman.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/nl-Spk1_woman.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/pl-Spk0_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/pl-Spk0_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/pl-Spk1_woman.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/pl-Spk1_woman.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/pt-Spk0_woman.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/pt-Spk0_woman.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/pt-Spk1_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/pt-Spk1_man.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/sp-Spk0_woman.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/sp-Spk0_woman.pt -------------------------------------------------------------------------------- /demo/voices/streaming_model/sp-Spk1_man.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/voices/streaming_model/sp-Spk1_man.pt -------------------------------------------------------------------------------- /demo/web/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/web/app.py -------------------------------------------------------------------------------- /demo/web/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/demo/web/index.html -------------------------------------------------------------------------------- /docs/vibevoice-realtime-0.5b.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/docs/vibevoice-realtime-0.5b.md -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/pyproject.toml -------------------------------------------------------------------------------- /vibevoice/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/__init__.py -------------------------------------------------------------------------------- /vibevoice/configs/qwen2.5_1.5b_64k.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/configs/qwen2.5_1.5b_64k.json -------------------------------------------------------------------------------- /vibevoice/configs/qwen2.5_7b_32k.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/configs/qwen2.5_7b_32k.json -------------------------------------------------------------------------------- /vibevoice/modular/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/modular/__init__.py -------------------------------------------------------------------------------- /vibevoice/modular/configuration_vibevoice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/modular/configuration_vibevoice.py -------------------------------------------------------------------------------- /vibevoice/modular/configuration_vibevoice_streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/modular/configuration_vibevoice_streaming.py -------------------------------------------------------------------------------- /vibevoice/modular/modeling_vibevoice_streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/modular/modeling_vibevoice_streaming.py -------------------------------------------------------------------------------- /vibevoice/modular/modeling_vibevoice_streaming_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/modular/modeling_vibevoice_streaming_inference.py -------------------------------------------------------------------------------- /vibevoice/modular/modular_vibevoice_diffusion_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/modular/modular_vibevoice_diffusion_head.py -------------------------------------------------------------------------------- /vibevoice/modular/modular_vibevoice_text_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/modular/modular_vibevoice_text_tokenizer.py -------------------------------------------------------------------------------- /vibevoice/modular/modular_vibevoice_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/modular/modular_vibevoice_tokenizer.py -------------------------------------------------------------------------------- /vibevoice/modular/streamer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/modular/streamer.py -------------------------------------------------------------------------------- /vibevoice/processor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/processor/__init__.py -------------------------------------------------------------------------------- /vibevoice/processor/vibevoice_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/processor/vibevoice_processor.py -------------------------------------------------------------------------------- /vibevoice/processor/vibevoice_streaming_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/processor/vibevoice_streaming_processor.py -------------------------------------------------------------------------------- /vibevoice/processor/vibevoice_tokenizer_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/processor/vibevoice_tokenizer_processor.py -------------------------------------------------------------------------------- /vibevoice/schedule/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vibevoice/schedule/dpm_solver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/schedule/dpm_solver.py -------------------------------------------------------------------------------- /vibevoice/schedule/timestep_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/schedule/timestep_sampler.py -------------------------------------------------------------------------------- /vibevoice/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vibevoice/scripts/convert_nnscaler_checkpoint_to_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/VibeVoice/HEAD/vibevoice/scripts/convert_nnscaler_checkpoint_to_transformers.py --------------------------------------------------------------------------------