├── .gitignore ├── LICENSE ├── MODEL-LICENSE ├── README.md ├── config.py ├── copy_codebase.py ├── data ├── __init__.py ├── combined_dataset.py ├── emilia_preprocessing │ ├── delete_tar_files.sh │ ├── encodec.py │ ├── sha256hash.py │ ├── step1_download.py │ ├── step2_log_tar_files.sh │ ├── step3_untar.sh │ ├── step4_construct_manifest.py │ ├── step5_phonemize.py │ ├── step6_encodec_encode.py │ └── step6_encodec_encode_script.sh ├── encodec.py ├── ll60k_preprocessing │ ├── config.yaml │ ├── encodec.py │ ├── step1_download.sh │ ├── step2_resplit_long.py │ ├── step3_seg_phn_manifest.py │ ├── step4_encodec_encode.py │ ├── step4_encodec_encode_script.sh │ ├── step5_find_nearest_neighbor.py │ ├── step6_forced_alignment.py │ ├── step6_forced_alignment.sh │ ├── step7_ipa_alignment.py │ └── tokenizer.py └── tokenizer.py ├── demo └── 5895_34622_000026_000002.wav ├── generated_tts └── generated.wav ├── inference_commandline.py ├── inference_gradio.py ├── inference_tts_utils.py ├── main.py ├── models ├── modules │ ├── __init__.py │ ├── activation.py │ ├── embedding.py │ ├── sampling.py │ ├── scaling.py │ ├── transformer.py │ ├── utils.py │ └── visualizer.py └── voice_star.py ├── pretrained └── .gitkeep ├── steps ├── __init__.py ├── optim.py ├── trainer.py └── trainer_utils.py └── z_scripts_new └── e1_840M_30s.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/LICENSE -------------------------------------------------------------------------------- /MODEL-LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/MODEL-LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/README.md -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/config.py -------------------------------------------------------------------------------- /copy_codebase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/copy_codebase.py -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/combined_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/combined_dataset.py -------------------------------------------------------------------------------- /data/emilia_preprocessing/delete_tar_files.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/emilia_preprocessing/delete_tar_files.sh -------------------------------------------------------------------------------- /data/emilia_preprocessing/encodec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/emilia_preprocessing/encodec.py -------------------------------------------------------------------------------- /data/emilia_preprocessing/sha256hash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/emilia_preprocessing/sha256hash.py -------------------------------------------------------------------------------- /data/emilia_preprocessing/step1_download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/emilia_preprocessing/step1_download.py -------------------------------------------------------------------------------- /data/emilia_preprocessing/step2_log_tar_files.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/emilia_preprocessing/step2_log_tar_files.sh -------------------------------------------------------------------------------- /data/emilia_preprocessing/step3_untar.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/emilia_preprocessing/step3_untar.sh -------------------------------------------------------------------------------- /data/emilia_preprocessing/step4_construct_manifest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/emilia_preprocessing/step4_construct_manifest.py -------------------------------------------------------------------------------- /data/emilia_preprocessing/step5_phonemize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/emilia_preprocessing/step5_phonemize.py -------------------------------------------------------------------------------- /data/emilia_preprocessing/step6_encodec_encode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/emilia_preprocessing/step6_encodec_encode.py -------------------------------------------------------------------------------- /data/emilia_preprocessing/step6_encodec_encode_script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/emilia_preprocessing/step6_encodec_encode_script.sh -------------------------------------------------------------------------------- /data/encodec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/encodec.py -------------------------------------------------------------------------------- /data/ll60k_preprocessing/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/config.yaml -------------------------------------------------------------------------------- /data/ll60k_preprocessing/encodec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/encodec.py -------------------------------------------------------------------------------- /data/ll60k_preprocessing/step1_download.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/step1_download.sh -------------------------------------------------------------------------------- /data/ll60k_preprocessing/step2_resplit_long.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/step2_resplit_long.py -------------------------------------------------------------------------------- /data/ll60k_preprocessing/step3_seg_phn_manifest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/step3_seg_phn_manifest.py -------------------------------------------------------------------------------- /data/ll60k_preprocessing/step4_encodec_encode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/step4_encodec_encode.py -------------------------------------------------------------------------------- /data/ll60k_preprocessing/step4_encodec_encode_script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/step4_encodec_encode_script.sh -------------------------------------------------------------------------------- /data/ll60k_preprocessing/step5_find_nearest_neighbor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/step5_find_nearest_neighbor.py -------------------------------------------------------------------------------- /data/ll60k_preprocessing/step6_forced_alignment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/step6_forced_alignment.py -------------------------------------------------------------------------------- /data/ll60k_preprocessing/step6_forced_alignment.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/step6_forced_alignment.sh -------------------------------------------------------------------------------- /data/ll60k_preprocessing/step7_ipa_alignment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/step7_ipa_alignment.py -------------------------------------------------------------------------------- /data/ll60k_preprocessing/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/ll60k_preprocessing/tokenizer.py -------------------------------------------------------------------------------- /data/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/data/tokenizer.py -------------------------------------------------------------------------------- /demo/5895_34622_000026_000002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/demo/5895_34622_000026_000002.wav -------------------------------------------------------------------------------- /generated_tts/generated.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/generated_tts/generated.wav -------------------------------------------------------------------------------- /inference_commandline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/inference_commandline.py -------------------------------------------------------------------------------- /inference_gradio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/inference_gradio.py -------------------------------------------------------------------------------- /inference_tts_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/inference_tts_utils.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/main.py -------------------------------------------------------------------------------- /models/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/modules/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/models/modules/activation.py -------------------------------------------------------------------------------- /models/modules/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/models/modules/embedding.py -------------------------------------------------------------------------------- /models/modules/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/models/modules/sampling.py -------------------------------------------------------------------------------- /models/modules/scaling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/models/modules/scaling.py -------------------------------------------------------------------------------- /models/modules/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/models/modules/transformer.py -------------------------------------------------------------------------------- /models/modules/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/models/modules/utils.py -------------------------------------------------------------------------------- /models/modules/visualizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/models/modules/visualizer.py -------------------------------------------------------------------------------- /models/voice_star.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/models/voice_star.py -------------------------------------------------------------------------------- /pretrained/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /steps/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /steps/optim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/steps/optim.py -------------------------------------------------------------------------------- /steps/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/steps/trainer.py -------------------------------------------------------------------------------- /steps/trainer_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/steps/trainer_utils.py -------------------------------------------------------------------------------- /z_scripts_new/e1_840M_30s.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonppy/VoiceStar/HEAD/z_scripts_new/e1_840M_30s.sh --------------------------------------------------------------------------------