├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── README_zh.md ├── colab_webui.ipynb ├── docs ├── download.png ├── linly_logo.png ├── linly_watermark.png └── webui.png ├── env.example ├── examples ├── .DS_Store └── bk_music.mp3 ├── font └── SimHei.ttf ├── gui.py ├── requirements.txt ├── requirements_module.txt ├── scripts ├── download_models.sh ├── huggingface_download.py └── modelscope_download.py ├── submodules ├── TTS │ ├── CITATION.cff │ ├── CODE_OF_CONDUCT.md │ ├── CODE_OWNERS.rst │ ├── CONTRIBUTING.md │ ├── Dockerfile │ ├── LICENSE.txt │ ├── MANIFEST.in │ ├── Makefile │ ├── README.md │ ├── TTS │ │ ├── .models.json │ │ ├── VERSION │ │ ├── __init__.py │ │ ├── api.py │ │ ├── bin │ │ │ ├── __init__.py │ │ │ ├── collect_env_info.py │ │ │ ├── compute_attention_masks.py │ │ │ ├── compute_embeddings.py │ │ │ ├── compute_statistics.py │ │ │ ├── eval_encoder.py │ │ │ ├── extract_tts_spectrograms.py │ │ │ ├── find_unique_chars.py │ │ │ ├── find_unique_phonemes.py │ │ │ ├── remove_silence_using_vad.py │ │ │ ├── resample.py │ │ │ ├── synthesize.py │ │ │ ├── train_encoder.py │ │ │ ├── train_tts.py │ │ │ ├── train_vocoder.py │ │ │ └── tune_wavegrad.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── shared_configs.py │ │ ├── demos │ │ │ └── xtts_ft_demo │ │ │ │ ├── requirements.txt │ │ │ │ ├── utils │ │ │ │ ├── formatter.py │ │ │ │ └── gpt_train.py │ │ │ │ └── xtts_demo.py │ │ ├── encoder │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── base_encoder_config.py │ │ │ │ ├── emotion_encoder_config.py │ │ │ │ └── speaker_encoder_config.py │ │ │ ├── dataset.py │ │ │ ├── losses.py │ │ │ ├── models │ │ │ │ ├── base_encoder.py │ │ │ │ ├── lstm.py │ │ │ │ └── resnet.py │ │ │ ├── requirements.txt │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── generic_utils.py │ │ │ │ ├── prepare_voxceleb.py │ │ │ │ ├── training.py │ │ │ │ └── visual.py │ │ ├── model.py │ │ ├── server │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── conf.json │ │ │ ├── server.py │ │ │ ├── static │ │ │ │ └── coqui-log-green-TTS.png │ │ │ └── templates │ │ │ │ ├── details.html │ │ │ │ └── index.html │ │ ├── tts │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── __init__.py │ │ │ │ ├── align_tts_config.py │ │ │ │ ├── bark_config.py │ │ │ │ ├── delightful_tts_config.py │ │ │ │ ├── fast_pitch_config.py │ │ │ │ ├── fast_speech_config.py │ │ │ │ ├── fastspeech2_config.py │ │ │ │ ├── glow_tts_config.py │ │ │ │ ├── neuralhmm_tts_config.py │ │ │ │ ├── overflow_config.py │ │ │ │ ├── shared_configs.py │ │ │ │ ├── speedy_speech_config.py │ │ │ │ ├── tacotron2_config.py │ │ │ │ ├── tacotron_config.py │ │ │ │ ├── tortoise_config.py │ │ │ │ ├── vits_config.py │ │ │ │ └── xtts_config.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── dataset.py │ │ │ │ └── formatters.py │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ ├── align_tts │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── duration_predictor.py │ │ │ │ │ └── mdn.py │ │ │ │ ├── bark │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── hubert │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── hubert_manager.py │ │ │ │ │ │ ├── kmeans_hubert.py │ │ │ │ │ │ └── tokenizer.py │ │ │ │ │ ├── inference_funcs.py │ │ │ │ │ ├── load_model.py │ │ │ │ │ ├── model.py │ │ │ │ │ └── model_fine.py │ │ │ │ ├── delightful_tts │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── acoustic_model.py │ │ │ │ │ ├── conformer.py │ │ │ │ │ ├── conv_layers.py │ │ │ │ │ ├── encoders.py │ │ │ │ │ ├── energy_adaptor.py │ │ │ │ │ ├── kernel_predictor.py │ │ │ │ │ ├── networks.py │ │ │ │ │ ├── phoneme_prosody_predictor.py │ │ │ │ │ 
├── pitch_adaptor.py │ │ │ │ │ └── variance_predictor.py │ │ │ │ ├── feed_forward │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── decoder.py │ │ │ │ │ ├── duration_predictor.py │ │ │ │ │ └── encoder.py │ │ │ │ ├── generic │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── aligner.py │ │ │ │ │ ├── gated_conv.py │ │ │ │ │ ├── normalization.py │ │ │ │ │ ├── pos_encoding.py │ │ │ │ │ ├── res_conv_bn.py │ │ │ │ │ ├── time_depth_sep_conv.py │ │ │ │ │ ├── transformer.py │ │ │ │ │ └── wavenet.py │ │ │ │ ├── glow_tts │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── decoder.py │ │ │ │ │ ├── duration_predictor.py │ │ │ │ │ ├── encoder.py │ │ │ │ │ ├── glow.py │ │ │ │ │ └── transformer.py │ │ │ │ ├── losses.py │ │ │ │ ├── overflow │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── common_layers.py │ │ │ │ │ ├── decoder.py │ │ │ │ │ ├── neural_hmm.py │ │ │ │ │ └── plotting_utils.py │ │ │ │ ├── tacotron │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── attentions.py │ │ │ │ │ ├── capacitron_layers.py │ │ │ │ │ ├── common_layers.py │ │ │ │ │ ├── gst_layers.py │ │ │ │ │ ├── tacotron.py │ │ │ │ │ └── tacotron2.py │ │ │ │ ├── tortoise │ │ │ │ │ ├── arch_utils.py │ │ │ │ │ ├── audio_utils.py │ │ │ │ │ ├── autoregressive.py │ │ │ │ │ ├── classifier.py │ │ │ │ │ ├── clvp.py │ │ │ │ │ ├── diffusion.py │ │ │ │ │ ├── diffusion_decoder.py │ │ │ │ │ ├── dpm_solver.py │ │ │ │ │ ├── random_latent_generator.py │ │ │ │ │ ├── tokenizer.py │ │ │ │ │ ├── transformer.py │ │ │ │ │ ├── utils.py │ │ │ │ │ ├── vocoder.py │ │ │ │ │ ├── wav2vec_alignment.py │ │ │ │ │ └── xtransformers.py │ │ │ │ ├── vits │ │ │ │ │ ├── discriminator.py │ │ │ │ │ ├── networks.py │ │ │ │ │ ├── stochastic_duration_predictor.py │ │ │ │ │ └── transforms.py │ │ │ │ └── xtts │ │ │ │ │ ├── dvae.py │ │ │ │ │ ├── gpt.py │ │ │ │ │ ├── gpt_inference.py │ │ │ │ │ ├── hifigan_decoder.py │ │ │ │ │ ├── latent_encoder.py │ │ │ │ │ ├── perceiver_encoder.py │ │ │ │ │ ├── stream_generator.py │ │ │ │ │ ├── tokenizer.py │ │ │ │ │ ├── trainer │ │ │ │ │ ├── dataset.py │ │ │ │ │ └── gpt_trainer.py │ │ │ │ │ ├── xtts_manager.py │ │ │ │ │ └── zh_num2words.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── align_tts.py │ │ │ │ ├── bark.py │ │ │ │ ├── base_tacotron.py │ │ │ │ ├── base_tts.py │ │ │ │ ├── delightful_tts.py │ │ │ │ ├── forward_tts.py │ │ │ │ ├── glow_tts.py │ │ │ │ ├── neuralhmm_tts.py │ │ │ │ ├── overflow.py │ │ │ │ ├── tacotron.py │ │ │ │ ├── tacotron2.py │ │ │ │ ├── tortoise.py │ │ │ │ ├── vits.py │ │ │ │ └── xtts.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── assets │ │ │ │ └── tortoise │ │ │ │ │ └── tokenizer.json │ │ │ │ ├── data.py │ │ │ │ ├── fairseq.py │ │ │ │ ├── helpers.py │ │ │ │ ├── languages.py │ │ │ │ ├── managers.py │ │ │ │ ├── measures.py │ │ │ │ ├── monotonic_align │ │ │ │ ├── __init__.py │ │ │ │ ├── core.pyx │ │ │ │ └── setup.py │ │ │ │ ├── speakers.py │ │ │ │ ├── ssim.py │ │ │ │ ├── synthesis.py │ │ │ │ ├── text │ │ │ │ ├── __init__.py │ │ │ │ ├── bangla │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── phonemizer.py │ │ │ │ ├── belarusian │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── phonemizer.py │ │ │ │ ├── characters.py │ │ │ │ ├── chinese_mandarin │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── numbers.py │ │ │ │ │ ├── phonemizer.py │ │ │ │ │ └── pinyinToPhonemes.py │ │ │ │ ├── cleaners.py │ │ │ │ ├── cmudict.py │ │ │ │ ├── english │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── abbreviations.py │ │ │ │ │ ├── number_norm.py │ │ │ │ │ └── time_norm.py │ │ │ │ ├── french │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── abbreviations.py │ │ │ │ ├── japanese │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── phonemizer.py │ │ 
│ │ ├── korean │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── ko_dictionary.py │ │ │ │ │ ├── korean.py │ │ │ │ │ └── phonemizer.py │ │ │ │ ├── phonemizers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── bangla_phonemizer.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── belarusian_phonemizer.py │ │ │ │ │ ├── espeak_wrapper.py │ │ │ │ │ ├── gruut_wrapper.py │ │ │ │ │ ├── ja_jp_phonemizer.py │ │ │ │ │ ├── ko_kr_phonemizer.py │ │ │ │ │ ├── multi_phonemizer.py │ │ │ │ │ └── zh_cn_phonemizer.py │ │ │ │ ├── punctuation.py │ │ │ │ └── tokenizer.py │ │ │ │ └── visual.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── audio │ │ │ │ ├── __init__.py │ │ │ │ ├── numpy_transforms.py │ │ │ │ ├── processor.py │ │ │ │ └── torch_transforms.py │ │ │ ├── callbacks.py │ │ │ ├── capacitron_optimizer.py │ │ │ ├── distribute.py │ │ │ ├── download.py │ │ │ ├── downloaders.py │ │ │ ├── generic_utils.py │ │ │ ├── io.py │ │ │ ├── manage.py │ │ │ ├── radam.py │ │ │ ├── samplers.py │ │ │ ├── synthesizer.py │ │ │ ├── training.py │ │ │ └── vad.py │ │ ├── vc │ │ │ ├── configs │ │ │ │ ├── __init__.py │ │ │ │ ├── freevc_config.py │ │ │ │ └── shared_configs.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── base_vc.py │ │ │ │ └── freevc.py │ │ │ └── modules │ │ │ │ ├── __init__.py │ │ │ │ └── freevc │ │ │ │ ├── __init__.py │ │ │ │ ├── commons.py │ │ │ │ ├── mel_processing.py │ │ │ │ ├── modules.py │ │ │ │ ├── speaker_encoder │ │ │ │ ├── __init__.py │ │ │ │ ├── audio.py │ │ │ │ ├── hparams.py │ │ │ │ └── speaker_encoder.py │ │ │ │ └── wavlm │ │ │ │ ├── __init__.py │ │ │ │ ├── config.json │ │ │ │ ├── modules.py │ │ │ │ └── wavlm.py │ │ └── vocoder │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ ├── __init__.py │ │ │ ├── fullband_melgan_config.py │ │ │ ├── hifigan_config.py │ │ │ ├── melgan_config.py │ │ │ ├── multiband_melgan_config.py │ │ │ ├── parallel_wavegan_config.py │ │ │ ├── shared_configs.py │ │ │ ├── univnet_config.py │ │ │ ├── wavegrad_config.py │ │ │ └── wavernn_config.py │ │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── gan_dataset.py │ │ │ ├── preprocess.py │ │ │ ├── wavegrad_dataset.py │ │ │ └── wavernn_dataset.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── hifigan.py │ │ │ ├── losses.py │ │ │ ├── lvc_block.py │ │ │ ├── melgan.py │ │ │ ├── parallel_wavegan.py │ │ │ ├── pqmf.py │ │ │ ├── qmf.dat │ │ │ ├── upsample.py │ │ │ └── wavegrad.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── base_vocoder.py │ │ │ ├── fullband_melgan_generator.py │ │ │ ├── gan.py │ │ │ ├── hifigan_discriminator.py │ │ │ ├── hifigan_generator.py │ │ │ ├── melgan_discriminator.py │ │ │ ├── melgan_generator.py │ │ │ ├── melgan_multiscale_discriminator.py │ │ │ ├── multiband_melgan_generator.py │ │ │ ├── parallel_wavegan_discriminator.py │ │ │ ├── parallel_wavegan_generator.py │ │ │ ├── random_window_discriminator.py │ │ │ ├── univnet_discriminator.py │ │ │ ├── univnet_generator.py │ │ │ ├── wavegrad.py │ │ │ └── wavernn.py │ │ │ ├── pqmf_output.wav │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── distribution.py │ │ │ └── generic_utils.py │ ├── dockerfiles │ │ └── Dockerfile.dev │ ├── docs │ │ ├── Makefile │ │ ├── README.md │ │ ├── requirements.txt │ │ └── source │ │ │ ├── _static │ │ │ └── logo.png │ │ │ ├── _templates │ │ │ └── page.html │ │ │ ├── conf.py │ │ │ ├── configuration.md │ │ │ ├── contributing.md │ │ │ ├── docker_images.md │ │ │ ├── faq.md │ │ │ ├── finetuning.md │ │ │ ├── formatting_your_dataset.md │ │ │ ├── implementing_a_new_language_frontend.md │ │ │ ├── implementing_a_new_model.md │ │ │ ├── index.md │ │ │ ├── 
inference.md │ │ │ ├── installation.md │ │ │ ├── main_classes │ │ │ ├── audio_processor.md │ │ │ ├── dataset.md │ │ │ ├── gan.md │ │ │ ├── model_api.md │ │ │ ├── speaker_manager.md │ │ │ └── trainer_api.md │ │ │ ├── make.bat │ │ │ ├── marytts.md │ │ │ ├── models │ │ │ ├── bark.md │ │ │ ├── forward_tts.md │ │ │ ├── glow_tts.md │ │ │ ├── overflow.md │ │ │ ├── tacotron1-2.md │ │ │ ├── tortoise.md │ │ │ ├── vits.md │ │ │ └── xtts.md │ │ │ ├── training_a_model.md │ │ │ ├── tts_datasets.md │ │ │ ├── tutorial_for_nervous_beginners.md │ │ │ └── what_makes_a_good_dataset.md │ ├── hubconf.py │ ├── images │ │ ├── TTS-performance.png │ │ ├── coqui-log-green-TTS.png │ │ ├── demo_server.gif │ │ ├── example_model_output.png │ │ ├── model.png │ │ ├── tts_cli.gif │ │ └── tts_performance.png │ ├── notebooks │ │ ├── ExtractTTSpectrogram.ipynb │ │ ├── PlotUmapLibriTTS.ipynb │ │ ├── TestAttention.ipynb │ │ ├── Tortoise.ipynb │ │ ├── Tutorial_1_use-pretrained-TTS.ipynb │ │ ├── Tutorial_2_train_your_first_TTS_model.ipynb │ │ └── dataset_analysis │ │ │ ├── AnalyzeDataset.ipynb │ │ │ ├── CheckDatasetSNR.ipynb │ │ │ ├── CheckPitch.ipynb │ │ │ ├── CheckSpectrograms.ipynb │ │ │ ├── PhonemeCoverage.ipynb │ │ │ ├── README.md │ │ │ └── analyze.py │ ├── pyproject.toml │ ├── recipes │ │ ├── README.md │ │ ├── bel-alex73 │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── choose_speaker.ipynb │ │ │ ├── docker-prepare-start.sh │ │ │ ├── docker-prepare │ │ │ │ ├── Dockerfile │ │ │ │ └── runtime.sh │ │ │ ├── dump_config.py │ │ │ ├── train_glowtts.py │ │ │ └── train_hifigan.py │ │ ├── blizzard2013 │ │ │ ├── README.md │ │ │ ├── tacotron1-Capacitron │ │ │ │ └── train_capacitron_t1.py │ │ │ └── tacotron2-Capacitron │ │ │ │ └── train_capacitron_t2.py │ │ ├── kokoro │ │ │ └── tacotron2-DDC │ │ │ │ ├── run.sh │ │ │ │ └── tacotron2-DDC.json │ │ ├── ljspeech │ │ │ ├── README.md │ │ │ ├── align_tts │ │ │ │ └── train_aligntts.py │ │ │ ├── delightful_tts │ │ │ │ └── train_delightful_tts.py │ │ │ ├── download_ljspeech.sh │ │ │ ├── fast_pitch │ │ │ │ └── train_fast_pitch.py │ │ │ ├── fast_speech │ │ │ │ └── train_fast_speech.py │ │ │ ├── fastspeech2 │ │ │ │ └── train_fastspeech2.py │ │ │ ├── glow_tts │ │ │ │ └── train_glowtts.py │ │ │ ├── hifigan │ │ │ │ └── train_hifigan.py │ │ │ ├── multiband_melgan │ │ │ │ └── train_multiband_melgan.py │ │ │ ├── neuralhmm_tts │ │ │ │ └── train_neuralhmmtts.py │ │ │ ├── overflow │ │ │ │ ├── lj_parameters.pt │ │ │ │ └── train_overflow.py │ │ │ ├── speedy_speech │ │ │ │ └── train_speedy_speech.py │ │ │ ├── tacotron2-Capacitron │ │ │ │ └── train_capacitron_t2.py │ │ │ ├── tacotron2-DCA │ │ │ │ └── train_tacotron_dca.py │ │ │ ├── tacotron2-DDC │ │ │ │ └── train_tacotron_ddc.py │ │ │ ├── univnet │ │ │ │ └── train.py │ │ │ ├── vits_tts │ │ │ │ └── train_vits.py │ │ │ ├── wavegrad │ │ │ │ └── train_wavegrad.py │ │ │ ├── wavernn │ │ │ │ └── train_wavernn.py │ │ │ ├── xtts_v1 │ │ │ │ └── train_gpt_xtts.py │ │ │ └── xtts_v2 │ │ │ │ └── train_gpt_xtts.py │ │ ├── multilingual │ │ │ ├── cml_yourtts │ │ │ │ └── train_yourtts.py │ │ │ └── vits_tts │ │ │ │ ├── train_vits_tts.py │ │ │ │ └── train_vits_tts_phonemes.py │ │ ├── thorsten_DE │ │ │ ├── README.md │ │ │ ├── align_tts │ │ │ │ └── train_aligntts.py │ │ │ ├── download_thorsten_DE.sh │ │ │ ├── glow_tts │ │ │ │ └── train_glowtts.py │ │ │ ├── hifigan │ │ │ │ └── train_hifigan.py │ │ │ ├── multiband_melgan │ │ │ │ └── train_multiband_melgan.py │ │ │ ├── speedy_speech │ │ │ │ └── train_speedy_speech.py │ │ │ ├── tacotron2-DDC │ │ │ │ └── train_tacotron_ddc.py │ │ │ ├── 
univnet │ │ │ │ └── train_univnet.py │ │ │ ├── vits_tts │ │ │ │ └── train_vits.py │ │ │ ├── wavegrad │ │ │ │ └── train_wavegrad.py │ │ │ └── wavernn │ │ │ │ └── train_wavernn.py │ │ └── vctk │ │ │ ├── delightful_tts │ │ │ └── train_delightful_tts.py │ │ │ ├── download_vctk.sh │ │ │ ├── fast_pitch │ │ │ └── train_fast_pitch.py │ │ │ ├── fast_speech │ │ │ └── train_fast_speech.py │ │ │ ├── glow_tts │ │ │ └── train_glow_tts.py │ │ │ ├── resnet_speaker_encoder │ │ │ └── train_encoder.py │ │ │ ├── speedy_speech │ │ │ └── train_speedy_speech.py │ │ │ ├── tacotron-DDC │ │ │ └── train_tacotron-DDC.py │ │ │ ├── tacotron2-DDC │ │ │ └── train_tacotron2-ddc.py │ │ │ ├── tacotron2 │ │ │ └── train_tacotron2.py │ │ │ ├── vits │ │ │ └── train_vits.py │ │ │ └── yourtts │ │ │ └── train_yourtts.py │ ├── requirements.dev.txt │ ├── requirements.ja.txt │ ├── requirements.notebooks.txt │ ├── requirements.txt │ ├── run_bash_tests.sh │ ├── scripts │ │ └── sync_readme.py │ ├── setup.cfg │ ├── setup.py │ └── tests │ │ ├── __init__.py │ │ ├── aux_tests │ │ ├── __init__.py │ │ ├── test_audio_processor.py │ │ ├── test_embedding_manager.py │ │ ├── test_extract_tts_spectrograms.py │ │ ├── test_find_unique_phonemes.py │ │ ├── test_numpy_transforms.py │ │ ├── test_readme.py │ │ ├── test_speaker_encoder.py │ │ ├── test_speaker_encoder_train.py │ │ ├── test_speaker_manager.py │ │ └── test_stft_torch.py │ │ ├── bash_tests │ │ ├── test_compute_statistics.sh │ │ └── test_demo_server.sh │ │ ├── data │ │ ├── dummy_speakers.json │ │ ├── dummy_speakers.pth │ │ ├── dummy_speakers2.json │ │ └── ljspeech │ │ │ ├── f0_cache │ │ │ └── pitch_stats.npy │ │ │ ├── metadata.csv │ │ │ ├── metadata_attn_mask.txt │ │ │ ├── metadata_flac.csv │ │ │ ├── metadata_mp3.csv │ │ │ ├── metadata_wav.csv │ │ │ ├── speakers.json │ │ │ └── wavs │ │ │ ├── LJ001-0001.flac │ │ │ ├── LJ001-0001.mp3 │ │ │ ├── LJ001-0001.npy │ │ │ ├── LJ001-0001.wav │ │ │ ├── LJ001-0002.flac │ │ │ ├── LJ001-0002.mp3 │ │ │ ├── LJ001-0002.npy │ │ │ ├── LJ001-0002.wav │ │ │ ├── LJ001-0003.flac │ │ │ ├── LJ001-0003.mp3 │ │ │ ├── LJ001-0003.npy │ │ │ ├── LJ001-0003.wav │ │ │ ├── LJ001-0004.flac │ │ │ ├── LJ001-0004.mp3 │ │ │ ├── LJ001-0004.npy │ │ │ ├── LJ001-0004.wav │ │ │ ├── LJ001-0005.flac │ │ │ ├── LJ001-0005.mp3 │ │ │ ├── LJ001-0005.npy │ │ │ ├── LJ001-0005.wav │ │ │ ├── LJ001-0006.flac │ │ │ ├── LJ001-0006.mp3 │ │ │ ├── LJ001-0006.npy │ │ │ ├── LJ001-0006.wav │ │ │ ├── LJ001-0007.flac │ │ │ ├── LJ001-0007.mp3 │ │ │ ├── LJ001-0007.npy │ │ │ ├── LJ001-0007.wav │ │ │ ├── LJ001-0008.flac │ │ │ ├── LJ001-0008.mp3 │ │ │ ├── LJ001-0008.npy │ │ │ ├── LJ001-0008.wav │ │ │ ├── LJ001-0009.flac │ │ │ ├── LJ001-0009.mp3 │ │ │ ├── LJ001-0009.npy │ │ │ ├── LJ001-0009.wav │ │ │ ├── LJ001-0010.flac │ │ │ ├── LJ001-0010.mp3 │ │ │ ├── LJ001-0010.npy │ │ │ ├── LJ001-0010.wav │ │ │ ├── LJ001-0011.flac │ │ │ ├── LJ001-0011.mp3 │ │ │ ├── LJ001-0011.npy │ │ │ ├── LJ001-0011.wav │ │ │ ├── LJ001-0012.flac │ │ │ ├── LJ001-0012.mp3 │ │ │ ├── LJ001-0012.npy │ │ │ ├── LJ001-0012.wav │ │ │ ├── LJ001-0013.flac │ │ │ ├── LJ001-0013.mp3 │ │ │ ├── LJ001-0013.npy │ │ │ ├── LJ001-0013.wav │ │ │ ├── LJ001-0014.flac │ │ │ ├── LJ001-0014.mp3 │ │ │ ├── LJ001-0014.npy │ │ │ ├── LJ001-0014.wav │ │ │ ├── LJ001-0015.flac │ │ │ ├── LJ001-0015.mp3 │ │ │ ├── LJ001-0015.npy │ │ │ ├── LJ001-0015.wav │ │ │ ├── LJ001-0016.flac │ │ │ ├── LJ001-0016.mp3 │ │ │ ├── LJ001-0016.npy │ │ │ ├── LJ001-0016.wav │ │ │ ├── LJ001-0017.flac │ │ │ ├── LJ001-0017.mp3 │ │ │ ├── LJ001-0017.npy │ │ │ ├── LJ001-0017.wav │ │ │ ├── LJ001-0018.flac │ │ 
│ ├── LJ001-0018.mp3 │ │ │ ├── LJ001-0018.npy │ │ │ ├── LJ001-0018.wav │ │ │ ├── LJ001-0019.flac │ │ │ ├── LJ001-0019.mp3 │ │ │ ├── LJ001-0019.npy │ │ │ ├── LJ001-0019.wav │ │ │ ├── LJ001-0020.flac │ │ │ ├── LJ001-0020.mp3 │ │ │ ├── LJ001-0020.npy │ │ │ ├── LJ001-0020.wav │ │ │ ├── LJ001-0021.flac │ │ │ ├── LJ001-0021.mp3 │ │ │ ├── LJ001-0021.npy │ │ │ ├── LJ001-0021.wav │ │ │ ├── LJ001-0022.flac │ │ │ ├── LJ001-0022.mp3 │ │ │ ├── LJ001-0022.npy │ │ │ ├── LJ001-0022.wav │ │ │ ├── LJ001-0023.flac │ │ │ ├── LJ001-0023.mp3 │ │ │ ├── LJ001-0023.npy │ │ │ ├── LJ001-0023.wav │ │ │ ├── LJ001-0024.flac │ │ │ ├── LJ001-0024.mp3 │ │ │ ├── LJ001-0024.npy │ │ │ ├── LJ001-0024.wav │ │ │ ├── LJ001-0025.flac │ │ │ ├── LJ001-0025.mp3 │ │ │ ├── LJ001-0025.npy │ │ │ ├── LJ001-0025.wav │ │ │ ├── LJ001-0026.flac │ │ │ ├── LJ001-0026.mp3 │ │ │ ├── LJ001-0026.npy │ │ │ ├── LJ001-0026.wav │ │ │ ├── LJ001-0027.flac │ │ │ ├── LJ001-0027.mp3 │ │ │ ├── LJ001-0027.npy │ │ │ ├── LJ001-0027.wav │ │ │ ├── LJ001-0028.flac │ │ │ ├── LJ001-0028.mp3 │ │ │ ├── LJ001-0028.npy │ │ │ ├── LJ001-0028.wav │ │ │ ├── LJ001-0029.flac │ │ │ ├── LJ001-0029.mp3 │ │ │ ├── LJ001-0029.npy │ │ │ ├── LJ001-0029.wav │ │ │ ├── LJ001-0030.flac │ │ │ ├── LJ001-0030.mp3 │ │ │ ├── LJ001-0030.npy │ │ │ ├── LJ001-0030.wav │ │ │ ├── LJ001-0031.flac │ │ │ ├── LJ001-0031.mp3 │ │ │ ├── LJ001-0031.npy │ │ │ ├── LJ001-0031.wav │ │ │ ├── LJ001-0032.flac │ │ │ ├── LJ001-0032.mp3 │ │ │ ├── LJ001-0032.npy │ │ │ └── LJ001-0032.wav │ │ ├── data_tests │ │ ├── __init__.py │ │ ├── test_dataset_formatters.py │ │ ├── test_loader.py │ │ └── test_samplers.py │ │ ├── inference_tests │ │ ├── __init__.py │ │ ├── test_synthesize.py │ │ └── test_synthesizer.py │ │ ├── inputs │ │ ├── common_voice.tsv │ │ ├── dummy_model_config.json │ │ ├── example_1.wav │ │ ├── language_ids.json │ │ ├── scale_stats.npy │ │ ├── server_config.json │ │ ├── test_align_tts.json │ │ ├── test_config.json │ │ ├── test_glow_tts.json │ │ ├── test_speaker_encoder_config.json │ │ ├── test_speedy_speech.json │ │ ├── test_tacotron2_config.json │ │ ├── test_tacotron_bd_config.json │ │ ├── test_tacotron_config.json │ │ ├── test_vocoder_audio_config.json │ │ ├── test_vocoder_multiband_melgan_config.json │ │ ├── test_vocoder_wavegrad.json │ │ ├── test_vocoder_wavernn_config.json │ │ └── xtts_vocab.json │ │ ├── text_tests │ │ ├── __init__.py │ │ ├── test_belarusian_phonemizer.py │ │ ├── test_characters.py │ │ ├── test_japanese_phonemizer.py │ │ ├── test_korean_phonemizer.py │ │ ├── test_phonemizer.py │ │ ├── test_punctuation.py │ │ ├── test_text_cleaners.py │ │ └── test_tokenizer.py │ │ ├── tts_tests │ │ ├── __init__.py │ │ ├── test_helpers.py │ │ ├── test_losses.py │ │ ├── test_neuralhmm_tts_train.py │ │ ├── test_overflow.py │ │ ├── test_overflow_train.py │ │ ├── test_speedy_speech_train.py │ │ ├── test_tacotron2_d-vectors_train.py │ │ ├── test_tacotron2_model.py │ │ ├── test_tacotron2_speaker_emb_train.py │ │ ├── test_tacotron2_train.py │ │ ├── test_tacotron_layers.py │ │ ├── test_tacotron_model.py │ │ ├── test_tacotron_train.py │ │ ├── test_vits.py │ │ ├── test_vits_d-vectors_train.py │ │ ├── test_vits_multilingual_speaker_emb_train.py │ │ ├── test_vits_multilingual_train-d_vectors.py │ │ ├── test_vits_speaker_emb_train.py │ │ └── test_vits_train.py │ │ ├── tts_tests2 │ │ ├── __init__.py │ │ ├── test_align_tts_train.py │ │ ├── test_delightful_tts_d-vectors_train.py │ │ ├── test_delightful_tts_emb_spk.py │ │ ├── test_delightful_tts_layers.py │ │ ├── test_delightful_tts_train.py │ │ ├── 
test_fast_pitch_speaker_emb_train.py │ │ ├── test_fast_pitch_train.py │ │ ├── test_fastspeech_2_speaker_emb_train.py │ │ ├── test_fastspeech_2_train.py │ │ ├── test_feed_forward_layers.py │ │ ├── test_forward_tts.py │ │ ├── test_glow_tts.py │ │ ├── test_glow_tts_d-vectors_train.py │ │ ├── test_glow_tts_speaker_emb_train.py │ │ └── test_glow_tts_train.py │ │ ├── vc_tests │ │ ├── __init__.py │ │ └── test_freevc.py │ │ ├── vocoder_tests │ │ ├── __init__.py │ │ ├── test_fullband_melgan_train.py │ │ ├── test_hifigan_train.py │ │ ├── test_melgan_train.py │ │ ├── test_multiband_melgan_train.py │ │ ├── test_parallel_wavegan_train.py │ │ ├── test_vocoder_gan_datasets.py │ │ ├── test_vocoder_losses.py │ │ ├── test_vocoder_melgan_discriminator.py │ │ ├── test_vocoder_melgan_generator.py │ │ ├── test_vocoder_parallel_wavegan_discriminator.py │ │ ├── test_vocoder_parallel_wavegan_generator.py │ │ ├── test_vocoder_pqmf.py │ │ ├── test_vocoder_rwd.py │ │ ├── test_vocoder_wavernn.py │ │ ├── test_vocoder_wavernn_datasets.py │ │ ├── test_wavegrad.py │ │ ├── test_wavegrad_layers.py │ │ ├── test_wavegrad_train.py │ │ └── test_wavernn_train.py │ │ ├── xtts_tests │ │ ├── test_xtts_gpt_train.py │ │ └── test_xtts_v2-0_gpt_train.py │ │ └── zoo_tests │ │ ├── __init__.py │ │ └── test_models.py ├── demucs │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── Demucs.ipynb │ ├── LICENSE │ ├── MANIFEST.in │ ├── Makefile │ ├── README.md │ ├── conf │ │ ├── config.yaml │ │ ├── dset │ │ │ ├── aetl.yaml │ │ │ ├── auto_extra_test.yaml │ │ │ ├── auto_mus.yaml │ │ │ ├── extra44.yaml │ │ │ ├── extra_mmi_goodclean.yaml │ │ │ ├── extra_test.yaml │ │ │ ├── musdb44.yaml │ │ │ ├── sdx23_bleeding.yaml │ │ │ └── sdx23_labelnoise.yaml │ │ ├── svd │ │ │ ├── base.yaml │ │ │ ├── base2.yaml │ │ │ └── default.yaml │ │ └── variant │ │ │ ├── default.yaml │ │ │ ├── example.yaml │ │ │ └── finetune.yaml │ ├── demucs.png │ ├── demucs │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── api.py │ │ ├── apply.py │ │ ├── audio.py │ │ ├── augment.py │ │ ├── demucs.py │ │ ├── distrib.py │ │ ├── ema.py │ │ ├── evaluate.py │ │ ├── grids │ │ │ ├── __init__.py │ │ │ ├── _explorers.py │ │ │ ├── mdx.py │ │ │ ├── mdx_extra.py │ │ │ ├── mdx_refine.py │ │ │ ├── mmi.py │ │ │ ├── mmi_ft.py │ │ │ ├── repro.py │ │ │ ├── repro_ft.py │ │ │ └── sdx23.py │ │ ├── hdemucs.py │ │ ├── htdemucs.py │ │ ├── pretrained.py │ │ ├── py.typed │ │ ├── remote │ │ │ ├── files.txt │ │ │ ├── hdemucs_mmi.yaml │ │ │ ├── htdemucs.yaml │ │ │ ├── htdemucs_6s.yaml │ │ │ ├── htdemucs_ft.yaml │ │ │ ├── mdx.yaml │ │ │ ├── mdx_extra.yaml │ │ │ ├── mdx_extra_q.yaml │ │ │ ├── mdx_q.yaml │ │ │ ├── repro_mdx_a.yaml │ │ │ ├── repro_mdx_a_hybrid_only.yaml │ │ │ └── repro_mdx_a_time_only.yaml │ │ ├── repitch.py │ │ ├── repo.py │ │ ├── separate.py │ │ ├── solver.py │ │ ├── spec.py │ │ ├── states.py │ │ ├── svd.py │ │ ├── train.py │ │ ├── transformer.py │ │ ├── utils.py │ │ ├── wav.py │ │ └── wdemucs.py │ ├── docs │ │ ├── api.md │ │ ├── linux.md │ │ ├── mac.md │ │ ├── mdx.md │ │ ├── release.md │ │ ├── sdx23.md │ │ ├── training.md │ │ └── windows.md │ ├── environment-cpu.yml │ ├── environment-cuda.yml │ ├── hubconf.py │ ├── mypy.ini │ ├── outputs.tar.gz │ ├── requirements.txt │ ├── requirements_minimal.txt │ ├── setup.cfg │ ├── setup.py │ ├── test.mp3 │ └── tools │ │ ├── __init__.py │ │ ├── automix.py │ │ ├── bench.py │ │ ├── convert.py │ │ ├── export.py │ │ └── test_pretrained.py ├── whisper │ ├── CHANGELOG.md │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.md │ ├── approach.png │ ├── data │ │ ├── README.md │ │ 
└── meanwhile.json │ ├── language-breakdown.svg │ ├── model-card.md │ ├── notebooks │ │ ├── LibriSpeech.ipynb │ │ └── Multilingual_ASR.ipynb │ ├── pyproject.toml │ ├── requirements.txt │ ├── setup.py │ ├── tests │ │ ├── conftest.py │ │ ├── jfk.flac │ │ ├── test_audio.py │ │ ├── test_normalizer.py │ │ ├── test_timing.py │ │ ├── test_tokenizer.py │ │ └── test_transcribe.py │ └── whisper │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── assets │ │ ├── gpt2.tiktoken │ │ ├── mel_filters.npz │ │ └── multilingual.tiktoken │ │ ├── audio.py │ │ ├── decoding.py │ │ ├── model.py │ │ ├── normalizers │ │ ├── __init__.py │ │ ├── basic.py │ │ ├── english.json │ │ └── english.py │ │ ├── timing.py │ │ ├── tokenizer.py │ │ ├── transcribe.py │ │ ├── triton_ops.py │ │ ├── utils.py │ │ └── version.py └── whisperX │ ├── EXAMPLES.md │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.md │ ├── figures │ └── pipeline.png │ ├── requirements.txt │ ├── setup.py │ └── whisperx │ ├── SubtitlesProcessor.py │ ├── __init__.py │ ├── __main__.py │ ├── alignment.py │ ├── asr.py │ ├── assets │ └── mel_filters.npz │ ├── audio.py │ ├── conjunctions.py │ ├── diarize.py │ ├── transcribe.py │ ├── types.py │ ├── utils.py │ └── vad.py ├── tabs ├── __init__.py ├── asr_tab.py ├── demucs_tab.py ├── download_tab.py ├── full_auto_tab.py ├── linly_talker_tab.py ├── settings_tab.py ├── translation_tab.py ├── tts_tab.py └── video_tab.py ├── tools ├── cn_tx.py ├── do_everything.py ├── step000_video_downloader.py ├── step010_demucs_vr.py ├── step020_asr.py ├── step021_asr_whisperx.py ├── step022_asr_funasr.py ├── step030_translation.py ├── step031_translation_openai.py ├── step032_translation_llm.py ├── step033_translation_translator.py ├── step034_translation_ernie.py ├── step035_translation_qwen.py ├── step036_translation_ollama.py ├── step040_tts.py ├── step041_tts_bytedance.py ├── step042_tts_xtts.py ├── step043_tts_cosyvoice.py ├── step044_tts_edge_tts.py ├── step050_synthesize_video.py └── utils.py ├── ui_components.py ├── webui.py └── 问题参考汇总.md /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "CosyVoice"] 2 | path = CosyVoice 3 | url = https://github.com/FunAudioLLM/CosyVoice.git 4 | -------------------------------------------------------------------------------- /docs/download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/docs/download.png -------------------------------------------------------------------------------- /docs/linly_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/docs/linly_logo.png -------------------------------------------------------------------------------- /docs/linly_watermark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/docs/linly_watermark.png -------------------------------------------------------------------------------- /docs/webui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/docs/webui.png -------------------------------------------------------------------------------- /env.example: 
-------------------------------------------------------------------------------- 1 | OPENAI_API_KEY = 'sk-***' 2 | OPENAI_API_BASE = 3 | # MODEL_NAME = 'gpt-3.5-turbo' 4 | MODEL_NAME = 'qwen/Qwen1.5-4B-Chat' 5 | # HF_TOKEN for downloading models 6 | HF_TOKEN = '' 7 | 8 | # Volcano Engine (ByteDance) 9 | BYTEDANCE_APPID = 10 | BYTEDANCE_ACCESS_TOKEN = 11 | 12 | # If downloading models from huggingface fails, uncomment the line below 13 | # HF_ENDPOINT = 'https://hf-mirror.com' 14 | BILI_BASE64 = 15 | 16 | # Baidu API 17 | BAIDU_API_KEY='' 18 | BAIDU_SECRET_KEY='' -------------------------------------------------------------------------------- /examples/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/examples/.DS_Store -------------------------------------------------------------------------------- /examples/bk_music.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/examples/bk_music.mp3 -------------------------------------------------------------------------------- /font/SimHei.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/font/SimHei.ttf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # PyTorch and its dependencies 2 | # These libraries include PyTorch and its related packages, supporting CUDA 11.8. 3 | # --extra-index-url https://download.pytorch.org/whl/cu118 4 | # torch 5 | # torchvision 6 | # torchaudio 7 | 8 | numpy==1.26.3 9 | transformers==4.39.3 10 | translators 11 | edge-tts 12 | gradio 13 | loguru 14 | yt-dlp 15 | scipy 16 | python-dotenv 17 | openai 18 | audiostretchy 19 | modelscope 20 | 21 | # ASR 22 | # git+https://github.com/m-bain/whisperx.git 23 | # git+https://github.com/facebookresearch/demucs#egg=demucs 24 | funasr 25 | 26 | # googletrans 27 | 28 | # Qwen 29 | accelerate 30 | 31 | # CosyVoice 32 | HyperPyYAML==1.2.2 33 | librosa==0.10.2 34 | WeTextProcessing==1.0.3 35 | wget==3.2 36 | # openai-whisper==20231117 37 | modelscope 38 | diffusers==0.27.2 39 | gdown==5.1.0 40 | pyarrow 41 | conformer==0.3.2 42 | lightning==2.2.4 43 | requests 44 | dotenv 45 | loguru 46 | moviepy 47 | # ctranslate2==3.24.0 -------------------------------------------------------------------------------- /requirements_module.txt: -------------------------------------------------------------------------------- 1 | submodules/demucs 2 | submodules/whisper 3 | submodules/whisperX 4 | submodules/TTS -------------------------------------------------------------------------------- /scripts/download_models.sh: -------------------------------------------------------------------------------- 1 | # Download the wav2vec2 model to the given path; skip the download if the file already exists 2 | mkdir -p models/ASR/whisper && wget -nc https://download.pytorch.org/torchaudio/models/wav2vec2_fairseq_base_ls960_asr_ls960.pth \ 3 | -O models/ASR/whisper/wav2vec2_fairseq_base_ls960_asr_ls960.pth 4 | 5 | # Run the download script 6 | python scripts/modelscope_download.py --------------------------------------------------------------------------------
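The variables in `env.example` above are read at runtime via python-dotenv (listed in requirements.txt). A minimal sketch, assuming a `.env` file copied from `env.example`, of how a script would typically consume them:

```python
import os

from dotenv import load_dotenv  # provided by the python-dotenv package

load_dotenv()  # load key/value pairs from a local .env file into the environment

openai_key = os.getenv('OPENAI_API_KEY')                      # e.g. 'sk-***'
model_name = os.getenv('MODEL_NAME', 'qwen/Qwen1.5-4B-Chat')  # default mirrors env.example
hf_token = os.getenv('HF_TOKEN', '')                          # optional Hugging Face token
```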
/scripts/huggingface_download.py: -------------------------------------------------------------------------------- 1 | # pip install huggingface_hub 2 | from huggingface_hub import snapshot_download 3 | 4 | # https://huggingface.co/coqui/XTTS-v2 5 | snapshot_download('coqui/XTTS-v2', local_dir='models/TTS/XTTS-v2', resume_download=True, local_dir_use_symlinks=False) 6 | 7 | # https://huggingface.co/FunAudioLLM/CosyVoice-300M 8 | # snapshot_download('FunAudioLLM/CosyVoice-300M', local_dir='models/TTS/CosyVoice-300M', resume_download=True, local_dir_use_symlinks=False) 9 | 10 | # https://huggingface.co/Qwen/Qwen1.5-4B-Chat 11 | snapshot_download('Qwen/Qwen1.5-4B-Chat', local_dir='models/LLM/Qwen1.5-4B-Chat', resume_download=True, local_dir_use_symlinks=False) 12 | 13 | # https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat 14 | snapshot_download('Qwen/Qwen1.5-1.8B-Chat', local_dir='models/LLM/Qwen1.5-1.8B-Chat', resume_download=True, local_dir_use_symlinks=False) 15 | 16 | # https://huggingface.co/Systran/faster-whisper-large-v3 17 | snapshot_download('Systran/faster-whisper-large-v3', local_dir='models/ASR/whisper/faster-whisper-large-v3', resume_download=True, local_dir_use_symlinks=False) 18 | 19 | # Access must be requested before this can be downloaded automatically 20 | # https://huggingface.co/pyannote/speaker-diarization-3.1 21 | # snapshot_download('pyannote/speaker-diarization-3.1', local_dir='models/ASR/whisper/speaker-diarization-3.1', resume_download=True, local_dir_use_symlinks=False) 22 | -------------------------------------------------------------------------------- /scripts/modelscope_download.py: -------------------------------------------------------------------------------- 1 | # pip install modelscope 2 | from modelscope import snapshot_download 3 | 4 | # https://modelscope.cn/models/AI-ModelScope/XTTS-v2 5 | snapshot_download('AI-ModelScope/XTTS-v2', local_dir='models/TTS/XTTS-v2') 6 | 7 | # https://modelscope.cn/models/iic/CosyVoice-300M 8 | # snapshot_download('iic/CosyVoice-300M', local_dir='models/TTS/CosyVoice-300M') 9 | 10 | # https://modelscope.cn/models/qwen/qwen1.5-4b-chat 11 | snapshot_download('qwen/Qwen1.5-4B-Chat', local_dir='models/LLM/Qwen1.5-4B-Chat') 12 | 13 | # https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat 14 | # snapshot_download('qwen/Qwen1.5-1.8B-Chat', local_dir='models/LLM/Qwen1.5-1.8B-Chat') 15 | 16 | # https://modelscope.cn/models/keepitsimple/faster-whisper-large-v3 17 | snapshot_download('keepitsimple/faster-whisper-large-v3', local_dir='models/ASR/whisper/faster-whisper-large-v3') 18 | 19 | # Access must be requested before this can be downloaded automatically 20 | # https://modelscope.cn/models/mirror013/speaker-diarization-3.1 21 | # snapshot_download('mirror013/speaker-diarization-3.1', local_dir='models/ASR/whisper/speaker-diarization-3.1') 22 | -------------------------------------------------------------------------------- /submodules/TTS/CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you want to cite 🐸💬, feel free to use this (but only if you loved it 😊)" 3 | title: "Coqui TTS" 4 | abstract: "A deep learning toolkit for Text-to-Speech, battle-tested in research and production" 5 | date-released: 2021-01-01 6 | authors: 7 | - family-names: "Eren" 8 | given-names: "Gölge" 9 | - name: "The Coqui TTS Team" 10 | version: 1.4 11 | doi: 10.5281/zenodo.6334862 12 | license: "MPL-2.0" 13 | url: "https://www.coqui.ai" 14 | repository-code: "https://github.com/coqui-ai/TTS" 15 | keywords: 16 | - machine learning 17 | - deep learning 18 | - artificial intelligence 19 | - text to speech 20 | - TTS -------------------------------------------------------------------------------- /submodules/TTS/Dockerfile:
-------------------------------------------------------------------------------- 1 | ARG BASE=nvidia/cuda:11.8.0-base-ubuntu22.04 2 | FROM ${BASE} 3 | 4 | RUN apt-get update && apt-get upgrade -y 5 | RUN apt-get install -y --no-install-recommends gcc g++ make python3 python3-dev python3-pip python3-venv python3-wheel espeak-ng libsndfile1-dev && rm -rf /var/lib/apt/lists/* 6 | RUN pip3 install llvmlite --ignore-installed 7 | 8 | # Install Dependencies: 9 | RUN pip3 install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 10 | RUN rm -rf /root/.cache/pip 11 | 12 | # Copy TTS repository contents: 13 | WORKDIR /root 14 | COPY . /root 15 | 16 | RUN make install 17 | 18 | ENTRYPOINT ["tts"] 19 | CMD ["--help"] 20 | -------------------------------------------------------------------------------- /submodules/TTS/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.txt 3 | include requirements.*.txt 4 | include *.cff 5 | include requirements.txt 6 | include TTS/VERSION 7 | recursive-include TTS *.json 8 | recursive-include TTS *.html 9 | recursive-include TTS *.png 10 | recursive-include TTS *.md 11 | recursive-include TTS *.py 12 | recursive-include TTS *.pyx 13 | recursive-include images *.png 14 | recursive-exclude tests * 15 | prune tests* 16 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/VERSION: -------------------------------------------------------------------------------- 1 | 0.22.0 2 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | with open(os.path.join(os.path.dirname(__file__), "VERSION"), "r", encoding="utf-8") as f: 4 | version = f.read().strip() 5 | 6 | __version__ = version 7 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/bin/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/bin/collect_env_info.py: -------------------------------------------------------------------------------- 1 | """Get detailed info about the working environment.""" 2 | import os 3 | import platform 4 | import sys 5 | 6 | import numpy 7 | import torch 8 | 9 | sys.path += [os.path.abspath(".."), os.path.abspath(".")] 10 | import json 11 | 12 | import TTS 13 | 14 | 15 | def system_info(): 16 | return { 17 | "OS": platform.system(), 18 | "architecture": platform.architecture(), 19 | "version": platform.version(), 20 | "processor": platform.processor(), 21 | "python": platform.python_version(), 22 | } 23 | 24 | 25 | def cuda_info(): 26 | return { 27 | "GPU": [torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())], 28 | "available": torch.cuda.is_available(), 29 | "version": torch.version.cuda, 30 | } 31 | 32 | 33 | def package_info(): 34 | return { 35 | "numpy": numpy.__version__, 36 | "PyTorch_version": torch.__version__, 37 | "PyTorch_debug": torch.version.debug, 38 | "TTS": TTS.__version__, 39 | } 40 | 41 | 42 | def main(): 43 | details = {"System": system_info(), "CUDA": cuda_info(), "Packages": package_info()} 44 | 
print(json.dumps(details, indent=4, sort_keys=True)) 45 | 46 | 47 | if __name__ == "__main__": 48 | main() 49 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/demos/xtts_ft_demo/requirements.txt: -------------------------------------------------------------------------------- 1 | faster_whisper==0.9.0 2 | gradio==4.7.1 -------------------------------------------------------------------------------- /submodules/TTS/TTS/encoder/README.md: -------------------------------------------------------------------------------- 1 | ### Speaker Encoder 2 | 3 | This is an implementation of https://arxiv.org/abs/1710.10467. This model can be used for voice and speaker embedding. 4 | 5 | With the code here you can generate d-vectors for both multi-speaker and single-speaker TTS datasets, then visualise and explore them along with the associated audio files in an interactive chart. 6 | 7 | Below is an example showing embedding results of various speakers. You can generate the same plot with the provided notebook as demonstrated in [this video](https://youtu.be/KW3oO7JVa7Q). 8 | 9 | ![](umap.png) 10 | 11 | Download a pretrained model from the [Released Models](https://github.com/mozilla/TTS/wiki/Released-Models) page. 12 | 13 | To run the code, you need to follow the same flow as in TTS. 14 | 15 | - Define 'config.json' for your needs. Note that audio parameters should match your TTS model. 16 | - Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360``` 17 | - Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth model/config/path/config.json dataset/path/ output_path```. This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
18 | - Watch training on Tensorboard as in TTS 19 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/encoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/encoder/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/encoder/configs/emotion_encoder_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import asdict, dataclass 2 | 3 | from TTS.encoder.configs.base_encoder_config import BaseEncoderConfig 4 | 5 | 6 | @dataclass 7 | class EmotionEncoderConfig(BaseEncoderConfig): 8 | """Defines parameters for Emotion Encoder model.""" 9 | 10 | model: str = "emotion_encoder" 11 | map_classid_to_classname: dict = None 12 | class_name_key: str = "emotion_name" 13 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/encoder/configs/speaker_encoder_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import asdict, dataclass 2 | 3 | from TTS.encoder.configs.base_encoder_config import BaseEncoderConfig 4 | 5 | 6 | @dataclass 7 | class SpeakerEncoderConfig(BaseEncoderConfig): 8 | """Defines parameters for Speaker Encoder model.""" 9 | 10 | model: str = "speaker_encoder" 11 | class_name_key: str = "speaker_name" 12 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/encoder/requirements.txt: -------------------------------------------------------------------------------- 1 | umap-learn 2 | numpy>=1.17.0 3 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/encoder/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/encoder/utils/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/encoder/utils/visual.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import umap 5 | 6 | matplotlib.use("Agg") 7 | 8 | 9 | colormap = ( 10 | np.array( 11 | [ 12 | [76, 255, 0], 13 | [0, 127, 70], 14 | [255, 0, 0], 15 | [255, 217, 38], 16 | [0, 135, 255], 17 | [165, 0, 165], 18 | [255, 167, 255], 19 | [0, 255, 255], 20 | [255, 96, 38], 21 | [142, 76, 0], 22 | [33, 0, 127], 23 | [0, 0, 0], 24 | [183, 183, 183], 25 | ], 26 | dtype=float, 27 | ) 28 | / 255 29 | ) 30 | 31 | 32 | def plot_embeddings(embeddings, num_classes_in_batch): 33 | num_utter_per_class = embeddings.shape[0] // num_classes_in_batch 34 | 35 | # if necessary get just the first 10 classes 36 | if num_classes_in_batch > 10: 37 | num_classes_in_batch = 10 38 | embeddings = embeddings[: num_classes_in_batch * num_utter_per_class] 39 | 40 | model = umap.UMAP() 41 | projection = model.fit_transform(embeddings) 42 | ground_truth = np.repeat(np.arange(num_classes_in_batch), num_utter_per_class) 43 | colors = [colormap[i] for i in ground_truth] 44 | fig, ax = plt.subplots(figsize=(16, 10)) 45 | _ = ax.scatter(projection[:, 0], projection[:, 1], c=colors) 46 | plt.gca().set_aspect("equal", 
"datalim") 47 | plt.title("UMAP projection") 48 | plt.tight_layout() 49 | plt.savefig("umap") 50 | return fig 51 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/server/README.md: -------------------------------------------------------------------------------- 1 | # :frog: TTS demo server 2 | Before you use the server, make sure you [install](https://github.com/coqui-ai/TTS/tree/dev#install-tts)) :frog: TTS properly. Then, you can follow the steps below. 3 | 4 | **Note:** If you install :frog:TTS using ```pip```, you can also use the ```tts-server``` end point on the terminal. 5 | 6 | Examples runs: 7 | 8 | List officially released models. 9 | ```python TTS/server/server.py --list_models ``` 10 | 11 | Run the server with the official models. 12 | ```python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan``` 13 | 14 | Run the server with the official models on a GPU. 15 | ```CUDA_VISIBLE_DEVICES="0" python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan --use_cuda True``` 16 | 17 | Run the server with a custom models. 18 | ```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth --vocoder_config /path/to/vocoder/config.json``` 19 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/server/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/server/conf.json: -------------------------------------------------------------------------------- 1 | { 2 | "tts_path":"/media/erogol/data_ssd/Models/libri_tts/5049/", // tts model root folder 3 | "tts_file":"best_model.pth", // tts checkpoint file 4 | "tts_config":"config.json", // tts config.json file 5 | "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding. 
6 | "vocoder_config":null, 7 | "vocoder_file": null, 8 | "is_wavernn_batched":true, 9 | "port": 5002, 10 | "use_cuda": true, 11 | "debug": true 12 | } 13 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/server/static/coqui-log-green-TTS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/server/static/coqui-log-green-TTS.png -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/configs/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | from inspect import isclass 4 | 5 | # import all files under configs/ 6 | # configs_dir = os.path.dirname(__file__) 7 | # for file in os.listdir(configs_dir): 8 | # path = os.path.join(configs_dir, file) 9 | # if not file.startswith("_") and not file.startswith(".") and (file.endswith(".py") or os.path.isdir(path)): 10 | # config_name = file[: file.find(".py")] if file.endswith(".py") else file 11 | # module = importlib.import_module("TTS.tts.configs." + config_name) 12 | # for attribute_name in dir(module): 13 | # attribute = getattr(module, attribute_name) 14 | 15 | # if isclass(attribute): 16 | # # Add the class to this package's variables 17 | # globals()[attribute_name] = attribute 18 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/configs/tacotron2_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from TTS.tts.configs.tacotron_config import TacotronConfig 4 | 5 | 6 | @dataclass 7 | class Tacotron2Config(TacotronConfig): 8 | """Defines parameters for Tacotron2 based models. 9 | 10 | Example: 11 | 12 | >>> from TTS.tts.configs.tacotron2_config import Tacotron2Config 13 | >>> config = Tacotron2Config() 14 | 15 | Check `TacotronConfig` for argument descriptions. 
16 | """ 17 | 18 | model: str = "tacotron2" 19 | out_channels: int = 80 20 | encoder_in_features: int = 512 21 | decoder_in_features: int = 512 22 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from TTS.tts.layers.losses import * 2 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/align_tts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/layers/align_tts/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/align_tts/duration_predictor.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from TTS.tts.layers.generic.pos_encoding import PositionalEncoding 4 | from TTS.tts.layers.generic.transformer import FFTransformerBlock 5 | 6 | 7 | class DurationPredictor(nn.Module): 8 | def __init__(self, num_chars, hidden_channels, hidden_channels_ffn, num_heads): 9 | super().__init__() 10 | self.embed = nn.Embedding(num_chars, hidden_channels) 11 | self.pos_enc = PositionalEncoding(hidden_channels, dropout_p=0.1) 12 | self.FFT = FFTransformerBlock(hidden_channels, num_heads, hidden_channels_ffn, 2, 0.1) 13 | self.out_layer = nn.Conv1d(hidden_channels, 1, 1) 14 | 15 | def forward(self, text, text_lengths): 16 | # B, L -> B, L 17 | emb = self.embed(text) 18 | emb = self.pos_enc(emb.transpose(1, 2)) 19 | x = self.FFT(emb, text_lengths) 20 | x = self.out_layer(x).squeeze(-1) 21 | return x 22 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/align_tts/mdn.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class MDNBlock(nn.Module): 5 | """Mixture of Density Network implementation 6 | https://arxiv.org/pdf/2003.01950.pdf 7 | """ 8 | 9 | def __init__(self, in_channels, out_channels): 10 | super().__init__() 11 | self.out_channels = out_channels 12 | self.conv1 = nn.Conv1d(in_channels, in_channels, 1) 13 | self.norm = nn.LayerNorm(in_channels) 14 | self.relu = nn.ReLU() 15 | self.dropout = nn.Dropout(0.1) 16 | self.conv2 = nn.Conv1d(in_channels, out_channels, 1) 17 | 18 | def forward(self, x): 19 | o = self.conv1(x) 20 | o = o.transpose(1, 2) 21 | o = self.norm(o) 22 | o = o.transpose(1, 2) 23 | o = self.relu(o) 24 | o = self.dropout(o) 25 | mu_sigma = self.conv2(o) 26 | # TODO: check this sigmoid 27 | # mu = torch.sigmoid(mu_sigma[:, :self.out_channels//2, :]) 28 | mu = mu_sigma[:, : self.out_channels // 2, :] 29 | log_sigma = mu_sigma[:, self.out_channels // 2 :, :] 30 | return mu, log_sigma 31 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/bark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/layers/bark/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/bark/hubert/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/layers/bark/hubert/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/bark/hubert/hubert_manager.py: -------------------------------------------------------------------------------- 1 | # From https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer 2 | 3 | import os.path 4 | import shutil 5 | import urllib.request 6 | 7 | import huggingface_hub 8 | 9 | 10 | class HubertManager: 11 | @staticmethod 12 | def make_sure_hubert_installed( 13 | download_url: str = "https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt", model_path: str = "" 14 | ): 15 | if not os.path.isfile(model_path): 16 | print("Downloading HuBERT base model") 17 | urllib.request.urlretrieve(download_url, model_path) 18 | print("Downloaded HuBERT") 19 | return model_path 20 | return None 21 | 22 | @staticmethod 23 | def make_sure_tokenizer_installed( 24 | model: str = "quantifier_hubert_base_ls960_14.pth", 25 | repo: str = "GitMylo/bark-voice-cloning", 26 | model_path: str = "", 27 | ): 28 | model_dir = os.path.dirname(model_path) 29 | if not os.path.isfile(model_path): 30 | print("Downloading HuBERT custom tokenizer") 31 | huggingface_hub.hf_hub_download(repo, model, local_dir=model_dir, local_dir_use_symlinks=False) 32 | shutil.move(os.path.join(model_dir, model), model_path) 33 | print("Downloaded tokenizer") 34 | return model_path 35 | return None 36 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/delightful_tts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/layers/delightful_tts/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/feed_forward/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/layers/feed_forward/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/feed_forward/duration_predictor.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from TTS.tts.layers.generic.res_conv_bn import Conv1dBN 4 | 5 | 6 | class DurationPredictor(nn.Module): 7 | """Speedy Speech duration predictor model. 8 | Predicts phoneme durations from encoder outputs. 9 | 10 | Note: 11 | Outputs interpreted as log(durations) 12 | To get actual durations, do exp transformation 13 | 14 | conv_BN_4x1 -> conv_BN_3x1 -> conv_BN_1x1 -> conv_1x1 15 | 16 | Args: 17 | hidden_channels (int): number of channels in the inner layers. 
18 | """ 19 | 20 | def __init__(self, hidden_channels): 21 | super().__init__() 22 | 23 | self.layers = nn.ModuleList( 24 | [ 25 | Conv1dBN(hidden_channels, hidden_channels, 4, 1), 26 | Conv1dBN(hidden_channels, hidden_channels, 3, 1), 27 | Conv1dBN(hidden_channels, hidden_channels, 1, 1), 28 | nn.Conv1d(hidden_channels, 1, 1), 29 | ] 30 | ) 31 | 32 | def forward(self, x, x_mask): 33 | """ 34 | Shapes: 35 | x: [B, C, T] 36 | x_mask: [B, 1, T] 37 | """ 38 | o = x 39 | for layer in self.layers: 40 | o = layer(o) * x_mask 41 | return o 42 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/generic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/layers/generic/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/generic/gated_conv.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from .normalization import LayerNorm 4 | 5 | 6 | class GatedConvBlock(nn.Module): 7 | """Gated convolutional block as in https://arxiv.org/pdf/1612.08083.pdf 8 | Args: 9 | in_out_channels (int): number of input/output channels. 10 | kernel_size (int): convolution kernel size. 11 | dropout_p (float): dropout rate. 12 | """ 13 | 14 | def __init__(self, in_out_channels, kernel_size, dropout_p, num_layers): 15 | super().__init__() 16 | # class arguments 17 | self.dropout_p = dropout_p 18 | self.num_layers = num_layers 19 | # define layers 20 | self.conv_layers = nn.ModuleList() 21 | self.norm_layers = nn.ModuleList() 22 | self.layers = nn.ModuleList() 23 | for _ in range(num_layers): 24 | self.conv_layers += [nn.Conv1d(in_out_channels, 2 * in_out_channels, kernel_size, padding=kernel_size // 2)] 25 | self.norm_layers += [LayerNorm(2 * in_out_channels)] 26 | 27 | def forward(self, x, x_mask): 28 | o = x 29 | res = x 30 | for idx in range(self.num_layers): 31 | o = nn.functional.dropout(o, p=self.dropout_p, training=self.training) 32 | o = self.conv_layers[idx](o * x_mask) 33 | o = self.norm_layers[idx](o) 34 | o = nn.functional.glu(o, dim=1) 35 | o = res + o 36 | res = o 37 | return o 38 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/glow_tts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/layers/glow_tts/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/overflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/layers/overflow/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/tacotron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/layers/tacotron/__init__.py -------------------------------------------------------------------------------- 
/submodules/TTS/TTS/tts/layers/tortoise/tokenizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from tokenizers import Tokenizer 5 | 6 | from TTS.tts.utils.text.cleaners import english_cleaners 7 | 8 | DEFAULT_VOCAB_FILE = os.path.join( 9 | os.path.dirname(os.path.realpath(__file__)), "../../utils/assets/tortoise/tokenizer.json" 10 | ) 11 | 12 | 13 | class VoiceBpeTokenizer: 14 | def __init__(self, vocab_file=DEFAULT_VOCAB_FILE, vocab_str=None): 15 | self.tokenizer = None 16 | if vocab_file is not None: 17 | self.tokenizer = Tokenizer.from_file(vocab_file) 18 | if vocab_str is not None: 19 | self.tokenizer = Tokenizer.from_str(vocab_str) 20 | 21 | def preprocess_text(self, txt): 22 | txt = english_cleaners(txt) 23 | return txt 24 | 25 | def encode(self, txt): 26 | txt = self.preprocess_text(txt) 27 | txt = txt.replace(" ", "[SPACE]") 28 | return self.tokenizer.encode(txt).ids 29 | 30 | def decode(self, seq): 31 | if isinstance(seq, torch.Tensor): 32 | seq = seq.cpu().numpy() 33 | txt = self.tokenizer.decode(seq, skip_special_tokens=False).replace(" ", "") 34 | txt = txt.replace("[SPACE]", " ") 35 | txt = txt.replace("[STOP]", "") 36 | txt = txt.replace("[UNK]", "") 37 | return txt 38 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/layers/xtts/xtts_manager.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class SpeakerManager(): 4 | def __init__(self, speaker_file_path=None): 5 | self.speakers = torch.load(speaker_file_path) 6 | 7 | @property 8 | def name_to_id(self): 9 | return self.speakers.keys() 10 | 11 | @property 12 | def num_speakers(self): 13 | return len(self.name_to_id) 14 | 15 | @property 16 | def speaker_names(self): 17 | return list(self.name_to_id.keys()) 18 | 19 | 20 | class LanguageManager(): 21 | def __init__(self, config): 22 | self.langs = config["languages"] 23 | 24 | @property 25 | def name_to_id(self): 26 | return self.langs 27 | 28 | @property 29 | def num_languages(self): 30 | return len(self.name_to_id) 31 | 32 | @property 33 | def language_names(self): 34 | return list(self.name_to_id) 35 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/models/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from TTS.utils.generic_utils import find_module 4 | 5 | 6 | def setup_model(config: "Coqpit", samples: Union[List[List], List[Dict]] = None) -> "BaseTTS": 7 | print(" > Using model: {}".format(config.model)) 8 | # fetch the right model implementation. 
9 | if "base_model" in config and config["base_model"] is not None: 10 | MyModel = find_module("TTS.tts.models", config.base_model.lower()) 11 | else: 12 | MyModel = find_module("TTS.tts.models", config.model.lower()) 13 | model = MyModel.init_from_config(config=config, samples=samples) 14 | return model 15 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/utils/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/utils/measures.py: -------------------------------------------------------------------------------- 1 | def alignment_diagonal_score(alignments, binary=False): 2 | """ 3 | Compute how diagonal alignment predictions are. It is useful 4 | to measure the alignment consistency of a model 5 | Args: 6 | alignments (torch.Tensor): batch of alignments. 7 | binary (bool): if True, ignore scores and consider attention 8 | as a binary mask. 9 | Shape: 10 | - alignments : :math:`[B, T_de, T_en]` 11 | """ 12 | maxs = alignments.max(dim=1)[0] 13 | if binary: 14 | maxs[maxs > 0] = 1 15 | return maxs.mean(dim=1).mean(dim=0).item() 16 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/utils/monotonic_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/utils/monotonic_align/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/utils/monotonic_align/core.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | cimport cython 4 | cimport numpy as np 5 | 6 | from cython.parallel import prange 7 | 8 | 9 | @cython.boundscheck(False) 10 | @cython.wraparound(False) 11 | cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_x, int t_y, float max_neg_val) nogil: 12 | cdef int x 13 | cdef int y 14 | cdef float v_prev 15 | cdef float v_cur 16 | cdef float tmp 17 | cdef int index = t_x - 1 18 | 19 | for y in range(t_y): 20 | for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): 21 | if x == y: 22 | v_cur = max_neg_val 23 | else: 24 | v_cur = value[x, y-1] 25 | if x == 0: 26 | if y == 0: 27 | v_prev = 0. 
28 | else: 29 | v_prev = max_neg_val 30 | else: 31 | v_prev = value[x-1, y-1] 32 | value[x, y] = max(v_cur, v_prev) + value[x, y] 33 | 34 | for y in range(t_y - 1, -1, -1): 35 | path[index, y] = 1 36 | if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]): 37 | index = index - 1 38 | 39 | 40 | @cython.boundscheck(False) 41 | @cython.wraparound(False) 42 | cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil: 43 | cdef int b = values.shape[0] 44 | 45 | cdef int i 46 | for i in prange(b, nogil=True): 47 | maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val) 48 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/utils/monotonic_align/setup.py: -------------------------------------------------------------------------------- 1 | # from distutils.core import setup 2 | # from Cython.Build import cythonize 3 | # import numpy 4 | 5 | # setup(name='monotonic_align', 6 | # ext_modules=cythonize("core.pyx"), 7 | # include_dirs=[numpy.get_include()]) 8 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/utils/text/__init__.py: -------------------------------------------------------------------------------- 1 | from TTS.tts.utils.text.tokenizer import TTSTokenizer 2 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/utils/text/bangla/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/utils/text/bangla/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/utils/text/belarusian/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/utils/text/belarusian/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/utils/text/belarusian/phonemizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | finder = None 4 | 5 | 6 | def init(): 7 | try: 8 | import jpype 9 | import jpype.imports 10 | except ModuleNotFoundError: 11 | raise ModuleNotFoundError( 12 | "Belarusian phonemizer requires to install module 'jpype1' manually. Try `pip install jpype1`." 
13 |     )
14 | 
15 |     try:
16 |         jar_path = os.environ["BEL_FANETYKA_JAR"]
17 |     except KeyError:
18 |         raise KeyError("You need to define the 'BEL_FANETYKA_JAR' environment variable as the path to the fanetyka.jar file")
19 | 
20 |     jpype.startJVM(classpath=[jar_path])
21 | 
22 |     # import the Java modules
23 |     from org.alex73.korpus.base import GrammarDB2, GrammarFinder
24 | 
25 |     grammar_db = GrammarDB2.initializeFromJar()
26 |     global finder
27 |     finder = GrammarFinder(grammar_db)
28 | 
29 | 
30 | def belarusian_text_to_phonemes(text: str) -> str:
31 |     # Initialize only on first run
32 |     if finder is None:
33 |         init()
34 | 
35 |     from org.alex73.fanetyka.impl import FanetykaText
36 | 
37 |     return str(FanetykaText(finder, text).ipa)
38 | 
--------------------------------------------------------------------------------
/submodules/TTS/TTS/tts/utils/text/chinese_mandarin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/utils/text/chinese_mandarin/__init__.py
--------------------------------------------------------------------------------
/submodules/TTS/TTS/tts/utils/text/chinese_mandarin/phonemizer.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | 
3 | import jieba
4 | import pypinyin
5 | 
6 | from .pinyinToPhonemes import PINYIN_DICT
7 | 
8 | 
9 | def _chinese_character_to_pinyin(text: str) -> List[str]:
10 |     pinyins = pypinyin.pinyin(text, style=pypinyin.Style.TONE3, heteronym=False, neutral_tone_with_five=True)
11 |     pinyins_flat_list = [item for sublist in pinyins for item in sublist]
12 |     return pinyins_flat_list
13 | 
14 | 
15 | def _chinese_pinyin_to_phoneme(pinyin: str) -> str:
16 |     segment = pinyin[:-1]
17 |     tone = pinyin[-1]
18 |     phoneme = PINYIN_DICT.get(segment, [""])[0]
19 |     return phoneme + tone
20 | 
21 | 
22 | def chinese_text_to_phonemes(text: str, seperator: str = "|") -> str:
23 |     tokenized_text = jieba.cut(text, HMM=False)
24 |     tokenized_text = " ".join(tokenized_text)
25 |     pinyined_text: List[str] = _chinese_character_to_pinyin(tokenized_text)
26 | 
27 |     results: List[str] = []
28 | 
29 |     for token in pinyined_text:
30 |         if token[-1] in "12345":  # TODO transform to is_pinyin()
31 |             pinyin_phonemes = _chinese_pinyin_to_phoneme(token)
32 | 
33 |             results += list(pinyin_phonemes)
34 |         else:  # is punctuation or other
35 |             results += list(token)
36 | 
37 |     return seperator.join(results)
38 | 
--------------------------------------------------------------------------------
/submodules/TTS/TTS/tts/utils/text/english/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/utils/text/english/__init__.py
--------------------------------------------------------------------------------
/submodules/TTS/TTS/tts/utils/text/english/abbreviations.py:
--------------------------------------------------------------------------------
1 | import re
2 | 
3 | # List of (regular expression, replacement) pairs for abbreviations in English:
4 | abbreviations_en = [
5 |     (re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1])
6 |     for x in [
7 |         ("mrs", "misess"),
8 |         ("mr", "mister"),
9 |         ("dr", "doctor"),
10 |         ("st", "saint"),
11 |         ("co", "company"),
12 |         ("jr", "junior"),
13 |         ("maj", "major"),
14 |         ("gen", "general"),
15 |         ("drs", "doctors"),
16 |         ("rev", "reverend"),
17 |         ("lt", "lieutenant"),
18 |         ("hon", "honorable"),
19 |         ("sgt", "sergeant"),
20 |         ("capt", "captain"),
21 |         ("esq", "esquire"),
22 |         ("ltd", "limited"),
23 |         ("col", "colonel"),
24 |         ("ft", "fort"),
25 |     ]
26 | ]
27 | 
--------------------------------------------------------------------------------
/submodules/TTS/TTS/tts/utils/text/english/time_norm.py:
--------------------------------------------------------------------------------
1 | import re
2 | 
3 | import inflect
4 | 
5 | _inflect = inflect.engine()
6 | 
7 | _time_re = re.compile(
8 |     r"""\b
9 |     ((0?[0-9])|(1[0-1])|(1[2-9])|(2[0-3]))  # hours
10 |     :
11 |     ([0-5][0-9])  # minutes
12 |     \s*(a\.m\.|am|pm|p\.m\.|a\.m|p\.m)?  # am/pm (dots escaped once; "\\." in this raw string would require a literal backslash, so "a.m."/"p.m." would never match)
13 |     \b""",
14 |     re.IGNORECASE | re.X,
15 | )
16 | 
17 | 
18 | def _expand_num(n: int) -> str:
19 |     return _inflect.number_to_words(n)
20 | 
21 | 
22 | def _expand_time_english(match: "re.Match") -> str:
23 |     hour = int(match.group(1))
24 |     past_noon = hour >= 12
25 |     time = []
26 |     if hour > 12:
27 |         hour -= 12
28 |     elif hour == 0:
29 |         hour = 12
30 |         past_noon = True
31 |     time.append(_expand_num(hour))
32 | 
33 |     minute = int(match.group(6))
34 |     if minute > 0:
35 |         if minute < 10:
36 |             time.append("oh")
37 |         time.append(_expand_num(minute))
38 |     am_pm = match.group(7)
39 |     if am_pm is None:
40 |         time.append("p m" if past_noon else "a m")
41 |     else:
42 |         time.extend(list(am_pm.replace(".", "")))
43 |     return " ".join(time)
44 | 
45 | 
46 | def expand_time_english(text: str) -> str:
47 |     return re.sub(_time_re, _expand_time_english, text)
48 | 
--------------------------------------------------------------------------------
/submodules/TTS/TTS/tts/utils/text/french/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/utils/text/french/__init__.py
--------------------------------------------------------------------------------
/submodules/TTS/TTS/tts/utils/text/japanese/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/utils/text/japanese/__init__.py
--------------------------------------------------------------------------------
/submodules/TTS/TTS/tts/utils/text/korean/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/tts/utils/text/korean/__init__.py
--------------------------------------------------------------------------------
/submodules/TTS/TTS/tts/utils/text/korean/ko_dictionary.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Add the word you want to the dictionary.
3 | etc_dictionary = {"1+1": "원플러스원", "2+1": "투플러스원"} 4 | 5 | 6 | english_dictionary = { 7 | "KOREA": "코리아", 8 | "IDOL": "아이돌", 9 | "IT": "아이티", 10 | "IQ": "아이큐", 11 | "UP": "업", 12 | "DOWN": "다운", 13 | "PC": "피씨", 14 | "CCTV": "씨씨티비", 15 | "SNS": "에스엔에스", 16 | "AI": "에이아이", 17 | "CEO": "씨이오", 18 | "A": "에이", 19 | "B": "비", 20 | "C": "씨", 21 | "D": "디", 22 | "E": "이", 23 | "F": "에프", 24 | "G": "지", 25 | "H": "에이치", 26 | "I": "아이", 27 | "J": "제이", 28 | "K": "케이", 29 | "L": "엘", 30 | "M": "엠", 31 | "N": "엔", 32 | "O": "오", 33 | "P": "피", 34 | "Q": "큐", 35 | "R": "알", 36 | "S": "에스", 37 | "T": "티", 38 | "U": "유", 39 | "V": "브이", 40 | "W": "더블유", 41 | "X": "엑스", 42 | "Y": "와이", 43 | "Z": "제트", 44 | } 45 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/utils/text/korean/korean.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Code based on https://github.com/carpedm20/multi-speaker-tacotron-tensorflow/blob/master/text/korean.py 3 | import re 4 | 5 | from TTS.tts.utils.text.korean.ko_dictionary import english_dictionary, etc_dictionary 6 | 7 | 8 | def normalize(text): 9 | text = text.strip() 10 | text = re.sub("[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]", "", text) 11 | text = normalize_with_dictionary(text, etc_dictionary) 12 | text = normalize_english(text) 13 | text = text.lower() 14 | return text 15 | 16 | 17 | def normalize_with_dictionary(text, dic): 18 | if any(key in text for key in dic.keys()): 19 | pattern = re.compile("|".join(re.escape(key) for key in dic.keys())) 20 | return pattern.sub(lambda x: dic[x.group()], text) 21 | return text 22 | 23 | 24 | def normalize_english(text): 25 | def fn(m): 26 | word = m.group() 27 | if word in english_dictionary: 28 | return english_dictionary.get(word) 29 | return word 30 | 31 | text = re.sub("([A-Za-z]+)", fn, text) 32 | return text 33 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/tts/utils/text/korean/phonemizer.py: -------------------------------------------------------------------------------- 1 | from jamo import hangul_to_jamo 2 | 3 | from TTS.tts.utils.text.korean.korean import normalize 4 | 5 | g2p = None 6 | 7 | 8 | def korean_text_to_phonemes(text, character: str = "hangeul") -> str: 9 | """ 10 | 11 | The input and output values look the same, but they are different in Unicode. 
12 | 
13 |     example :
14 | 
15 |         input = '하늘' (Unicode : \ud558\ub298), (하 + 늘)
16 |         output = '하늘' (Unicode : \u1112\u1161\u1102\u1173\u11af), (ᄒ + ᅡ + ᄂ + ᅳ + ᆯ)
17 | 
18 |     """
19 |     global g2p  # pylint: disable=global-statement
20 |     if g2p is None:
21 |         from g2pkk import G2p
22 | 
23 |         g2p = G2p()
24 | 
25 |     if character == "english":
26 |         from anyascii import anyascii
27 | 
28 |         text = normalize(text)
29 |         text = g2p(text)
30 |         text = anyascii(text)
31 |         return text
32 | 
33 |     text = normalize(text)
34 |     text = g2p(text)
35 |     text = list(hangul_to_jamo(text))  # '하늘' --> ['ᄒ', 'ᅡ', 'ᄂ', 'ᅳ', 'ᆯ']
36 |     return "".join(text)
37 | 
--------------------------------------------------------------------------------
/submodules/TTS/TTS/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/utils/__init__.py
--------------------------------------------------------------------------------
/submodules/TTS/TTS/utils/audio/__init__.py:
--------------------------------------------------------------------------------
1 | from TTS.utils.audio.processor import AudioProcessor
2 | 
--------------------------------------------------------------------------------
/submodules/TTS/TTS/utils/distribute.py:
--------------------------------------------------------------------------------
1 | # edited from https://github.com/fastai/imagenet-fast/blob/master/imagenet_nv/distributed.py
2 | import torch
3 | import torch.distributed as dist
4 | 
5 | 
6 | def reduce_tensor(tensor, num_gpus):
7 |     rt = tensor.clone()
8 |     dist.all_reduce(rt, op=dist.ReduceOp.SUM)  # ReduceOp: the lowercase reduce_op alias is deprecated
9 |     rt /= num_gpus
10 |     return rt
11 | 
12 | 
13 | def init_distributed(rank, num_gpus, group_name, dist_backend, dist_url):
14 |     assert torch.cuda.is_available(), "Distributed mode requires CUDA."
15 | 
16 |     # Set cuda device so everything is done on the right GPU.
17 |     torch.cuda.set_device(rank % torch.cuda.device_count())
18 | 
19 |     # Initialize distributed communication
20 |     dist.init_process_group(dist_backend, init_method=dist_url, world_size=num_gpus, rank=rank, group_name=group_name)
21 | 
--------------------------------------------------------------------------------
/submodules/TTS/TTS/vc/configs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/vc/configs/__init__.py
--------------------------------------------------------------------------------
/submodules/TTS/TTS/vc/models/__init__.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import re
3 | from typing import Dict, List, Union
4 | 
5 | 
6 | def to_camel(text):
7 |     text = text.capitalize()
8 |     return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), text)
9 | 
10 | 
11 | def setup_model(config: "Coqpit", samples: Union[List[List], List[Dict]] = None) -> "BaseVC":
12 |     print(" > Using model: {}".format(config.model))
13 |     # fetch the right model implementation.
14 | if "model" in config and config["model"].lower() == "freevc": 15 | MyModel = importlib.import_module("TTS.vc.models.freevc").FreeVC 16 | model = MyModel.init_from_config(config, samples) 17 | return model 18 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/vc/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/vc/modules/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/vc/modules/freevc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/vc/modules/freevc/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/vc/modules/freevc/speaker_encoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/vc/modules/freevc/speaker_encoder/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/vc/modules/freevc/speaker_encoder/hparams.py: -------------------------------------------------------------------------------- 1 | ## Mel-filterbank 2 | mel_window_length = 25 # In milliseconds 3 | mel_window_step = 10 # In milliseconds 4 | mel_n_channels = 40 5 | 6 | 7 | ## Audio 8 | sampling_rate = 16000 9 | # Number of spectrogram frames in a partial utterance 10 | partials_n_frames = 160 # 1600 ms 11 | 12 | 13 | ## Voice Activation Detection 14 | # Window size of the VAD. Must be either 10, 20 or 30 milliseconds. 15 | # This sets the granularity of the VAD. Should not need to be changed. 16 | vad_window_length = 30 # In milliseconds 17 | # Number of frames to average together when performing the moving average smoothing. 18 | # The larger this value, the larger the VAD variations must be to not get smoothed out. 19 | vad_moving_average_width = 8 20 | # Maximum number of consecutive silent frames a segment can have. 
21 | vad_max_silence_length = 6 22 | 23 | 24 | ## Audio volume normalization 25 | audio_norm_target_dBFS = -30 26 | 27 | 28 | ## Model parameters 29 | model_hidden_size = 256 30 | model_embedding_size = 256 31 | model_num_layers = 3 32 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/vc/modules/freevc/wavlm/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib.request 3 | 4 | import torch 5 | 6 | from TTS.utils.generic_utils import get_user_data_dir 7 | from TTS.vc.modules.freevc.wavlm.wavlm import WavLM, WavLMConfig 8 | 9 | model_uri = "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/WavLM-Large.pt" 10 | 11 | 12 | def get_wavlm(device="cpu"): 13 | """Download the model and return the model object.""" 14 | 15 | output_path = get_user_data_dir("tts") 16 | 17 | output_path = os.path.join(output_path, "wavlm") 18 | if not os.path.exists(output_path): 19 | os.makedirs(output_path) 20 | 21 | output_path = os.path.join(output_path, "WavLM-Large.pt") 22 | if not os.path.exists(output_path): 23 | print(f" > Downloading WavLM model to {output_path} ...") 24 | urllib.request.urlretrieve(model_uri, output_path) 25 | 26 | checkpoint = torch.load(output_path, map_location=torch.device(device)) 27 | cfg = WavLMConfig(checkpoint["cfg"]) 28 | wavlm = WavLM(cfg).to(device) 29 | wavlm.load_state_dict(checkpoint["model"]) 30 | wavlm.eval() 31 | return wavlm 32 | 33 | 34 | if __name__ == "__main__": 35 | wavlm = get_wavlm() 36 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/vocoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/vocoder/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/vocoder/configs/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | from inspect import isclass 4 | 5 | # import all files under configs/ 6 | configs_dir = os.path.dirname(__file__) 7 | for file in os.listdir(configs_dir): 8 | path = os.path.join(configs_dir, file) 9 | if not file.startswith("_") and not file.startswith(".") and (file.endswith(".py") or os.path.isdir(path)): 10 | config_name = file[: file.find(".py")] if file.endswith(".py") else file 11 | module = importlib.import_module("TTS.vocoder.configs." 
+ config_name) 12 | for attribute_name in dir(module): 13 | attribute = getattr(module, attribute_name) 14 | 15 | if isclass(attribute): 16 | # Add the class to this package's variables 17 | globals()[attribute_name] = attribute 18 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/vocoder/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/vocoder/layers/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/TTS/vocoder/models/fullband_melgan_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from TTS.vocoder.models.melgan_generator import MelganGenerator 4 | 5 | 6 | class FullbandMelganGenerator(MelganGenerator): 7 | def __init__( 8 | self, 9 | in_channels=80, 10 | out_channels=1, 11 | proj_kernel=7, 12 | base_channels=512, 13 | upsample_factors=(2, 8, 2, 2), 14 | res_kernel=3, 15 | num_res_blocks=4, 16 | ): 17 | super().__init__( 18 | in_channels=in_channels, 19 | out_channels=out_channels, 20 | proj_kernel=proj_kernel, 21 | base_channels=base_channels, 22 | upsample_factors=upsample_factors, 23 | res_kernel=res_kernel, 24 | num_res_blocks=num_res_blocks, 25 | ) 26 | 27 | @torch.no_grad() 28 | def inference(self, cond_features): 29 | cond_features = cond_features.to(self.layers[1].weight.device) 30 | cond_features = torch.nn.functional.pad( 31 | cond_features, (self.inference_padding, self.inference_padding), "replicate" 32 | ) 33 | return self.layers(cond_features) 34 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/vocoder/models/multiband_melgan_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from TTS.vocoder.layers.pqmf import PQMF 4 | from TTS.vocoder.models.melgan_generator import MelganGenerator 5 | 6 | 7 | class MultibandMelganGenerator(MelganGenerator): 8 | def __init__( 9 | self, 10 | in_channels=80, 11 | out_channels=4, 12 | proj_kernel=7, 13 | base_channels=384, 14 | upsample_factors=(2, 8, 2, 2), 15 | res_kernel=3, 16 | num_res_blocks=3, 17 | ): 18 | super().__init__( 19 | in_channels=in_channels, 20 | out_channels=out_channels, 21 | proj_kernel=proj_kernel, 22 | base_channels=base_channels, 23 | upsample_factors=upsample_factors, 24 | res_kernel=res_kernel, 25 | num_res_blocks=num_res_blocks, 26 | ) 27 | self.pqmf_layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0) 28 | 29 | def pqmf_analysis(self, x): 30 | return self.pqmf_layer.analysis(x) 31 | 32 | def pqmf_synthesis(self, x): 33 | return self.pqmf_layer.synthesis(x) 34 | 35 | @torch.no_grad() 36 | def inference(self, cond_features): 37 | cond_features = cond_features.to(self.layers[1].weight.device) 38 | cond_features = torch.nn.functional.pad( 39 | cond_features, (self.inference_padding, self.inference_padding), "replicate" 40 | ) 41 | return self.pqmf_synthesis(self.layers(cond_features)) 42 | -------------------------------------------------------------------------------- /submodules/TTS/TTS/vocoder/pqmf_output.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/vocoder/pqmf_output.wav 
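A minimal inference sketch for the `MultibandMelganGenerator` above. This is not from the repository; it assumes the default constructor arguments and a random mel input, so the output is only meaningful once trained weights are loaded:

```python
import torch

from TTS.vocoder.models.multiband_melgan_generator import MultibandMelganGenerator

model = MultibandMelganGenerator()  # defaults: 80-channel mel input, 4 PQMF sub-bands
model.eval()

mel = torch.randn(1, 80, 100)  # conditioning features [B, C_mel, T]
with torch.no_grad():
    wav = model.inference(mel)  # generator stack, then PQMF synthesis recombines the sub-bands
print(wav.shape)  # [1, 1, T_wav]; T_wav scales with the upsample factors (2*8*2*2 per band, times 4 bands)
```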
-------------------------------------------------------------------------------- /submodules/TTS/TTS/vocoder/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/TTS/vocoder/utils/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/dockerfiles/Dockerfile.dev: -------------------------------------------------------------------------------- 1 | ARG BASE=nvidia/cuda:11.8.0-base-ubuntu22.04 2 | FROM ${BASE} 3 | 4 | # Install OS dependencies: 5 | RUN apt-get update && apt-get upgrade -y 6 | RUN apt-get install -y --no-install-recommends \ 7 | gcc g++ \ 8 | make \ 9 | python3 python3-dev python3-pip python3-venv python3-wheel \ 10 | espeak-ng libsndfile1-dev \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # Install Major Python Dependencies: 14 | RUN pip3 install llvmlite --ignore-installed 15 | RUN pip3 install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 16 | RUN rm -rf /root/.cache/pip 17 | 18 | WORKDIR /root 19 | 20 | # Copy Dependency Lock Files: 21 | COPY \ 22 | Makefile \ 23 | pyproject.toml \ 24 | setup.py \ 25 | requirements.dev.txt \ 26 | requirements.ja.txt \ 27 | requirements.notebooks.txt \ 28 | requirements.txt \ 29 | /root/ 30 | 31 | # Install Project Dependencies 32 | # Separate stage to limit re-downloading: 33 | RUN pip install \ 34 | -r requirements.txt \ 35 | -r requirements.dev.txt \ 36 | -r requirements.ja.txt \ 37 | -r requirements.notebooks.txt 38 | 39 | # Copy TTS repository contents: 40 | COPY . /root 41 | 42 | # Installing the TTS package itself: 43 | RUN make install 44 | 45 | -------------------------------------------------------------------------------- /submodules/TTS/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= -j auto -WT --keep-going 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 
--------------------------------------------------------------------------------
/submodules/TTS/docs/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/docs/README.md
--------------------------------------------------------------------------------
/submodules/TTS/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | furo
2 | myst-parser == 2.0.0
3 | sphinx == 7.2.5
4 | sphinx_inline_tabs
5 | sphinx_copybutton
6 | linkify-it-py
--------------------------------------------------------------------------------
/submodules/TTS/docs/source/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/docs/source/_static/logo.png
--------------------------------------------------------------------------------
/submodules/TTS/docs/source/_templates/page.html:
--------------------------------------------------------------------------------
1 | {% extends "!page.html" %}
2 | {% block scripts %}
3 |     {{ super() }}
4 | {% endblock %}
--------------------------------------------------------------------------------
/submodules/TTS/docs/source/contributing.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../CONTRIBUTING.md
2 | :relative-images:
3 | ```
4 | 
--------------------------------------------------------------------------------
/submodules/TTS/docs/source/implementing_a_new_language_frontend.md:
--------------------------------------------------------------------------------
1 | # Implementing a New Language Frontend
2 | 
3 | - Language frontends are located under `TTS.tts.utils.text`
4 | - Each supported language has a separate folder.
5 | - Each folder contains all the utilities for processing the text input.
6 | - `TTS.tts.utils.text.phonemizers` contains the main phonemizer for a language. This is the class that uses the utilities
7 | from the previous step and is used to convert the text to phonemes or graphemes for the model.
8 | - After you implement your phonemizer, you need to add it to `TTS/tts/utils/text/phonemizers/__init__.py` so that the
9 | language code in the model config - `config.phoneme_language` - is mapped to the phonemizer class and the phonemizer is instantiated automatically.
10 | - You should also add tests to `tests/text_tests` if you want to make a PR.
11 | 
12 | We suggest you check the available implementations as references. Good luck!
13 | 
--------------------------------------------------------------------------------
/submodules/TTS/docs/source/index.md:
--------------------------------------------------------------------------------
1 | 
2 | ```{include} ../../README.md
3 | :relative-images:
4 | ```
5 | ----
6 | 
7 | # Documentation Content
8 | ```{eval-rst}
9 | .. toctree::
10 |     :maxdepth: 2
11 |     :caption: Get started
12 | 
13 |     tutorial_for_nervous_beginners
14 |     installation
15 |     faq
16 |     contributing
17 | 
18 | .. toctree::
19 |     :maxdepth: 2
20 |     :caption: Using 🐸TTS
21 | 
22 |     inference
23 |     docker_images
24 |     implementing_a_new_model
25 |     implementing_a_new_language_frontend
26 |     training_a_model
27 |     finetuning
28 |     configuration
29 |     formatting_your_dataset
30 |     what_makes_a_good_dataset
31 |     tts_datasets
32 |     marytts
33 | 
34 | .. toctree::
35 |     :maxdepth: 2
36 |     :caption: Main Classes
37 | 
38 |     main_classes/trainer_api
39 |     main_classes/audio_processor
40 |     main_classes/model_api
41 |     main_classes/dataset
42 |     main_classes/gan
43 |     main_classes/speaker_manager
44 | 
45 | .. toctree::
46 |     :maxdepth: 2
47 |     :caption: `tts` Models
48 | 
49 |     models/glow_tts.md
50 |     models/vits.md
51 |     models/forward_tts.md
52 |     models/tacotron1-2.md
53 |     models/overflow.md
54 |     models/tortoise.md
55 |     models/bark.md
56 |     models/xtts.md
57 | 
58 | .. toctree::
59 |     :maxdepth: 2
60 |     :caption: `vocoder` Models
61 | 
62 | ```
--------------------------------------------------------------------------------
/submodules/TTS/docs/source/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 | 
3 | 🐸TTS supports Python >=3.7, <3.11.0 and is tested on Ubuntu 18.10, 19.10 and 20.10.
4 | 
5 | ## Using `pip`
6 | 
7 | `pip` is recommended if you want to use 🐸TTS only for inference.
8 | 
9 | You can install from PyPI as follows:
10 | 
11 | ```bash
12 | pip install TTS  # from PyPI
13 | ```
14 | 
15 | Or install from GitHub:
16 | 
17 | ```bash
18 | pip install git+https://github.com/coqui-ai/TTS  # from GitHub
19 | ```
20 | 
21 | ## Installing From Source
22 | 
23 | This is recommended for development and more control over 🐸TTS.
24 | 
25 | ```bash
26 | git clone https://github.com/coqui-ai/TTS/
27 | cd TTS
28 | make system-deps  # only on Linux systems.
29 | make install
30 | ```
31 | 
32 | ## On Windows
33 | If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](https://stackoverflow.com/questions/66726331/
--------------------------------------------------------------------------------
/submodules/TTS/docs/source/main_classes/audio_processor.md:
--------------------------------------------------------------------------------
1 | # AudioProcessor API
2 | 
3 | `TTS.utils.audio.AudioProcessor` is the core class for all the audio processing routines. It provides an API for
4 | 
5 | - Feature extraction.
6 | - Sound normalization.
7 | - Reading and writing audio files.
8 | - Sampling audio signals.
9 | - Normalizing and denormalizing audio signals.
10 | - Griffin-Lim vocoder.
11 | 
12 | The `AudioProcessor` needs to be initialized with `TTS.config.shared_configs.BaseAudioConfig`. Any model config
13 | must also inherit from or instantiate `BaseAudioConfig`.
14 | 
15 | ## AudioProcessor
16 | ```{eval-rst}
17 | .. autoclass:: TTS.utils.audio.AudioProcessor
18 |     :members:
19 | ```
20 | 
21 | ## BaseAudioConfig
22 | ```{eval-rst}
23 | .. autoclass:: TTS.config.shared_configs.BaseAudioConfig
24 |     :members:
25 | ```
--------------------------------------------------------------------------------
/submodules/TTS/docs/source/main_classes/dataset.md:
--------------------------------------------------------------------------------
1 | # Datasets
2 | 
3 | ## TTS Dataset
4 | 
5 | ```{eval-rst}
6 | .. autoclass:: TTS.tts.datasets.TTSDataset
7 |     :members:
8 | ```
9 | 
10 | ## Vocoder Dataset
11 | 
12 | ```{eval-rst}
13 | .. autoclass:: TTS.vocoder.datasets.gan_dataset.GANDataset
14 |     :members:
15 | ```
16 | 
17 | ```{eval-rst}
18 | ..
autoclass:: TTS.vocoder.datasets.wavegrad_dataset.WaveGradDataset 19 | :members: 20 | ``` 21 | 22 | ```{eval-rst} 23 | .. autoclass:: TTS.vocoder.datasets.wavernn_dataset.WaveRNNDataset 24 | :members: 25 | ``` -------------------------------------------------------------------------------- /submodules/TTS/docs/source/main_classes/gan.md: -------------------------------------------------------------------------------- 1 | # GAN API 2 | 3 | The {class}`TTS.vocoder.models.gan.GAN` provides an easy way to implementing new GAN based models. You just need 4 | to define the model architectures for the generator and the discriminator networks and give them to the `GAN` class 5 | to do its ✨️. 6 | 7 | 8 | ## GAN 9 | ```{eval-rst} 10 | .. autoclass:: TTS.vocoder.models.gan.GAN 11 | :members: 12 | ``` -------------------------------------------------------------------------------- /submodules/TTS/docs/source/main_classes/model_api.md: -------------------------------------------------------------------------------- 1 | # Model API 2 | Model API provides you a set of functions that easily make your model compatible with the `Trainer`, 3 | `Synthesizer` and `ModelZoo`. 4 | 5 | ## Base TTS Model 6 | 7 | ```{eval-rst} 8 | .. autoclass:: TTS.model.BaseTrainerModel 9 | :members: 10 | ``` 11 | 12 | ## Base tts Model 13 | 14 | ```{eval-rst} 15 | .. autoclass:: TTS.tts.models.base_tts.BaseTTS 16 | :members: 17 | ``` 18 | 19 | ## Base vocoder Model 20 | 21 | ```{eval-rst} 22 | .. autoclass:: TTS.vocoder.models.base_vocoder.BaseVocoder 23 | :members: 24 | ``` -------------------------------------------------------------------------------- /submodules/TTS/docs/source/main_classes/speaker_manager.md: -------------------------------------------------------------------------------- 1 | # Speaker Manager API 2 | 3 | The {class}`TTS.tts.utils.speakers.SpeakerManager` organize speaker related data and information for 🐸TTS models. It is 4 | especially useful for multi-speaker models. 5 | 6 | 7 | ## Speaker Manager 8 | ```{eval-rst} 9 | .. automodule:: TTS.tts.utils.speakers 10 | :members: 11 | ``` -------------------------------------------------------------------------------- /submodules/TTS/docs/source/main_classes/trainer_api.md: -------------------------------------------------------------------------------- 1 | # Trainer API 2 | 3 | We made the trainer a separate project on https://github.com/coqui-ai/Trainer 4 | -------------------------------------------------------------------------------- /submodules/TTS/docs/source/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 
--------------------------------------------------------------------------------
/submodules/TTS/docs/source/models/glow_tts.md:
--------------------------------------------------------------------------------
1 | # Glow TTS
2 | 
3 | Glow TTS is a normalizing flow model for text-to-speech. It is built on the generic Glow model that was previously
4 | used in computer vision and vocoder models. It uses "monotonic alignment search" (MAS) to find the text-to-speech alignment
5 | and uses the output to train a separate duration predictor network for faster inference run-time.
6 | 
7 | ## Important resources & papers
8 | - GlowTTS: https://arxiv.org/abs/2005.11129
9 | - Glow (Generative Flow with invertible 1x1 Convolutions): https://arxiv.org/abs/1807.03039
10 | - Normalizing Flows: https://blog.evjang.com/2018/01/nf1.html
11 | 
12 | ## GlowTTS Config
13 | ```{eval-rst}
14 | .. autoclass:: TTS.tts.configs.glow_tts_config.GlowTTSConfig
15 |     :members:
16 | ```
17 | 
18 | ## GlowTTS Model
19 | ```{eval-rst}
20 | .. autoclass:: TTS.tts.models.glow_tts.GlowTTS
21 |     :members:
22 | ```
23 | 
--------------------------------------------------------------------------------
/submodules/TTS/docs/source/tts_datasets.md:
--------------------------------------------------------------------------------
1 | # TTS Datasets
2 | 
3 | Some of the known public datasets to which we have successfully applied 🐸TTS:
4 | 
5 | - [English - LJ Speech](https://keithito.com/LJ-Speech-Dataset/)
6 | - [English - Nancy](http://www.cstr.ed.ac.uk/projects/blizzard/2011/lessac_blizzard2011/)
7 | - [English - TWEB](https://www.kaggle.com/bryanpark/the-world-english-bible-speech-dataset)
8 | - [English - LibriTTS](https://openslr.org/60/)
9 | - [English - VCTK](https://datashare.ed.ac.uk/handle/10283/2950)
10 | - [Multilingual - M-AI-Labs](http://www.caito.de/2019/01/the-m-ailabs-speech-dataset/)
11 | - [Spanish](https://drive.google.com/file/d/1Sm_zyBo67XHkiFhcRSQ4YaHPYM0slO_e/view?usp=sharing) - thx! @carlfm01
12 | - [German - Thorsten OGVD](https://github.com/thorstenMueller/deep-learning-german-tts)
13 | - [Japanese - Kokoro](https://www.kaggle.com/kaiida/kokoro-speech-dataset-v11-small/version/1)
14 | - [Chinese](https://www.data-baker.com/data/index/source/)
15 | - [Ukrainian - LADA](https://github.com/egorsmkv/ukrainian-tts-datasets/tree/main/lada)
16 | 
17 | Let us know if you use 🐸TTS on a different dataset.
18 | 
--------------------------------------------------------------------------------
/submodules/TTS/images/TTS-performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/images/TTS-performance.png
--------------------------------------------------------------------------------
/submodules/TTS/images/coqui-log-green-TTS.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/images/coqui-log-green-TTS.png
--------------------------------------------------------------------------------
/submodules/TTS/images/demo_server.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/images/demo_server.gif
--------------------------------------------------------------------------------
/submodules/TTS/images/example_model_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/images/example_model_output.png
--------------------------------------------------------------------------------
/submodules/TTS/images/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/images/model.png
--------------------------------------------------------------------------------
/submodules/TTS/images/tts_cli.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/images/tts_cli.gif
--------------------------------------------------------------------------------
/submodules/TTS/images/tts_performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/images/tts_performance.png
--------------------------------------------------------------------------------
/submodules/TTS/notebooks/dataset_analysis/README.md:
--------------------------------------------------------------------------------
1 | ## Simple Notebook to Analyze a Dataset
2 | 
3 | Using this notebook, you can easily analyze a brand-new dataset, find exceptional cases, and define your training set.
4 | 
5 | What we are looking for here is a reasonable distribution of instances in terms of sequence length, audio length and word coverage.
6 | 
7 | This notebook is inspired by https://github.com/MycroftAI/mimic2
8 | 
--------------------------------------------------------------------------------
/submodules/TTS/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "setuptools",
4 |     "wheel",
5 |     "cython~=0.29.30",
6 |     "numpy>=1.22.0",
7 |     "packaging",
8 | ]
9 | 
10 | [flake8]
11 | max-line-length=120
12 | 
13 | [tool.black]
14 | line-length = 120
15 | target-version = ['py39']
16 | 
17 | [tool.isort]
18 | line_length = 120
19 | profile = "black"
20 | multi_line_output = 3
21 | 
--------------------------------------------------------------------------------
/submodules/TTS/recipes/README.md:
--------------------------------------------------------------------------------
1 | # 🐸💬 TTS Training Recipes
2 | 
3 | TTS recipes are intended to host scripts that run all the necessary steps to train a TTS model on a particular dataset.
4 | 
5 | For each dataset, you need to download the dataset once. Then you run the training for the model you want.
6 | 
7 | Run each script from the root TTS folder as follows.
8 | 
9 | ```console
10 | $ sh ./recipes/<dataset>/download_<dataset>.sh
11 | $ python recipes/<dataset>/<model_name>/train.py
12 | ```
13 | 
14 | For some datasets you might need to resample the audio files. For example, the VCTK dataset can be resampled to 22050 Hz as follows.
15 | 
16 | ```console
17 | python TTS/bin/resample.py --input_dir recipes/vctk/VCTK/wav48_silence_trimmed --output_sr 22050 --output_dir recipes/vctk/VCTK/wav48_silence_trimmed --n_jobs 8 --file_ext flac
18 | ```
19 | 
20 | If you train a new model using TTS, feel free to share your training to expand the list of recipes.
21 | 
22 | You can also open a new discussion and share your progress with the 🐸 community.
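All of the recipe training scripts follow the same skeleton: build a config, initialize the audio processor and tokenizer, load samples, and hand everything to the `Trainer`. A condensed sketch of that skeleton (illustrative, loosely mirroring the LJSpeech GlowTTS recipe; paths and parameters are placeholders):

```python
import os

from trainer import Trainer, TrainerArgs

from TTS.tts.configs.glow_tts_config import GlowTTSConfig
from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.glow_tts import GlowTTS
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor

output_path = os.path.dirname(os.path.abspath(__file__))
dataset_config = BaseDatasetConfig(
    formatter="ljspeech", meta_file_train="metadata.csv", path=os.path.join(output_path, "../LJSpeech-1.1/")
)
config = GlowTTSConfig(batch_size=32, run_eval=True, datasets=[dataset_config], output_path=output_path)

ap = AudioProcessor.init_from_config(config)               # audio feature settings
tokenizer, config = TTSTokenizer.init_from_config(config)  # text-to-token frontend
train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True)

model = GlowTTS(config, ap, tokenizer, speaker_manager=None)
trainer = Trainer(
    TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples
)
trainer.fit()
```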
-------------------------------------------------------------------------------- /submodules/TTS/recipes/bel-alex73/.gitignore: -------------------------------------------------------------------------------- 1 | /docker-prepare/*.txt 2 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/bel-alex73/docker-prepare-start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | cd $( dirname -- "$0"; ) 5 | 6 | cp ../../requirements*.txt docker-prepare/ 7 | 8 | docker build -t tts-learn -f docker-prepare/Dockerfile docker-prepare/ 9 | 10 | mkdir -p ../../../storage 11 | docker run --rm -it \ 12 | -p 2525:2525 \ 13 | --shm-size=256M \ 14 | --name tts-learn-run \ 15 | -v $(pwd)/../../:/a/TTS \ 16 | -v $(pwd)/../../../cv-corpus:/a/cv-corpus \ 17 | -v $(pwd)/../../../fanetyka/:/a/fanetyka/ \ 18 | -v $(pwd)/../../../storage:/storage \ 19 | tts-learn 20 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/bel-alex73/docker-prepare/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN apt -y update 4 | RUN apt -y upgrade 5 | RUN apt -y install --no-install-recommends pip ffmpeg openjdk-19-jre-headless 6 | 7 | RUN mkdir /a/ 8 | ADD requirements*.txt /a/ 9 | WORKDIR /a/ 10 | RUN pip install -r requirements.txt -r requirements.dev.txt -r requirements.notebooks.txt 11 | RUN pip install seaborn pydub notebook 12 | 13 | RUN apt -y install --no-install-recommends gcc libpython3.10-dev 14 | 15 | ADD runtime.sh /a/ 16 | 17 | WORKDIR /a/TTS/ 18 | CMD /a/runtime.sh 19 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/bel-alex73/docker-prepare/runtime.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /a/TTS 4 | pip install -e .[all,dev,notebooks] 5 | 6 | LANG=C.utf8 bash 7 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/bel-alex73/dump_config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from train_glowtts import config 5 | 6 | s = json.dumps(config, default=vars, indent=2) 7 | s = re.sub(r'"test_sentences":\s*\[\],', "", s) 8 | print(s) 9 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/blizzard2013/README.md: -------------------------------------------------------------------------------- 1 | # How to get the Blizzard 2013 Dataset 2 | 3 | The Capacitron model is a variational encoder extension of standard Tacotron based models to model prosody. 4 | 5 | To take full advantage of the model, it is advised to train the model with a dataset that contains a significant amount of prosodic information in the utterances. A tested candidate for such applications is the blizzard2013 dataset from the Blizzard Challenge, containing many hours of high quality audio book recordings. 6 | 7 | To get a license and download link for this dataset, you need to visit the [website](https://www.cstr.ed.ac.uk/projects/blizzard/2013/lessac_blizzard2013/license.html) of the Centre for Speech Technology Research of the University of Edinburgh. 8 | 9 | You get access to the raw dataset in a couple of days. 
There are a few preprocessing steps you need to do to be able to use the high fidelity dataset. 10 | 11 | 1. Get the forced time alignments for the blizzard dataset from [here](https://github.com/mueller91/tts_alignments). 12 | 2. Segment the high fidelity audio-book files based on the instructions [here](https://github.com/Tomiinek/Blizzard2013_Segmentation). -------------------------------------------------------------------------------- /submodules/TTS/recipes/kokoro/tacotron2-DDC/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # take the scripts's parent's directory to prefix all the output paths. 3 | RUN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" 4 | CORPUS=kokoro-speech-v1_1-small 5 | echo $RUN_DIR 6 | if [ \! -d $RUN_DIR/$CORPUS ] ; then 7 | echo "$RUN_DIR/$CORPUS doesn't exist." 8 | echo "Follow the instruction of https://github.com/kaiidams/Kokoro-Speech-Dataset to make the corpus." 9 | exit 1 10 | fi 11 | # create train-val splits 12 | shuf $RUN_DIR/$CORPUS/metadata.csv > $RUN_DIR/$CORPUS/metadata_shuf.csv 13 | head -n 8000 $RUN_DIR/$CORPUS/metadata_shuf.csv > $RUN_DIR/$CORPUS/metadata_train.csv 14 | tail -n 812 $RUN_DIR/$CORPUS/metadata_shuf.csv > $RUN_DIR/$CORPUS/metadata_val.csv 15 | # compute dataset mean and variance for normalization 16 | python TTS/bin/compute_statistics.py $RUN_DIR/tacotron2-DDC.json $RUN_DIR/scale_stats.npy --data_path $RUN_DIR/$CORPUS/wavs/ 17 | # training .... 18 | # change the GPU id if needed 19 | CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tts.py --config_path $RUN_DIR/tacotron2-DDC.json \ 20 | --coqpit.output_path $RUN_DIR \ 21 | --coqpit.datasets.0.path $RUN_DIR/$CORPUS \ 22 | --coqpit.audio.stats_path $RUN_DIR/scale_stats.npy \ 23 | --coqpit.phoneme_cache_path $RUN_DIR/phoneme_cache \ -------------------------------------------------------------------------------- /submodules/TTS/recipes/ljspeech/README.md: -------------------------------------------------------------------------------- 1 | # 🐸💬 TTS LJspeech Recipes 2 | 3 | For running the recipes 4 | 5 | 1. Download the LJSpeech dataset here either manually from [its official website](https://keithito.com/LJ-Speech-Dataset/) or using ```download_ljspeech.sh```. 6 | 2. Go to your desired model folder and run the training. 7 | 8 | Running Python files. (Choose the desired GPU ID for your run and set ```CUDA_VISIBLE_DEVICES```) 9 | ```terminal 10 | CUDA_VISIBLE_DEVICES="0" python train_modelX.py 11 | ``` 12 | 13 | Running bash scripts. 14 | ```terminal 15 | bash run.sh 16 | ``` 17 | 18 | 💡 Note that these runs are just templates to help you start training your first model. They are not optimized for the best 19 | result. Double-check the configurations and feel free to share your experiments to find better parameters together 💪. 20 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/ljspeech/download_ljspeech.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # take the scripts's parent's directory to prefix all the output paths. 
3 | RUN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" 4 | echo $RUN_DIR 5 | # download LJSpeech dataset 6 | wget http://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2 7 | # extract 8 | tar -xjf LJSpeech-1.1.tar.bz2 9 | # create train-val splits 10 | shuf LJSpeech-1.1/metadata.csv > LJSpeech-1.1/metadata_shuf.csv 11 | head -n 12000 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_train.csv 12 | tail -n 1100 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_val.csv 13 | mv LJSpeech-1.1 $RUN_DIR/recipes/ljspeech/ 14 | rm LJSpeech-1.1.tar.bz2 -------------------------------------------------------------------------------- /submodules/TTS/recipes/ljspeech/hifigan/train_hifigan.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from trainer import Trainer, TrainerArgs 4 | 5 | from TTS.utils.audio import AudioProcessor 6 | from TTS.vocoder.configs import HifiganConfig 7 | from TTS.vocoder.datasets.preprocess import load_wav_data 8 | from TTS.vocoder.models.gan import GAN 9 | 10 | output_path = os.path.dirname(os.path.abspath(__file__)) 11 | 12 | config = HifiganConfig( 13 | batch_size=32, 14 | eval_batch_size=16, 15 | num_loader_workers=4, 16 | num_eval_loader_workers=4, 17 | run_eval=True, 18 | test_delay_epochs=5, 19 | epochs=1000, 20 | seq_len=8192, 21 | pad_short=2000, 22 | use_noise_augment=True, 23 | eval_split_size=10, 24 | print_step=25, 25 | print_eval=False, 26 | mixed_precision=False, 27 | lr_gen=1e-4, 28 | lr_disc=1e-4, 29 | data_path=os.path.join(output_path, "../LJSpeech-1.1/wavs/"), 30 | output_path=output_path, 31 | ) 32 | 33 | # init audio processor 34 | ap = AudioProcessor(**config.audio.to_dict()) 35 | 36 | # load training samples 37 | eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size) 38 | 39 | # init model 40 | model = GAN(config, ap) 41 | 42 | # init the trainer and 🚀 43 | trainer = Trainer( 44 | TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples 45 | ) 46 | trainer.fit() 47 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/ljspeech/multiband_melgan/train_multiband_melgan.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from trainer import Trainer, TrainerArgs 4 | 5 | from TTS.utils.audio import AudioProcessor 6 | from TTS.vocoder.configs import MultibandMelganConfig 7 | from TTS.vocoder.datasets.preprocess import load_wav_data 8 | from TTS.vocoder.models.gan import GAN 9 | 10 | output_path = os.path.dirname(os.path.abspath(__file__)) 11 | 12 | config = MultibandMelganConfig( 13 | batch_size=32, 14 | eval_batch_size=16, 15 | num_loader_workers=4, 16 | num_eval_loader_workers=4, 17 | run_eval=True, 18 | test_delay_epochs=5, 19 | epochs=1000, 20 | seq_len=8192, 21 | pad_short=2000, 22 | use_noise_augment=True, 23 | eval_split_size=10, 24 | print_step=25, 25 | print_eval=False, 26 | mixed_precision=False, 27 | lr_gen=1e-4, 28 | lr_disc=1e-4, 29 | data_path=os.path.join(output_path, "../LJSpeech-1.1/wavs/"), 30 | output_path=output_path, 31 | ) 32 | 33 | # init audio processor 34 | ap = AudioProcessor(**config.audio.to_dict()) 35 | 36 | # load training samples 37 | eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size) 38 | 39 | # init model 40 | model = GAN(config, ap) 41 | 42 | # init the trainer and 🚀 43 | trainer = Trainer( 44 | TrainerArgs(), config, 
output_path, model=model, train_samples=train_samples, eval_samples=eval_samples 45 | ) 46 | trainer.fit() 47 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/ljspeech/overflow/lj_parameters.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/recipes/ljspeech/overflow/lj_parameters.pt -------------------------------------------------------------------------------- /submodules/TTS/recipes/ljspeech/univnet/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from trainer import Trainer, TrainerArgs 4 | 5 | from TTS.utils.audio import AudioProcessor 6 | from TTS.vocoder.configs import UnivnetConfig 7 | from TTS.vocoder.datasets.preprocess import load_wav_data 8 | from TTS.vocoder.models.gan import GAN 9 | 10 | output_path = os.path.dirname(os.path.abspath(__file__)) 11 | config = UnivnetConfig( 12 | batch_size=64, 13 | eval_batch_size=16, 14 | num_loader_workers=4, 15 | num_eval_loader_workers=4, 16 | run_eval=True, 17 | test_delay_epochs=-1, 18 | epochs=1000, 19 | seq_len=8192, 20 | pad_short=2000, 21 | use_noise_augment=True, 22 | eval_split_size=10, 23 | print_step=25, 24 | print_eval=False, 25 | mixed_precision=False, 26 | lr_gen=1e-4, 27 | lr_disc=1e-4, 28 | data_path=os.path.join(output_path, "../LJSpeech-1.1/wavs/"), 29 | output_path=output_path, 30 | ) 31 | 32 | # init audio processor 33 | ap = AudioProcessor(**config.audio.to_dict()) 34 | 35 | # load training samples 36 | eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size) 37 | 38 | # init model 39 | model = GAN(config, ap) 40 | 41 | # init the trainer and 🚀 42 | trainer = Trainer( 43 | TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples 44 | ) 45 | trainer.fit() 46 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/ljspeech/wavegrad/train_wavegrad.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from trainer import Trainer, TrainerArgs 4 | 5 | from TTS.utils.audio import AudioProcessor 6 | from TTS.vocoder.configs import WavegradConfig 7 | from TTS.vocoder.datasets.preprocess import load_wav_data 8 | from TTS.vocoder.models.wavegrad import Wavegrad 9 | 10 | output_path = os.path.dirname(os.path.abspath(__file__)) 11 | config = WavegradConfig( 12 | batch_size=32, 13 | eval_batch_size=16, 14 | num_loader_workers=4, 15 | num_eval_loader_workers=4, 16 | run_eval=True, 17 | test_delay_epochs=-1, 18 | epochs=1000, 19 | seq_len=6144, 20 | pad_short=2000, 21 | use_noise_augment=True, 22 | eval_split_size=50, 23 | print_step=50, 24 | print_eval=True, 25 | mixed_precision=False, 26 | data_path=os.path.join(output_path, "../LJSpeech-1.1/wavs/"), 27 | output_path=output_path, 28 | ) 29 | 30 | # init audio processor 31 | ap = AudioProcessor(**config.audio.to_dict()) 32 | 33 | # load training samples 34 | eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size) 35 | 36 | # init model 37 | model = Wavegrad(config) 38 | 39 | # init the trainer and 🚀 40 | trainer = Trainer( 41 | TrainerArgs(), 42 | config, 43 | output_path, 44 | model=model, 45 | train_samples=train_samples, 46 | eval_samples=eval_samples, 47 | training_assets={"audio_processor": ap}, 48 | ) 49 | 
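# fit() runs the full loop: it alternates training and evaluation epochs,
# logs to the console every `print_step` steps, and writes checkpoints
# under `output_path`.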
trainer.fit() 50 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/ljspeech/wavernn/train_wavernn.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from trainer import Trainer, TrainerArgs 4 | 5 | from TTS.utils.audio import AudioProcessor 6 | from TTS.vocoder.configs import WavernnConfig 7 | from TTS.vocoder.datasets.preprocess import load_wav_data 8 | from TTS.vocoder.models.wavernn import Wavernn 9 | 10 | output_path = os.path.dirname(os.path.abspath(__file__)) 11 | config = WavernnConfig( 12 | batch_size=64, 13 | eval_batch_size=16, 14 | num_loader_workers=4, 15 | num_eval_loader_workers=4, 16 | run_eval=True, 17 | test_delay_epochs=-1, 18 | epochs=10000, 19 | seq_len=1280, 20 | pad_short=2000, 21 | use_noise_augment=False, 22 | eval_split_size=10, 23 | print_step=25, 24 | print_eval=True, 25 | mixed_precision=False, 26 | lr=1e-4, 27 | grad_clip=4, 28 | data_path=os.path.join(output_path, "../LJSpeech-1.1/wavs/"), 29 | output_path=output_path, 30 | ) 31 | 32 | # init audio processor 33 | ap = AudioProcessor(**config.audio.to_dict()) 34 | 35 | # load training samples 36 | eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size) 37 | 38 | # init model 39 | model = Wavernn(config) 40 | 41 | # init the trainer and 🚀 42 | trainer = Trainer( 43 | TrainerArgs(), 44 | config, 45 | output_path, 46 | model=model, 47 | train_samples=train_samples, 48 | eval_samples=eval_samples, 49 | training_assets={"audio_processor": ap}, 50 | ) 51 | trainer.fit() 52 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/thorsten_DE/README.md: -------------------------------------------------------------------------------- 1 | # 🐸💬 TTS Thorsten Recipes 2 | 3 | For running the recipes you need the [Thorsten-Voice](https://github.com/thorstenMueller/Thorsten-Voice) dataset. 4 | 5 | You can download it manually from [the official website](https://www.thorsten-voice.de/) or use ```download_thorsten_DE.sh```; alternatively, running any of the **train_modelX.py** scripts will download the dataset if it is not already present. 6 | 7 | Then, go to your desired model folder and run the training. 8 | 9 | Running Python files. (Choose the desired GPU ID for your run and set ```CUDA_VISIBLE_DEVICES```) 10 | ```terminal 11 | CUDA_VISIBLE_DEVICES="0" python train_modelX.py 12 | ``` 13 | 14 | 💡 Note that these runs are just templates to help you start training your first model. They are not optimized for the best 15 | result. Double-check the configurations and feel free to share your experiments to find better parameters together 💪.
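For example, to train a Glow-TTS model on thorsten-de (a minimal sketch; the recipe folder and script name below are assumptions, so double-check what is actually available under ```recipes/thorsten_DE/```):
```terminal
cd recipes/thorsten_DE/glow_tts
CUDA_VISIBLE_DEVICES="0" python train_glowtts.py
```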
16 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/thorsten_DE/download_thorsten_DE.sh: -------------------------------------------------------------------------------- 1 | # create venv 2 | python3 -m venv env 3 | source env/bin/activate 4 | pip install pip --upgrade 5 | 6 | # download Thorsten_DE dataset 7 | pip install gdown 8 | gdown --id 1yKJM1LAOQpRVojKunD9r8WN_p5KzBxjc -O dataset.tgz 9 | tar -xzf dataset.tgz 10 | 11 | # create train-val splits (the archive extracts to a folder named LJSpeech-1.1) 12 | shuf LJSpeech-1.1/metadata.csv > LJSpeech-1.1/metadata_shuf.csv 13 | head -n 20668 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_train.csv 14 | tail -n 2000 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_val.csv 15 | 16 | # rename dataset and remove archive 17 | mv LJSpeech-1.1 thorsten-de 18 | rm dataset.tgz 19 | 20 | # destroy venv 21 | rm -rf env 22 | -------------------------------------------------------------------------------- /submodules/TTS/recipes/vctk/download_vctk.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # take the script's parent directory to prefix all the output paths. 3 | RUN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" 4 | echo $RUN_DIR 5 | # download VCTK dataset 6 | wget https://datashare.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip -O VCTK-Corpus-0.92.zip 7 | # extract 8 | mkdir VCTK 9 | unzip VCTK-Corpus-0.92 -d VCTK 10 | # move the dataset into the recipe folder 11 | mv VCTK $RUN_DIR/recipes/vctk/ 12 | rm VCTK-Corpus-0.92.zip 13 | -------------------------------------------------------------------------------- /submodules/TTS/requirements.dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | coverage 3 | isort 4 | nose2 5 | pylint==2.10.2 6 | -------------------------------------------------------------------------------- /submodules/TTS/requirements.ja.txt: -------------------------------------------------------------------------------- 1 | # These cause some compatibility issues on some systems and are not strictly necessary 2 | # japanese g2p deps 3 | mecab-python3==1.0.6 4 | unidic-lite==1.0.8 5 | cutlet 6 | -------------------------------------------------------------------------------- /submodules/TTS/requirements.notebooks.txt: -------------------------------------------------------------------------------- 1 | bokeh==1.4.0 -------------------------------------------------------------------------------- /submodules/TTS/requirements.txt: -------------------------------------------------------------------------------- 1 | # core deps 2 | # numpy==1.22.0;python_version<="3.10" 3 | # numpy>=1.24.3;python_version>"3.10" 4 | cython>=0.29.30 5 | scipy>=1.11.2 6 | # torch>=2.1 7 | # torchaudio 8 | soundfile>=0.12.0 9 | librosa>=0.10.0 10 | scikit-learn>=1.3.0 11 | numba==0.55.1;python_version<"3.9" 12 | numba>=0.57.0;python_version>="3.9" 13 | inflect>=5.6.0 14 | tqdm>=4.64.1 15 | anyascii>=0.3.0 16 | pyyaml>=6.0 17 | fsspec>=2023.6.0 # <= 2023.9.1 makes aux tests fail 18 | aiohttp>=3.8.1 19 | packaging>=23.1 20 | mutagen==1.47.0 21 | # deps for examples 22 | flask>=2.0.1 23 | # deps for inference 24 | pysbd>=0.3.4 25 | # deps for notebooks 26 | umap-learn>=0.5.1 27 | pandas>=1.4,<2.0 28 | # deps for training 29 | matplotlib>=3.7.0 30 | # coqui stack 31 | trainer>=0.0.36 32 | # config management 33 | coqpit>=0.0.16 34 | # chinese g2p deps 35 | jieba 36 | pypinyin 37 | # korean 38 | hangul_romanize 39
| # gruut + supported langs 40 | gruut[de,es,fr]==2.2.3 41 | # deps for korean 42 | jamo 43 | nltk 44 | g2pkk>=0.1.1 45 | # deps for bangla 46 | bangla 47 | bnnumerizer 48 | bnunicodenormalizer 49 | # deps for tortoise 50 | einops>=0.6.0 51 | transformers>=4.33.0 52 | # deps for bark 53 | encodec>=0.1.1 54 | # deps for XTTS 55 | unidecode>=1.3.2 56 | num2words 57 | spacy[ja]>=3 -------------------------------------------------------------------------------- /submodules/TTS/run_bash_tests.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | TF_CPP_MIN_LOG_LEVEL=3 3 | 4 | # runtime bash based tests 5 | # TODO: move these to python 6 | ./tests/bash_tests/test_demo_server.sh && \ 7 | ./tests/bash_tests/test_compute_statistics.sh 8 | -------------------------------------------------------------------------------- /submodules/TTS/scripts/sync_readme.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | 5 | def replace_between_markers(content, marker: str, replacement: str) -> str: 6 | start_marker = f"<!-- begin-{marker} -->\n\n" 7 | end_marker = f"\n\n<!-- end-{marker} -->\n" 8 | start_index = content.index(start_marker) + len(start_marker) 9 | end_index = content.index(end_marker) 10 | content = content[:start_index] + replacement + content[end_index:] 11 | return content 12 | 13 | 14 | def sync_readme(): 15 | ap = argparse.ArgumentParser() 16 | ap.add_argument("--check", action="store_true", default=False) 17 | args = ap.parse_args() 18 | readme_path = Path(__file__).parent.parent / "README.md" 19 | orig_content = readme_path.read_text() 20 | from TTS.bin.synthesize import description 21 | 22 | new_content = replace_between_markers(orig_content, "tts-readme", description.strip()) 23 | if args.check: 24 | if orig_content != new_content: 25 | print("README.md is out of sync; please edit TTS/bin/TTS_README.md and run scripts/sync_readme.py") 26 | exit(42) 27 | readme_path.write_text(new_content) 28 | print("Updated README.md") 29 | 30 | 31 | if __name__ == "__main__": 32 | sync_readme() 33 | -------------------------------------------------------------------------------- /submodules/TTS/setup.cfg: -------------------------------------------------------------------------------- 1 | [build_py] 2 | build_lib=temp_build 3 | 4 | [bdist_wheel] 5 | bdist_dir=temp_build 6 | 7 | [install_lib] 8 | build_dir=temp_build 9 | -------------------------------------------------------------------------------- /submodules/TTS/tests/aux_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/aux_tests/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/tests/aux_tests/test_readme.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | from pathlib import Path 4 | 5 | 6 | def test_readme_up_to_date(): 7 | root = Path(__file__).parent.parent.parent 8 | sync_readme = root / "scripts" / "sync_readme.py" 9 | subprocess.check_call([sys.executable, str(sync_readme), "--check"], cwd=root) 10 | -------------------------------------------------------------------------------- /submodules/TTS/tests/aux_tests/test_stft_torch.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/aux_tests/test_stft_torch.py -------------------------------------------------------------------------------- /submodules/TTS/tests/bash_tests/test_compute_statistics.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xe 3 | BASEDIR=$(dirname "$0") 4 | echo "$BASEDIR" 5 | # compute normalization statistics 6 | CUDA_VISIBLE_DEVICES="" python TTS/bin/compute_statistics.py --config_path $BASEDIR/../inputs/test_glow_tts.json --out_path $BASEDIR/../outputs/scale_stats.npy 7 | 8 | -------------------------------------------------------------------------------- /submodules/TTS/tests/bash_tests/test_demo_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -xe 3 | 4 | python -m TTS.server.server & 5 | SERVER_PID=$! 6 | 7 | echo 'Waiting for server...' 8 | sleep 30 9 | 10 | curl -o /tmp/audio.wav "http://localhost:5002/api/tts?text=synthesis%20schmynthesis" 11 | python -c 'import sys; import wave; print(wave.open(sys.argv[1]).getnframes())' /tmp/audio.wav 12 | 13 | kill $SERVER_PID 14 | 15 | rm /tmp/audio.wav 16 | -------------------------------------------------------------------------------- /submodules/TTS/tests/data/dummy_speakers.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/dummy_speakers.pth -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/f0_cache/pitch_stats.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/f0_cache/pitch_stats.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0001.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0001.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0001.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0001.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0001.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0001.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0001.wav
-------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0002.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0002.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0002.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0002.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0002.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0002.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0002.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0003.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0003.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0003.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0003.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0003.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0003.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0003.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0004.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0004.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0004.mp3: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0004.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0004.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0004.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0004.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0004.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0005.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0005.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0005.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0005.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0005.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0005.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0005.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0005.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0006.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0006.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0006.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0006.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0006.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0006.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0006.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0006.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0007.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0007.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0007.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0007.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0007.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0007.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0007.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0007.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0008.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0008.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0008.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0008.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0008.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0008.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0008.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0008.wav 
-------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0009.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0009.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0009.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0009.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0009.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0009.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0009.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0009.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0010.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0010.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0010.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0010.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0010.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0010.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0010.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0010.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0011.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0011.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0011.mp3: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0011.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0011.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0011.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0011.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0011.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0012.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0012.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0012.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0012.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0012.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0012.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0012.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0012.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0013.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0013.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0013.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0013.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0013.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0013.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0013.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0013.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0014.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0014.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0014.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0014.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0014.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0014.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0014.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0014.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0015.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0015.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0015.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0015.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0015.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0015.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0015.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0015.wav 
-------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0016.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0016.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0016.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0016.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0016.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0016.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0016.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0016.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0017.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0017.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0017.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0017.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0017.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0017.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0017.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0017.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0018.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0018.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0018.mp3: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0018.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0018.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0018.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0018.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0018.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0019.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0019.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0019.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0019.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0019.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0019.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0019.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0019.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0020.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0020.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0020.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0020.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0020.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0020.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0020.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0020.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0021.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0021.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0021.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0021.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0021.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0021.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0021.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0021.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0022.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0022.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0022.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0022.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0022.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0022.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0022.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0022.wav 
-------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0023.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0023.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0023.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0023.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0023.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0023.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0023.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0023.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0024.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0024.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0024.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0024.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0024.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0024.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0024.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0024.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0025.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0025.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0025.mp3: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0025.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0025.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0025.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0025.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0025.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0026.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0026.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0026.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0026.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0026.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0026.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0026.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0026.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0027.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0027.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0027.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0027.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0027.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0027.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0027.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0027.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0028.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0028.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0028.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0028.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0028.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0028.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0028.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0028.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0029.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0029.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0029.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0029.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0029.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0029.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0029.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0029.wav 
-------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0030.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0030.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0030.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0030.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0030.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0030.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0030.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0030.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0031.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0031.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0031.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0031.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0031.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0031.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0031.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0031.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0032.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0032.flac -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0032.mp3: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0032.mp3 -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0032.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0032.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/data/ljspeech/wavs/LJ001-0032.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data/ljspeech/wavs/LJ001-0032.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/data_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/data_tests/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/tests/data_tests/test_dataset_formatters.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | from tests import get_tests_input_path 5 | from TTS.tts.datasets.formatters import common_voice 6 | 7 | 8 | class TestTTSFormatters(unittest.TestCase): 9 | def test_common_voice_preprocessor(self): # pylint: disable=no-self-use 10 | root_path = get_tests_input_path() 11 | meta_file = "common_voice.tsv" 12 | items = common_voice(root_path, meta_file) 13 | assert items[0]["text"] == "The applicants are invited for coffee and visa is given immediately." 14 | assert items[0]["audio_file"] == os.path.join(get_tests_input_path(), "clips", "common_voice_en_20005954.wav") 15 | 16 | assert items[-1]["text"] == "Competition for limited resources has also resulted in some local conflicts." 17 | assert items[-1]["audio_file"] == os.path.join(get_tests_input_path(), "clips", "common_voice_en_19737074.wav") 18 | -------------------------------------------------------------------------------- /submodules/TTS/tests/inference_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/inference_tests/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/tests/inference_tests/test_synthesize.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from tests import get_tests_output_path, run_cli 4 | 5 | 6 | def test_synthesize(): 7 | """Test synthesize.py with different arguments.""" 8 | output_path = os.path.join(get_tests_output_path(), "output.wav") 9 | run_cli("tts --list_models") 10 | 11 | # single speaker model 12 | run_cli(f'tts --text "This is an example." --out_path "{output_path}"') 13 | run_cli( 14 | "tts --model_name tts_models/en/ljspeech/glow-tts " f'--text "This is an example." 
--out_path "{output_path}"' 15 | ) 16 | run_cli( 17 | "tts --model_name tts_models/en/ljspeech/glow-tts " 18 | "--vocoder_name vocoder_models/en/ljspeech/multiband-melgan " 19 | f'--text "This is an example." --out_path "{output_path}"' 20 | ) 21 | -------------------------------------------------------------------------------- /submodules/TTS/tests/inputs/common_voice.tsv: -------------------------------------------------------------------------------- 1 | client_id path sentence up_votes down_votes age gender accent locale segment 2 | 95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005954.mp3 The applicants are invited for coffee and visa is given immediately. 3 0 en 3 | 95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005955.mp3 Developmental robotics is related to, but differs from, evolutionary robotics. 2 0 en 4 | 95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005956.mp3 The musical was originally directed and choreographed by Alan Lund. 2 0 en 5 | 954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737073.mp3 He graduated from Columbia High School, in Brown County, South Dakota. 2 0 en 6 | 954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737074.mp3 Competition for limited resources has also resulted in some local conflicts. 2 0 en 7 | -------------------------------------------------------------------------------- /submodules/TTS/tests/inputs/example_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/inputs/example_1.wav -------------------------------------------------------------------------------- /submodules/TTS/tests/inputs/language_ids.json: -------------------------------------------------------------------------------- 1 | { 2 | "en": 0, 3 | "fr-fr": 1, 4 | "pt-br": 2 5 | } -------------------------------------------------------------------------------- /submodules/TTS/tests/inputs/scale_stats.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/inputs/scale_stats.npy -------------------------------------------------------------------------------- /submodules/TTS/tests/inputs/server_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "tts_checkpoint":"checkpoint_10.pth", // tts checkpoint file 3 | "tts_config":"dummy_model_config.json", // tts config.json file 4 | "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding. 5 | "wavernn_lib_path": null, // Rootpath to wavernn project folder to be imported. If this is null, model uses GL for speech synthesis. 
6 | "wavernn_file": null, // wavernn checkpoint file name 7 | "wavernn_config": null, // wavernn config file 8 | "vocoder_config":null, 9 | "vocoder_checkpoint": null, 10 | "is_wavernn_batched":true, 11 | "port": 5002, 12 | "use_cuda": false, 13 | "debug": true 14 | } 15 | -------------------------------------------------------------------------------- /submodules/TTS/tests/text_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/text_tests/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/tests/text_tests/test_belarusian_phonemizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | import warnings 4 | 5 | from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes 6 | 7 | _TEST_CASES = """ 8 | Фанетычны канвертар/fanʲɛˈtɨt͡ʂnɨ kanˈvʲɛrtar 9 | Гэтак мы працавалі/ˈɣɛtak ˈmɨ prat͡saˈvalʲi 10 | """ 11 | 12 | 13 | class TestText(unittest.TestCase): 14 | def test_belarusian_text_to_phonemes(self): 15 | try: 16 | os.environ["BEL_FANETYKA_JAR"] 17 | except KeyError: 18 | warnings.warn( 19 | "You need to define 'BEL_FANETYKA_JAR' environment variable as path to the fanetyka.jar file to test Belarusian phonemizer", 20 | Warning, 21 | ) 22 | return 23 | 24 | for line in _TEST_CASES.strip().split("\n"): 25 | text, phonemes = line.split("/") 26 | self.assertEqual(belarusian_text_to_phonemes(text), phonemes) 27 | 28 | 29 | if __name__ == "__main__": 30 | unittest.main() 31 | -------------------------------------------------------------------------------- /submodules/TTS/tests/text_tests/test_japanese_phonemizer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from TTS.tts.utils.text.japanese.phonemizer import japanese_text_to_phonemes 4 | 5 | _TEST_CASES = """ 6 | どちらに行きますか?/dochiraniikimasuka? 7 | 今日は温泉に、行きます。/kyo:waoNseNni,ikimasu. 8 | 「A」から「Z」までです。/e:karazeqtomadedesu. 9 | そうですね!/so:desune! 10 | クジラは哺乳類です。/kujirawahonyu:ruidesu. 11 | ヴィディオを見ます。/bidioomimasu. 12 | 今日は8月22日です/kyo:wahachigatsuniju:ninichidesu 13 | xyzとαβγ/eqkusuwaizeqtotoarufabe:tagaNma 14 | 値段は$12.34です/nedaNwaju:niteNsaNyoNdorudesu 15 | """ 16 | 17 | 18 | class TestText(unittest.TestCase): 19 | def test_japanese_text_to_phonemes(self): 20 | for line in _TEST_CASES.strip().split("\n"): 21 | text, phone = line.split("/") 22 | self.assertEqual(japanese_text_to_phonemes(text), phone) 23 | 24 | 25 | if __name__ == "__main__": 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /submodules/TTS/tests/text_tests/test_korean_phonemizer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from TTS.tts.utils.text.korean.phonemizer import korean_text_to_phonemes 4 | 5 | _TEST_CASES = """ 6 | 포상은 열심히 한 아이에게만 주어지기 때문에 포상인 것입니다./포상으 녈심히 하 나이에게만 주어지기 때무네 포상인 거심니다. 7 | 오늘은 8월 31일 입니다./오느른 파뤌 삼시비리 림니다. 8 | 친구 100명 만들기가 목표입니다./친구 뱅명 만들기가 목표임니다. 9 | A부터 Z까지 입니다./에이부터 제트까지 임니다. 10 | 이게 제 마음이에요./이게 제 마으미에요. 11 | """ 12 | _TEST_CASES_EN = """ 13 | 이제야 이쪽을 보는구나./IJeYa IJjoGeul BoNeunGuNa. 14 | 크고 맛있는 cake를 부탁해요./KeuGo MaSinNeun KeIKeuLeul BuTaKaeYo. 15 | 전부 거짓말이야./JeonBu GeoJinMaLiYa. 16 | 좋은 노래를 찾았어요./JoEun NoLaeLeul ChaJaSseoYo. 
17 | """ 18 | 19 | 20 | class TestText(unittest.TestCase): 21 | def test_korean_text_to_phonemes(self): 22 | for line in _TEST_CASES.strip().split("\n"): 23 | text, phone = line.split("/") 24 | self.assertEqual(korean_text_to_phonemes(text), phone) 25 | for line in _TEST_CASES_EN.strip().split("\n"): 26 | text, phone = line.split("/") 27 | self.assertEqual(korean_text_to_phonemes(text, character="english"), phone) 28 | 29 | 30 | if __name__ == "__main__": 31 | unittest.main() 32 | -------------------------------------------------------------------------------- /submodules/TTS/tests/text_tests/test_text_cleaners.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from TTS.tts.utils.text.cleaners import english_cleaners, phoneme_cleaners 4 | 5 | 6 | def test_time() -> None: 7 | assert english_cleaners("It's 11:00") == "it's eleven a m" 8 | assert english_cleaners("It's 9:01") == "it's nine oh one a m" 9 | assert english_cleaners("It's 16:00") == "it's four p m" 10 | assert english_cleaners("It's 00:00 am") == "it's twelve a m" 11 | 12 | 13 | def test_currency() -> None: 14 | assert phoneme_cleaners("It's $10.50") == "It's ten dollars fifty cents" 15 | assert phoneme_cleaners("£1.1") == "one pound sterling one penny" 16 | assert phoneme_cleaners("¥1") == "one yen" 17 | 18 | 19 | def test_expand_numbers() -> None: 20 | assert phoneme_cleaners("-1") == "minus one" 21 | assert phoneme_cleaners("1") == "one" 22 | -------------------------------------------------------------------------------- /submodules/TTS/tests/tts_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/tts_tests/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/tests/tts_tests2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/tts_tests2/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/tests/vc_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/vc_tests/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/vocoder_tests/__init__.py -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/test_hifigan_train.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | 5 | from tests import get_device_id, get_tests_output_path, run_cli 6 | from TTS.vocoder.configs import HifiganConfig 7 | 8 | config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json") 9 | output_path = os.path.join(get_tests_output_path(), "train_outputs") 10 | 11 | 12 | config = HifiganConfig( 13 | batch_size=8, 14 | eval_batch_size=8, 15 | 
num_loader_workers=0, 16 | num_eval_loader_workers=0, 17 | run_eval=True, 18 | test_delay_epochs=-1, 19 | epochs=1, 20 | seq_len=1024, 21 | eval_split_size=1, 22 | print_step=1, 23 | print_eval=True, 24 | data_path="tests/data/ljspeech", 25 | output_path=output_path, 26 | ) 27 | config.audio.do_trim_silence = True 28 | config.audio.trim_db = 60 29 | config.save_json(config_path) 30 | 31 | # train the model for one epoch 32 | command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --config_path {config_path} " 33 | run_cli(command_train) 34 | 35 | # Find latest folder 36 | continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime) 37 | 38 | # restore the model and continue training for one more epoch 39 | command_train = ( 40 | f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --continue_path {continue_path} " 41 | ) 42 | run_cli(command_train) 43 | shutil.rmtree(continue_path) 44 | -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/test_melgan_train.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | 5 | from tests import get_device_id, get_tests_output_path, run_cli 6 | from TTS.vocoder.configs import MelganConfig 7 | 8 | config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json") 9 | output_path = os.path.join(get_tests_output_path(), "train_outputs") 10 | 11 | config = MelganConfig( 12 | batch_size=4, 13 | eval_batch_size=4, 14 | num_loader_workers=0, 15 | num_eval_loader_workers=0, 16 | run_eval=True, 17 | test_delay_epochs=-1, 18 | epochs=1, 19 | seq_len=2048, 20 | eval_split_size=1, 21 | print_step=1, 22 | discriminator_model_params={"base_channels": 16, "max_channels": 64, "downsample_factors": [4, 4, 4]}, 23 | print_eval=True, 24 | data_path="tests/data/ljspeech", 25 | output_path=output_path, 26 | ) 27 | config.audio.do_trim_silence = True 28 | config.audio.trim_db = 60 29 | config.save_json(config_path) 30 | 31 | # train the model for one epoch 32 | command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --config_path {config_path} " 33 | run_cli(command_train) 34 | 35 | # Find latest folder 36 | continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime) 37 | 38 | # restore the model and continue training for one more epoch 39 | command_train = ( 40 | f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --continue_path {continue_path} " 41 | ) 42 | run_cli(command_train) 43 | shutil.rmtree(continue_path) 44 | -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/test_parallel_wavegan_train.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | 5 | from tests import get_device_id, get_tests_output_path, run_cli 6 | from TTS.vocoder.configs import ParallelWaveganConfig 7 | 8 | config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json") 9 | output_path = os.path.join(get_tests_output_path(), "train_outputs") 10 | 11 | config = ParallelWaveganConfig( 12 | batch_size=4, 13 | eval_batch_size=4, 14 | num_loader_workers=0, 15 | num_eval_loader_workers=0, 16 | run_eval=True, 17 | test_delay_epochs=-1, 18 | epochs=1, 19 | seq_len=2048, 20 | eval_split_size=1, 21 | print_step=1, 22 | 
print_eval=True, 23 | data_path="tests/data/ljspeech", 24 | output_path=output_path, 25 | ) 26 | config.audio.do_trim_silence = True 27 | config.audio.trim_db = 60 28 | config.save_json(config_path) 29 | 30 | # train the model for one epoch 31 | command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --config_path {config_path} " 32 | run_cli(command_train) 33 | 34 | # Find latest folder 35 | continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime) 36 | 37 | # restore the model and continue training for one more epoch 38 | command_train = ( 39 | f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --continue_path {continue_path} " 40 | ) 41 | run_cli(command_train) 42 | shutil.rmtree(continue_path) 43 | -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/test_vocoder_melgan_discriminator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from TTS.vocoder.models.melgan_discriminator import MelganDiscriminator 5 | from TTS.vocoder.models.melgan_multiscale_discriminator import MelganMultiscaleDiscriminator 6 | 7 | 8 | def test_melgan_discriminator(): 9 | model = MelganDiscriminator() 10 | print(model) 11 | dummy_input = torch.rand((4, 1, 256 * 10)) 12 | output, _ = model(dummy_input) 13 | assert np.all(output.shape == (4, 1, 10)) 14 | 15 | 16 | def test_melgan_multi_scale_discriminator(): 17 | model = MelganMultiscaleDiscriminator() 18 | print(model) 19 | dummy_input = torch.rand((4, 1, 256 * 16)) 20 | scores, feats = model(dummy_input) 21 | assert len(scores) == 3 22 | assert len(scores) == len(feats) 23 | assert np.all(scores[0].shape == (4, 1, 64)) 24 | assert np.all(feats[0][0].shape == (4, 16, 4096)) 25 | assert np.all(feats[0][1].shape == (4, 64, 1024)) 26 | assert np.all(feats[0][2].shape == (4, 256, 256)) 27 | -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/test_vocoder_melgan_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from TTS.vocoder.models.melgan_generator import MelganGenerator 5 | 6 | 7 | def test_melgan_generator(): 8 | model = MelganGenerator() 9 | print(model) 10 | dummy_input = torch.rand((4, 80, 64)) 11 | output = model(dummy_input) 12 | assert np.all(output.shape == (4, 1, 64 * 256)) 13 | output = model.inference(dummy_input) 14 | assert np.all(output.shape == (4, 1, (64 + 4) * 256)) 15 | -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/test_vocoder_parallel_wavegan_discriminator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from TTS.vocoder.models.parallel_wavegan_discriminator import ( 5 | ParallelWaveganDiscriminator, 6 | ResidualParallelWaveganDiscriminator, 7 | ) 8 | 9 | 10 | def test_pwgan_disciminator(): 11 | model = ParallelWaveganDiscriminator( 12 | in_channels=1, 13 | out_channels=1, 14 | kernel_size=3, 15 | num_layers=10, 16 | conv_channels=64, 17 | dilation_factor=1, 18 | nonlinear_activation="LeakyReLU", 19 | nonlinear_activation_params={"negative_slope": 0.2}, 20 | bias=True, 21 | ) 22 | dummy_x = torch.rand((4, 1, 64 * 256)) 23 | output = model(dummy_x) 24 | assert np.all(output.shape == (4, 1, 64 * 256)) 25 | 
model.remove_weight_norm() 26 | 27 | 28 | def test_redisual_pwgan_disciminator(): 29 | model = ResidualParallelWaveganDiscriminator( 30 | in_channels=1, 31 | out_channels=1, 32 | kernel_size=3, 33 | num_layers=30, 34 | stacks=3, 35 | res_channels=64, 36 | gate_channels=128, 37 | skip_channels=64, 38 | dropout=0.0, 39 | bias=True, 40 | nonlinear_activation="LeakyReLU", 41 | nonlinear_activation_params={"negative_slope": 0.2}, 42 | ) 43 | dummy_x = torch.rand((4, 1, 64 * 256)) 44 | output = model(dummy_x) 45 | assert np.all(output.shape == (4, 1, 64 * 256)) 46 | model.remove_weight_norm() 47 | -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/test_vocoder_parallel_wavegan_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from TTS.vocoder.models.parallel_wavegan_generator import ParallelWaveganGenerator 5 | 6 | 7 | def test_pwgan_generator(): 8 | model = ParallelWaveganGenerator( 9 | in_channels=1, 10 | out_channels=1, 11 | kernel_size=3, 12 | num_res_blocks=30, 13 | stacks=3, 14 | res_channels=64, 15 | gate_channels=128, 16 | skip_channels=64, 17 | aux_channels=80, 18 | dropout=0.0, 19 | bias=True, 20 | use_weight_norm=True, 21 | upsample_factors=[4, 4, 4, 4], 22 | ) 23 | dummy_c = torch.rand((2, 80, 5)) 24 | output = model(dummy_c) 25 | assert np.all(output.shape == (2, 1, 5 * 256)), output.shape 26 | model.remove_weight_norm() 27 | output = model.inference(dummy_c) 28 | assert np.all(output.shape == (2, 1, (5 + 4) * 256)) 29 | -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/test_vocoder_pqmf.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import soundfile as sf 4 | import torch 5 | from librosa.core import load 6 | 7 | from tests import get_tests_input_path, get_tests_output_path, get_tests_path 8 | from TTS.vocoder.layers.pqmf import PQMF 9 | 10 | TESTS_PATH = get_tests_path() 11 | WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") 12 | 13 | 14 | def test_pqmf(): 15 | w, sr = load(WAV_FILE) 16 | 17 | layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0) 18 | w, sr = load(WAV_FILE) 19 | w2 = torch.from_numpy(w[None, None, :]) 20 | b2 = layer.analysis(w2) 21 | w2_ = layer.synthesis(b2) 22 | 23 | print(w2_.max()) 24 | print(w2_.min()) 25 | print(w2_.mean()) 26 | sf.write(os.path.join(get_tests_output_path(), "pqmf_output.wav"), w2_.flatten().detach(), sr) 27 | -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/test_vocoder_rwd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from TTS.vocoder.models.random_window_discriminator import RandomWindowDiscriminator 5 | 6 | 7 | def test_rwd(): 8 | layer = RandomWindowDiscriminator( 9 | cond_channels=80, 10 | window_sizes=(512, 1024, 2048, 4096, 8192), 11 | cond_disc_downsample_factors=[(8, 4, 2, 2, 2), (8, 4, 2, 2), (8, 4, 2), (8, 4), (4, 2, 2)], 12 | hop_length=256, 13 | ) 14 | x = torch.rand([4, 1, 22050]) 15 | c = torch.rand([4, 80, 22050 // 256]) 16 | 17 | scores, _ = layer(x, c) 18 | assert len(scores) == 10 19 | assert np.all(scores[0].shape == (4, 1, 1)) 20 | -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/test_wavegrad_train.py: 
-------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | 5 | from tests import get_device_id, get_tests_output_path, run_cli 6 | from TTS.vocoder.configs import WavegradConfig 7 | 8 | config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json") 9 | output_path = os.path.join(get_tests_output_path(), "train_outputs") 10 | 11 | config = WavegradConfig( 12 | batch_size=8, 13 | eval_batch_size=8, 14 | num_loader_workers=0, 15 | num_eval_loader_workers=0, 16 | run_eval=True, 17 | test_delay_epochs=-1, 18 | epochs=1, 19 | seq_len=8192, 20 | eval_split_size=1, 21 | print_step=1, 22 | print_eval=True, 23 | data_path="tests/data/ljspeech", 24 | output_path=output_path, 25 | test_noise_schedule={"min_val": 1e-6, "max_val": 1e-2, "num_steps": 2}, 26 | ) 27 | config.audio.do_trim_silence = True 28 | config.audio.trim_db = 60 29 | config.save_json(config_path) 30 | 31 | # train the model for one epoch 32 | command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --config_path {config_path} " 33 | run_cli(command_train) 34 | 35 | # Find latest folder 36 | continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime) 37 | 38 | # restore the model and continue training for one more epoch 39 | command_train = ( 40 | f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --continue_path {continue_path} " 41 | ) 42 | run_cli(command_train) 43 | shutil.rmtree(continue_path) 44 | -------------------------------------------------------------------------------- /submodules/TTS/tests/vocoder_tests/test_wavernn_train.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | 5 | from tests import get_device_id, get_tests_output_path, run_cli 6 | from TTS.vocoder.configs import WavernnConfig 7 | from TTS.vocoder.models.wavernn import WavernnArgs 8 | 9 | config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json") 10 | output_path = os.path.join(get_tests_output_path(), "train_outputs") 11 | 12 | 13 | config = WavernnConfig( 14 | model_args=WavernnArgs(), 15 | batch_size=8, 16 | eval_batch_size=8, 17 | num_loader_workers=0, 18 | num_eval_loader_workers=0, 19 | run_eval=True, 20 | test_delay_epochs=-1, 21 | epochs=1, 22 | seq_len=256, # for shorter test time 23 | eval_split_size=1, 24 | print_step=1, 25 | print_eval=True, 26 | data_path="tests/data/ljspeech", 27 | output_path=output_path, 28 | ) 29 | config.audio.do_trim_silence = True 30 | config.audio.trim_db = 60 31 | config.save_json(config_path) 32 | 33 | # train the model for one epoch 34 | command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --config_path {config_path} " 35 | run_cli(command_train) 36 | 37 | # Find latest folder 38 | continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime) 39 | 40 | # restore the model and continue training for one more epoch 41 | command_train = ( 42 | f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --continue_path {continue_path} " 43 | ) 44 | run_cli(command_train) 45 | shutil.rmtree(continue_path) 46 | -------------------------------------------------------------------------------- /submodules/TTS/tests/zoo_tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/TTS/tests/zoo_tests/__init__.py -------------------------------------------------------------------------------- /submodules/demucs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Demucs 2 | 3 | ## Pull Requests 4 | 5 | In order to accept your pull request, we need you to submit a CLA. You only need 6 | to do this once to work on any of Facebook's open source projects. 7 | 8 | Complete your CLA here: 9 | 10 | Demucs is the implementation of a research paper. 11 | Therefore, we do not plan on accepting many pull requests for new features. 12 | We certainly welcome them for bug fixes. 13 | 14 | 15 | ## Issues 16 | 17 | We use GitHub issues to track public bugs. Please ensure your description is 18 | clear and has sufficient instructions to be able to reproduce the issue. 19 | 20 | 21 | ## License 22 | By contributing to this repository, you agree that your contributions will be licensed 23 | under the LICENSE file in the root directory of this source tree. 24 | -------------------------------------------------------------------------------- /submodules/demucs/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Meta Platforms, Inc. and affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /submodules/demucs/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-exclude env * 2 | recursive-include conf *.yaml 3 | include Makefile 4 | include LICENSE 5 | include demucs.png 6 | include outputs.tar.gz 7 | include test.mp3 8 | include requirements.txt 9 | include requirements_minimal.txt 10 | include mypy.ini 11 | include demucs/py.typed 12 | include demucs/remote/*.txt 13 | include demucs/remote/*.yaml 14 | -------------------------------------------------------------------------------- /submodules/demucs/Makefile: -------------------------------------------------------------------------------- 1 | all: linter tests 2 | 3 | linter: 4 | flake8 demucs 5 | mypy demucs 6 | 7 | tests: test_train test_eval 8 | 9 | test_train: tests/musdb 10 | _DORA_TEST_PATH=/tmp/demucs python3 -m dora run --clear \ 11 | dset.musdb=./tests/musdb dset.segment=4 dset.shift=2 epochs=2 model=demucs \ 12 | demucs.depth=2 demucs.channels=4 test.sdr=false misc.num_workers=0 test.workers=0 \ 13 | test.shifts=0 14 | 15 | test_eval: 16 | python3 -m demucs -n demucs_unittest test.mp3 17 | python3 -m demucs -n demucs_unittest --two-stems=vocals test.mp3 18 | python3 -m demucs -n demucs_unittest --mp3 test.mp3 19 | python3 -m demucs -n demucs_unittest --flac --int24 test.mp3 20 | python3 -m demucs -n demucs_unittest --int24 --clip-mode clamp test.mp3 21 | python3 -m demucs -n demucs_unittest --segment 8 test.mp3 22 | python3 -m demucs.api -n demucs_unittest --segment 8 test.mp3 23 | python3 -m demucs --list-models 24 | 25 | tests/musdb: 26 | test -e tests || mkdir tests 27 | python3 -c 'import musdb; musdb.DB("tests/tmp", download=True)' 28 | musdbconvert tests/tmp tests/musdb 29 | 30 | dist: 31 | python3 setup.py sdist 32 | 33 | clean: 34 | rm -r dist build *.egg-info 35 | 36 | .PHONY: linter dist test_train test_eval 37 | -------------------------------------------------------------------------------- /submodules/demucs/conf/dset/aetl.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # automix dataset with Musdb, extra training data and the test set of Musdb. 4 | # This used even more remixes than auto_extra_test. 5 | dset: 6 | wav: /checkpoint/defossez/datasets/aetl 7 | samplerate: 44100 8 | channels: 2 9 | epochs: 320 10 | max_batches: 500 11 | 12 | augment: 13 | shift_same: true 14 | scale: 15 | proba: 0. 16 | remix: 17 | proba: 0 18 | repitch: 19 | proba: 0 20 | -------------------------------------------------------------------------------- /submodules/demucs/conf/dset/auto_extra_test.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # automix dataset with Musdb, extra training data and the test set of Musdb. 4 | dset: 5 | wav: /checkpoint/defossez/datasets/automix_extra_test2 6 | samplerate: 44100 7 | channels: 2 8 | epochs: 320 9 | max_batches: 500 10 | 11 | augment: 12 | shift_same: true 13 | scale: 14 | proba: 0. 15 | remix: 16 | proba: 0 17 | repitch: 18 | proba: 0 19 | -------------------------------------------------------------------------------- /submodules/demucs/conf/dset/auto_mus.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Automix dataset based on musdb train set. 
4 | dset: 5 | wav: /checkpoint/defossez/datasets/automix_musdb 6 | samplerate: 44100 7 | channels: 2 8 | epochs: 360 9 | max_batches: 300 10 | test: 11 | every: 4 12 | 13 | augment: 14 | shift_same: true 15 | scale: 16 | proba: 0.5 17 | remix: 18 | proba: 0 19 | repitch: 20 | proba: 0 21 | -------------------------------------------------------------------------------- /submodules/demucs/conf/dset/extra44.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Musdb + extra tracks 4 | dset: 5 | wav: /checkpoint/defossez/datasets/allstems_44/ 6 | samplerate: 44100 7 | channels: 2 8 | epochs: 320 9 | -------------------------------------------------------------------------------- /submodules/demucs/conf/dset/extra_mmi_goodclean.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Musdb + extra tracks 4 | dset: 5 | wav: /checkpoint/defossez/datasets/allstems_44/ 6 | wav2: /checkpoint/defossez/datasets/mmi44_goodclean 7 | samplerate: 44100 8 | channels: 2 9 | wav2_weight: null 10 | wav2_valid: false 11 | valid_samples: 100 12 | epochs: 1200 13 | -------------------------------------------------------------------------------- /submodules/demucs/conf/dset/extra_test.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Musdb + extra tracks + test set from musdb. 4 | dset: 5 | wav: /checkpoint/defossez/datasets/allstems_test_44/ 6 | samplerate: 44100 7 | channels: 2 8 | epochs: 320 9 | max_batches: 700 10 | test: 11 | sdr: false 12 | every: 500 13 | -------------------------------------------------------------------------------- /submodules/demucs/conf/dset/musdb44.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | dset: 4 | samplerate: 44100 5 | channels: 2 -------------------------------------------------------------------------------- /submodules/demucs/conf/dset/sdx23_bleeding.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Musdb + extra tracks 4 | dset: 5 | wav: /shared/home/defossez/data/datasets/moisesdb23_bleeding_v1.0/ 6 | use_musdb: false 7 | samplerate: 44100 8 | channels: 2 9 | backend: soundfile # must use soundfile as some mixture would clip with sox. 10 | epochs: 320 11 | -------------------------------------------------------------------------------- /submodules/demucs/conf/dset/sdx23_labelnoise.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Musdb + extra tracks 4 | dset: 5 | wav: /shared/home/defossez/data/datasets/moisesdb23_labelnoise_v1.0 6 | use_musdb: false 7 | samplerate: 44100 8 | channels: 2 9 | backend: soundfile # must use soundfile as some mixture would clip with sox. 10 | epochs: 320 11 | -------------------------------------------------------------------------------- /submodules/demucs/conf/svd/base.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | svd: 4 | penalty: 0 5 | min_size: 1 6 | dim: 50 7 | niters: 4 8 | powm: false 9 | proba: 1 10 | conv_only: false 11 | convtr: false # ideally this should be true, but some models were trained with this to false. 
12 | 13 | optim: 14 | beta2: 0.9998 -------------------------------------------------------------------------------- /submodules/demucs/conf/svd/base2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | svd: 4 | penalty: 0 5 | min_size: 1 6 | dim: 100 7 | niters: 4 8 | powm: false 9 | proba: 1 10 | conv_only: false 11 | convtr: true 12 | 13 | optim: 14 | beta2: 0.9998 -------------------------------------------------------------------------------- /submodules/demucs/conf/svd/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | -------------------------------------------------------------------------------- /submodules/demucs/conf/variant/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | -------------------------------------------------------------------------------- /submodules/demucs/conf/variant/example.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | model: hdemucs 4 | hdemucs: 5 | channels: 32 -------------------------------------------------------------------------------- /submodules/demucs/conf/variant/finetune.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | epochs: 4 4 | batch_size: 16 5 | optim: 6 | lr: 0.0006 7 | test: 8 | every: 1 9 | sdr: false 10 | dset: 11 | segment: 28 12 | shift: 2 13 | 14 | augment: 15 | scale: 16 | proba: 0 17 | shift_same: true 18 | remix: 19 | proba: 0 20 | -------------------------------------------------------------------------------- /submodules/demucs/demucs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/demucs/demucs.png -------------------------------------------------------------------------------- /submodules/demucs/demucs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | __version__ = "4.1.0a2" 8 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .separate import main 8 | 9 | if __name__ == '__main__': 10 | main() 11 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/grids/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/demucs/demucs/grids/__init__.py -------------------------------------------------------------------------------- /submodules/demucs/demucs/grids/mdx.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """ 7 | Main training for the Track A MDX models. 8 | """ 9 | 10 | from ._explorers import MyExplorer 11 | from ..train import main 12 | 13 | 14 | TRACK_A = ['0d19c1c6', '7ecf8ec1', 'c511e2ab', '7d865c68'] 15 | 16 | 17 | @MyExplorer 18 | def explorer(launcher): 19 | launcher.slurm_( 20 | gpus=8, 21 | time=3 * 24 * 60, 22 | partition='learnlab') 23 | 24 | # Reproduce results from MDX competition Track A 25 | # This trains the first round of models. Once this is trained, 26 | # you will need to schedule `mdx_refine`. 27 | for sig in TRACK_A: 28 | xp = main.get_xp_from_sig(sig) 29 | parent = xp.cfg.continue_from 30 | xp = main.get_xp_from_sig(parent) 31 | launcher(xp.argv) 32 | launcher(xp.argv, {'quant.diffq': 1e-4}) 33 | launcher(xp.argv, {'quant.diffq': 3e-4}) 34 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/grids/mdx_extra.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """ 7 | Main training for the Track B MDX models. 8 | """ 9 | 10 | from ._explorers import MyExplorer 11 | from ..train import main 12 | 13 | TRACK_B = ['e51eebcc', 'a1d90b5c', '5d2d6c55', 'cfa93e08'] 14 | 15 | 16 | @MyExplorer 17 | def explorer(launcher): 18 | launcher.slurm_( 19 | gpus=8, 20 | time=3 * 24 * 60, 21 | partition='learnlab') 22 | 23 | # Reproduce results from MDX competition Track B 24 | # This trains the Track B models on the extra datasets, 25 | # along with their DiffQ-quantized variants. 26 | for sig in TRACK_B: 27 | while sig is not None: 28 | xp = main.get_xp_from_sig(sig) 29 | sig = xp.cfg.continue_from 30 | 31 | for dset in ['extra44', 'extra_test']: 32 | sub = launcher.bind(xp.argv, dset=dset) 33 | sub() 34 | if dset == 'extra_test': 35 | sub({'quant.diffq': 1e-4}) 36 | sub({'quant.diffq': 3e-4}) 37 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/grids/mdx_refine.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """ 7 | Main training for the Track A MDX models. 8 | """ 9 | 10 | from ._explorers import MyExplorer 11 | from .mdx import TRACK_A 12 | from ..train import main 13 | 14 | 15 | @MyExplorer 16 | def explorer(launcher): 17 | launcher.slurm_( 18 | gpus=8, 19 | time=3 * 24 * 60, 20 | partition='learnlab') 21 | 22 | # Reproduce results from MDX competition Track A 23 | # WARNING: all the experiments in the `mdx` grid must have completed. 
24 | for sig in TRACK_A: 25 | xp = main.get_xp_from_sig(sig) 26 | launcher(xp.argv) 27 | for diffq in [1e-4, 3e-4]: 28 | xp_src = main.get_xp_from_sig(xp.cfg.continue_from) 29 | q_argv = [f'quant.diffq={diffq}'] 30 | actual_src = main.get_xp(xp_src.argv + q_argv) 31 | actual_src.link.load() 32 | assert len(actual_src.link.history) == actual_src.cfg.epochs 33 | argv = xp.argv + q_argv + [f'continue_from="{actual_src.sig}"'] 34 | launcher(argv) 35 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/grids/sdx23.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from ._explorers import MyExplorer 8 | from dora import Launcher 9 | 10 | 11 | @MyExplorer 12 | def explorer(launcher: Launcher): 13 | launcher.slurm_(gpus=8, time=3 * 24 * 60, partition="speechgpt,learnfair", 14 | mem_per_gpu=None, constraint='') 15 | launcher.bind_({"dset.use_musdb": False}) 16 | 17 | with launcher.job_array(): 18 | launcher(dset='sdx23_bleeding') 19 | launcher(dset='sdx23_labelnoise') 20 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/demucs/demucs/py.typed -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/files.txt: -------------------------------------------------------------------------------- 1 | # MDX Models 2 | root: mdx_final/ 3 | 0d19c1c6-0f06f20e.th 4 | 5d2d6c55-db83574e.th 5 | 7d865c68-3d5dd56b.th 6 | 7ecf8ec1-70f50cc9.th 7 | a1d90b5c-ae9d2452.th 8 | c511e2ab-fe698775.th 9 | cfa93e08-61801ae1.th 10 | e51eebcc-c1b80bdd.th 11 | 6b9c2ca1-3fd82607.th 12 | b72baf4e-8778635e.th 13 | 42e558d4-196e0e1b.th 14 | 305bc58f-18378783.th 15 | 14fc6a69-a89dd0ee.th 16 | 464b36d7-e5a9386e.th 17 | 7fd6ef75-a905dd85.th 18 | 83fc094f-4a16d450.th 19 | 1ef250f1-592467ce.th 20 | 902315c2-b39ce9c9.th 21 | 9a6b4851-03af0aa6.th 22 | fa0cb7f9-100d8bf4.th 23 | # Hybrid Transformer models 24 | root: hybrid_transformer/ 25 | 955717e8-8726e21a.th 26 | f7e0c4bc-ba3fe64a.th 27 | d12395a8-e57c48e6.th 28 | 92cfc3b6-ef3bcb9c.th 29 | 04573f0d-f3cf25b2.th 30 | 75fc33f5-1941ce65.th 31 | # Experimental 6 sources model 32 | 5c90dfd2-34c22ccb.th 33 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/hdemucs_mmi.yaml: -------------------------------------------------------------------------------- 1 | models: ['75fc33f5'] 2 | segment: 44 3 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/htdemucs.yaml: -------------------------------------------------------------------------------- 1 | models: ['955717e8'] 2 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/htdemucs_6s.yaml: -------------------------------------------------------------------------------- 1 | models: ['5c90dfd2'] 2 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/htdemucs_ft.yaml: 
-------------------------------------------------------------------------------- 1 | models: ['f7e0c4bc', 'd12395a8', '92cfc3b6', '04573f0d'] 2 | weights: [ 3 | [1., 0., 0., 0.], 4 | [0., 1., 0., 0.], 5 | [0., 0., 1., 0.], 6 | [0., 0., 0., 1.], 7 | ] -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/mdx.yaml: -------------------------------------------------------------------------------- 1 | models: ['0d19c1c6', '7ecf8ec1', 'c511e2ab', '7d865c68'] 2 | weights: [ 3 | [1., 1., 0., 0.], 4 | [0., 1., 0., 0.], 5 | [1., 0., 1., 1.], 6 | [1., 0., 1., 1.], 7 | ] 8 | segment: 44 9 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/mdx_extra.yaml: -------------------------------------------------------------------------------- 1 | models: ['e51eebcc', 'a1d90b5c', '5d2d6c55', 'cfa93e08'] 2 | segment: 44 -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/mdx_extra_q.yaml: -------------------------------------------------------------------------------- 1 | models: ['83fc094f', '464b36d7', '14fc6a69', '7fd6ef75'] 2 | segment: 44 3 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/mdx_q.yaml: -------------------------------------------------------------------------------- 1 | models: ['6b9c2ca1', 'b72baf4e', '42e558d4', '305bc58f'] 2 | weights: [ 3 | [1., 1., 0., 0.], 4 | [0., 1., 0., 0.], 5 | [1., 0., 1., 1.], 6 | [1., 0., 1., 1.], 7 | ] 8 | segment: 44 9 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/repro_mdx_a.yaml: -------------------------------------------------------------------------------- 1 | models: ['9a6b4851', '1ef250f1', 'fa0cb7f9', '902315c2'] 2 | segment: 44 3 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/repro_mdx_a_hybrid_only.yaml: -------------------------------------------------------------------------------- 1 | models: ['fa0cb7f9', '902315c2', 'fa0cb7f9', '902315c2'] 2 | segment: 44 3 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/remote/repro_mdx_a_time_only.yaml: -------------------------------------------------------------------------------- 1 | models: ['9a6b4851', '9a6b4851', '1ef250f1', '1ef250f1'] 2 | segment: 44 3 | -------------------------------------------------------------------------------- /submodules/demucs/demucs/wdemucs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # For compat 7 | from .hdemucs import HDemucs 8 | 9 | WDemucs = HDemucs 10 | -------------------------------------------------------------------------------- /submodules/demucs/docs/linux.md: -------------------------------------------------------------------------------- 1 | # Linux support for Demucs 2 | 3 | If your distribution has at least Python 3.8, and you just wish to separate 4 | tracks with Demucs, not train it, you can just run 5 | 6 | ```bash 7 | pip3 install --user -U demucs 8 | # Then anytime you want to use demucs, just do 9 | python3 -m demucs -d cpu PATH_TO_AUDIO_FILE_1 10 | # If you have added the user specific pip bin/ folder to your path, you can also do 11 | demucs -d cpu PATH_TO_AUDIO_FILE_1 12 | ``` 13 | 14 | If Python is too old, or you want to be able to train, I recommend [installing Miniconda][miniconda], with Python 3.8 or more. 15 | 16 | ```bash 17 | conda activate 18 | pip3 install -U demucs 19 | # Then anytime you want to use demucs, first do conda activate, then 20 | demucs -d cpu PATH_TO_AUDIO_FILE_1 21 | ``` 22 | 23 | Of course, you can also use a specific env for Demucs. 24 | 25 | **Important, torchaudio 0.12 update:** Torchaudio no longer supports decoding mp3s without ffmpeg installed. You must have ffmpeg installed, either through Anaconda (`conda install ffmpeg -c conda-forge`) or as a distribution package (e.g. `sudo apt-get install ffmpeg`). 26 | 27 | 28 | [miniconda]: https://docs.conda.io/en/latest/miniconda.html#linux-installers 29 | -------------------------------------------------------------------------------- /submodules/demucs/docs/mac.md: -------------------------------------------------------------------------------- 1 | # macOS support for Demucs 2 | 3 | If you have a sufficiently recent version of macOS, you can just run 4 | 5 | ```bash 6 | python3 -m pip install --user -U demucs 7 | # Then anytime you want to use demucs, just do 8 | python3 -m demucs -d cpu PATH_TO_AUDIO_FILE_1 9 | # If you have added the user specific pip bin/ folder to your path, you can also do 10 | demucs -d cpu PATH_TO_AUDIO_FILE_1 11 | ``` 12 | 13 | If you do not already have Anaconda installed or much experience with the terminal on macOS, here are some detailed instructions: 14 | 15 | 1. Download [Anaconda 3.8 (or more recent) 64-bit for macOS][anaconda]: 16 | 2. Open [Anaconda Prompt in macOS][prompt] 17 | 3. Follow these commands: 18 | ```bash 19 | conda activate 20 | pip3 install -U demucs 21 | # Then anytime you want to use demucs, first do conda activate, then 22 | demucs -d cpu PATH_TO_AUDIO_FILE_1 23 | ``` 24 | 25 | **Important, torchaudio 0.12 update:** Torchaudio no longer supports decoding mp3s without ffmpeg installed. You must have ffmpeg installed, either through Anaconda (`conda install ffmpeg -c conda-forge`) or with Homebrew for instance (`brew install ffmpeg`). 
26 | 27 | [anaconda]: https://www.anaconda.com/download 28 | [prompt]: https://docs.anaconda.com/anaconda/user-guide/getting-started/#open-nav-mac 29 | -------------------------------------------------------------------------------- /submodules/demucs/environment-cpu.yml: -------------------------------------------------------------------------------- 1 | name: demucs 2 | 3 | channels: 4 | - pytorch 5 | - conda-forge 6 | 7 | dependencies: 8 | - python>=3.8,<3.10 9 | - ffmpeg>=4.2 10 | - pytorch>=1.8.1 11 | - torchaudio>=0.8 12 | - tqdm>=4.36 13 | - pip 14 | - pip: 15 | - diffq>=0.2 16 | - dora-search 17 | - einops 18 | - hydra-colorlog>=1.1 19 | - hydra-core>=1.1 20 | - julius>=0.2.3 21 | - lameenc>=1.2 22 | - openunmix 23 | - musdb>=0.4.0 24 | - museval>=0.4.0 25 | - soundfile 26 | - submitit 27 | - treetable>=0.2.3 28 | 29 | -------------------------------------------------------------------------------- /submodules/demucs/environment-cuda.yml: -------------------------------------------------------------------------------- 1 | name: demucs 2 | 3 | channels: 4 | - pytorch 5 | - conda-forge 6 | 7 | dependencies: 8 | - python>=3.8,<3.10 9 | - ffmpeg>=4.2 10 | - pytorch>=1.8.1 11 | - torchaudio>=0.8 12 | - cudatoolkit>=10 13 | - tqdm>=4.36 14 | - pip 15 | - pip: 16 | - diffq>=0.2 17 | - dora-search 18 | - einops 19 | - hydra-colorlog>=1.1 20 | - hydra-core>=1.1 21 | - julius>=0.2.3 22 | - lameenc>=1.2 23 | - openunmix 24 | - musdb>=0.4.0 25 | - museval>=0.4.0 26 | - soundfile 27 | - submitit 28 | - treetable>=0.2.3 29 | -------------------------------------------------------------------------------- /submodules/demucs/hubconf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | dependencies = ['dora-search', 'julius', 'lameenc', 'openunmix', 'pyyaml', 8 | 'torch', 'torchaudio', 'tqdm'] 9 | 10 | from demucs.pretrained import get_model 11 | 12 | -------------------------------------------------------------------------------- /submodules/demucs/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | [mypy-treetable,torchaudio.*,diffq,yaml,tqdm,lameenc,musdb,museval,openunmix.*,einops,xformers.*] 4 | ignore_missing_imports = True 5 | 6 | -------------------------------------------------------------------------------- /submodules/demucs/outputs.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/demucs/outputs.tar.gz -------------------------------------------------------------------------------- /submodules/demucs/requirements.txt: -------------------------------------------------------------------------------- 1 | # please make sure you have already a pytorch install that is cuda enabled! 
--------------------------------------------------------------------------------
/submodules/demucs/mypy.ini:
--------------------------------------------------------------------------------
[mypy]

[mypy-treetable,torchaudio.*,diffq,yaml,tqdm,lameenc,musdb,museval,openunmix.*,einops,xformers.*]
ignore_missing_imports = True

--------------------------------------------------------------------------------
/submodules/demucs/outputs.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/demucs/outputs.tar.gz
--------------------------------------------------------------------------------
/submodules/demucs/requirements.txt:
--------------------------------------------------------------------------------
# Please make sure you already have a CUDA-enabled PyTorch install!
dora-search>=0.1.12
diffq>=0.2.1
einops
flake8
hydra-colorlog>=1.1
hydra-core>=1.1
julius>=0.2.3
lameenc>=1.2
museval
mypy
openunmix
pyyaml
submitit
# torch>=1.8.1
# torchaudio>=0.8,<2.1
tqdm
treetable
soundfile>=0.10.3;sys_platform=="win32"
--------------------------------------------------------------------------------
/submodules/demucs/requirements_minimal.txt:
--------------------------------------------------------------------------------
# Please make sure you already have a CUDA-enabled PyTorch install!
dora-search
einops
julius>=0.2.3
lameenc>=1.2
openunmix
pyyaml
# torch>=1.8.1
# torchaudio>=0.8,<2.1
tqdm
--------------------------------------------------------------------------------
/submodules/demucs/setup.cfg:
--------------------------------------------------------------------------------
[pep8]
max-line-length = 100

[flake8]
max-line-length = 100

[yapf]
column_limit = 100
--------------------------------------------------------------------------------
/submodules/demucs/test.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/demucs/test.mp3
--------------------------------------------------------------------------------
/submodules/demucs/tools/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
--------------------------------------------------------------------------------
/submodules/demucs/tools/test_pretrained.py:
--------------------------------------------------------------------------------
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

# Script to evaluate pretrained models.

from argparse import ArgumentParser
import logging
import sys

import torch

from demucs import train, pretrained, evaluate


def main():
    torch.set_num_threads(1)
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    parser = ArgumentParser("tools.test_pretrained",
                            description="Evaluate pre-trained models or bags of models "
                                        "on MusDB.")
    pretrained.add_model_flags(parser)
    parser.add_argument('overrides', nargs='*',
                        help='Extra overrides, e.g. test.shifts=2.')
    args = parser.parse_args()

    xp = train.main.get_xp(args.overrides)
    with xp.enter():
        solver = train.get_solver(xp.cfg)

    model = pretrained.get_model_from_args(args)
    solver.model = model.to(solver.device)
    solver.model.eval()

    with torch.no_grad():
        results = evaluate.evaluate(solver, xp.cfg.test.sdr)
    print(results)


if __name__ == '__main__':
    main()
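For reference, an invocation might look like the following. The model-selection flags come from pretrained.add_model_flags, which is not shown here, so treat the -n flag and the model name as assumptions; the trailing argument is one of the Dora/Hydra overrides the help text mentions:

```bash
# Hypothetical invocation; flag and model names are assumptions.
python3 -m tools.test_pretrained -n mdx_extra_q test.shifts=2
```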
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/submodules/whisper/MANIFEST.in:
--------------------------------------------------------------------------------
include requirements.txt
include README.md
include LICENSE
include whisper/assets/*
include whisper/normalizers/english.json
--------------------------------------------------------------------------------
/submodules/whisper/approach.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/whisper/approach.png
--------------------------------------------------------------------------------
/submodules/whisper/pyproject.toml:
--------------------------------------------------------------------------------
[tool.black]

[tool.isort]
profile = "black"
include_trailing_comma = true
line_length = 88
multi_line_output = 3

--------------------------------------------------------------------------------
/submodules/whisper/requirements.txt:
--------------------------------------------------------------------------------
numba
numpy
# torch
tqdm
more-itertools
tiktoken
triton>=2.0.0,<3;platform_machine=="x86_64" and (sys_platform=="linux" or sys_platform=="linux2")
--------------------------------------------------------------------------------
/submodules/whisper/setup.py:
--------------------------------------------------------------------------------
import platform
import sys
from pathlib import Path

import pkg_resources
from setuptools import find_packages, setup


def read_version(fname="whisper/version.py"):
    # Execute version.py and pull __version__ out of its namespace.
    exec(compile(open(fname, encoding="utf-8").read(), fname, "exec"))
    return locals()["__version__"]


requirements = []
if sys.platform.startswith("linux") and platform.machine() == "x86_64":
    requirements.append("triton>=2.0.0,<3")

setup(
    name="openai-whisper",
    py_modules=["whisper"],
    version=read_version(),
    description="Robust Speech Recognition via Large-Scale Weak Supervision",
    long_description=open("README.md", encoding="utf-8").read(),
    long_description_content_type="text/markdown",
    readme="README.md",
    python_requires=">=3.8",
    author="OpenAI",
    url="https://github.com/openai/whisper",
    license="MIT",
    packages=find_packages(exclude=["tests*"]),
    install_requires=requirements
    + [
        str(r)
        for r in pkg_resources.parse_requirements(
            Path(__file__).with_name("requirements.txt").open()
        )
    ],
    entry_points={
        "console_scripts": ["whisper=whisper.transcribe:cli"],
    },
    include_package_data=True,
    extras_require={"dev": ["pytest", "scipy", "black", "flake8", "isort"]},
)
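The console_scripts entry point above installs the `whisper` CLI, and the same package is usable from Python. A minimal usage sketch (the model size and the audio path are placeholders):

```python
import whisper

# Download/load a pretrained checkpoint and transcribe a file.
model = whisper.load_model("base")
result = model.transcribe("audio.mp3")
print(result["text"])
```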
--------------------------------------------------------------------------------
/submodules/whisper/tests/conftest.py:
--------------------------------------------------------------------------------
import random as rand

import numpy
import pytest


def pytest_configure(config):
    config.addinivalue_line("markers", "requires_cuda")


@pytest.fixture
def random():
    rand.seed(42)
    numpy.random.seed(42)
--------------------------------------------------------------------------------
/submodules/whisper/tests/jfk.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/whisper/tests/jfk.flac
--------------------------------------------------------------------------------
/submodules/whisper/tests/test_audio.py:
--------------------------------------------------------------------------------
import os.path

import numpy as np

from whisper.audio import SAMPLE_RATE, load_audio, log_mel_spectrogram


def test_audio():
    audio_path = os.path.join(os.path.dirname(__file__), "jfk.flac")
    audio = load_audio(audio_path)
    assert audio.ndim == 1
    assert SAMPLE_RATE * 10 < audio.shape[0] < SAMPLE_RATE * 12
    assert 0 < audio.std() < 1

    mel_from_audio = log_mel_spectrogram(audio)
    mel_from_file = log_mel_spectrogram(audio_path)

    assert np.allclose(mel_from_audio, mel_from_file)
    assert mel_from_audio.max() - mel_from_audio.min() <= 2.0
--------------------------------------------------------------------------------
/submodules/whisper/tests/test_tokenizer.py:
--------------------------------------------------------------------------------
import pytest

from whisper.tokenizer import get_tokenizer


@pytest.mark.parametrize("multilingual", [True, False])
def test_tokenizer(multilingual):
    tokenizer = get_tokenizer(multilingual=multilingual)
    assert tokenizer.sot in tokenizer.sot_sequence
    assert len(tokenizer.all_language_codes) == len(tokenizer.all_language_tokens)
    assert all(c < tokenizer.timestamp_begin for c in tokenizer.all_language_tokens)


def test_multilingual_tokenizer():
    gpt2_tokenizer = get_tokenizer(multilingual=False)
    multilingual_tokenizer = get_tokenizer(multilingual=True)

    text = "다람쥐 헌 쳇바퀴에 타고파"
    gpt2_tokens = gpt2_tokenizer.encode(text)
    multilingual_tokens = multilingual_tokenizer.encode(text)

    assert gpt2_tokenizer.decode(gpt2_tokens) == text
    assert multilingual_tokenizer.decode(multilingual_tokens) == text
    assert len(gpt2_tokens) > len(multilingual_tokens)


def test_split_on_unicode():
    multilingual_tokenizer = get_tokenizer(multilingual=True)

    tokens = [8404, 871, 287, 6, 246, 526, 3210, 20378]
    words, word_tokens = multilingual_tokenizer.split_tokens_on_unicode(tokens)

    assert words == [" elle", " est", " l", "'", "\ufffd", "é", "rit", "oire"]
    assert word_tokens == [[8404], [871], [287], [6], [246], [526], [3210], [20378]]
--------------------------------------------------------------------------------
/submodules/whisper/whisper/__main__.py:
--------------------------------------------------------------------------------
from .transcribe import cli

cli()
--------------------------------------------------------------------------------
/submodules/whisper/whisper/assets/mel_filters.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/whisper/whisper/assets/mel_filters.npz
--------------------------------------------------------------------------------
/submodules/whisper/whisper/normalizers/__init__.py:
--------------------------------------------------------------------------------
from .basic import BasicTextNormalizer as BasicTextNormalizer
from .english import EnglishTextNormalizer as EnglishTextNormalizer
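These re-exports are the text normalizers typically used to make transcript comparisons (e.g. WER) fair; a quick sketch (the exact normalized output is illustrative, not verified here):

```python
from whisper.normalizers import EnglishTextNormalizer

normalizer = EnglishTextNormalizer()
# Lowercases, strips punctuation, and standardizes spelled-out forms
# so that reference and hypothesis transcripts compare fairly.
print(normalizer("Mr. Smith, it's 10:30 AM!"))
```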
--------------------------------------------------------------------------------
/submodules/whisper/whisper/version.py:
--------------------------------------------------------------------------------
__version__ = "20231117"
--------------------------------------------------------------------------------
/submodules/whisperX/EXAMPLES.md:
--------------------------------------------------------------------------------
# More Examples

## Other Languages

For non-English ASR, it is best to use the `large` whisper model. Alignment models are picked automatically for the chosen language from the default [lists](https://github.com/m-bain/whisperX/blob/main/whisperx/alignment.py#L18).

Default alignment models are currently provided and tested for {en, fr, de, es, it, ja, zh, nl}.

If the detected language is not in this list, you need to find a phoneme-based ASR model on the [huggingface model hub](https://huggingface.co/models) and test it on your data (see the sketch at the end of this file).

### French
    whisperx --model large --language fr examples/sample_fr_01.wav

https://user-images.githubusercontent.com/36994049/208298804-31c49d6f-6787-444e-a53f-e93c52706752.mov

### German
    whisperx --model large --language de examples/sample_de_01.wav

https://user-images.githubusercontent.com/36994049/208298811-e36002ba-3698-4731-97d4-0aebd07e0eb3.mov

### Italian
    whisperx --model large --language it examples/sample_it_01.wav

https://user-images.githubusercontent.com/36994049/208298819-6f462b2c-8cae-4c54-b8e1-90855794efc7.mov

### Japanese
    whisperx --model large --language ja examples/sample_ja_01.wav

https://user-images.githubusercontent.com/19920981/208731743-311f2360-b73b-4c60-809d-aaf3cd7e06f4.mov
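For a language outside the default list, a phoneme-level wav2vec2 checkpoint from the Hugging Face hub can be passed in explicitly. A sketch of the pattern (the Greek model name and audio path are illustrative assumptions, not tested recommendations):

```python
import whisperx

device = "cuda"
audio = whisperx.load_audio("examples/sample_el_01.wav")  # hypothetical file

# model_name overrides the default alignment model for the language code.
align_model, metadata = whisperx.load_align_model(
    language_code="el", device=device,
    model_name="jonatasgrosman/wav2vec2-large-xlsr-53-greek",
)
```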
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/submodules/whisperX/MANIFEST.in:
--------------------------------------------------------------------------------
include whisperx/assets/*
include whisperx/assets/gpt2/*
include whisperx/assets/multilingual/*
include whisperx/normalizers/english.json
--------------------------------------------------------------------------------
/submodules/whisperX/figures/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/submodules/whisperX/figures/pipeline.png
--------------------------------------------------------------------------------
/submodules/whisperX/requirements.txt:
--------------------------------------------------------------------------------
# torch>=2
# torchaudio>=2
faster-whisper==1.0.0
transformers
pandas
setuptools>=65
nltk
--------------------------------------------------------------------------------
/submodules/whisperX/setup.py:
--------------------------------------------------------------------------------
import os

import pkg_resources
from setuptools import find_packages, setup

setup(
    name="whisperx",
    py_modules=["whisperx"],
    version="3.1.1",
    description="Time-Accurate Automatic Speech Recognition using Whisper.",
    readme="README.md",
    python_requires=">=3.8",
    author="Max Bain",
    url="https://github.com/m-bain/whisperx",
    license="BSD-2-Clause",
    packages=find_packages(exclude=["tests*"]),
    install_requires=[
        str(r)
        for r in pkg_resources.parse_requirements(
            open(os.path.join(os.path.dirname(__file__), "requirements.txt"))
        )
    ]
    + ["pyannote.audio==3.1.1"],
    entry_points={
        "console_scripts": ["whisperx=whisperx.transcribe:cli"],
    },
    include_package_data=True,
    extras_require={"dev": ["pytest"]},
)
--------------------------------------------------------------------------------
/submodules/whisperX/whisperx/__init__.py:
--------------------------------------------------------------------------------
from .transcribe import load_model
from .alignment import load_align_model, align
from .audio import load_audio
from .diarize import assign_word_speakers, DiarizationPipeline
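These four re-exports are the public API used throughout this project; a minimal transcribe-then-align sketch (the model size, device, batch size, and audio path are placeholder assumptions):

```python
import whisperx

device = "cuda"
audio = whisperx.load_audio("audio.mp3")

# 1. Transcribe with the batched Whisper backend.
model = whisperx.load_model("large-v2", device, compute_type="float16")
result = model.transcribe(audio, batch_size=16)

# 2. Align the segments to get word-level timestamps.
align_model, metadata = whisperx.load_align_model(
    language_code=result["language"], device=device)
result = whisperx.align(result["segments"], align_model, metadata, audio, device)

print(result["word_segments"][:5])
```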
7 | """ 8 | word: str 9 | start: float 10 | end: float 11 | score: float 12 | 13 | class SingleCharSegment(TypedDict): 14 | """ 15 | A single char of a speech. 16 | """ 17 | char: str 18 | start: float 19 | end: float 20 | score: float 21 | 22 | 23 | class SingleSegment(TypedDict): 24 | """ 25 | A single segment (up to multiple sentences) of a speech. 26 | """ 27 | 28 | start: float 29 | end: float 30 | text: str 31 | 32 | 33 | class SingleAlignedSegment(TypedDict): 34 | """ 35 | A single segment (up to multiple sentences) of a speech with word alignment. 36 | """ 37 | 38 | start: float 39 | end: float 40 | text: str 41 | words: List[SingleWordSegment] 42 | chars: Optional[List[SingleCharSegment]] 43 | 44 | 45 | class TranscriptionResult(TypedDict): 46 | """ 47 | A list of segments and word segments of a speech. 48 | """ 49 | segments: List[SingleSegment] 50 | language: str 51 | 52 | 53 | class AlignedTranscriptionResult(TypedDict): 54 | """ 55 | A list of segments and word segments of a speech. 56 | """ 57 | segments: List[SingleAlignedSegment] 58 | word_segments: List[SingleWordSegment] 59 | -------------------------------------------------------------------------------- /tabs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kedreamix/Linly-Dubbing/5677191ee544afae8250cee8e801c03839bcba24/tabs/__init__.py -------------------------------------------------------------------------------- /tabs/linly_talker_tab.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PySide6.QtWidgets import (QWidget, QVBoxLayout, QLabel, QLineEdit, 3 | QComboBox, QMessageBox) 4 | 5 | from ui_components import VideoPlayer 6 | 7 | 8 | class LinlyTalkerTab(QWidget): 9 | def __init__(self, parent=None): 10 | super().__init__(parent) 11 | self.layout = QVBoxLayout(self) 12 | 13 | # 视频文件夹 14 | self.video_folder = QLineEdit("videos") 15 | self.layout.addWidget(QLabel("视频文件夹")) 16 | self.layout.addWidget(self.video_folder) 17 | 18 | # AI配音方式 19 | self.talker_method = QComboBox() 20 | self.talker_method.addItems(['Wav2Lip', 'Wav2Lipv2', 'SadTalker']) 21 | self.layout.addWidget(QLabel("AI配音方式")) 22 | self.layout.addWidget(self.talker_method) 23 | 24 | # 施工中提示 25 | construction_label = QLabel("施工中,请静候佳音 可参考 https://github.com/Kedreamix/Linly-Talker") 26 | construction_label.setOpenExternalLinks(True) 27 | self.layout.addWidget(construction_label) 28 | 29 | # 状态显示 30 | self.status_label = QLabel("功能开发中") 31 | self.layout.addWidget(QLabel("合成状态:")) 32 | self.layout.addWidget(self.status_label) 33 | 34 | # 视频播放器 35 | self.video_player = VideoPlayer("合成视频") 36 | self.layout.addWidget(self.video_player) 37 | 38 | self.setLayout(self.layout) -------------------------------------------------------------------------------- /tools/step031_translation_openai.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | from openai import OpenAI 4 | from dotenv import load_dotenv 5 | from loguru import logger 6 | 7 | extra_body = { 8 | 'repetition_penalty': 1.1, 9 | } 10 | model_name = os.getenv('MODEL_NAME', 'gpt-3.5-turbo') 11 | def openai_response(messages): 12 | client = OpenAI( 13 | # This is the default and can be omitted 14 | base_url=os.getenv('OPENAI_API_BASE', 'https://api.openai.com/v1'), 15 | api_key=os.getenv('OPENAI_API_KEY') 16 | ) 17 | if 'gpt' not in model_name: 18 | model_name = 'gpt-3.5-turbo' 19 | response = 
--------------------------------------------------------------------------------
/tools/step033_translation_translator.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import translators as ts
from dotenv import load_dotenv
from loguru import logger
load_dotenv()


def translator_response(messages, to_language='zh-CN', translator_server='bing'):
    if '中文' in to_language:
        to_language = 'zh-CN'
    elif 'English' in to_language:
        to_language = 'en'
    translation = ''
    for retry in range(3):
        try:
            translation = ts.translate_text(query_text=messages, translator=translator_server,
                                            from_language='auto', to_language=to_language)
            break
        except Exception as e:
            logger.info(f'translate failed! {e}')
            print('translate failed!')
    return translation


if __name__ == '__main__':
    response = translator_response('Hello, how are you?', '中文', 'bing')
    print(response)
    response = translator_response('你好,最近怎么样? ', 'en', 'google')
    print(response)
--------------------------------------------------------------------------------
/tools/step035_translation_qwen.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import os
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

extra_body = {
    'repetition_penalty': 1.1,
}
model_name = os.getenv('QWEN_MODEL_ID', 'qwen-max-2025-01-25')


def qwen_response(messages):
    client = OpenAI(
        # DashScope exposes an OpenAI-compatible endpoint.
        base_url=os.getenv('QWEN_API_BASE', 'https://dashscope.aliyuncs.com/compatible-mode/v1'),
        api_key=os.getenv('QWEN_API_KEY')
    )
    response = client.chat.completions.create(
        model=model_name,
        messages=messages,
        timeout=240,
        extra_body=extra_body
    )
    return response.choices[0].message.content


if __name__ == '__main__':
    test_message = [{"role": "user", "content": "你好,介绍一下你自己"}]
    response = qwen_response(test_message)
    print(response)
--------------------------------------------------------------------------------
/tools/step044_tts_edge_tts.py:
--------------------------------------------------------------------------------
import os
from loguru import logger


# Voice presets for Chinese/English/Japanese/Cantonese/Korean
language_map = {
    '中文': 'zh-CN-XiaoxiaoNeural',
    'English': 'en-US-MichelleNeural',
    'Japanese': 'ja-JP-NanamiNeural',
    '粤语': 'zh-HK-HiuMaanNeural',
    'Korean': 'ko-KR-SunHiNeural'
}


def tts(text, output_path, target_language='中文', voice='zh-CN-XiaoxiaoNeural'):
    if os.path.exists(output_path):
        logger.info(f'TTS for "{text}" already exists')
        return
    for retry in range(3):
        try:
            # Note: edge-tts writes an .mp3 next to the requested .wav path.
            os.system(f'edge-tts --text "{text}" --write-media "{output_path.replace(".wav", ".mp3")}" --voice {voice}')
            logger.info(f'TTS {text}')
            break
        except Exception as e:
            logger.warning(f'TTS for "{text}" failed')
            logger.warning(e)


if __name__ == '__main__':
    speaker_wav = r'videos/村长台钓加拿大/20240805 英文无字幕 阿里这小子在水城威尼斯发来问候/audio_vocals.wav'
    while True:
        text = input('请输入:')
        tts(text, f'playground/{text}.wav', target_language='中文')
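Shelling out to the edge-tts CLI via os.system works, but it is fragile around quoting in the text. For reference, the same package also exposes an async Python API; a sketch, assuming the edge-tts package (which provides edge_tts.Communicate) is installed:

```python
import asyncio

import edge_tts


async def tts_async(text: str, output_path: str,
                    voice: str = 'zh-CN-XiaoxiaoNeural') -> None:
    # Stream the synthesized audio straight to a file, with no shell quoting.
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_path)


if __name__ == '__main__':
    asyncio.run(tts_async('你好,世界', 'playground/hello.mp3'))
```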
".mp3")}" --voice {voice}') 30 | logger.info(f'TTS {text}') 31 | break 32 | except Exception as e: 33 | logger.warning(f'TTS {text} 失败') 34 | logger.warning(e) 35 | 36 | 37 | if __name__ == '__main__': 38 | speaker_wav = r'videos/村长台钓加拿大/20240805 英文无字幕 阿里这小子在水城威尼斯发来问候/audio_vocals.wav' 39 | while True: 40 | text = input('请输入:') 41 | tts(text, f'playground/{text}.wav', target_language='中文') 42 | 43 | -------------------------------------------------------------------------------- /问题参考汇总.md: -------------------------------------------------------------------------------- 1 | # 问题参考汇总 2 | 3 | ## 目录 Content 4 | 5 | - [yt-dlp下载失败](#yt-dlp下载失败) 6 | - [Could not load library libcudnn_ops_infer.so.8](#could-not-load-library-libcudnn_ops_inferso8) 7 | 8 | ## yt-dlp下载失败 9 | 10 | 有时下载失败可能是由于缺少cookie引起的。可以通过以下命令生成一个`cookies.txt`文件,并将其放在程序的根目录下解决问题(可在本地生成然后上传到服务器)。 11 | 12 | > 参考链接:https://github.com/yt-dlp/yt-dlp/wiki/FAQ 13 | 14 | ```bash 15 | yt-dlp --cookies-from-browser chrome --cookies cookies.txt 16 | ``` 17 | 18 | ## Could not load library libcudnn_ops_infer.so.8 19 | 20 | 此错误通常是由于找不到库文件路径,可以通过设置`torch`的路径来解决,下面的命令可以用来设置环境变量`LD_LIBRARY_PATH` 21 | 22 | > 参考链接:https://github.com/SYSTRAN/faster-whisper/issues/516 23 | 24 | ```bash 25 | export LD_LIBRARY_PATH=`python3 -c 'import os; import torch; print(os.path.dirname(os.path.dirname(torch.__file__)) +"/nvidia/cudnn/lib")'`:$LD_LIBRARY_PATH 26 | # 你也可以尝试以下命令 27 | # export LD_LIBRARY_PATH=`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'`:$LD_LIBRARY_PATH 28 | ``` 29 | 30 | --------------------------------------------------------------------------------