├── README.md └── singing_voice_conversion_colab.ipynb /README.md: -------------------------------------------------------------------------------- 1 | 🐣 Please follow me for new updates https://twitter.com/camenduru
2 | 🔥 Please join our Discord server https://discord.gg/k5BwmmvJJU
3 | 🥳 Please join my Patreon community https://patreon.com/camenduru
4 | 5 | ### 🦒 Colab 6 | 7 | | Colab | Info 8 | | --- | --- | 9 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/singing-voice-conversion-colab/blob/main/singing_voice_conversion_colab.ipynb) | singing_voice_conversion_colab 10 | 11 | ### 🧬 Code 12 | https://github.com/open-mmlab/Amphion
13 | https://github.com/open-mmlab/Amphion/tree/main/egs/svc/MultipleContentsSVC
14 | 15 | ### 📄 Paper 16 | https://arxiv.org/abs/2310.11160 17 | 18 | ### 🌐 Page 19 | https://www.zhangxueyao.com/data/MultipleContentsSVC/index.html 20 | 21 | ### 🖼 Output 22 | 23 | https://github.com/camenduru/singing-voice-conversion-colab/assets/54370274/caae796f-20a8-4072-9596-be355cde1628 24 | 25 | ### 🏢 Sponsor 26 | https://modelslab.com 27 | -------------------------------------------------------------------------------- /singing_voice_conversion_colab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github" 7 | }, 8 | "source": [ 9 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/singing-voice-conversion-colab/blob/main/singing_voice_conversion_colab.ipynb)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "id": "VjYy0F2gZIPR" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "%cd /content\n", 21 | "!git clone -b dev https://github.com/camenduru/singing_voice_conversion-hf\n", 22 | "%cd /content/singing_voice_conversion-hf\n", 23 | "\n", 24 | "!pip install -q fairseq git+https://github.com/lhotse-speech/lhotse encodec phonemizer==3.2.1 pypinyin==0.48.0 gradio==4.8.0 diffusers accelerate json5 ruamel_yaml ffmpeg-python unidecode\n", 25 | "!pip install -q pandas torchcrepe pyworld diffsptk tgt -U\n", 26 | "!pip uninstall -y parselmouth\n", 27 | "!pip install -q praat-parselmouth\n", 28 | "\n", 29 | "!apt -y install -qq aria2\n", 30 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/ckpts/svc/vocalist_l1_contentvec+whisper/checkpoint/epoch-6852_step-0678447_loss-1.946773/optimizer.bin -d /content/singing_voice_conversion-hf/ckpts/svc/vocalist_l1_contentvec+whisper/checkpoint -o 
epoch-6852_step-0678447_loss-1.946773/optimizer.bin\n", 31 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/ckpts/svc/vocalist_l1_contentvec+whisper/checkpoint/epoch-6852_step-0678447_loss-1.946773/pytorch_model.bin -d /content/singing_voice_conversion-hf/ckpts/svc/vocalist_l1_contentvec+whisper/checkpoint -o epoch-6852_step-0678447_loss-1.946773/pytorch_model.bin\n", 32 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/ckpts/svc/vocalist_l1_contentvec+whisper/checkpoint/epoch-6852_step-0678447_loss-1.946773/random_states_0.pkl -d /content/singing_voice_conversion-hf/ckpts/svc/vocalist_l1_contentvec+whisper/checkpoint -o epoch-6852_step-0678447_loss-1.946773/random_states_0.pkl\n", 33 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/ckpts/svc/vocalist_l1_contentvec+whisper/data/vocalist_l1/mel_min_max_stats/mel_max.npy -d /content/singing_voice_conversion-hf/ckpts/svc/vocalist_l1_contentvec+whisper/data/vocalist_l1 -o mel_min_max_stats/mel_max.npy\n", 34 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/ckpts/svc/vocalist_l1_contentvec+whisper/data/vocalist_l1/mel_min_max_stats/mel_min.npy -d /content/singing_voice_conversion-hf/ckpts/svc/vocalist_l1_contentvec+whisper/data/vocalist_l1 -o mel_min_max_stats/mel_min.npy\n", 35 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/ckpts/svc/vocalist_l1_contentvec+whisper/log/vocalist_l1_contentvec+whisper/events.out.tfevents.1696052302.mmnewyardnodesz63219.120.0 -d /content/singing_voice_conversion-hf/ckpts/svc/vocalist_l1_contentvec+whisper/log/vocalist_l1_contentvec+whisper -o 
events.out.tfevents.1696052302.mmnewyardnodesz63219.120.0\n", 36 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/ckpts/svc/vocalist_l1_contentvec+whisper/log/vocalist_l1_contentvec+whisper/events.out.tfevents.1696052302.mmnewyardnodesz63219.120.1 -d /content/singing_voice_conversion-hf/ckpts/svc/vocalist_l1_contentvec+whisper/log/vocalist_l1_contentvec+whisper -o events.out.tfevents.1696052302.mmnewyardnodesz63219.120.1\n", 37 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/examples/chinese_female_recordings.wav -d /content/singing_voice_conversion-hf/examples -o chinese_female_recordings.wav\n", 38 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/examples/chinese_male_seperated.wav -d /content/singing_voice_conversion-hf/examples -o chinese_male_seperated.wav\n", 39 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/examples/english_female_seperated.wav -d /content/singing_voice_conversion-hf/examples -o english_female_seperated.wav\n", 40 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/examples/english_male_recordings.wav -d /content/singing_voice_conversion-hf/examples -o english_male_recordings.wav\n", 41 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/examples/output/chinese_female_recordings_vocalist_l1_JohnMayer.wav -d /content/singing_voice_conversion-hf/examples/output -o chinese_female_recordings_vocalist_l1_JohnMayer.wav\n", 42 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M 
https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/examples/output/chinese_male_seperated_vocalist_l1_TaylorSwift.wav -d /content/singing_voice_conversion-hf/examples/output -o chinese_male_seperated_vocalist_l1_TaylorSwift.wav\n", 43 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/examples/output/english_female_seperated_vocalist_l1_汪峰.wav -d /content/singing_voice_conversion-hf/examples/output -o english_female_seperated_vocalist_l1_汪峰.wav\n", 44 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/examples/output/english_male_recordings_vocalist_l1_石倚洁.wav -d /content/singing_voice_conversion-hf/examples/output -o english_male_recordings_vocalist_l1_石倚洁.wav\n", 45 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/modules/whisper_extractor/assets/mel_filters.npz -d /content/singing_voice_conversion-hf/modules/whisper_extractor/assets -o mel_filters.npz\n", 46 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/pretrained/bigvgan/400000.pt -d /content/singing_voice_conversion-hf/pretrained/bigvgan -o 400000.pt\n", 47 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/pretrained/contentvec/checkpoint_best_legacy_500.pt -d /content/singing_voice_conversion-hf/pretrained/contentvec -o checkpoint_best_legacy_500.pt\n", 48 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/amphion/singing_voice_conversion/resolve/main/pretrained/whisper/medium.pt -d /content/singing_voice_conversion-hf/pretrained/whisper -o medium.pt\n", 49 | "\n", 50 | "!python app.py" 51 | ] 52 | } 53 | ], 54 | "metadata": { 55 | 
"accelerator": "GPU", 56 | "colab": { 57 | "gpuType": "T4", 58 | "provenance": [] 59 | }, 60 | "kernelspec": { 61 | "display_name": "Python 3", 62 | "name": "python3" 63 | }, 64 | "language_info": { 65 | "name": "python" 66 | } 67 | }, 68 | "nbformat": 4, 69 | "nbformat_minor": 0 70 | } 71 | --------------------------------------------------------------------------------