├── README.md
└── Edge_TTS_+_SO_VITS.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Edge-TTS-SOVITS
2 | Combining Edge TTS + SO-VITS Voice Converter Using Google Colab
3 | ## Leave A Star if This Repo Was Helpful
4 | [Support me on Ko-fi](https://ko-fi.com/R6R7AH1FA)
5 |
6 |
7 |
8 |
9 | ### Tutorial (Indonesian)
10 | https://youtu.be/MEsep7cg0dI
11 |
12 | ### Google Colab
13 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ardha27/Edge_TTS_Sovits/blob/main/Edge_TTS_%2B_SO_VITS.ipynb)
14 |
--------------------------------------------------------------------------------
/Edge_TTS_+_SO_VITS.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "gpuType": "T4",
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | },
17 | "accelerator": "GPU",
18 | "gpuClass": "standard"
19 | },
20 | "cells": [
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "id": "view-in-github",
25 | "colab_type": "text"
26 | },
27 | "source": [
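28 | "<a href=\"https://colab.research.google.com/github/ardha27/Edge_TTS_Sovits/blob/main/Edge_TTS_%2B_SO_VITS.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"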
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "source": [
34 | "#@title Mount Google Drive\n",
35 | "import google.colab.drive as drive  # Colab-only module, bound to the same name `drive`\n",
36 | "drive.mount('/content/drive')  # makes Google Drive available under /content/drive"
37 | ],
38 | "metadata": {
39 | "id": "GCEVfxIOh7tE"
40 | },
41 | "execution_count": null,
42 | "outputs": []
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": null,
47 | "metadata": {
48 | "collapsed": true,
49 | "id": "6BnxhPgVPvzU"
50 | },
51 | "outputs": [],
52 | "source": [
53 | "#@title Install dependencies\n",
54 | "# %pip (instead of !pip) installs into the environment of the running kernel.\n",
55 | "%pip install pyworld==0.3.2\n",
56 | "%pip install -U pip wheel\n",
57 | "%pip install -U ipython\n",
58 | "%pip install -U so-vits-svc-fork\n",
59 | "%pip install edge-tts audiosegment"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "source": [
65 | "#@title Credit - https://huggingface.co/spaces/zomehwh/sovits-models\n",
66 | "# -p keeps mkdir quiet on re-runs; the absolute path matches the MODEL/CONFIG defaults of the inference cell.\n",
67 | "!mkdir -p /content/so-vits-test\n",
68 | "!wget -N -P /content/so-vits-test/ \"https://huggingface.co/spaces/zomehwh/sovits-models/resolve/main/models/alice/alice.pth\" \"https://huggingface.co/spaces/zomehwh/sovits-models/resolve/main/models/alice/config.json\""
69 | ],
70 | "metadata": {
71 | "id": "LPysEbyRUOTH"
72 | },
73 | "execution_count": null,
74 | "outputs": []
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "source": [
79 | "This notebook uses the Indonesian voices id-ID-ArdiNeural (Male) and id-ID-GadisNeural (Female). Run the following command to list the other available voices:\n",
80 | "```\n",
81 | "!edge-tts --list-voices\n",
82 | "```"
83 | ],
84 | "metadata": {
85 | "id": "ZepPJfm6is49"
86 | }
87 | },
88 | {
89 | "cell_type": "code",
90 | "source": [
91 | "#@title Generate TTS and Inference\n",
92 | "#@markdown Avoid spaces in the folder/file names used in the paths below\n",
93 | "import subprocess\n",
94 | "import audiosegment\n",
95 | "from IPython.display import Audio, display\n",
96 | "\n",
97 | "gender = \"Female\" #@param [\"Male\", \"Female\"]\n",
98 | "text = \"Selamat siang\" #@param {type:\"string\"}\n",
99 | "AUDIO = \"/content/edge-conv\" #@param {type:\"string\"}\n",
100 | "MODEL = \"/content/so-vits-test/alice.pth\" #@param {type:\"string\"}\n",
101 | "CONFIG = \"/content/so-vits-test/config.json\" #@param {type:\"string\"}\n",
102 | "METHOD = \"harvest\" #@param [\"harvest\", \"dio\", \"crepe\", \"crepe-tiny\", \"parselmouth\"]\n",
103 | "AUTO_PITCH = True #@param {type:\"boolean\"}\n",
104 | "#@markdown PITCH is only applied when AUTO_PITCH is unchecked\n",
105 | "PITCH = 0 #@param {type:\"slider\", min:-12, max:12, step:1}\n",
106 | "\n",
107 | "# Edge TTS voice for each dropdown choice; run `!edge-tts --list-voices` to see alternatives.\n",
108 | "VOICES = {\"Male\": \"id-ID-ArdiNeural\", \"Female\": \"id-ID-GadisNeural\"}\n",
109 | "if not text.strip():\n",
110 | "    raise ValueError(\"text is empty - enter something to synthesize\")\n",
111 | "\n",
112 | "# 1) Synthesize speech. Output (incl. stderr) is shown, and a failure stops the cell so a stale edge.mp3 is never converted.\n",
113 | "command = ['edge-tts', '--voice', VOICES[gender], '--text', text, '--write-media', 'edge.mp3', '--write-subtitles', 'edge.vtt']\n",
114 | "result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)\n",
115 | "print(result.stdout)\n",
116 | "if result.returncode != 0:\n",
117 | "    raise RuntimeError(f\"edge-tts failed with exit code {result.returncode}\")\n",
118 | "\n",
119 | "# The preview is best-effort: a display problem must not block the conversion.\n",
120 | "try:\n",
121 | "    display(Audio(\"edge.mp3\", autoplay=True))\n",
122 | "except Exception as e: print(\"Error:\", str(e))\n",
123 | "\n",
124 | "# 2) Convert the MP3 to a 2-byte-sample, 44100 Hz, mono WAV, written to the exact path svc reads below.\n",
125 | "audio = audiosegment.from_file(\"edge.mp3\")\n",
126 | "audio = audio.set_sample_width(2)\n",
127 | "audio = audio.set_frame_rate(44100)\n",
128 | "audio = audio.set_channels(1)\n",
129 | "audio.export(f\"{AUDIO}.wav\", format='wav')\n",
130 | "\n",
131 | "# 3) Voice conversion. Auto pitch mode passes no extra flags; manual pitch mode adds `-na -t PITCH`.\n",
132 | "pitch_args = \"\" if AUTO_PITCH else f\"-na -t {PITCH}\"\n",
133 | "!svc infer \"{AUDIO}.wav\" -c \"{CONFIG}\" -m \"{MODEL}\" -fm {METHOD} {pitch_args}\n",
134 | "\n",
135 | "# The converted audio is expected at <AUDIO>.out.wav; playback is best-effort as well.\n",
136 | "try:\n",
137 | "    display(Audio(f\"{AUDIO}.out.wav\", autoplay=True))\n",
138 | "except Exception as e: print(\"Error:\", str(e))"
139 | ],
140 | "metadata": {
141 | "id": "-vXBn5jFQITv"
142 | },
143 | "execution_count": null,
144 | "outputs": []
145 | }
146 | ]
147 | }
--------------------------------------------------------------------------------