├── README.md
└── Edge_TTS_+_SO_VITS.ipynb


/README.md:
--------------------------------------------------------------------------------
 1 | # Edge-TTS-SOVITS
 2 | Combine Edge TTS with the SO-VITS voice converter, running on Google Colab.
 3 | ## Leave A Star if This Repo Was Helpful
 4 | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/R6R7AH1FA)
 5 | <a href="https://trakteer.id/ardha27">
 6 |     <img src="https://cdn.trakteer.id/images/embed/trbtn-red-1.png" alt="Trakteer" height="35">
 7 | </a>
 8 | 
 9 | ### Tutorial (Indonesian)
10 | https://youtu.be/MEsep7cg0dI
11 | 
12 | ### Google Colab
13 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ardha27/Edge_TTS_Sovits/blob/main/Edge_TTS_%2B_SO_VITS.ipynb)
14 | 


--------------------------------------------------------------------------------
/Edge_TTS_+_SO_VITS.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "gpuType": "T4",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     },
 17 |     "accelerator": "GPU",
 18 |     "gpuClass": "standard"
 19 |   },
 20 |   "cells": [
 21 |     {
 22 |       "cell_type": "markdown",
 23 |       "metadata": {
 24 |         "id": "view-in-github",
 25 |         "colab_type": "text"
 26 |       },
 27 |       "source": [
 28 |         "<a href=\"https://colab.research.google.com/github/ardha27/Edge_TTS_Sovits/blob/main/Edge_TTS_%2B_SO_VITS.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 29 |       ]
 30 |     },
 31 |     {
 32 |       "cell_type": "code",
 33 |       "source": [
 34 |         "#@title Mount Google Drive\n",
 35 |         "from google.colab import drive\n",
 36 |         "drive.mount('/content/drive')  # Drive is mounted at /content/drive"
 37 |       ],
 38 |       "metadata": {
 39 |         "id": "GCEVfxIOh7tE"
 40 |       },
 41 |       "execution_count": null,
 42 |       "outputs": []
 43 |     },
 44 |     {
 45 |       "cell_type": "code",
 46 |       "execution_count": null,
 47 |       "metadata": {
 48 |         "collapsed": true,
 49 |         "id": "6BnxhPgVPvzU"
 50 |       },
 51 |       "outputs": [],
 52 |       "source": [
 53 |         "#@title Install dependencies\n",
 54 |         "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
 55 |         "%pip install pyworld==0.3.2\n",
 56 |         "%pip install -U pip wheel\n",
 57 |         "%pip install -U ipython\n",
 58 |         "%pip install -U so-vits-svc-fork\n",
 59 |         "%pip install edge-tts audiosegment"
 60 |       ]
 61 |     },
 62 |     {
 63 |       "cell_type": "code",
 64 |       "source": [
 65 |         "#@title Credit - https://huggingface.co/spaces/zomehwh/sovits-models\n",
 66 |         "!mkdir -p so-vits-test  # -p: no error if the folder already exists, so the cell can be re-run\n",
 67 |         "!wget -N \"https://huggingface.co/spaces/zomehwh/sovits-models/resolve/main/models/alice/alice.pth\" -P so-vits-test/\n",
 68 |         "!wget -N \"https://huggingface.co/spaces/zomehwh/sovits-models/resolve/main/models/alice/config.json\" -P so-vits-test/"
 69 |       ],
 70 |       "metadata": {
 71 |         "id": "LPysEbyRUOTH"
 72 |       },
 73 |       "execution_count": null,
 74 |       "outputs": []
 75 |     },
 76 |     {
 77 |       "cell_type": "markdown",
 78 |       "source": [
 79 |         "This notebook uses the Indonesian voices id-ID-ArdiNeural (Male) and id-ID-GadisNeural (Female). Run the following command to list the other available voices:\n",
 80 |         "```\n",
 81 |         "!edge-tts --list-voices\n",
 82 |         "```"
 83 |       ],
 84 |       "metadata": {
 85 |         "id": "ZepPJfm6is49"
 86 |       }
 87 |     },
 88 |     {
 89 |       "cell_type": "code",
 90 |       "source": [
 91 |         "#@title Generate TTS and Inference\n",
 92 |         "#@markdown Avoid spaces in any folder/file name in the paths below.\n",
 93 |         "import subprocess\n",
 94 |         "import audiosegment\n",
 95 |         "from IPython.display import Audio, display\n",
 96 |         "\n",
 97 |         "gender = \"Female\" #@param [\"Male\", \"Female\"]\n",
 98 |         "text = \"Selamat siang\" #@param {type:\"string\"}\n",
 99 |         "\n",
100 |         "# Map the selected gender to its Indonesian Edge TTS voice.\n",
101 |         "VOICES = {\"Male\": \"id-ID-ArdiNeural\", \"Female\": \"id-ID-GadisNeural\"}\n",
102 |         "command = ['edge-tts', '--voice', VOICES[gender], '--text', text, '--write-media', 'edge.mp3', '--write-subtitles', 'edge.vtt']\n",
103 |         "# Fold stderr into the printed output so edge-tts errors show up in the cell.\n",
104 |         "result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)\n",
105 |         "print(result.stdout)\n",
106 |         "# Stop here on failure instead of converting a missing or stale edge.mp3.\n",
107 |         "result.check_returncode()\n",
108 |         "\n",
109 |         "try:\n",
110 |         "  display(Audio(\"edge.mp3\", autoplay=True))\n",
111 |         "except Exception as e:  print(\"Error:\", str(e))\n",
112 |         "\n",
113 |         "audio = audiosegment.from_file(\"edge.mp3\")\n",
114 |         "\n",
115 |         "# Convert to 2-byte samples, 44100 Hz, single channel\n",
116 |         "audio = audio.set_sample_width(2)\n",
117 |         "audio = audio.set_frame_rate(44100)\n",
118 |         "audio = audio.set_channels(1)\n",
119 |         "\n",
120 |         "# Export to WAV in the working directory; the default AUDIO below assumes that is /content\n",
121 |         "audio.export(\"edge-conv.wav\", format='wav')\n",
122 |         "\n",
123 |         "AUDIO = \"/content/edge-conv\" #@param {type:\"string\"}\n",
124 |         "MODEL = \"/content/so-vits-test/alice.pth\" #@param {type:\"string\"}\n",
125 |         "CONFIG = \"/content/so-vits-test/config.json\" #@param {type:\"string\"}\n",
126 |         "METHOD = \"harvest\" #@param [\"harvest\", \"dio\", \"crepe\", \"crepe-tiny\", \"parselmouth\"]\n",
127 |         "PITCH = 0 #@param {type:\"slider\", min:-12, max:12, step:1}\n",
128 |         "\n",
129 |         "# Auto Pitch Mode (paths are quoted so the shell does not split them)\n",
130 |         "!svc infer \"{AUDIO}.wav\" -c \"{CONFIG}\" -m \"{MODEL}\" -fm {METHOD}\n",
131 |         "\n",
132 |         "# Manual Pitch Mode\n",
133 |         "# !svc infer \"{AUDIO}.wav\" -c \"{CONFIG}\" -m \"{MODEL}\" -fm {METHOD} -na -t {PITCH}\n",
134 |         "\n",
135 |         "# If you get a runtime error, try commenting out the lines below\n",
136 |         "try:\n",
137 |         "  display(Audio(f\"{AUDIO}.out.wav\", autoplay=True))\n",
138 |         "except Exception as e:  print(\"Error:\", str(e))"
139 |       ],
140 |       "metadata": {
141 |         "id": "-vXBn5jFQITv"
142 |       },
143 |       "execution_count": null,
144 |       "outputs": []
145 |     }
146 |   ]
147 | }


--------------------------------------------------------------------------------