├── README.md └── Edge_TTS_+_SO_VITS.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Edge-TTS-SOVITS 2 | Combining Edge TTS + SO-VITS Voice Converter Using Google Colab 3 | ## Leave A Star if This Repo Was Helpful 4 | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/R6R7AH1FA) 5 | 6 | Trakteer 7 | 8 | 9 | ### Tutorial (Indonesian) 10 | https://youtu.be/MEsep7cg0dI 11 | 12 | ### Google Colab 13 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ardha27/Edge_TTS_Sovits/blob/main/Edge_TTS_%2B_SO_VITS.ipynb) 14 | -------------------------------------------------------------------------------- /Edge_TTS_+_SO_VITS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "gpuType": "T4", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | }, 17 | "accelerator": "GPU", 18 | "gpuClass": "standard" 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "source": [ 34 | "#@title Mount Google Drive\n", 35 | "from google.colab import drive\n", 36 | "drive.mount('/content/drive')" 37 | ], 38 | "metadata": { 39 | "id": "GCEVfxIOh7tE" 40 | }, 41 | "execution_count": null, 42 | "outputs": [] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": true, 49 | "id": "6BnxhPgVPvzU" 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "#@title Install dependencies\n", 54 | "!pip install pyworld==0.3.2\n", 55 | "!python -m pip install -U pip wheel\n", 56 | "!pip install -U ipython\n", 57 | "!pip install -U so-vits-svc-fork\n", 58 | "!pip install edge-tts\n", 59 | "!pip install audiosegment" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "source": [ 65 | "#@title Credit - https://huggingface.co/spaces/zomehwh/sovits-models\n", 66 | "!mkdir so-vits-test\n", 67 | "!wget -N \"https://huggingface.co/spaces/zomehwh/sovits-models/resolve/main/models/alice/alice.pth\" -P so-vits-test/\n", 68 | "!wget -N \"https://huggingface.co/spaces/zomehwh/sovits-models/resolve/main/models/alice/config.json\" -P so-vits-test/" 69 | ], 70 | "metadata": { 71 | "id": "LPysEbyRUOTH" 72 | }, 73 | "execution_count": null, 74 | "outputs": [] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "source": [ 79 | "Voice on this code are using Indonesian speaker id-ID-ArdiNeural (Male) and id-ID-GadisNeural (Female). You can run this command to see other voice model\n", 80 | "```\n", 81 | "!edge-tts --list-voices\n", 82 | "```" 83 | ], 84 | "metadata": { 85 | "id": "ZepPJfm6is49" 86 | } 87 | }, 88 | { 89 | "cell_type": "code", 90 | "source": [ 91 | "#@title Generate TTS and Inference\n", 92 | "#@markdown don't give space on any folder/file name on the path\n", 93 | "import subprocess\n", 94 | "import audiosegment\n", 95 | "from IPython.display import Audio, display\n", 96 | "\n", 97 | "gender = \"Female\" #@param [\"Male\", \"Female\"]\n", 98 | "text = \"Selamat siang\" #@param {type:\"string\"}\n", 99 | "\n", 100 | "if gender == \"Male\":\n", 101 | " command = ['edge-tts', '--voice', 'id-ID-ArdiNeural', '--text', text, '--write-media', 'edge.mp3', '--write-subtitles', 'edge.vtt']\n", 102 | " result = subprocess.run(command, stdout=subprocess.PIPE, text=True)\n", 103 | " print(result.stdout)\n", 104 | "elif gender == \"Female\":\n", 105 | " command = ['edge-tts', '--voice', 'id-ID-GadisNeural', '--text', text, '--write-media', 'edge.mp3', '--write-subtitles', 'edge.vtt']\n", 106 | " result = subprocess.run(command, stdout=subprocess.PIPE, text=True)\n", 107 | " print(result.stdout)\n", 108 | "\n", 109 | "try:\n", 110 | " display(Audio(\"edge.mp3\", autoplay=True))\n", 111 | "except Exception as e: print(\"Error:\", str(e))\n", 112 | "\n", 113 | "audio = audiosegment.from_file(\"edge.mp3\")\n", 114 | "\n", 115 | "# Set the output format to WAV\n", 116 | "audio = audio.set_sample_width(2)\n", 117 | "audio = audio.set_frame_rate(44100)\n", 118 | "audio = audio.set_channels(1)\n", 119 | "\n", 120 | "# Export the audio to WAV format\n", 121 | "audio.export(\"edge-conv.wav\", format='wav')\n", 122 | "\n", 123 | "AUDIO = \"/content/edge-conv\" #@param {type:\"string\"}\n", 124 | "MODEL = \"/content/so-vits-test/alice.pth\" #@param {type:\"string\"}\n", 125 | "CONFIG = \"/content/so-vits-test/config.json\" #@param {type:\"string\"}\n", 126 | "METHOD = \"harvest\" #@param [\"harvest\", \"dio\", \"crepe\", \"crepe-tiny\", \"parselmouth\"]\n", 127 | "PITCH = 0 #@param {type:\"slider\", min:-12, max:12, step:1}\n", 128 | "\n", 129 | "# Auto Pitch Mode\n", 130 | "!svc infer {AUDIO}.wav -c {CONFIG} -m {MODEL} -fm {METHOD}\n", 131 | "\n", 132 | "# Manual Pitch Mode\n", 133 | "# !svc infer {AUDIO}.wav -c {CONFIG} -m {MODEL} -fm {METHOD} -na -t {PITCH}\n", 134 | "\n", 135 | "# Try comment this line below if you got Runtime Error\n", 136 | "try:\n", 137 | " display(Audio(f\"{AUDIO}.out.wav\", autoplay=True))\n", 138 | "except Exception as e: print(\"Error:\", str(e))" 139 | ], 140 | "metadata": { 141 | "id": "-vXBn5jFQITv" 142 | }, 143 | "execution_count": null, 144 | "outputs": [] 145 | } 146 | ] 147 | } --------------------------------------------------------------------------------