├── FUNDING.yml ├── README.md └── WhisperX_Youtube_SRT.ipynb /FUNDING.yml: -------------------------------------------------------------------------------- 1 | ko_fi: ardhach 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WhisperX-Youtube-SRT 2 | Create YouTube SRT subtitles with WhisperX using Google Colab 3 | 4 | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/R6R7AH1FA) 5 | 6 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ardha27/WhisperX-Youtube-SRT/blob/main/WhisperX_Youtube_SRT.ipynb) 7 | -------------------------------------------------------------------------------- /WhisperX_Youtube_SRT.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "gpuType": "T4", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | }, 17 | "accelerator": "GPU", 18 | "gpuClass": "standard" 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "<a href=\"https://colab.research.google.com/github/ardha27/WhisperX-Youtube-SRT/blob/main/WhisperX_Youtube_SRT.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "source": [ 34 | "# Install WhisperX" 35 | ], 36 | "metadata": { 37 | "id": "80yOVjb-vAsD" 38 | } 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "id": "Hcc35ui3ux8l" 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "%%capture\n", 49 | "!pip install virtualenv\n", 50 | "!virtualenv whisper-env\n", 51 | "!source whisper-env/bin/activate\n", 52 | "!whisper-env/bin/pip install git+https://github.com/m-bain/whisperx.git@v2.0.1" 53 | ] 54 | }, 55 | 
{ 56 | "cell_type": "code", 57 | "source": [ 58 | "!pip uninstall torchaudio -y\n", 59 | "!pip install torchaudio==0.13.1\n", 60 | "!pip install torch==1.13.1 --index-url https://download.pytorch.org/whl/cu117" 61 | ], 62 | "metadata": { 63 | "id": "B9jVTgLUooEK" 64 | }, 65 | "execution_count": null, 66 | "outputs": [] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "source": [ 71 | "#@title 1. Install Library for Youtube MP3 Download\n", 72 | "!pip install yt_dlp\n", 73 | "!pip install ffmpeg\n", 74 | "!mkdir -p youtubeaudio" 75 | ], 76 | "metadata": { 77 | "id": "gy550p0Vj0DG" 78 | }, 79 | "execution_count": null, 80 | "outputs": [] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "source": [ 85 | "#@title Download Youtube MP3\n", 86 | "from __future__ import unicode_literals\n", 87 | "import yt_dlp\n", 88 | "import ffmpeg\n", 89 | "import sys\n", 90 | "\n", 91 | "ydl_opts = {\n", 92 | " 'format': 'bestaudio/best',\n", 93 | "# 'outtmpl': 'output.%(ext)s',\n", 94 | " 'postprocessors': [{\n", 95 | " 'key': 'FFmpegExtractAudio',\n", 96 | " 'preferredcodec': 'mp3',\n", 97 | " }],\n", 98 | " \"outtmpl\": 'audio', # this is where you can edit how you'd like the filenames to be formatted\n", 99 | "}\n", 100 | "def download_from_url(url, options=None):\n", 101 | "    \"\"\"Download `url` with yt_dlp using `options` (defaults to the module-level `ydl_opts`).\"\"\"\n", 102 | "    # Open the YoutubeDL here instead of reading a global `ydl`, so the function also works when called from another cell.\n", 103 | "    with yt_dlp.YoutubeDL(ydl_opts if options is None else options) as ydl:\n", 104 | "        ydl.download([url])\n", 105 | "\n", 106 | "\n", 107 | "url = \"https://www.youtube.com/watch?v=l665JvVDKhk&t=9s\" #@param {type:\"string\"}\n", 108 | "download_from_url(url)\n", 109 | "\n", 110 | "\n", 111 | "\n" 112 | ], 113 | "metadata": { 114 | "id": "Z2leVovKj26n" 115 | }, 116 | "execution_count": null, 117 | "outputs": [] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "source": [ 122 | "# Get SRT Subtitle File Saved using WhisperX (Change the language)" 123 | ], 124 | "metadata": { 125 | "id": "QxEqkpyuQL5_" 126 | } 127 | }, 128 | { 129 | "cell_type": "code", 
130 | "source": [ 131 | "!whisper-env/bin/whisperx audio.mp3 --language id --model large-v2 --vad_filter True --output_format srt --align_model WAV2VEC2_ASR_LARGE_LV60K_960H" 132 | ], 133 | "metadata": { 134 | "id": "qWk9Y3Uxv9qu" 135 | }, 136 | "execution_count": null, 137 | "outputs": [] 138 | } 139 | ] 140 | } --------------------------------------------------------------------------------