├── font ├── README.md └── arial.ttf ├── auto_subtitle ├── __init__.py ├── utils.py └── cli.py ├── .gitignore ├── requirements.txt ├── setup.py ├── README.md ├── LICENSE └── whisper_me_this.ipynb /font/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /auto_subtitle/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | .DS_Store 3 | *.egg-info 4 | auto_subtitle/__pycache__ 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | youtube-dl 2 | git+https://github.com/openai/whisper.git 3 | -------------------------------------------------------------------------------- /font/arial.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/altryne/whisper-me-this/main/font/arial.ttf -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pkg_resources 4 | from setuptools import setup, find_packages 5 | 6 | setup( 7 | version="1.0", 8 | name="auto_subtitle", 9 | packages=find_packages(), 10 | py_modules=["auto_subtitle"], 11 | author="Miguel Piedrafita", 12 | install_requires=[ 13 | 'youtube-dl', 14 | 'whisper @ git+https://github.com/openai/whisper.git@main#egg=whisper' 15 | ], 16 | description="Automatically generate and embed subtitles into your videos", 17 | entry_points={ 18 | 'console_scripts': ['auto_subtitle=auto_subtitle.cli:main'], 19 | }, 20 | 
include_package_data=True, 21 | ) 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Whisper me this - Automatic subtitles with OpenAI Whisper 2 | 3 | [@altryne](https://twitter.com/altryne) [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/N4N3DWMR1) 4 | 5 | Provide a link on the web, and whisper-me-this will download, transcribe or translate your video, and let you download a subbed video or just the SRT files. 6 | 7 | # COLAB LINK: 8 | 9 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/altryne/whisper-me-this/blob/main/whisper_me_this.ipynb) 10 | 11 | # Possible ethical uses: 12 | 13 | Translate the historic [Ukraine President Zelensky NATO address](https://twitter.com/altryne/status/1575887440782716929). 14 | Whisper is really good at Ukrainian. 15 | 16 | Or translate the [batshit crazy](https://twitter.com/altryne/status/1575992634023108610) Orc-like shit that the Russian orcs are spewing. 17 | 18 | ## License 19 | 20 | This script is open-source and licensed under the MIT License. For more details, check the [LICENSE](LICENSE) file. 
21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Miguel Piedrafita 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /auto_subtitle/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Iterator, TextIO 3 | 4 | 5 | def str2bool(string): 6 | str2val = {"True": True, "False": False} 7 | if string in str2val: 8 | return str2val[string] 9 | else: 10 | raise ValueError( 11 | f"Expected one of {set(str2val.keys())}, got {string}") 12 | 13 | 14 | def format_timestamp(seconds: float, always_include_hours: bool = False): 15 | assert seconds >= 0, "non-negative timestamp expected" 16 | milliseconds = round(seconds * 1000.0) 17 | 18 | hours = milliseconds // 3_600_000 19 | milliseconds -= hours * 3_600_000 20 | 21 | minutes = milliseconds // 60_000 22 | milliseconds -= minutes * 60_000 23 | 24 | seconds = milliseconds // 1_000 25 | milliseconds -= seconds * 1_000 26 | 27 | hours_marker = f"{hours}:" if always_include_hours or hours > 0 else "" 28 | return f"{hours_marker}{minutes:02d}:{seconds:02d}.{milliseconds:03d}" 29 | 30 | 31 | def write_srt(transcript: Iterator[dict], file: TextIO): 32 | for i, segment in enumerate(transcript, start=1): 33 | print( 34 | f"{i}\n" 35 | f"{format_timestamp(segment['start'], always_include_hours=True)} --> " 36 | f"{format_timestamp(segment['end'], always_include_hours=True)}\n" 37 | f"{segment['text'].strip().replace('-->', '->')}\n", 38 | file=file, 39 | flush=True, 40 | ) 41 | 42 | 43 | def filename(path): 44 | return os.path.splitext(os.path.basename(path))[0] 45 | -------------------------------------------------------------------------------- /auto_subtitle/cli.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ffmpeg 3 | import whisper 4 | import argparse 5 | import warnings 6 | import tempfile 7 | from .utils import filename, str2bool, write_srt 8 | 9 | 10 | def main(): 11 | parser = argparse.ArgumentParser( 12 | 
formatter_class=argparse.ArgumentDefaultsHelpFormatter) 13 | parser.add_argument("video", nargs="+", type=str, 14 | help="paths to video files to transcribe") 15 | parser.add_argument("--model", default="small", 16 | choices=whisper.available_models(), help="name of the Whisper model to use") 17 | parser.add_argument("--output_dir", "-o", type=str, 18 | default=".", help="directory to save the outputs") 19 | 20 | parser.add_argument("--save_subtitles", type=str2bool, default=True, 21 | help="Whether to save the srt file alongside the video") 22 | 23 | parser.add_argument("--verbose", type=str2bool, default=False, 24 | help="Whether to print out the progress and debug messages") 25 | 26 | parser.add_argument("--task", type=str, default="transcribe", choices=[ 27 | "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')") 28 | 29 | args = parser.parse_args().__dict__ 30 | model_name: str = args.pop("model") 31 | output_dir: str = args.pop("output_dir") 32 | save_subtitles: bool = args.pop("save_subtitles") 33 | os.makedirs(output_dir, exist_ok=True) 34 | 35 | if model_name.endswith(".en"): 36 | warnings.warn( 37 | f"{model_name} is an English-only model, forcing English detection.") 38 | args["language"] = "en" 39 | 40 | model = whisper.load_model(model_name) 41 | audios = get_audio(args.pop("video")) 42 | subtitles = get_subtitles( 43 | audios, lambda audio_path: model.transcribe(audio_path, **args), save_subtitles, output_dir 44 | ) 45 | # bash command to download a youtube video with `youtube-dl` and save it as `video.mp4`: 46 | # youtube-dl -f 22 -o video.mp4 https://www.youtube.com/watch?v=QH2-TGUlwu4 47 | 48 | for path, srt_path in subtitles.items(): 49 | out_path = os.path.join(output_dir, f"{filename(path)}.mp4") 50 | 51 | print(f"Adding subtitles to {filename(path)}...") 52 | 53 | video = ffmpeg.input(path) 54 | audio = video.audio 55 | style = "FontName=Arial,FontSize=4" 56 | 
fonts_dir = "../font/arial.ttf" 57 | stderr = ffmpeg.concat( 58 | video.filter('subtitles', srt_path, fontsdir=fonts_dir,force_style="OutlineColour=&H40000000,BorderStyle=3,FontName=Arial"), audio, v=1, a=1 59 | ).output(out_path).run(quiet=True, overwrite_output=True) 60 | 61 | print(f"Saved subtitled video to {os.path.abspath(out_path)}.") 62 | 63 | 64 | def get_audio(paths): 65 | temp_dir = tempfile.gettempdir() 66 | 67 | audio_paths = {} 68 | 69 | for path in paths: 70 | print(f"Extracting audio from {filename(path)}...") 71 | output_path = os.path.join(temp_dir, f"{filename(path)}.wav") 72 | 73 | ffmpeg.input(path).output( 74 | output_path, 75 | acodec="pcm_s16le", ac=1, ar="16k" 76 | ).run(quiet=True, overwrite_output=True) 77 | 78 | audio_paths[path] = output_path 79 | 80 | return audio_paths 81 | 82 | 83 | def get_subtitles(audio_paths: list, transcribe: callable, save_subtitles: bool, output_dir: str): 84 | temp_dir = tempfile.gettempdir() 85 | subtitles_path = {} 86 | 87 | for path, audio_path in audio_paths.items(): 88 | if not save_subtitles: 89 | srt_path = os.path.join(temp_dir, f"{filename(path)}.srt") 90 | else: 91 | srt_path = os.path.join(output_dir, f"{filename(path)}.srt") 92 | 93 | print( 94 | f"Generating subtitles for {filename(path)}... This might take a while." 
95 | ) 96 | 97 | warnings.filterwarnings("ignore") 98 | result = transcribe(audio_path) 99 | warnings.filterwarnings("default") 100 | 101 | with open(srt_path, "w", encoding="utf-8") as srt: 102 | write_srt(result["segments"], file=srt) 103 | 104 | subtitles_path[path] = srt_path 105 | 106 | return subtitles_path 107 | 108 | 109 | if __name__ == '__main__': 110 | main() 111 | -------------------------------------------------------------------------------- /whisper_me_this.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "private_outputs": true, 7 | "provenance": [], 8 | "machine_shape": "hm", 9 | "collapsed_sections": [], 10 | "authorship_tag": "ABX9TyN9aFsa63CY2IZVFwJxU9I+", 11 | "include_colab_link": true 12 | }, 13 | "kernelspec": { 14 | "name": "python3", 15 | "display_name": "Python 3" 16 | }, 17 | "language_info": { 18 | "name": "python" 19 | }, 20 | "accelerator": "GPU", 21 | "gpuClass": "standard" 22 | }, 23 | "cells": [ 24 | { 25 | "cell_type": "markdown", 26 | "metadata": { 27 | "id": "view-in-github", 28 | "colab_type": "text" 29 | }, 30 | "source": [ 31 | "\"Open" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "source": [ 37 | "# Whisper me this - Add subtitles to any video downloadable with yt-download by [@altryne](https://twitter.com/altryne) [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/N4N3DWMR1)\n" 38 | ], 39 | "metadata": { 40 | "id": "xIJcmCwc_ybP" 41 | } 42 | }, 43 | { 44 | "cell_type": "code", 45 | "source": [ 46 | "#@title ⬇️ ឵឵Build Requirements { vertical-output: true }\n", 47 | "import os, uuid, re, IPython\n", 48 | "import ipywidgets as widgets\n", 49 | "import time\n", 50 | "\n", 51 | "from glob import glob\n", 52 | "from google.colab import output, drive\n", 53 | "\n", 54 | "from IPython.display import clear_output\n", 55 | "import os, sys, urllib.request\n", 56 | "HOME = 
os.path.expanduser(\"~\")\n", 57 | "pathDoneCMD = f'{HOME}/doneCMD.sh'\n", 58 | "if not os.path.exists(f\"{HOME}/.ipython/ttmg.py\"):\n", 59 | " hCode = \"https://raw.githubusercontent.com/yunooooo/gcct/master/res/ttmg.py\"\n", 60 | " urllib.request.urlretrieve(hCode, f\"{HOME}/.ipython/ttmg.py\")\n", 61 | "\n", 62 | "from ttmg import (\n", 63 | " loadingAn,\n", 64 | " textAn,\n", 65 | ")\n", 66 | "\n", 67 | "loadingAn(name=\"lds\")\n", 68 | "textAn(\"Cloning Repositories...\", ty='twg')\n", 69 | "!git clone https://github.com/XniceCraft/ffmpeg-colab.git\n", 70 | "!chmod 755 ./ffmpeg-colab/install\n", 71 | "textAn(\"Installing FFmpeg...\", ty='twg')\n", 72 | "!./ffmpeg-colab/install\n", 73 | "clear_output()\n", 74 | "print('FFMPEG installed!')\n", 75 | "!rm -fr /content/ffmpeg-colab\n", 76 | "textAn(\"Installing whisper and auto-subtitle\", ty='twg')\n", 77 | "\n", 78 | "!pip install git+https://github.com/altryne/whisper-me-this.git\n", 79 | "!pip install -U kora\n", 80 | "\n", 81 | "clear_output()\n", 82 | "print('Whisper and auto-subtitle installed')\n", 83 | "textAn(\"Installing youtube-dl\", ty='twg')\n", 84 | "!pip install youtube-dl\n", 85 | "clear_output()\n", 86 | "print('Installation complete. Please add a URL in the next box')" 87 | ], 88 | "metadata": { 89 | "id": "LMKssMq4AY6e", 90 | "cellView": "form" 91 | }, 92 | "execution_count": null, 93 | "outputs": [] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "source": [ 98 | "#@title ⬇️ ឵឵Add a URL and configure settings { vertical-output: true }\n", 99 | "\n", 100 | "#@markdown youtube-dl supports video URLs from youtube, twitter and many other sites. 
\n", 101 | "#@markdown If the URL you provided didn't work, download the video manually, check `use_uploaded_file`, and provide a path to your video\n", 102 | "\n", 103 | "url = \"https://cuteus.blob.core.windows.net/media/fj5cOpfSUvM_244_bestaudio_117.2_256_638002045416108584.mp4?sp=rli&st=2021-07-04T09:18:09Z&se=2029-07-04T17:18:09Z&sv=2021-06-08&sr=c&sig=bOUEq3OrpO7l2HTZhBToQbYbLftYQ0ocPq8q632US%2BQ%3D\" #@param {type:\"string\"}\n", 104 | "use_uploaded_file = True #@param {type:\"boolean\"}\n", 105 | "\n", 106 | "uploaded_file_path = \"nonstop-short-subliminal.mp4\" #@param {type:\"string\"}\n", 107 | "\n", 108 | "\n", 109 | "#@markdown Choose a Whisper model (choose a .en model if you are 100% sure the source is English)\n", 110 | "whisper_size = 'medium.en' #@param [\"tiny.en\",\"tiny\",\"base.en\",\"base\",\"small.en\",\"small\",\"medium.en\",\"medium\",\"large\"] \n", 111 | "#@markdown Translate to English, or only transcribe the audio in its native language?\n", 112 | "task = 'transcribe' #@param [\"transcribe\", \"translate\"] " 113 | ], 114 | "metadata": { 115 | "id": "PrxY7FyiCQSO", 116 | "cellView": "form" 117 | }, 118 | "execution_count": null, 119 | "outputs": [] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "source": [ 124 | "#@title 🎥 ឵឵Download video and start processing subtitles - may take a while { vertical-output: true }\n", 125 | "try:\n", 126 | " if not use_uploaded_file:\n", 127 | " !youtube-dl {url} -o {filename}\n", 128 | " filename = !youtube-dl --get-filename -o 'init_vid.%(ext)s' {url} --restrict-filenames\n", 129 | "\n", 130 | " filename = filename[0]\n", 131 | " print(f'Downloaded `{filename}`, starting subtitle additions')\n", 132 | " else:\n", 133 | " filename = uploaded_file_path\n", 134 | " !auto_subtitle {filename} --model {whisper_size} --output_dir 'output' --task {task}\n", 135 | "except Exception as e:\n", 136 | " print(\"oops, something went wrong\")\n", 137 | " print(e)\n" 138 | ], 139 | "metadata": { 140 | "id": "GX6NosX5Ebor", 141 | "cellView": 
"form" 142 | }, 143 | "execution_count": null, 144 | "outputs": [] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "source": [ 149 | "#@title ⬇️ ឵឵Download subtitled video file { vertical-output: true }\n", 150 | "from google.colab import files\n", 151 | "from kora import drive\n", 152 | "#@markdown Selecting this will request permission to access your Google Drive and upload the file with public permission, which is much faster than downloading the file from Colab.\n", 153 | "\n", 154 | "download_with_gooogle_drive = True #@param {type:\"boolean\"}\n", 155 | "\n", 156 | "if download_with_gooogle_drive:\n", 157 | " uploaded = drive.upload_public(f'output/{filename}')\n", 158 | " print(uploaded)\n", 159 | "else:\n", 160 | " files.download(f'output/{filename}')" 161 | ], 162 | "metadata": { 163 | "cellView": "form", 164 | "id": "1fGJ96rMnG6x" 165 | }, 166 | "execution_count": null, 167 | "outputs": [] 168 | } 169 | ] 170 | } --------------------------------------------------------------------------------