├── font
    ├── README.md
    └── arial.ttf
├── auto_subtitle
    ├── __init__.py
    ├── utils.py
    └── cli.py
├── .gitignore
├── requirements.txt
├── setup.py
├── README.md
├── LICENSE
└── whisper_me_this.ipynb


/font/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/auto_subtitle/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | .DS_Store
3 | *.egg-info
4 | auto_subtitle/__pycache__
5 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | youtube-dl
2 | git+https://github.com/openai/whisper.git
3 | 


--------------------------------------------------------------------------------
/font/arial.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/altryne/whisper-me-this/main/font/arial.ttf


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import pkg_resources
 4 | from setuptools import setup, find_packages
 5 | 
 6 | setup(
 7 |     version="1.0",
 8 |     name="auto_subtitle",
 9 |     packages=find_packages(),
10 |     py_modules=["auto_subtitle"],
11 |     author="Miguel Piedrafita",
12 |     install_requires=[
13 |         'youtube-dl',
14 |         'whisper @ git+https://github.com/openai/whisper.git@main#egg=whisper'
15 |     ],
16 |     description="Automatically generate and embed subtitles into your videos",
17 |     entry_points={
18 |         'console_scripts': ['auto_subtitle=auto_subtitle.cli:main'],
19 |     },
20 |     include_package_data=True,
21 | )
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Whisper me this - Automatic subtitles with OpenAI Whisper
 2 | 
 3 | [@altryne](https://twitter.com/altryne) [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/N4N3DWMR1)
 4 | 
 5 | Provide a link to a video on the web, and whisper-me-this will download it, transcribe or translate it, and let you download either a subtitled video or just the SRT file.
 6 | 
 7 | # COLAB LINK: 
 8 | 
 9 | [![ko-fi](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/altryne/whisper-me-this/blob/main/whisper_me_this.ipynb)
10 | 
11 | # Possible ethical uses: 
12 | 
13 | Translate the historic [Ukraine President Zelensky NATO address](https://twitter.com/altryne/status/1575887440782716929). 
14 | Whisper is really good at Ukrainian.
15 | 
16 | Or translate the [batshit crazy](https://twitter.com/altryne/status/1575992634023108610) Orc like shit that the russian orcs are spewing. 
17 | 
18 | ## License
19 | 
20 | This script is open-source and licensed under the MIT License. For more details, check the [LICENSE](LICENSE) file.
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Miguel Piedrafita <soy@miguelpiedrafita.com>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/auto_subtitle/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import Iterator, TextIO
 3 | 
 4 | 
 5 | def str2bool(string):
 6 |     str2val = {"True": True, "False": False}
 7 |     if string in str2val:
 8 |         return str2val[string]
 9 |     else:
10 |         raise ValueError(
11 |             f"Expected one of {set(str2val.keys())}, got {string}")
12 | 
13 | 
def format_timestamp(
    seconds: float,
    always_include_hours: bool = False,
    decimal_marker: str = ".",
):
    """Format a non-negative offset in seconds as ``[HH:]MM:SS<marker>mmm``.

    Args:
        seconds: Offset in seconds; it is rounded to the nearest millisecond.
        always_include_hours: When true, the hours field is emitted even if it
            is zero. Otherwise it only appears when the offset reaches an hour.
        decimal_marker: Text placed between the seconds and the milliseconds.
            Defaults to ``"."``; ``write_srt`` passes ``","``.

    Returns:
        The formatted timestamp. The hours field, when present, is zero-padded
        to at least two digits.

    Raises:
        AssertionError: If ``seconds`` is negative.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    remaining_ms = round(seconds * 1000.0)

    # Peel off each unit in turn, carrying the remainder down to milliseconds.
    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
    minutes, remaining_ms = divmod(remaining_ms, 60_000)
    whole_seconds, millis = divmod(remaining_ms, 1_000)

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return (
        f"{hours_marker}{minutes:02d}:{whole_seconds:02d}"
        f"{decimal_marker}{millis:03d}"
    )


def write_srt(transcript: Iterator[dict], file: TextIO):
    """Write transcript segments to ``file`` as numbered SubRip (.srt) cues.

    Args:
        transcript: Iterable of segment dicts; each one is read for its
            ``"start"`` and ``"end"`` offsets (seconds) and its ``"text"``.
        file: Writable text stream that receives the cues.

    Each cue is written as its 1-based index, a ``start --> end`` timing line
    and the stripped text, followed by a blank line. Timings use the SubRip
    layout ``HH:MM:SS,mmm`` (comma before the milliseconds).
    """
    for index, segment in enumerate(transcript, start=1):
        start = format_timestamp(
            segment["start"], always_include_hours=True, decimal_marker=",")
        end = format_timestamp(
            segment["end"], always_include_hours=True, decimal_marker=",")
        # "-->" is the separator used on the timing line, so it is rewritten
        # when it appears inside the cue text.
        text = segment["text"].strip().replace("-->", "->")
        print(
            f"{index}\n"
            f"{start} --> {end}\n"
            f"{text}\n",
            file=file,
            flush=True,
        )
41 | 
42 | 
def filename(path):
    """Return the final component of ``path`` with its last extension removed."""
    base = os.path.basename(path)
    stem, _extension = os.path.splitext(base)
    return stem
45 | 


--------------------------------------------------------------------------------
/auto_subtitle/cli.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import ffmpeg
  3 | import whisper
  4 | import argparse
  5 | import warnings
  6 | import tempfile
  7 | from .utils import filename, str2bool, write_srt
  8 | 
  9 | 
 10 | def main():
 11 |     parser = argparse.ArgumentParser(
 12 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 13 |     parser.add_argument("video", nargs="+", type=str,
 14 |                         help="paths to video files to transcribe")
 15 |     parser.add_argument("--model", default="small",
 16 |                         choices=whisper.available_models(), help="name of the Whisper model to use")
 17 |     parser.add_argument("--output_dir", "-o", type=str,
 18 |                         default=".", help="directory to save the outputs")
 19 |     
 20 |     parser.add_argument("--save_subtitles", type=str2bool, default=True,
 21 |                         help="Whether to save the srt file alongside the video")
 22 |     
 23 |     parser.add_argument("--verbose", type=str2bool, default=False,
 24 |                         help="Whether to print out the progress and debug messages")
 25 | 
 26 |     parser.add_argument("--task", type=str, default="transcribe", choices=[
 27 |                         "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
 28 | 
 29 |     args = parser.parse_args().__dict__
 30 |     model_name: str = args.pop("model")
 31 |     output_dir: str = args.pop("output_dir")
 32 |     save_subtitles: bool = args.pop("save_subtitles")
 33 |     os.makedirs(output_dir, exist_ok=True)
 34 | 
 35 |     if model_name.endswith(".en"):
 36 |         warnings.warn(
 37 |             f"{model_name} is an English-only model, forcing English detection.")
 38 |         args["language"] = "en"
 39 | 
 40 |     model = whisper.load_model(model_name)
 41 |     audios = get_audio(args.pop("video"))
 42 |     subtitles = get_subtitles(
 43 |         audios, lambda audio_path: model.transcribe(audio_path, **args), save_subtitles, output_dir
 44 |     )
 45 |     # bash command to download a youtube video with `youtube-dl` and save it as `video.mp4`:
 46 |     # youtube-dl -f 22 -o video.mp4 https://www.youtube.com/watch?v=QH2-TGUlwu4
 47 | 
 48 |     for path, srt_path in subtitles.items():
 49 |         out_path = os.path.join(output_dir, f"{filename(path)}.mp4")
 50 | 
 51 |         print(f"Adding subtitles to {filename(path)}...")
 52 | 
 53 |         video = ffmpeg.input(path)
 54 |         audio = video.audio
 55 |         style = "FontName=Arial,FontSize=4"
 56 |         fonts_dir = "../font/arial.ttf"
 57 |         stderr = ffmpeg.concat(
 58 |             video.filter('subtitles', srt_path, fontsdir=fonts_dir,force_style="OutlineColour=&H40000000,BorderStyle=3,FontName=Arial"), audio, v=1, a=1
 59 |         ).output(out_path).run(quiet=True, overwrite_output=True)
 60 | 
 61 |         print(f"Saved subtitled video to {os.path.abspath(out_path)}.")
 62 | 
 63 | 
 64 | def get_audio(paths):
 65 |     temp_dir = tempfile.gettempdir()
 66 | 
 67 |     audio_paths = {}
 68 | 
 69 |     for path in paths:
 70 |         print(f"Extracting audio from {filename(path)}...")
 71 |         output_path = os.path.join(temp_dir, f"{filename(path)}.wav")
 72 | 
 73 |         ffmpeg.input(path).output(
 74 |             output_path,
 75 |             acodec="pcm_s16le", ac=1, ar="16k"
 76 |         ).run(quiet=True, overwrite_output=True)
 77 | 
 78 |         audio_paths[path] = output_path
 79 | 
 80 |     return audio_paths
 81 | 
 82 | 
 83 | def get_subtitles(audio_paths: list, transcribe: callable, save_subtitles: bool, output_dir: str):
 84 |     temp_dir = tempfile.gettempdir()
 85 |     subtitles_path = {}
 86 | 
 87 |     for path, audio_path in audio_paths.items():
 88 |         if not save_subtitles:
 89 |             srt_path = os.path.join(temp_dir, f"{filename(path)}.srt")
 90 |         else:
 91 |             srt_path = os.path.join(output_dir, f"{filename(path)}.srt")
 92 |             
 93 |         print(
 94 |             f"Generating subtitles for {filename(path)}... This might take a while."
 95 |         )
 96 | 
 97 |         warnings.filterwarnings("ignore")
 98 |         result = transcribe(audio_path)
 99 |         warnings.filterwarnings("default")
100 | 
101 |         with open(srt_path, "w", encoding="utf-8") as srt:
102 |             write_srt(result["segments"], file=srt)
103 | 
104 |         subtitles_path[path] = srt_path
105 | 
106 |     return subtitles_path
107 | 
108 | 
# Run the command-line interface when this module is executed rather than imported.
if __name__ == "__main__":
    main()
111 | 


--------------------------------------------------------------------------------
/whisper_me_this.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "private_outputs": true,
  7 |       "provenance": [],
  8 |       "machine_shape": "hm",
  9 |       "collapsed_sections": [],
 10 |       "authorship_tag": "ABX9TyN9aFsa63CY2IZVFwJxU9I+",
 11 |       "include_colab_link": true
 12 |     },
 13 |     "kernelspec": {
 14 |       "name": "python3",
 15 |       "display_name": "Python 3"
 16 |     },
 17 |     "language_info": {
 18 |       "name": "python"
 19 |     },
 20 |     "accelerator": "GPU",
 21 |     "gpuClass": "standard"
 22 |   },
 23 |   "cells": [
 24 |     {
 25 |       "cell_type": "markdown",
 26 |       "metadata": {
 27 |         "id": "view-in-github",
 28 |         "colab_type": "text"
 29 |       },
 30 |       "source": [
 31 |         "<a href=\"https://colab.research.google.com/github/altryne/whisper-me-this/blob/main/whisper_me_this.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 32 |       ]
 33 |     },
 34 |     {
 35 |       "cell_type": "markdown",
 36 |       "source": [
 37 |         "# Whisper me this - Add subtitles to any video downloadable with yt-download by [@altryne](https://twitter.com/altryne) [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/N4N3DWMR1)\n"
 38 |       ],
 39 |       "metadata": {
 40 |         "id": "xIJcmCwc_ybP"
 41 |       }
 42 |     },
 43 |     {
 44 |       "cell_type": "code",
 45 |       "source": [
 46 |         "#@title <font size=\"5\">⬇️ ឵឵<i>Build Requirements</font> { vertical-output: true }\n",
 47 |         "import os, uuid, re, IPython\n",
 48 |         "import ipywidgets as widgets\n",
 49 |         "import time\n",
 50 |         "\n",
 51 |         "from glob import glob\n",
 52 |         "from google.colab import output, drive\n",
 53 |         "\n",
 54 |         "from IPython.display import clear_output\n",
 55 |         "import os, sys, urllib.request\n",
 56 |         "HOME = os.path.expanduser(\"~\")\n",
 57 |         "pathDoneCMD = f'{HOME}/doneCMD.sh'\n",
 58 |         "if not os.path.exists(f\"{HOME}/.ipython/ttmg.py\"):\n",
 59 |         "    hCode = \"https://raw.githubusercontent.com/yunooooo/gcct/master/res/ttmg.py\"\n",
 60 |         "    urllib.request.urlretrieve(hCode, f\"{HOME}/.ipython/ttmg.py\")\n",
 61 |         "\n",
 62 |         "from ttmg import (\n",
 63 |         "    loadingAn,\n",
 64 |         "    textAn,\n",
 65 |         ")\n",
 66 |         "\n",
 67 |         "loadingAn(name=\"lds\")\n",
 68 |         "textAn(\"Cloning Repositories...\", ty='twg')\n",
 69 |         "!git clone https://github.com/XniceCraft/ffmpeg-colab.git\n",
 70 |         "!chmod 755 ./ffmpeg-colab/install\n",
 71 |         "textAn(\"Installing FFmpeg...\", ty='twg')\n",
 72 |         "!./ffmpeg-colab/install\n",
 73 |         "clear_output()\n",
 74 |         "print('FFMPEG installed!')\n",
 75 |         "!rm -fr /content/ffmpeg-colab\n",
 76 |         "textAn(\"Installing whisper and auto-subtitle\", ty='twg')\n",
 77 |         "\n",
 78 |         "!pip install git+https://github.com/altryne/whisper-me-this.git\n",
 79 |         "!pip install -U kora\n",
 80 |         "\n",
 81 |         "clear_output()\n",
 82 |         "print('Whisper and auto-subtitle installed')\n",
 83 |         "textAn(\"Installing youtube-dl\", ty='twg')\n",
 84 |         "!pip install youtube-dl\n",
 85 |         "clear_output()\n",
 86 |         "print('Installation complete. Please add a URL in the next box')"
 87 |       ],
 88 |       "metadata": {
 89 |         "id": "LMKssMq4AY6e",
 90 |         "cellView": "form"
 91 |       },
 92 |       "execution_count": null,
 93 |       "outputs": []
 94 |     },
 95 |     {
 96 |       "cell_type": "code",
 97 |       "source": [
 98 |         "#@title <font size=\"5\">⬇️ ឵឵<i>Add a URL</font> and configure settings  { vertical-output: true }\n",
 99 |         "\n",
100 |         "#@markdown youtube-dl supports video URLs from youtube, twitter and many other sites. \n",
101 |         "#@markdown if your provided URL didn't work, download manually and check `use_uploaded_file` and provide a path to your video\n",
102 |         "\n",
103 |         "url = \"https://cuteus.blob.core.windows.net/media/fj5cOpfSUvM_244_bestaudio_117.2_256_638002045416108584.mp4?sp=rli&st=2021-07-04T09:18:09Z&se=2029-07-04T17:18:09Z&sv=2021-06-08&sr=c&sig=bOUEq3OrpO7l2HTZhBToQbYbLftYQ0ocPq8q632US%2BQ%3D\" #@param {type:\"string\"}\n",
104 |         "use_uploaded_file = True #@param {type:\"boolean\"}\n",
105 |         "\n",
106 |         "uploaded_file_path = \"nonstop-short-subliminal.mp4\" #@param {type:\"string\"}\n",
107 |         "\n",
108 |         "\n",
109 |         "#@markdown Choose a Whisper model (choose .en if you are 100% sure the source is English)\n",
110 |         "whisper_size = 'medium.en' #@param [\"tiny.en\",\"tiny\",\"base.en\",\"base\",\"small.en\",\"small\",\"medium.en\",\"medium\",\"large\"] \n",
111 |         "#@markdown Translate to english or only transcribe the audio in native language?\n",
112 |         "task = 'transcribe' #@param [\"transcribe\", \"translate\"] "
113 |       ],
114 |       "metadata": {
115 |         "id": "PrxY7FyiCQSO",
116 |         "cellView": "form"
117 |       },
118 |       "execution_count": null,
119 |       "outputs": []
120 |     },
121 |     {
122 |       "cell_type": "code",
123 |       "source": [
124 |         "#@title <font size=\"5\">🎥 ឵឵<i>Download video</font> and start processing subtitles - may take a while  { vertical-output: true }\n",
125 |         "try:\n",
126 |         "  if not use_uploaded_file:\n",
127 |         "    !youtube-dl {url} -o {filename}\n",
128 |         "    filename = !youtube-dl --get-filename -o 'init_vid.%(ext)s' {url} --restrict-filenames\n",
129 |         "\n",
130 |         "    filename = filename[0]\n",
131 |         "    print(f'Downloaded `{filename}`, starting subtitle additions')\n",
132 |         "  else:\n",
133 |         "    filename = uploaded_file_path\n",
134 |         "  !auto_subtitle {filename} --model {whisper_size} --output_dir 'output' --task {task}\n",
135 |         "except Exception as e:\n",
136 |         "  print(\"oops, something went wrong\")\n",
137 |         "  print(e)\n"
138 |       ],
139 |       "metadata": {
140 |         "id": "GX6NosX5Ebor",
141 |         "cellView": "form"
142 |       },
143 |       "execution_count": null,
144 |       "outputs": []
145 |     },
146 |     {
147 |       "cell_type": "code",
148 |       "source": [
149 |         "#@title <font size=\"5\">⬇️ ឵឵<i>Download subtitled video file</font>  { vertical-output: true }\n",
150 |         "from google.colab import files\n",
151 |         "from kora import drive\n",
152 |         "#@markdown Selecting this will request permission to access your Google Drive and upload the file with public permission, which is much faster than downloading the file from Colab.\n",
153 |         "\n",
154 |         "download_with_gooogle_drive = True #@param {type:\"boolean\"}\n",
155 |         "\n",
156 |         "if download_with_gooogle_drive:\n",
157 |         "  uploaded = drive.upload_public(f'output/{filename}')\n",
158 |         "  print(uploaded)\n",
159 |         "else:\n",
160 |         "  files.download(f'output/{filename}')"
161 |       ],
162 |       "metadata": {
163 |         "cellView": "form",
164 |         "id": "1fGJ96rMnG6x"
165 |       },
166 |       "execution_count": null,
167 |       "outputs": []
168 |     }
169 |   ]
170 | }


--------------------------------------------------------------------------------