├── auto_subtitle
    ├── __init__.py
    ├── utils.py
    └── cli.py
├── requirements.txt
├── .gitignore
├── setup.py
├── LICENSE
└── README.md


/auto_subtitle/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | openai-whisper
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | .DS_Store
3 | *.egg-info
4 | auto_subtitle/__pycache__
5 | build
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(
 4 |     version="1.0",
 5 |     name="auto_subtitle",
 6 |     packages=find_packages(),
 7 |     py_modules=["auto_subtitle"],
 8 |     author="Miguel Piedrafita",
 9 |     install_requires=[
10 |         'openai-whisper',
11 |     ],
12 |     description="Automatically generate and embed subtitles into your videos",
13 |     entry_points={
14 |         'console_scripts': ['auto_subtitle=auto_subtitle.cli:main'],
15 |     },
16 |     include_package_data=True,
17 | )
18 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Miguel Piedrafita <soy@miguelpiedrafita.com>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/auto_subtitle/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import Iterator, TextIO
 3 | 
 4 | 
 5 | def str2bool(string):
 6 |     string = string.lower()
 7 |     str2val = {"true": True, "false": False}
 8 | 
 9 |     if string in str2val:
10 |         return str2val[string]
11 |     else:
12 |         raise ValueError(
13 |             f"Expected one of {set(str2val.keys())}, got {string}")
14 | 
15 | 
def format_timestamp(seconds: float, always_include_hours: bool = False):
    """Render a duration in seconds as ``[HH:]MM:SS,mmm``.

    The value is rounded to the nearest millisecond first. The hours field
    is emitted only when it is non-zero or ``always_include_hours`` is set.
    A negative ``seconds`` trips the assertion below.
    """
    assert seconds >= 0, "non-negative timestamp expected"

    remaining_ms = round(seconds * 1000.0)

    # Peel off each unit from largest to smallest.
    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
    minutes, remaining_ms = divmod(remaining_ms, 60_000)
    whole_seconds, millis = divmod(remaining_ms, 1_000)

    if always_include_hours or hours > 0:
        prefix = f"{hours:02d}:"
    else:
        prefix = ""

    return f"{prefix}{minutes:02d}:{whole_seconds:02d},{millis:03d}"
31 | 
32 | 
def write_srt(transcript: Iterator[dict], file: TextIO):
    """Write transcript segments to ``file`` as numbered SRT cues.

    Each segment is read through its ``'start'``, ``'end'`` and ``'text'``
    keys. Cues are numbered from 1, timestamps always include the hours
    field, and every cue is followed by a blank line. The stream is flushed
    after each cue.
    """
    for number, cue in enumerate(transcript, start=1):
        begin = format_timestamp(cue['start'], always_include_hours=True)
        finish = format_timestamp(cue['end'], always_include_hours=True)
        # "-->" separates the two timestamps of a cue, so it is rewritten
        # when it occurs inside the cue text.
        body = cue['text'].strip().replace('-->', '->')

        print(f"{number}\n{begin} --> {finish}\n{body}\n", file=file, flush=True)
43 | 
44 | 
def filename(path):
    """Return the last component of ``path`` with its final extension removed."""
    base = os.path.basename(path)
    stem, _extension = os.path.splitext(base)
    return stem
47 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Automatic subtitles in your videos
 2 | 
 3 | This repository uses `ffmpeg` and [OpenAI's Whisper](https://openai.com/blog/whisper) to automatically generate and overlay subtitles on any video.
 4 | 
 5 | ## Installation
 6 | 
 7 | To get started, you'll need Python 3.7 or newer. Install the binary by running the following command:
 8 | 
 9 |     pip install git+https://github.com/m1guelpf/auto-subtitle.git
10 | 
11 | You'll also need to install [`ffmpeg`](https://ffmpeg.org/), which is available from most package managers:
12 | 
13 | ```bash
14 | # on Ubuntu or Debian
15 | sudo apt update && sudo apt install ffmpeg
16 | 
17 | # on MacOS using Homebrew (https://brew.sh/)
18 | brew install ffmpeg
19 | 
20 | # on Windows using Chocolatey (https://chocolatey.org/)
21 | choco install ffmpeg
22 | ```
23 | 
24 | ## Usage
25 | 
26 | The following command will generate a `subtitled/video.mp4` file containing the input video with overlaid subtitles.
27 | 
28 |     auto_subtitle /path/to/video.mp4 -o subtitled/
29 | 
30 | The default setting (which selects the `small` model) works well for transcribing English. You can optionally use a bigger model for better results (especially with other languages). The available models are `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large`.
31 | 
32 |     auto_subtitle /path/to/video.mp4 --model medium
33 | 
34 | Adding `--task translate` will translate the subtitles into English:
35 | 
36 |     auto_subtitle /path/to/video.mp4 --task translate
37 | 
38 | Run the following to view all available options:
39 | 
40 |     auto_subtitle --help
41 | 
42 | ## License
43 | 
44 | This script is open-source and licensed under the MIT License. For more details, check the [LICENSE](LICENSE) file.
45 | 


--------------------------------------------------------------------------------
/auto_subtitle/cli.py:
--------------------------------------------------------------------------------
import argparse
import os
import tempfile
import warnings
from typing import Callable, Dict

import ffmpeg
import whisper

from .utils import filename, str2bool, write_srt
  8 | 
  9 | 
 10 | def main():
 11 |     parser = argparse.ArgumentParser(
 12 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 13 |     parser.add_argument("video", nargs="+", type=str,
 14 |                         help="paths to video files to transcribe")
 15 |     parser.add_argument("--model", default="small",
 16 |                         choices=whisper.available_models(), help="name of the Whisper model to use")
 17 |     parser.add_argument("--output_dir", "-o", type=str,
 18 |                         default=".", help="directory to save the outputs")
 19 |     parser.add_argument("--output_srt", type=str2bool, default=False,
 20 |                         help="whether to output the .srt file along with the video files")
 21 |     parser.add_argument("--srt_only", type=str2bool, default=False,
 22 |                         help="only generate the .srt file and not create overlayed video")
 23 |     parser.add_argument("--verbose", type=str2bool, default=False,
 24 |                         help="whether to print out the progress and debug messages")
 25 | 
 26 |     parser.add_argument("--task", type=str, default="transcribe", choices=[
 27 |                         "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
 28 |     parser.add_argument("--language", type=str, default="auto", choices=["auto","af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","zh"], 
 29 |     help="What is the origin language of the video? If unset, it is detected automatically.")
 30 | 
 31 |     args = parser.parse_args().__dict__
 32 |     model_name: str = args.pop("model")
 33 |     output_dir: str = args.pop("output_dir")
 34 |     output_srt: bool = args.pop("output_srt")
 35 |     srt_only: bool = args.pop("srt_only")
 36 |     language: str = args.pop("language")
 37 |     
 38 |     os.makedirs(output_dir, exist_ok=True)
 39 | 
 40 |     if model_name.endswith(".en"):
 41 |         warnings.warn(
 42 |             f"{model_name} is an English-only model, forcing English detection.")
 43 |         args["language"] = "en"
 44 |     # if translate task used and language argument is set, then use it
 45 |     elif language != "auto":
 46 |         args["language"] = language
 47 |         
 48 |     model = whisper.load_model(model_name)
 49 |     audios = get_audio(args.pop("video"))
 50 |     subtitles = get_subtitles(
 51 |         audios, output_srt or srt_only, output_dir, lambda audio_path: model.transcribe(audio_path, **args)
 52 |     )
 53 | 
 54 |     if srt_only:
 55 |         return
 56 | 
 57 |     for path, srt_path in subtitles.items():
 58 |         out_path = os.path.join(output_dir, f"{filename(path)}.mp4")
 59 | 
 60 |         print(f"Adding subtitles to {filename(path)}...")
 61 | 
 62 |         video = ffmpeg.input(path)
 63 |         audio = video.audio
 64 | 
 65 |         ffmpeg.concat(
 66 |             video.filter('subtitles', srt_path, force_style="OutlineColour=&H40000000,BorderStyle=3"), audio, v=1, a=1
 67 |         ).output(out_path).run(quiet=True, overwrite_output=True)
 68 | 
 69 |         print(f"Saved subtitled video to {os.path.abspath(out_path)}.")
 70 | 
 71 | 
 72 | def get_audio(paths):
 73 |     temp_dir = tempfile.gettempdir()
 74 | 
 75 |     audio_paths = {}
 76 | 
 77 |     for path in paths:
 78 |         print(f"Extracting audio from {filename(path)}...")
 79 |         output_path = os.path.join(temp_dir, f"{filename(path)}.wav")
 80 | 
 81 |         ffmpeg.input(path).output(
 82 |             output_path,
 83 |             acodec="pcm_s16le", ac=1, ar="16k"
 84 |         ).run(quiet=True, overwrite_output=True)
 85 | 
 86 |         audio_paths[path] = output_path
 87 | 
 88 |     return audio_paths
 89 | 
 90 | 
 91 | def get_subtitles(audio_paths: list, output_srt: bool, output_dir: str, transcribe: callable):
 92 |     subtitles_path = {}
 93 | 
 94 |     for path, audio_path in audio_paths.items():
 95 |         srt_path = output_dir if output_srt else tempfile.gettempdir()
 96 |         srt_path = os.path.join(srt_path, f"{filename(path)}.srt")
 97 |         
 98 |         print(
 99 |             f"Generating subtitles for {filename(path)}... This might take a while."
100 |         )
101 | 
102 |         warnings.filterwarnings("ignore")
103 |         result = transcribe(audio_path)
104 |         warnings.filterwarnings("default")
105 | 
106 |         with open(srt_path, "w", encoding="utf-8") as srt:
107 |             write_srt(result["segments"], file=srt)
108 | 
109 |         subtitles_path[path] = srt_path
110 | 
111 |     return subtitles_path
112 | 
113 | 
# Run the CLI when this file is executed as the main module.
# NOTE(review): this module imports .utils relatively, so presumably it has
# to be started with package context (e.g. `python -m auto_subtitle.cli`)
# rather than as a bare script path -- confirm.
if __name__ == '__main__':
    main()
116 | 


--------------------------------------------------------------------------------