from setuptools import setup, find_packages

# Packaging metadata for the `auto_subtitle` command-line tool.
setup(
    version="1.0",
    name="auto_subtitle",
    packages=find_packages(),
    py_modules=["auto_subtitle"],
    author="Miguel Piedrafita",
    install_requires=[
        'openai-whisper',
        # auto_subtitle/cli.py does `import ffmpeg` itself, so the dependency
        # is declared here instead of hoping another package installs it.
        'ffmpeg-python',
    ],
    description="Automatically generate and embed subtitles into your videos",
    entry_points={
        # Installs the `auto_subtitle` executable, which calls cli.main().
        'console_scripts': ['auto_subtitle=auto_subtitle.cli:main'],
    },
    include_package_data=True,
)
import os
from typing import Iterator, TextIO


def str2bool(string):
    """Convert the text "true"/"false" (any letter case) into a bool.

    Raises:
        ValueError: if the lower-cased text is neither "true" nor "false".
    """
    lowered = string.lower()
    choices = {"true": True, "false": False}

    if lowered not in choices:
        raise ValueError(
            f"Expected one of {set(choices.keys())}, got {lowered}")
    return choices[lowered]


def format_timestamp(seconds: float, always_include_hours: bool = False):
    """Render a non-negative number of seconds as ``[HH:]MM:SS,mmm``.

    The hours field is emitted when it is non-zero or when
    ``always_include_hours`` is true. The value is rounded to the nearest
    millisecond before being split into fields.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    total_ms = round(seconds * 1000.0)

    # Peel off each unit in turn; the remainder carries to the next one.
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, millis = divmod(remainder, 1_000)

    if always_include_hours or hours > 0:
        prefix = f"{hours:02d}:"
    else:
        prefix = ""
    return f"{prefix}{minutes:02d}:{whole_seconds:02d},{millis:03d}"


def write_srt(transcript: Iterator[dict], file: TextIO):
    """Write ``transcript`` segments to ``file`` as numbered SRT cues.

    Each segment is read through its ``'start'``, ``'end'`` and ``'text'``
    keys. Cues are numbered from 1 and the stream is flushed after each one.
    """
    for index, segment in enumerate(transcript, start=1):
        start = format_timestamp(segment['start'], always_include_hours=True)
        end = format_timestamp(segment['end'], always_include_hours=True)
        # "-->" is the SRT timing separator, so it is rewritten in cue text.
        text = segment['text'].strip().replace('-->', '->')
        print(f"{index}\n{start} --> {end}\n{text}\n", file=file, flush=True)


def filename(path):
    """Return the final component of ``path`` without its last extension."""
    base = os.path.basename(path)
    stem, _extension = os.path.splitext(base)
    return stem
import os
import ffmpeg
import whisper
import argparse
import warnings
import tempfile
from typing import Callable, Dict, Optional
from .utils import filename, str2bool, write_srt


def main():
    """Command-line entry point.

    Parses the arguments, extracts the audio of every input video,
    transcribes it with the selected Whisper model, writes the subtitles as
    .srt files and, unless ``--srt_only`` is set, renders a copy of each video
    with the subtitles applied through ffmpeg's ``subtitles`` filter.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("video", nargs="+", type=str,
                        help="paths to video files to transcribe")
    parser.add_argument("--model", default="small",
                        choices=whisper.available_models(), help="name of the Whisper model to use")
    parser.add_argument("--output_dir", "-o", type=str,
                        default=".", help="directory to save the outputs")
    parser.add_argument("--output_srt", type=str2bool, default=False,
                        help="whether to output the .srt file along with the video files")
    parser.add_argument("--srt_only", type=str2bool, default=False,
                        help="only generate the .srt file and not create overlayed video")
    parser.add_argument("--verbose", type=str2bool, default=False,
                        help="whether to print out the progress and debug messages")

    parser.add_argument("--task", type=str, default="transcribe", choices=[
                        "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
    parser.add_argument("--language", type=str, default="auto",
                        choices=[
                            "auto", "af", "am", "ar", "as", "az", "ba", "be", "bg", "bn",
                            "bo", "br", "bs", "ca", "cs", "cy", "da", "de", "el", "en",
                            "es", "et", "eu", "fa", "fi", "fo", "fr", "gl", "gu", "ha",
                            "haw", "he", "hi", "hr", "ht", "hu", "hy", "id", "is", "it",
                            "ja", "jw", "ka", "kk", "km", "kn", "ko", "la", "lb", "ln",
                            "lo", "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms",
                            "mt", "my", "ne", "nl", "nn", "no", "oc", "pa", "pl", "ps",
                            "pt", "ro", "ru", "sa", "sd", "si", "sk", "sl", "sn", "so",
                            "sq", "sr", "su", "sv", "sw", "ta", "te", "tg", "th", "tk",
                            "tl", "tr", "tt", "uk", "ur", "uz", "vi", "yi", "yo", "zh",
                        ],
                        help="What is the origin language of the video? If unset, it is detected automatically.")

    # Options consumed here are popped; whatever remains in `args`
    # (task, verbose, and possibly language) is forwarded to model.transcribe().
    args = parser.parse_args().__dict__
    model_name: str = args.pop("model")
    output_dir: str = args.pop("output_dir")
    output_srt: bool = args.pop("output_srt")
    srt_only: bool = args.pop("srt_only")
    language: str = args.pop("language")
    video_paths = args.pop("video")

    os.makedirs(output_dir, exist_ok=True)

    if model_name.endswith(".en"):
        warnings.warn(
            f"{model_name} is an English-only model, forcing English detection.")
        args["language"] = "en"
    # "auto" means: pass no language at all and let the model detect it.
    elif language != "auto":
        args["language"] = language

    model = whisper.load_model(model_name)

    # All intermediate files live in a private directory that is removed on
    # exit, including the early return below and any exception.
    with tempfile.TemporaryDirectory() as temp_dir:
        audios = get_audio(video_paths, temp_dir=temp_dir)
        subtitles = get_subtitles(
            audios, output_srt or srt_only, output_dir,
            lambda audio_path: model.transcribe(audio_path, **args),
            temp_dir=temp_dir,
        )

        if srt_only:
            return

        for path, srt_path in subtitles.items():
            out_path = _output_video_path(path, output_dir)

            print(f"Adding subtitles to {filename(path)}...")

            video = ffmpeg.input(path)
            audio = video.audio

            ffmpeg.concat(
                video.filter('subtitles', srt_path, force_style="OutlineColour=&H40000000,BorderStyle=3"), audio, v=1, a=1
            ).output(out_path).run(quiet=True, overwrite_output=True)

            print(f"Saved subtitled video to {os.path.abspath(out_path)}.")


def _output_video_path(video_path: str, output_dir: str) -> str:
    """Return the path of the subtitled copy, never the input file itself."""
    stem = filename(video_path)
    out_path = os.path.join(output_dir, f"{stem}.mp4")
    # ffmpeg cannot write its output over the file it is reading. This happens
    # when an .mp4 input already sits in output_dir (the default "." included).
    if os.path.exists(out_path) and os.path.samefile(out_path, video_path):
        out_path = os.path.join(output_dir, f"{stem}_subtitled.mp4")
    return out_path


def _reserve_temp_file(stem: str, suffix: str, temp_dir: Optional[str] = None) -> str:
    """Create an empty, uniquely named temp file and return its path."""
    # mkstemp guarantees uniqueness, so two inputs sharing a basename no
    # longer overwrite each other's intermediate files.
    fd, temp_path = tempfile.mkstemp(prefix=f"{stem}_", suffix=suffix, dir=temp_dir)
    os.close(fd)
    return temp_path


def get_audio(paths, temp_dir: Optional[str] = None) -> Dict[str, str]:
    """Extract a mono 16 kHz 16-bit PCM .wav from every video in ``paths``.

    Args:
        paths: iterable of input video paths.
        temp_dir: directory for the .wav files; the system temp directory
            is used when omitted.

    Returns:
        Dict mapping each input video path to its extracted audio file.
    """
    audio_paths = {}

    for path in paths:
        print(f"Extracting audio from {filename(path)}...")
        output_path = _reserve_temp_file(filename(path), ".wav", temp_dir)

        ffmpeg.input(path).output(
            output_path,
            acodec="pcm_s16le", ac=1, ar="16k"
        ).run(quiet=True, overwrite_output=True)

        audio_paths[path] = output_path

    return audio_paths


def get_subtitles(audio_paths: Dict[str, str], output_srt: bool, output_dir: str,
                  transcribe: Callable[[str], dict],
                  temp_dir: Optional[str] = None) -> Dict[str, str]:
    """Transcribe each audio file and write the result as an .srt file.

    Args:
        audio_paths: dict mapping video path to extracted audio path.
        output_srt: when true the .srt is written to ``output_dir`` as
            ``<video stem>.srt``; otherwise it goes to a temporary file.
        output_dir: destination directory used when ``output_srt`` is true.
        transcribe: called with an audio path; its result must provide the
            segments under the ``"segments"`` key.
        temp_dir: directory for temporary .srt files; the system temp
            directory is used when omitted.

    Returns:
        Dict mapping each video path to the path of its .srt file.
    """
    subtitles_path = {}

    for path, audio_path in audio_paths.items():
        if output_srt:
            srt_path = os.path.join(output_dir, f"{filename(path)}.srt")
        else:
            srt_path = _reserve_temp_file(filename(path), ".srt", temp_dir)

        print(
            f"Generating subtitles for {filename(path)}... This might take a while."
        )

        # Silence warnings only for the duration of the call. catch_warnings
        # restores the caller's filters afterwards instead of replacing them.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            result = transcribe(audio_path)

        with open(srt_path, "w", encoding="utf-8") as srt:
            write_srt(result["segments"], file=srt)

        subtitles_path[path] = srt_path

    return subtitles_path


if __name__ == '__main__':
    main()