├── .gitignore
├── Makefile
├── README.md
├── config.py
├── core
    ├── __init__.py
    ├── audio.py
    ├── client.py
    ├── downloader.py
    ├── prompts.py
    ├── schema.py
    ├── utils.py
    └── video.py
├── main.py
├── poetry.lock
├── poetry.toml
├── pyproject.toml
├── src
    └── fonts
    │   └── PlaypenSans.ttf
├── tests
    └── __init__.py
├── vt
    └── __init__.py
└── webui.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | .idea/
161 | .download
162 | output
163 | .DS_Store
164 | local_settings.py
165 | .env


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PYTHON_FILES=.
 2 | lint format: PYTHON_FILES=.
 3 | 
 4 | lint:
 5 | 	poetry run ruff .
 6 | 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
 7 | 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES)
 8 | 	[ "$(PYTHON_FILES)" = "" ] || poetry run mypy $(PYTHON_FILES)
 9 | 
10 | format:
11 | 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
12 | 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES)
13 | 
14 | ######################
15 | # HELP
16 | ######################
17 | 
18 | help:
19 | 	@echo '===================='
20 | 	@echo 'format                       - run code formatters'
21 | 	@echo 'lint                         - run linters'
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## video-translation
  2 | 
  3 | video-translation is an ongoing project leveraging [OpenAI Whisper](https://github.com/openai/whisper) and the OpenAI API ([TTS](https://platform.openai.com/docs/guides/text-to-speech)) to accomplish the following objectives:
  4 | 
  5 | 1. Video Download: ✅
  6 | 2. Extract Audio from Video: Default format is mp3. ✅
  7 | 3. Generate Subtitles from Audio and Translate: ✅
  8 | 4. Embed Hard Subtitles into Videos: ✅
  9 | 5. Support for Video Production in Different Languages based on Subtitles: In progress
 10 | 
 11 | Hence, vt autonomously adds subtitles in multiple languages to both online and local videos. Future capabilities aim to automatically convert video voices into various languages. Feel free to follow and contribute to this project.
 12 | 
 13 | ## Setup
 14 | 
 15 | Begin by installing and updating using poetry:
 16 | 
 17 | ```bash
 18 | poetry install
 19 | ```
 20 | 
 21 | Additionally, ensure you have the command-line tool `ffmpeg` installed on your system. It's available via most package managers:
 22 | ```bash
 23 | # on Ubuntu or Debian
 24 | sudo apt update && sudo apt install ffmpeg
 25 | 
 26 | # on Arch Linux
 27 | sudo pacman -S ffmpeg
 28 | 
 29 | # on MacOS using Homebrew (https://brew.sh/)
 30 | brew install ffmpeg
 31 | 
 32 | # on Windows using Chocolatey (https://chocolatey.org/)
 33 | choco install ffmpeg
 34 | 
 35 | # on Windows using Scoop (https://scoop.sh/)
 36 | scoop install ffmpeg
 37 | ```
 38 | 
 39 | ## Streamlit-based Web UI
 40 | 
 41 | Our project's Web UI, powered by Streamlit, allows users to add subtitles to videos through an intuitive web interface. This section outlines the primary features of this functionality:
 42 | 
 43 | 1. **Configurable Options**: Users can specify multiple settings such as the Whisper model for audio processing, language preferences, video file types, and subtitle styles to suit various requirements.
 44 | 2. **Automatic Subtitle Generation**: The tool supports generating subtitles directly from the video content, leveraging advanced speech-to-text technology for accuracy.
 45 | 3. **Subtitle Preview Mode**: After adding subtitles, users can engage a preview mode to see how subtitles appear on the actual video, ensuring proper synchronization and styling.
 46 | 4. **Editable Subtitles with Auto Save**: This feature allows users to edit subtitles and see changes in real-time. The 'Auto Save' option can be toggled to apply modifications instantly.
 47 | 5. **Subtitle File Download**: Once subtitles are created or edited, users have the option to download these files in different formats (e.g., SRT, VTT), making them compatible with various media players and platforms.
 48 | 6. **Export Video with Subtitles**: Users can export the final video with subtitles embedded, ready for sharing on different platforms or for personal use.
 49 | 
 50 | Launching the Web UI:
 51 | 
 52 | ```bash
 53 | poetry run streamlit run webui.py
 54 | ```
 55 | 
 56 | Then open your web browser and go to the following address to access the Web UI: `http://localhost:8501/`
 57 | 
 58 | ## Command-line usage
 59 | 
 60 | There are two primary command-line usage types:
 61 | 
 62 | 1. **Generate Subtitle Files**: Supports srt, vtt format based on video content. You can use video tools or website functions to add these subtitles to videos.
 63 | 2. **Directly Add Subtitles to Videos**: Currently supports three methods of subtitle addition.
 64 | 
 65 | ### Generate subtitle files (supports the srt, vtt, json, txt, tsv and all formats)
 66 | 
 67 | ```bash
 68 | poetry run vt subtitle --path='https://www.bilibili.com/video/BV1tb4y1L7yA' --language=en --method=whisper --format=vtt --output=/Users/169/Downloads
 69 | ```
 70 | 
 71 | ### Download video online and add subtitle
 72 | 
 73 | ```bash
 74 | poetry run python main.py web --url=https://www.youtube.com/watch?v=CqRrByI-ONE
 75 | ```
 76 | 
 77 | The `web` subcommand also supports more options, as follows:
 78 | 
 79 | ```bash
 80 | poetry run vt web --help
 81 | Usage: vt web [OPTIONS]
 82 | 
 83 | Options:
 84 |   -u, --url TEXT        Video url.  [required]
 85 |   -o, --output TEXT     Generated video dir.
 86 |   -b, --bilingual TEXT  Use bilingual subtitle. Notice: this option is mutually exclusive
 87 |                         with subtitles.
 88 |   -s, --subtitles TEXT  Subtitle languages. split by ",". Notice: this option is mutually
 89 |                         exclusive with bilingual.
 90 |   --help                Show this message and exit.
 91 | ```
 92 | 
 93 | ### Use local video and add subtitle
 94 | 
 95 | ```bash
 96 | poetry run vt local --path='/Users/169/Movies/test.mov'
 97 | ```
 98 | 
 99 | The `local` subcommand also supports more options, as follows:
100 | 
101 | ```bash
102 | poetry run vt local --help
103 | Usage: vt local [OPTIONS]
104 | 
105 | Options:
106 |   -p, --path TEXT       Local file path.  [required]
107 |   -o, --output TEXT     Generated video dir.
108 |   -b, --bilingual TEXT  Use bilingual subtitle. you can specify the subtitle
109 |                         language by `--subtitles`. Notice: this option is
110 |                         mutually exclusive with subtitles.
111 |   -s, --subtitles TEXT  Subtitle languages. split by ",". Notice: this option
112 |                         is mutually exclusive with bilingual.
113 |   --help                Show this message and exit.
114 | ```
115 | 
116 | ### Add bilingual subtitles to local video
117 | 
118 | Use the `--bilingual` option to create bilingual subtitles. Separate the two languages with commas, with the first language on top and the latter at the bottom.
119 | 
120 | ```bash
121 | poetry run vt local --path="/Users/169/videos/Langchain C3_L6.mp4" --bilingual="cn,en" --output="/Users/169/Downloads"
122 | ```
123 | 
124 | ## Custom config
125 | 
126 | All configuration items reside in [config.py](https://github.com/169/video-translation/blob/main/config.py).
127 | 
128 | The project also supports limited custom configuration. Add a file named `local_settings.py` within the project and include the settings you wish to override.
129 | 
130 | For instance, since `DEBUG` defaults to `False` in `config.py`, you can enable debug mode by adding `DEBUG = True` to `local_settings.py` (setting names are upper-case and must match `config.py` exactly):
131 | 
132 | ```bash
133 | cat local_settings.py
134 | DEBUG = True
135 | ```
136 | 
137 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | DEBUG = False
 4 | HERE = Path(__file__).parent.absolute()
 5 | DOWNLOAD_DIR: str | Path = HERE / ".download"
 6 | OUTPUT_DIR: str | Path = HERE / "output"
 7 | WHISPER_MODEL = "large"
 8 | FFMPEG_BIN = "ffmpeg"
 9 | FFMPEG_PREFIX_OPTS = "-hide_banner -loglevel error -y"
10 | FFMPEG_FORMAT_OPTS = "-c:v libx264 -preset fast -qp 0 -c:a aac -vf format=yuv420p"
11 | ZH_FONT_NAME = "Yuanti SC"
12 | EN_FONT_NAME = "Playpen Sans"
13 | BI_FONT_NAME = "Songti SC"
14 | ZH_FONT_SIZE = 15
15 | EN_FONT_SIZE = 20
16 | BI_FONT_SIZE = 12
17 | # https://wamingo.net/rgbbgr/
18 | FONT_COLOR = "&H00FFFFFF"
19 | OUTLINE_COLOR = "&H00504eff"
20 | MISMATCH_LIMIT = 5
21 | TEXT_LIMIT = 20
22 | 
23 | OPENAI_API_KEY = ""
24 | OPENAI_MODEL = "gpt-4o"
25 | INITIAL_PROMPT_MAP = {
26 |     "zh": "以下是普通话的句子，请以简体输出。",
27 | }
28 | 
29 | ZH_FORCE_STYLE = f"Fontname={ZH_FONT_NAME},PrimaryColour={FONT_COLOR},OutlineColour={OUTLINE_COLOR},BorderStyle=3,Fontsize={ZH_FONT_SIZE}"
30 | EN_FORCE_STYLE = f"Fontname={EN_FONT_NAME},PrimaryColour={FONT_COLOR},OutlineColour={OUTLINE_COLOR},BorderStyle=3,Fontsize={EN_FONT_SIZE}"
31 | BI_FORCE_STYLE = f"Fontname={EN_FONT_NAME},PrimaryColour={FONT_COLOR},OutlineColour={OUTLINE_COLOR},BorderStyle=3,Fontsize={BI_FONT_SIZE}"
32 | 
33 | try:
34 |     from local_settings import *  # noqa: F403
35 | except ImportError:
36 |     ...
37 | 


--------------------------------------------------------------------------------
/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/169/intelli-video/52f880e53c0c2208953c212557fecd53988b2255/core/__init__.py


--------------------------------------------------------------------------------
/core/audio.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import subprocess
  3 | import warnings
  4 | from pathlib import Path
  5 | 
  6 | import openai
  7 | import tenacity
  8 | import whisper
  9 | from loguru import logger
 10 | from whisper.utils import get_writer
 11 | 
 12 | from config import (
 13 |     DEBUG,
 14 |     DOWNLOAD_DIR,
 15 |     FFMPEG_BIN,
 16 |     FFMPEG_PREFIX_OPTS,
 17 |     TEXT_LIMIT,
 18 |     WHISPER_MODEL,
 19 |     INITIAL_PROMPT_MAP,
 20 | )
 21 | from core.client import Client
 22 | from core.prompts import render_translate_prompt
 23 | from core.utils import (
 24 |     assign_texts,
 25 |     batched,
 26 |     check_fallback_to_openai,
 27 |     parse_vtt,
 28 |     write_bilingual_vtt,
 29 |     write_vtt,
 30 | )
 31 | 
 32 | 
 33 | def transcribe(
 34 |     audio: str, language: str = "en", model_name: str = WHISPER_MODEL
 35 | ) -> dict:
 36 |     warnings.filterwarnings("ignore")
 37 |     logger.info(f"Transcribe: {audio} <Language: {language}>")
 38 |     model = whisper.load_model(model_name)
 39 |     result = model.transcribe(audio, language=language, verbose=DEBUG,
 40 |                               initial_prompt=INITIAL_PROMPT_MAP.get(language, None))
 41 |     logger.info(f"Transcribed: {audio} <Language: {language}>")
 42 |     warnings.filterwarnings("default")
 43 |     return result
 44 | 
 45 | 
 46 | def generate_subtitle(
 47 |     audio: str,
 48 |     method: str,
 49 |     language: str,
 50 |     format: str,
 51 |     output_directory: str,
 52 |     model_name: str,
 53 | ) -> str:
 54 |     if method == "whisper":
 55 |         filename = f"{output_directory}/{os.path.basename(audio).removesuffix('.mp3')}.{format}"
 56 |         result = transcribe(audio, language=language, model_name=model_name)
 57 |         writer = get_writer(format, output_directory)
 58 |         writer(result, audio)
 59 |     elif method == "openai_api":
 60 |         filename = f"{output_directory}/{os.path.basename(audio).removesuffix('.mp3')}.{language}.{format}"
 61 |         client = Client()
 62 |         text = client.transcribe(audio, response_format=format, language=language)  # type: ignore[arg-type]
 63 | 
 64 |         with open(filename, "w") as f:
 65 |             f.write(text)
 66 |     else:
 67 |         raise ValueError(f"Unknown method: {method}")
 68 | 
 69 |     logger.info(f"Subtitle generated: {filename} <Language: {language}>")
 70 |     return filename
 71 | 
 72 | 
 73 | def generate_vtt_from_api(
 74 |     audio: str, title_language: str, other_language: str
 75 | ) -> list[list[str]]:
 76 |     logger.info(f"Generate {title_language} vtt from OpenAI API: {audio}")
 77 |     client = Client()
 78 |     transcript = client.transcribe(audio)
 79 |     all_segments = [i.model_dump() for i in parse_vtt(transcript)]
 80 |     segments = []
 81 |     for lst in batched(all_segments, TEXT_LIMIT):
 82 |         texts = [seg["text"] for seg in lst]
 83 |         content = "\n".join(texts)
 84 |         text_map = client.translate(render_translate_prompt(content, other_language))
 85 |         text_map = assign_texts(text_map, texts)
 86 |         for seg in lst:
 87 |             text = seg["text"]
 88 |             trans = text_map.get(text.strip(), "")
 89 |             if title_language == "en":
 90 |                 seg["title"] = f"<strong>{text}</strong>"
 91 |                 seg["subtitle"] = trans
 92 |             else:
 93 |                 seg["title"] = f"<strong>{trans}</strong>"
 94 |                 seg["subtitle"] = text
 95 |             segments.append(seg)
 96 |     return [[write_bilingual_vtt(segments, audio), "bilingual"]]
 97 | 
 98 | 
 99 | def generate_vtt(audio: str, bilingual: str, subtitles: str) -> list[list[str]]:
100 |     warnings.filterwarnings("ignore")
101 |     result = transcribe(audio)
102 |     source_language = result["language"]
103 | 
104 |     client = Client()
105 | 
106 |     srts = []
107 | 
108 |     if subtitles:
109 |         subtitles_ = subtitles.split(",")
110 |         if source_language in subtitles_:
111 |             srts.append(
112 |                 [write_vtt(result["segments"], audio, source_language), source_language]
113 |             )
114 |             logger.info(f"Generate {source_language} vtt: {srts[0][0]}")
115 | 
116 |         for dist_language in subtitles_:
117 |             if dist_language != source_language:
118 |                 result = transcribe(audio, language=dist_language)
119 |                 srts.append(
120 |                     [write_vtt(result["segments"], audio, dist_language), dist_language]
121 |                 )
122 |                 logger.info(f"Generate {dist_language} vtt: {srts[-1][0]}")
123 |     if bilingual:
124 |         title_language, subtitle_language = bilingual.split(",")
125 |         if source_language in (title_language, subtitle_language):
126 |             if "en" in (title_language, subtitle_language):
127 |                 result = transcribe(audio, language="en")
128 |                 other_language = (
129 |                     subtitle_language if title_language == "en" else title_language
130 |                 )
131 |                 segments = []
132 |                 for lst in batched(result["segments"], TEXT_LIMIT):
133 |                     lst = list(lst)
134 |                     texts = [seg["text"] for seg in lst]
135 |                     content = "\n".join(texts)
136 |                     text_map = client.translate(
137 |                         render_translate_prompt(content, other_language)
138 |                     )
139 |                     need_use_api = check_fallback_to_openai(text_map, texts)
140 |                     if need_use_api:
141 |                         try:
142 |                             return generate_vtt_from_api(
143 |                                 audio, title_language, other_language
144 |                             )
145 |                         except (openai.APIStatusError, tenacity.RetryError):
146 |                             ...
147 |                     text_map = assign_texts(text_map, texts)
148 |                     for seg in lst:
149 |                         text = seg["text"]
150 |                         trans = text_map.get(text.strip(), "")
151 |                         if title_language == "en":
152 |                             seg["title"] = f"<strong>{text}</strong>"
153 |                             seg["subtitle"] = trans
154 |                         else:
155 |                             seg["title"] = f"<strong>{trans}</strong>"
156 |                             seg["subtitle"] = text
157 |                         segments.append(seg)
158 |                 srts.append(
159 |                     [write_bilingual_vtt(segments, audio), "bilingual"],
160 |                 )
161 |                 logger.info(f"Generate {bilingual} bilingual vtt: {srts[0][0]}")
162 |         else:
163 |             # TODO: support other languages
164 |             ...
165 | 
166 |     warnings.filterwarnings("default")
167 |     return srts
168 | 
169 | 
def generate_audio(video: str | Path, suffix=".mp3") -> str:
    """Extract the audio track of ``video`` into ``DOWNLOAD_DIR`` with ffmpeg.

    Args:
        video: Path of the source video.
        suffix: Extension (including the dot) of the audio file to create.

    Returns:
        Path of the generated audio file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    import shlex

    video = Path(video)

    # ffmpeg does not create missing output directories.
    Path(DOWNLOAD_DIR).mkdir(parents=True, exist_ok=True)
    audio = f"{DOWNLOAD_DIR}/{video.stem}{suffix}"

    # An argument list (no shell) keeps paths containing quotes or other
    # shell metacharacters intact and rules out command injection.
    subprocess.check_call(
        [
            *shlex.split(FFMPEG_BIN),
            *shlex.split(FFMPEG_PREFIX_OPTS),
            "-i",
            video.as_posix(),
            "-b:a",
            "192K",
            "-vn",
            audio,
        ]
    )
    logger.info(f"Audio generated: {audio}")
    return audio
182 | 


--------------------------------------------------------------------------------
/core/client.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from typing import Literal
 3 | 
 4 | from loguru import logger
 5 | from openai import OpenAI
 6 | from pydantic import ValidationError
 7 | from tenacity import retry, stop_after_attempt
 8 | 
 9 | from config import DEBUG, OPENAI_API_KEY, OPENAI_MODEL
10 | 
11 | 
class Client:
    """Small wrapper around the OpenAI SDK for translation and transcription."""

    def __init__(self):
        """Create the SDK client.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is empty.
        """
        if not OPENAI_API_KEY:
            # pydantic's ValidationError is not meant to be built from a plain
            # message, so a standard exception is raised instead.
            raise ValueError(
                "`OPENAI_API_KEY` is required. please add it to `local_settings.py`. "
            )
        self.client = OpenAI(api_key=OPENAI_API_KEY)

    def translate(self, prompt) -> dict:
        """Send ``prompt`` as a single user message and return the parsed JSON reply.

        The request asks for a JSON object response; an empty reply is
        treated as an empty mapping.
        """
        if DEBUG:
            logger.debug(prompt)
        chat_completion = self.client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model=OPENAI_MODEL,
            response_format={"type": "json_object"},
        )

        content = chat_completion.choices[0].message.content
        return json.loads(content or "{}")

    @retry(stop=stop_after_attempt(3))
    def transcribe(
        self,
        audio: str,
        response_format: Literal["vtt", "srt"] = "vtt",
        language="en",
    ) -> str:
        """Transcribe ``audio`` with the ``whisper-1`` API model.

        Args:
            audio: Path of the audio file to upload.
            response_format: Subtitle format requested from the API.
            language: Language of the speech.

        Returns:
            The transcript in the requested format.  The call is attempted up
            to three times before the retry wrapper gives up.
        """
        # The file is closed as soon as the request finishes, on every attempt.
        with open(audio, "rb") as audio_file:
            transcript = self.client.audio.transcriptions.create(
                model="whisper-1",
                language=language,
                file=audio_file,
                response_format=response_format,  # type: ignore[arg-type]
            )
        return transcript  # type: ignore
50 | 


--------------------------------------------------------------------------------
/core/downloader.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | 
 4 | import yt_dlp
 5 | from loguru import logger
 6 | 
 7 | from config import DOWNLOAD_DIR
 8 | from core.schema import DownloadMedia
 9 | 
# The download location is passed to yt-dlp as a plain string, so a Path-typed
# setting is converted to its POSIX string form first.
if isinstance(DOWNLOAD_DIR, Path):
    DOWNLOAD_DIR = DOWNLOAD_DIR.as_posix()

# Options handed to yt_dlp.YoutubeDL in download().
ydl_opts = {
    "quiet": True,
    # Format selector; see yt-dlp's format selection documentation.
    "format": "bv+ba/b",
    "paths": {"home": DOWNLOAD_DIR},
    # download() returns both a video and an audio path, so the video is
    # presumably kept next to the extracted audio - confirm against yt-dlp docs.
    "keepvideo": True,
    "postprocessors": [
        {
            # Extract the audio track as mp3.
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
            "preferredquality": "0",
        }
    ],
}
26 | 
27 | 
def download(url: str) -> DownloadMedia:
    """Download ``url`` with yt-dlp and describe the resulting files.

    Any path listed under ``__files_to_merge`` of the first requested
    download is deleted afterwards; files that are already gone are ignored.

    Returns:
        A ``DownloadMedia`` holding the title, the ``filename`` entry as the
        video path and the ``filepath`` entry as the audio path.
    """
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        logger.info(f"Downloading: {url}")
        info = ydl.extract_info(url, download=True)
        first_download = info["requested_downloads"][0]

        leftovers = first_download.get("__files_to_merge", [])
        for leftover in leftovers:
            try:
                os.remove(leftover)
            except FileNotFoundError:
                pass

        video_path = first_download["filename"]
        logger.info(f"Downloaded: {video_path}")
        return DownloadMedia(
            title=info["title"],
            video=video_path,
            audio=first_download["filepath"],
        )
44 | 


--------------------------------------------------------------------------------
/core/prompts.py:
--------------------------------------------------------------------------------
 1 | from string import Template
 2 | 
 3 | translate_prompt = """Translate to $other_language language strictly line by line. return json data
 4 | 
 5 | Input: Hello, my name is John.
 6 | I'm from the United States.
 7 | Output: {"Hello, my name is John.": "你好，我的名字叫约翰.", "I'm from the United States.": "我来自美国."}
 8 | Input: ${content}
 9 | Output: 
10 | """
11 | 
12 | 
13 | def render_translate_prompt(content: str, other_language: str) -> str:
14 |     return Template(translate_prompt).substitute(
15 |         content=content, other_language=other_language
16 |     )
17 | 


--------------------------------------------------------------------------------
/core/schema.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | from pydantic import BaseModel
 4 | 
 5 | 
class DownloadMedia(BaseModel):
    """Title and file paths describing one downloaded media item."""

    # Media title.
    title: str
    # Path of the video file.
    video: str
    # Path of the audio file.
    audio: str
10 | 
11 | 
class Segment(BaseModel):
    """One subtitle cue.

    Only ``text`` is mandatory.  A cue carries either numeric ``start`` and
    ``end`` values or a ready-made ``timestamp`` line, so every timing field
    is optional and defaults to ``None``.
    """

    # Cue text.
    text: str
    # Raw "start --> end" line, when the cue was parsed from a VTT file.
    timestamp: Optional[str] = None
    # Numeric start/end of the cue (seconds in the code that fills them).
    start: Optional[float] = None
    end: Optional[float] = None
    # First and second line of a bilingual cue.
    title: Optional[str] = None
    subtitle: Optional[str] = None
19 | 


--------------------------------------------------------------------------------
/core/utils.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from itertools import islice
  3 | from typing import Generator
  4 | 
  5 | from config import MISMATCH_LIMIT, TEXT_LIMIT
  6 | from core.schema import Segment
  7 | 
  8 | 
  9 | def format_timestamp(
 10 |     seconds: float,
 11 | ):
 12 |     assert seconds >= 0, "non-negative timestamp expected"
 13 |     milliseconds = round(seconds * 1000.0)
 14 | 
 15 |     hours = milliseconds // 3_600_000
 16 |     milliseconds -= hours * 3_600_000
 17 | 
 18 |     minutes = milliseconds // 60_000
 19 |     milliseconds -= minutes * 60_000
 20 | 
 21 |     seconds = milliseconds // 1_000
 22 |     milliseconds -= seconds * 1_000
 23 | 
 24 |     return f"{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
 25 | 
 26 | 
 27 | def write_vtt(
 28 |     transcript: list[dict | Segment], audio: str, language: str | None = None
 29 | ) -> str:
 30 |     if language is not None:
 31 |         vtt_filename = f"{audio.removesuffix('.mp3')}.{language}.vtt"
 32 |     else:
 33 |         vtt_filename = f"{audio.removesuffix('.mp3')}.vtt"
 34 |     return _write_vtt(transcript, vtt_filename)
 35 | 
 36 | 
 37 | def _write_vtt(transcript: list[dict | Segment], vtt_filename: str) -> str:
 38 |     with open(vtt_filename, "w") as f:
 39 |         f.write("WEBVTT\n")
 40 |         for segment in transcript:
 41 |             if isinstance(segment, Segment):
 42 |                 segment = segment.model_dump()
 43 |             start = format_timestamp(segment["start"])
 44 |             end = format_timestamp(segment["end"])
 45 |             f.write(f"\n{start} --> {end}\n{segment['text'].strip()}\n")
 46 | 
 47 |     return vtt_filename
 48 | 
 49 | 
 50 | def write_bilingual_vtt(transcript: list[dict], audio: str) -> str:
 51 |     srt_filename = f"{audio.removesuffix('.mp3')}.bilingual.vtt"
 52 | 
 53 |     with open(srt_filename, "w") as f:
 54 |         f.write("WEBVTT\n")
 55 |         for segment in transcript:
 56 |             if "start" in segment and segment["start"] is not None:
 57 |                 start = format_timestamp(segment["start"])
 58 |                 end = format_timestamp(segment["end"])
 59 |                 timestamp = f"{start} --> {end}"
 60 |             else:
 61 |                 timestamp = segment["timestamp"]
 62 |             title = segment["title"].strip()
 63 |             subtitle = segment["subtitle"].strip()
 64 |             f.write(f"\n{timestamp}\n{title}\n{subtitle}\n")
 65 | 
 66 |     return srt_filename
 67 | 
 68 | 
 69 | def batched(iterable, n) -> Generator:
 70 |     # batched('ABCDEFG', 3) --> ABC DEF G
 71 |     if n < 1:
 72 |         raise ValueError("n must be at least one")
 73 |     it = iter(iterable)
 74 |     while batch := tuple(islice(it, n)):
 75 |         yield batch
 76 | 
 77 | 
 78 | def check_fallback_to_openai(text_map: dict, texts: list[str]) -> bool:
 79 |     texts = [text.strip() for text in texts]
 80 |     not_seen = {}
 81 |     for k, v in text_map.copy().items():
 82 |         k = k.strip()
 83 |         if k not in texts:
 84 |             try:
 85 |                 not_seen[k] = v.strip()
 86 |             except AttributeError:
 87 |                 print(f"AttributeError: {k}\t{v}")
 88 |                 not_seen[k] = v[0].strip()
 89 |     return len(not_seen) > MISMATCH_LIMIT
 90 | 
 91 | 
 92 | def get_seconds(time_str: str) -> float:
 93 |     m, s = time_str.strip().split(":")
 94 |     return float(m) * 60 + float(s)
 95 | 
 96 | 
 97 | def parse_vtt(text: str) -> list[Segment]:
 98 |     texts = []
 99 |     for items in batched(text.splitlines()[2:], 3):
100 |         try:
101 |             timestamp, text, _ = list(items)
102 |         except ValueError:
103 |             timestamp, text = list(items)
104 |         start, end = timestamp.split("-->")
105 |         texts.append(
106 |             Segment(
107 |                 timestamp=timestamp,
108 |                 start=get_seconds(start),
109 |                 end=get_seconds(end),
110 |                 text=text,
111 |             )
112 |         )
113 |     return texts
114 | 
115 | 
def assign_texts(text_map: dict, texts: list[str]) -> dict:
    """Add entries to ``text_map`` for source lines it has no direct key for.

    ``text_map`` is modified in place and also returned.

    Args:
        text_map: Translation response, keyed by source text.
        texts: Source lines, in order.
    """
    # Pass 1: look for a key equal to several leading lines joined with a
    # space.  When one is found, its translation is cut into ``i`` slices of
    # roughly equal character length, one per source line, and the consumed
    # lines are dropped from ``texts``.  The scan is repeated TEXT_LIMIT times.
    for _ in range(TEXT_LIMIT):
        for i in range(1, len(texts)):
            new_text = " ".join(texts[:i])
            if val := text_map.get(new_text):
                if i != 1:
                    sep = round(len(val) / i)
                    for index, text in enumerate(texts[:i]):
                        text_map[text] = val[index * sep : (index + 1) * sep]
                texts = texts[i:]
    # Pass 2: for each remaining line, look up its clauses (split at ",", "."
    # and "?") with and without trailing punctuation, and concatenate the
    # translations that are found.
    for text in texts:
        combined_text = ""
        match = False
        # Raw string: "\." and "\?" are invalid escapes in a normal string
        # literal and trigger a warning on current Python versions.
        for sentence in re.split(r",|\.|\?", text):
            sentence = sentence.strip()
            for term in (sentence, f"{sentence},", f"{sentence}.", f"{sentence}?"):
                if val := text_map.get(term):
                    combined_text += f" {val}"
                    match = True
        if match:
            text_map[text] = combined_text
    return text_map
138 | 


--------------------------------------------------------------------------------
/core/video.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | from pathlib import Path
 3 | from typing import Generator
 4 | 
 5 | from loguru import logger
 6 | 
 7 | from config import (
 8 |     BI_FORCE_STYLE,
 9 |     DEBUG,
10 |     EN_FORCE_STYLE,
11 |     FFMPEG_BIN,
12 |     FFMPEG_FORMAT_OPTS,
13 |     FFMPEG_PREFIX_OPTS,
14 |     ZH_FORCE_STYLE,
15 | )
16 | 
# Subtitle style string to use for each subtitle language key.
STYLE_MAP = dict(
    zh=ZH_FORCE_STYLE,
    en=EN_FORCE_STYLE,
    bilingual=BI_FORCE_STYLE,
)
22 | 
23 | 
def generate_video(
    video: str | Path, vtts: list[list[str]], output_dir: str
) -> Generator:
    """Render one subtitled copy of ``video`` per subtitle track using ffmpeg.

    Args:
        video: Path of the source video.
        vtts: ``[subtitle_path, language]`` pairs; one output file is produced
            for each pair.
        output_dir: Directory prefix used for the output file paths.

    Yields:
        The output path of each video, right after ffmpeg has finished it.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    import shlex  # local import: only needed to build the shell command

    if not isinstance(video, Path):
        video = Path(video)

    # A ``.webm`` source is written out with a ``.mp4`` extension instead.
    suffix = video.suffix
    if suffix == ".webm":
        suffix = ".mp4"

    stem = video.name.removesuffix(video.suffix)
    for vtt, language in vtts:
        output = f"{output_dir}/{stem}.{language}{suffix}"
        logger.info(f"Transcoding: {output}")
        # Unknown languages fall back to the English style.
        style = STYLE_MAP.get(language, STYLE_MAP["en"])
        # The inner single quotes belong to ffmpeg's filter syntax, not the shell.
        subtitle_filter = (
            f"subtitles='{vtt}':fontsdir=./src/fonts/:force_style='{style}'"
        )
        # Quote every path-derived argument so that file names containing
        # quotes, spaces or shell metacharacters cannot break the command.
        # The configured option strings are passed through untouched.
        cmd = (
            f"{FFMPEG_BIN} {FFMPEG_PREFIX_OPTS} "
            f"-i {shlex.quote(video.as_posix())} {FFMPEG_FORMAT_OPTS} "
            f"-vf {shlex.quote(subtitle_filter)} {shlex.quote(output)}"
        )
        if DEBUG:
            logger.debug(f"CMD: {cmd}")
        subprocess.check_call(cmd, shell=True)
        logger.info(f"Video generated: {output}")
        yield output
44 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import click
  2 | from loguru import logger
  3 | from whisper import _MODELS
  4 | from whisper.tokenizer import LANGUAGES
  5 | 
  6 | from config import OUTPUT_DIR, WHISPER_MODEL
  7 | from core.audio import generate_audio, generate_subtitle, generate_vtt
  8 | from core.downloader import download
  9 | from core.video import generate_video
 10 | 
# Subtitle formats the `subtitle` command accepts when --method is "whisper".
WHISPER_SUPPORT_TYPES = ["vtt", "srt", "json", "txt", "tsv", "all"]
# Subtitle formats the `subtitle` command accepts when --method is "openai_api".
OPENAI_API_SUPPORT_TYPES = ["json", "text", "vtt", "srt"]
 13 | 
 14 | 
 15 | class Mutex(click.Option):
 16 |     def __init__(self, *args, **kwargs):
 17 |         self.not_required_if: list = kwargs.pop("not_required_if", [])
 18 | 
 19 |         assert self.not_required_if, "'not_required_if' parameter required"
 20 |         kwargs["help"] = (
 21 |             kwargs.get("help", "")
 22 |             + ". Notice: this option is mutually exclusive with "
 23 |             + ", ".join(self.not_required_if)
 24 |             + "."
 25 |         ).strip()
 26 |         super(Mutex, self).__init__(*args, **kwargs)
 27 | 
 28 |     def handle_parse_result(self, ctx, opts, args):
 29 |         current_opt: bool = self.name in opts
 30 |         for mutex_opt in self.not_required_if:
 31 |             if mutex_opt in opts:
 32 |                 if current_opt:
 33 |                     raise click.UsageError(
 34 |                         "Illegal usage: '"
 35 |                         + str(self.name)
 36 |                         + "' is mutually exclusive with "
 37 |                         + str(mutex_opt)
 38 |                         + "."
 39 |                     )
 40 |                 else:
 41 |                     self.prompt = None
 42 |         if current_opt and not opts[self.name]:
 43 |             raise click.UsageError(
 44 |                 f"Illegal usage: The value of '{self.name}' cannot be empty."
 45 |             )
 46 |         return super(Mutex, self).handle_parse_result(ctx, opts, args)
 47 | 
 48 | 
# Root command group; the commands below attach to it via @cli.command().
# The docstring doubles as the help text click prints for the group.
@click.group()
def cli():
    """`vt` is a tool for adding subtitle to videos and generating videos in other languages."""
 52 | 
 53 | 
 54 | @cli.command()
 55 | @click.option("-p", "--path", required=True, help="Local file path or video url.")
 56 | @click.option("-o", "--output", default=OUTPUT_DIR, help="Generated video dir.")
 57 | @click.option(
 58 |     "-l",
 59 |     "--language",
 60 |     default="en",
 61 |     type=click.Choice(LANGUAGES.keys(), case_sensitive=False),
 62 |     help="Subtitle language.",
 63 | )
 64 | @click.option(
 65 |     "-m",
 66 |     "--model",
 67 |     default=WHISPER_MODEL,
 68 |     type=click.Choice(_MODELS.keys(), case_sensitive=False),
 69 |     help="Whisper model name.",
 70 | )
 71 | @click.option(
 72 |     "-m",
 73 |     "--method",
 74 |     default="whisper",
 75 |     type=click.Choice(["whisper", "openai_api"], case_sensitive=False),
 76 |     help="Method for generating subtitle files.",
 77 | )
 78 | @click.option(
 79 |     "-f",
 80 |     "--format",
 81 |     default="vtt",
 82 |     help="Subtitle format.",
 83 | )
 84 | def subtitle(path, output, language, model, method, format):
 85 |     if method == "whisper" and format not in WHISPER_SUPPORT_TYPES:
 86 |         raise click.UsageError(
 87 |             f"Illegal usage: `whisper` method only support: {WHISPER_SUPPORT_TYPES}."
 88 |         )
 89 |     elif method == "openai_api" and format not in OPENAI_API_SUPPORT_TYPES:
 90 |         raise click.UsageError(
 91 |             f"Illegal usage: `openai_api` method only support: {OPENAI_API_SUPPORT_TYPES}."
 92 |         )
 93 | 
 94 |     if path.startswith("http"):
 95 |         media = download(path)
 96 |         audio = media.audio
 97 |     else:
 98 |         audio = generate_audio(path)
 99 |     generate_subtitle(audio, method, language, format, output, model_name=model)
100 | 
101 | 
# Add subtitles to a video file that already exists on disk.
@cli.command()
@click.option("-p", "--path", required=True, help="Local file path.")
@click.option("-o", "--output", default=OUTPUT_DIR, help="Generated video dir.")
@click.option(
    "-b",
    "--bilingual",
    cls=Mutex,
    default="",
    not_required_if=["subtitles"],
    help="Use bilingual subtitle. you can specify the subtitle language by `--subtitles`",
)
@click.option(
    "-s",
    "--subtitles",
    cls=Mutex,
    default="",
    not_required_if=["bilingual"],
    help='Subtitle languages. split by ","',
)
def local(path, output, bilingual, subtitles):
    # A bilingual request must contain a comma-separated pair of languages.
    lacks_pair = bool(bilingual) and "," not in bilingual
    if lacks_pair:
        raise click.UsageError(
            "Illegal usage: `--bilingual` requires 2 language subtitles, you can use `cn,en` or `en,cn`"
        )
    extracted_audio = generate_audio(path)
    subtitle_tracks = generate_vtt(extracted_audio, bilingual, subtitles)
    if not subtitle_tracks:
        logger.warning("No subtitles generated.")
    for rendered in generate_video(path, subtitle_tracks, output):
        logger.info(f"Generated: {rendered}")
132 | 
133 | 
# Download a video from a URL and add subtitles to it.
@cli.command()
@click.option("-u", "--url", required=True, help="Video url.")
@click.option("-o", "--output", default=OUTPUT_DIR, help="Generated video dir.")
@click.option(
    "-b",
    "--bilingual",
    cls=Mutex,
    default="",
    not_required_if=["subtitles"],
    help="Use bilingual subtitle",
)
@click.option(
    "-s",
    "--subtitles",
    cls=Mutex,
    default="zh,en",
    not_required_if=["bilingual"],
    help='Subtitle languages. split by ","',
)
def web(url, output, bilingual, subtitles):
    # A bilingual request must contain a comma-separated pair of languages.
    lacks_pair = bool(bilingual) and "," not in bilingual
    if lacks_pair:
        raise click.UsageError(
            "Illegal usage: `--bilingual` requires 2 language subtitles, you can use `cn,en` or `en,cn`"
        )
    downloaded = download(url)
    subtitle_tracks = generate_vtt(downloaded.audio, bilingual, subtitles)
    if not subtitle_tracks:
        logger.warning("No subtitles generated.")
    for rendered in generate_video(downloaded.video, subtitle_tracks, output):
        logger.info(f"Generated: {rendered}")
164 | 
165 | 
# Run the click command group when this file is executed as a script.
if __name__ == "__main__":
    cli()
168 | 


--------------------------------------------------------------------------------
/poetry.toml:
--------------------------------------------------------------------------------
1 | [virtualenvs]
2 | in-project = true
3 | 
4 | [installer]
5 | modern-installation = false


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "vt"
 3 | version = "0.1.0"
 4 | description = ""
 5 | authors = ["Bob Lin <bob401710@gmail.com>"]
 6 | readme = "README.md"
 7 | 
 8 | [tool.poetry.dependencies]
 9 | python = "^3.11"
10 | yt-dlp = "^2023.11.16"
11 | pydantic = "^2.5.2"
12 | click = {version = "^8.1.7"}
13 | ruff = "^0.1.7"
14 | mypy = "^1.7.1"
15 | openai-whisper = {git = "https://github.com/169/whisper.git", rev = "marker"}
16 | loguru = "^0.7.2"
17 | streamlit = "^1.29.0"
18 | openai = "^1.4.0"
19 | tenacity = "^8.2.3"
20 | watchdog = "^3.0.0"
21 | streamlit-component-video = "^0.3.0"
22 | 
23 | [tool.mypy]
24 | ignore_missing_imports = true
25 | check_untyped_defs = true
26 | 
27 | [build-system]
28 | requires = ["poetry-core"]
29 | build-backend = "poetry.core.masonry.api"
30 | 
31 | [tool.poetry.scripts]
32 | vt = "main:cli"


--------------------------------------------------------------------------------
/src/fonts/PlaypenSans.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/169/intelli-video/52f880e53c0c2208953c212557fecd53988b2255/src/fonts/PlaypenSans.ttf


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/169/intelli-video/52f880e53c0c2208953c212557fecd53988b2255/tests/__init__.py


--------------------------------------------------------------------------------
/vt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/169/intelli-video/52f880e53c0c2208953c212557fecd53988b2255/vt/__init__.py


--------------------------------------------------------------------------------
/webui.py:
--------------------------------------------------------------------------------
  1 | # TODO:
  2 | # 1. auto save
  3 | import mimetypes
  4 | import os.path
  5 | 
  6 | import streamlit as st
  7 | import whisper
  8 | from streamlit_component_video import streamlit_component_video
  9 | from whisper.tokenizer import LANGUAGES
 10 | 
 11 | from config import (
 12 |     DOWNLOAD_DIR,
 13 |     EN_FONT_NAME,
 14 |     EN_FONT_SIZE,
 15 |     FONT_COLOR,
 16 |     OUTLINE_COLOR,
 17 |     OUTPUT_DIR,
 18 |     WHISPER_MODEL,
 19 |     ZH_FONT_NAME,
 20 |     ZH_FONT_SIZE,
 21 | )
 22 | from core.audio import generate_audio, generate_subtitle
 23 | from core.downloader import download
 24 | from core.utils import _write_vtt, parse_vtt
 25 | from core.video import generate_video
 26 | 
 27 | if "subtitle_path" not in st.session_state:
 28 |     st.session_state.update(
 29 |         {
 30 |             "segments": [],
 31 |             "replaced_segments": [],
 32 |             "current_seg_index": 0,
 33 |             "current_time": 0,
 34 |             "vtt_content": "",
 35 |             "subtitle_path": "",
 36 |             "widget_values": {},
 37 |             "video": {
 38 |                 "mimetype": "video/mp4",
 39 |                 "path": "",
 40 |                 "track": "",
 41 |                 "current_time": 0,
 42 |             },
 43 |         }
 44 |     )
 45 | 
 46 | 
 47 | def make_recording_widget(f):
 48 |     def wrapper(label, *args, **kwargs):
 49 |         widget_value = f(*args, **kwargs)
 50 |         st.session_state.widget_values[label] = widget_value
 51 |         return widget_value
 52 | 
 53 |     return wrapper
 54 | 
 55 | 
 56 | with st.sidebar:
 57 |     st.info("🎈Configure")
 58 | 
 59 |     st.write("## File")
 60 |     mimetype = st.selectbox(
 61 |         "Video MIMETYPE",
 62 |         [v for k, v in mimetypes.types_map.items() if "video" in v],
 63 |         index=2,
 64 |     )
 65 | 
 66 |     download_dir: str = st.text_input("Download Dir", DOWNLOAD_DIR) or ""
 67 |     output_dir: str = st.text_input("Output Dir", OUTPUT_DIR) or ""
 68 | 
 69 |     st.write("## Audio")
 70 | 
 71 |     try:
 72 |         index = whisper.available_models().index(WHISPER_MODEL)
 73 |     except ValueError:
 74 |         index = 0
 75 | 
 76 |     model: str = (
 77 |         st.selectbox(
 78 |             "Whisper model",
 79 |             whisper.available_models(),
 80 |             index=index,
 81 |         )
 82 |         or ""
 83 |     )
 84 | 
 85 |     st.write("## Subtitle")
 86 | 
 87 |     language: str = (
 88 |         st.selectbox(
 89 |             "Language",
 90 |             LANGUAGES.keys(),
 91 |             index=0,
 92 |         )
 93 |         or ""
 94 |     )
 95 |     method: str = (
 96 |         st.selectbox(
 97 |             "Method",
 98 |             ["whisper", "openai_api"],
 99 |             index=0,
100 |         )
101 |         or ""
102 |     )
103 |     zh_font_name = st.text_input("ZH Font Name", ZH_FONT_NAME)
104 |     en_font_name = st.text_input("ZH Font Name", EN_FONT_NAME)
105 |     en_font_size = st.text_input("ZH Font Size", ZH_FONT_SIZE)
106 |     zh_font_size = st.text_input("ZH Font Size", EN_FONT_SIZE)
107 |     font_color = st.text_input("Font Color", FONT_COLOR)
108 |     outline_color = st.text_input("ZH Font Name", OUTLINE_COLOR)
109 | 
110 |     st.write("## Video")
111 | 
video_path = st.text_input("Video Path or URL")
subtitle_path = st.text_input("VTT Path or URL", value=st.session_state.subtitle_path)

# Load the subtitle file once, as long as no segments have been parsed yet.
if subtitle_path and not st.session_state.segments:
    try:
        # WebVTT files are UTF-8, so do not depend on the locale's encoding.
        with open(subtitle_path, encoding="utf-8") as f:
            content = f.read()
    except OSError as exc:
        # A missing or unreadable path is reported instead of crashing the page.
        st.error(f"Cannot read subtitle file: {exc}")
    else:
        st.session_state.vtt_content = content
        # Parsed twice so the edited copy holds separate objects.
        st.session_state.segments = parse_vtt(content)
        st.session_state.replaced_segments = parse_vtt(content)
122 | 
123 | 
def get_current_vtt_content() -> str:
    """Return the text of the segment covering the player's current time.

    The time is taken from the value recorded for ``"video_component"``. The
    index of the matching segment is stored in the session state under
    ``"current_seg_index"``. An empty string is returned when nothing has been
    recorded yet or no segment covers that time.
    """
    recorded = st.session_state["widget_values"]
    if not recorded or not recorded["video_component"]:
        return ""
    playhead = recorded["video_component"]["current_time"]
    for position, segment in enumerate(st.session_state.segments):
        if not (segment.start <= playhead <= segment.end):
            continue
        st.session_state["current_seg_index"] = position
        return segment.text
    return ""
136 | 
137 | 
def subtitle_callback(path: str) -> None:
    """Generate a VTT subtitle for ``path`` and remember where it was written.

    ``path`` is downloaded when it starts with ``http``; otherwise its audio is
    extracted locally. The value returned by ``generate_subtitle`` is stored in
    ``st.session_state.subtitle_path``.
    """
    if not path:
        st.error("Video Path or URL is required.")
        return
    with st.status("Generate subtitle..."):
        is_remote = path.startswith("http")
        if is_remote:
            st.write("Download video...")
            audio = download(path).audio
        else:
            st.write("Generating audio...")
            audio = generate_audio(path)
        st.write("Generating subtitle...")
        generated = generate_subtitle(
            audio, method, language, "vtt", output_dir, model_name=model
        )
        st.session_state.subtitle_path = generated
154 | 
155 | 
def save_callback() -> None:
    """Write the edited segments to the subtitle file and adopt them as current."""
    if subtitle_path:
        _write_vtt(st.session_state.replaced_segments, subtitle_path)
        st.session_state.segments = st.session_state.replaced_segments
        return
    st.error("Subtitle is required.")
162 | 
163 | 
def preview_callback() -> None:
    """Point the video player at the entered video and subtitle track.

    Both a subtitle (from the input box or the session state) and a video path
    are required; an error is shown when either is missing.
    """
    track = subtitle_path or st.session_state.subtitle_path
    if not track:
        st.error("Subtitle is required.")
        return
    if not video_path:
        st.error("Video Path or URL is required.")
        return
    st.session_state["video"] = {
        "path": video_path,
        "mimetype": "video/mp4",
        "track": track,
        "current_time": "",
    }
177 | 
178 | 
def generate_callback() -> None:
    """Render the video with the current subtitle and report each output."""
    if not video_path:
        st.error("Video Path or URL is required.")
        return
    if not subtitle_path:
        st.error("Subtitle is required.")
        return
    tracks = [[subtitle_path, language]]
    for rendered in generate_video(video_path, tracks, output_dir):
        st.success(f"Video Generated: {rendered}")
188 | 
189 | 
# Action buttons, laid out side by side in five columns.
col1, col2, col3, col4, col5 = st.columns(5)

with col1:
    st.button("Preview", on_click=preview_callback)

with col2:
    # The callback receives the video path entered above.
    st.button(
        "Generate VTT",
        on_click=subtitle_callback,
        args=(video_path,),
    )

with col3:
    st.button(
        "Save VTT",
        on_click=save_callback,
    )
with col4:
    # Offers the VTT text held in session state, named after the subtitle file.
    st.download_button(
        "Download VTT", st.session_state.vtt_content, os.path.basename(subtitle_path)
    )

with col5:
    st.button("Generate", on_click=generate_callback)

# Render the video component; its return value is recorded under
# "video_component", which get_current_vtt_content() reads below.
make_recording_widget(streamlit_component_video)(
    label="video_component",
    path=st.session_state["video"]["path"],
    mimetype=st.session_state["video"]["mimetype"],
    track=st.session_state["video"]["track"],
    current_time=st.session_state["current_time"],
)

# Show the caption at the current playback time and copy any non-empty value
# of the box into the editable copy of that segment.
current_vtt = st.text_input("Subtitle", value=get_current_vtt_content())
if st.session_state.segments and current_vtt:
    st.session_state.replaced_segments[
        st.session_state.current_seg_index
    ].text = current_vtt
228 | 


--------------------------------------------------------------------------------