├── docs ├── blog │ ├── index.md │ ├── .meta.yml │ ├── .authors.yml │ └── posts │ │ ├── rec_other_lan.md │ │ └── rec_other_lan.en.md ├── doc_whl.md ├── sponsor.md ├── sponsor.en.md ├── faq.md ├── faq.en.md ├── online_demo.md ├── online_demo.en.md ├── quickstart.md ├── quickstart.en.md ├── tutorial │ ├── beginner.md │ ├── beginner.en.md │ ├── intermediate.md │ ├── intermediate.en.md │ ├── senior.md │ └── senior.en.md ├── index.md ├── index.en.md ├── changelog.md ├── README_zh.md └── changelog.en.md ├── requirements.txt ├── assets ├── logo.png ├── colab-badge.svg └── RapidVideOCRDemo.ipynb ├── tests ├── test_files │ ├── 2.mp4 │ ├── RGBImages │ │ ├── 0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg │ │ ├── 0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg │ │ ├── 0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg │ │ └── 0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg │ └── TXTImages │ │ ├── 0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg │ │ ├── 0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg │ │ ├── 0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg │ │ └── 0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg └── test_main.py ├── rapid_videocr ├── utils │ ├── __init__.py │ ├── typings.py │ ├── logger.py │ ├── crop_by_project.py │ └── utils.py ├── __init__.py ├── vsf_cli.py ├── export.py ├── main.py ├── vsf_ocr_cli.py └── ocr_processor.py ├── overrides ├── main.html └── partials │ └── comments.html ├── .pre-commit-config.yaml ├── .github ├── ISSUE_TEMPLATE │ ├── bug.md │ └── config.yml ├── workflows │ ├── SyncToGitee.yml │ ├── AutoPushToPypi.yml │ └── docs_build_develop.yml └── FUNDING.yml ├── demo.py ├── setup.py ├── .gitignore ├── mkdocs.yml ├── cliff.toml ├── README.md └── LICENSE /docs/blog/index.md: -------------------------------------------------------------------------------- 1 | # Blog 2 | -------------------------------------------------------------------------------- /docs/blog/.meta.yml: 
-------------------------------------------------------------------------------- 1 | comments: true 2 | hide: 3 | - feedback -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | rapidocr>=3.0.0,<4.0.0 3 | onnxruntime 4 | colorlog -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/HEAD/assets/logo.png -------------------------------------------------------------------------------- /docs/doc_whl.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://swhl.github.io/RapidVideOCR/docs/) 2 | -------------------------------------------------------------------------------- /tests/test_files/2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/HEAD/tests/test_files/2.mp4 -------------------------------------------------------------------------------- /docs/sponsor.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - navigation 5 | - toc 6 | --- 7 | 8 | 请点击 [link](https://github.com/SWHL#-sponsor). 
9 | -------------------------------------------------------------------------------- /rapid_videocr/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .logger import Logger 5 | -------------------------------------------------------------------------------- /docs/sponsor.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - navigation 5 | - toc 6 | --- 7 | 8 | Please click [link](https://github.com/SWHL#-sponsor). 9 | -------------------------------------------------------------------------------- /docs/blog/.authors.yml: -------------------------------------------------------------------------------- 1 | authors: 2 | SWHL: 3 | name: SWHL 4 | description: Creator 5 | avatar: https://avatars.githubusercontent.com/u/28639377?v=4 6 | url: https://github.com/SWHL 7 | -------------------------------------------------------------------------------- /tests/test_files/RGBImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/HEAD/tests/test_files/RGBImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/RGBImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/HEAD/tests/test_files/RGBImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/RGBImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/HEAD/tests/test_files/RGBImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/RGBImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/HEAD/tests/test_files/RGBImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/TXTImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/HEAD/tests/test_files/TXTImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/TXTImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/HEAD/tests/test_files/TXTImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/TXTImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/HEAD/tests/test_files/TXTImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/TXTImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/HEAD/tests/test_files/TXTImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block outdated %} 4 | You're not viewing the latest version. 5 | 6 | Click here to go to latest. 7 | 8 | {% endblock %} 9 | -------------------------------------------------------------------------------- /rapid_videocr/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidVideOCR, RapidVideOCRExeception, RapidVideOCRInput 5 | from .vsf_ocr_cli import RapidVideoSubFinderOCR 6 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - toc 5 | --- 6 | 7 | #### Q: 装完环境之后,运行`python main.py`之后,报错**OSError: [WinError 126] 找不到指定的模組** 8 | 9 | **A**: 原因是Shapely库没有正确安装,如果是在Windows,可以在[Shapely whl](https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely)下载对应的whl包,离线安装即可;另外一种解决办法是用conda安装也可。(@[hongyuntw](https://github.com/hongyuntw)) 10 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/myint/autoflake 3 | rev: v2.1.1 4 | hooks: 5 | - id: autoflake 6 | args: 7 | [ 8 | "--recursive", 9 | "--in-place", 10 | "--remove-all-unused-imports", 11 | "--remove-unused-variable", 12 | "--ignore-init-module-imports", 13 | ] 14 | - repo: https://github.com/psf/black 15 
| rev: 23.1.0 16 | hooks: 17 | - id: black 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐞 Bug 3 | about: Bug 4 | title: 'Bug' 5 | labels: 'Bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 请提供下述完整信息以便快速定位问题 11 | (Please provide the following information to quickly locate the problem) 12 | - **系统环境/System Environment**: 13 | - **使用的是哪门语言的程序/Which programing language**: 14 | - **所使用语言相关版本信息/Version**: 15 | - **OnnxRuntime版本/OnnxRuntime Version**: 16 | - **使用当前库的版本/Use version**: 17 | - **可复现问题的demo和文件/Demo of reproducible problems**: 18 | - **完整报错/Complete Error Message**: 19 | - **可能的解决方案/Possible solutions**: -------------------------------------------------------------------------------- /docs/faq.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - toc 5 | --- 6 | 7 | 8 | #### Q: After setting up the environment and running `python main.py`, an error message appears: **OSError: [WinError 126] The specified module could not be found** 9 | 10 | **A**: The reason is that the Shapely library is not installed correctly. If you are using Windows, you can download the corresponding whl package from [Shapely whl](https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely) and use the offline installer. Another option is to install it using conda. (@[hongyuntw](https://github.com/hongyuntw)) 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: ❓ Questions 4 | url: https://github.com/SWHL/RapidVideOCR/discussions/categories/q-a 5 | about: Please use the community forum for help and questions regarding RapidVideOCR. 
6 | - name: 💡 Feature requests and ideas 7 | url: https://github.com/SWHL/RapidVideOCR/discussions/categories/ideas 8 | about: Please vote for and post new feature ideas in the community forum. 9 | - name: 📖 Documentation 10 | url: https://swhl.github.io/RapidVideOCR/docs 11 | about: A great place to find instructions and answers about RapidVideOCR. 12 | -------------------------------------------------------------------------------- /docs/online_demo.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - navigation 5 | - toc 6 | --- 7 | 8 | 9 | ## 简介 10 | 11 | Hugging Face上的Demo和魔搭Demo是同一套代码,都是基于[streamlit](https://streamlit.io/)搭建而来的。 12 | 13 | 两者区别在于,魔搭是国内平台,不需要科学上网即可访问使用;Hugging Face则需要科学上网。 14 | 搭建demo的目的在于: 快速查看提取字幕效果 15 | 16 | ## Demo源码 17 | 18 | Hugging Face Demo: 19 | 20 | 魔搭社区: 21 | 22 | ## 在线Demo 23 | 24 | Hugging Face Demo: 25 | 26 | 魔搭社区: 27 | -------------------------------------------------------------------------------- /.github/workflows/SyncToGitee.yml: -------------------------------------------------------------------------------- 1 | name: syncToGitee 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | repo-sync: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout source codes 12 | uses: actions/checkout@v2 13 | 14 | - name: Mirror the Github organization repos to Gitee. 
15 | uses: Yikun/hub-mirror-action@master 16 | with: 17 | src: 'github/SWHL' 18 | dst: 'gitee/SWHL' 19 | dst_key: ${{ secrets.GITEE_PRIVATE_KEY }} 20 | dst_token: ${{ secrets.GITEE_TOKEN }} 21 | force_update: true 22 | # only sync this repo 23 | static_list: "RapidVideOCR" 24 | debug: true 25 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: https://raw.githubusercontent.com/RapidAI/.github/6db6b6b9273f3151094a462a61fbc8e88564562c/assets/Sponsor.png 14 | -------------------------------------------------------------------------------- /docs/online_demo.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - navigation 5 | - toc 6 | --- 7 | 8 | 9 | ## Introduction 10 | 11 | The demo on Hugging Face and MoDa are based on the same code, and both are built using [streamlit](https://streamlit.io/). 
12 | 13 | The difference between the two is that MoDa is a Chinese domestic platform and does not require a VPN; Hugging Face requires a VPN (note: this is not applicable outside of mainland China) 14 | 15 | The purpose of the demos are to:Quickly preview the results of extracted subtitles 16 | 17 | ## Demo Source Code 18 | 19 | Hugging Face Demo: 20 | 21 | ModelScope Spaces: 22 | 23 | ## Online Demo 24 | 25 | Hugging Face Demo: 26 | 27 | ModelScope Spaces: 28 | -------------------------------------------------------------------------------- /docs/quickstart.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - navigation 5 | - toc 6 | --- 7 | 8 | !!! note 9 | 10 | 该库的输入必须是来自VideoSubFinder软件输出的RGBImages或者TXTImages目录的路径。VideoSubFinder教程:[link](https://blog.csdn.net/shiwanghualuo/article/details/129174857?spm=1001.2014.3001.5501) 11 | 12 | ### 1. 安装 13 | 14 | ```bash linenums="1" 15 | pip install rapid_videocr 16 | ``` 17 | 18 | ### 2. 
使用 19 | 20 | === "命令行使用" 21 | 22 | ```bash linenums="1" 23 | rapid_videocr -i test_files/RGBImages 24 | ``` 25 | 26 | === "Python使用" 27 | 28 | ```python linenums="1" 29 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput 30 | 31 | input_args = RapidVideOCRInput(is_batch_rec=False) 32 | extractor = RapidVideOCR(input_args) 33 | 34 | rgb_dir = "tests/test_files/RGBImages" 35 | save_dir = "outputs" 36 | save_name = "a" 37 | 38 | # outputs/a.srt outputs/a.ass outputs/a.txt 39 | extractor(rgb_dir, save_dir, save_name=save_name) 40 | ``` 41 | -------------------------------------------------------------------------------- /rapid_videocr/utils/typings.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from dataclasses import dataclass 5 | from enum import Enum 6 | from typing import Any, Dict, Optional 7 | 8 | IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"} 9 | 10 | 11 | class VideoFormat(Enum): 12 | MP4 = ".mp4" 13 | AVI = ".avi" 14 | MOV = ".mov" 15 | MKV = ".mkv" 16 | 17 | 18 | class OutputFormat(Enum): 19 | TXT = "txt" 20 | SRT = "srt" 21 | ASS = "ass" 22 | ALL = "all" 23 | 24 | 25 | @dataclass 26 | class RapidVideOCRInput: 27 | is_batch_rec: bool = False 28 | batch_size: int = 10 29 | out_format: str = OutputFormat.ALL.value 30 | ocr_params: Optional[Dict[str, Any]] = None 31 | log_level: str = "info" # debug / info / warning / error / critical 32 | 33 | 34 | LOG_LEVEL_MAP = { 35 | 50: "CRITICAL", 36 | 40: "ERROR", 37 | 30: "WARNING", 38 | 20: "INFO", 39 | 10: "DEBUG", 40 | 0: "NOTSET", 41 | } 42 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | 5 | # 提取 + 识别 6 | from rapid_videocr import 
RapidVideOCRInput, RapidVideoSubFinderOCR, VideoSubFinderInput 7 | 8 | vsf_exe_path = ( 9 | r"G:\ProgramFiles\VideoSubFinder_6.10_x64\Release_x64\VideoSubFinderWXW.exe" 10 | ) 11 | vsf_input_params = VideoSubFinderInput(vsf_exe_path=vsf_exe_path) 12 | input_args = RapidVideOCRInput(is_batch_rec=False) 13 | vsf_ocr = RapidVideoSubFinderOCR(vsf_input_params, input_args) 14 | 15 | # video_path可以是目录或者具体video路径 16 | video_path = "test_files/tiny/2.mp4" 17 | save_dir = "outputs" 18 | vsf_ocr(video_path, save_dir) 19 | 20 | 21 | # 只识别 22 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput 23 | 24 | input_args = RapidVideOCRInput(is_batch_rec=False, log_level="critical") 25 | extractor = RapidVideOCR(input_args) 26 | 27 | rgb_dir = "tests/test_files/RGBImages" 28 | save_dir = "outputs" 29 | save_name = "a" 30 | 31 | # outputs/a.srt outputs/a.ass outputs/a.txt 32 | extractor(rgb_dir, save_dir, save_name=save_name) 33 | -------------------------------------------------------------------------------- /docs/quickstart.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - navigation 5 | - toc 6 | --- 7 | 8 | !!! note 9 | 10 | The input for this library must be a path to an RGBImages or TXTImages directory outputted by VideoSubFinder. VideoSubFinder tutorial (note: this blog post is written in Chinese):[link](https://blog.csdn.net/shiwanghualuo/article/details/129174857?spm=1001.2014.3001.5501) 11 | 12 | ### 1. Installation 13 | 14 | ```bash linenums="1" 15 | pip install rapid_videocr 16 | ``` 17 | 18 | ### 2. 
Usage 19 | 20 | === "CLI" 21 | 22 | ```bash linenums="1" 23 | rapid_videocr -i test_files/RGBImages 24 | ``` 25 | 26 | === "Python" 27 | 28 | ```python linenums="1" 29 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput 30 | 31 | input_args = RapidVideOCRInput(is_batch_rec=False) 32 | extractor = RapidVideOCR(input_args) 33 | 34 | rgb_dir = "tests/test_files/RGBImages" 35 | save_dir = "outputs" 36 | save_name = "a" 37 | 38 | # outputs/a.srt outputs/a.ass outputs/a.txt 39 | extractor(rgb_dir, save_dir, save_name=save_name) 40 | ``` 41 | -------------------------------------------------------------------------------- /.github/workflows/AutoPushToPypi.yml: -------------------------------------------------------------------------------- 1 | name: Push rapid_videocr to pypi 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | UnitTesting: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Pull latest code 13 | uses: actions/checkout@v4 14 | 15 | - name: Set up Python 3.10 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: '3.10' 19 | architecture: 'x64' 20 | 21 | - name: Unit testings 22 | run: | 23 | pip install -r requirements.txt --break-system-packages 24 | pip install pytest six --break-system-packages 25 | pytest -s tests/test_*.py 26 | 27 | GenerateWHL_PushPyPi: 28 | needs: UnitTesting 29 | runs-on: ubuntu-latest 30 | 31 | steps: 32 | - uses: actions/checkout@v4 33 | 34 | - name: Run setup.py 35 | run: | 36 | pip install wheel get_pypi_latest_version --break-system-packages 37 | 38 | python -m pip install --upgrade pip --break-system-packages 39 | python setup.py bdist_wheel ${{ github.ref_name }} 40 | 41 | - name: Publish distribution 📦 to PyPI 42 | uses: pypa/gh-action-pypi-publish@v1.5.0 43 | with: 44 | password: ${{ secrets.PYPI_API_TOKEN }} 45 | packages_dir: dist/ 46 | -------------------------------------------------------------------------------- /rapid_videocr/utils/logger.py: 
-------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | 6 | import colorlog 7 | 8 | 9 | class Logger: 10 | def __init__(self, log_level=logging.DEBUG, logger_name=None): 11 | self.logger = logging.getLogger(logger_name) 12 | self.logger.setLevel(log_level) 13 | self.logger.propagate = False 14 | 15 | formatter = colorlog.ColoredFormatter( 16 | "%(log_color)s[%(levelname)s] %(asctime)s %(filename)s:%(lineno)d: %(message)s", 17 | log_colors={ 18 | "DEBUG": "cyan", 19 | "INFO": "green", 20 | "WARNING": "yellow", 21 | "ERROR": "red", 22 | "CRITICAL": "red,bg_white", 23 | }, 24 | ) 25 | 26 | if not self.logger.handlers: 27 | console_handler = logging.StreamHandler() 28 | console_handler.setFormatter(formatter) 29 | 30 | for handler in self.logger.handlers: 31 | self.logger.removeHandler(handler) 32 | 33 | console_handler.setLevel(log_level) 34 | self.logger.addHandler(console_handler) 35 | 36 | def get_log(self): 37 | return self.logger 38 | 39 | 40 | logger = Logger(log_level=logging.INFO).get_log() 41 | -------------------------------------------------------------------------------- /.github/workflows/docs_build_develop.yml: -------------------------------------------------------------------------------- 1 | name: Build/Publish Develop Docs 2 | on: 3 | push: 4 | paths: 5 | - 'docs/**' 6 | - 'mkdocs.yml' 7 | - 'overrides/**' 8 | - '.github/workflows/docs_build_develop.yml' 9 | 10 | permissions: 11 | contents: write 12 | jobs: 13 | deploy: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 19 | - name: Configure Git Credentials 20 | run: | 21 | git config user.name github-actions[bot] 22 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 23 | - uses: actions/setup-python@v5 24 | with: 25 | python-version: 3.x 26 | - run: echo "cache_id=$(date --utc '+%V')" >> 
$GITHUB_ENV 27 | - uses: actions/cache@v4 28 | with: 29 | key: mkdocs-material-${{ env.cache_id }} 30 | path: .cache 31 | restore-keys: | 32 | mkdocs-material- 33 | - run: pip install mike mkdocs-material jieba mkdocs-git-revision-date-localized-plugin mkdocs-git-committers-plugin-2 mkdocs-static-i18n 34 | - run: | 35 | git fetch origin gh-pages --depth=1 36 | mike set-default main 37 | mike deploy --push --update-aliases main latest 38 | mike alias main --push 39 | -------------------------------------------------------------------------------- /rapid_videocr/utils/crop_by_project.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import cv2 5 | import numpy as np 6 | 7 | 8 | class CropByProject: 9 | """投影法裁剪""" 10 | 11 | def __init__(self, threshold=250): 12 | self.threshold = threshold 13 | 14 | def __call__(self, origin_img): 15 | image = cv2.cvtColor(origin_img, cv2.COLOR_BGR2GRAY) 16 | 17 | # 将图片二值化 18 | retval, img = cv2.threshold(image, self.threshold, 255, cv2.THRESH_BINARY_INV) 19 | 20 | # 使文字增长成块 21 | closed = cv2.dilate(img, None, iterations=1) 22 | 23 | # 水平投影 24 | x0, x1 = self.get_project_loc(closed, direction="width") 25 | 26 | # 竖直投影 27 | y0, y1 = self.get_project_loc(closed, direction="height") 28 | 29 | return origin_img[y0:y1, x0:x1] 30 | 31 | @staticmethod 32 | def get_project_loc(img, direction): 33 | """获得裁剪的起始和终点索引位置 34 | Args: 35 | img (ndarray): 二值化后得到的图像 36 | direction (str): 'width/height' 37 | Raises: 38 | ValueError: 不支持的求和方向 39 | Returns: 40 | tuple: 起始索引位置 41 | """ 42 | if direction == "width": 43 | axis = 0 44 | elif direction == "height": 45 | axis = 1 46 | else: 47 | raise ValueError(f"direction {direction} is not supported!") 48 | 49 | loc_sum = np.sum(img == 255, axis=axis) 50 | loc_range = np.argwhere(loc_sum > 0) 51 | i0, i1 = loc_range[0][0], loc_range[-1][0] 52 | return i0, i1 53 | 
-------------------------------------------------------------------------------- /docs/tutorial/beginner.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - toc 5 | --- 6 | 7 | ### 引言 8 | 9 | 考虑到提取视频字幕的小伙伴大多不是程序员行当,为了降低使用门槛,特此推出界面版的RapidVideOCR Desktop。 10 | 11 | RapidVideOCR Desktop需要搭配VideoSubFinder使用。它们两个关系如下图所示: 12 | 13 | ```mermaid 14 | flowchart LR 15 | A(VideoSubFinder) --提取字幕关键帧--> B(RapidVideOCR) --OCR--> C(SRT) 16 | ``` 17 | 18 | ### [VideoSubFinder使用教程](https://blog.csdn.net/shiwanghualuo/article/details/129174857) 19 | 20 | ### RapidVideOCR Desktop使用教程 21 | 22 | #### Step 1: 下载对应平台的 **RapidVideOCR Desktop** 压缩包 23 | 24 | [Github下载最新版本](https://github.com/SWHL/RapidVideOCRDesktop/releases) 25 | 26 | QQ群共享文件下载,QQ群号:706807542, 或者扫码加入: 27 |
28 | 29 |
30 | 31 | #### Step 2: 解压zip包,双击`RapidVideOCR.exe`(以Windows平台为例) 32 | 33 | #### Step 3: 界面如下图所示 34 | 35 |
36 | 37 |
38 | 39 | #### Step 4: 界面各个部分介绍 40 | 41 | RapidVideOCR: 42 | 43 | - **图像目录**:指的是 **VideoSubFinder** 软件生成的 **RGBImages** 或者 **TXTImages** 目录,必须是这两个目录之一 44 | - **保存路径**:识别转换后的结果,包括srt文件、ass文件、和txt文件 45 | - **叠图识别**:勾选后,识别速度会变快。准确率可能差些 46 | 47 | VideoSubFinder + RapidVideOCR: 48 | 49 | - **VSF exe路径** :本地安装的VideoSubFinder全路径,例如:`G:\ProgramFiles\VideoSubFinder_6.10_x64\Release_x64\VideoSubFinderWXW.exe` 50 | - **视频目录** :想要提取硬字幕的视频所在目录 51 | - **保存路径** :保存提取结果的目录 52 | 53 | #### Step 5:上述都填好之后,点击OK按钮,即可开始识别 54 | -------------------------------------------------------------------------------- /docs/blog/posts/rec_other_lan.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 如何识别其他语言字幕? 3 | date: 2025-09-10 4 | authors: [SWHL] 5 | slug: how-to-rec-other-lan 6 | categories: 7 | - General 8 | comments: true 9 | --- 10 | 11 | 12 | ### 引言 13 | 14 | - 当前,RapidVideOCR是直接使用的`rapidocr_onnxruntime`的默认配置,因此仅能识别中英文的字幕文字。 15 | - 由于`rapidocr_onnxruntime`具备传入其他多语言识别模型的接口,因此RapidVieOCR具备了可扩展性,本篇文章特此来说明如何操作使用。 16 | - 本篇文章以[discussions #40](https://github.com/SWHL/RapidVideOCR/discussions/40)中提出的识别法语字幕为例说明,其他语种同理可得。 17 | 18 | ### 1. 正确安装使用RapidVideOCR 19 | 20 | 请参考教程[link](https://swhl.github.io/RapidVideOCR/docs/tutorial/senior/) 21 | 22 | ### 2. 
借助PaddleOCRConvert工具来转换法语识别模型为ONNX 23 | 24 | 请参考教程[link](https://github.com/RapidAI/PaddleOCRModelConvert) 25 | 26 | 其中, 27 | 28 | 模型路径:`https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar` 29 | 30 | 字典路径:`https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/ppocr/utils/dict/french_dict.txt` 31 | 32 | 其他语言的模型下载地址参见:[paddleocr whl](https://files.pythonhosted.org/packages/8f/d0/1a2f9430f61781beb16556182baa938e8f93c8b46c27ad5865a5655fae05/paddleocr-2.7.0.3-py3-none-any.whl)源码中`paddleocr.py`文件中 33 | 34 | 字典链接参见:[link](https://github.com/PaddlePaddle/PaddleOCR/tree/799c144ab3b0b5d19a37c7e85c47e88ff27c643d/ppocr/utils/dict) 35 | 36 | 最终可以得到一个法语识别模型:`french_mobile_v2.0_rec_infer.onnx` 37 | 38 | ### 3. 识别法语字幕 39 | 40 | !!! note 41 | 42 | `rapid_videocr>=v3.0.0` 43 | 44 | ```python linenums="1" 45 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput 46 | 47 | input_args = RapidVideOCRInput( 48 | is_batch_rec=False, 49 | ocr_params={"Rec.model_path": "french_mobile_v2.0_rec_infer.onnx"}, 50 | ) 51 | extractor = RapidVideOCR(input_args) 52 | 53 | rgb_dir = "test_files/RGBImagesTiny" 54 | save_dir = "outputs" 55 | save_name = "a" 56 | 57 | # outputs/a.srt outputs/a.ass outputs/a.txt 58 | extractor(rgb_dir, save_dir, save_name=save_name) 59 | ``` 60 | -------------------------------------------------------------------------------- /overrides/partials/comments.html: -------------------------------------------------------------------------------- 1 | {% if page.meta.comments %} 2 |

{{ lang.t("meta.comments") }}

3 | 4 | 5 | 10 | 11 | 12 | 13 | 47 | {% endif %} -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - navigation 5 | - toc 6 | --- 7 | 8 |
9 | 10 |
 
11 | 12 | 13 | 14 | 15 | PyPI 16 | 17 | 18 | SemVer2.0 19 | 20 | 21 |
22 | 23 | ### 简介 24 | 25 | `rapid_videocr`是一个自动视频硬字幕提取,生成对应`srt | ass | txt`文件的工具。 26 | 27 | 支持字幕语言:[支持语种列表](https://rapidai.github.io/RapidOCRDocs/main/model_list/#_4),因为该工具依赖`rapidocr`库,因此`rapidocr`支持识别的语言,`rapid_videocr`均是支持的。 28 | 29 | 优势如下: 30 | 31 | - **提取更快**:与[VideoSubFinder](https://sourceforge.net/projects/videosubfinder/)软件结合使用,提取关键字幕帧更快。 32 | - **识别更准**:采用[RapidOCR](https://github.com/RapidAI/RapidOCR)作为识别库。 33 | - **使用更方便**:pip直接安装即可使用。 34 | 35 | 如果有帮助到您的话,请给个小星星⭐。 36 | 37 | ### 整体框架 38 | 39 | ```mermaid 40 | flowchart LR 41 | A[/Video/] --Extract subtitle key frame--> B(VideoSubFinder) --OCR-->C(RapidVideOCR) 42 | C --Convert--> D[/"SRT | ASS | TXT"/] 43 | ``` 44 | 45 | ### [在线Demo](https://huggingface.co/spaces/SWHL/RapidVideOCR) 46 | 47 |
48 | Demo 49 |
50 | -------------------------------------------------------------------------------- /assets/colab-badge.svg: -------------------------------------------------------------------------------- 1 | Open in ColabOpen in Colab 2 | -------------------------------------------------------------------------------- /docs/tutorial/beginner.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - toc 5 | --- 6 | 7 | ### Introduction 8 | 9 | Considering that most people who extract video subtitles are not programmers, in order to lower the threshold of use, we hereby launch the GUI version of RapidVideOCR Desktop. 10 | 11 | RapidVideOCR Desktop needs to be used with VideoSubFinder. The relationship between the two is shown in the figure below: 12 | 13 | ```mermaid 14 | flowchart LR 15 | A[VideoSubFinder] -- Extract subtitle keyframes --> B[RapidVideOCR] -- OCR --> C(SRT) 16 | ``` 17 | 18 | ### [VideoSubFinder Tutorial (in Chinese)](https://blog.csdn.net/shiwanghualuo/article/details/129174857) 19 | 20 | ### RapidVideOCR Desktop Tutorial 21 | 22 | #### Step 1: Download the **RapidVideOCR Desktop** release archive for your corresponding platform 23 | 24 | - [Download the latest version from Github](https://github.com/SWHL/RapidVideOCRDesktop/releases) 25 | - Download from QQ Group shared file, QQ Group number: 706807542, or scan the QR code to join: 26 | 27 |
28 | 29 |
30 | 31 | #### Step 2: Uncompress the zip archive, and double-click `RapidVideOCR.exe` (using Windows for example) 32 | 33 | #### Step 3: The GUI is shown below 34 | 35 |
36 | 37 |
38 | 39 | #### Step 4: Introduction to each part of the GUI 40 | 41 | RapidVideOCR: 42 | 43 | - **Image Directory (图像目录)**: Points to the **RGBImages** or **TXTImages** directory generated by **VideoSubFinder**,must be one of the two directories 44 | - **Output Directory (保存路径)**: Where to save the converted results,including srt files, ass files, and txt files 45 | - **Overlay Recognition (叠图识别)**: After checking, the recognition speed will be 46 | faster. The accuracy may be lower 47 | 48 | VideoSubFinder + RapidVideOCR: 49 | 50 | - **VSF exe path**: The full path to the locally installed VideoSubFinder executable, for example: `G:\ProgramFiles\VideoSubFinder_6.10_x64\Release_x64\VideoSubFinderWXW.exe` 51 | - **Video path**: The path to where the video to extract hard subtitles from is located 52 | - **Output directory**:The directory where the extraction results are saved 53 | 54 | #### Step 5: After filling in the above, click the OK button to start recognition 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: str) -> List: 13 | if not isinstance(txt_path, str): 14 | txt_path = str(txt_path) 15 | 16 | with open(txt_path, "r", encoding="utf-8") as f: 17 | data = list(map(lambda x: x.rstrip("\n"), f)) 18 | return data 19 | 20 | 21 | def get_readme() -> str: 22 | root_dir = Path(__file__).resolve().parent 23 | readme_path = str(root_dir / "docs" / "doc_whl.md") 24 | with open(readme_path, "r", encoding="utf-8") as f: 25 | readme = f.read() 26 | return readme 27 | 28 | 29 | MODULE_NAME = "rapid_videocr" 30 | 31 | obtainer = GetPyPiLatestVersion() 32 | 
latest_version = obtainer(MODULE_NAME) 33 | VERSION_NUM = obtainer.version_add_one(latest_version) 34 | 35 | # 优先提取commit message中的语义化版本号,如无,则自动加1 36 | if len(sys.argv) > 2: 37 | match_str = " ".join(sys.argv[2:]) 38 | matched_versions = obtainer.extract_version(match_str) 39 | if matched_versions: 40 | VERSION_NUM = matched_versions 41 | sys.argv = sys.argv[:2] 42 | 43 | setuptools.setup( 44 | name=MODULE_NAME, 45 | version=VERSION_NUM, 46 | platforms="Any", 47 | description="Tool for extracting hard subtitles from videos.", 48 | long_description=get_readme(), 49 | long_description_content_type="text/markdown", 50 | author="SWHL", 51 | author_email="liekkaskono@163.com", 52 | url="https://github.com/SWHL/RapidVideOCR.git", 53 | license="Apache-2.0", 54 | include_package_data=True, 55 | install_requires=read_txt("requirements.txt"), 56 | packages=setuptools.find_packages(), 57 | keywords=["rapidocr,videocr,subtitle"], 58 | classifiers=[ 59 | "Programming Language :: Python :: 3.6", 60 | "Programming Language :: Python :: 3.7", 61 | "Programming Language :: Python :: 3.8", 62 | "Programming Language :: Python :: 3.9", 63 | "Programming Language :: Python :: 3.10", 64 | "Programming Language :: Python :: 3.11", 65 | "Programming Language :: Python :: 3.12", 66 | "Programming Language :: Python :: 3.13", 67 | ], 68 | python_requires=">=3.6", 69 | entry_points={ 70 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 71 | }, 72 | ) 73 | -------------------------------------------------------------------------------- /docs/blog/posts/rec_other_lan.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: How to recognize other language subtitle? 
3 | date: 2025-09-10 4 | authors: [SWHL] 5 | slug: how-to-rec-other-lan 6 | categories: 7 | - General 8 | comments: true 9 | 10 | --- 11 | ### Introduction 12 | 13 | Currently, RapidVideOCR directly uses the default configuration of `rapidocr_onnxruntime`, so it can only do OCR for subtitles in Chinese and English. 14 | 15 | Since `rapidocr_onnxruntime` has an interface for passing in other multilingual recognition models, RapidVieOCR has scalability. This article is here to explain how to use it. 16 | 17 | This article takes the French OCR solution proposed in [discussions #40](https://github.com/SWHL/RapidVideOCR/discussions/40) as an example, and other languages can be done in the same way. 18 | 19 | ### 1. Correctly install and use RapidVideOCR 20 | 21 | Please refer to this [link](https://swhl.github.io/RapidVideOCR/en/docs/tutorial/senior/) 22 | 23 | ### 2. Use PaddleOCR Convert tool to convert French recognition model to ONNX 24 | 25 | Please refer to the tutorial [link](https://github.com/RapidAI/PaddleOCRModelConvert) 26 | 27 | Using: 28 | 29 | Model path: `https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar`, 30 | 31 | Dictionary path: `https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/ppocr/utils/dict/french_dict.txt` 32 | 33 | For model download links for other languages, please refer to: [paddleocr whl](https://files.pythonhosted.org/packages/8f/d0/1a2f9430f61781beb16556182baa938e8f93c8b46c27ad5865a5655fae05/paddleocr-2.7.0.3-py3-none-any.whl) in the source `paddleocr.py` file 34 | 35 | For dictionary models, see: [link](https://github.com/PaddlePaddle/PaddleOCR/tree/799c144ab3b0b5d19a37c7e85c47e88ff27c643d/ppocr/utils/dict) 36 | 37 | Finally, a French recognition model can be obtained: `french_mobile_v2.0_rec_infer.onnx` 38 | 39 | ### 3. OCR French subtitles 40 | 41 | !!! 
note 42 | 43 | `rapid_videocr>=v3.0.0` 44 | 45 | ```python linenums="1" 46 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput 47 | 48 | input_args = RapidVideOCRInput( 49 | is_batch_rec=False, 50 | ocr_params={"Rec.model_path": "french_mobile_v2.0_rec_infer.onnx"}, 51 | ) 52 | extractor = RapidVideOCR(input_args) 53 | 54 | rgb_dir = "test_files/RGBImagesTiny" 55 | save_dir = "outputs" 56 | save_name = "a" 57 | 58 | # outputs/a.srt outputs/a.ass outputs/a.txt 59 | extractor(rgb_dir, save_dir, save_name=save_name) 60 | ``` 61 | -------------------------------------------------------------------------------- /rapid_videocr/vsf_cli.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import subprocess 5 | from dataclasses import asdict, dataclass 6 | from typing import Optional 7 | 8 | 9 | @dataclass 10 | class VideoSubFinderInput: 11 | vsf_exe_path: str 12 | clear_dirs: bool = True 13 | run_search: bool = True 14 | create_cleared_text_images: bool = True 15 | create_empty_sub: Optional[str] = None 16 | create_sub_from_cleared_txt_images: Optional[str] = None 17 | create_sub_from_txt_results: Optional[str] = None 18 | open_video_opencv: bool = True 19 | open_video_ffmpeg: bool = False 20 | use_cuda: bool = False 21 | start_time: Optional[str] = None 22 | end_time: Optional[str] = None 23 | top_video_image_percent_end: float = 0.2 24 | bottom_video_image_percent_end: float = 0.0 25 | left_video_image_percent_end: float = 0.0 26 | right_video_image_percent_end: float = 1.0 27 | general_settings: Optional[str] = None 28 | num_threads: int = 2 29 | num_ocr_threads: int = 1 30 | 31 | 32 | class VideoSubFinder: 33 | SHORT_FLAG_MAP = { 34 | "clear_dirs": "-c", 35 | "run_search": "-r", 36 | "create_cleared_text_images": "-ccti", 37 | "create_empty_sub": "-ces", 38 | "create_sub_from_cleared_txt_images": "-cscti", 39 | "create_sub_from_txt_results": 
"-cstxt", 40 | "open_video_opencv": "-ovocv", 41 | "open_video_ffmpeg": "-ovffmpeg", 42 | "use_cuda": "-uc", 43 | "start_time": "-s", 44 | "end_time": "-e", 45 | "top_video_image_percent_end": "-te", 46 | "bottom_video_image_percent_end": "-be", 47 | "left_video_image_percent_end": "-le", 48 | "right_video_image_percent_end": "-re", 49 | "general_settings": "-gs", 50 | "num_threads": "-nthr", 51 | "num_ocr_threads": "-nocrthr", 52 | } 53 | 54 | def __init__(self, input_params: VideoSubFinderInput): 55 | param_dict = asdict(input_params) 56 | run_list = [input_params.vsf_exe_path] 57 | for k, v in param_dict.items(): 58 | if k == "vsf_exe_path": 59 | continue 60 | 61 | if v is None or str(v) == "False": 62 | continue 63 | 64 | flag = self.SHORT_FLAG_MAP[k] 65 | run_list.append(f"{flag}" if str(v) == "True" else f"{flag} {v}") 66 | self.run_list = run_list 67 | 68 | def __call__(self, video_path: str, output_dir: str) -> str: 69 | self.run_list.extend(["--input_video", video_path, "--output_dir", output_dir]) 70 | try: 71 | subprocess.run(self.run_list, check=False) 72 | return output_dir 73 | except Exception as e: 74 | raise e 75 | -------------------------------------------------------------------------------- /docs/index.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - navigation 5 | - toc 6 | --- 7 | 8 |
9 | 10 | 11 |
 
12 | 13 | 14 | 15 | 16 | 17 | PyPI 18 | 19 | 20 | SemVer2.0 21 | 22 | 23 |
24 | 25 | ### Introduction 26 | 27 | - Extracts hard subtitles from videos and automatically generates the corresponding `srt | ass | txt` files. 28 | - Supported subtitle languages: Chinese | English (For other supported languages, see: [List of supported languages](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)) 29 | - The advantages are as follows: 30 | - **Faster extraction**: Used in conjunction with [VideoSubFinder](https://sourceforge.net/projects/videosubfinder/) software to extract key subtitle frames faster. 31 | - **More accurate recognition**: Use [RapidOCR](https://github.com/RapidAI/RapidOCR) as the recognition library. 32 | - **More convenient to use**: can be installed directly via pip and used right away. 33 | 34 | - For the desktop EXE version, please go to [RapidVideOCRDesktop](https://github.com/SWHL/RapidVideOCRDesktop). 35 | - If it helps you, please give a star ⭐. 36 | 37 | ### Overall framework 38 | 39 | ```mermaid 40 | flowchart LR 41 | A[/Video/] --Extract subtitle key frame--> B(VideoSubFinder) --OCR-->C(RapidVideOCR) 42 | C --Convert--> D[/"SRT | ASS | TXT"/] 43 | ``` 44 | 45 | ### [Online Demo](https://huggingface.co/spaces/SWHL/RapidVideOCR) 46 | 47 |
48 | Demo 49 |
50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | rapid_videocr/video_sub_finder/libs 2 | *.vscode 3 | outputs 4 | 5 | *.pyc 6 | 7 | *.onnx 8 | 9 | temp/ 10 | test_files/ 11 | 12 | .DS_Store 13 | 14 | *.bin 15 | 16 | .mypy_cache 17 | 18 | # Created by .ignore support plugin (hsz.mobi) 19 | ### Python template 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | .pytest_cache 25 | 26 | # C extensions 27 | *.so 28 | 29 | # Distribution / packaging 30 | .Python 31 | build/ 32 | develop-eggs/ 33 | dist/ 34 | downloads/ 35 | eggs/ 36 | .eggs/ 37 | lib/ 38 | lib64/ 39 | parts/ 40 | sdist/ 41 | var/ 42 | wheels/ 43 | pip-wheel-metadata/ 44 | share/python-wheels/ 45 | *.egg-info/ 46 | .installed.cfg 47 | *.egg 48 | MANIFEST 49 | 50 | # PyInstaller 51 | # Usually these files are written by a python script from a template 52 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
53 | # *.manifest 54 | # *.spec 55 | *.res 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .nox/ 65 | .coverage 66 | .coverage.* 67 | .cache 68 | nosetests.xml 69 | coverage.xml 70 | *.cover 71 | *.py,cover 72 | .hypothesis/ 73 | .pytest_cache/ 74 | 75 | # Translations 76 | *.mo 77 | *.pot 78 | 79 | # Django stuff: 80 | *.log 81 | local_settings.py 82 | db.sqlite3 83 | db.sqlite3-journal 84 | 85 | # Flask stuff: 86 | instance/ 87 | .webassets-cache 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # Jupyter Notebook 99 | .ipynb_checkpoints 100 | 101 | # IPython 102 | profile_default/ 103 | ipython_config.py 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # pipenv 109 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 110 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 111 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 112 | # install all needed dependencies. 113 | #Pipfile.lock 114 | 115 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | #idea 153 | .vs 154 | .vscode 155 | .idea 156 | /images 157 | /models 158 | 159 | #models 160 | *.onnx 161 | 162 | *.ttf 163 | *.ttc 164 | 165 | long1.jpg 166 | 167 | *.bin 168 | *.mapping 169 | *.xml 170 | 171 | *.pdiparams 172 | *.pdiparams.info 173 | *.pdmodel 174 | 175 | .DS_Store -------------------------------------------------------------------------------- /rapid_videocr/export.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from abc import ABC, abstractmethod 5 | from pathlib import Path 6 | from typing import List 7 | 8 | from .utils.typings import OutputFormat 9 | from .utils.utils import write_txt 10 | 11 | 12 | class ExportStrategy(ABC): 13 | @abstractmethod 14 | def export( 15 | self, 16 | save_dir: Path, 17 | save_name: str, 18 | srt_result: List[str], 19 | ass_result: List[str], 20 | txt_result: List[str], 21 | ): 22 | pass 23 | 24 | 25 | class TxtExportStrategy(ExportStrategy): 26 | def export( 27 | self, 28 | save_dir: Path, 29 | save_name: str, 30 | srt_result: List[str], 31 | ass_result: List[str], 32 | txt_result: List[str], 33 | ): 34 | file_path = save_dir / f"{save_name}.txt" 35 | write_txt(file_path, txt_result) 36 | 37 | 38 | class SrtExportStrategy(ExportStrategy): 39 | def export( 40 | self, 41 | save_dir: Path, 42 | 
save_name: str, 43 | srt_result: List[str], 44 | ass_result: List[str], 45 | txt_result: List[str], 46 | ): 47 | file_path = save_dir / f"{save_name}.srt" 48 | write_txt(file_path, srt_result) 49 | 50 | 51 | class AssExportStrategy(ExportStrategy): 52 | def export( 53 | self, 54 | save_dir: Path, 55 | save_name: str, 56 | srt_result: List[str], # unused here but kept for signature 57 | ass_result: List[str], 58 | txt_result: List[str], 59 | ): 60 | header = [ 61 | "[Script Info]", 62 | "; Script generated by RapidVideOCR", 63 | "ScriptType: v4.00+", 64 | "PlayResX: 1920", 65 | "PlayResY: 1080", 66 | "", 67 | "[V4+ Styles]", 68 | "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, " 69 | "Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, " 70 | "Alignment, MarginL, MarginR, MarginV, Encoding", 71 | "Style: Default,Arial,54,&H00FFFFFF,&H0000FFFF,&H00000000,&H64000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1", 72 | "", 73 | "[Events]", 74 | "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text", 75 | ] 76 | 77 | file_path = save_dir / f"{save_name}.ass" 78 | write_txt(file_path, header + [""] + ass_result) 79 | 80 | 81 | class AllExportStrategy(ExportStrategy): 82 | def export( 83 | self, 84 | save_dir: Path, 85 | save_name: str, 86 | srt_result: List[str], 87 | ass_result: List[str], 88 | txt_result: List[str], 89 | ): 90 | txt_export = TxtExportStrategy() 91 | srt_export = SrtExportStrategy() 92 | ass_export = AssExportStrategy() 93 | 94 | txt_export.export(save_dir, save_name, srt_result, ass_result, txt_result) 95 | srt_export.export(save_dir, save_name, srt_result, ass_result, txt_result) 96 | ass_export.export(save_dir, save_name, srt_result, ass_result, txt_result) 97 | 98 | 99 | class ExportStrategyFactory: 100 | @staticmethod 101 | def create_strategy(out_format: str = OutputFormat.ALL.value) -> ExportStrategy: 102 | strategies = { 103 | 
OutputFormat.TXT.value: TxtExportStrategy(), 104 | OutputFormat.SRT.value: SrtExportStrategy(), 105 | OutputFormat.ASS.value: AssExportStrategy(), 106 | OutputFormat.ALL.value: AllExportStrategy(), 107 | } 108 | 109 | if strategy := strategies.get(out_format): 110 | return strategy 111 | raise ValueError(f"Unsupported output format: {out_format}") 112 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - toc 5 | --- 6 | 7 | 8 | ### 📣 后续更新日志将移步到[Release](https://github.com/SWHL/RapidVideOCR/releases)界面,这里不再更新 9 | 10 | #### 🚩2023-10-08 v2.2.8 update 11 | 12 | - 适配`rapidocr_onnxruntime`的相关参数,可以通过RapidVideOCR类传入,从而更加灵活指定不同语言的模型。 13 | 14 | #### ♦ 2023-08-05 v2.2.4 update 15 | 16 | - 修复批量识别模式下,索引错误。 17 | - 添加日志记录模块,便于使用桌面版,快速记录问题,便于反馈。 18 | 19 | #### 🛶2023-07-19 v2.2.3 update 20 | 21 | - 增加对VSF的参数的适配,命令行模式和类初始化时,可以指定VSF命令的同名参数。详细使用参见[link](https://github.com/SWHL/RapidVideOCR/wiki/RapidVideOCR%E9%AB%98%E7%BA%A7%E6%95%99%E7%A8%8B%EF%BC%88%E6%9C%89python%E5%9F%BA%E7%A1%80%E7%9A%84%E5%B0%8F%E4%BC%99%E4%BC%B4%EF%BC%89) 22 | 23 | #### 🤓2023-07-08 v2.2.2 update 24 | 25 | - 修复批量识别时,不能读取中文路径的问题 26 | - 修复漏轴时,SRT中跳过问题。目前当出现某一轴未能识别,则会空出位置,便于校对。 27 | - 保留VSF识别的中间结果 28 | 29 | #### 🐲2023-06-22 v2.2.0 update 30 | 31 | - 该版本是向`v2.1.x`兼容的,也就是之前用法依然可以。 32 | - 将VSF的CLI整合到库中,只需指定`VideoSubFinderWXW.exe`的全路径即可。 33 | - 增加批量识别功能,指定视频目录,即可自动提取目录下所有视频字幕 34 | - 使用示例, 参见:[demo.py](https://github.com/SWHL/RapidVideOCR/blob/main/demo.py) 35 | 36 | #### 😀2023-05-12 v2.1.7 update 37 | 38 | - 优化代码 39 | - 添加`save_name`参数,可以灵活指定保存的`srt | txt`文件名称,默认是`result` 40 | 41 | #### 🐱2023-03-27 v2.1.6 update 42 | 43 | - 修复时间轴对不齐问题,详情参见[issue 23](https://github.com/SWHL/RapidVideOCR/issues/23) 44 | 45 | #### 👽2023-03-23 v2.1.5 update 46 | 47 | - 添加打印到屏幕的控制参数`is_print_console` 48 | - 调整`out_format`参数位置到初始化类时 49 | 50 | #### 😀2023-03-14 v2.1.3 
update 51 | 52 | - 修复输入`TXTImages`目录且叠字识别时错误 53 | 54 | #### 😜2023-03-12 v2.1.2 update 55 | 56 | - 修复索引错误,[#22](https://github.com/SWHL/RapidVideOCR/issues/22) 57 | 58 | #### 🎢2023-03-11 v2.1.1 update 59 | 60 | - 修复单图识别与之前版本差异问题 61 | - 默认识别模式更改为单图识别,是否使用叠图识别,请自行决定 62 | 63 | #### 🥇2023-03-10 v2.1.0 update 64 | 65 | - 添加叠字识别功能,速度更快,默认是叠字识别功能 66 | 67 | #### 🎈2023-03-02 v2.0.5~7 update 68 | 69 | - 修复生成的srt文件中的格式错误, [#19](https://github.com/SWHL/RapidVideOCR/issues/19) 70 | 71 | #### 🎫2023-02-17 v2.0.4 update 72 | 73 | - 针对传入的`TXTImages`目录,作了优化处理。相比于传入`RGBImages`,会更快和更准。推荐传入`TXTImages`目录 74 | 75 | #### 💎2023-02-17 v2.0.2 update 76 | 77 | - 修复同行字幕识别丢失空格问题 78 | 79 | #### 🎈2023-01-29 v1.1.10 update 80 | 81 | - 修复帧索引转时间戳时,索引为空错误 82 | 83 | #### 🧨2023-01-28 v1.1.9 update 84 | 85 | - 修复时间轴对不齐问题 86 | 87 | #### 👊 2023-01-15 v1.1.4 update 88 | 89 | - 添加输出txt格式的选项,目前v1.1.4版本默认输出srt和txt两种格式 90 | - 添加根据运行程序屏幕大小,调节选择字幕的框大小 91 | 92 | #### 🌈2023-01-10 v1.0.3 update 93 | 94 | - 将decord替换为OpenCV,因为decord处理MP4时,存在内存泄漏问题。详情参见:[#208](https://github.com/dmlc/decord/issues/208) 95 | 96 | #### 🎄2022-12-04 update 97 | 98 | - 添加交互式框定字幕位置功能,默认开启,更加好用,详情可参考下面的GIF图。感谢@[Johndirr](https://github.com/Johndirr)的建议。 99 | - 优化代码结构,将RapidOCR相关模型和配置文件放到`rapidocr`目录下 100 | - `rapidvideocr`的配置文件也放到对应目录下 101 | 102 | #### 🌼2022-05-08 update 103 | 104 | - 添加交互式确定二值化字幕图像阈值操作,仅仅支持Windows系统,可以通过`is_select_threshold = True`来使用 105 | - 优化代码 106 | 107 | #### 🎉2022-05-03 update 108 | 109 | - 添加GPU支持,具体配置教程参见:[onnxruntime-gpu版推理配置](https://github.com/RapidAI/RapidOCR/blob/main/python/onnxruntime_infer/README.md#onnxruntime-gpu%E7%89%88%E6%8E%A8%E7%90%86%E9%85%8D%E7%BD%AE) 110 | - 添加日文的支持,可以支持更多语种,具体参见:[支持语种列表](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99) 111 | 112 | #### 💡2022-05-01 update 113 | 114 | - 添加语音模块部分位于分支`asr_module` 115 | - 添加语音识别模块,由于该模块中解码部分只能在Linux和Mac上运行,因此如果想要使用该模块,请在Linux和Mac上。 116 | - 
目前语音识别代码来自[RapidASR/python](https://github.com/RapidAI/RapidASR/tree/main/python/base_paddlespeech)部分。模型来自[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell/asr0) 117 | - 经过简单测试,语音识别模块不是太准。-_-! 118 | 119 | #### 2022-03-09 update 120 | 121 | - 添加[常见问题模块](./faq.md),可以帮助大家跳过常见的小问题 122 | 123 | #### 2021-12-14 update 124 | 125 | - [x] 背景去除效果不好,导致丢失某些帧 126 | - 尝试采用图像分割的方法,经过测试,CPU下推理速度太慢,暂时放弃 127 | - 目前采用的固定的二值化阈值 128 | - [x] (2021-12-14)完善对应的英文文档 129 | - [x] (2021-12-14)添加运行耗时基准 130 | - [x] 添加具体参数说明 131 | - [x] 制作项目Logo 132 | - [ ] 更多的测试 133 | -------------------------------------------------------------------------------- /docs/README_zh.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
 
5 | 6 | 7 | 8 | 9 | 10 | PyPI 11 | 12 | 13 | SemVer2.0 14 | 15 | 16 | 简体中文 | [English](https://github.com/SWHL/RapidVideOCR) 17 |
18 | 19 | ### 简介 20 | 21 | `rapid_videocr`是一个自动视频硬字幕提取,生成对应`srt | ass | txt`文件的工具。 22 | 23 | 支持字幕语言:[支持语种列表](https://rapidai.github.io/RapidOCRDocs/main/model_list/#_4),因为该工具依赖`rapidocr`库,因此`rapidocr`支持识别的语言,`rapid_videocr`均是支持的。 24 | 25 | 优势如下: 26 | 27 | - **提取更快**:与[VideoSubFinder](https://sourceforge.net/projects/videosubfinder/)软件结合使用,提取关键字幕帧更快。 28 | - **识别更准**:采用[RapidOCR](https://github.com/RapidAI/RapidOCR)作为识别库。 29 | - **使用更方便**:pip直接安装即可使用。 30 | 31 | 桌面EXE版,请移步[RapidVideOCRDesktop](https://github.com/SWHL/RapidVideOCRDesktop) 32 | 33 | 如果有帮助到您的话,请给个小星星⭐。 34 | 35 | ### [在线Demo](https://www.modelscope.cn/studios/liekkas/RapidVideOCR/summary) 36 | 37 |
38 | Demo 39 |
40 | 41 | ### 整体框架 42 | 43 | ```mermaid 44 | flowchart LR 45 | A[/Video/] --Extract subtitle key frame--> B(VideoSubFinder) --OCR-->C(RapidVideOCR) 46 | C --Convert--> D[/"SRT | ASS | TXT"/] 47 | ``` 48 | 49 | ### 安装 50 | 51 | ```bash 52 | pip install rapid_videocr 53 | ``` 54 | 55 | ### 使用 56 | 57 | > [!NOTE] 58 | > 59 | > `rapid_videocr`输入图像路径必须是**VideoSubFinder**软件输出的RGBImages或TXTImages的路径。 60 | 61 | ```bash 62 | rapid_videocr -i RGBImages 63 | ``` 64 | 65 | 或者python脚本: 66 | 67 | ```python 68 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput 69 | 70 | input_args = RapidVideOCRInput(is_batch_rec=False) 71 | extractor = RapidVideOCR(input_args) 72 | 73 | rgb_dir = "tests/test_files/RGBImages" 74 | save_dir = "outputs" 75 | save_name = "a" 76 | 77 | # outputs/a.srt outputs/a.ass outputs/a.txt 78 | extractor(rgb_dir, save_dir, save_name=save_name) 79 | ``` 80 | 81 | ### 文档 82 | 83 | 完整文档请移步:[docs](https://swhl.github.io/RapidVideOCR/) 84 | 85 | ### 贡献者 86 | 87 |

88 | 89 | 90 | 91 |

92 | 93 | ### 贡献指南 94 | 95 | 我们感谢所有的贡献者为改进和提升 RapidVideOCR 所作出的努力。 96 | 97 | 欢迎提交请求。对于重大更改,请先打开issue讨论您想要改变的内容。 98 | 99 | 请确保适当更新测试。 100 | 101 | ### 加入社区 102 | 103 | 微信扫描以下二维码,关注 **RapidAI公众号**,回复video即可加入RapidVideOCR微信交流群: 104 |
105 | 106 |
107 | 108 | 扫码加入QQ群(706807542): 109 |
110 | 111 |
112 | 113 | ### [赞助](https://swhl.github.io/RapidVideOCR/docs/sponsor/) 114 | 115 | 如果您想要赞助该项目,可直接点击当前页最上面的Sponsor按钮,请写好备注( **您的Github账号名称** ),方便添加到赞助列表中。 116 | 117 | ### 开源许可证 118 | 119 | 该项目采用 [Apache 2.0 license](../LICENSE) 开源许可证。 120 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: RapidVideOCR 文档 2 | site_url: https://swhl.github.io/RapidVideOCR/ 3 | site_author: SWHL 4 | site_description: 5 | 🎦 Extract video hard subtitles and automatically generate corresponding srt files. 6 | 7 | repo_name: SWHL/RapidVideOCR 8 | repo_url: https://github.com/SWHL/RapidVideOCR 9 | 10 | copyright: Copyright © 2025 Maintained by SWHL. 11 | 12 | theme: 13 | name: material 14 | language: zh 15 | custom_dir: overrides 16 | features: 17 | - announce.dismiss 18 | - content.tooltips 19 | - content.code.copy 20 | - content.tabs.link 21 | - content.footnote.tooltips 22 | - navigation.expand # 默认打开所有的子节 23 | - navigation.tabs # 顶级索引被作为tab 24 | - navigation.tabs.sticky # tab始终可见 25 | - navigation.top # 开启顶部导航栏 26 | - navigation.tracking # 导航栏跟踪 27 | - navigation.footer 28 | - navigation.indexes 29 | - search.highlight # 搜索高亮 30 | - search.share # 搜索分享 31 | - search.suggest # 搜索建议 32 | - toc.follow # 目录跟踪-页面右侧的小目录 33 | 34 | palette: 35 | - media: "(prefers-color-scheme)" 36 | toggle: 37 | icon: material/brightness-auto 38 | name: Switch to light mode 39 | 40 | - media: "(prefers-color-scheme: light)" 41 | scheme: default 42 | toggle: 43 | icon: material/brightness-7 44 | name: Switch to dark mode 45 | 46 | - media: "(prefers-color-scheme: dark)" 47 | scheme: slate 48 | toggle: 49 | icon: material/brightness-4 50 | name: Switch to system preference 51 | 52 | icon: 53 | logo: logo 54 | previous: fontawesome/solid/angle-left 55 | next: fontawesome/solid/angle-right 56 | repo: fontawesome/brands/github 57 | tag: 58 | default-tag: fontawesome/solid/tag 
59 | hardware-tag: fontawesome/solid/microchip 60 | software-tag: fontawesome/solid/laptop-code 61 | 62 | plugins: 63 | - tags 64 | - search: 65 | separator: '[\s\u200b\-_,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])' 66 | - i18n: 67 | docs_structure: suffix 68 | fallback_to_default: true 69 | reconfigure_material: true 70 | reconfigure_search: true 71 | languages: 72 | - locale: zh 73 | name: 简体中文 74 | default: true 75 | build: true 76 | - locale: en 77 | name: English 78 | site_name: RapidVideOCR Documentation 79 | link: /en/ 80 | nav_translations: 81 | 概览: Overview 82 | 快速开始: Quick Start 83 | 在线demo: Online Demo 84 | 使用教程: Tutorial 85 | 初级教程: Beginner Tutorial 86 | 中级教程: Intermediate Tutorial 87 | 高级教程: Senior Tutorial 88 | 博客: Blog 89 | 如何识别其他语言字幕?: How to recognize other language subtitle? 90 | 常见问题: FAQ 91 | 赞助: Sponsor 92 | 更新日志: Changelog 93 | - git-committers: 94 | repository: SWHL/RapidVideOCR 95 | branch: main 96 | token: !!python/object/apply:os.getenv ["MKDOCS_GIT_COMMITTERS_APIKEY"] 97 | - git-revision-date-localized: 98 | enable_creation_date: true 99 | 100 | 101 | markdown_extensions: 102 | - abbr 103 | - attr_list 104 | - pymdownx.snippets 105 | - pymdownx.critic 106 | - pymdownx.caret 107 | - pymdownx.keys 108 | - pymdownx.mark 109 | - pymdownx.tilde 110 | - footnotes 111 | - def_list 112 | - md_in_html 113 | - tables 114 | - pymdownx.tasklist: 115 | custom_checkbox: true 116 | - toc: 117 | permalink: true 118 | - pymdownx.betterem: 119 | smart_enable: all 120 | - pymdownx.superfences: 121 | custom_fences: 122 | - name: mermaid 123 | class: mermaid 124 | format: !!python/name:pymdownx.superfences.fence_code_format 125 | - pymdownx.emoji: 126 | emoji_index: !!python/name:material.extensions.emoji.twemoji 127 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 128 | - pymdownx.highlight: 129 | anchor_linenums: true 130 | line_spans: __span 131 | pygments_lang_class: true 132 | - pymdownx.inlinehilite 133 | - pymdownx.tabbed: 
134 | alternate_style: true 135 | - admonition 136 | - pymdownx.details 137 | 138 | extra: 139 | version: 140 | provider: mike 141 | 142 | nav: 143 | - 概览: index.md 144 | - 快速开始: quickstart.md 145 | - 在线demo: online_demo.md 146 | - 使用教程: 147 | - 初级教程: tutorial/beginner.md 148 | - 中级教程: tutorial/intermediate.md 149 | - 高级教程: tutorial/senior.md 150 | - 常见问题: faq.md 151 | - 博客: 152 | - 如何识别其他语言字幕?: blog/posts/rec_other_lan.md 153 | - 赞助: sponsor.md 154 | - 更新日志: changelog.md 155 | -------------------------------------------------------------------------------- /cliff.toml: -------------------------------------------------------------------------------- 1 | # git-cliff ~ configuration file 2 | # https://git-cliff.org/docs/configuration 3 | 4 | [changelog] 5 | # A Tera template to be rendered as the changelog's footer. 6 | # See https://keats.github.io/tera/docs/#introduction 7 | # header = """ 8 | # # Changelog\n 9 | # All notable changes to this project will be documented in this file. See [conventional commits](https://www.conventionalcommits.org/) for commit guidelines.\n 10 | # """ 11 | # A Tera template to be rendered for each release in the changelog. 
12 | # See https://keats.github.io/tera/docs/#introduction 13 | body = """ 14 | {% for group, commits in commits | group_by(attribute="group") %} 15 | ### {{ group | striptags | trim | upper_first }} 16 | {% for commit in commits 17 | | filter(attribute="scope") 18 | | sort(attribute="scope") %} 19 | - **({{commit.scope}})**{% if commit.breaking %} [**breaking**]{% endif %} \ 20 | {{ commit.message }} by [@{{ commit.author.name }}](https://github.com/{{ commit.author.name }}) in [{{ commit.id | truncate(length=7, end="") }}]($REPO/commit/{{ commit.id }}) 21 | {%- endfor -%} 22 | {% raw %}\n{% endraw %}\ 23 | {%- for commit in commits %} 24 | {%- if commit.scope -%} 25 | {% else -%} 26 | - {% if commit.breaking %} [**breaking**]{% endif %}\ 27 | {{ commit.message }} by [@{{ commit.author.name }}](https://github.com/{{ commit.author.name }}) in [{{ commit.id | truncate(length=7, end="") }}]($REPO/commit/{{ commit.id }}) 28 | {% endif -%} 29 | {% endfor -%} 30 | {% endfor %} 31 | 32 | 33 | {% if github.contributors | length > 0 %} 34 | ### 🎉 Contributors 35 | 36 | {% for contributor in github.contributors %} 37 | - [@{{ contributor.username }}](https://github.com/{{ contributor.username }}) 38 | {%- endfor -%} 39 | {% endif %} 40 | 41 | 42 | {% if version %} 43 | {% if previous.version %}\ 44 | **Full Changelog**: [{{ version | trim_start_matches(pat="v") }}]($REPO/compare/{{ previous.version }}..{{ version }}) 45 | {% else %}\ 46 | **Full Changelog**: [{{ version | trim_start_matches(pat="v") }}] 47 | {% endif %}\ 48 | {% else %}\ 49 | ## [unreleased] 50 | {% endif %} 51 | """ 52 | # A Tera template to be rendered as the changelog's footer. 53 | # See https://keats.github.io/tera/docs/#introduction 54 | 55 | footer = """ 56 | 57 | """ 58 | 59 | # Remove leading and trailing whitespaces from the changelog's body. 60 | trim = true 61 | # postprocessors 62 | postprocessors = [ 63 | # Replace the placeholder `` with a URL. 
64 | { pattern = '\$REPO', replace = "https://github.com/SWHL/RapidVideOCR" }, # replace repository URL 65 | ] 66 | 67 | [git] 68 | # Parse commits according to the conventional commits specification. 69 | # See https://www.conventionalcommits.org 70 | conventional_commits = true 71 | # Exclude commits that do not match the conventional commits specification. 72 | filter_unconventional = true 73 | # Split commits on newlines, treating each line as an individual commit. 74 | split_commits = false 75 | # An array of regex based parsers to modify commit messages prior to further processing. 76 | commit_preprocessors = [ 77 | # Replace issue numbers with link templates to be updated in `changelog.postprocessors`. 78 | #{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](https://github.com/orhun/git-cliff/issues/${2}))"}, 79 | ] 80 | # An array of regex based parsers for extracting data from the commit message. 81 | # Assigns commits to groups. 82 | # Optionally sets the commit's scope and can decide to exclude commits from further processing. 83 | commit_parsers = [ 84 | { message = "^feat", group = "🚀 Features" }, 85 | { message = "^fix", group = "🐛 Bug Fixes" }, 86 | { message = "^doc", group = "📚 Documentation" }, 87 | { message = "^perf", group = "⚡ Performance" }, 88 | { message = "^refactor", group = "🚜 Refactor" }, 89 | { message = "^style", group = "🎨 Styling" }, 90 | { message = "^test", group = "🧪 Testing" }, 91 | { message = "^chore\\(release\\): prepare for", skip = true }, 92 | { message = "^chore\\(deps.*\\)", skip = true }, 93 | { message = "^chore\\(pr\\)", skip = true }, 94 | { message = "^chore\\(pull\\)", skip = true }, 95 | { message = "^chore|^ci", group = "⚙️ Miscellaneous Tasks" }, 96 | { body = ".*security", group = "🛡️ Security" }, 97 | { message = "^revert", group = "◀️ Revert" }, 98 | { message = ".*", group = "💼 Other" }, 99 | ] 100 | # Exclude commits that are not matched by any commit parser. 
101 | filter_commits = false 102 | # Order releases topologically instead of chronologically. 103 | topo_order = false 104 | # Order of commits in each group/release within the changelog. 105 | # Allowed values: newest, oldest 106 | sort_commits = "newest" -------------------------------------------------------------------------------- /rapid_videocr/utils/utils.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import argparse 5 | from pathlib import Path 6 | from typing import List, Tuple, Union 7 | 8 | import cv2 9 | import numpy as np 10 | import shapely 11 | from shapely.geometry import MultiPoint, Polygon 12 | 13 | 14 | def compute_centroid(points: np.ndarray) -> List: 15 | """计算所给框的质心坐标 16 | 17 | :param points ([type]): (4, 2) 18 | :return: [description] 19 | """ 20 | x_min, x_max = np.min(points[:, 0]), np.max(points[:, 0]) 21 | y_min, y_max = np.min(points[:, 1]), np.max(points[:, 1]) 22 | return [(x_min + x_max) / 2, (y_min + y_max) / 2] 23 | 24 | 25 | def write_txt( 26 | save_path: Union[str, Path], contents: Union[List[str], str], mode: str = "w" 27 | ) -> None: 28 | if not isinstance(contents, list): 29 | contents = [contents] 30 | 31 | with open(save_path, mode, encoding="utf-8") as f: 32 | for value in contents: 33 | f.write(f"{value}\n") 34 | 35 | 36 | def read_img(img_path: Union[str, Path]) -> np.ndarray: 37 | img = cv2.imdecode(np.fromfile(str(img_path), dtype=np.uint8), 1) 38 | return img 39 | 40 | 41 | def padding_img( 42 | img: np.ndarray, 43 | padding_value: Tuple[int, int, int, int], 44 | padding_color: Tuple[int, int, int] = (0, 0, 0), 45 | ) -> np.ndarray: 46 | padded_img = cv2.copyMakeBorder( 47 | img, 48 | padding_value[0], 49 | padding_value[1], 50 | padding_value[2], 51 | padding_value[3], 52 | cv2.BORDER_CONSTANT, 53 | value=padding_color, 54 | ) 55 | return padded_img 56 | 57 | 58 | def mkdir(dir_path): 59 | 
Path(dir_path).mkdir(parents=True, exist_ok=True) 60 | 61 | 62 | def read_txt(txt_path: Union[str, Path]) -> List[str]: 63 | if not isinstance(txt_path, str): 64 | txt_path = str(txt_path) 65 | 66 | with open(txt_path, "r", encoding="utf-8") as f: 67 | data = list(map(lambda x: x.rstrip("\n"), f)) 68 | return data 69 | 70 | 71 | def compute_poly_iou(a: np.ndarray, b: np.ndarray) -> float: 72 | """计算两个多边形的IOU 73 | 74 | Args: 75 | poly1 (np.ndarray): (4, 2) 76 | poly2 (np.ndarray): (4, 2) 77 | 78 | Returns: 79 | float: iou 80 | """ 81 | poly1 = Polygon(a).convex_hull 82 | poly2 = Polygon(b).convex_hull 83 | 84 | union_poly = np.concatenate((a, b)) 85 | 86 | if not poly1.intersects(poly2): 87 | return 0.0 88 | 89 | try: 90 | inter_area = poly1.intersection(poly2).area 91 | union_area = MultiPoint(union_poly).convex_hull.area 92 | except shapely._geos.TopologicalError: 93 | print("shapely.geos.TopologicalError occured, iou set to 0") 94 | return 0.0 95 | 96 | if union_area == 0: 97 | return 0.0 98 | 99 | return float(inter_area) / union_area 100 | 101 | 102 | def is_inclusive_each_other(box1: np.ndarray, box2: np.ndarray) -> bool: 103 | """判断两个多边形框是否存在包含关系 104 | 105 | Args: 106 | box1 (np.ndarray): (4, 2) 107 | box2 (np.ndarray): (4, 2) 108 | 109 | Returns: 110 | bool: 是否存在包含关系 111 | """ 112 | poly1 = Polygon(box1) 113 | poly2 = Polygon(box2) 114 | 115 | poly1_area = poly1.convex_hull.area 116 | poly2_area = poly2.convex_hull.area 117 | 118 | if poly1_area > poly2_area: 119 | box_max = box1 120 | box_min = box2 121 | else: 122 | box_max = box2 123 | box_min = box1 124 | 125 | x0, y0 = np.min(box_min[:, 0]), np.min(box_min[:, 1]) 126 | x1, y1 = np.max(box_min[:, 0]), np.max(box_min[:, 1]) 127 | 128 | edge_x0, edge_y0 = np.min(box_max[:, 0]), np.min(box_max[:, 1]) 129 | edge_x1, edge_y1 = np.max(box_max[:, 0]), np.max(box_max[:, 1]) 130 | 131 | if x0 >= edge_x0 and y0 >= edge_y0 and x1 <= edge_x1 and y1 <= edge_y1: 132 | return True 133 | return False 134 | 135 | 136 | 
def float_range(mini, maxi):
    """Build an argparse type-checker validating a float in [mini, maxi].

    Args:
        mini: minimum acceptable value (inclusive).
        maxi: maximum acceptable value (inclusive).

    Returns:
        A callable usable as argparse's ``type=`` argument: it converts its
        string argument to ``float`` and raises
        ``argparse.ArgumentTypeError`` when the conversion fails or the value
        lies outside the closed range.
    """

    def float_range_checker(arg):
        """argparse type function: a float within the configured range."""
        try:
            value = float(arg)
        except ValueError as exc:
            raise argparse.ArgumentTypeError("must be a floating point number") from exc

        # Note: NaN compares False to both bounds and is therefore accepted,
        # exactly as in the original implementation.
        if value < mini or value > maxi:
            raise argparse.ArgumentTypeError(
                "must be in range [" + str(mini) + " .. " + str(maxi) + "]"
            )
        return value

    return float_range_checker
@pytest.mark.parametrize("img_dir", [test_dir / "RGBImages"])
def test_concat_rec(setup_and_teardown, img_dir):
    # Batch (concatenated) recognition must produce exactly the same
    # srt/ass/txt output as single-image recognition does for RGBImages.
    save_dir, srt_path, ass_path, txt_path = setup_and_teardown

    # is_batch_rec=True switches on the concatenated-image recognition mode.
    input_param = RapidVideOCRInput(is_batch_rec=True)
    extractor = RapidVideOCR(input_param)
    extractor(img_dir, save_dir)

    # srt: 16 lines total; entries follow the index / time range / text /
    # blank-separator layout, so line 2 is the first subtitle's text.
    srt_data = read_txt(srt_path)
    assert len(srt_data) == 16
    assert srt_data[2] == "空间里面他绝对赢不了的"
    assert srt_data[-2] == "你们接着善后"

    # ass: 17 lines; dialogue text is the 10th comma-separated field of a
    # Dialogue line, hence split(",", 9)[-1].
    ass_data = read_txt(ass_path)
    assert len(ass_data) == 17
    assert ass_data[13].split(",", 9)[-1] == "空间里面他绝对赢不了的"
    assert ass_data[-1].split(",", 9)[-1] == "你们接着善后"

    # txt: 8 lines; the last non-blank line is the final subtitle's text.
    txt_data = read_txt(txt_path)
    assert len(txt_data) == 8
    assert txt_data[-2] == "你们接着善后"


@pytest.mark.parametrize(
    "img_dir",
    [test_dir / "RGBImage", test_dir / "TXTImage"],
)
def test_empty_dir(img_dir):
    # A freshly created directory holding no images must raise
    # RapidVideOCRExeception rather than silently producing empty output.
    extractor = RapidVideOCR(RapidVideOCRInput())
    mkdir(img_dir)

    with pytest.raises(RapidVideOCRExeception) as exc_info:
        extractor(img_dir, test_dir)
    assert exc_info.type is RapidVideOCRExeception

    # Clean up the temporary directory created above.
    shutil.rmtree(img_dir)
def test_out_only_srt(setup_and_teardown):
    # out_format="srt" must still write a complete result.srt.
    save_dir, srt_path, ass_path, txt_path = setup_and_teardown

    img_dir = test_dir / "RGBImages"
    input_param = RapidVideOCRInput(is_batch_rec=True, out_format="srt")
    extractor = RapidVideOCR(input_param)
    extractor(img_dir, save_dir)

    # Same expected srt content as in the all-formats tests.
    srt_data = read_txt(srt_path)
    assert len(srt_data) == 16
    assert srt_data[2] == "空间里面他绝对赢不了的"
    assert srt_data[-2] == "你们接着善后"


def test_out_only_ass(setup_and_teardown):
    # out_format="ass" must still write a complete result.ass.
    save_dir, srt_path, ass_path, txt_path = setup_and_teardown

    img_dir = test_dir / "RGBImages"
    input_param = RapidVideOCRInput(is_batch_rec=True, out_format="ass")
    extractor = RapidVideOCR(input_param)
    extractor(img_dir, save_dir)

    # Dialogue text is the 10th comma-separated field, hence split(",", 9).
    ass_data = read_txt(ass_path)
    assert len(ass_data) == 17
    assert ass_data[13].split(",", 9)[-1] == "空间里面他绝对赢不了的"
    assert ass_data[-1].split(",", 9)[-1] == "你们接着善后"


def test_out_only_txt(setup_and_teardown):
    # out_format="txt" must still write a complete result.txt.
    save_dir, srt_path, ass_path, txt_path = setup_and_teardown

    img_dir = test_dir / "RGBImages"
    input_param = RapidVideOCRInput(is_batch_rec=True, out_format="txt")
    extractor = RapidVideOCR(input_param)
    extractor(img_dir, save_dir)

    txt_data = read_txt(txt_path)
    assert len(txt_data) == 8
    assert txt_data[-2] == "你们接着善后"
2 | 3 | 4 |
 
5 | 6 | 7 | 8 | 9 | 10 | PyPI 11 | 12 | 13 | SemVer2.0 14 | 15 | 16 | [简体中文](https://github.com/SWHL/RapidVideOCR/blob/main/docs/README_zh.md) | English 17 |
18 | 19 | ### Introduction 20 | 21 | `rapid_videocr` is a tool designed for the automatic extraction of hard subtitles from videos and generates corresponding `srt | ass | txt` files. 22 | 23 | Supported subtitle languages: [List of Supported Languages](https://rapidai.github.io/RapidOCRDocs/main/model_list/#_4). Since this tool relies on the `rapidocr` library, any language supported by `rapidocr` for recognition is also supported by `rapid_videocr`. 24 | 25 | Its advantages include: 26 | 27 | - **Faster Extraction**: When used in conjunction with the [VideoSubFinder](https://sourceforge.net/projects/videosubfinder/) software, it can extract key subtitle frames more quickly. 28 | - **More Accurate Recognition**: It utilizes [RapidOCR](https://github.com/RapidAI/RapidOCR) as its recognition library. 29 | - **Easier to Use**: It can be installed directly via pip and is ready to use. 30 | 31 | For the desktop EXE version, please visit [RapidVideOCRDesktop](https://github.com/SWHL/RapidVideOCRDesktop). 32 | 33 | If this tool has been helpful to you, please give it a star ⭐. 34 | 35 | ### [Online Demo](https://huggingface.co/spaces/SWHL/RapidVideOCR) 36 | 37 |
38 | Demo 39 |
40 | 41 | ### Overall framework 42 | 43 | ```mermaid 44 | flowchart LR 45 | A[/Video/] --Extract subtitle key frame--> B(VideoSubFinder) --OCR-->C(RapidVideOCR) 46 | C --Convert--> D[/"SRT | ASS | TXT"/] 47 | ``` 48 | 49 | ### Installation 50 | 51 | ```bash 52 | pip install rapid_videocr 53 | ``` 54 | 55 | ### Usage 56 | 57 | > [!NOTE] 58 | > 59 | > The input image path of `rapid_videocr` must be the path of **RGBImages** or **TXTImages** output by **VideoSubFinder** software. 60 | 61 | ```bash 62 | rapid_videocr -i RGBImages 63 | ``` 64 | 65 | or python script: 66 | 67 | ```python 68 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput 69 | 70 | input_args = RapidVideOCRInput(is_batch_rec=False) 71 | extractor = RapidVideOCR(input_args) 72 | 73 | rgb_dir = "tests/test_files/RGBImages" 74 | save_dir = "outputs" 75 | save_name = "a" 76 | 77 | # outputs/a.srt outputs/a.ass outputs/a.txt 78 | extractor(rgb_dir, save_dir, save_name=save_name) 79 | ``` 80 | 81 | ### Documentation 82 | 83 | Full documentation can be found on [docs](https://swhl.github.io/RapidVideOCR) in Chinese. 84 | 85 | ### Code Contributors 86 | 87 |

88 | 89 | 90 | 91 |

92 | 93 | ### Contributing 94 | 95 | Pull requests are welcome. For major changes, please open an issue first 96 | to discuss what you would like to change. 97 | 98 | Please make sure to update tests as appropriate. 99 | 100 | ### [Sponsor](https://swhl.github.io/RapidVideOCR/docs/sponsor/) 101 | 102 | If you want to sponsor the project, you can directly click the **Buy me a coffee** image, please write a note (e.g. your github account name) to facilitate adding to the sponsorship list below. 103 | 104 |
105 | 106 |
107 | 108 | ### License 109 | 110 | This project is released under the [Apache 2.0 license](./LICENSE). 111 | -------------------------------------------------------------------------------- /rapid_videocr/main.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import argparse 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | from .export import ExportStrategyFactory, OutputFormat 9 | from .ocr_processor import OCRProcessor 10 | from .utils.crop_by_project import CropByProject 11 | from .utils.logger import logger 12 | from .utils.typings import IMAGE_EXTENSIONS, RapidVideOCRInput 13 | from .utils.utils import mkdir 14 | 15 | 16 | class RapidVideOCR: 17 | def __init__(self, input_params: RapidVideOCRInput): 18 | logger.setLevel(input_params.log_level.upper()) 19 | 20 | self.ocr_processor = OCRProcessor( 21 | input_params.ocr_params, input_params.batch_size 22 | ) 23 | 24 | self.cropper = CropByProject() 25 | 26 | self.is_batch_rec = input_params.is_batch_rec 27 | self.out_format = input_params.out_format 28 | 29 | def __call__( 30 | self, 31 | vsf_dir: Union[str, Path], 32 | save_dir: Union[str, Path], 33 | save_name: str = "result", 34 | ) -> List[str]: 35 | vsf_dir = Path(vsf_dir) 36 | if not vsf_dir.exists(): 37 | raise RapidVideOCRExeception(f"{vsf_dir} does not exist.") 38 | 39 | img_list = self.get_img_list(vsf_dir) 40 | srt_result, ass_result, txt_result = self.ocr_processor( 41 | img_list, self.is_batch_rec, self.is_txt_dir(vsf_dir) 42 | ) 43 | 44 | self.export_file(Path(save_dir), save_name, srt_result, ass_result, txt_result) 45 | return txt_result 46 | 47 | def get_img_list(self, vsf_dir: Path) -> List[Path]: 48 | def get_sort_key(x: Path) -> int: 49 | return int("".join(str(x.stem).split("_")[:4])) 50 | 51 | img_list = [] 52 | for v in vsf_dir.glob("*.*"): 53 | if not v.is_file(): 54 | continue 55 | 56 | if 
v.suffix.lower() not in IMAGE_EXTENSIONS: 57 | continue 58 | 59 | img_list.append(v) 60 | 61 | if not img_list: 62 | raise RapidVideOCRExeception(f"{vsf_dir} does not have valid images") 63 | 64 | img_list = sorted(img_list, key=get_sort_key) 65 | return img_list 66 | 67 | @staticmethod 68 | def is_txt_dir(vsf_dir: Path) -> bool: 69 | return "TXTImages" in vsf_dir.name 70 | 71 | def export_file( 72 | self, 73 | save_dir: Path, 74 | save_name: str, 75 | srt_result: List[str], 76 | ass_result: List[str], 77 | txt_result: List[str], 78 | ): 79 | try: 80 | strategy = ExportStrategyFactory.create_strategy(self.out_format) 81 | mkdir(save_dir) 82 | strategy.export(save_dir, save_name, srt_result, ass_result, txt_result) 83 | logger.info(f"[OCR] Results saved to directory: {save_dir}") 84 | except ValueError as e: 85 | logger.error(f"Export failed: {e}") 86 | raise 87 | 88 | def print_console(self, txt_result: List): 89 | for v in txt_result: 90 | print(v.strip()) 91 | 92 | 93 | class RapidVideOCRExeception(Exception): 94 | pass 95 | 96 | 97 | def main(): 98 | parser = argparse.ArgumentParser() 99 | parser.add_argument( 100 | "-i", 101 | "--img_dir", 102 | type=str, 103 | required=True, 104 | help="The full path of RGBImages or TXTImages.", 105 | ) 106 | parser.add_argument( 107 | "-s", 108 | "--save_dir", 109 | type=str, 110 | default="outputs", 111 | help='The path of saving the recognition result. Default is "outputs" under the current directory.', 112 | ) 113 | parser.add_argument( 114 | "-f", 115 | "--file_name", 116 | type=str, 117 | default="result", 118 | help='The name of the resulting file name. Default is "result".', 119 | ) 120 | parser.add_argument( 121 | "-o", 122 | "--out_format", 123 | type=str, 124 | default=OutputFormat.ALL.value, 125 | choices=[v.value for v in OutputFormat], 126 | help='Output file format. 
Default is "all".', 127 | ) 128 | parser.add_argument( 129 | "--is_batch_rec", 130 | action="store_true", 131 | default=False, 132 | help="Which mode to run (concat recognition or single recognition). Default is False.", 133 | ) 134 | parser.add_argument( 135 | "-b", 136 | "--batch_size", 137 | type=int, 138 | default=10, 139 | help="The batch of concating image nums in concat recognition mode. Default is 10.", 140 | ) 141 | args = parser.parse_args() 142 | 143 | ocr_input_params = RapidVideOCRInput( 144 | is_batch_rec=args.is_batch_rec, 145 | batch_size=args.batch_size, 146 | out_format=args.out_format, 147 | ) 148 | extractor = RapidVideOCR(ocr_input_params) 149 | extractor(args.img_dir, args.save_dir, args.file_name) 150 | 151 | 152 | if __name__ == "__main__": 153 | main() 154 | -------------------------------------------------------------------------------- /docs/tutorial/intermediate.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - toc 5 | --- 6 | 7 | 8 | ## 引言 9 | 10 | - 本篇文章旨在帮助不会python编程的小伙伴,快速使用RapidVideOCR视频硬字幕提取工具。 11 | - 可以运行的操作系统: `Windows | Mac | Linux` 12 | 13 | ## 配置环境 14 | 15 | ### 1. 安装VideoSubFinder软件(用于提取字幕关键帧) 16 | 17 | [[RapidVideOCR周边] VideoSubFinder提取字幕关键帧教程](https://blog.csdn.net/shiwanghualuo/article/details/129174857?spm=1001.2014.3001.5501) 18 | 19 | ### 2. 安装python软件(用于运行RapidVideOCR) 20 | 21 | !!! tip 22 | 23 | 声明:以下部分图像来自[终极保姆教程之安装python的教程_3.10.7版本](https://www.cnblogs.com/zyc-666/p/16689739.html) 24 | 25 | #### 1. 下载python安装包 26 | 27 | 打开python官网 → ,选择自己系统进入下载界面(VideoSubFinder工具目前只有Windows的) 28 | 29 |
30 | 31 |
32 | 33 | #### 2. 找到自己想要的版本 34 | 35 | 以python 3.10.7 为例。如果网速下载较慢的话,可以加入RapidVideOCR QQ群(706807542),群文件获取。 36 | 37 |
38 | 39 |
40 | 41 | #### 3. 下载完成之后,双击打开这个exe,即可开始准备安装 42 | 43 | 点击自定义安装,选择安装位置。同时,记得勾选最后一项`Add Python 3.10 to PATH` 44 | 45 |
46 | 47 |
48 | 49 | #### 4. 直接点击下一步 50 | 51 |
52 | 53 |
54 | 55 | #### 5. 勾选改路径 56 | 57 |
58 | 59 |
60 | 61 | #### 6. 点击Install,等待安装完毕即可 62 | 63 |
64 | 65 |
66 | 67 | #### 7. `Win + r` 输入`cmd`,回车,进入命令窗口 68 | 69 |
70 | 71 |
72 | 73 | #### 8. 输入python,看是否出现类似下图样子,如出现,证明安装成功 74 | 75 |
76 | 77 |
78 | 79 | #### 9. 添加`Scripts`目录到环境变量中 80 | 81 | 1. `Win + q` 输入“编辑” → 点击 **编辑系统环境变量** 82 |
83 | 84 |
85 | 86 | 2. 打开 **环境变量** → **用户变量** → **Path** → **编辑** 87 |
88 | 89 |
90 | 91 | 3. 新建Python安装目录下的Script目录路径,如下图所示,记得点击保存哈。 92 | 93 |
94 | 95 |
96 | 97 | ### 3. 安装RapidVideOCR工具 98 | 99 | #### 1. `Win + r` 输入`cmd`,回车,进入命令窗口 100 | 101 |
102 | 103 |
104 | 105 | #### 2. 安装`rapid_videocr` 106 | 107 | ```bash linenums="1" 108 | pip install rapid_videocr -i https://pypi.tuna.tsinghua.edu.cn/simple/ 109 | ``` 110 | 111 |
112 | 113 |
114 | 115 | #### 3. 测试是否安装成功,输入`rapid_videocr -h`,如果出现类似下图输出,则说明安装成功 116 | 117 |
118 | 119 |
120 | 121 | #### 4. 命令行使用 122 | 123 | `Win + r` 输入`cmd`,回车,进入命令行窗口 124 | 125 | ```bash linenums="1" 126 | rapid_videocr -i RGBImages -s result -m concat 127 | ``` 128 | 129 | 其中`RGBImages`为VideoSubFinder软件生成,可以自定义,例如:`G:\ProgramFiles\_self\RapidVideOCR\test_files\RGBImages` 等等。 130 | 131 |
132 | 133 |
134 | 135 | #### 5. 脚本使用 136 | 137 | 1. 在桌面上新建TXT文件,命名为`rapid_videocr.py`,注意后缀名改为`.py`。 138 | 2. 用记事本打开,将以下代码拷贝到`rapid_videocr.py`里面 139 | 140 | ```python linenums="1" 141 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput 142 | 143 | # RapidVideOCRInput有两个初始化参数 144 | # is_concat_rec: 是否用单张图识别,默认是False,也就是默认用单图识别 145 | # concat_batch: 叠图识别的图像张数,默认10,可自行调节 146 | # out_format: 输出格式选择,[srt, ass, txt, all], 默认是 all 147 | # is_print_console: 是否打印结果,[0, 1], 默认是0,不打印 148 | input_args = RapidVideOCRInput( 149 | is_batch_rec=False, ocr_params={"Global.with_paddle": True} 150 | ) 151 | extractor = RapidVideOCR(input_args) 152 | 153 | rgb_dir = "tests/test_files/RGBImages" 154 | save_dir = "outputs" 155 | save_name = "a" 156 | 157 | # outputs/a.srt outputs/a.ass outputs/a.t 158 | extractor(rgb_dir, save_dir, save_name=save_name) 159 | ``` 160 | 161 | 3. 更改`rgb_dir` 后面的目录为VideoSubFinder生成的`RGBImages`目录路径。 162 | 163 | === "Windows下路径写法" 164 | 165 | ```python linenums="1" 166 | rgb_dir = r'G:\ProgramFiles\_self\RapidVideOCR\test_files\RGBImages' 167 | ``` 168 | 169 | === "Linux/Mac下路径写法" 170 | 171 | ```python linenums="1" 172 | rgb_dir = 'test_files/TXTImages' 173 | ``` 174 | 175 | 4. `Win + r` 打开终端输入以下代码,回车执行即可。 176 | 177 | ```bash linenums="1" 178 | cd Desktop 179 | python rapid_videocr.py 180 | ``` 181 | 182 |
183 | 184 |
185 | -------------------------------------------------------------------------------- /docs/changelog.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - toc 5 | --- 6 | 7 | ### 📣 Subsequent update logs will be moved to [Release](https://github.com/SWHL/RapidVideOCR/releases), and will no longer be updated here 8 | 9 | #### 🚩2023-10-08 v2.2.8 update 10 | 11 | - Adapt the relevant parameters of `rapidocr_onnxruntime`, which can be passed in through the RapidVideOCR class, so as to more flexibly specify models of different languages. 12 | 13 | #### ♦ 2023-08-05 v2.2.4 update 14 | 15 | - Fix the index error in batch recognition mode. 16 | 17 | - Add a logging module to facilitate the use of the desktop version, quickly record problems, and facilitate feedback. 18 | 19 | #### 🛶2023-07-19 v2.2.3 update 20 | 21 | - Added adaptation to VSF parameters. When in command line mode and during class initialization, you can specify the same-name parameters of the VSF command. For detailed usage, please refer to [link](https://github.com/SWHL/RapidVideOCR/wiki/RapidVideOCR%E9%AB%98%E7%BA%A7%E6%95%99%E7%A8%8B%EF%BC%88%E6%9C%89python%E5%9F%BA%E7%A1%80%E7%9A%84%E5%B0%8F%E4%BC%99%E4%BC%B4%EF%BC%89) 22 | 23 | #### 🤓2023-07-08 v2.2.2 update 24 | 25 | - Fixed the problem that Chinese paths cannot be read during batch recognition 26 | - Fixed the problem of skipping in SRT when missing axes. Currently, when an axis fails to be recognized, a position will be vacated for easy proofreading. 27 | - Keep the intermediate results of VSF recognition 28 | 29 | #### 🐲2023-06-22 v2.2.0 update 30 | 31 | - This version is compatible with `v2.1.x`, which means that the previous usage is still possible. 32 | - Integrate VSF's CLI into the library, just specify the full path of `VideoSubFinderWXW.exe`. 
33 | - Added batch recognition function, specify the video directory, and automatically extract all video subtitles in the directory 34 | - For usage examples, see: [demo.py](https://github.com/SWHL/RapidVideOCR/blob/main/demo.py) 35 | 36 | #### 😀2023-05-12 v2.1.7 update 37 | 38 | - Optimized code 39 | - Added `save_name` parameter, which can flexibly specify the name of the saved `srt | txt` file, the default is `result` 40 | 41 | #### 🐱2023-03-27 v2.1.6 update 42 | 43 | - Fixed the timeline misalignment problem, see [issue 23](https://github.com/SWHL/RapidVideOCR/issues/23) for details 44 | 45 | #### 👽2023-03-23 v2.1.5 update 46 | 47 | - Added control parameter `is_print_console` for printing to the screen 48 | - Adjust the `out_format` parameter position to the initialization class 49 | 50 | #### 😀2023-03-14 v2.1.3 update 51 | 52 | - Fix the error when passing in the `TXTImages` directory and identifying duplicate characters 53 | 54 | #### 😜2023-03-12 v2.1.2 update 55 | 56 | - Fix index error, [#22](https://github.com/SWHL/RapidVideOCR/issues/22) 57 | 58 | #### 🎢2023-03-11 v2.1.1 update 59 | 60 | - Fix the difference between single image recognition and previous versions 61 | 62 | - The default recognition mode is changed to single image recognition. Whether to use duplicate image recognition is up to you 63 | 64 | #### 🥇2023-03-10 v2.1.0 update 65 | 66 | - Added duplicate character recognition function, faster, and the default is duplicate character recognition function 67 | 68 | #### 🎈2023-03-02 v2.0.5~7 update 69 | 70 | - Fix format errors in generated srt files, [#19](https://github.com/SWHL/RapidVideOCR/issues/19) 71 | 72 | #### 🎫2023-02-17 v2.0.4 update 73 | 74 | - Optimized passing in the `TXTImages` directory. Compared with passing in `RGBImages`, it will be faster and more accurate. 
It is recommended to pass in the `TXTImages` directory 75 | 76 | #### 💎2023-02-17 v2.0.2 update 77 | 78 | - Fix the problem of missing spaces in peer subtitle recognition 79 | 80 | #### 🎈2023-01-29 v1.1.10 update 81 | 82 | - Fix the error of empty index when converting frame index to timestamp 83 | 84 | #### 🧨2023-01-28 v1.1.9 update 85 | 86 | - Fix the problem of timeline misalignment 87 | 88 | #### 👊 2023-01-15 v1.1.4 update 89 | 90 | - Add the option of outputting txt format. Currently, the v1.1.4 version outputs srt and txt formats by default 91 | 92 | - Add the option to adjust the box size of subtitle selection according to the screen size of the running program 93 | 94 | #### 🌈2023-01-10 v1.0.3 update 95 | 96 | - Replace decord with OpenCV because decord has a memory leak when processing MP4. For details, see: [#208](https://github.com/dmlc/decord/issues/208) 97 | 98 | #### 🎄2022-12-04 update 99 | 100 | - Added interactive subtitle positioning function, which is enabled by default and is more user-friendly. For details, please refer to the GIF below. Thanks to @[Johndirr](https://github.com/Johndirr) for the suggestion. 101 | - Optimize the code structure, put RapidOCR related models and configuration files in the `rapidocr` directory 102 | - The configuration files of `rapidvideocr` are also placed in the corresponding directory 103 | 104 | #### 🌼2022-05-08 update 105 | 106 | - Add interactive determination of the threshold of the binary subtitle image, which only supports Windows and can be used by `is_select_threshold = True` 107 | - Optimize the code 108 | 109 | #### 🎉2022-05-03 update 110 | 111 | - Add GPU support, for specific configuration tutorials, see: [onnxruntime-gpu version inference configuration](https://github.com/RapidAI/RapidOCR/blob/main/python/onnxruntime_infer/README.md#onnxruntime-gpu%E7%89%88%E6%8E%A8%E7%90%86%E9%85%8D%E7%BD%AE) 112 | - Added support for Japanese, which can support more languages. 
For details, see: [Supported Language List](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99) 113 | 114 | #### 💡2022-05-01 update 115 | 116 | - Added speech module part is located in the branch `asr_module` 117 | 118 | - Added speech recognition module. Since the decoding part of this module can only run on Linux and Mac, if you want to use this module, please use Linux and Mac. 119 | - Currently, the speech recognition code comes from the [RapidASR/python](https://github.com/RapidAI/RapidASR/tree/main/python/base_paddlespeech) part. The model comes from [PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell/asr0) 120 | - After a simple test, the speech recognition module is not very accurate. -_-! 121 | 122 | #### 2022-03-09 update 123 | 124 | - Added [FAQ module](./faq.md) to help everyone skip common small problems 125 | 126 | #### 2021-12-14 update 127 | 128 | - [x] Background removal is not effective, resulting in the loss of some frames 129 | 130 | - Tried to use the image segmentation method. 
After testing, the inference speed under CPU is too slow, so it is temporarily abandoned 131 | 132 | - Currently using a fixed binary threshold 133 | 134 | - [x] (2021-12-14) Improve the corresponding English document 135 | 136 | - [x] (2021-12-14) Add running time benchmark 137 | 138 | - [x] Add specific parameter description 139 | 140 | - [x] Make a project logo 141 | 142 | - [ ] More tests 143 | -------------------------------------------------------------------------------- /docs/tutorial/intermediate.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - toc 5 | --- 6 | 7 | 8 | ## Introduction 9 | 10 | This article is aimed towards helping beginner Python programmers to quickly start using the RapidVideOCR subtitle extraction tool 11 | 12 | Supported operating systems: `Windows | Mac | Linux` 13 | 14 | ## Environment Configuration 15 | 16 | ### 1. Install VideoSubFinder (used for extracting subtitle keyframes) 17 | 18 | [[RapidVideOCR extras] VideoSubFinder subtitle keyframe extraction tutorial](https://blog.csdn.net/shiwanghualuo/article/details/129174857?spm=1001.2014.3001.5501) 19 | 20 | ### 2. Install python(used for running RapidVideOCR) 21 | 22 | !!! tip 23 | 24 | Disclaimer: Some of the following images are from [终极保姆教's tutorial on installing Python_3.10.7](https://www.cnblogs.com/zyc-666/p/16689739.html) 25 | 26 | #### 1. Download the Python installer 27 | 28 | Open the official Python website → , and select the installer for your own operating system (VideoSubFinder is currently only available for Windows) 29 | 30 |
31 | 32 |
33 | 34 | #### 2. Find the version you want 35 | 36 | Take Python 3.10.7 for example, if the download speed is too slow you can join the RapidVideOCR QQ group: (706807542) to download the files. 37 | 38 |
39 | 40 |
41 | 42 | #### 3. After the download is complete, double-click to open the exe and start the installation 43 | 44 | Click Customize installation and select the installation location. Also, remember to check the last item `Add Python 3.10 to PATH` 45 | 46 |
47 | 48 |
49 | 50 | #### 4. Click Next 51 | 52 |
53 | 54 |
55 | 56 | #### 5. Check the installation path 57 | 58 |
59 | 60 |
61 | 62 | #### 6. Click install, and wait for the installation to finish 63 | 64 |
65 | 66 |
67 | 68 | #### 7. Press `Win + r` input `cmd`, and press Enter to open the command prompt 69 | 70 |
71 | 72 |
73 | 74 | #### 8. Enter python and see if an output something similar to the following image appears. If so, then the installation was successful 75 | 76 |
77 | 78 |
79 | 80 | #### 9. Add the `Scripts` directory to the environment variables 81 | 82 | 1. Press `Win + q` enter `Edit the system`→ Click **Edit the system environment variables** 83 |
84 | 85 |
86 | 87 | 2. Click **Environment Variables** → **User variables** → **Path** → **Edit** 88 |
89 | 90 |
91 | 92 | 3. Create a new entry for the Script directory under the Python installation directory, as shown in the image below, and remember to click save. 93 | 94 |
95 | 96 |
97 | 98 | ### 3. Install RapidVideOCR 99 | 100 | #### 1. Press `Win + r` input `cmd`, and press Enter to open the command prompt 101 | 102 |
103 | 104 |
105 | 106 | #### 2. Install `rapid_videocr` 107 | 108 | ```bash linenums="1" 109 | pip install rapid_videocr -i https://pypi.tuna.tsinghua.edu.cn/simple/ 110 | ``` 111 | 112 |
113 | 114 |
115 | 116 | #### 3. To test whether the installation was successful, enter `rapid_videocr -h` 117 | 118 |
119 | 120 |
121 | 122 | #### 4. Command line usage 123 | 124 | Press `Win + r` input `cmd`, and press Enter to open the command prompt 125 | 126 | ```bash linenums="1" 127 | rapid_videocr -i RGBImages -s result -m concat 128 | ``` 129 | 130 | `RGBImages` is generated by VideoSubFinder and its output path can be customized, for example: `G:\ProgramFiles\_self\RapidVideOCR\test_files\RGBImages` and so on. 131 | 132 |
133 | 134 |
135 | 136 | #### 5. Script usage 137 | 138 | 1. Create a new TXT file on the desktop and name it `rapid_videocr.py`. Note that the file extension is changed to `*.py`. 139 | 2. Open `rapid_videocr.py` with Notepad and copy the following code into it 140 | 141 | ```python linenums="1" 142 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput 143 | 144 | # RapidVideOCRInput has two initialization parameters 145 | # is_concat_rec: Use a single image for recognition or not. The default is False, which means that a single image is used for recognition by default. 146 | # concat_batch: The number of images to be used in overlay is 10 by default and can be adjusted 147 | # out_format: Output format selection, [srt, ass, txt, all], the default is all 148 | # is_print_console: Whether to print the result, [0, 1], the default is 0 for not printing 149 | input_args = RapidVideOCRInput( 150 | is_batch_rec=False, ocr_params={"Global.with_paddle": True} 151 | ) 152 | extractor = RapidVideOCR(input_args) 153 | 154 | rgb_dir = "tests/test_files/RGBImages" 155 | save_dir = "outputs" 156 | save_name = "a" 157 | 158 | # outputs/a.srt outputs/a.ass outputs/a.t 159 | extractor(rgb_dir, save_dir, save_name=save_name) 160 | ``` 161 | 162 | 3. Change `rgb_dir` to the path to the `RGBImages` directory generated by VideoSubFinder。 163 | 164 | === "Windows path style" 165 | 166 | ```python linenums="1" 167 | rgb_dir = r'G:\ProgramFiles\_self\RapidVideOCR\test_files\RGBImages' 168 | ``` 169 | 170 | === "Linux/Mac path style" 171 | 172 | ```python linenums="1" 173 | rgb_dir = 'test_files/TXTImages' 174 | ``` 175 | 176 | 4. Press `Win + r` and open the command prompt, and run the following commands 177 | 178 | ```bash linenums="1" 179 | cd Desktop 180 | python rapid_videocr.py 181 | ``` 182 | 183 |
184 | 185 |
186 | -------------------------------------------------------------------------------- /docs/tutorial/senior.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - toc 5 | --- 6 | 7 | ### 1. 安装使用VideoSubFinder软件 8 | 9 | 下载地址:Windows & Linux ([videosubfinder官网](https://sourceforge.net/projects/videosubfinder/) / QQ群(706807542)共享文件) | [Mac版](https://github.com/eritpchy/videosubfinder-cli) 10 | 11 | 使用教程:[VideoSubFinder提取字幕关键帧教程](https://juejin.cn/post/7203362527082053691) 12 | 13 | 最终生成的`RGBImages`和`TXTImages`目录一般会在软件安装目录下 14 | 15 | ✧ 推荐用`RGBImages`目录中图像(感谢小伙伴[dyphire](https://github.com/dyphire)在[#21](https://github.com/SWHL/RapidVideOCR/issues/21)的反馈) 16 | 17 | ### 2. 安装rapid_videocr 18 | 19 | ```bash linenums="1" 20 | pip install rapid_videocr 21 | ``` 22 | 23 | ### 3. Python使用 24 | 25 | === "Only OCR" 26 | 27 | ```python linenums="1" 28 | from rapid_videocr import RapidVideOCR 29 | 30 | # RapidVideOCRInput有两个初始化参数 31 | # is_concat_rec: 是否用单张图识别,默认是False,也就是默认用单图识别 32 | # concat_batch: 叠图识别的图像张数,默认10,可自行调节 33 | # out_format: 输出格式选择,[srt, ass, txt, all], 默认是 all 34 | # is_print_console: 是否打印结果,[0, 1], 默认是0,不打印 35 | input_args = RapidVideOCRInput( 36 | is_batch_rec=False, ocr_params={"Global.with_paddle": True} 37 | ) 38 | extractor = RapidVideOCR(input_args) 39 | 40 | rgb_dir = "tests/test_files/RGBImages" 41 | save_dir = "outputs" 42 | save_name = "a" 43 | 44 | # outputs/a.srt outputs/a.ass outputs/a.t 45 | extractor(rgb_dir, save_dir, save_name=save_name) 46 | ``` 47 | 48 | === "Extract + OCR" 49 | 50 | ```python linenums="1" 51 | from rapid_videocr import RapidVideoSubFinderOCR 52 | 53 | vsf_exe = r"G:\ProgramFiles\VideoSubFinder_6.10_x64\Release_x64\VideoSubFinderWXW.exe" 54 | extractor = RapidVideoSubFinderOCR(vsf_exe_path=vsf_exe, is_concat_rec=True) 55 | 56 | # video_path can be directory path or video full path. 
57 | video_path = 'test_files/tiny/2.mp4' 58 | save_dir = 'outputs' 59 | extractor(video_path, save_dir) 60 | ``` 61 | 62 | ### 4. 命令行使用 63 | 64 | === "Only OCR" 65 | 66 | ```bash linenums="1" 67 | rapid_videocr -i RGBImages 68 | ``` 69 | 70 | === "Extract + OCR" 71 | 72 | ```bash linenums="1" 73 | rapid_videocr -vsf G:\ProgramFiles\VideoSubFinder_6.10_x64\Release_x64\VideoSubFinderWXW.exe -video_dir G:\ProgramFiles\RapidVideOCR\test_files\tiny 74 | ``` 75 | 76 |
77 | 78 | 详细参数 79 | 80 | ```bash linenums="1" 81 | $ rapid_videocr -h 82 | usage: rapid_videocr [-h] [-video_dir VIDEO_DIR] [-i IMG_DIR] [-s SAVE_DIR] 83 | [-o {srt,ass,txt,all}] [--is_concat_rec] [-b CONCAT_BATCH] [-p] 84 | [-vsf VSF_EXE_PATH] [-c] [-r] [-ccti] [-ces CREATE_EMPTY_SUB] 85 | [-cscti CREATE_SUB_FROM_CLEARED_TXT_IMAGES] 86 | [-cstxt CREATE_SUB_FROM_TXT_RESULTS] [-ovocv] [-ovffmpeg] [-uc] 87 | [--start_time START_TIME] [--end_time END_TIME] 88 | [-te TOP_VIDEO_IMAGE_PERCENT_END] 89 | [-be BOTTOM_VIDEO_IMAGE_PERCENT_END] 90 | [-le LEFT_VIDEO_IMAGE_PERCENT_END] 91 | [-re RIGHT_VIDEO_IMAGE_PERCENT_END] [-gs GENERAL_SETTINGS] 92 | [-nthr NUM_THREADS] [-nocrthr NUM_OCR_THREADS] 93 | 94 | optional arguments: 95 | -h, --help show this help message and exit 96 | 97 | VideOCRParameters: 98 | -video_dir VIDEO_DIR, --video_dir VIDEO_DIR 99 | The full path of video or the path of video directory. 100 | -i IMG_DIR, --img_dir IMG_DIR 101 | The full path of RGBImages or TXTImages. 102 | -s SAVE_DIR, --save_dir SAVE_DIR 103 | The path of saving the recognition result. Default is 104 | "outputs" under the current directory. 105 | -o {srt,ass,txt,all}, --out_format {srt,ass,txt,all} 106 | Output file format. Default is "all". 107 | --is_concat_rec Which mode to run (concat recognition or single 108 | recognition). Default is False. 109 | -b CONCAT_BATCH, --concat_batch CONCAT_BATCH 110 | The batch of concating image nums in concat 111 | recognition mode. Default is 10. 112 | -p, --print_console Whether to print the subtitle results to console. -p 113 | means to print. 114 | 115 | VSFParameters: 116 | -vsf VSF_EXE_PATH, --vsf_exe_path VSF_EXE_PATH 117 | The full path of VideoSubFinderWXW.exe. 118 | -c, --clear_dirs Clear Folders (remove all images), performed before 119 | any other steps. 
Default is True 120 | -r, --run_search Run Search (find frames with hardcoded text (hardsub) 121 | on video) Default is True 122 | -ccti, --create_cleared_text_images 123 | Create Cleared Text Images. Default is True 124 | -ces CREATE_EMPTY_SUB, --create_empty_sub CREATE_EMPTY_SUB 125 | Create Empty Sub With Provided Output File Name (*.ass 126 | or *.srt) 127 | -cscti CREATE_SUB_FROM_CLEARED_TXT_IMAGES, --create_sub_from_cleared_txt_images CREATE_SUB_FROM_CLEARED_TXT_IMAGES 128 | Create Sub From Cleared TXT Images With Provided 129 | Output File Name (*.ass or *.srt) 130 | -cstxt CREATE_SUB_FROM_TXT_RESULTS, --create_sub_from_txt_results CREATE_SUB_FROM_TXT_RESULTS 131 | Create Sub From TXT Results With Provided Output File 132 | Name (*.ass or *.srt) 133 | -ovocv, --open_video_opencv 134 | open video by OpenCV (default). Default is True 135 | -ovffmpeg, --open_video_ffmpeg 136 | open video by FFMPEG 137 | -uc, --use_cuda use cuda 138 | --start_time START_TIME 139 | start time, default = 0:00:00:000 (in format 140 | hour:min:sec:milisec) 141 | --end_time END_TIME end time, default = video length 142 | -te TOP_VIDEO_IMAGE_PERCENT_END, --top_video_image_percent_end TOP_VIDEO_IMAGE_PERCENT_END 143 | top video image percent offset from image bottom, can 144 | be in range [0.0,1.0], default = 1.0 145 | -be BOTTOM_VIDEO_IMAGE_PERCENT_END, --bottom_video_image_percent_end BOTTOM_VIDEO_IMAGE_PERCENT_END 146 | bottom video image percent offset from image bottom, 147 | can be in range [0.0,1.0], default = 0.0 148 | -le LEFT_VIDEO_IMAGE_PERCENT_END, --left_video_image_percent_end LEFT_VIDEO_IMAGE_PERCENT_END 149 | left video image percent end, can be in range 150 | [0.0,1.0], default = 0.0 151 | -re RIGHT_VIDEO_IMAGE_PERCENT_END, --right_video_image_percent_end RIGHT_VIDEO_IMAGE_PERCENT_END 152 | right video image percent end, can be in range 153 | [0.0,1.0], default = 1.0 154 | -gs GENERAL_SETTINGS, --general_settings GENERAL_SETTINGS 155 | general settings (path to general 
settings *.cfg file, 156 | default = settings/general.cfg) 157 | -nthr NUM_THREADS, --num_threads NUM_THREADS 158 | number of threads used for Run Search 159 | -nocrthr NUM_OCR_THREADS, --num_ocr_threads NUM_OCR_THREADS 160 | number of threads used for Create Cleared TXT Images 161 | ``` 162 | 163 |
164 | 165 | ### 5. 查看结果 166 | 167 | !!! info 168 | 169 | "如果想要让视频播放软件自动挂载srt文件或ass文件,需要更改srt或ass文件名字为视频文件名字,且放到同一目录下,亦或者手动指定加载。 170 | 171 | 前往`save_dir`目录下即可查看结果。 172 | -------------------------------------------------------------------------------- /docs/tutorial/senior.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | comments: true 3 | hide: 4 | - toc 5 | --- 6 | 7 | ### 1. Install and use VideoSubFinder 8 | 9 | - Download link: Windows & Linux ([videosubfinder](https://sourceforge.net/projects/videosubfinder/) / QQ group (706807542) shared files) | [Mac version](https://github.com/eritpchy/videosubfinder-cli) 10 | - Tutorial: [Tutorial for extracting subtitle keyframes with VideoSubFinder](https://juejin.cn/post/7203362527082053691) 11 | - The final `RGBImages` and `TXTImages` are usually in the installation directory for VideoSubFinder 12 | - ✧ It is recommended to use `RGBImages` (thanks to [dyphire](https://github.com/dyphire) for the feedback in [#21](https://github.com/SWHL/RapidVideOCR/issues/21)) 13 | 14 | ### 2. Install rapid_videocr 15 | 16 | ```bash linenums="1" 17 | pip install rapid_videocr 18 | ``` 19 | 20 | ### 3. Python usage 21 | 22 | {{< tabs tabTotal="2">}} 23 | {{% tab tabName="Only OCR" %}} 24 | 25 | ```python linenums="1" 26 | from rapid_videocr import RapidVideOCR 27 | 28 | # RapidVideOCRInput has two initialization parameters 29 | # is_concat_rec: Use a single image for recognition or not. The default is False, which means that a single image is used for recognition by default. 
30 | # concat_batch: The number of images to be used in overlay is 10 by default and can be adjusted 31 | # out_format: Output format selection, [srt, ass, txt, all], the default is all 32 | # is_print_console: Whether to print the result, [0, 1], the default is 0 for not printing 33 | ocr_input_params = RapidVideOCRInput( 34 | is_batch_rec=False, ocr_params={"Global.with_paddle": True} 35 | ) 36 | extractor = RapidVideOCR(ocr_input_params) 37 | 38 | rgb_dir = "tests/test_files/RGBImages" 39 | save_dir = "outputs" 40 | save_name = "a" 41 | 42 | # outputs/a.srt outputs/a.ass outputs/a.t 43 | extractor(rgb_dir, save_dir, save_name=save_name) 44 | ``` 45 | 46 | {{% /tab %}} 47 | {{% tab tabName="Extract + OCR" %}} 48 | 49 | ```python linenums="1" 50 | from rapid_videocr import RapidVideoSubFinderOCR 51 | 52 | vsf_exe = r"G:\ProgramFiles\VideoSubFinder_6.10_x64\Release_x64\VideoSubFinderWXW.exe" 53 | extractor = RapidVideoSubFinderOCR(vsf_exe_path=vsf_exe, is_concat_rec=True) 54 | 55 | # video_path can be directory path or video full path. 56 | video_path = 'test_files/tiny/2.mp4' 57 | save_dir = 'outputs' 58 | extractor(video_path, save_dir) 59 | ``` 60 | 61 | {{% /tab %}} 62 | {{< /tabs >}} 63 | 64 | ### 4. Command line usage 65 | 66 | {{< tabs tabTotal="2">}} 67 | {{% tab tabName="Only OCR" %}} 68 | 69 | ```bash linenums="1" 70 | rapid_videocr -i RGBImages 71 | ``` 72 | 73 | {{% /tab %}} 74 | {{% tab tabName="Extract + OCR" %}} 75 | 76 | ```bash linenums="1" 77 | rapid_videocr -vsf G:\ProgramFiles\VideoSubFinder_6.10_x64\Release_x64\VideoSubFinderWXW.exe -video_dir G:\ProgramFiles\RapidVideOCR\test_files\tiny 78 | ``` 79 | 80 | {{% /tab %}} 81 | {{< /tabs >}} 82 | 83 | Parameter details: 84 |
85 | 86 | ```bash linenums="1" 87 | $ rapid_videocr -h 88 | usage: rapid_videocr [-h] [-video_dir VIDEO_DIR] [-i IMG_DIR] [-s SAVE_DIR] 89 | [-o {srt,ass,txt,all}] [--is_concat_rec] [-b CONCAT_BATCH] [-p] 90 | [-vsf VSF_EXE_PATH] [-c] [-r] [-ccti] [-ces CREATE_EMPTY_SUB] 91 | [-cscti CREATE_SUB_FROM_CLEARED_TXT_IMAGES] 92 | [-cstxt CREATE_SUB_FROM_TXT_RESULTS] [-ovocv] [-ovffmpeg] [-uc] 93 | [--start_time START_TIME] [--end_time END_TIME] 94 | [-te TOP_VIDEO_IMAGE_PERCENT_END] 95 | [-be BOTTOM_VIDEO_IMAGE_PERCENT_END] 96 | [-le LEFT_VIDEO_IMAGE_PERCENT_END] 97 | [-re RIGHT_VIDEO_IMAGE_PERCENT_END] [-gs GENERAL_SETTINGS] 98 | [-nthr NUM_THREADS] [-nocrthr NUM_OCR_THREADS] 99 | 100 | optional arguments: 101 | -h, --help show this help message and exit 102 | 103 | VideOCRParameters: 104 | -video_dir VIDEO_DIR, --video_dir VIDEO_DIR 105 | The full path of video or the path of video directory. 106 | -i IMG_DIR, --img_dir IMG_DIR 107 | The full path of RGBImages or TXTImages. 108 | -s SAVE_DIR, --save_dir SAVE_DIR 109 | The path of saving the recognition result. Default is 110 | "outputs" under the current directory. 111 | -o {srt,ass,txt,all}, --out_format {srt,ass,txt,all} 112 | Output file format. Default is "all". 113 | --is_concat_rec Which mode to run (concat recognition or single 114 | recognition). Default is False. 115 | -b CONCAT_BATCH, --concat_batch CONCAT_BATCH 116 | The batch of concating image nums in concat 117 | recognition mode. Default is 10. 118 | -p, --print_console Whether to print the subtitle results to console. -p 119 | means to print. 120 | 121 | VSFParameters: 122 | -vsf VSF_EXE_PATH, --vsf_exe_path VSF_EXE_PATH 123 | The full path of VideoSubFinderWXW.exe. 124 | -c, --clear_dirs Clear Folders (remove all images), performed before 125 | any other steps. 
Default is True 126 | -r, --run_search Run Search (find frames with hardcoded text (hardsub) 127 | on video) Default is True 128 | -ccti, --create_cleared_text_images 129 | Create Cleared Text Images. Default is True 130 | -ces CREATE_EMPTY_SUB, --create_empty_sub CREATE_EMPTY_SUB 131 | Create Empty Sub With Provided Output File Name (*.ass 132 | or *.srt) 133 | -cscti CREATE_SUB_FROM_CLEARED_TXT_IMAGES, --create_sub_from_cleared_txt_images CREATE_SUB_FROM_CLEARED_TXT_IMAGES 134 | Create Sub From Cleared TXT Images With Provided 135 | Output File Name (*.ass or *.srt) 136 | -cstxt CREATE_SUB_FROM_TXT_RESULTS, --create_sub_from_txt_results CREATE_SUB_FROM_TXT_RESULTS 137 | Create Sub From TXT Results With Provided Output File 138 | Name (*.ass or *.srt) 139 | -ovocv, --open_video_opencv 140 | open video by OpenCV (default). Default is True 141 | -ovffmpeg, --open_video_ffmpeg 142 | open video by FFMPEG 143 | -uc, --use_cuda use cuda 144 | --start_time START_TIME 145 | start time, default = 0:00:00:000 (in format 146 | hour:min:sec:milisec) 147 | --end_time END_TIME end time, default = video length 148 | -te TOP_VIDEO_IMAGE_PERCENT_END, --top_video_image_percent_end TOP_VIDEO_IMAGE_PERCENT_END 149 | top video image percent offset from image bottom, can 150 | be in range [0.0,1.0], default = 1.0 151 | -be BOTTOM_VIDEO_IMAGE_PERCENT_END, --bottom_video_image_percent_end BOTTOM_VIDEO_IMAGE_PERCENT_END 152 | bottom video image percent offset from image bottom, 153 | can be in range [0.0,1.0], default = 0.0 154 | -le LEFT_VIDEO_IMAGE_PERCENT_END, --left_video_image_percent_end LEFT_VIDEO_IMAGE_PERCENT_END 155 | left video image percent end, can be in range 156 | [0.0,1.0], default = 0.0 157 | -re RIGHT_VIDEO_IMAGE_PERCENT_END, --right_video_image_percent_end RIGHT_VIDEO_IMAGE_PERCENT_END 158 | right video image percent end, can be in range 159 | [0.0,1.0], default = 1.0 160 | -gs GENERAL_SETTINGS, --general_settings GENERAL_SETTINGS 161 | general settings (path to general 
settings *.cfg file, 162 | default = settings/general.cfg) 163 | -nthr NUM_THREADS, --num_threads NUM_THREADS 164 | number of threads used for Run Search 165 | -nocrthr NUM_OCR_THREADS, --num_ocr_threads NUM_OCR_THREADS 166 | number of threads used for Create Cleared TXT Images 167 | ``` 168 | 169 |
170 | 171 | ### 5. View results 172 | 173 | Go to the `save_dir` directory to view the results. 174 | 175 | {{< alert context="info" text="If you want the video playback software to automatically mount the srt file or ass file, you need to change the srt or ass filename to be the same as the video file and put it in the same directory, or manually specify it." />}} 176 | -------------------------------------------------------------------------------- /rapid_videocr/vsf_ocr_cli.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import argparse 5 | from pathlib import Path 6 | 7 | from .main import OutputFormat, RapidVideOCR, RapidVideOCRInput 8 | from .utils.logger import logger 9 | from .utils.typings import VideoFormat 10 | from .utils.utils import float_range 11 | from .vsf_cli import VideoSubFinder, VideoSubFinderInput 12 | 13 | 14 | class RapidVideoSubFinderOCR: 15 | def __init__( 16 | self, 17 | vsf_input_params: VideoSubFinderInput, 18 | ocr_input_params: RapidVideOCRInput, 19 | ): 20 | self.vsf = VideoSubFinder(vsf_input_params) 21 | self.video_ocr = RapidVideOCR(ocr_input_params) 22 | self.video_formats = [VideoFormat[v].value for v in VideoFormat.__members__] 23 | 24 | def __call__(self, video_path: str, output_dir: str = "outputs"): 25 | if Path(video_path).is_dir(): 26 | video_list = Path(video_path).rglob("*.*") 27 | video_list = [ 28 | v for v in video_list if v.suffix.lower() in self.video_formats 29 | ] 30 | else: 31 | video_list = [video_path] 32 | 33 | logger.info( 34 | "Extracting subtitle images with VideoSubFinder (takes quite a long time) ..." 
35 | ) 36 | video_num = len(video_list) 37 | for i, one_video in enumerate(video_list): 38 | logger.info( 39 | f"[{i + 1}{video_num}] Starting to extract {one_video} key frame" 40 | ) 41 | 42 | save_name = Path(one_video).stem 43 | save_dir = Path(output_dir) / save_name 44 | save_vsf_dir = save_dir / "VSF_Results" 45 | 46 | try: 47 | self.vsf(str(one_video), str(save_vsf_dir)) 48 | except Exception as e: 49 | logger.error(f"Extract {one_video} error, {e}, skip") 50 | continue 51 | 52 | logger.info(f"{i + 1}/{video_num}] Starting to run {one_video} ocr") 53 | 54 | rgb_dir = Path(save_vsf_dir) / "RGBImages" 55 | if not list(rgb_dir.iterdir()): 56 | logger.warning(f"Extracting frames from {one_video} is 0, skip") 57 | continue 58 | self.video_ocr(rgb_dir, save_dir, save_name=save_name) 59 | 60 | 61 | def main(): 62 | parser = argparse.ArgumentParser() 63 | 64 | videocr_param_group = parser.add_argument_group(title="VideOCRParameters") 65 | videocr_param_group.add_argument( 66 | "-video_dir", 67 | "--video_dir", 68 | type=str, 69 | default=None, 70 | help="The full path of video or the path of video directory.", 71 | ) 72 | videocr_param_group.add_argument( 73 | "-i", 74 | "--img_dir", 75 | type=str, 76 | default=None, 77 | help="The full path of RGBImages or TXTImages.", 78 | ) 79 | videocr_param_group.add_argument( 80 | "-s", 81 | "--save_dir", 82 | type=str, 83 | default="outputs", 84 | help='The path of saving the recognition result. Default is "outputs" under the current directory.', 85 | ) 86 | videocr_param_group.add_argument( 87 | "-o", 88 | "--out_format", 89 | type=str, 90 | default=OutputFormat.ALL.value, 91 | choices=[OutputFormat[v].value for v in OutputFormat.__members__], 92 | help='Output file format. Default is "all".', 93 | ) 94 | videocr_param_group.add_argument( 95 | "--is_batch_rec", 96 | action="store_true", 97 | default=False, 98 | help="Which mode to run (concat recognition or single recognition). 
def main() -> None:
    """Command line entry point.

    Dispatch rules:
      * ``-vsf`` + ``-video_dir`` -> extract key frames with VSF, then OCR.
      * ``-i/--img_dir``          -> OCR an existing RGBImages/TXTImages dir.
      * otherwise                 -> nothing to do; warn and print usage.
    """
    parser = argparse.ArgumentParser()

    videocr_param_group = parser.add_argument_group(title="VideOCRParameters")
    videocr_param_group.add_argument(
        "-video_dir",
        "--video_dir",
        type=str,
        default=None,
        help="The full path of video or the path of video directory.",
    )
    videocr_param_group.add_argument(
        "-i",
        "--img_dir",
        type=str,
        default=None,
        help="The full path of RGBImages or TXTImages.",
    )
    videocr_param_group.add_argument(
        "-s",
        "--save_dir",
        type=str,
        default="outputs",
        help='The path of saving the recognition result. Default is "outputs" under the current directory.',
    )
    videocr_param_group.add_argument(
        "-o",
        "--out_format",
        type=str,
        default=OutputFormat.ALL.value,
        choices=[OutputFormat[v].value for v in OutputFormat.__members__],
        help='Output file format. Default is "all".',
    )
    videocr_param_group.add_argument(
        "--is_batch_rec",
        action="store_true",
        default=False,
        help="Which mode to run (concat recognition or single recognition). Default is False.",
    )
    videocr_param_group.add_argument(
        "-b",
        "--batch_size",
        type=int,
        default=10,
        help="The batch of concating image nums in concat recognition mode. Default is 10.",
    )

    vsf_param_group = parser.add_argument_group(title="VSFParameters")
    vsf_param_group.add_argument(
        "-vsf",
        "--vsf_exe_path",
        type=str,
        default=None,
        help="The full path of VideoSubFinderWXW.exe.",
    )
    vsf_param_group.add_argument(
        "-c",
        "--clear_dirs",
        action="store_false",
        default=True,
        help="Clear Folders (remove all images), performed before any other steps. Default is True",
    )
    vsf_param_group.add_argument(
        "-r",
        "--run_search",
        action="store_false",
        default=True,
        help="Run Search (find frames with hardcoded text (hardsub) on video) Default is True",
    )
    vsf_param_group.add_argument(
        "-ccti",
        "--create_cleared_text_images",
        action="store_true",
        default=False,
        # Fix: help used to claim "Default is True" while the default is False.
        help="Create Cleared Text Images. Default is False",
    )
    vsf_param_group.add_argument(
        "-ces",
        "--create_empty_sub",
        type=str,
        default=None,
        help="Create Empty Sub With Provided Output File Name (*.ass or *.srt)",
    )
    vsf_param_group.add_argument(
        "-cscti",
        "--create_sub_from_cleared_txt_images",
        type=str,
        default=None,
        help="Create Sub From Cleared TXT Images With Provided Output File Name (*.ass or *.srt)",
    )
    vsf_param_group.add_argument(
        "-cstxt",
        "--create_sub_from_txt_results",
        type=str,
        default=None,
        help="Create Sub From TXT Results With Provided Output File Name (*.ass or *.srt)",
    )
    vsf_param_group.add_argument(
        "-ovocv",
        "--open_video_opencv",
        action="store_false",
        default=True,
        help="open video by OpenCV (default). Default is True",
    )
    vsf_param_group.add_argument(
        "-ovffmpeg",
        "--open_video_ffmpeg",
        action="store_true",
        default=False,
        help="open video by FFMPEG",
    )
    vsf_param_group.add_argument(
        "-uc", "--use_cuda", action="store_true", default=False, help="use cuda"
    )
    vsf_param_group.add_argument(
        "--start_time",
        type=str,
        default="0:00:00:000",
        help="start time, default = 0:00:00:000 (in format hour:min:sec:milisec)",
    )
    vsf_param_group.add_argument(
        "--end_time",
        type=str,
        default=None,
        help="end time, default = video length",
    )
    vsf_param_group.add_argument(
        "-te",
        "--top_video_image_percent_end",
        type=float_range(0, 1.0),
        default=0.2,
        # Fix: help used to claim "default = 1.0" while the default is 0.2.
        help="top video image percent offset from image bottom, can be in range [0.0,1.0], default = 0.2",
    )
    vsf_param_group.add_argument(
        "-be",
        "--bottom_video_image_percent_end",
        type=float_range(0, 1.0),
        default=0.0,
        help="bottom video image percent offset from image bottom, can be in range [0.0,1.0], default = 0.0",
    )
    vsf_param_group.add_argument(
        "-le",
        "--left_video_image_percent_end",
        type=float_range(0, 1.0),
        default=0.0,
        help="left video image percent end, can be in range [0.0,1.0], default = 0.0",
    )
    vsf_param_group.add_argument(
        "-re",
        "--right_video_image_percent_end",
        type=float_range(0, 1.0),
        default=1.0,
        help="right video image percent end, can be in range [0.0,1.0], default = 1.0",
    )
    vsf_param_group.add_argument(
        "-gs",
        "--general_settings",
        default=None,
        help="general settings (path to general settings *.cfg file, default = settings/general.cfg)",
    )
    vsf_param_group.add_argument(
        "-nthr",
        "--num_threads",
        type=int,
        default=1,
        help="number of threads used for Run Search",
    )
    vsf_param_group.add_argument(
        "-nocrthr",
        "--num_ocr_threads",
        type=int,
        default=1,
        help="number of threads used for Create Cleared TXT Images",
    )
    args = parser.parse_args()

    ocr_input_params = RapidVideOCRInput(
        is_batch_rec=args.is_batch_rec,
        batch_size=args.batch_size,
        out_format=args.out_format,
    )

    if args.vsf_exe_path and args.video_dir:
        # NOTE(review): vars(args) also carries the OCR-group options;
        # VideoSubFinderInput is assumed to ignore unknown keys — confirm.
        vsf_input_params = VideoSubFinderInput(**vars(args))
        extractor = RapidVideoSubFinderOCR(vsf_input_params, ocr_input_params)
        extractor(args.video_dir, args.save_dir)
    elif args.img_dir:
        extractor = RapidVideOCR(ocr_input_params)
        extractor(args.img_dir, args.save_dir)
    else:
        # Fix: previously this branch silently did nothing; tell the user
        # which arguments are missing instead.
        logger.warning(
            "Nothing to do: pass -i/--img_dir for OCR only, or both "
            "-vsf/--vsf_exe_path and -video_dir for extraction + OCR."
        )
        parser.print_help()
class OCRProcessor:
    """Run OCR over VideoSubFinder frame images and build srt/ass/txt lines.

    Frame file names encode the subtitle's start and end time, e.g.
    ``0_00_00_041__0_00_00_415_...``: four ``_``-separated fields
    (hour, minute, second, millisecond), a double underscore, then four more
    fields for the end time. Both timestamp parsers rely on that layout.
    """

    def __init__(self, ocr_params: Optional[Dict] = None, batch_size: int = 10):
        self.ocr_engine = self._init_ocr_engine(ocr_params)
        # Number of images stacked vertically per OCR call in batch mode.
        self.batch_size = batch_size

    def _init_ocr_engine(self, ocr_params: Optional[Dict] = None) -> "RapidOCR":
        # Propagate this package's log level into the OCR engine so both
        # loggers stay in sync.
        log_level_dict = {"Global.log_level": LOG_LEVEL_MAP[logger.level]}
        if ocr_params is None:
            return RapidOCR(params=log_level_dict)

        ocr_params.update(log_level_dict)
        return RapidOCR(params=ocr_params)

    def __call__(
        self, img_list: List[Path], is_batch_rec: bool, is_txt_dir: bool
    ) -> Tuple[List[str], List[str], List[str]]:
        """Recognize all images and return (srt, ass, txt) line lists.

        Args:
            img_list: Frame image paths whose stems encode timestamps.
            is_batch_rec: True to OCR several images concatenated per call.
            is_txt_dir: True when images come from TXTImages (downscaled
                before concatenation in batch mode).
        """
        self.is_txt_dir = is_txt_dir
        process_func = self.batch_rec if is_batch_rec else self.single_rec
        rec_results = process_func(img_list)
        srt_results = self._generate_srt_results(rec_results)
        ass_results = self._generate_ass_results(rec_results)
        txt_results = self._generate_txt_result(rec_results)
        return srt_results, ass_results, txt_results

    def single_rec(self, img_list: List[Path]) -> List[Tuple[int, str, str, str]]:
        """OCR one image per engine call; returns (idx, srt_time, text, ass_time)."""
        logger.info("[OCR] Running with single recognition.")

        rec_results = []
        for i, img_path in enumerate(tqdm(img_list, desc="OCR")):
            time_str = self._get_srt_timestamp(img_path)
            ass_time_str = self._get_ass_timestamp(img_path)
            img = self._preprocess_image(img_path)

            dt_boxes, rec_res = self.get_ocr_result(img)
            txts = (
                self.process_same_line(dt_boxes, rec_res)
                if dt_boxes is not None
                else ""
            )
            rec_results.append([i, time_str, txts, ass_time_str])
        return rec_results

    @staticmethod
    def _get_srt_timestamp(file_path: Path) -> str:
        """Build the SRT "start --> end" timestamp from the image file name.

        Example stem: ``0_00_00_041__0_00_00_415_007...``.
        """

        def format_time(time_parts):
            # SRT wants a two-digit hour; the file name stores a bare digit.
            time_parts[0] = f"{time_parts[0]:0>2}"
            return ":".join(time_parts[:3]) + f",{time_parts[3]}"

        split_paths = file_path.stem.split("_")
        start_time = split_paths[:4]
        # Index 4 is the empty field produced by the double underscore.
        end_time = split_paths[5:9]
        return f"{format_time(start_time)} --> {format_time(end_time)}"

    @staticmethod
    def _get_ass_timestamp(file_path: Path) -> str:
        """Build the ASS "start,end" timestamp pair from the image file name.

        Fix: parse via ``split("_")`` (same scheme as ``_get_srt_timestamp``)
        instead of fixed character offsets, which silently broke for hour
        fields wider than one digit.
        """
        parts = file_path.stem.split("_")
        h1, m1, sec1, ms1 = (int(v) for v in parts[:4])
        # parts[4] is the empty field from the double underscore separator.
        h2, m2, sec2, ms2 = (int(v) for v in parts[5:9])

        # Absolute times in milliseconds.
        bt = (h1 * 3600 + m1 * 60 + sec1) * 1000 + ms1
        et = (h2 * 3600 + m2 * 60 + sec2) * 1000 + ms2

        def to_ass(ts_ms: int) -> str:
            # ASS uses centiseconds: truncate (no rounding), format H:MM:SS.CC.
            cs = (ts_ms // 10) % 100
            total_s = ts_ms // 1000
            s = total_s % 60
            total_m = total_s // 60
            m = total_m % 60
            h = total_m // 60
            return f"{h}:{m:02d}:{s:02d}.{cs:02d}"

        return f"{to_ass(bt)},{to_ass(et)}"

    @staticmethod
    def _preprocess_image(img_path: Path) -> np.ndarray:
        # Pad the image above and below by its own height, presumably to give
        # the detector breathing room around the subtitle strip — TODO confirm.
        img = read_img(img_path)
        img = padding_img(img, (img.shape[0], img.shape[0], 0, 0))
        return img

    @staticmethod
    def _generate_srt_results(
        rec_results: List[Tuple[int, str, str, str]],
    ) -> List[str]:
        # Fix: SRT counters must be consecutive starting at 1, so number by
        # position in the result list rather than by the stored frame index
        # (batch mode can skip whole batches, which left gaps in numbering).
        return [
            f"{idx}\n{time_str}\n{txt}\n"
            for idx, (_, time_str, txt, _) in enumerate(rec_results, 1)
        ]

    @staticmethod
    def _generate_ass_results(
        rec_results: List[Tuple[int, str, str, str]],
    ) -> List[str]:
        """Render one ASS Dialogue event per recognized frame."""
        return [
            f"Dialogue: 0,{ass_time_str},Default,,0,0,0,,{txt}"
            for _, _, txt, ass_time_str in rec_results
        ]

    @staticmethod
    def _generate_txt_result(rec_results: List[Tuple[int, str, str, str]]) -> List[str]:
        """Render plain-text output: one recognized text block per frame."""
        return [f"{txt}\n" for _, _, txt, _ in rec_results]

    def batch_rec(self, img_list: List[Path]) -> List[Tuple[int, str, str, str]]:
        """OCR images in vertically concatenated batches of ``self.batch_size``."""
        logger.info("[OCR] Running with concat recognition.")

        img_nums = len(img_list)
        rec_results = []
        for start_i in tqdm(range(0, img_nums, self.batch_size), desc="Concat Rec"):
            end_i = min(img_nums, start_i + self.batch_size)

            concat_img, img_coordinates, img_paths = self._prepare_batch(
                img_list[start_i:end_i]
            )
            dt_boxes, rec_res = self.get_ocr_result(concat_img)
            if rec_res is None or dt_boxes is None:
                # Nothing detected in this whole batch; skip it.
                continue

            one_batch_rec_results = self._process_batch_results(
                start_i, img_coordinates, dt_boxes, rec_res, img_paths
            )
            rec_results.extend(one_batch_rec_results)
        return rec_results

    def _prepare_batch(
        self, img_list: List[Path]
    ) -> Tuple[np.ndarray, np.ndarray, List[Path]]:
        """Stack images vertically; return (concat_img, per-image quads, paths)."""
        padding_value = 10
        array_img_list, img_coordinates = [], []
        for i, img_path in enumerate(img_list):
            img = read_img(img_path)
            if self.is_txt_dir:
                # TXTImages are large; shrink before concatenation.
                img = cv2.resize(img, None, fx=0.25, fy=0.25)

            pad_img = padding_img(img, (0, padding_value, 0, 0))
            array_img_list.append(pad_img)

            # Quadrilateral of this image inside the concatenated canvas,
            # used later to assign detected boxes back to their source frame.
            h, w = img.shape[:2]
            x0, y0 = 0, i * (h + padding_value)
            x1, y1 = w, (i + 1) * (h + padding_value)
            img_coordinates.append([(x0, y0), (x1, y0), (x1, y1), (x0, y1)])

        return np.vstack(array_img_list), np.array(img_coordinates), img_list

    def _process_batch_results(
        self,
        start_i: int,
        img_coordinates: np.ndarray,
        dt_boxes: np.ndarray,
        rec_res: Tuple[str],
        img_paths: List[Path],
    ) -> List[Tuple[int, str, str, str]]:
        """Convert one batch's matches into (idx, srt_time, text, ass_time) rows."""
        match_dict = self._match_boxes_to_images(
            img_coordinates, dt_boxes, rec_res, img_paths
        )

        results = []
        for k, v in match_dict.items():
            cur_frame_idx = start_i + k
            if v:
                img_path, boxes, recs = list(zip(*v))
                time_str = self._get_srt_timestamp(img_path[0])
                ass_time_str = self._get_ass_timestamp(img_path[0])
                txts = self.process_same_line(boxes, recs)
            else:
                # No text matched to this frame: keep the timestamps, empty text.
                time_str = self._get_srt_timestamp(img_paths[k])
                ass_time_str = self._get_ass_timestamp(img_paths[k])
                txts = ""

            results.append([cur_frame_idx, time_str, txts, ass_time_str])
        return results

    def _match_boxes_to_images(
        self,
        img_coordinates: np.ndarray,
        dt_boxes: np.ndarray,
        rec_res: List[str],
        img_paths: List[Path],
    ) -> Dict[int, List[Tuple[Path, np.ndarray, str]]]:
        """Assign each detected box to the source image whose region contains it."""
        match_dict = {k: [] for k in range(len(img_coordinates))}
        # Each detection is consumed by at most one frame.
        visited_idx = set()

        for i, frame_boxes in enumerate(img_coordinates):
            for idx, (dt_box, txt) in enumerate(zip(dt_boxes, rec_res)):
                if idx in visited_idx:
                    continue

                if self._is_box_matched(frame_boxes, dt_box):
                    match_dict[i].append((img_paths[i], dt_box, txt))
                    visited_idx.add(idx)

        return match_dict

    def _is_box_matched(self, frame_boxes: np.ndarray, dt_box: np.ndarray) -> bool:
        """True when a detected box belongs to the given frame region."""
        box_iou = compute_poly_iou(frame_boxes, dt_box)
        return is_inclusive_each_other(frame_boxes, dt_box) or box_iou > 0.1

    def get_ocr_result(
        self, img: np.ndarray
    ) -> Tuple[Optional[np.ndarray], Optional[Tuple[str]]]:
        """Run the OCR engine; (None, None) when nothing is detected."""
        ocr_result = self.ocr_engine(img)
        if ocr_result.boxes is None:
            return None, None
        return ocr_result.boxes, ocr_result.txts

    def process_same_line(self, dt_boxes: np.ndarray, rec_res: List[str]) -> str:
        """Merge per-box texts: same visual line joined by spaces, lines by \\n."""
        if len(rec_res) == 1:
            return rec_res[0]

        y_centroids = [compute_centroid(box)[1] for box in dt_boxes]
        line_groups = self._group_by_lines(y_centroids)
        return self._merge_line_text(line_groups, rec_res)

    def _group_by_lines(self, y_centroids: List[float]) -> List[List[int]]:
        """Group box indices into visual lines by adjacent y-centroid proximity."""
        bool_res = self._is_same_line(y_centroids)
        groups = []
        current_group = [0]
        for i, is_same in enumerate(bool_res, 1):
            if is_same:
                current_group.append(i)
            else:
                groups.append(current_group)
                current_group = [i]

        groups.append(current_group)
        return groups

    @staticmethod
    def _is_same_line(points: List) -> List[bool]:
        """For each adjacent pair, True when the two y-values are within 5 px."""
        threshold = 5

        align_points = list(zip(points, points[1:]))
        bool_res = [False] * len(align_points)
        for i, point in enumerate(align_points):
            y0, y1 = point
            if abs(y0 - y1) <= threshold:
                bool_res[i] = True
        return bool_res

    def _merge_line_text(self, line_groups: List[List[int]], rec_res: List[str]) -> str:
        """Join texts within a line with spaces and lines with newlines."""
        lines = []
        for group in line_groups:
            line_text = " ".join(rec_res[i] for i in group)
            lines.append(line_text)
        return "\n".join(lines)
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /assets/RapidVideOCRDemo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "YBjLERcEsTES" 7 | }, 8 | "source": [ 9 | "## [RapidVideOCR Demo](https://github.com/SWHL/RapidVideOCR)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "IPBSdGqbjNpc" 16 | }, 17 | "source": [ 18 | "#### Require:\n", 19 | "- The RGBImages of [Video](https://www.youtube.com/watch?v=Z2Bg_usMYiA) from the VideoSubFinder software.\n", 20 | "- Install the RapidVideOCR" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "IT1t_86aq4QU" 27 | }, 28 | "source": [ 29 | "#### Download the RGBImages.zip and unzip it." 
30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 33, 35 | "metadata": { 36 | "colab": { 37 | "base_uri": "https://localhost:8080/" 38 | }, 39 | "id": "qWiWiKJWjcH1", 40 | "outputId": "9b5c8098-061a-4f85-b7a8-822e1f26b166" 41 | }, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "--2023-04-09 01:47:40-- https://github.com/SWHL/RapidVideOCR/files/11184614/RGBImages.zip\n", 48 | "Resolving github.com (github.com)... 140.82.112.4\n", 49 | "Connecting to github.com (github.com)|140.82.112.4|:443... connected.\n", 50 | "HTTP request sent, awaiting response... 302 Found\n", 51 | "Location: https://objects.githubusercontent.com/github-production-repository-file-5c1aeb/405589029/11184614?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230409%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230409T014740Z&X-Amz-Expires=300&X-Amz-Signature=fa9c0cb7ec18b1113504c94f60ed8bd6c8250cd040d056396c0dc6caf5184dea&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=405589029&response-content-disposition=attachment%3Bfilename%3DRGBImages.zip&response-content-type=application%2Fx-zip-compressed [following]\n", 52 | "--2023-04-09 01:47:40-- https://objects.githubusercontent.com/github-production-repository-file-5c1aeb/405589029/11184614?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230409%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230409T014740Z&X-Amz-Expires=300&X-Amz-Signature=fa9c0cb7ec18b1113504c94f60ed8bd6c8250cd040d056396c0dc6caf5184dea&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=405589029&response-content-disposition=attachment%3Bfilename%3DRGBImages.zip&response-content-type=application%2Fx-zip-compressed\n", 53 | "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 
185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", 54 | "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.108.133|:443... connected.\n", 55 | "HTTP request sent, awaiting response... 200 OK\n", 56 | "Length: 8314498 (7.9M) [application/x-zip-compressed]\n", 57 | "Saving to: ‘RGBImages.zip’\n", 58 | "\n", 59 | "RGBImages.zip 100%[===================>] 7.93M 43.3MB/s in 0.2s \n", 60 | "\n", 61 | "2023-04-09 01:47:40 (43.3 MB/s) - ‘RGBImages.zip’ saved [8314498/8314498]\n", 62 | "\n", 63 | "Archive: RGBImages.zip\n", 64 | " creating: RGBImages/\n", 65 | " inflating: RGBImages/0_00_20_640__0_00_23_999_0055800000012800072001280.jpeg \n", 66 | " inflating: RGBImages/0_00_25_120__0_00_25_999_0055800000012800072001280.jpeg \n", 67 | " inflating: RGBImages/0_00_26_000__0_00_26_599_0055800000012800072001280.jpeg \n", 68 | " inflating: RGBImages/0_00_27_760__0_00_28_999_0055800000012800072001280.jpeg \n", 69 | " inflating: RGBImages/0_00_30_280__0_00_30_599_0055800000012800072001280.jpeg \n", 70 | " inflating: RGBImages/0_00_30_600__0_00_32_199_0055800000012800072001280.jpeg \n", 71 | " inflating: RGBImages/0_00_32_800__0_00_33_199_0055800000012800072001280.jpeg \n", 72 | " inflating: RGBImages/0_00_33_200__0_00_34_959_0055800000012800072001280.jpeg \n", 73 | " inflating: RGBImages/0_00_34_960__0_00_35_519_0055800000012800072001280.jpeg \n", 74 | " inflating: RGBImages/0_00_39_040__0_00_39_479_0055800000012800072001280.jpeg \n", 75 | " inflating: RGBImages/0_00_40_040__0_00_41_679_0055800000012800072001280.jpeg \n", 76 | " inflating: RGBImages/0_00_41_680__0_00_42_919_0055800000012800072001280.jpeg \n", 77 | " inflating: RGBImages/0_00_42_920__0_00_43_439_0055800000012800072001280.jpeg \n", 78 | " inflating: RGBImages/0_00_43_440__0_00_43_799_0055800000012800072001280.jpeg \n", 79 | " inflating: RGBImages/0_00_44_920__0_00_45_359_0055800000012800072001280.jpeg \n", 80 | " inflating: 
RGBImages/0_00_45_360__0_00_47_799_0055800000012800072001280.jpeg \n", 81 | " inflating: RGBImages/0_00_47_800__0_00_48_159_0055800000012800072001280.jpeg \n", 82 | " inflating: RGBImages/0_00_48_160__0_00_48_559_0055800000012800072001280.jpeg \n", 83 | " inflating: RGBImages/0_00_50_520__0_00_54_079_0055800000012800072001280.jpeg \n", 84 | " inflating: RGBImages/0_00_54_080__0_00_55_799_0055800000012800072001280.jpeg \n", 85 | " inflating: RGBImages/0_00_56_000__0_00_56_359_0055800000012800072001280.jpeg \n", 86 | " inflating: RGBImages/0_00_57_680__0_00_57_999_0055800000012800072001280.jpeg \n", 87 | " inflating: RGBImages/0_01_00_120__0_01_00_759_0055800000012800072001280.jpeg \n", 88 | " inflating: RGBImages/0_01_02_160__0_01_02_919_0055800000012800072001280.jpeg \n", 89 | " inflating: RGBImages/0_01_04_320__0_01_06_759_0055800000012800072001280.jpeg \n", 90 | " inflating: RGBImages/0_01_08_120__0_01_08_679_0055800000012800072001280.jpeg \n", 91 | " inflating: RGBImages/0_01_08_680__0_01_13_119_0055800000012800072001280.jpeg \n", 92 | " inflating: RGBImages/0_01_13_120__0_01_13_799_0055800000012800072001280.jpeg \n", 93 | " inflating: RGBImages/0_01_13_800__0_01_16_079_0055800000012800072001280.jpeg \n", 94 | " inflating: RGBImages/0_01_16_080__0_01_17_039_0055800000012800072001280.jpeg \n", 95 | " inflating: RGBImages/0_01_19_320__0_01_20_359_0055800000012800072001280.jpeg \n", 96 | " inflating: RGBImages/0_01_20_360__0_01_21_919_0055800000012800072001280.jpeg \n", 97 | " inflating: RGBImages/0_01_23_120__0_01_23_559_0055800000012800072001280.jpeg \n", 98 | " inflating: RGBImages/0_01_23_560__0_01_24_959_0055800000012800072001280.jpeg \n", 99 | " inflating: RGBImages/0_01_24_960__0_01_25_559_0055800000012800072001280.jpeg \n", 100 | " inflating: RGBImages/0_01_25_560__0_01_26_159_0055800000012800072001280.jpeg \n", 101 | " inflating: RGBImages/0_01_27_560__0_01_27_919_0055800000012800072001280.jpeg \n", 102 | " inflating: 
RGBImages/0_01_27_920__0_01_30_439_0055800000012800072001280.jpeg \n", 103 | " inflating: RGBImages/0_01_30_440__0_01_31_119_0055800000012800072001280.jpeg \n", 104 | " inflating: RGBImages/0_01_31_120__0_01_31_599_0055800000012800072001280.jpeg \n", 105 | " inflating: RGBImages/0_01_31_600__0_01_32_119_0055800000012800072001280.jpeg \n", 106 | " inflating: RGBImages/0_01_33_040__0_01_34_639_0055800000012800072001280.jpeg \n", 107 | " inflating: RGBImages/0_01_34_640__0_01_38_439_0055800000012800072001280.jpeg \n", 108 | " inflating: RGBImages/0_01_38_440__0_01_38_839_0055800000012800072001280.jpeg \n", 109 | " inflating: RGBImages/0_01_39_960__0_01_40_279_0055800000012800072001280.jpeg \n", 110 | " inflating: RGBImages/0_01_40_280__0_01_40_879_0055800000012800072001280.jpeg \n", 111 | " inflating: RGBImages/0_01_47_920__0_01_48_559_0055800000012800072001280.jpeg \n", 112 | " inflating: RGBImages/0_01_48_560__0_01_50_679_0055800000012800072001280.jpeg \n", 113 | " inflating: RGBImages/0_01_50_920__0_01_51_319_0055800000012800072001280.jpeg \n", 114 | " inflating: RGBImages/0_01_52_520__0_01_53_359_0055800000012800072001280.jpeg \n", 115 | " inflating: RGBImages/0_01_53_360__0_01_53_999_0055800000012800072001280.jpeg \n", 116 | " inflating: RGBImages/0_01_54_000__0_01_56_159_0055800000012800072001280.jpeg \n", 117 | " inflating: RGBImages/0_01_56_160__0_01_56_959_0055800000012800072001280.jpeg \n", 118 | " inflating: RGBImages/0_01_58_040__0_01_58_399_0055800000012800072001280.jpeg \n", 119 | " inflating: RGBImages/0_01_58_400__0_01_59_639_0055800000012800072001280.jpeg \n", 120 | " inflating: RGBImages/0_01_59_640__0_02_00_479_0055800000012800072001280.jpeg \n", 121 | " inflating: RGBImages/0_02_00_480__0_02_01_039_0055800000012800072001280.jpeg \n", 122 | " inflating: RGBImages/0_02_02_240__0_02_02_799_0055800000012800072001280.jpeg \n", 123 | " inflating: RGBImages/0_02_02_800__0_02_04_039_0055800000012800072001280.jpeg \n", 124 | " inflating: 
RGBImages/0_02_08_000__0_02_09_038_0055800000012800072001280.jpeg \n", 125 | " inflating: RGBImages/0_02_09_039__0_02_10_198_0055800000012800072001280.jpeg \n", 126 | " inflating: RGBImages/0_02_11_720__0_02_13_119_0055800000012800072001280.jpeg \n", 127 | " inflating: RGBImages/0_02_13_280__0_02_13_799_0055800000012800072001280.jpeg \n", 128 | " inflating: RGBImages/0_02_13_800__0_02_14_719_0055800000012800072001280.jpeg \n", 129 | " inflating: RGBImages/0_02_14_720__0_02_15_239_0055800000012800072001280.jpeg \n", 130 | " inflating: RGBImages/0_02_15_240__0_02_15_839_0055800000012800072001280.jpeg \n", 131 | " inflating: RGBImages/0_02_17_640__0_02_21_719_0055800000012800072001280.jpeg \n", 132 | " inflating: RGBImages/0_02_21_720__0_02_22_639_0055800000012800072001280.jpeg \n", 133 | " inflating: RGBImages/0_02_26_640__0_02_27_239_0055800000012800072001280.jpeg \n", 134 | " inflating: RGBImages/0_02_27_240__0_02_27_879_0055800000012800072001280.jpeg \n", 135 | " inflating: RGBImages/0_02_27_920__0_02_28_479_0055800000012800072001280.jpeg \n", 136 | " inflating: RGBImages/0_02_29_360__0_02_30_119_0055800000012800072001280.jpeg \n", 137 | " inflating: RGBImages/0_02_30_240__0_02_30_639_0055800000012800072001280.jpeg \n", 138 | " inflating: RGBImages/0_02_31_200__0_02_31_599_0055800000012800072001280.jpeg \n", 139 | " inflating: RGBImages/0_02_31_600__0_02_32_559_0055800000012800072001280.jpeg \n", 140 | " inflating: RGBImages/0_02_32_560__0_02_33_439_0055800000012800072001280.jpeg \n", 141 | " inflating: RGBImages/0_02_33_440__0_02_34_079_0055800000012800072001280.jpeg \n", 142 | " inflating: RGBImages/0_02_35_520__0_02_37_159_0055800000012800072001280.jpeg \n", 143 | " inflating: RGBImages/0_02_37_160__0_02_41_959_0055800000012800072001280.jpeg \n", 144 | " inflating: RGBImages/0_02_46_440__0_02_47_039_0055800000012800072001280.jpeg \n", 145 | " inflating: RGBImages/0_02_47_040__0_02_48_199_0055800000012800072001280.jpeg \n", 146 | " inflating: 
RGBImages/0_02_50_520__0_02_50_879_0055800000012800072001280.jpeg \n", 147 | " inflating: RGBImages/0_02_50_880__0_02_53_279_0055800000012800072001280.jpeg \n", 148 | " inflating: RGBImages/0_02_54_840__0_02_56_679_0055800000012800072001280.jpeg \n", 149 | " inflating: RGBImages/0_02_56_680__0_02_57_519_0055800000012800072001280.jpeg \n", 150 | " inflating: RGBImages/0_02_57_520__0_02_57_999_0055800000012800072001280.jpeg \n", 151 | " inflating: RGBImages/0_03_00_360__0_03_00_919_0055800000012800072001280.jpeg \n", 152 | " inflating: RGBImages/0_03_00_920__0_03_01_519_0055800000012800072001280.jpeg \n", 153 | " inflating: RGBImages/0_03_01_560__0_03_04_599_0055800000012800072001280.jpeg \n", 154 | " inflating: RGBImages/0_03_04_600__0_03_05_879_0055800000012800072001280.jpeg \n", 155 | " inflating: RGBImages/0_03_05_880__0_03_06_759_0055800000012800072001280.jpeg \n", 156 | " inflating: RGBImages/0_03_10_160__0_03_10_559_0055800000012800072001280.jpeg \n", 157 | " inflating: RGBImages/0_03_11_680__0_03_11_999_0055800000012800072001280.jpeg \n", 158 | " inflating: RGBImages/0_03_12_040__0_03_12_399_0055800000012800072001280.jpeg \n", 159 | " inflating: RGBImages/0_03_12_400__0_03_12_919_0055800000012800072001280.jpeg \n", 160 | " inflating: RGBImages/0_03_12_920__0_03_13_239_0055800000012800072001280.jpeg \n", 161 | " inflating: RGBImages/0_03_13_240__0_03_13_599_0055800000012800072001280.jpeg \n", 162 | " inflating: RGBImages/0_03_21_000__0_03_21_479_0055800000012800072001280.jpeg \n", 163 | " inflating: RGBImages/0_03_21_600__0_03_21_919_0055800000012800072001280.jpeg \n", 164 | " inflating: RGBImages/0_03_21_920__0_03_22_239_0055800000012800072001280.jpeg \n", 165 | " inflating: RGBImages/0_03_24_480__0_03_24_919_0055800000012800072001280.jpeg \n", 166 | " inflating: RGBImages/0_03_24_920__0_03_25_639_0055800000012800072001280.jpeg \n", 167 | " inflating: RGBImages/0_03_25_640__0_03_27_119_0055800000012800072001280.jpeg \n", 168 | " inflating: 
RGBImages/0_03_27_120__0_03_27_999_0055800000012800072001280.jpeg \n", 169 | " inflating: RGBImages/0_03_29_520__0_03_30_039_0055800000012800072001280.jpeg \n", 170 | " inflating: RGBImages/0_03_30_120__0_03_30_759_0055800000012800072001280.jpeg \n", 171 | " inflating: RGBImages/0_03_30_760__0_03_31_639_0055800000012800072001280.jpeg \n", 172 | " inflating: RGBImages/0_03_31_640__0_03_31_959_0055800000012800072001280.jpeg \n", 173 | " inflating: RGBImages/0_03_31_960__0_03_32_319_0055800000012800072001280.jpeg \n", 174 | " inflating: RGBImages/0_03_33_680__0_03_33_999_0055800000012800072001280.jpeg \n", 175 | " inflating: RGBImages/0_03_34_000__0_03_34_599_0055800000012800072001280.jpeg \n", 176 | " inflating: RGBImages/0_03_34_600__0_03_35_399_0055800000012800072001280.jpeg \n", 177 | " inflating: RGBImages/0_03_35_520__0_03_37_959_0055800000012800072001280.jpeg \n", 178 | " inflating: RGBImages/0_03_38_400__0_03_38_879_0055800000012800072001280.jpeg \n", 179 | " inflating: RGBImages/0_03_38_880__0_03_39_439_0055800000012800072001280.jpeg \n", 180 | " inflating: RGBImages/0_03_39_440__0_03_39_919_0055800000012800072001280.jpeg \n", 181 | " inflating: RGBImages/0_03_40_160__0_03_40_599_0055800000012800072001280.jpeg \n", 182 | " inflating: RGBImages/0_03_40_600__0_03_40_919_0055800000012800072001280.jpeg \n", 183 | " inflating: RGBImages/0_03_40_920__0_03_41_399_0055800000012800072001280.jpeg \n", 184 | " inflating: RGBImages/0_03_44_240__0_03_44_679_0055800000012800072001280.jpeg \n", 185 | " inflating: RGBImages/0_03_44_680__0_03_44_999_0055800000012800072001280.jpeg \n", 186 | " inflating: RGBImages/0_03_45_000__0_03_49_239_0055800000012800072001280.jpeg \n", 187 | " inflating: RGBImages/0_03_49_240__0_03_50_799_0055800000012800072001280.jpeg \n", 188 | " inflating: RGBImages/0_03_50_840__0_03_51_199_0055800000012800072001280.jpeg \n", 189 | " inflating: RGBImages/0_03_51_200__0_03_51_599_0055800000012800072001280.jpeg \n", 190 | " inflating: 
RGBImages/0_03_57_240__0_03_57_919_0055800000012800072001280.jpeg \n", 191 | " inflating: RGBImages/0_03_58_440__0_03_59_199_0055800000012800072001280.jpeg \n", 192 | " inflating: RGBImages/0_03_59_200__0_04_05_279_0055800000012800072001280.jpeg \n", 193 | " inflating: RGBImages/0_04_05_280__0_04_06_919_0055800000012800072001280.jpeg \n", 194 | " inflating: RGBImages/0_04_20_840__0_04_21_159_0055800000012800072001280.jpeg \n", 195 | " inflating: RGBImages/0_04_34_720__0_04_35_879_0055800000012800072001280.jpeg \n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "!wget https://github.com/SWHL/RapidVideOCR/files/11184614/RGBImages.zip\n", 201 | "!unzip RGBImages.zip\n", 202 | "!rm RGBImages.zip" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": { 208 | "id": "1cTofr4Zq_WB" 209 | }, 210 | "source": [ 211 | "#### Install the RapidVideOCR package." 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 34, 217 | "metadata": { 218 | "colab": { 219 | "base_uri": "https://localhost:8080/" 220 | }, 221 | "id": "5URYsomEqnuh", 222 | "outputId": "3a6093db-bfaa-4069-e92e-2e7bab97f987" 223 | }, 224 | "outputs": [ 225 | { 226 | "name": "stdout", 227 | "output_type": "stream", 228 | "text": [ 229 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 230 | "Requirement already satisfied: rapid_videocr in /usr/local/lib/python3.9/dist-packages (2.1.6)\n", 231 | "Requirement already satisfied: rapidocr-onnxruntime>=1.2.2 in /usr/local/lib/python3.9/dist-packages (from rapid_videocr) (1.2.5)\n", 232 | "Requirement already satisfied: tqdm>=4.52.0 in /usr/local/lib/python3.9/dist-packages (from rapid_videocr) (4.65.0)\n", 233 | "Requirement already satisfied: PyYAML in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (6.0)\n", 234 | "Requirement already satisfied: Pillow in /usr/local/lib/python3.9/dist-packages (from 
rapidocr-onnxruntime>=1.2.2->rapid_videocr) (8.4.0)\n", 235 | "Requirement already satisfied: onnxruntime>=1.7.0 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.14.1)\n", 236 | "Requirement already satisfied: Shapely>=1.7.1 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (2.0.1)\n", 237 | "Requirement already satisfied: pyclipper>=1.2.1 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.3.0.post4)\n", 238 | "Requirement already satisfied: numpy>=1.19.3 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.22.4)\n", 239 | "Requirement already satisfied: opencv-python>=4.5.1.48 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (4.7.0.72)\n", 240 | "Requirement already satisfied: six>=1.15.0 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.16.0)\n", 241 | "Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.11.1)\n", 242 | "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (23.3.3)\n", 243 | "Requirement already satisfied: protobuf in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (3.20.3)\n", 244 | "Requirement already satisfied: packaging in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (23.0)\n", 245 | "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (15.0.1)\n", 246 | "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.9/dist-packages (from 
coloredlogs->onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (10.0)\n", 247 | "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.3.0)\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "!pip install rapid_videocr" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 35, 258 | "metadata": { 259 | "colab": { 260 | "base_uri": "https://localhost:8080/" 261 | }, 262 | "id": "13GXToLcrFl8", 263 | "outputId": "d18fb2c0-79ae-4e29-9b27-de7f7e980707" 264 | }, 265 | "outputs": [ 266 | { 267 | "name": "stdout", 268 | "output_type": "stream", 269 | "text": [ 270 | "Running with concat recognition.\n", 271 | "OCR: 100% 14/14 [00:28<00:00, 2.07s/it]\n", 272 | "The file has been saved in the result/result.srt\n", 273 | "The result has been saved to result directory.\n" 274 | ] 275 | } 276 | ], 277 | "source": [ 278 | "!rapid_videocr -i /content/RGBImages -s result -o srt" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": { 284 | "id": "DR8RbHFisLZK" 285 | }, 286 | "source": [ 287 | "#### Look the result." 
288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 36, 293 | "metadata": { 294 | "colab": { 295 | "base_uri": "https://localhost:8080/" 296 | }, 297 | "id": "g2PdZnGJrsdx", 298 | "outputId": "c2107dd0-f099-464c-91a0-247294e69c60" 299 | }, 300 | "outputs": [ 301 | { 302 | "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "1\n", 306 | "00:00:20,640 --> 00:00:23,999\n", 307 | "Eyelyinightinmydreams\n", 308 | "\n", 309 | "2\n", 310 | "00:00:25,120 --> 00:00:25,999\n", 311 | "Iseeyou\n", 312 | "\n", 313 | "3\n", 314 | "00:00:26,000 --> 00:00:26,599\n", 315 | "Iseeyou\n", 316 | "\n", 317 | "4\n", 318 | "00:00:27,760 --> 00:00:28,999\n", 319 | "Ifell you\n", 320 | "\n", 321 | "5\n", 322 | "00:00:30,280 --> 00:00:30,599\n", 323 | "That is hiow I know you go on\n", 324 | "\n", 325 | "6\n", 326 | "00:00:30,600 --> 00:00:32,199\n", 327 | "howIknowyougoon\n", 328 | "\n", 329 | "7\n", 330 | "00:00:32,800 --> 00:00:33,199\n", 331 | "That is howIkhowyougo.on\n", 332 | "\n", 333 | "8\n", 334 | "00:00:33,200 --> 00:00:34,959\n", 335 | "That is how I know you go on\n", 336 | "\n", 337 | "9\n", 338 | "00:00:34,960 --> 00:00:35,519\n", 339 | "That is how I know you go on\n", 340 | "\n", 341 | "11\n", 342 | "00:00:40,040 --> 00:00:41,679\n", 343 | "Faracrossthedistance\n", 344 | "\n", 345 | "12\n", 346 | "00:00:41,680 --> 00:00:42,919\n", 347 | "Faracross the distance\n", 348 | "\n", 349 | "13\n", 350 | "00:00:42,920 --> 00:00:43,439\n", 351 | "Faracross thelistance\n", 352 | "\n", 353 | "14\n", 354 | "00:00:43,440 --> 00:00:43,799\n", 355 | "Faracrosshe Mistance\n", 356 | "\n", 357 | "15\n", 358 | "00:00:44,920 --> 00:00:45,359\n", 359 | "and Spaces between us\n", 360 | "\n", 361 | "16\n", 362 | "00:00:45,360 --> 00:00:47,799\n", 363 | "and Spaces between us\n", 364 | "\n", 365 | "17\n", 366 | "00:00:47,800 --> 00:00:48,159\n", 367 | "and Spaces betweenus\n", 368 | "\n", 369 | "18\n", 370 | "00:00:48,160 --> 00:00:48,559\n", 371 | "and 
Spacesbetween us\n", 372 | "\n", 373 | "19\n", 374 | "00:00:50,520 --> 00:00:54,079\n", 375 | "u havecometo showyou go on\n", 376 | "\n", 377 | "20\n", 378 | "00:00:54,080 --> 00:00:55,799\n", 379 | "You have come to show vou go on\n", 380 | "\n", 381 | "21\n", 382 | "00:00:56,000 --> 00:00:56,359\n", 383 | "You haveoreto show y\n", 384 | "\n", 385 | "23\n", 386 | "00:01:00,120 --> 00:01:00,759\n", 387 | "Near\n", 388 | "\n", 389 | "24\n", 390 | "00:01:02,160 --> 00:01:02,919\n", 391 | "far\n", 392 | "\n", 393 | "25\n", 394 | "00:01:04,320 --> 00:01:06,759\n", 395 | "reveryouare\n", 396 | "\n", 397 | "26\n", 398 | "00:01:08,120 --> 00:01:08,679\n", 399 | "I belieye that the heart does go\n", 400 | "on\n", 401 | "\n", 402 | "27\n", 403 | "00:01:08,680 --> 00:01:13,119\n", 404 | "I believe that the heart does go\n", 405 | "on\n", 406 | "\n", 407 | "28\n", 408 | "00:01:13,120 --> 00:01:13,799\n", 409 | "I believe that the heart does go\n", 410 | "on\n", 411 | "\n", 412 | "29\n", 413 | "00:01:13,800 --> 00:01:16,079\n", 414 | "I believe that the heart does go\n", 415 | "on\n", 416 | "\n", 417 | "30\n", 418 | "00:01:16,080 --> 00:01:17,039\n", 419 | "I believe that the heart does go\n", 420 | "on\n", 421 | "\n", 422 | "31\n", 423 | "00:01:19,320 --> 00:01:20,359\n", 424 | "Once more\n", 425 | "\n", 426 | "32\n", 427 | "00:01:20,360 --> 00:01:21,919\n", 428 | "Once more\n", 429 | "\n", 430 | "33\n", 431 | "00:01:23,120 --> 00:01:23,559\n", 432 | "thedoor\n", 433 | "you\n", 434 | "\n", 435 | "34\n", 436 | "00:01:23,560 --> 00:01:24,959\n", 437 | "you open the door\n", 438 | "\n", 439 | "35\n", 440 | "00:01:24,960 --> 00:01:25,559\n", 441 | "youopen thedoor\n", 442 | "\n", 443 | "36\n", 444 | "00:01:25,560 --> 00:01:26,159\n", 445 | "you open the door\n", 446 | "\n", 447 | "37\n", 448 | "00:01:27,560 --> 00:01:27,919\n", 449 | "And you're here in my heart\n", 450 | "\n", 451 | "38\n", 452 | "00:01:27,920 --> 00:01:30,439\n", 453 | "And you're here in my heart\n", 454 | 
"\n", 455 | "39\n", 456 | "00:01:30,440 --> 00:01:31,119\n", 457 | "And you're here in iny heart\n", 458 | "\n", 459 | "40\n", 460 | "00:01:31,120 --> 00:01:31,599\n", 461 | "Andyou're here inm heart\n", 462 | "\n", 463 | "41\n", 464 | "00:01:31,600 --> 00:01:32,119\n", 465 | "Andeinmneart\n", 466 | "\n", 467 | "42\n", 468 | "00:01:33,040 --> 00:01:34,639\n", 469 | "my heartwim goonand\n", 470 | "on\n", 471 | "\n", 472 | "43\n", 473 | "00:01:34,640 --> 00:01:38,439\n", 474 | "my heart will go on and\n", 475 | "on\n", 476 | "\n", 477 | "44\n", 478 | "00:01:38,440 --> 00:01:38,839\n", 479 | "my heart will go on and\n", 480 | "on\n", 481 | "\n", 482 | "47\n", 483 | "00:01:47,920 --> 00:01:48,559\n", 484 | "Love can touch us onetime\n", 485 | "\n", 486 | "48\n", 487 | "00:01:48,560 --> 00:01:50,679\n", 488 | "Lovecantouch usonetime\n", 489 | "\n", 490 | "49\n", 491 | "00:01:50,920 --> 00:01:51,319\n", 492 | "Love cantouch usone time\n", 493 | "\n", 494 | "50\n", 495 | "00:01:52,520 --> 00:01:53,359\n", 496 | "And lastforaJifetime\n", 497 | "\n", 498 | "51\n", 499 | "00:01:53,360 --> 00:01:53,999\n", 500 | "And lastfora lifetime\n", 501 | "\n", 502 | "52\n", 503 | "00:01:54,000 --> 00:01:56,159\n", 504 | "And last fora lifetime\n", 505 | "\n", 506 | "53\n", 507 | "00:01:56,160 --> 00:01:56,959\n", 508 | "An st for a lifetime\n", 509 | "\n", 510 | "54\n", 511 | "00:01:58,040 --> 00:01:58,399\n", 512 | "And never let go till\n", 513 | "\n", 514 | "55\n", 515 | "00:01:58,400 --> 00:01:59,639\n", 516 | "And never let go till\n", 517 | "\n", 518 | "56\n", 519 | "00:01:59,640 --> 00:02:00,479\n", 520 | "And never let go till\n", 521 | "\n", 522 | "57\n", 523 | "00:02:00,480 --> 00:02:01,039\n", 524 | "And never let go till\n", 525 | "\n", 526 | "58\n", 527 | "00:02:02,240 --> 00:02:02,799\n", 528 | "we're gone\n", 529 | "\n", 530 | "59\n", 531 | "00:02:02,800 --> 00:02:04,039\n", 532 | "we're gone\n", 533 | "\n", 534 | "62\n", 535 | "00:02:11,720 --> 00:02:13,119\n", 536 | 
"one true time\n", 537 | "\n", 538 | "65\n", 539 | "00:02:14,720 --> 00:02:15,239\n", 540 | "Tholdto\n", 541 | "\n", 542 | "66\n", 543 | "00:02:15,240 --> 00:02:15,839\n", 544 | "I holdto\n", 545 | "\n", 546 | "67\n", 547 | "00:02:17,640 --> 00:02:21,719\n", 548 | "Imy lifewe'll alwaysgo on\n", 549 | "\n", 550 | "68\n", 551 | "00:02:21,720 --> 00:02:22,639\n", 552 | "I my life we'll always go on\n", 553 | "\n", 554 | "69\n", 555 | "00:02:26,640 --> 00:02:27,239\n", 556 | "Near\n", 557 | "\n", 558 | "70\n", 559 | "00:02:27,240 --> 00:02:27,879\n", 560 | "Near\n", 561 | "\n", 562 | "72\n", 563 | "00:02:29,360 --> 00:02:30,119\n", 564 | "far\n", 565 | "\n", 566 | "74\n", 567 | "00:02:31,200 --> 00:02:31,599\n", 568 | "whereveryou are\n", 569 | "\n", 570 | "75\n", 571 | "00:02:31,600 --> 00:02:32,559\n", 572 | "wherever you-are\n", 573 | "\n", 574 | "76\n", 575 | "00:02:32,560 --> 00:02:33,439\n", 576 | "whereveryou are\n", 577 | "\n", 578 | "77\n", 579 | "00:02:33,440 --> 00:02:34,079\n", 580 | "whereveryou are\n", 581 | "\n", 582 | "78\n", 583 | "00:02:35,520 --> 00:02:37,159\n", 584 | "I believe that the heart does go\n", 585 | "on\n", 586 | "\n", 587 | "79\n", 588 | "00:02:37,160 --> 00:02:41,959\n", 589 | "I believe that the heart does go\n", 590 | "on\n", 591 | "\n", 592 | "80\n", 593 | "00:02:46,440 --> 00:02:47,039\n", 594 | "Once more\n", 595 | "\n", 596 | "81\n", 597 | "00:02:47,040 --> 00:02:48,199\n", 598 | "Oncemore\n", 599 | "\n", 600 | "82\n", 601 | "00:02:50,520 --> 00:02:50,879\n", 602 | "you openthe dooi\n", 603 | "\n", 604 | "83\n", 605 | "00:02:50,880 --> 00:02:53,279\n", 606 | "you open the door\n", 607 | "\n", 608 | "84\n", 609 | "00:02:54,840 --> 00:02:56,679\n", 610 | "And you're here in my heart\n", 611 | "\n", 612 | "85\n", 613 | "00:02:56,680 --> 00:02:57,519\n", 614 | "And you're here in my heart\n", 615 | "\n", 616 | "86\n", 617 | "00:02:57,520 --> 00:02:57,999\n", 618 | "And you're here in my heart\n", 619 | "\n", 620 | "87\n", 621 | 
"00:03:00,360 --> 00:03:00,919\n", 622 | "heartwill goonand\n", 623 | "\n", 624 | "88\n", 625 | "00:03:00,920 --> 00:03:01,519\n", 626 | "my heart will go on and\n", 627 | "on\n", 628 | "\n", 629 | "89\n", 630 | "00:03:01,560 --> 00:03:04,599\n", 631 | "my heart will go on and\n", 632 | "on\n", 633 | "on\n", 634 | "my heart will go on and\n", 635 | "\n", 636 | "90\n", 637 | "00:03:04,600 --> 00:03:05,879\n", 638 | "on\n", 639 | "\n", 640 | "91\n", 641 | "00:03:05,880 --> 00:03:06,759\n", 642 | "my heart will go on and\n", 643 | "on\n", 644 | "\n", 645 | "102\n", 646 | "00:03:24,920 --> 00:03:25,639\n", 647 | "You're here\n", 648 | "\n", 649 | "103\n", 650 | "00:03:25,640 --> 00:03:27,119\n", 651 | "You're here\n", 652 | "\n", 653 | "104\n", 654 | "00:03:27,120 --> 00:03:27,999\n", 655 | "You're here\n", 656 | "\n", 657 | "105\n", 658 | "00:03:29,520 --> 00:03:30,039\n", 659 | "there's nothing I fear\n", 660 | "\n", 661 | "106\n", 662 | "00:03:30,120 --> 00:03:30,759\n", 663 | "there's nothing Ifear\n", 664 | "\n", 665 | "107\n", 666 | "00:03:30,760 --> 00:03:31,639\n", 667 | "there's nothing Ifear\n", 668 | "\n", 669 | "108\n", 670 | "00:03:31,640 --> 00:03:31,959\n", 671 | "there's nothing I fear\n", 672 | "\n", 673 | "109\n", 674 | "00:03:31,960 --> 00:03:32,319\n", 675 | "there nothigIfear\n", 676 | "\n", 677 | "110\n", 678 | "00:03:33,680 --> 00:03:33,999\n", 679 | "AndIknow\n", 680 | "\n", 681 | "111\n", 682 | "00:03:34,000 --> 00:03:34,599\n", 683 | "AndIknow\n", 684 | "\n", 685 | "112\n", 686 | "00:03:34,600 --> 00:03:35,399\n", 687 | "AndIknow\n", 688 | "\n", 689 | "113\n", 690 | "00:03:35,520 --> 00:03:37,959\n", 691 | "that my heart will you go on\n", 692 | "\n", 693 | "114\n", 694 | "00:03:38,400 --> 00:03:38,879\n", 695 | "that my heart will you go on\n", 696 | "\n", 697 | "115\n", 698 | "00:03:38,880 --> 00:03:39,439\n", 699 | "that my heart will you go on\n", 700 | "\n", 701 | "116\n", 702 | "00:03:39,440 --> 00:03:39,919\n", 703 | "that my heart will 
you go on\n", 704 | "\n", 705 | "117\n", 706 | "00:03:40,160 --> 00:03:40,599\n", 707 | "that my heart will you go on\n", 708 | "\n", 709 | "118\n", 710 | "00:03:40,600 --> 00:03:40,919\n", 711 | "that my heart will you go on\n", 712 | "\n", 713 | "119\n", 714 | "00:03:40,920 --> 00:03:41,399\n", 715 | "that my heart will you go on\n", 716 | "\n", 717 | "120\n", 718 | "00:03:44,240 --> 00:03:44,679\n", 719 | "Weill stay foreverthsway\n", 720 | "\n", 721 | "121\n", 722 | "00:03:44,680 --> 00:03:44,999\n", 723 | "We'll stayforever this way\n", 724 | "\n", 725 | "122\n", 726 | "00:03:45,000 --> 00:03:49,239\n", 727 | "We'll stay forever this way\n", 728 | "\n", 729 | "123\n", 730 | "00:03:49,240 --> 00:03:50,799\n", 731 | "We'll stay forever this way\n", 732 | "\n", 733 | "124\n", 734 | "00:03:50,840 --> 00:03:51,199\n", 735 | "We'll stay forever this way\n", 736 | "\n", 737 | "125\n", 738 | "00:03:51,200 --> 00:03:51,599\n", 739 | "We'll stay forever this way\n", 740 | "\n", 741 | "126\n", 742 | "00:03:57,240 --> 00:03:57,919\n", 743 | "You are And in my heart\n", 744 | "\n", 745 | "127\n", 746 | "00:03:58,440 --> 00:03:59,199\n", 747 | "my heart will go on and\n", 748 | "on\n", 749 | "\n", 750 | "128\n", 751 | "00:03:59,200 --> 00:04:05,279\n", 752 | "my heart will go on and\n", 753 | "on\n", 754 | "\n", 755 | "129\n", 756 | "00:04:05,280 --> 00:04:06,919\n", 757 | "my heart will go on and\n", 758 | "on\n", 759 | "\n" 760 | ] 761 | } 762 | ], 763 | "source": [ 764 | "!cat result/result.srt" 765 | ] 766 | }, 767 | { 768 | "cell_type": "code", 769 | "execution_count": null, 770 | "metadata": { 771 | "id": "cNjpqvivs1ZA" 772 | }, 773 | "outputs": [], 774 | "source": [] 775 | } 776 | ], 777 | "metadata": { 778 | "colab": { 779 | "provenance": [] 780 | }, 781 | "kernelspec": { 782 | "display_name": "Python 3", 783 | "name": "python3" 784 | } 785 | }, 786 | "nbformat": 4, 787 | "nbformat_minor": 0 788 | } 789 | 
--------------------------------------------------------------------------------