├── .github
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug.md
│   │   └── config.yml
│   └── workflows
│       ├── AutoPushToPypi.yml
│       └── SyncToGitee.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── assets
│   ├── RapidVideOCRDemo.ipynb
│   ├── colab-badge.svg
│   └── logo.png
├── cliff.toml
├── demo.py
├── docs
│   ├── README_zh.md
│   └── doc_whl.md
├── rapid_videocr
│   ├── __init__.py
│   ├── export.py
│   ├── main.py
│   ├── ocr_processor.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── crop_by_project.py
│   │   ├── logger.py
│   │   └── utils.py
│   ├── vsf_cli.py
│   └── vsf_ocr_cli.py
├── requirements.txt
├── setup.py
└── tests
    ├── test_files
    │   ├── 2.mp4
    │   ├── RGBImages
    │   │   ├── 0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg
    │   │   ├── 0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg
    │   │   ├── 0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg
    │   │   └── 0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg
    │   └── TXTImages
    │       ├── 0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg
    │       ├── 0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg
    │       ├── 0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg
    │       └── 0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg
    └── test_main.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: https://raw.githubusercontent.com/RapidAI/.github/6db6b6b9273f3151094a462a61fbc8e88564562c/assets/Sponsor.png
14 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: 🐞 Bug
3 | about: Bug
4 | title: 'Bug'
5 | labels: 'Bug'
6 | assignees: ''
7 |
8 | ---
9 |
10 | 请提供下述完整信息以便快速定位问题
11 | (Please provide the following information to quickly locate the problem)
12 | - **系统环境/System Environment**:
13 | - **使用的是哪门语言的程序/Which programming language**:
14 | - **所使用语言相关版本信息/Version**:
15 | - **OnnxRuntime版本/OnnxRuntime Version**:
16 | - **使用当前库的版本/Version of this library in use**:
17 | - **可复现问题的demo和文件/Demo and files that reproduce the problem**:
18 | - **完整报错/Complete Error Message**:
19 | - **可能的解决方案/Possible solutions**:
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 | - name: ❓ Questions
4 | url: https://github.com/SWHL/RapidVideOCR/discussions/categories/q-a
5 | about: Please use the community forum for help and questions regarding RapidVideOCR.
6 | - name: 💡 Feature requests and ideas
7 | url: https://github.com/SWHL/RapidVideOCR/discussions/categories/ideas
8 | about: Please vote for and post new feature ideas in the community forum.
9 | - name: 📖 Documentation
10 | url: https://swhl.github.io/RapidVideOCR/docs
11 | about: A great place to find instructions and answers about RapidVideOCR.
12 |
--------------------------------------------------------------------------------
/.github/workflows/AutoPushToPypi.yml:
--------------------------------------------------------------------------------
1 | name: Push rapid_videocr to pypi
2 |
3 | on:
4 | push:
5 | tags:
6 | - v*
7 |
8 | jobs:
9 | UnitTesting:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: Pull latest code
13 | uses: actions/checkout@v4
14 |
15 | - name: Set up Python 3.10
16 | uses: actions/setup-python@v4
17 | with:
18 | python-version: '3.10'
19 | architecture: 'x64'
20 |
21 | - name: Unit testing
22 | run: |
23 | pip install -r requirements.txt --break-system-packages
24 | pip install pytest six --break-system-packages
25 | pytest -s tests/test_*.py
26 |
27 | GenerateWHL_PushPyPi:
28 | needs: UnitTesting
29 | runs-on: ubuntu-latest
30 |
31 | steps:
32 | - uses: actions/checkout@v4
33 |
34 | - name: Run setup.py
35 | run: |
36 | pip install wheel get_pypi_latest_version --break-system-packages
37 |
38 | python -m pip install --upgrade pip --break-system-packages
39 | python setup.py bdist_wheel ${{ github.ref_name }}
40 |
41 | - name: Publish distribution 📦 to PyPI
42 | uses: pypa/gh-action-pypi-publish@v1.5.0
43 | with:
44 | password: ${{ secrets.PYPI_API_TOKEN }}
45 | packages_dir: dist/
46 |
--------------------------------------------------------------------------------
/.github/workflows/SyncToGitee.yml:
--------------------------------------------------------------------------------
1 | name: syncToGitee
2 | on:
3 | push:
4 | branches:
5 | - '**'
6 | jobs:
7 | repo-sync:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - name: Checkout source codes
11 | uses: actions/checkout@v2
12 |
13 | - name: Mirror the Github organization repos to Gitee.
14 | uses: Yikun/hub-mirror-action@master
15 | with:
16 | src: 'github/SWHL'
17 | dst: 'gitee/SWHL'
18 | dst_key: ${{ secrets.GITEE_PRIVATE_KEY }}
19 | dst_token: ${{ secrets.GITEE_TOKEN }}
20 | force_update: true
21 | # only sync this repo
22 | static_list: "RapidVideOCR"
23 | debug: true
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | rapid_videocr/video_sub_finder/libs
2 | *.vscode
3 | outputs
4 |
5 | *.pyc
6 |
7 | *.onnx
8 |
9 | temp/
10 | test_files/
11 |
12 | .DS_Store
13 |
14 | *.bin
15 |
16 | .mypy_cache
17 |
18 | # Created by .ignore support plugin (hsz.mobi)
19 | ### Python template
20 | # Byte-compiled / optimized / DLL files
21 | __pycache__/
22 | *.py[cod]
23 | *$py.class
24 | .pytest_cache
25 |
26 | # C extensions
27 | *.so
28 |
29 | # Distribution / packaging
30 | .Python
31 | build/
32 | develop-eggs/
33 | dist/
34 | downloads/
35 | eggs/
36 | .eggs/
37 | lib/
38 | lib64/
39 | parts/
40 | sdist/
41 | var/
42 | wheels/
43 | pip-wheel-metadata/
44 | share/python-wheels/
45 | *.egg-info/
46 | .installed.cfg
47 | *.egg
48 | MANIFEST
49 |
50 | # PyInstaller
51 | # Usually these files are written by a python script from a template
52 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
53 | # *.manifest
54 | # *.spec
55 | *.res
56 |
57 | # Installer logs
58 | pip-log.txt
59 | pip-delete-this-directory.txt
60 |
61 | # Unit test / coverage reports
62 | htmlcov/
63 | .tox/
64 | .nox/
65 | .coverage
66 | .coverage.*
67 | .cache
68 | nosetests.xml
69 | coverage.xml
70 | *.cover
71 | *.py,cover
72 | .hypothesis/
73 | .pytest_cache/
74 |
75 | # Translations
76 | *.mo
77 | *.pot
78 |
79 | # Django stuff:
80 | *.log
81 | local_settings.py
82 | db.sqlite3
83 | db.sqlite3-journal
84 |
85 | # Flask stuff:
86 | instance/
87 | .webassets-cache
88 |
89 | # Scrapy stuff:
90 | .scrapy
91 |
92 | # Sphinx documentation
93 | docs/_build/
94 |
95 | # PyBuilder
96 | target/
97 |
98 | # Jupyter Notebook
99 | .ipynb_checkpoints
100 |
101 | # IPython
102 | profile_default/
103 | ipython_config.py
104 |
105 | # pyenv
106 | .python-version
107 |
108 | # pipenv
109 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
110 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
111 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
112 | # install all needed dependencies.
113 | #Pipfile.lock
114 |
115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
116 | __pypackages__/
117 |
118 | # Celery stuff
119 | celerybeat-schedule
120 | celerybeat.pid
121 |
122 | # SageMath parsed files
123 | *.sage.py
124 |
125 | # Environments
126 | .env
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 |
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 |
138 | # Rope project settings
139 | .ropeproject
140 |
141 | # mkdocs documentation
142 | /site
143 |
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 |
149 | # Pyre type checker
150 | .pyre/
151 |
152 | #idea
153 | .vs
154 | .vscode
155 | .idea
156 | /images
157 | /models
158 |
159 | #models
160 | *.onnx
161 |
162 | *.ttf
163 | *.ttc
164 |
165 | long1.jpg
166 |
167 | *.bin
168 | *.mapping
169 | *.xml
170 |
171 | *.pdiparams
172 | *.pdiparams.info
173 | *.pdmodel
174 |
175 | .DS_Store
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/myint/autoflake
3 | rev: v2.1.1
4 | hooks:
5 | - id: autoflake
6 | args:
7 | [
8 | "--recursive",
9 | "--in-place",
10 | "--remove-all-unused-imports",
11 | "--remove-unused-variable",
12 | "--ignore-init-module-imports",
13 | ]
14 | - repo: https://github.com/psf/black
15 | rev: 23.1.0
16 | hooks:
17 | - id: black
18 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | [简体中文](https://github.com/SWHL/RapidVideOCR/blob/main/docs/README_zh.md) | English
17 |
18 |
19 | ### Introduction
20 |
21 | - Extracts hardcoded subtitles from videos and automatically generates the corresponding `srt | ass | txt` file.
22 | - Supported subtitle languages: Chinese | English (For other supported languages, see: [List of supported languages](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99))
23 | - The advantages are as follows:
24 | - **Faster extraction**: works together with the [VideoSubFinder](https://sourceforge.net/projects/videosubfinder/) software to extract key subtitle frames quickly.
25 | - **More accurate recognition**: uses [RapidOCR](https://github.com/RapidAI/RapidOCR) as the recognition library.
26 | - **More convenient to use**: can be installed directly via pip and used right away.
27 |
28 | - For the desktop EXE version, see [RapidVideOCRDesktop](https://github.com/SWHL/RapidVideOCRDesktop).
29 | - If it helps you, please give it a star ⭐.
30 |
31 | ### [Online Demo](https://huggingface.co/spaces/SWHL/RapidVideOCR)
32 |
33 |
34 |
35 |
36 |
37 | ### Overall framework
38 |
39 | ```mermaid
40 | flowchart LR
41 | A[/Video/] --Extract subtitle key frame--> B(VideoSubFinder) --OCR-->C(RapidVideOCR)
42 | C --Convert--> D[/"SRT | ASS | TXT"/]
43 | ```
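VideoSubFinder writes the extracted key frames as timestamped images into an `RGBImages` folder (and a `TXTImages` folder with the same file names); the files under `tests/test_files` above and in the Colab demo follow this naming scheme. A sketch of the expected input layout, with file names taken from the bundled test data (your timestamps will differ):

```text
RGBImages/
├── 0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg   # start__end timestamps encoded in the file name
├── 0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg
└── ...
TXTImages/
└── ...                                                        # same file names, alternative VideoSubFinder output
```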
44 |
45 | ### Installation
46 |
47 | ```bash
48 | pip install rapid_videocr
49 | ```
50 |
51 | ### Usage
52 |
53 | > [!NOTE]
54 | >
55 | > The input image path for `rapid_videocr` must be the **RGBImages** or **TXTImages** directory produced by the **VideoSubFinder** software.
56 |
57 | ```bash
58 | rapid_videocr -i RGBImages
59 | ```
60 |
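The save directory and output format can also be set on the command line. The flags below are taken from the Colab demo in `assets/RapidVideOCRDemo.ipynb`; check `rapid_videocr -h` for the options supported by your installed version:

```bash
# Recognize the VideoSubFinder key frames and write result.srt into ./result
# -i: input image directory, -s: save directory, -o: output format (as used in the Colab demo)
rapid_videocr -i RGBImages -s result -o srt
```
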
61 | ### Documentation
62 |
63 | Full documentation (in Chinese) is available at [docs](https://swhl.github.io/RapidVideOCR/docs).
64 |
65 | ### Code Contributors
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | ### Contributing
74 |
75 | - Pull requests are welcome. For major changes, please open an issue first
76 | to discuss what you would like to change.
77 | - Please make sure to update tests as appropriate.
78 |
79 | ### [Sponsor](https://swhl.github.io/RapidVideOCR/docs/sponsor/)
80 |
81 | If you would like to sponsor the project, click the **Buy me a coffee** image and leave a note (e.g. your GitHub account name) so that you can be added to the sponsorship list below.
82 |
83 |
84 |
85 |
86 |
87 | ### License
88 |
89 | This project is released under the [Apache 2.0 license](./LICENSE).
90 |
--------------------------------------------------------------------------------
/assets/RapidVideOCRDemo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "YBjLERcEsTES"
7 | },
8 | "source": [
9 | "## [RapidVideOCR Demo](https://github.com/SWHL/RapidVideOCR)"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "IPBSdGqbjNpc"
16 | },
17 | "source": [
18 | "#### Require:\n",
19 | "- The RGBImages of [Video](https://www.youtube.com/watch?v=Z2Bg_usMYiA) from the VideoSubFinder software.\n",
20 | "- Install the RapidVideOCR"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {
26 | "id": "IT1t_86aq4QU"
27 | },
28 | "source": [
29 | "#### Download the RGBImages.zip and unzip it."
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 33,
35 | "metadata": {
36 | "colab": {
37 | "base_uri": "https://localhost:8080/"
38 | },
39 | "id": "qWiWiKJWjcH1",
40 | "outputId": "9b5c8098-061a-4f85-b7a8-822e1f26b166"
41 | },
42 | "outputs": [
43 | {
44 | "name": "stdout",
45 | "output_type": "stream",
46 | "text": [
47 | "--2023-04-09 01:47:40-- https://github.com/SWHL/RapidVideOCR/files/11184614/RGBImages.zip\n",
48 | "Resolving github.com (github.com)... 140.82.112.4\n",
49 | "Connecting to github.com (github.com)|140.82.112.4|:443... connected.\n",
50 | "HTTP request sent, awaiting response... 302 Found\n",
51 | "Location: https://objects.githubusercontent.com/github-production-repository-file-5c1aeb/405589029/11184614?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230409%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230409T014740Z&X-Amz-Expires=300&X-Amz-Signature=fa9c0cb7ec18b1113504c94f60ed8bd6c8250cd040d056396c0dc6caf5184dea&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=405589029&response-content-disposition=attachment%3Bfilename%3DRGBImages.zip&response-content-type=application%2Fx-zip-compressed [following]\n",
52 | "--2023-04-09 01:47:40-- https://objects.githubusercontent.com/github-production-repository-file-5c1aeb/405589029/11184614?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230409%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230409T014740Z&X-Amz-Expires=300&X-Amz-Signature=fa9c0cb7ec18b1113504c94f60ed8bd6c8250cd040d056396c0dc6caf5184dea&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=405589029&response-content-disposition=attachment%3Bfilename%3DRGBImages.zip&response-content-type=application%2Fx-zip-compressed\n",
53 | "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
54 | "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.108.133|:443... connected.\n",
55 | "HTTP request sent, awaiting response... 200 OK\n",
56 | "Length: 8314498 (7.9M) [application/x-zip-compressed]\n",
57 | "Saving to: ‘RGBImages.zip’\n",
58 | "\n",
59 | "RGBImages.zip 100%[===================>] 7.93M 43.3MB/s in 0.2s \n",
60 | "\n",
61 | "2023-04-09 01:47:40 (43.3 MB/s) - ‘RGBImages.zip’ saved [8314498/8314498]\n",
62 | "\n",
63 | "Archive: RGBImages.zip\n",
64 | " creating: RGBImages/\n",
65 | " inflating: RGBImages/0_00_20_640__0_00_23_999_0055800000012800072001280.jpeg \n",
66 | " inflating: RGBImages/0_00_25_120__0_00_25_999_0055800000012800072001280.jpeg \n",
67 | " inflating: RGBImages/0_00_26_000__0_00_26_599_0055800000012800072001280.jpeg \n",
68 | " inflating: RGBImages/0_00_27_760__0_00_28_999_0055800000012800072001280.jpeg \n",
69 | " inflating: RGBImages/0_00_30_280__0_00_30_599_0055800000012800072001280.jpeg \n",
70 | " inflating: RGBImages/0_00_30_600__0_00_32_199_0055800000012800072001280.jpeg \n",
71 | " inflating: RGBImages/0_00_32_800__0_00_33_199_0055800000012800072001280.jpeg \n",
72 | " inflating: RGBImages/0_00_33_200__0_00_34_959_0055800000012800072001280.jpeg \n",
73 | " inflating: RGBImages/0_00_34_960__0_00_35_519_0055800000012800072001280.jpeg \n",
74 | " inflating: RGBImages/0_00_39_040__0_00_39_479_0055800000012800072001280.jpeg \n",
75 | " inflating: RGBImages/0_00_40_040__0_00_41_679_0055800000012800072001280.jpeg \n",
76 | " inflating: RGBImages/0_00_41_680__0_00_42_919_0055800000012800072001280.jpeg \n",
77 | " inflating: RGBImages/0_00_42_920__0_00_43_439_0055800000012800072001280.jpeg \n",
78 | " inflating: RGBImages/0_00_43_440__0_00_43_799_0055800000012800072001280.jpeg \n",
79 | " inflating: RGBImages/0_00_44_920__0_00_45_359_0055800000012800072001280.jpeg \n",
80 | " inflating: RGBImages/0_00_45_360__0_00_47_799_0055800000012800072001280.jpeg \n",
81 | " inflating: RGBImages/0_00_47_800__0_00_48_159_0055800000012800072001280.jpeg \n",
82 | " inflating: RGBImages/0_00_48_160__0_00_48_559_0055800000012800072001280.jpeg \n",
83 | " inflating: RGBImages/0_00_50_520__0_00_54_079_0055800000012800072001280.jpeg \n",
84 | " inflating: RGBImages/0_00_54_080__0_00_55_799_0055800000012800072001280.jpeg \n",
85 | " inflating: RGBImages/0_00_56_000__0_00_56_359_0055800000012800072001280.jpeg \n",
86 | " inflating: RGBImages/0_00_57_680__0_00_57_999_0055800000012800072001280.jpeg \n",
87 | " inflating: RGBImages/0_01_00_120__0_01_00_759_0055800000012800072001280.jpeg \n",
88 | " inflating: RGBImages/0_01_02_160__0_01_02_919_0055800000012800072001280.jpeg \n",
89 | " inflating: RGBImages/0_01_04_320__0_01_06_759_0055800000012800072001280.jpeg \n",
90 | " inflating: RGBImages/0_01_08_120__0_01_08_679_0055800000012800072001280.jpeg \n",
91 | " inflating: RGBImages/0_01_08_680__0_01_13_119_0055800000012800072001280.jpeg \n",
92 | " inflating: RGBImages/0_01_13_120__0_01_13_799_0055800000012800072001280.jpeg \n",
93 | " inflating: RGBImages/0_01_13_800__0_01_16_079_0055800000012800072001280.jpeg \n",
94 | " inflating: RGBImages/0_01_16_080__0_01_17_039_0055800000012800072001280.jpeg \n",
95 | " inflating: RGBImages/0_01_19_320__0_01_20_359_0055800000012800072001280.jpeg \n",
96 | " inflating: RGBImages/0_01_20_360__0_01_21_919_0055800000012800072001280.jpeg \n",
97 | " inflating: RGBImages/0_01_23_120__0_01_23_559_0055800000012800072001280.jpeg \n",
98 | " inflating: RGBImages/0_01_23_560__0_01_24_959_0055800000012800072001280.jpeg \n",
99 | " inflating: RGBImages/0_01_24_960__0_01_25_559_0055800000012800072001280.jpeg \n",
100 | " inflating: RGBImages/0_01_25_560__0_01_26_159_0055800000012800072001280.jpeg \n",
101 | " inflating: RGBImages/0_01_27_560__0_01_27_919_0055800000012800072001280.jpeg \n",
102 | " inflating: RGBImages/0_01_27_920__0_01_30_439_0055800000012800072001280.jpeg \n",
103 | " inflating: RGBImages/0_01_30_440__0_01_31_119_0055800000012800072001280.jpeg \n",
104 | " inflating: RGBImages/0_01_31_120__0_01_31_599_0055800000012800072001280.jpeg \n",
105 | " inflating: RGBImages/0_01_31_600__0_01_32_119_0055800000012800072001280.jpeg \n",
106 | " inflating: RGBImages/0_01_33_040__0_01_34_639_0055800000012800072001280.jpeg \n",
107 | " inflating: RGBImages/0_01_34_640__0_01_38_439_0055800000012800072001280.jpeg \n",
108 | " inflating: RGBImages/0_01_38_440__0_01_38_839_0055800000012800072001280.jpeg \n",
109 | " inflating: RGBImages/0_01_39_960__0_01_40_279_0055800000012800072001280.jpeg \n",
110 | " inflating: RGBImages/0_01_40_280__0_01_40_879_0055800000012800072001280.jpeg \n",
111 | " inflating: RGBImages/0_01_47_920__0_01_48_559_0055800000012800072001280.jpeg \n",
112 | " inflating: RGBImages/0_01_48_560__0_01_50_679_0055800000012800072001280.jpeg \n",
113 | " inflating: RGBImages/0_01_50_920__0_01_51_319_0055800000012800072001280.jpeg \n",
114 | " inflating: RGBImages/0_01_52_520__0_01_53_359_0055800000012800072001280.jpeg \n",
115 | " inflating: RGBImages/0_01_53_360__0_01_53_999_0055800000012800072001280.jpeg \n",
116 | " inflating: RGBImages/0_01_54_000__0_01_56_159_0055800000012800072001280.jpeg \n",
117 | " inflating: RGBImages/0_01_56_160__0_01_56_959_0055800000012800072001280.jpeg \n",
118 | " inflating: RGBImages/0_01_58_040__0_01_58_399_0055800000012800072001280.jpeg \n",
119 | " inflating: RGBImages/0_01_58_400__0_01_59_639_0055800000012800072001280.jpeg \n",
120 | " inflating: RGBImages/0_01_59_640__0_02_00_479_0055800000012800072001280.jpeg \n",
121 | " inflating: RGBImages/0_02_00_480__0_02_01_039_0055800000012800072001280.jpeg \n",
122 | " inflating: RGBImages/0_02_02_240__0_02_02_799_0055800000012800072001280.jpeg \n",
123 | " inflating: RGBImages/0_02_02_800__0_02_04_039_0055800000012800072001280.jpeg \n",
124 | " inflating: RGBImages/0_02_08_000__0_02_09_038_0055800000012800072001280.jpeg \n",
125 | " inflating: RGBImages/0_02_09_039__0_02_10_198_0055800000012800072001280.jpeg \n",
126 | " inflating: RGBImages/0_02_11_720__0_02_13_119_0055800000012800072001280.jpeg \n",
127 | " inflating: RGBImages/0_02_13_280__0_02_13_799_0055800000012800072001280.jpeg \n",
128 | " inflating: RGBImages/0_02_13_800__0_02_14_719_0055800000012800072001280.jpeg \n",
129 | " inflating: RGBImages/0_02_14_720__0_02_15_239_0055800000012800072001280.jpeg \n",
130 | " inflating: RGBImages/0_02_15_240__0_02_15_839_0055800000012800072001280.jpeg \n",
131 | " inflating: RGBImages/0_02_17_640__0_02_21_719_0055800000012800072001280.jpeg \n",
132 | " inflating: RGBImages/0_02_21_720__0_02_22_639_0055800000012800072001280.jpeg \n",
133 | " inflating: RGBImages/0_02_26_640__0_02_27_239_0055800000012800072001280.jpeg \n",
134 | " inflating: RGBImages/0_02_27_240__0_02_27_879_0055800000012800072001280.jpeg \n",
135 | " inflating: RGBImages/0_02_27_920__0_02_28_479_0055800000012800072001280.jpeg \n",
136 | " inflating: RGBImages/0_02_29_360__0_02_30_119_0055800000012800072001280.jpeg \n",
137 | " inflating: RGBImages/0_02_30_240__0_02_30_639_0055800000012800072001280.jpeg \n",
138 | " inflating: RGBImages/0_02_31_200__0_02_31_599_0055800000012800072001280.jpeg \n",
139 | " inflating: RGBImages/0_02_31_600__0_02_32_559_0055800000012800072001280.jpeg \n",
140 | " inflating: RGBImages/0_02_32_560__0_02_33_439_0055800000012800072001280.jpeg \n",
141 | " inflating: RGBImages/0_02_33_440__0_02_34_079_0055800000012800072001280.jpeg \n",
142 | " inflating: RGBImages/0_02_35_520__0_02_37_159_0055800000012800072001280.jpeg \n",
143 | " inflating: RGBImages/0_02_37_160__0_02_41_959_0055800000012800072001280.jpeg \n",
144 | " inflating: RGBImages/0_02_46_440__0_02_47_039_0055800000012800072001280.jpeg \n",
145 | " inflating: RGBImages/0_02_47_040__0_02_48_199_0055800000012800072001280.jpeg \n",
146 | " inflating: RGBImages/0_02_50_520__0_02_50_879_0055800000012800072001280.jpeg \n",
147 | " inflating: RGBImages/0_02_50_880__0_02_53_279_0055800000012800072001280.jpeg \n",
148 | " inflating: RGBImages/0_02_54_840__0_02_56_679_0055800000012800072001280.jpeg \n",
149 | " inflating: RGBImages/0_02_56_680__0_02_57_519_0055800000012800072001280.jpeg \n",
150 | " inflating: RGBImages/0_02_57_520__0_02_57_999_0055800000012800072001280.jpeg \n",
151 | " inflating: RGBImages/0_03_00_360__0_03_00_919_0055800000012800072001280.jpeg \n",
152 | " inflating: RGBImages/0_03_00_920__0_03_01_519_0055800000012800072001280.jpeg \n",
153 | " inflating: RGBImages/0_03_01_560__0_03_04_599_0055800000012800072001280.jpeg \n",
154 | " inflating: RGBImages/0_03_04_600__0_03_05_879_0055800000012800072001280.jpeg \n",
155 | " inflating: RGBImages/0_03_05_880__0_03_06_759_0055800000012800072001280.jpeg \n",
156 | " inflating: RGBImages/0_03_10_160__0_03_10_559_0055800000012800072001280.jpeg \n",
157 | " inflating: RGBImages/0_03_11_680__0_03_11_999_0055800000012800072001280.jpeg \n",
158 | " inflating: RGBImages/0_03_12_040__0_03_12_399_0055800000012800072001280.jpeg \n",
159 | " inflating: RGBImages/0_03_12_400__0_03_12_919_0055800000012800072001280.jpeg \n",
160 | " inflating: RGBImages/0_03_12_920__0_03_13_239_0055800000012800072001280.jpeg \n",
161 | " inflating: RGBImages/0_03_13_240__0_03_13_599_0055800000012800072001280.jpeg \n",
162 | " inflating: RGBImages/0_03_21_000__0_03_21_479_0055800000012800072001280.jpeg \n",
163 | " inflating: RGBImages/0_03_21_600__0_03_21_919_0055800000012800072001280.jpeg \n",
164 | " inflating: RGBImages/0_03_21_920__0_03_22_239_0055800000012800072001280.jpeg \n",
165 | " inflating: RGBImages/0_03_24_480__0_03_24_919_0055800000012800072001280.jpeg \n",
166 | " inflating: RGBImages/0_03_24_920__0_03_25_639_0055800000012800072001280.jpeg \n",
167 | " inflating: RGBImages/0_03_25_640__0_03_27_119_0055800000012800072001280.jpeg \n",
168 | " inflating: RGBImages/0_03_27_120__0_03_27_999_0055800000012800072001280.jpeg \n",
169 | " inflating: RGBImages/0_03_29_520__0_03_30_039_0055800000012800072001280.jpeg \n",
170 | " inflating: RGBImages/0_03_30_120__0_03_30_759_0055800000012800072001280.jpeg \n",
171 | " inflating: RGBImages/0_03_30_760__0_03_31_639_0055800000012800072001280.jpeg \n",
172 | " inflating: RGBImages/0_03_31_640__0_03_31_959_0055800000012800072001280.jpeg \n",
173 | " inflating: RGBImages/0_03_31_960__0_03_32_319_0055800000012800072001280.jpeg \n",
174 | " inflating: RGBImages/0_03_33_680__0_03_33_999_0055800000012800072001280.jpeg \n",
175 | " inflating: RGBImages/0_03_34_000__0_03_34_599_0055800000012800072001280.jpeg \n",
176 | " inflating: RGBImages/0_03_34_600__0_03_35_399_0055800000012800072001280.jpeg \n",
177 | " inflating: RGBImages/0_03_35_520__0_03_37_959_0055800000012800072001280.jpeg \n",
178 | " inflating: RGBImages/0_03_38_400__0_03_38_879_0055800000012800072001280.jpeg \n",
179 | " inflating: RGBImages/0_03_38_880__0_03_39_439_0055800000012800072001280.jpeg \n",
180 | " inflating: RGBImages/0_03_39_440__0_03_39_919_0055800000012800072001280.jpeg \n",
181 | " inflating: RGBImages/0_03_40_160__0_03_40_599_0055800000012800072001280.jpeg \n",
182 | " inflating: RGBImages/0_03_40_600__0_03_40_919_0055800000012800072001280.jpeg \n",
183 | " inflating: RGBImages/0_03_40_920__0_03_41_399_0055800000012800072001280.jpeg \n",
184 | " inflating: RGBImages/0_03_44_240__0_03_44_679_0055800000012800072001280.jpeg \n",
185 | " inflating: RGBImages/0_03_44_680__0_03_44_999_0055800000012800072001280.jpeg \n",
186 | " inflating: RGBImages/0_03_45_000__0_03_49_239_0055800000012800072001280.jpeg \n",
187 | " inflating: RGBImages/0_03_49_240__0_03_50_799_0055800000012800072001280.jpeg \n",
188 | " inflating: RGBImages/0_03_50_840__0_03_51_199_0055800000012800072001280.jpeg \n",
189 | " inflating: RGBImages/0_03_51_200__0_03_51_599_0055800000012800072001280.jpeg \n",
190 | " inflating: RGBImages/0_03_57_240__0_03_57_919_0055800000012800072001280.jpeg \n",
191 | " inflating: RGBImages/0_03_58_440__0_03_59_199_0055800000012800072001280.jpeg \n",
192 | " inflating: RGBImages/0_03_59_200__0_04_05_279_0055800000012800072001280.jpeg \n",
193 | " inflating: RGBImages/0_04_05_280__0_04_06_919_0055800000012800072001280.jpeg \n",
194 | " inflating: RGBImages/0_04_20_840__0_04_21_159_0055800000012800072001280.jpeg \n",
195 | " inflating: RGBImages/0_04_34_720__0_04_35_879_0055800000012800072001280.jpeg \n"
196 | ]
197 | }
198 | ],
199 | "source": [
200 | "!wget https://github.com/SWHL/RapidVideOCR/files/11184614/RGBImages.zip\n",
201 | "!unzip RGBImages.zip\n",
202 | "!rm RGBImages.zip"
203 | ]
204 | },
205 | {
206 | "cell_type": "markdown",
207 | "metadata": {
208 | "id": "1cTofr4Zq_WB"
209 | },
210 | "source": [
211 | "#### Install the RapidVideOCR package."
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 34,
217 | "metadata": {
218 | "colab": {
219 | "base_uri": "https://localhost:8080/"
220 | },
221 | "id": "5URYsomEqnuh",
222 | "outputId": "3a6093db-bfaa-4069-e92e-2e7bab97f987"
223 | },
224 | "outputs": [
225 | {
226 | "name": "stdout",
227 | "output_type": "stream",
228 | "text": [
229 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
230 | "Requirement already satisfied: rapid_videocr in /usr/local/lib/python3.9/dist-packages (2.1.6)\n",
231 | "Requirement already satisfied: rapidocr-onnxruntime>=1.2.2 in /usr/local/lib/python3.9/dist-packages (from rapid_videocr) (1.2.5)\n",
232 | "Requirement already satisfied: tqdm>=4.52.0 in /usr/local/lib/python3.9/dist-packages (from rapid_videocr) (4.65.0)\n",
233 | "Requirement already satisfied: PyYAML in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (6.0)\n",
234 | "Requirement already satisfied: Pillow in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (8.4.0)\n",
235 | "Requirement already satisfied: onnxruntime>=1.7.0 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.14.1)\n",
236 | "Requirement already satisfied: Shapely>=1.7.1 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (2.0.1)\n",
237 | "Requirement already satisfied: pyclipper>=1.2.1 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.3.0.post4)\n",
238 | "Requirement already satisfied: numpy>=1.19.3 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.22.4)\n",
239 | "Requirement already satisfied: opencv-python>=4.5.1.48 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (4.7.0.72)\n",
240 | "Requirement already satisfied: six>=1.15.0 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.16.0)\n",
241 | "Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.11.1)\n",
242 | "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (23.3.3)\n",
243 | "Requirement already satisfied: protobuf in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (3.20.3)\n",
244 | "Requirement already satisfied: packaging in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (23.0)\n",
245 | "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (15.0.1)\n",
246 | "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.9/dist-packages (from coloredlogs->onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (10.0)\n",
247 | "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.3.0)\n"
248 | ]
249 | }
250 | ],
251 | "source": [
252 | "!pip install rapid_videocr"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 35,
258 | "metadata": {
259 | "colab": {
260 | "base_uri": "https://localhost:8080/"
261 | },
262 | "id": "13GXToLcrFl8",
263 | "outputId": "d18fb2c0-79ae-4e29-9b27-de7f7e980707"
264 | },
265 | "outputs": [
266 | {
267 | "name": "stdout",
268 | "output_type": "stream",
269 | "text": [
270 | "Running with concat recognition.\n",
271 | "OCR: 100% 14/14 [00:28<00:00, 2.07s/it]\n",
272 | "The file has been saved in the result/result.srt\n",
273 | "The result has been saved to result directory.\n"
274 | ]
275 | }
276 | ],
277 | "source": [
278 | "!rapid_videocr -i /content/RGBImages -s result -o srt"
279 | ]
280 | },
281 | {
282 | "cell_type": "markdown",
283 | "metadata": {
284 | "id": "DR8RbHFisLZK"
285 | },
286 | "source": [
287 | "#### Look the result."
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": 36,
293 | "metadata": {
294 | "colab": {
295 | "base_uri": "https://localhost:8080/"
296 | },
297 | "id": "g2PdZnGJrsdx",
298 | "outputId": "c2107dd0-f099-464c-91a0-247294e69c60"
299 | },
300 | "outputs": [
301 | {
302 | "name": "stdout",
303 | "output_type": "stream",
304 | "text": [
305 | "1\n",
306 | "00:00:20,640 --> 00:00:23,999\n",
307 | "Eyelyinightinmydreams\n",
308 | "\n",
309 | "2\n",
310 | "00:00:25,120 --> 00:00:25,999\n",
311 | "Iseeyou\n",
312 | "\n",
313 | "3\n",
314 | "00:00:26,000 --> 00:00:26,599\n",
315 | "Iseeyou\n",
316 | "\n",
317 | "4\n",
318 | "00:00:27,760 --> 00:00:28,999\n",
319 | "Ifell you\n",
320 | "\n",
321 | "5\n",
322 | "00:00:30,280 --> 00:00:30,599\n",
323 | "That is hiow I know you go on\n",
324 | "\n",
325 | "6\n",
326 | "00:00:30,600 --> 00:00:32,199\n",
327 | "howIknowyougoon\n",
328 | "\n",
329 | "7\n",
330 | "00:00:32,800 --> 00:00:33,199\n",
331 | "That is howIkhowyougo.on\n",
332 | "\n",
333 | "8\n",
334 | "00:00:33,200 --> 00:00:34,959\n",
335 | "That is how I know you go on\n",
336 | "\n",
337 | "9\n",
338 | "00:00:34,960 --> 00:00:35,519\n",
339 | "That is how I know you go on\n",
340 | "\n",
341 | "11\n",
342 | "00:00:40,040 --> 00:00:41,679\n",
343 | "Faracrossthedistance\n",
344 | "\n",
345 | "12\n",
346 | "00:00:41,680 --> 00:00:42,919\n",
347 | "Faracross the distance\n",
348 | "\n",
349 | "13\n",
350 | "00:00:42,920 --> 00:00:43,439\n",
351 | "Faracross thelistance\n",
352 | "\n",
353 | "14\n",
354 | "00:00:43,440 --> 00:00:43,799\n",
355 | "Faracrosshe Mistance\n",
356 | "\n",
357 | "15\n",
358 | "00:00:44,920 --> 00:00:45,359\n",
359 | "and Spaces between us\n",
360 | "\n",
361 | "16\n",
362 | "00:00:45,360 --> 00:00:47,799\n",
363 | "and Spaces between us\n",
364 | "\n",
365 | "17\n",
366 | "00:00:47,800 --> 00:00:48,159\n",
367 | "and Spaces betweenus\n",
368 | "\n",
369 | "18\n",
370 | "00:00:48,160 --> 00:00:48,559\n",
371 | "and Spacesbetween us\n",
372 | "\n",
373 | "19\n",
374 | "00:00:50,520 --> 00:00:54,079\n",
375 | "u havecometo showyou go on\n",
376 | "\n",
377 | "20\n",
378 | "00:00:54,080 --> 00:00:55,799\n",
379 | "You have come to show vou go on\n",
380 | "\n",
381 | "21\n",
382 | "00:00:56,000 --> 00:00:56,359\n",
383 | "You haveoreto show y\n",
384 | "\n",
385 | "23\n",
386 | "00:01:00,120 --> 00:01:00,759\n",
387 | "Near\n",
388 | "\n",
389 | "24\n",
390 | "00:01:02,160 --> 00:01:02,919\n",
391 | "far\n",
392 | "\n",
393 | "25\n",
394 | "00:01:04,320 --> 00:01:06,759\n",
395 | "reveryouare\n",
396 | "\n",
397 | "26\n",
398 | "00:01:08,120 --> 00:01:08,679\n",
399 | "I belieye that the heart does go\n",
400 | "on\n",
401 | "\n",
402 | "27\n",
403 | "00:01:08,680 --> 00:01:13,119\n",
404 | "I believe that the heart does go\n",
405 | "on\n",
406 | "\n",
407 | "28\n",
408 | "00:01:13,120 --> 00:01:13,799\n",
409 | "I believe that the heart does go\n",
410 | "on\n",
411 | "\n",
412 | "29\n",
413 | "00:01:13,800 --> 00:01:16,079\n",
414 | "I believe that the heart does go\n",
415 | "on\n",
416 | "\n",
417 | "30\n",
418 | "00:01:16,080 --> 00:01:17,039\n",
419 | "I believe that the heart does go\n",
420 | "on\n",
421 | "\n",
422 | "31\n",
423 | "00:01:19,320 --> 00:01:20,359\n",
424 | "Once more\n",
425 | "\n",
426 | "32\n",
427 | "00:01:20,360 --> 00:01:21,919\n",
428 | "Once more\n",
429 | "\n",
430 | "33\n",
431 | "00:01:23,120 --> 00:01:23,559\n",
432 | "thedoor\n",
433 | "you\n",
434 | "\n",
435 | "34\n",
436 | "00:01:23,560 --> 00:01:24,959\n",
437 | "you open the door\n",
438 | "\n",
439 | "35\n",
440 | "00:01:24,960 --> 00:01:25,559\n",
441 | "youopen thedoor\n",
442 | "\n",
443 | "36\n",
444 | "00:01:25,560 --> 00:01:26,159\n",
445 | "you open the door\n",
446 | "\n",
447 | "37\n",
448 | "00:01:27,560 --> 00:01:27,919\n",
449 | "And you're here in my heart\n",
450 | "\n",
451 | "38\n",
452 | "00:01:27,920 --> 00:01:30,439\n",
453 | "And you're here in my heart\n",
454 | "\n",
455 | "39\n",
456 | "00:01:30,440 --> 00:01:31,119\n",
457 | "And you're here in iny heart\n",
458 | "\n",
459 | "40\n",
460 | "00:01:31,120 --> 00:01:31,599\n",
461 | "Andyou're here inm heart\n",
462 | "\n",
463 | "41\n",
464 | "00:01:31,600 --> 00:01:32,119\n",
465 | "Andeinmneart\n",
466 | "\n",
467 | "42\n",
468 | "00:01:33,040 --> 00:01:34,639\n",
469 | "my heartwim goonand\n",
470 | "on\n",
471 | "\n",
472 | "43\n",
473 | "00:01:34,640 --> 00:01:38,439\n",
474 | "my heart will go on and\n",
475 | "on\n",
476 | "\n",
477 | "44\n",
478 | "00:01:38,440 --> 00:01:38,839\n",
479 | "my heart will go on and\n",
480 | "on\n",
481 | "\n",
482 | "47\n",
483 | "00:01:47,920 --> 00:01:48,559\n",
484 | "Love can touch us onetime\n",
485 | "\n",
486 | "48\n",
487 | "00:01:48,560 --> 00:01:50,679\n",
488 | "Lovecantouch usonetime\n",
489 | "\n",
490 | "49\n",
491 | "00:01:50,920 --> 00:01:51,319\n",
492 | "Love cantouch usone time\n",
493 | "\n",
494 | "50\n",
495 | "00:01:52,520 --> 00:01:53,359\n",
496 | "And lastforaJifetime\n",
497 | "\n",
498 | "51\n",
499 | "00:01:53,360 --> 00:01:53,999\n",
500 | "And lastfora lifetime\n",
501 | "\n",
502 | "52\n",
503 | "00:01:54,000 --> 00:01:56,159\n",
504 | "And last fora lifetime\n",
505 | "\n",
506 | "53\n",
507 | "00:01:56,160 --> 00:01:56,959\n",
508 | "An st for a lifetime\n",
509 | "\n",
510 | "54\n",
511 | "00:01:58,040 --> 00:01:58,399\n",
512 | "And never let go till\n",
513 | "\n",
514 | "55\n",
515 | "00:01:58,400 --> 00:01:59,639\n",
516 | "And never let go till\n",
517 | "\n",
518 | "56\n",
519 | "00:01:59,640 --> 00:02:00,479\n",
520 | "And never let go till\n",
521 | "\n",
522 | "57\n",
523 | "00:02:00,480 --> 00:02:01,039\n",
524 | "And never let go till\n",
525 | "\n",
526 | "58\n",
527 | "00:02:02,240 --> 00:02:02,799\n",
528 | "we're gone\n",
529 | "\n",
530 | "59\n",
531 | "00:02:02,800 --> 00:02:04,039\n",
532 | "we're gone\n",
533 | "\n",
534 | "62\n",
535 | "00:02:11,720 --> 00:02:13,119\n",
536 | "one true time\n",
537 | "\n",
538 | "65\n",
539 | "00:02:14,720 --> 00:02:15,239\n",
540 | "Tholdto\n",
541 | "\n",
542 | "66\n",
543 | "00:02:15,240 --> 00:02:15,839\n",
544 | "I holdto\n",
545 | "\n",
546 | "67\n",
547 | "00:02:17,640 --> 00:02:21,719\n",
548 | "Imy lifewe'll alwaysgo on\n",
549 | "\n",
550 | "68\n",
551 | "00:02:21,720 --> 00:02:22,639\n",
552 | "I my life we'll always go on\n",
553 | "\n",
554 | "69\n",
555 | "00:02:26,640 --> 00:02:27,239\n",
556 | "Near\n",
557 | "\n",
558 | "70\n",
559 | "00:02:27,240 --> 00:02:27,879\n",
560 | "Near\n",
561 | "\n",
562 | "72\n",
563 | "00:02:29,360 --> 00:02:30,119\n",
564 | "far\n",
565 | "\n",
566 | "74\n",
567 | "00:02:31,200 --> 00:02:31,599\n",
568 | "whereveryou are\n",
569 | "\n",
570 | "75\n",
571 | "00:02:31,600 --> 00:02:32,559\n",
572 | "wherever you-are\n",
573 | "\n",
574 | "76\n",
575 | "00:02:32,560 --> 00:02:33,439\n",
576 | "whereveryou are\n",
577 | "\n",
578 | "77\n",
579 | "00:02:33,440 --> 00:02:34,079\n",
580 | "whereveryou are\n",
581 | "\n",
582 | "78\n",
583 | "00:02:35,520 --> 00:02:37,159\n",
584 | "I believe that the heart does go\n",
585 | "on\n",
586 | "\n",
587 | "79\n",
588 | "00:02:37,160 --> 00:02:41,959\n",
589 | "I believe that the heart does go\n",
590 | "on\n",
591 | "\n",
592 | "80\n",
593 | "00:02:46,440 --> 00:02:47,039\n",
594 | "Once more\n",
595 | "\n",
596 | "81\n",
597 | "00:02:47,040 --> 00:02:48,199\n",
598 | "Oncemore\n",
599 | "\n",
600 | "82\n",
601 | "00:02:50,520 --> 00:02:50,879\n",
602 | "you openthe dooi\n",
603 | "\n",
604 | "83\n",
605 | "00:02:50,880 --> 00:02:53,279\n",
606 | "you open the door\n",
607 | "\n",
608 | "84\n",
609 | "00:02:54,840 --> 00:02:56,679\n",
610 | "And you're here in my heart\n",
611 | "\n",
612 | "85\n",
613 | "00:02:56,680 --> 00:02:57,519\n",
614 | "And you're here in my heart\n",
615 | "\n",
616 | "86\n",
617 | "00:02:57,520 --> 00:02:57,999\n",
618 | "And you're here in my heart\n",
619 | "\n",
620 | "87\n",
621 | "00:03:00,360 --> 00:03:00,919\n",
622 | "heartwill goonand\n",
623 | "\n",
624 | "88\n",
625 | "00:03:00,920 --> 00:03:01,519\n",
626 | "my heart will go on and\n",
627 | "on\n",
628 | "\n",
629 | "89\n",
630 | "00:03:01,560 --> 00:03:04,599\n",
631 | "my heart will go on and\n",
632 | "on\n",
633 | "on\n",
634 | "my heart will go on and\n",
635 | "\n",
636 | "90\n",
637 | "00:03:04,600 --> 00:03:05,879\n",
638 | "on\n",
639 | "\n",
640 | "91\n",
641 | "00:03:05,880 --> 00:03:06,759\n",
642 | "my heart will go on and\n",
643 | "on\n",
644 | "\n",
645 | "102\n",
646 | "00:03:24,920 --> 00:03:25,639\n",
647 | "You're here\n",
648 | "\n",
649 | "103\n",
650 | "00:03:25,640 --> 00:03:27,119\n",
651 | "You're here\n",
652 | "\n",
653 | "104\n",
654 | "00:03:27,120 --> 00:03:27,999\n",
655 | "You're here\n",
656 | "\n",
657 | "105\n",
658 | "00:03:29,520 --> 00:03:30,039\n",
659 | "there's nothing I fear\n",
660 | "\n",
661 | "106\n",
662 | "00:03:30,120 --> 00:03:30,759\n",
663 | "there's nothing Ifear\n",
664 | "\n",
665 | "107\n",
666 | "00:03:30,760 --> 00:03:31,639\n",
667 | "there's nothing Ifear\n",
668 | "\n",
669 | "108\n",
670 | "00:03:31,640 --> 00:03:31,959\n",
671 | "there's nothing I fear\n",
672 | "\n",
673 | "109\n",
674 | "00:03:31,960 --> 00:03:32,319\n",
675 | "there nothigIfear\n",
676 | "\n",
677 | "110\n",
678 | "00:03:33,680 --> 00:03:33,999\n",
679 | "AndIknow\n",
680 | "\n",
681 | "111\n",
682 | "00:03:34,000 --> 00:03:34,599\n",
683 | "AndIknow\n",
684 | "\n",
685 | "112\n",
686 | "00:03:34,600 --> 00:03:35,399\n",
687 | "AndIknow\n",
688 | "\n",
689 | "113\n",
690 | "00:03:35,520 --> 00:03:37,959\n",
691 | "that my heart will you go on\n",
692 | "\n",
693 | "114\n",
694 | "00:03:38,400 --> 00:03:38,879\n",
695 | "that my heart will you go on\n",
696 | "\n",
697 | "115\n",
698 | "00:03:38,880 --> 00:03:39,439\n",
699 | "that my heart will you go on\n",
700 | "\n",
701 | "116\n",
702 | "00:03:39,440 --> 00:03:39,919\n",
703 | "that my heart will you go on\n",
704 | "\n",
705 | "117\n",
706 | "00:03:40,160 --> 00:03:40,599\n",
707 | "that my heart will you go on\n",
708 | "\n",
709 | "118\n",
710 | "00:03:40,600 --> 00:03:40,919\n",
711 | "that my heart will you go on\n",
712 | "\n",
713 | "119\n",
714 | "00:03:40,920 --> 00:03:41,399\n",
715 | "that my heart will you go on\n",
716 | "\n",
717 | "120\n",
718 | "00:03:44,240 --> 00:03:44,679\n",
719 | "Weill stay foreverthsway\n",
720 | "\n",
721 | "121\n",
722 | "00:03:44,680 --> 00:03:44,999\n",
723 | "We'll stayforever this way\n",
724 | "\n",
725 | "122\n",
726 | "00:03:45,000 --> 00:03:49,239\n",
727 | "We'll stay forever this way\n",
728 | "\n",
729 | "123\n",
730 | "00:03:49,240 --> 00:03:50,799\n",
731 | "We'll stay forever this way\n",
732 | "\n",
733 | "124\n",
734 | "00:03:50,840 --> 00:03:51,199\n",
735 | "We'll stay forever this way\n",
736 | "\n",
737 | "125\n",
738 | "00:03:51,200 --> 00:03:51,599\n",
739 | "We'll stay forever this way\n",
740 | "\n",
741 | "126\n",
742 | "00:03:57,240 --> 00:03:57,919\n",
743 | "You are And in my heart\n",
744 | "\n",
745 | "127\n",
746 | "00:03:58,440 --> 00:03:59,199\n",
747 | "my heart will go on and\n",
748 | "on\n",
749 | "\n",
750 | "128\n",
751 | "00:03:59,200 --> 00:04:05,279\n",
752 | "my heart will go on and\n",
753 | "on\n",
754 | "\n",
755 | "129\n",
756 | "00:04:05,280 --> 00:04:06,919\n",
757 | "my heart will go on and\n",
758 | "on\n",
759 | "\n"
760 | ]
761 | }
762 | ],
763 | "source": [
764 | "!cat result/result.srt"
765 | ]
766 | },
767 | {
768 | "cell_type": "code",
769 | "execution_count": null,
770 | "metadata": {
771 | "id": "cNjpqvivs1ZA"
772 | },
773 | "outputs": [],
774 | "source": []
775 | }
776 | ],
777 | "metadata": {
778 | "colab": {
779 | "provenance": []
780 | },
781 | "kernelspec": {
782 | "display_name": "Python 3",
783 | "name": "python3"
784 | }
785 | },
786 | "nbformat": 4,
787 | "nbformat_minor": 0
788 | }
789 |
--------------------------------------------------------------------------------
/assets/colab-badge.svg:
--------------------------------------------------------------------------------
1 | Open in Colab Open in Colab
2 |
--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/assets/logo.png
--------------------------------------------------------------------------------
/cliff.toml:
--------------------------------------------------------------------------------
1 | # git-cliff ~ configuration file
2 | # https://git-cliff.org/docs/configuration
3 |
4 | [changelog]
5 | # A Tera template to be rendered as the changelog's header.
6 | # See https://keats.github.io/tera/docs/#introduction
7 | # header = """
8 | # # Changelog\n
9 | # All notable changes to this project will be documented in this file. See [conventional commits](https://www.conventionalcommits.org/) for commit guidelines.\n
10 | # """
11 | # A Tera template to be rendered for each release in the changelog.
12 | # See https://keats.github.io/tera/docs/#introduction
13 | body = """
14 | {% for group, commits in commits | group_by(attribute="group") %}
15 | ### {{ group | striptags | trim | upper_first }}
16 | {% for commit in commits
17 | | filter(attribute="scope")
18 | | sort(attribute="scope") %}
19 | - **({{commit.scope}})**{% if commit.breaking %} [**breaking**]{% endif %} \
20 | {{ commit.message }} by [@{{ commit.author.name }}](https://github.com/{{ commit.author.name }}) in [{{ commit.id | truncate(length=7, end="") }}]($REPO/commit/{{ commit.id }})
21 | {%- endfor -%}
22 | {% raw %}\n{% endraw %}\
23 | {%- for commit in commits %}
24 | {%- if commit.scope -%}
25 | {% else -%}
26 | - {% if commit.breaking %} [**breaking**]{% endif %}\
27 | {{ commit.message }} by [@{{ commit.author.name }}](https://github.com/{{ commit.author.name }}) in [{{ commit.id | truncate(length=7, end="") }}]($REPO/commit/{{ commit.id }})
28 | {% endif -%}
29 | {% endfor -%}
30 | {% endfor %}
31 |
32 |
33 | {% if github.contributors | length > 0 %}
34 | ### 🎉 Contributors
35 |
36 | {% for contributor in github.contributors %}
37 | - [@{{ contributor.username }}](https://github.com/{{ contributor.username }})
38 | {%- endfor -%}
39 | {% endif %}
40 |
41 |
42 | {% if version %}
43 | {% if previous.version %}\
44 | **Full Changelog**: [{{ version | trim_start_matches(pat="v") }}]($REPO/compare/{{ previous.version }}..{{ version }})
45 | {% else %}\
46 | **Full Changelog**: [{{ version | trim_start_matches(pat="v") }}]
47 | {% endif %}\
48 | {% else %}\
49 | ## [unreleased]
50 | {% endif %}
51 | """
52 | # A Tera template to be rendered as the changelog's footer.
53 | # See https://keats.github.io/tera/docs/#introduction
54 |
55 | footer = """
56 |
57 | """
58 |
59 | # Remove leading and trailing whitespaces from the changelog's body.
60 | trim = true
61 | # postprocessors
62 | postprocessors = [
63 |   # Replace the placeholder `$REPO` with a URL.
64 | { pattern = '\$REPO', replace = "https://github.com/SWHL/RapidVideOCR" }, # replace repository URL
65 | ]
66 |
67 | [git]
68 | # Parse commits according to the conventional commits specification.
69 | # See https://www.conventionalcommits.org
70 | conventional_commits = true
71 | # Exclude commits that do not match the conventional commits specification.
72 | filter_unconventional = true
73 | # Split commits on newlines, treating each line as an individual commit.
74 | split_commits = false
75 | # An array of regex based parsers to modify commit messages prior to further processing.
76 | commit_preprocessors = [
77 | # Replace issue numbers with link templates to be updated in `changelog.postprocessors`.
78 | #{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](https://github.com/orhun/git-cliff/issues/${2}))"},
79 | ]
80 | # An array of regex based parsers for extracting data from the commit message.
81 | # Assigns commits to groups.
82 | # Optionally sets the commit's scope and can decide to exclude commits from further processing.
83 | commit_parsers = [
84 | { message = "^feat", group = "🚀 Features" },
85 | { message = "^fix", group = "🐛 Bug Fixes" },
86 | { message = "^doc", group = "📚 Documentation" },
87 | { message = "^perf", group = "⚡ Performance" },
88 | { message = "^refactor", group = "🚜 Refactor" },
89 | { message = "^style", group = "🎨 Styling" },
90 | { message = "^test", group = "🧪 Testing" },
91 | { message = "^chore\\(release\\): prepare for", skip = true },
92 | { message = "^chore\\(deps.*\\)", skip = true },
93 | { message = "^chore\\(pr\\)", skip = true },
94 | { message = "^chore\\(pull\\)", skip = true },
95 | { message = "^chore|^ci", group = "⚙️ Miscellaneous Tasks" },
96 | { body = ".*security", group = "🛡️ Security" },
97 | { message = "^revert", group = "◀️ Revert" },
98 | { message = ".*", group = "💼 Other" },
99 | ]
100 | # Exclude commits that are not matched by any commit parser.
101 | filter_commits = false
102 | # Order releases topologically instead of chronologically.
103 | topo_order = false
104 | # Order of commits in each group/release within the changelog.
105 | # Allowed values: newest, oldest
106 | sort_commits = "newest"
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 |
5 | # Extraction + recognition
6 | from rapid_videocr import RapidVideOCRInput, RapidVideoSubFinderOCR, VideoSubFinderInput
7 |
8 | vsf_exe_path = (
9 | r"G:\ProgramFiles\VideoSubFinder_6.10_x64\Release_x64\VideoSubFinderWXW.exe"
10 | )
11 | vsf_input_params = VideoSubFinderInput(vsf_exe_path=vsf_exe_path)
12 | ocr_input_params = RapidVideOCRInput(is_batch_rec=False)
13 | vsf_ocr = RapidVideoSubFinderOCR(vsf_input_params, ocr_input_params)
14 |
15 | # video_path can be a directory or the path of a specific video
16 | video_path = "test_files/tiny/2.mp4"
17 | save_dir = "outputs"
18 | vsf_ocr(video_path, save_dir)
19 |
20 |
21 | # # Recognition only
22 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput
23 |
24 | ocr_input_params = RapidVideOCRInput(is_batch_rec=False)
25 | extractor = RapidVideOCR(ocr_input_params)
26 |
27 | rgb_dir = "tests/test_files/RGBImages"
28 | save_dir = "outputs"
29 | save_name = "a"
30 |
31 | # outputs/a.srt outputs/a.ass outputs/a.txt
32 | extractor(rgb_dir, save_dir, save_name=save_name)
33 |
--------------------------------------------------------------------------------
/docs/README_zh.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | Simplified Chinese | [English](https://github.com/SWHL/RapidVideOCR)
17 |
18 |
19 | ### Introduction
20 |
21 | - Extracts hardcoded subtitles from videos and automatically generates the corresponding `srt | ass | txt` files.
22 | - Supported subtitle languages: Chinese | English (for other supported languages, see the [supported language list](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99))
23 | - Advantages:
24 |   - **Faster extraction**: works with [VideoSubFinder](https://sourceforge.net/projects/videosubfinder/) to extract key subtitle frames more quickly.
25 |   - **More accurate recognition**: uses [RapidOCR](https://github.com/RapidAI/RapidOCR) as the recognition library.
26 |   - **Easier to use**: just install it with pip and it is ready to use.
27 |
28 | - For the desktop EXE version, see [RapidVideOCRDesktop](https://github.com/SWHL/RapidVideOCRDesktop).
29 | - If this project helps you, please give it a star ⭐.
30 |
31 | ### [Online Demo](https://www.modelscope.cn/studios/liekkas/RapidVideOCR/summary)
32 |
33 |
34 |
35 |
36 |
37 | ### Overall Framework
38 |
39 | ```mermaid
40 | flowchart LR
41 | A[/Video/] --Extract subtitle key frame--> B(VideoSubFinder) --OCR-->C(RapidVideOCR)
42 | C --Convert--> D[/"SRT | ASS | TXT"/]
43 | ```
44 |
45 | ### Installation
46 |
47 | ```bash
48 | pip install rapid_videocr
49 | ```
50 |
51 | ### Usage
52 |
53 | > [!NOTE]
54 | >
55 | > The input image path for `rapid_videocr` must be the RGBImages or TXTImages directory produced by **VideoSubFinder**.
56 |
57 | ```bash
58 | rapid_videocr -i RGBImages
59 | ```
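You can also call the Python API directly. Below is a minimal sketch based on `demo.py` in this repository; the `RGBImages` directory is assumed to be VideoSubFinder output:

```python
from rapid_videocr import RapidVideOCR, RapidVideOCRInput

ocr_input_params = RapidVideOCRInput(is_batch_rec=False)
extractor = RapidVideOCR(ocr_input_params)

# Writes outputs/result.srt, outputs/result.ass and outputs/result.txt
extractor("RGBImages", "outputs", save_name="result")
```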
60 |
61 | ### Documentation
62 |
63 | For the full documentation, see [docs](https://swhl.github.io/RapidVideOCR/docs).
64 |
65 | ### Contributors
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | ### Contribution Guidelines
74 |
75 | We appreciate all of the contributors for their efforts to improve and enhance RapidVideOCR.
76 |
77 | - Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
78 | - Please make sure to update the tests as appropriate.
79 |
80 | ### Join Us
81 |
82 | - Scan the QR code below with WeChat to follow the **RapidAI official account**, then reply "video" to join the RapidVideOCR WeChat group:
83 |
84 |
85 |
86 |
87 | - Scan the QR code to join the QQ group (706807542):
88 |
89 |
90 |
91 |
92 | ### [Sponsor](https://swhl.github.io/RapidVideOCR/docs/sponsor/)
93 |
94 | If you would like to sponsor this project, click the Sponsor button at the top of this page and leave a note with **your GitHub account name** so you can be added to the sponsor list.
95 |
96 | ### License
97 |
98 | This project is released under the [Apache 2.0 license](../LICENSE).
99 |
--------------------------------------------------------------------------------
/docs/doc_whl.md:
--------------------------------------------------------------------------------
1 | ### See [Documentation](https://swhl.github.io/RapidVideOCR/docs/)
2 |
--------------------------------------------------------------------------------
/rapid_videocr/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import RapidVideOCR, RapidVideOCRExeception, RapidVideOCRInput
5 | from .vsf_cli import VideoSubFinderInput
6 | from .vsf_ocr_cli import RapidVideoSubFinderOCR
--------------------------------------------------------------------------------
/rapid_videocr/export.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from abc import ABC, abstractmethod
5 | from enum import Enum
6 | from pathlib import Path
7 | from typing import List
8 |
9 | from .utils.utils import write_txt
10 |
11 |
12 | class OutputFormat(Enum):
13 | TXT = "txt"
14 | SRT = "srt"
15 | ASS = "ass"
16 | ALL = "all"
17 |
18 |
19 | class ExportStrategy(ABC):
20 | @abstractmethod
21 | def export(
22 | self,
23 | save_dir: Path,
24 | save_name: str,
25 | srt_result: List[str],
26 | ass_result: List[str],
27 | txt_result: List[str],
28 | ):
29 | pass
30 |
31 |
32 | class TxtExportStrategy(ExportStrategy):
33 | def export(
34 | self,
35 | save_dir: Path,
36 | save_name: str,
37 | srt_result: List[str],
38 | ass_result: List[str],
39 | txt_result: List[str],
40 | ):
41 | file_path = save_dir / f"{save_name}.txt"
42 | write_txt(file_path, txt_result)
43 |
44 |
45 | class SrtExportStrategy(ExportStrategy):
46 | def export(
47 | self,
48 | save_dir: Path,
49 | save_name: str,
50 | srt_result: List[str],
51 | ass_result: List[str],
52 | txt_result: List[str],
53 | ):
54 | file_path = save_dir / f"{save_name}.srt"
55 | write_txt(file_path, srt_result)
56 |
57 |
58 | class AssExportStrategy(ExportStrategy):
59 | def export(
60 | self,
61 | save_dir: Path,
62 | save_name: str,
63 | srt_result: List[str], # unused here but kept for signature
64 | ass_result: List[str],
65 | txt_result: List[str],
66 | ):
67 | header = [
68 | "[Script Info]",
69 | "; Script generated by RapidVideOCR",
70 | "ScriptType: v4.00+",
71 | "PlayResX: 1920",
72 | "PlayResY: 1080",
73 | "",
74 | "[V4+ Styles]",
75 | "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, "
76 | "Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, "
77 | "Alignment, MarginL, MarginR, MarginV, Encoding",
78 | "Style: Default,Arial,54,&H00FFFFFF,&H0000FFFF,&H00000000,&H64000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1",
79 | "",
80 | "[Events]",
81 | "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
82 | ]
83 |
84 | file_path = save_dir / f"{save_name}.ass"
85 | write_txt(file_path, header + [""] + ass_result)
86 |
87 |
88 | class AllExportStrategy(ExportStrategy):
89 | def export(
90 | self,
91 | save_dir: Path,
92 | save_name: str,
93 | srt_result: List[str],
94 | ass_result: List[str],
95 | txt_result: List[str],
96 | ):
97 | txt_export = TxtExportStrategy()
98 | srt_export = SrtExportStrategy()
99 | ass_export = AssExportStrategy()
100 |
101 | txt_export.export(save_dir, save_name, srt_result, ass_result, txt_result)
102 | srt_export.export(save_dir, save_name, srt_result, ass_result, txt_result)
103 | ass_export.export(save_dir, save_name, srt_result, ass_result, txt_result)
104 |
105 |
106 | class ExportStrategyFactory:
107 | @staticmethod
108 | def create_strategy(out_format: str = OutputFormat.ALL.value) -> ExportStrategy:
109 | strategies = {
110 | OutputFormat.TXT.value: TxtExportStrategy(),
111 | OutputFormat.SRT.value: SrtExportStrategy(),
112 | OutputFormat.ASS.value: AssExportStrategy(),
113 | OutputFormat.ALL.value: AllExportStrategy(),
114 | }
115 |
116 | if strategy := strategies.get(out_format):
117 | return strategy
118 | raise ValueError(f"Unsupported output format: {out_format}")
119 |
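A minimal usage sketch of the factory above (illustrative only; the output directory and subtitle lines are placeholders):

```python
from pathlib import Path

from rapid_videocr.export import ExportStrategyFactory

save_dir = Path("outputs")
save_dir.mkdir(parents=True, exist_ok=True)  # the strategies assume the directory exists

# "txt" | "srt" | "ass" | "all"; unknown formats raise ValueError
strategy = ExportStrategyFactory.create_strategy("srt")
strategy.export(
    save_dir,
    "result",
    srt_result=["1\n00:00:00,041 --> 00:00:00,415\nExample line\n"],
    ass_result=[],
    txt_result=[],
)  # writes outputs/result.srt
```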
--------------------------------------------------------------------------------
/rapid_videocr/main.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import argparse
5 | from dataclasses import dataclass
6 | from pathlib import Path
7 | from typing import Any, Dict, List, Optional, Union
8 |
9 | from .export import ExportStrategyFactory, OutputFormat
10 | from .ocr_processor import OCRProcessor
11 | from .utils.crop_by_project import CropByProject
12 | from .utils.logger import Logger
13 | from .utils.utils import mkdir
14 |
15 | IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"}
16 |
17 |
18 | @dataclass
19 | class RapidVideOCRInput:
20 | is_batch_rec: bool = False
21 | batch_size: int = 10
22 | out_format: str = OutputFormat.ALL.value
23 | ocr_params: Optional[Dict[str, Any]] = None
24 |
25 |
26 | class RapidVideOCR:
27 | def __init__(self, input_params: RapidVideOCRInput):
28 | self.logger = Logger(logger_name=__name__).get_log()
29 |
30 | self.ocr_processor = OCRProcessor(
31 | input_params.ocr_params, input_params.batch_size
32 | )
33 |
34 | self.cropper = CropByProject()
35 |
36 | self.is_batch_rec = input_params.is_batch_rec
37 | self.out_format = input_params.out_format
38 |
39 | def __call__(
40 | self,
41 | vsf_dir: Union[str, Path],
42 | save_dir: Union[str, Path],
43 | save_name: str = "result",
44 | ) -> List[str]:
45 | vsf_dir = Path(vsf_dir)
46 | if not vsf_dir.exists():
47 | raise RapidVideOCRExeception(f"{vsf_dir} does not exist.")
48 |
49 | img_list = self.get_img_list(vsf_dir)
50 | srt_result, ass_result, txt_result = self.ocr_processor(
51 | img_list, self.is_batch_rec, self.is_txt_dir(vsf_dir)
52 | )
53 |
54 | self.export_file(Path(save_dir), save_name, srt_result, ass_result, txt_result)
55 | return txt_result
56 |
57 | def get_img_list(self, vsf_dir: Path) -> List[Path]:
58 | def get_sort_key(x: Path) -> int:
59 |             return int("".join(str(x.stem).split("_")[:4]))  # start-time digits H_MM_SS_mmm as an int
60 |
61 | img_list = []
62 | for v in vsf_dir.glob("*.*"):
63 | if not v.is_file():
64 | continue
65 |
66 | if v.suffix.lower() not in IMAGE_EXTENSIONS:
67 | continue
68 |
69 | img_list.append(v)
70 |
71 | if not img_list:
72 | raise RapidVideOCRExeception(f"{vsf_dir} does not have valid images")
73 |
74 | img_list = sorted(img_list, key=get_sort_key)
75 | return img_list
76 |
77 | @staticmethod
78 | def is_txt_dir(vsf_dir: Path) -> bool:
79 | return "TXTImages" in vsf_dir.name
80 |
81 | def export_file(
82 | self,
83 | save_dir: Path,
84 | save_name: str,
85 | srt_result: List[str],
86 | ass_result: List[str],
87 | txt_result: List[str],
88 | ):
89 | try:
90 | strategy = ExportStrategyFactory.create_strategy(self.out_format)
91 |
92 | mkdir(save_dir)
93 | strategy.export(save_dir, save_name, srt_result, ass_result, txt_result)
94 | self.logger.info("[OCR] Results saved to directory: %s", save_dir)
95 | except ValueError as e:
96 | self.logger.error("Export failed: %s", str(e))
97 | raise
98 |
99 | def print_console(self, txt_result: List):
100 | for v in txt_result:
101 | print(v.strip())
102 |
103 |
104 | class RapidVideOCRExeception(Exception):
105 | pass
106 |
107 |
108 | def main():
109 | parser = argparse.ArgumentParser()
110 | parser.add_argument(
111 | "-i",
112 | "--img_dir",
113 | type=str,
114 | required=True,
115 | help="The full path of RGBImages or TXTImages.",
116 | )
117 | parser.add_argument(
118 | "-s",
119 | "--save_dir",
120 | type=str,
121 | default="outputs",
122 | help='The path of saving the recognition result. Default is "outputs" under the current directory.',
123 | )
124 | parser.add_argument(
125 | "-f",
126 | "--file_name",
127 | type=str,
128 | default="result",
129 |         help='The name of the output file. Default is "result".',
130 | )
131 | parser.add_argument(
132 | "-o",
133 | "--out_format",
134 | type=str,
135 | default=OutputFormat.ALL.value,
136 | choices=[v.value for v in OutputFormat],
137 | help='Output file format. Default is "all".',
138 | )
139 | parser.add_argument(
140 | "--is_batch_rec",
141 | action="store_true",
142 | default=False,
143 |         help="Run concat recognition (batch mode) instead of single-image recognition. Default is False.",
144 | )
145 | parser.add_argument(
146 | "-b",
147 | "--batch_size",
148 | type=int,
149 | default=10,
150 |         help="The number of images concatenated per batch in concat recognition mode. Default is 10.",
151 | )
152 | args = parser.parse_args()
153 |
154 | ocr_input_params = RapidVideOCRInput(
155 | is_batch_rec=args.is_batch_rec,
156 | batch_size=args.batch_size,
157 | out_format=args.out_format,
158 | )
159 | extractor = RapidVideOCR(ocr_input_params)
160 | extractor(args.img_dir, args.save_dir, args.file_name)
161 |
162 |
163 | if __name__ == "__main__":
164 | main()
165 |
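A short sketch of the options exposed by `RapidVideOCRInput` above, using the RGB images bundled with this repository's tests (paths are relative to the repository root and purely illustrative):

```python
from rapid_videocr import RapidVideOCR, RapidVideOCRInput

# Concat recognition: up to batch_size images are stacked into one OCR call; only the .srt file is written.
params = RapidVideOCRInput(is_batch_rec=True, batch_size=10, out_format="srt")
extractor = RapidVideOCR(params)
extractor("tests/test_files/RGBImages", "outputs", save_name="result")
```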
--------------------------------------------------------------------------------
/rapid_videocr/ocr_processor.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from pathlib import Path
5 | from typing import Dict, List, Optional, Tuple
6 |
7 | import cv2
8 | import numpy as np
9 | from rapidocr import RapidOCR
10 | from tqdm import tqdm
11 |
12 | from .utils.logger import Logger
13 | from .utils.utils import (
14 | compute_centroid,
15 | compute_poly_iou,
16 | is_inclusive_each_other,
17 | padding_img,
18 | read_img,
19 | )
20 |
21 |
22 | class OCRProcessor:
23 | def __init__(self, ocr_params: Optional[Dict] = None, batch_size: int = 10):
24 | self.logger = Logger(logger_name=__name__).get_log()
25 | self.ocr_engine = self._init_ocr_engine(ocr_params)
26 | self.batch_size = batch_size
27 |
28 | def _init_ocr_engine(self, ocr_params: Optional[Dict] = None) -> RapidOCR:
29 | return RapidOCR(params=ocr_params)
30 |
31 | def __call__(
32 | self, img_list: List[Path], is_batch_rec: bool, is_txt_dir: bool
33 | ) -> Tuple[List[str], List[str], List[str]]:
34 | self.is_txt_dir = is_txt_dir
35 | process_func = self.batch_rec if is_batch_rec else self.single_rec
36 | rec_results = process_func(img_list)
37 | srt_results = self._generate_srt_results(rec_results)
38 | ass_results = self._generate_ass_results(rec_results)
39 | txt_results = self._generate_txt_result(rec_results)
40 | return srt_results, ass_results, txt_results
41 |
42 | def single_rec(self, img_list: List[Path]) -> List[Tuple[int, str, str, str]]:
43 | self.logger.info("[OCR] Running with single recognition.")
44 |
45 | rec_results = []
46 | for i, img_path in enumerate(tqdm(img_list, desc="OCR")):
47 | time_str = self._get_srt_timestamp(img_path)
48 | ass_time_str = self._get_ass_timestamp(img_path)
49 | img = self._preprocess_image(img_path)
50 |
51 | dt_boxes, rec_res = self.get_ocr_result(img)
52 | txts = (
53 | self.process_same_line(dt_boxes, rec_res)
54 | if dt_boxes is not None
55 | else ""
56 | )
57 | rec_results.append([i, time_str, txts, ass_time_str])
58 | return rec_results
59 |
60 | @staticmethod
61 | def _get_srt_timestamp(file_path: Path) -> str:
62 |         """Parse an SRT time range from a VSF file name like 0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg"""
63 |
64 | def format_time(time_parts):
65 | time_parts[0] = f"{time_parts[0]:0>2}"
66 | return ":".join(time_parts[:3]) + f",{time_parts[3]}"
67 |
68 | split_paths = file_path.stem.split("_")
69 | start_time = split_paths[:4]
70 | end_time = split_paths[5:9]
71 | return f"{format_time(start_time)} --> {format_time(end_time)}"
72 |
73 | @staticmethod
74 | def _get_ass_timestamp(file_path: Path) -> str:
75 | s = file_path.stem
76 |
77 | h1 = int(s[0:1])
78 | m1 = int(s[2:4])
79 | sec1 = int(s[5:7])
80 | ms1 = int(s[8:11])
81 |
82 | h2 = int(s[13:14])
83 | m2 = int(s[15:17])
84 | sec2 = int(s[18:20])
85 | ms2 = int(s[21:24])
86 |
87 | # compute absolute times in milliseconds
88 | bt = (h1 * 3600 + m1 * 60 + sec1) * 1000 + ms1
89 | et = (h2 * 3600 + m2 * 60 + sec2) * 1000 + ms2
90 |
91 | def to_ass(ts_ms: int) -> str:
92 | # centiseconds (drop the last digit, no rounding)
93 | cs_total = ts_ms // 10
94 | cs = cs_total % 100
95 | total_s = ts_ms // 1000
96 | s = total_s % 60
97 | total_m = total_s // 60
98 | m = total_m % 60
99 | h = total_m // 60
100 | # H:MM:SS.CC
101 | return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
102 |
103 | return f"{to_ass(bt)},{to_ass(et)}"
104 |
105 | @staticmethod
106 | def _preprocess_image(img_path: Path) -> np.ndarray:
107 | img = read_img(img_path)
108 | img = padding_img(img, (img.shape[0], img.shape[0], 0, 0))
109 | return img
110 |
111 | @staticmethod
112 | def _generate_srt_results(rec_results: List[Tuple[int, str, str, str]]) -> List[str]:
113 | return [f"{i+1}\n{time_str}\n{txt}\n" for i, time_str, txt, _ in rec_results]
114 |
115 | @staticmethod
116 | def _generate_ass_results(rec_results: List[Tuple[int, str, str, str]]) -> List[str]:
117 | return [f"Dialogue: 0,{ass_time_str},Default,,0,0,0,,{txt}" for _, _, txt, ass_time_str in rec_results]
118 |
119 | @staticmethod
120 | def _generate_txt_result(rec_results: List[Tuple[int, str, str, str]]) -> List[str]:
121 | return [f"{txt}\n" for _, _, txt, _ in rec_results]
122 |
123 | def batch_rec(self, img_list: List[Path]) -> List[Tuple[int, str, str, str]]:
124 | self.logger.info("[OCR] Running with concat recognition.")
125 |
126 | img_nums = len(img_list)
127 | rec_results = []
128 | for start_i in tqdm(range(0, img_nums, self.batch_size), desc="Concat Rec"):
129 | end_i = min(img_nums, start_i + self.batch_size)
130 |
131 | concat_img, img_coordinates, img_paths = self._prepare_batch(
132 | img_list[start_i:end_i]
133 | )
134 | dt_boxes, rec_res = self.get_ocr_result(concat_img)
135 | if rec_res is None or dt_boxes is None:
136 | continue
137 |
138 | one_batch_rec_results = self._process_batch_results(
139 | start_i, img_coordinates, dt_boxes, rec_res, img_paths
140 | )
141 | rec_results.extend(one_batch_rec_results)
142 | return rec_results
143 |
144 | def _prepare_batch(
145 | self, img_list: List[Path]
146 | ) -> Tuple[np.ndarray, np.ndarray, List[Path]]:
147 | padding_value = 10
148 | array_img_list, img_coordinates = [], []
149 | for i, img_path in enumerate(img_list):
150 | img = read_img(img_path)
151 | if self.is_txt_dir:
152 | img = cv2.resize(img, None, fx=0.25, fy=0.25)
153 |
154 | pad_img = padding_img(img, (0, padding_value, 0, 0))
155 | array_img_list.append(pad_img)
156 |
157 | h, w = img.shape[:2]
158 | x0, y0 = 0, i * (h + padding_value)
159 | x1, y1 = w, (i + 1) * (h + padding_value)
160 | img_coordinates.append([(x0, y0), (x1, y0), (x1, y1), (x0, y1)])
161 |
162 | return np.vstack(array_img_list), np.array(img_coordinates), img_list
163 |
164 | def _process_batch_results(
165 | self,
166 | start_i: int,
167 | img_coordinates: np.ndarray,
168 | dt_boxes: np.ndarray,
169 | rec_res: Tuple[str],
170 | img_paths: List[Path],
171 | ) -> List[Tuple[int, str, str, str]]:
172 | match_dict = self._match_boxes_to_images(
173 | img_coordinates, dt_boxes, rec_res, img_paths
174 | )
175 |
176 | results = []
177 | for k, v in match_dict.items():
178 | cur_frame_idx = start_i + k
179 | if v:
180 | img_path, boxes, recs = list(zip(*v))
181 | time_str = self._get_srt_timestamp(img_path[0])
182 | ass_time_str = self._get_ass_timestamp(img_path[0])
183 | txts = self.process_same_line(boxes, recs)
184 | else:
185 | time_str = self._get_srt_timestamp(img_paths[k])
186 | ass_time_str = self._get_ass_timestamp(img_paths[k])
187 | txts = ""
188 |
189 | results.append([cur_frame_idx, time_str, txts, ass_time_str])
190 | return results
191 |
192 | def _match_boxes_to_images(
193 | self,
194 | img_coordinates: np.ndarray,
195 | dt_boxes: np.ndarray,
196 | rec_res: List[str],
197 | img_paths: List[Path],
198 | ) -> Dict[int, List[Tuple[Path, np.ndarray, str]]]:
199 |         """Match detection boxes to their corresponding frame images."""
200 | match_dict = {k: [] for k in range(len(img_coordinates))}
201 | visited_idx = set()
202 |
203 | for i, frame_boxes in enumerate(img_coordinates):
204 | for idx, (dt_box, txt) in enumerate(zip(dt_boxes, rec_res)):
205 | if idx in visited_idx:
206 | continue
207 |
208 | if self._is_box_matched(frame_boxes, dt_box):
209 | match_dict[i].append((img_paths[i], dt_box, txt))
210 | visited_idx.add(idx)
211 |
212 | return match_dict
213 |
214 | def _is_box_matched(self, frame_boxes: np.ndarray, dt_box: np.ndarray) -> bool:
215 |         """Check whether a detection box falls within the given frame region."""
216 | box_iou = compute_poly_iou(frame_boxes, dt_box)
217 | return is_inclusive_each_other(frame_boxes, dt_box) or box_iou > 0.1
218 |
219 | def get_ocr_result(
220 | self, img: np.ndarray
221 | ) -> Tuple[Optional[np.ndarray], Optional[Tuple[str]]]:
222 | ocr_result = self.ocr_engine(img)
223 | if ocr_result.boxes is None:
224 | return None, None
225 | return ocr_result.boxes, ocr_result.txts
226 |
227 | def process_same_line(self, dt_boxes: np.ndarray, rec_res: List[str]) -> str:
228 | if len(rec_res) == 1:
229 | return rec_res[0]
230 |
231 | y_centroids = [compute_centroid(box)[1] for box in dt_boxes]
232 | line_groups = self._group_by_lines(y_centroids)
233 | return self._merge_line_text(line_groups, rec_res)
234 |
235 | def _group_by_lines(self, y_centroids: List[float]) -> List[List[int]]:
236 |         """Group the text boxes into lines by their y-centroids."""
237 |
238 |         # Plain local helper: @staticmethod is unnecessary here and not callable before Python 3.10.
239 |         def is_same_line(points: List) -> List[bool]:
240 | threshold = 5
241 |
242 | align_points = list(zip(points, points[1:]))
243 | bool_res = [False] * len(align_points)
244 | for i, point in enumerate(align_points):
245 | y0, y1 = point
246 | if abs(y0 - y1) <= threshold:
247 | bool_res[i] = True
248 | return bool_res
249 |
250 | bool_res = is_same_line(y_centroids)
251 | groups = []
252 | current_group = [0]
253 | for i, is_same in enumerate(bool_res, 1):
254 | if is_same:
255 | current_group.append(i)
256 | else:
257 | groups.append(current_group)
258 | current_group = [i]
259 |
260 | groups.append(current_group)
261 | return groups
262 |
263 | def _merge_line_text(self, line_groups: List[List[int]], rec_res: List[str]) -> str:
264 | lines = []
265 | for group in line_groups:
266 | line_text = " ".join(rec_res[i] for i in group)
267 | lines.append(line_text)
268 | return "\n".join(lines)
269 |
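For reference, a standalone sketch of how `_get_srt_timestamp` above maps the VideoSubFinder file-name convention to an SRT time range (the file name is the sample used in this repository's tests):

```python
from pathlib import Path

stem = Path("0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg").stem
parts = stem.split("_")             # H_MM_SS_mmm __ H_MM_SS_mmm _<crop info>
start, end = parts[:4], parts[5:9]

def fmt(t):
    return f"{t[0]:0>2}:{t[1]}:{t[2]},{t[3]}"

print(f"{fmt(start)} --> {fmt(end)}")  # 00:00:00,041 --> 00:00:00,415
```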
--------------------------------------------------------------------------------
/rapid_videocr/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .logger import Logger
5 |
--------------------------------------------------------------------------------
/rapid_videocr/utils/crop_by_project.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import cv2
5 | import numpy as np
6 |
7 |
8 | class CropByProject:
9 |     """Crop an image using projection profiles."""
10 |
11 | def __init__(self, threshold=250):
12 | self.threshold = threshold
13 |
14 | def __call__(self, origin_img):
15 | image = cv2.cvtColor(origin_img, cv2.COLOR_BGR2GRAY)
16 |
17 |         # Binarize the image
18 | retval, img = cv2.threshold(image, self.threshold, 255, cv2.THRESH_BINARY_INV)
19 |
20 |         # Dilate so the text grows into connected blocks
21 | closed = cv2.dilate(img, None, iterations=1)
22 |
23 |         # Horizontal projection
24 | x0, x1 = self.get_project_loc(closed, direction="width")
25 |
26 |         # Vertical projection
27 | y0, y1 = self.get_project_loc(closed, direction="height")
28 |
29 | return origin_img[y0:y1, x0:x1]
30 |
31 | @staticmethod
32 | def get_project_loc(img, direction):
33 |         """Get the start and end indices for cropping
34 |         Args:
35 |             img (ndarray): binarized image
36 |             direction (str): 'width/height'
37 |         Raises:
38 |             ValueError: unsupported projection direction
39 |         Returns:
40 |             tuple: start and end index positions
41 |         """
42 | if direction == "width":
43 | axis = 0
44 | elif direction == "height":
45 | axis = 1
46 | else:
47 | raise ValueError(f"direction {direction} is not supported!")
48 |
49 | loc_sum = np.sum(img == 255, axis=axis)
50 | loc_range = np.argwhere(loc_sum > 0)
51 | i0, i1 = loc_range[0][0], loc_range[-1][0]
52 | return i0, i1
53 |
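A quick sketch of how `CropByProject` could be used on one of the test images shipped with this repository (the path is relative to the repository root):

```python
import cv2

from rapid_videocr.utils.crop_by_project import CropByProject

img = cv2.imread(
    "tests/test_files/RGBImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg"
)
cropper = CropByProject(threshold=250)
cropped = cropper(img)  # crop to the region found by the two projection profiles
print(img.shape, "->", cropped.shape)
```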
--------------------------------------------------------------------------------
/rapid_videocr/utils/logger.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import logging
5 |
6 | import colorlog
7 |
8 |
9 | class Logger:
10 | def __init__(self, log_level=logging.DEBUG, logger_name=None):
11 | self.logger = logging.getLogger(logger_name)
12 | self.logger.setLevel(log_level)
13 | self.logger.propagate = False
14 |
15 | formatter = colorlog.ColoredFormatter(
16 | "%(log_color)s[%(levelname)s] %(asctime)s [RapidVideOCR] %(filename)s:%(lineno)d: %(message)s",
17 | log_colors={
18 | "DEBUG": "cyan",
19 | "INFO": "green",
20 | "WARNING": "yellow",
21 | "ERROR": "red",
22 | "CRITICAL": "red,bg_white",
23 | },
24 | )
25 |
26 | if not self.logger.handlers:
27 | console_handler = logging.StreamHandler()
28 | console_handler.setFormatter(formatter)
29 |
30 |             # The guard above ensures the logger gets exactly one console handler,
31 |             # even when Logger() is instantiated repeatedly with the same name.
32 |
33 | console_handler.setLevel(log_level)
34 | self.logger.addHandler(console_handler)
35 |
36 | def get_log(self):
37 | return self.logger
38 |
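A minimal usage sketch, mirroring how the other modules in this package obtain their logger:

```python
from rapid_videocr.utils.logger import Logger

logger = Logger(logger_name=__name__).get_log()
logger.info("colored console logging is ready")
```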
--------------------------------------------------------------------------------
/rapid_videocr/utils/utils.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import argparse
5 | from pathlib import Path
6 | from typing import List, Tuple, Union
7 |
8 | import cv2
9 | import numpy as np
10 | from shapely.errors import TopologicalError
11 | from shapely.geometry import MultiPoint, Polygon
12 |
13 |
14 | def compute_centroid(points: np.ndarray) -> List:
15 |     """Compute the centroid of the given box.
16 |
17 |     :param points (np.ndarray): corner points with shape (4, 2)
18 |     :return: [cx, cy] coordinates of the centroid
19 | """
20 | x_min, x_max = np.min(points[:, 0]), np.max(points[:, 0])
21 | y_min, y_max = np.min(points[:, 1]), np.max(points[:, 1])
22 | return [(x_min + x_max) / 2, (y_min + y_max) / 2]
23 |
24 |
25 | def write_txt(
26 | save_path: Union[str, Path], contents: Union[List[str], str], mode: str = "w"
27 | ) -> None:
28 | if not isinstance(contents, list):
29 | contents = [contents]
30 |
31 | with open(save_path, mode, encoding="utf-8") as f:
32 | for value in contents:
33 | f.write(f"{value}\n")
34 |
35 |
36 | def read_img(img_path: Union[str, Path]) -> np.ndarray:
37 | img = cv2.imdecode(np.fromfile(str(img_path), dtype=np.uint8), 1)
38 | return img
39 |
40 |
41 | def padding_img(
42 | img: np.ndarray,
43 | padding_value: Tuple[int, int, int, int],
44 | padding_color: Tuple[int, int, int] = (0, 0, 0),
45 | ) -> np.ndarray:
46 | padded_img = cv2.copyMakeBorder(
47 | img,
48 | padding_value[0],
49 | padding_value[1],
50 | padding_value[2],
51 | padding_value[3],
52 | cv2.BORDER_CONSTANT,
53 | value=padding_color,
54 | )
55 | return padded_img
56 |
57 |
58 | def mkdir(dir_path):
59 | Path(dir_path).mkdir(parents=True, exist_ok=True)
60 |
61 |
62 | def read_txt(txt_path: Union[str, Path]) -> List[str]:
63 | if not isinstance(txt_path, str):
64 | txt_path = str(txt_path)
65 |
66 | with open(txt_path, "r", encoding="utf-8") as f:
67 | data = list(map(lambda x: x.rstrip("\n"), f))
68 | return data
69 |
70 |
71 | def compute_poly_iou(a: np.ndarray, b: np.ndarray) -> float:
72 |     """Compute the IoU of two polygons.
73 |
74 |     Args:
75 |         a (np.ndarray): (4, 2)
76 |         b (np.ndarray): (4, 2)
77 |
78 | Returns:
79 | float: iou
80 | """
81 | poly1 = Polygon(a).convex_hull
82 | poly2 = Polygon(b).convex_hull
83 |
84 | union_poly = np.concatenate((a, b))
85 |
86 | if not poly1.intersects(poly2):
87 | return 0.0
88 |
89 | try:
90 | inter_area = poly1.intersection(poly2).area
91 | union_area = MultiPoint(union_poly).convex_hull.area
92 |     except TopologicalError:
93 |         print("shapely TopologicalError occurred, iou set to 0")
94 | return 0.0
95 |
96 | if union_area == 0:
97 | return 0.0
98 |
99 | return float(inter_area) / union_area
100 |
101 |
102 | def is_inclusive_each_other(box1: np.ndarray, box2: np.ndarray) -> bool:
103 |     """Check whether one of the two polygon boxes contains the other.
104 |
105 | Args:
106 | box1 (np.ndarray): (4, 2)
107 | box2 (np.ndarray): (4, 2)
108 |
109 | Returns:
110 |         bool: whether one box contains the other
111 | """
112 | poly1 = Polygon(box1)
113 | poly2 = Polygon(box2)
114 |
115 | poly1_area = poly1.convex_hull.area
116 | poly2_area = poly2.convex_hull.area
117 |
118 | if poly1_area > poly2_area:
119 | box_max = box1
120 | box_min = box2
121 | else:
122 | box_max = box2
123 | box_min = box1
124 |
125 | x0, y0 = np.min(box_min[:, 0]), np.min(box_min[:, 1])
126 | x1, y1 = np.max(box_min[:, 0]), np.max(box_min[:, 1])
127 |
128 | edge_x0, edge_y0 = np.min(box_max[:, 0]), np.min(box_max[:, 1])
129 | edge_x1, edge_y1 = np.max(box_max[:, 0]), np.max(box_max[:, 1])
130 |
131 | if x0 >= edge_x0 and y0 >= edge_y0 and x1 <= edge_x1 and y1 <= edge_y1:
132 | return True
133 | return False
134 |
135 |
136 | def float_range(mini, maxi):
137 | """Return function handle of an argument type function for
138 | ArgumentParser checking a float range: mini <= arg <= maxi
139 | mini - minimum acceptable argument
140 | maxi - maximum acceptable argument"""
141 |
142 | # Define the function with default arguments
143 | def float_range_checker(arg):
144 | """New Type function for argparse - a float within predefined range."""
145 |
146 | try:
147 | f = float(arg)
148 | except ValueError as exc:
149 | raise argparse.ArgumentTypeError("must be a floating point number") from exc
150 |
151 | if f < mini or f > maxi:
152 | raise argparse.ArgumentTypeError(
153 | "must be in range [" + str(mini) + " .. " + str(maxi) + "]"
154 | )
155 | return f
156 |
157 | # Return function handle to checking function
158 | return float_range_checker
159 |
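A small sketch exercising the geometry helpers above with two axis-aligned boxes (values chosen purely for illustration):

```python
import numpy as np

from rapid_videocr.utils.utils import (
    compute_centroid,
    compute_poly_iou,
    is_inclusive_each_other,
)

box_a = np.array([[0, 0], [10, 0], [10, 10], [0, 10]])
box_b = np.array([[2, 2], [8, 2], [8, 8], [2, 8]])

print(compute_centroid(box_a))                   # centroid of box_a: (5.0, 5.0)
print(round(compute_poly_iou(box_a, box_b), 2))  # 0.36 (intersection 36 / hull area 100)
print(is_inclusive_each_other(box_a, box_b))     # True: box_b lies inside box_a
```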
--------------------------------------------------------------------------------
/rapid_videocr/vsf_cli.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import subprocess
5 | from dataclasses import asdict, dataclass
6 | from typing import Optional
7 |
8 |
9 | @dataclass
10 | class VideoSubFinderInput:
11 | vsf_exe_path: str
12 | clear_dirs: bool = True
13 | run_search: bool = True
14 | create_cleared_text_images: bool = True
15 | create_empty_sub: Optional[str] = None
16 | create_sub_from_cleared_txt_images: Optional[str] = None
17 | create_sub_from_txt_results: Optional[str] = None
18 | open_video_opencv: bool = True
19 | open_video_ffmpeg: bool = False
20 | use_cuda: bool = False
21 | start_time: Optional[str] = None
22 | end_time: Optional[str] = None
23 | top_video_image_percent_end: float = 0.2
24 | bottom_video_image_percent_end: float = 0.0
25 | left_video_image_percent_end: float = 0.0
26 | right_video_image_percent_end: float = 1.0
27 | general_settings: Optional[str] = None
28 | num_threads: int = 2
29 | num_ocr_threads: int = 1
30 |
31 |
32 | class VideoSubFinder:
33 | def __init__(self, input_params: VideoSubFinderInput):
34 | param_dict = asdict(input_params)
35 | run_list = [input_params.vsf_exe_path]
36 |         for k, v in param_dict.items():
37 |             # vsf_exe_path is already argv[0]; skip it along with unset/False options.
38 |             if k == "vsf_exe_path" or v is None or v is False:
39 |                 continue
40 |             run_list.extend([f"--{k}"] if v is True else [f"--{k}", str(v)])
41 |         self.run_list = run_list
42 |
43 |     def __call__(self, video_path: str, output_dir: str) -> str:
44 |         # Build a fresh command per call so repeated calls do not accumulate arguments.
45 |         run_list = self.run_list + [
46 |             "--input_video", video_path, "--output_dir", output_dir
47 |         ]
48 |         subprocess.run(run_list, check=False)
49 |         return output_dir
50 |
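As a rough illustration (the executable path is an assumption and nothing is executed here), the wrapper above turns the dataclass into a VideoSubFinder command line:

```python
from rapid_videocr.vsf_cli import VideoSubFinder, VideoSubFinderInput

# Assumed install location of VideoSubFinderWXW.exe.
params = VideoSubFinderInput(vsf_exe_path=r"D:\VideoSubFinder\VideoSubFinderWXW.exe")
vsf = VideoSubFinder(params)
print(vsf.run_list)  # the exe path followed by the enabled VideoSubFinder options
```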
--------------------------------------------------------------------------------
/rapid_videocr/vsf_ocr_cli.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import argparse
5 | from enum import Enum
6 | from pathlib import Path
7 |
8 | from .main import OutputFormat, RapidVideOCR, RapidVideOCRInput
9 | from .utils.logger import Logger
10 | from .utils.utils import float_range
11 | from .vsf_cli import VideoSubFinder, VideoSubFinderInput
12 |
13 |
14 | class VideoFormat(Enum):
15 | MP4 = ".mp4"
16 | AVI = ".avi"
17 | MOV = ".mov"
18 | MKV = ".mkv"
19 |
20 |
21 | class RapidVideoSubFinderOCR:
22 | def __init__(
23 | self,
24 | vsf_input_params: VideoSubFinderInput,
25 | ocr_input_params: RapidVideOCRInput,
26 | ):
27 | self.logger = Logger(logger_name=__name__).get_log()
28 | self.vsf = VideoSubFinder(vsf_input_params)
29 | self.video_ocr = RapidVideOCR(ocr_input_params)
30 | self.video_formats = [VideoFormat[v].value for v in VideoFormat.__members__]
31 |
32 | def __call__(self, video_path: str, output_dir: str = "outputs"):
33 | if Path(video_path).is_dir():
34 | video_list = Path(video_path).rglob("*.*")
35 | video_list = [
36 | v for v in video_list if v.suffix.lower() in self.video_formats
37 | ]
38 | else:
39 | video_list = [video_path]
40 |
41 | self.logger.info(
42 | "Extracting subtitle images with VideoSubFinder (takes quite a long time) ..."
43 | )
44 | video_num = len(video_list)
45 | for i, one_video in enumerate(video_list):
46 | self.logger.info(
47 |                 "[%s/%s] Starting to extract key frames from %s", i + 1, video_num, one_video
48 | )
49 |
50 | save_name = Path(one_video).stem
51 | save_dir = Path(output_dir) / save_name
52 | save_vsf_dir = save_dir / "VSF_Results"
53 |
54 | try:
55 | self.vsf(str(one_video), str(save_vsf_dir))
56 | except Exception as e:
57 | self.logger.error("Extract %s error, %s, skip", one_video, e)
58 | continue
59 |
60 | self.logger.info(
61 |             "[%s/%s] Starting to run OCR on %s", i + 1, video_num, one_video
62 | )
63 |
64 | rgb_dir = Path(save_vsf_dir) / "RGBImages"
65 |             if not rgb_dir.exists() or not list(rgb_dir.iterdir()):
66 |                 self.logger.warning("No frames were extracted from %s, skip", one_video)
67 | continue
68 | self.video_ocr(rgb_dir, save_dir, save_name=save_name)
69 |
70 |
71 | def main():
72 | parser = argparse.ArgumentParser()
73 |
74 | videocr_param_group = parser.add_argument_group(title="VideOCRParameters")
75 | videocr_param_group.add_argument(
76 | "-video_dir",
77 | "--video_dir",
78 | type=str,
79 | default=None,
80 | help="The full path of video or the path of video directory.",
81 | )
82 | videocr_param_group.add_argument(
83 | "-i",
84 | "--img_dir",
85 | type=str,
86 | default=None,
87 | help="The full path of RGBImages or TXTImages.",
88 | )
89 | videocr_param_group.add_argument(
90 | "-s",
91 | "--save_dir",
92 | type=str,
93 | default="outputs",
94 | help='The path of saving the recognition result. Default is "outputs" under the current directory.',
95 | )
96 | videocr_param_group.add_argument(
97 | "-o",
98 | "--out_format",
99 | type=str,
100 | default=OutputFormat.ALL.value,
101 | choices=[OutputFormat[v].value for v in OutputFormat.__members__],
102 | help='Output file format. Default is "all".',
103 | )
104 | videocr_param_group.add_argument(
105 | "--is_batch_rec",
106 | action="store_true",
107 | default=False,
108 |         help="Run concat recognition (batch mode) instead of single-image recognition. Default is False.",
109 | )
110 | videocr_param_group.add_argument(
111 | "-b",
112 | "--batch_size",
113 | type=int,
114 | default=10,
115 |         help="The number of images concatenated per batch in concat recognition mode. Default is 10.",
116 | )
117 |
118 | vsf_param_group = parser.add_argument_group(title="VSFParameters")
119 | vsf_param_group.add_argument(
120 | "-vsf",
121 | "--vsf_exe_path",
122 | type=str,
123 | default=None,
124 | help="The full path of VideoSubFinderWXW.exe.",
125 | )
126 | vsf_param_group.add_argument(
127 | "-c",
128 | "--clear_dirs",
129 | action="store_false",
130 | default=True,
131 | help="Clear Folders (remove all images), performed before any other steps. Default is True",
132 | )
133 | vsf_param_group.add_argument(
134 | "-r",
135 | "--run_search",
136 | action="store_false",
137 | default=True,
138 | help="Run Search (find frames with hardcoded text (hardsub) on video) Default is True",
139 | )
140 | vsf_param_group.add_argument(
141 | "-ccti",
142 | "--create_cleared_text_images",
143 | action="store_true",
144 | default=False,
145 |         help="Create Cleared Text Images. Default is False",
146 | )
147 | vsf_param_group.add_argument(
148 | "-ces",
149 | "--create_empty_sub",
150 | type=str,
151 | default=None,
152 | help="Create Empty Sub With Provided Output File Name (*.ass or *.srt)",
153 | )
154 | vsf_param_group.add_argument(
155 | "-cscti",
156 | "--create_sub_from_cleared_txt_images",
157 | type=str,
158 | default=None,
159 | help="Create Sub From Cleared TXT Images With Provided Output File Name (*.ass or *.srt)",
160 | )
161 | vsf_param_group.add_argument(
162 | "-cstxt",
163 | "--create_sub_from_txt_results",
164 | type=str,
165 | default=None,
166 | help="Create Sub From TXT Results With Provided Output File Name (*.ass or *.srt)",
167 | )
168 | vsf_param_group.add_argument(
169 | "-ovocv",
170 | "--open_video_opencv",
171 | action="store_false",
172 | default=True,
173 | help="open video by OpenCV (default). Default is True",
174 | )
175 | vsf_param_group.add_argument(
176 | "-ovffmpeg",
177 | "--open_video_ffmpeg",
178 | action="store_true",
179 | default=False,
180 | help="open video by FFMPEG",
181 | )
182 | vsf_param_group.add_argument(
183 | "-uc", "--use_cuda", action="store_true", default=False, help="use cuda"
184 | )
185 | vsf_param_group.add_argument(
186 | "--start_time",
187 | type=str,
188 | default="0:00:00:000",
189 | help="start time, default = 0:00:00:000 (in format hour:min:sec:milisec)",
190 | )
191 | vsf_param_group.add_argument(
192 | "--end_time",
193 | type=str,
194 | default=None,
195 | help="end time, default = video length",
196 | )
197 | vsf_param_group.add_argument(
198 | "-te",
199 | "--top_video_image_percent_end",
200 | type=float_range(0, 1.0),
201 | default=0.2,
202 |         help="top video image percent offset from image bottom, can be in range [0.0,1.0], default = 0.2",
203 | )
204 | vsf_param_group.add_argument(
205 | "-be",
206 | "--bottom_video_image_percent_end",
207 | type=float_range(0, 1.0),
208 | default=0.0,
209 | help="bottom video image percent offset from image bottom, can be in range [0.0,1.0], default = 0.0",
210 | )
211 | vsf_param_group.add_argument(
212 | "-le",
213 | "--left_video_image_percent_end",
214 | type=float_range(0, 1.0),
215 | default=0.0,
216 | help="left video image percent end, can be in range [0.0,1.0], default = 0.0",
217 | )
218 | vsf_param_group.add_argument(
219 | "-re",
220 | "--right_video_image_percent_end",
221 | type=float_range(0, 1.0),
222 | default=1.0,
223 | help="right video image percent end, can be in range [0.0,1.0], default = 1.0",
224 | )
225 | vsf_param_group.add_argument(
226 | "-gs",
227 | "--general_settings",
228 | default=None,
229 | help="general settings (path to general settings *.cfg file, default = settings/general.cfg)",
230 | )
231 | vsf_param_group.add_argument(
232 | "-nthr",
233 | "--num_threads",
234 | type=int,
235 | default=1,
236 | help="number of threads used for Run Search",
237 | )
238 | vsf_param_group.add_argument(
239 | "-nocrthr",
240 | "--num_ocr_threads",
241 | type=int,
242 | default=1,
243 | help="number of threads used for Create Cleared TXT Images",
244 | )
245 | args = parser.parse_args()
246 |
247 | ocr_input_params = RapidVideOCRInput(
248 | is_batch_rec=args.is_batch_rec,
249 | batch_size=args.batch_size,
250 | out_format=args.out_format,
251 | )
252 |
253 |     if args.vsf_exe_path and args.video_dir:
254 |         vsf_args = {k: v for k, v in vars(args).items() if k in VideoSubFinderInput.__dataclass_fields__}
255 |         extractor = RapidVideoSubFinderOCR(VideoSubFinderInput(**vsf_args), ocr_input_params)
256 |         extractor(args.video_dir, args.save_dir)
257 | elif args.img_dir:
258 | extractor = RapidVideOCR(ocr_input_params)
259 | extractor(args.img_dir, args.save_dir)
260 | else:
261 | pass
262 |
263 |
264 | if __name__ == "__main__":
265 | main()
266 |
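For the combined VideoSubFinder + OCR flow from Python, a minimal sketch mirroring `demo.py` (the VideoSubFinderWXW.exe path is an assumption for your machine; the test video ships with this repository):

```python
from rapid_videocr import RapidVideOCRInput, RapidVideoSubFinderOCR
from rapid_videocr.vsf_cli import VideoSubFinderInput

vsf_params = VideoSubFinderInput(vsf_exe_path=r"D:\VideoSubFinder\VideoSubFinderWXW.exe")
ocr_params = RapidVideOCRInput(out_format="srt")
RapidVideoSubFinderOCR(vsf_params, ocr_params)("tests/test_files/2.mp4", "outputs")
```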
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tqdm
2 | rapidocr
3 | onnxruntime
4 | colorlog
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import sys
5 | from pathlib import Path
6 | from typing import List
7 |
8 | import setuptools
9 | from get_pypi_latest_version import GetPyPiLatestVersion
10 |
11 |
12 | def read_txt(txt_path: str) -> List:
13 | if not isinstance(txt_path, str):
14 | txt_path = str(txt_path)
15 |
16 | with open(txt_path, "r", encoding="utf-8") as f:
17 | data = list(map(lambda x: x.rstrip("\n"), f))
18 | return data
19 |
20 |
21 | def get_readme() -> str:
22 | root_dir = Path(__file__).resolve().parent
23 | readme_path = str(root_dir / "docs" / "doc_whl.md")
24 | with open(readme_path, "r", encoding="utf-8") as f:
25 | readme = f.read()
26 | return readme
27 |
28 |
29 | MODULE_NAME = "rapid_videocr"
30 |
31 | obtainer = GetPyPiLatestVersion()
32 | latest_version = obtainer(MODULE_NAME)
33 | VERSION_NUM = obtainer.version_add_one(latest_version)
34 |
35 | # Prefer the semantic version from the commit message; otherwise auto-increment the latest PyPI version
36 | if len(sys.argv) > 2:
37 | match_str = " ".join(sys.argv[2:])
38 | matched_versions = obtainer.extract_version(match_str)
39 | if matched_versions:
40 | VERSION_NUM = matched_versions
41 | sys.argv = sys.argv[:2]
42 |
43 | setuptools.setup(
44 | name=MODULE_NAME,
45 | version=VERSION_NUM,
46 | platforms="Any",
47 | description="Tool for extracting hard subtitles from videos.",
48 | long_description=get_readme(),
49 | long_description_content_type="text/markdown",
50 | author="SWHL",
51 | author_email="liekkaskono@163.com",
52 | url="https://github.com/SWHL/RapidVideOCR.git",
53 | license="Apache-2.0",
54 | include_package_data=True,
55 | install_requires=read_txt("requirements.txt"),
56 | packages=setuptools.find_packages(),
57 |     keywords=["rapidocr", "videocr", "subtitle"],
58 | classifiers=[
61 | "Programming Language :: Python :: 3.8",
62 | "Programming Language :: Python :: 3.9",
63 | "Programming Language :: Python :: 3.10",
64 | "Programming Language :: Python :: 3.11",
65 | "Programming Language :: Python :: 3.12",
66 | "Programming Language :: Python :: 3.13",
67 | ],
68 |     python_requires=">=3.8",
69 | entry_points={
70 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"],
71 | },
72 | )
73 |
--------------------------------------------------------------------------------
/tests/test_files/2.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/2.mp4
--------------------------------------------------------------------------------
/tests/test_files/RGBImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg
--------------------------------------------------------------------------------
/tests/test_files/RGBImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg
--------------------------------------------------------------------------------
/tests/test_files/RGBImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg
--------------------------------------------------------------------------------
/tests/test_files/RGBImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg
--------------------------------------------------------------------------------
/tests/test_files/TXTImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg
--------------------------------------------------------------------------------
/tests/test_files/TXTImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg
--------------------------------------------------------------------------------
/tests/test_files/TXTImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg
--------------------------------------------------------------------------------
/tests/test_files/TXTImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg
--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import shutil
5 | import sys
6 | from pathlib import Path
7 |
8 | import pytest
9 |
10 | cur_dir = Path(__file__).resolve().parent
11 | root_dir = cur_dir.parent
12 |
13 | sys.path.append(str(root_dir))
14 |
15 | from rapid_videocr import RapidVideOCR, RapidVideOCRExeception, RapidVideOCRInput
16 | from rapid_videocr.utils.utils import mkdir, read_txt
17 |
18 | test_dir = cur_dir / "test_files"
19 |
20 |
21 | @pytest.fixture
22 | def setup_and_teardown():
23 | save_dir = test_dir / "tmp"
24 | mkdir(save_dir)
25 |
26 | srt_path = save_dir / "result.srt"
27 | ass_path = save_dir / "result.ass"
28 | txt_path = save_dir / "result.txt"
29 |
30 | yield save_dir, srt_path, ass_path, txt_path
31 |
32 | shutil.rmtree(save_dir)
33 |
34 |
35 | @pytest.mark.parametrize(
36 | "img_dir",
37 | [test_dir / "RGBImages", test_dir / "TXTImages"],
38 | )
39 | def test_single_rec(setup_and_teardown, img_dir):
40 | save_dir, srt_path, ass_path, txt_path = setup_and_teardown
41 |
42 | extractor = RapidVideOCR(RapidVideOCRInput())
43 | extractor(img_dir, save_dir)
44 |
45 | srt_data = read_txt(srt_path)
46 | assert len(srt_data) == 16
47 | assert srt_data[2] == "空间里面他绝对赢不了的"
48 | assert srt_data[-2] == "你们接着善后"
49 |
50 | ass_data = read_txt(ass_path)
51 | assert len(ass_data) == 17
52 | assert ass_data[13].split(",", 9)[-1] == "空间里面他绝对赢不了的"
53 | assert ass_data[-1].split(",", 9)[-1] == "你们接着善后"
54 |
55 | txt_data = read_txt(txt_path)
56 | assert len(txt_data) == 8
57 | assert txt_data[-2] == "你们接着善后"
58 |
59 |
60 | @pytest.mark.parametrize("img_dir", [test_dir / "RGBImages"])
61 | def test_concat_rec(setup_and_teardown, img_dir):
62 | save_dir, srt_path, ass_path, txt_path = setup_and_teardown
63 |
64 | input_param = RapidVideOCRInput(is_batch_rec=True)
65 | extractor = RapidVideOCR(input_param)
66 | extractor(img_dir, save_dir)
67 |
68 | srt_data = read_txt(srt_path)
69 | assert len(srt_data) == 16
70 | assert srt_data[2] == "空间里面他绝对赢不了的"
71 | assert srt_data[-2] == "你们接着善后"
72 |
73 | ass_data = read_txt(ass_path)
74 | assert len(ass_data) == 17
75 | assert ass_data[13].split(",", 9)[-1] == "空间里面他绝对赢不了的"
76 | assert ass_data[-1].split(",", 9)[-1] == "你们接着善后"
77 |
78 | txt_data = read_txt(txt_path)
79 | assert len(txt_data) == 8
80 | assert txt_data[-2] == "你们接着善后"
81 |
82 |
83 | @pytest.mark.parametrize(
84 | "img_dir",
85 | [test_dir / "RGBImage", test_dir / "TXTImage"],
86 | )
87 | def test_empty_dir(img_dir):
88 | extractor = RapidVideOCR(RapidVideOCRInput())
89 | mkdir(img_dir)
90 |
91 | with pytest.raises(RapidVideOCRExeception) as exc_info:
92 | extractor(img_dir, test_dir)
93 | assert exc_info.type is RapidVideOCRExeception
94 |
95 | shutil.rmtree(img_dir)
96 |
97 |
98 | @pytest.mark.parametrize(
99 | "img_dir",
100 | [test_dir / "RGBImage", test_dir / "TXTImage"],
101 | )
102 | def test_nothing_dir(img_dir):
103 | extractor = RapidVideOCR(RapidVideOCRInput())
104 | mkdir(img_dir)
105 | with pytest.raises(RapidVideOCRExeception) as exc_info:
106 | extractor(img_dir, test_dir)
107 | assert exc_info.type is RapidVideOCRExeception
108 |
109 | shutil.rmtree(img_dir)
110 |
111 |
112 | def test_out_only_srt(setup_and_teardown):
113 | save_dir, srt_path, ass_path, txt_path = setup_and_teardown
114 |
115 | img_dir = test_dir / "RGBImages"
116 | input_param = RapidVideOCRInput(is_batch_rec=True, out_format="srt")
117 | extractor = RapidVideOCR(input_param)
118 | extractor(img_dir, save_dir)
119 |
120 | srt_data = read_txt(srt_path)
121 | assert len(srt_data) == 16
122 | assert srt_data[2] == "空间里面他绝对赢不了的"
123 | assert srt_data[-2] == "你们接着善后"
124 |
125 |
126 | def test_out_only_ass(setup_and_teardown):
127 | save_dir, srt_path, ass_path, txt_path = setup_and_teardown
128 |
129 | img_dir = test_dir / "RGBImages"
130 | input_param = RapidVideOCRInput(is_batch_rec=True, out_format="ass")
131 | extractor = RapidVideOCR(input_param)
132 | extractor(img_dir, save_dir)
133 |
134 | ass_data = read_txt(ass_path)
135 | assert len(ass_data) == 17
136 | assert ass_data[13].split(",", 9)[-1] == "空间里面他绝对赢不了的"
137 | assert ass_data[-1].split(",", 9)[-1] == "你们接着善后"
138 |
139 |
140 | def test_out_only_txt(setup_and_teardown):
141 | save_dir, srt_path, ass_path, txt_path = setup_and_teardown
142 |
143 | img_dir = test_dir / "RGBImages"
144 | input_param = RapidVideOCRInput(is_batch_rec=True, out_format="txt")
145 | extractor = RapidVideOCR(input_param)
146 | extractor(img_dir, save_dir)
147 |
148 | txt_data = read_txt(txt_path)
149 | assert len(txt_data) == 8
150 | assert txt_data[-2] == "你们接着善后"
151 |
--------------------------------------------------------------------------------