├── .github
    ├── FUNDING.yml
    ├── ISSUE_TEMPLATE
    │   ├── bug.md
    │   └── config.yml
    └── workflows
    │   ├── AutoPushToPypi.yml
    │   └── SyncToGitee.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── assets
    ├── RapidVideOCRDemo.ipynb
    ├── colab-badge.svg
    └── logo.png
├── cliff.toml
├── demo.py
├── docs
    ├── README_zh.md
    └── doc_whl.md
├── rapid_videocr
    ├── __init__.py
    ├── export.py
    ├── main.py
    ├── ocr_processor.py
    ├── utils
    │   ├── __init__.py
    │   ├── crop_by_project.py
    │   ├── logger.py
    │   └── utils.py
    ├── vsf_cli.py
    └── vsf_ocr_cli.py
├── requirements.txt
├── setup.py
└── tests
    ├── test_files
        ├── 2.mp4
        ├── RGBImages
        │   ├── 0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg
        │   ├── 0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg
        │   ├── 0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg
        │   └── 0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg
        └── TXTImages
        │   ├── 0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg
        │   ├── 0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg
        │   ├── 0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg
        │   └── 0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg
    └── test_main.py


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
 4 | patreon: # Replace with a single Patreon username
 5 | open_collective: # Replace with a single Open Collective username
 6 | ko_fi: # Replace with a single Ko-fi username
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: https://raw.githubusercontent.com/RapidAI/.github/6db6b6b9273f3151094a462a61fbc8e88564562c/assets/Sponsor.png
14 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: 🐞 Bug
 3 | about: Bug
 4 | title: 'Bug'
 5 | labels: 'Bug'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | 请提供下述完整信息以便快速定位问题
11 | (Please provide the following information to quickly locate the problem)
12 | - **系统环境/System Environment**:
13 | - **使用的是哪门语言的程序/Which programing language**:
14 | - **所使用语言相关版本信息/Version**:
15 | - **OnnxRuntime版本/OnnxRuntime Version**:
16 | - **使用当前库的版本/Use version**:
17 | - **可复现问题的demo和文件/Demo of reproducible problems**:
18 | - **完整报错/Complete Error Message**:
19 | - **可能的解决方案/Possible solutions**:


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
 1 | blank_issues_enabled: false
 2 | contact_links:
 3 |   - name: ❓ Questions
 4 |     url: https://github.com/SWHL/RapidVideOCR/discussions/categories/q-a
 5 |     about: Please use the community forum for help and questions regarding RapidVideOCR.
 6 |   - name: 💡 Feature requests and ideas
 7 |     url: https://github.com/SWHL/RapidVideOCR/discussions/categories/ideas
 8 |     about: Please vote for and post new feature ideas in the community forum.
 9 |   - name: 📖 Documentation
10 |     url: https://swhl.github.io/RapidVideOCR/docs
11 |     about: A great place to find instructions and answers about RapidVideOCR.
12 | 


--------------------------------------------------------------------------------
/.github/workflows/AutoPushToPypi.yml:
--------------------------------------------------------------------------------
 1 | name: Push rapid_videocr to pypi
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - v*
 7 | 
 8 | jobs:
 9 |   UnitTesting:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - name: Pull latest code
13 |         uses: actions/checkout@v4
14 | 
15 |       - name: Set up Python 3.10
16 |         uses: actions/setup-python@v4
17 |         with:
18 |           python-version: '3.10'
19 |           architecture: 'x64'
20 | 
21 |       - name: Unit testings
22 |         run: |
23 |           pip install -r requirements.txt --break-system-packages
24 |           pip install pytest six --break-system-packages
25 |           pytest -s tests/test_*.py
26 | 
27 |   GenerateWHL_PushPyPi:
28 |     needs: UnitTesting
29 |     runs-on: ubuntu-latest
30 | 
31 |     steps:
32 |       - uses: actions/checkout@v4
33 | 
34 |       - name: Run setup.py
35 |         run: |
36 |           pip install wheel get_pypi_latest_version --break-system-packages
37 | 
38 |           python -m pip install --upgrade pip --break-system-packages
39 |           python setup.py bdist_wheel ${{ github.ref_name }}
40 | 
41 |       - name: Publish distribution 📦 to PyPI
42 |         uses: pypa/gh-action-pypi-publish@v1.5.0
43 |         with:
44 |           password: ${{ secrets.PYPI_API_TOKEN }}
45 |           packages_dir: dist/
46 | 


--------------------------------------------------------------------------------
/.github/workflows/SyncToGitee.yml:
--------------------------------------------------------------------------------
 1 | name: syncToGitee
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - '**'
 6 | jobs:
 7 |   repo-sync:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - name: Checkout source codes
11 |         uses: actions/checkout@v2
12 | 
13 |       - name: Mirror the Github organization repos to Gitee.
14 |         uses: Yikun/hub-mirror-action@master
15 |         with:
16 |           src: 'github/SWHL'
17 |           dst: 'gitee/SWHL'
18 |           dst_key: ${{ secrets.GITEE_PRIVATE_KEY }}
19 |           dst_token:  ${{ secrets.GITEE_TOKEN }}
20 |           force_update: true
21 |           # only sync this repo
22 |           static_list: "RapidVideOCR"
23 |           debug: true
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | rapid_videocr/video_sub_finder/libs
  2 | *.vscode
  3 | outputs
  4 | 
  5 | *.pyc
  6 | 
  7 | *.onnx
  8 | 
  9 | temp/
 10 | test_files/
 11 | 
 12 | .DS_Store
 13 | 
 14 | *.bin
 15 | 
 16 | .mypy_cache
 17 | 
 18 | # Created by .ignore support plugin (hsz.mobi)
 19 | ### Python template
 20 | # Byte-compiled / optimized / DLL files
 21 | __pycache__/
 22 | *.py[cod]
 23 | *$py.class
 24 | .pytest_cache
 25 | 
 26 | # C extensions
 27 | *.so
 28 | 
 29 | # Distribution / packaging
 30 | .Python
 31 | build/
 32 | develop-eggs/
 33 | dist/
 34 | downloads/
 35 | eggs/
 36 | .eggs/
 37 | lib/
 38 | lib64/
 39 | parts/
 40 | sdist/
 41 | var/
 42 | wheels/
 43 | pip-wheel-metadata/
 44 | share/python-wheels/
 45 | *.egg-info/
 46 | .installed.cfg
 47 | *.egg
 48 | MANIFEST
 49 | 
 50 | # PyInstaller
 51 | #  Usually these files are written by a python script from a template
 52 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 53 | # *.manifest
 54 | # *.spec
 55 | *.res
 56 | 
 57 | # Installer logs
 58 | pip-log.txt
 59 | pip-delete-this-directory.txt
 60 | 
 61 | # Unit test / coverage reports
 62 | htmlcov/
 63 | .tox/
 64 | .nox/
 65 | .coverage
 66 | .coverage.*
 67 | .cache
 68 | nosetests.xml
 69 | coverage.xml
 70 | *.cover
 71 | *.py,cover
 72 | .hypothesis/
 73 | .pytest_cache/
 74 | 
 75 | # Translations
 76 | *.mo
 77 | *.pot
 78 | 
 79 | # Django stuff:
 80 | *.log
 81 | local_settings.py
 82 | db.sqlite3
 83 | db.sqlite3-journal
 84 | 
 85 | # Flask stuff:
 86 | instance/
 87 | .webassets-cache
 88 | 
 89 | # Scrapy stuff:
 90 | .scrapy
 91 | 
 92 | # Sphinx documentation
 93 | docs/_build/
 94 | 
 95 | # PyBuilder
 96 | target/
 97 | 
 98 | # Jupyter Notebook
 99 | .ipynb_checkpoints
100 | 
101 | # IPython
102 | profile_default/
103 | ipython_config.py
104 | 
105 | # pyenv
106 | .python-version
107 | 
108 | # pipenv
109 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
110 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
111 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
112 | #   install all needed dependencies.
113 | #Pipfile.lock
114 | 
115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
116 | __pypackages__/
117 | 
118 | # Celery stuff
119 | celerybeat-schedule
120 | celerybeat.pid
121 | 
122 | # SageMath parsed files
123 | *.sage.py
124 | 
125 | # Environments
126 | .env
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 | 
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 | 
138 | # Rope project settings
139 | .ropeproject
140 | 
141 | # mkdocs documentation
142 | /site
143 | 
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 | 
149 | # Pyre type checker
150 | .pyre/
151 | 
152 | #idea
153 | .vs
154 | .vscode
155 | .idea
156 | /images
157 | /models
158 | 
159 | #models
160 | *.onnx
161 | 
162 | *.ttf
163 | *.ttc
164 | 
165 | long1.jpg
166 | 
167 | *.bin
168 | *.mapping
169 | *.xml
170 | 
171 | *.pdiparams
172 | *.pdiparams.info
173 | *.pdmodel
174 | 
175 | .DS_Store


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | - repo: https://github.com/myint/autoflake
 3 |   rev: v2.1.1
 4 |   hooks:
 5 |     - id: autoflake
 6 |       args:
 7 |         [
 8 |           "--recursive",
 9 |           "--in-place",
10 |           "--remove-all-unused-imports",
11 |           "--remove-unused-variable",
12 |           "--ignore-init-module-imports",
13 |         ]
14 | - repo: https://github.com/psf/black
15 |   rev: 23.1.0
16 |   hooks:
17 |     - id: black
18 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <div align="center">
 2 |   <img src="https://raw.githubusercontent.com/SWHL/RapidVideOCR/main/assets/logo.png" width="55%" height="55%"/>
 3 | 
 4 | <div>&nbsp;</div>
 5 | 
 6 | <a href="https://huggingface.co/spaces/SWHL/RapidVideOCR" target="_blank"><img src="https://img.shields.io/badge/%F0%9F%A4%97-Hugging Face Demo-blue"></a>
 7 | <a href="https://colab.research.google.com/github/SWHL/RapidVideOCR/blob/main/assets/RapidVideOCRDemo.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg"></a>
 8 | <a href=""><img src="https://img.shields.io/badge/Python->=3.6-aff.svg"></a>
 9 | <a href=""><img src="https://img.shields.io/badge/OS-Linux%2C%20Win%2C%20Mac-pink.svg"></a>
10 | <a href="https://pypi.org/project/rapid-videocr/"><img alt="PyPI" src="https://img.shields.io/pypi/v/rapid_videocr"></a>
11 | <a href="https://github.com/SWHL/RapidVideOCR/stargazers"><img src="https://img.shields.io/github/stars/SWHL/RapidVideOCR?color=ccf"></a>
12 | <a href="https://pepy.tech/project/rapid-videocr"><img src="https://static.pepy.tech/personalized-badge/rapid-videocr?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads"></a>
13 | <a href="https://semver.org/"><img alt="SemVer2.0" src="https://img.shields.io/badge/SemVer-2.0-brightgreen"></a>
14 | <a href="https://github.com/psf/black"><img src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
15 | 
16 | [简体中文](https://github.com/SWHL/RapidVideOCR/blob/main/docs/README_zh.md) | English
17 | </div>
18 | 
19 | ### Introduction
20 | 
21 | - Video hard subtitle extraction, automatically generate the corresponding `srt | ass | txt` file.
22 | - Supported subtitle languages: Chinese | English (For other supported languages, see: [List of supported languages](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99))
23 | - The advantages are as follows:
24 |     - **Faster extraction**: Used in conjunction with [VideoSubFinder](https://sourceforge.net/projects/videosubfinder/) software to extract key subtitle frames faster.
25 |     - **More accurate recognition**: Use [RapidOCR](https://github.com/RapidAI/RapidOCR) as the recognition library.
26 |     - **More convenient to use**: pip can be installed directly and used.
27 | 
28 | - For desktop EXE version, please go to [RapidVideOCRDesktop](https://github.com/SWHL/RapidVideOCRDesktop).
29 | - If it helps you, please give a star ⭐.
30 | 
31 | ### [Online Demo](https://huggingface.co/spaces/SWHL/RapidVideOCR)
32 | 
33 | <div align="center">
34 |     <img src="https://github.com/SWHL/RapidVideOCR/releases/download/v2.0.1/OnlineDemo.gif" alt="Demo" width="100%" height="100%">
35 | </div>
36 | 
37 | ### Overall framework
38 | 
39 | ```mermaid
40 | flowchart LR
41 |     A[/Video/] --Extract subtitle key frame--> B(VideoSubFinder) --OCR-->C(RapidVideOCR)
42 |     C --Convert--> D[/"SRT | ASS | TXT"/]
43 | ```
44 | 
45 | ### Installation
46 | 
47 | ```bash
48 | pip install rapid_videocr
49 | ```
50 | 
51 | ### Usage
52 | 
53 | > [!NOTE]
54 | >
55 | > The input image path of `rapid_videocr` must be the path of **RGBImages** or **TXTImages** output by **VideoSubFinder** software.
56 | 
57 | ```bash
58 | rapid_videocr -i RGBImages
59 | ```
60 | 
61 | ### Documentation
62 | 
63 | Full documentation can be found on [docs](https://swhl.github.io/RapidVideOCR/docs), in Chinese.
64 | 
65 | ### Code Contributors
66 | 
67 | <p align="left">
68 |   <a href="https://github.com/SWHL/RapidVideOCR/graphs/contributors">
69 |     <img src="https://contrib.rocks/image?repo=SWHL/RapidVideOCR" width="20%"/>
70 |   </a>
71 | </p>
72 | 
73 | ### Contributing
74 | 
75 | - Pull requests are welcome. For major changes, please open an issue first
76 | to discuss what you would like to change.
77 | - Please make sure to update tests as appropriate.
78 | 
79 | ### [Sponsor](https://swhl.github.io/RapidVideOCR/docs/sponsor/)
80 | 
81 | If you want to sponsor the project, you can directly click the **Buy me a coffee** image, please write a note (e.g. your github account name) to facilitate adding to the sponsorship list below.
82 | 
83 | <div align="left">
84 |   <a href="https://www.buymeacoffee.com/SWHL"><img src="https://raw.githubusercontent.com/RapidAI/.github/main/assets/buymeacoffe.png" width="30%" height="30%"></a>
85 | </div>
86 | 
87 | ### License
88 | 
89 | This project is released under the [Apache 2.0 license](./LICENSE).
90 | 


--------------------------------------------------------------------------------
/assets/RapidVideOCRDemo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "id": "YBjLERcEsTES"
  7 |       },
  8 |       "source": [
  9 |         "## [RapidVideOCR Demo](https://github.com/SWHL/RapidVideOCR)"
 10 |       ]
 11 |     },
 12 |     {
 13 |       "cell_type": "markdown",
 14 |       "metadata": {
 15 |         "id": "IPBSdGqbjNpc"
 16 |       },
 17 |       "source": [
 18 |         "#### Require:\n",
 19 |         "- The RGBImages of [Video](https://www.youtube.com/watch?v=Z2Bg_usMYiA) from the VideoSubFinder software.\n",
 20 |         "- Install the RapidVideOCR"
 21 |       ]
 22 |     },
 23 |     {
 24 |       "cell_type": "markdown",
 25 |       "metadata": {
 26 |         "id": "IT1t_86aq4QU"
 27 |       },
 28 |       "source": [
 29 |         "#### Download the RGBImages.zip and unzip it."
 30 |       ]
 31 |     },
 32 |     {
 33 |       "cell_type": "code",
 34 |       "execution_count": 33,
 35 |       "metadata": {
 36 |         "colab": {
 37 |           "base_uri": "https://localhost:8080/"
 38 |         },
 39 |         "id": "qWiWiKJWjcH1",
 40 |         "outputId": "9b5c8098-061a-4f85-b7a8-822e1f26b166"
 41 |       },
 42 |       "outputs": [
 43 |         {
 44 |           "name": "stdout",
 45 |           "output_type": "stream",
 46 |           "text": [
 47 |             "--2023-04-09 01:47:40--  https://github.com/SWHL/RapidVideOCR/files/11184614/RGBImages.zip\n",
 48 |             "Resolving github.com (github.com)... 140.82.112.4\n",
 49 |             "Connecting to github.com (github.com)|140.82.112.4|:443... connected.\n",
 50 |             "HTTP request sent, awaiting response... 302 Found\n",
 51 |             "Location: https://objects.githubusercontent.com/github-production-repository-file-5c1aeb/405589029/11184614?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230409%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230409T014740Z&X-Amz-Expires=300&X-Amz-Signature=fa9c0cb7ec18b1113504c94f60ed8bd6c8250cd040d056396c0dc6caf5184dea&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=405589029&response-content-disposition=attachment%3Bfilename%3DRGBImages.zip&response-content-type=application%2Fx-zip-compressed [following]\n",
 52 |             "--2023-04-09 01:47:40--  https://objects.githubusercontent.com/github-production-repository-file-5c1aeb/405589029/11184614?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230409%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230409T014740Z&X-Amz-Expires=300&X-Amz-Signature=fa9c0cb7ec18b1113504c94f60ed8bd6c8250cd040d056396c0dc6caf5184dea&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=405589029&response-content-disposition=attachment%3Bfilename%3DRGBImages.zip&response-content-type=application%2Fx-zip-compressed\n",
 53 |             "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
 54 |             "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.108.133|:443... connected.\n",
 55 |             "HTTP request sent, awaiting response... 200 OK\n",
 56 |             "Length: 8314498 (7.9M) [application/x-zip-compressed]\n",
 57 |             "Saving to: ‘RGBImages.zip’\n",
 58 |             "\n",
 59 |             "RGBImages.zip       100%[===================>]   7.93M  43.3MB/s    in 0.2s    \n",
 60 |             "\n",
 61 |             "2023-04-09 01:47:40 (43.3 MB/s) - ‘RGBImages.zip’ saved [8314498/8314498]\n",
 62 |             "\n",
 63 |             "Archive:  RGBImages.zip\n",
 64 |             "   creating: RGBImages/\n",
 65 |             "  inflating: RGBImages/0_00_20_640__0_00_23_999_0055800000012800072001280.jpeg  \n",
 66 |             "  inflating: RGBImages/0_00_25_120__0_00_25_999_0055800000012800072001280.jpeg  \n",
 67 |             "  inflating: RGBImages/0_00_26_000__0_00_26_599_0055800000012800072001280.jpeg  \n",
 68 |             "  inflating: RGBImages/0_00_27_760__0_00_28_999_0055800000012800072001280.jpeg  \n",
 69 |             "  inflating: RGBImages/0_00_30_280__0_00_30_599_0055800000012800072001280.jpeg  \n",
 70 |             "  inflating: RGBImages/0_00_30_600__0_00_32_199_0055800000012800072001280.jpeg  \n",
 71 |             "  inflating: RGBImages/0_00_32_800__0_00_33_199_0055800000012800072001280.jpeg  \n",
 72 |             "  inflating: RGBImages/0_00_33_200__0_00_34_959_0055800000012800072001280.jpeg  \n",
 73 |             "  inflating: RGBImages/0_00_34_960__0_00_35_519_0055800000012800072001280.jpeg  \n",
 74 |             "  inflating: RGBImages/0_00_39_040__0_00_39_479_0055800000012800072001280.jpeg  \n",
 75 |             "  inflating: RGBImages/0_00_40_040__0_00_41_679_0055800000012800072001280.jpeg  \n",
 76 |             "  inflating: RGBImages/0_00_41_680__0_00_42_919_0055800000012800072001280.jpeg  \n",
 77 |             "  inflating: RGBImages/0_00_42_920__0_00_43_439_0055800000012800072001280.jpeg  \n",
 78 |             "  inflating: RGBImages/0_00_43_440__0_00_43_799_0055800000012800072001280.jpeg  \n",
 79 |             "  inflating: RGBImages/0_00_44_920__0_00_45_359_0055800000012800072001280.jpeg  \n",
 80 |             "  inflating: RGBImages/0_00_45_360__0_00_47_799_0055800000012800072001280.jpeg  \n",
 81 |             "  inflating: RGBImages/0_00_47_800__0_00_48_159_0055800000012800072001280.jpeg  \n",
 82 |             "  inflating: RGBImages/0_00_48_160__0_00_48_559_0055800000012800072001280.jpeg  \n",
 83 |             "  inflating: RGBImages/0_00_50_520__0_00_54_079_0055800000012800072001280.jpeg  \n",
 84 |             "  inflating: RGBImages/0_00_54_080__0_00_55_799_0055800000012800072001280.jpeg  \n",
 85 |             "  inflating: RGBImages/0_00_56_000__0_00_56_359_0055800000012800072001280.jpeg  \n",
 86 |             "  inflating: RGBImages/0_00_57_680__0_00_57_999_0055800000012800072001280.jpeg  \n",
 87 |             "  inflating: RGBImages/0_01_00_120__0_01_00_759_0055800000012800072001280.jpeg  \n",
 88 |             "  inflating: RGBImages/0_01_02_160__0_01_02_919_0055800000012800072001280.jpeg  \n",
 89 |             "  inflating: RGBImages/0_01_04_320__0_01_06_759_0055800000012800072001280.jpeg  \n",
 90 |             "  inflating: RGBImages/0_01_08_120__0_01_08_679_0055800000012800072001280.jpeg  \n",
 91 |             "  inflating: RGBImages/0_01_08_680__0_01_13_119_0055800000012800072001280.jpeg  \n",
 92 |             "  inflating: RGBImages/0_01_13_120__0_01_13_799_0055800000012800072001280.jpeg  \n",
 93 |             "  inflating: RGBImages/0_01_13_800__0_01_16_079_0055800000012800072001280.jpeg  \n",
 94 |             "  inflating: RGBImages/0_01_16_080__0_01_17_039_0055800000012800072001280.jpeg  \n",
 95 |             "  inflating: RGBImages/0_01_19_320__0_01_20_359_0055800000012800072001280.jpeg  \n",
 96 |             "  inflating: RGBImages/0_01_20_360__0_01_21_919_0055800000012800072001280.jpeg  \n",
 97 |             "  inflating: RGBImages/0_01_23_120__0_01_23_559_0055800000012800072001280.jpeg  \n",
 98 |             "  inflating: RGBImages/0_01_23_560__0_01_24_959_0055800000012800072001280.jpeg  \n",
 99 |             "  inflating: RGBImages/0_01_24_960__0_01_25_559_0055800000012800072001280.jpeg  \n",
100 |             "  inflating: RGBImages/0_01_25_560__0_01_26_159_0055800000012800072001280.jpeg  \n",
101 |             "  inflating: RGBImages/0_01_27_560__0_01_27_919_0055800000012800072001280.jpeg  \n",
102 |             "  inflating: RGBImages/0_01_27_920__0_01_30_439_0055800000012800072001280.jpeg  \n",
103 |             "  inflating: RGBImages/0_01_30_440__0_01_31_119_0055800000012800072001280.jpeg  \n",
104 |             "  inflating: RGBImages/0_01_31_120__0_01_31_599_0055800000012800072001280.jpeg  \n",
105 |             "  inflating: RGBImages/0_01_31_600__0_01_32_119_0055800000012800072001280.jpeg  \n",
106 |             "  inflating: RGBImages/0_01_33_040__0_01_34_639_0055800000012800072001280.jpeg  \n",
107 |             "  inflating: RGBImages/0_01_34_640__0_01_38_439_0055800000012800072001280.jpeg  \n",
108 |             "  inflating: RGBImages/0_01_38_440__0_01_38_839_0055800000012800072001280.jpeg  \n",
109 |             "  inflating: RGBImages/0_01_39_960__0_01_40_279_0055800000012800072001280.jpeg  \n",
110 |             "  inflating: RGBImages/0_01_40_280__0_01_40_879_0055800000012800072001280.jpeg  \n",
111 |             "  inflating: RGBImages/0_01_47_920__0_01_48_559_0055800000012800072001280.jpeg  \n",
112 |             "  inflating: RGBImages/0_01_48_560__0_01_50_679_0055800000012800072001280.jpeg  \n",
113 |             "  inflating: RGBImages/0_01_50_920__0_01_51_319_0055800000012800072001280.jpeg  \n",
114 |             "  inflating: RGBImages/0_01_52_520__0_01_53_359_0055800000012800072001280.jpeg  \n",
115 |             "  inflating: RGBImages/0_01_53_360__0_01_53_999_0055800000012800072001280.jpeg  \n",
116 |             "  inflating: RGBImages/0_01_54_000__0_01_56_159_0055800000012800072001280.jpeg  \n",
117 |             "  inflating: RGBImages/0_01_56_160__0_01_56_959_0055800000012800072001280.jpeg  \n",
118 |             "  inflating: RGBImages/0_01_58_040__0_01_58_399_0055800000012800072001280.jpeg  \n",
119 |             "  inflating: RGBImages/0_01_58_400__0_01_59_639_0055800000012800072001280.jpeg  \n",
120 |             "  inflating: RGBImages/0_01_59_640__0_02_00_479_0055800000012800072001280.jpeg  \n",
121 |             "  inflating: RGBImages/0_02_00_480__0_02_01_039_0055800000012800072001280.jpeg  \n",
122 |             "  inflating: RGBImages/0_02_02_240__0_02_02_799_0055800000012800072001280.jpeg  \n",
123 |             "  inflating: RGBImages/0_02_02_800__0_02_04_039_0055800000012800072001280.jpeg  \n",
124 |             "  inflating: RGBImages/0_02_08_000__0_02_09_038_0055800000012800072001280.jpeg  \n",
125 |             "  inflating: RGBImages/0_02_09_039__0_02_10_198_0055800000012800072001280.jpeg  \n",
126 |             "  inflating: RGBImages/0_02_11_720__0_02_13_119_0055800000012800072001280.jpeg  \n",
127 |             "  inflating: RGBImages/0_02_13_280__0_02_13_799_0055800000012800072001280.jpeg  \n",
128 |             "  inflating: RGBImages/0_02_13_800__0_02_14_719_0055800000012800072001280.jpeg  \n",
129 |             "  inflating: RGBImages/0_02_14_720__0_02_15_239_0055800000012800072001280.jpeg  \n",
130 |             "  inflating: RGBImages/0_02_15_240__0_02_15_839_0055800000012800072001280.jpeg  \n",
131 |             "  inflating: RGBImages/0_02_17_640__0_02_21_719_0055800000012800072001280.jpeg  \n",
132 |             "  inflating: RGBImages/0_02_21_720__0_02_22_639_0055800000012800072001280.jpeg  \n",
133 |             "  inflating: RGBImages/0_02_26_640__0_02_27_239_0055800000012800072001280.jpeg  \n",
134 |             "  inflating: RGBImages/0_02_27_240__0_02_27_879_0055800000012800072001280.jpeg  \n",
135 |             "  inflating: RGBImages/0_02_27_920__0_02_28_479_0055800000012800072001280.jpeg  \n",
136 |             "  inflating: RGBImages/0_02_29_360__0_02_30_119_0055800000012800072001280.jpeg  \n",
137 |             "  inflating: RGBImages/0_02_30_240__0_02_30_639_0055800000012800072001280.jpeg  \n",
138 |             "  inflating: RGBImages/0_02_31_200__0_02_31_599_0055800000012800072001280.jpeg  \n",
139 |             "  inflating: RGBImages/0_02_31_600__0_02_32_559_0055800000012800072001280.jpeg  \n",
140 |             "  inflating: RGBImages/0_02_32_560__0_02_33_439_0055800000012800072001280.jpeg  \n",
141 |             "  inflating: RGBImages/0_02_33_440__0_02_34_079_0055800000012800072001280.jpeg  \n",
142 |             "  inflating: RGBImages/0_02_35_520__0_02_37_159_0055800000012800072001280.jpeg  \n",
143 |             "  inflating: RGBImages/0_02_37_160__0_02_41_959_0055800000012800072001280.jpeg  \n",
144 |             "  inflating: RGBImages/0_02_46_440__0_02_47_039_0055800000012800072001280.jpeg  \n",
145 |             "  inflating: RGBImages/0_02_47_040__0_02_48_199_0055800000012800072001280.jpeg  \n",
146 |             "  inflating: RGBImages/0_02_50_520__0_02_50_879_0055800000012800072001280.jpeg  \n",
147 |             "  inflating: RGBImages/0_02_50_880__0_02_53_279_0055800000012800072001280.jpeg  \n",
148 |             "  inflating: RGBImages/0_02_54_840__0_02_56_679_0055800000012800072001280.jpeg  \n",
149 |             "  inflating: RGBImages/0_02_56_680__0_02_57_519_0055800000012800072001280.jpeg  \n",
150 |             "  inflating: RGBImages/0_02_57_520__0_02_57_999_0055800000012800072001280.jpeg  \n",
151 |             "  inflating: RGBImages/0_03_00_360__0_03_00_919_0055800000012800072001280.jpeg  \n",
152 |             "  inflating: RGBImages/0_03_00_920__0_03_01_519_0055800000012800072001280.jpeg  \n",
153 |             "  inflating: RGBImages/0_03_01_560__0_03_04_599_0055800000012800072001280.jpeg  \n",
154 |             "  inflating: RGBImages/0_03_04_600__0_03_05_879_0055800000012800072001280.jpeg  \n",
155 |             "  inflating: RGBImages/0_03_05_880__0_03_06_759_0055800000012800072001280.jpeg  \n",
156 |             "  inflating: RGBImages/0_03_10_160__0_03_10_559_0055800000012800072001280.jpeg  \n",
157 |             "  inflating: RGBImages/0_03_11_680__0_03_11_999_0055800000012800072001280.jpeg  \n",
158 |             "  inflating: RGBImages/0_03_12_040__0_03_12_399_0055800000012800072001280.jpeg  \n",
159 |             "  inflating: RGBImages/0_03_12_400__0_03_12_919_0055800000012800072001280.jpeg  \n",
160 |             "  inflating: RGBImages/0_03_12_920__0_03_13_239_0055800000012800072001280.jpeg  \n",
161 |             "  inflating: RGBImages/0_03_13_240__0_03_13_599_0055800000012800072001280.jpeg  \n",
162 |             "  inflating: RGBImages/0_03_21_000__0_03_21_479_0055800000012800072001280.jpeg  \n",
163 |             "  inflating: RGBImages/0_03_21_600__0_03_21_919_0055800000012800072001280.jpeg  \n",
164 |             "  inflating: RGBImages/0_03_21_920__0_03_22_239_0055800000012800072001280.jpeg  \n",
165 |             "  inflating: RGBImages/0_03_24_480__0_03_24_919_0055800000012800072001280.jpeg  \n",
166 |             "  inflating: RGBImages/0_03_24_920__0_03_25_639_0055800000012800072001280.jpeg  \n",
167 |             "  inflating: RGBImages/0_03_25_640__0_03_27_119_0055800000012800072001280.jpeg  \n",
168 |             "  inflating: RGBImages/0_03_27_120__0_03_27_999_0055800000012800072001280.jpeg  \n",
169 |             "  inflating: RGBImages/0_03_29_520__0_03_30_039_0055800000012800072001280.jpeg  \n",
170 |             "  inflating: RGBImages/0_03_30_120__0_03_30_759_0055800000012800072001280.jpeg  \n",
171 |             "  inflating: RGBImages/0_03_30_760__0_03_31_639_0055800000012800072001280.jpeg  \n",
172 |             "  inflating: RGBImages/0_03_31_640__0_03_31_959_0055800000012800072001280.jpeg  \n",
173 |             "  inflating: RGBImages/0_03_31_960__0_03_32_319_0055800000012800072001280.jpeg  \n",
174 |             "  inflating: RGBImages/0_03_33_680__0_03_33_999_0055800000012800072001280.jpeg  \n",
175 |             "  inflating: RGBImages/0_03_34_000__0_03_34_599_0055800000012800072001280.jpeg  \n",
176 |             "  inflating: RGBImages/0_03_34_600__0_03_35_399_0055800000012800072001280.jpeg  \n",
177 |             "  inflating: RGBImages/0_03_35_520__0_03_37_959_0055800000012800072001280.jpeg  \n",
178 |             "  inflating: RGBImages/0_03_38_400__0_03_38_879_0055800000012800072001280.jpeg  \n",
179 |             "  inflating: RGBImages/0_03_38_880__0_03_39_439_0055800000012800072001280.jpeg  \n",
180 |             "  inflating: RGBImages/0_03_39_440__0_03_39_919_0055800000012800072001280.jpeg  \n",
181 |             "  inflating: RGBImages/0_03_40_160__0_03_40_599_0055800000012800072001280.jpeg  \n",
182 |             "  inflating: RGBImages/0_03_40_600__0_03_40_919_0055800000012800072001280.jpeg  \n",
183 |             "  inflating: RGBImages/0_03_40_920__0_03_41_399_0055800000012800072001280.jpeg  \n",
184 |             "  inflating: RGBImages/0_03_44_240__0_03_44_679_0055800000012800072001280.jpeg  \n",
185 |             "  inflating: RGBImages/0_03_44_680__0_03_44_999_0055800000012800072001280.jpeg  \n",
186 |             "  inflating: RGBImages/0_03_45_000__0_03_49_239_0055800000012800072001280.jpeg  \n",
187 |             "  inflating: RGBImages/0_03_49_240__0_03_50_799_0055800000012800072001280.jpeg  \n",
188 |             "  inflating: RGBImages/0_03_50_840__0_03_51_199_0055800000012800072001280.jpeg  \n",
189 |             "  inflating: RGBImages/0_03_51_200__0_03_51_599_0055800000012800072001280.jpeg  \n",
190 |             "  inflating: RGBImages/0_03_57_240__0_03_57_919_0055800000012800072001280.jpeg  \n",
191 |             "  inflating: RGBImages/0_03_58_440__0_03_59_199_0055800000012800072001280.jpeg  \n",
192 |             "  inflating: RGBImages/0_03_59_200__0_04_05_279_0055800000012800072001280.jpeg  \n",
193 |             "  inflating: RGBImages/0_04_05_280__0_04_06_919_0055800000012800072001280.jpeg  \n",
194 |             "  inflating: RGBImages/0_04_20_840__0_04_21_159_0055800000012800072001280.jpeg  \n",
195 |             "  inflating: RGBImages/0_04_34_720__0_04_35_879_0055800000012800072001280.jpeg  \n"
196 |           ]
197 |         }
198 |       ],
199 |       "source": [
200 |         "!wget https://github.com/SWHL/RapidVideOCR/files/11184614/RGBImages.zip\n",
201 |         "!unzip RGBImages.zip\n",
202 |         "!rm RGBImages.zip"
203 |       ]
204 |     },
205 |     {
206 |       "cell_type": "markdown",
207 |       "metadata": {
208 |         "id": "1cTofr4Zq_WB"
209 |       },
210 |       "source": [
211 |         "#### Install the RapidVideOCR package."
212 |       ]
213 |     },
214 |     {
215 |       "cell_type": "code",
216 |       "execution_count": 34,
217 |       "metadata": {
218 |         "colab": {
219 |           "base_uri": "https://localhost:8080/"
220 |         },
221 |         "id": "5URYsomEqnuh",
222 |         "outputId": "3a6093db-bfaa-4069-e92e-2e7bab97f987"
223 |       },
224 |       "outputs": [
225 |         {
226 |           "name": "stdout",
227 |           "output_type": "stream",
228 |           "text": [
229 |             "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
230 |             "Requirement already satisfied: rapid_videocr in /usr/local/lib/python3.9/dist-packages (2.1.6)\n",
231 |             "Requirement already satisfied: rapidocr-onnxruntime>=1.2.2 in /usr/local/lib/python3.9/dist-packages (from rapid_videocr) (1.2.5)\n",
232 |             "Requirement already satisfied: tqdm>=4.52.0 in /usr/local/lib/python3.9/dist-packages (from rapid_videocr) (4.65.0)\n",
233 |             "Requirement already satisfied: PyYAML in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (6.0)\n",
234 |             "Requirement already satisfied: Pillow in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (8.4.0)\n",
235 |             "Requirement already satisfied: onnxruntime>=1.7.0 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.14.1)\n",
236 |             "Requirement already satisfied: Shapely>=1.7.1 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (2.0.1)\n",
237 |             "Requirement already satisfied: pyclipper>=1.2.1 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.3.0.post4)\n",
238 |             "Requirement already satisfied: numpy>=1.19.3 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.22.4)\n",
239 |             "Requirement already satisfied: opencv-python>=4.5.1.48 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (4.7.0.72)\n",
240 |             "Requirement already satisfied: six>=1.15.0 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.16.0)\n",
241 |             "Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.11.1)\n",
242 |             "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (23.3.3)\n",
243 |             "Requirement already satisfied: protobuf in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (3.20.3)\n",
244 |             "Requirement already satisfied: packaging in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (23.0)\n",
245 |             "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (15.0.1)\n",
246 |             "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.9/dist-packages (from coloredlogs->onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (10.0)\n",
247 |             "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.3.0)\n"
248 |           ]
249 |         }
250 |       ],
251 |       "source": [
252 |         "!pip install rapid_videocr"
253 |       ]
254 |     },
255 |     {
256 |       "cell_type": "code",
257 |       "execution_count": 35,
258 |       "metadata": {
259 |         "colab": {
260 |           "base_uri": "https://localhost:8080/"
261 |         },
262 |         "id": "13GXToLcrFl8",
263 |         "outputId": "d18fb2c0-79ae-4e29-9b27-de7f7e980707"
264 |       },
265 |       "outputs": [
266 |         {
267 |           "name": "stdout",
268 |           "output_type": "stream",
269 |           "text": [
270 |             "Running with concat recognition.\n",
271 |             "OCR: 100% 14/14 [00:28<00:00,  2.07s/it]\n",
272 |             "The file has been saved in the result/result.srt\n",
273 |             "The result has been saved to result directory.\n"
274 |           ]
275 |         }
276 |       ],
277 |       "source": [
278 |         "!rapid_videocr -i /content/RGBImages -s result -o srt"
279 |       ]
280 |     },
281 |     {
282 |       "cell_type": "markdown",
283 |       "metadata": {
284 |         "id": "DR8RbHFisLZK"
285 |       },
286 |       "source": [
287 |         "#### Look the result."
288 |       ]
289 |     },
290 |     {
291 |       "cell_type": "code",
292 |       "execution_count": 36,
293 |       "metadata": {
294 |         "colab": {
295 |           "base_uri": "https://localhost:8080/"
296 |         },
297 |         "id": "g2PdZnGJrsdx",
298 |         "outputId": "c2107dd0-f099-464c-91a0-247294e69c60"
299 |       },
300 |       "outputs": [
301 |         {
302 |           "name": "stdout",
303 |           "output_type": "stream",
304 |           "text": [
305 |             "1\n",
306 |             "00:00:20,640 --> 00:00:23,999\n",
307 |             "Eyelyinightinmydreams\n",
308 |             "\n",
309 |             "2\n",
310 |             "00:00:25,120 --> 00:00:25,999\n",
311 |             "Iseeyou\n",
312 |             "\n",
313 |             "3\n",
314 |             "00:00:26,000 --> 00:00:26,599\n",
315 |             "Iseeyou\n",
316 |             "\n",
317 |             "4\n",
318 |             "00:00:27,760 --> 00:00:28,999\n",
319 |             "Ifell you\n",
320 |             "\n",
321 |             "5\n",
322 |             "00:00:30,280 --> 00:00:30,599\n",
323 |             "That is hiow I know you go on\n",
324 |             "\n",
325 |             "6\n",
326 |             "00:00:30,600 --> 00:00:32,199\n",
327 |             "howIknowyougoon\n",
328 |             "\n",
329 |             "7\n",
330 |             "00:00:32,800 --> 00:00:33,199\n",
331 |             "That is howIkhowyougo.on\n",
332 |             "\n",
333 |             "8\n",
334 |             "00:00:33,200 --> 00:00:34,959\n",
335 |             "That is how I know you go on\n",
336 |             "\n",
337 |             "9\n",
338 |             "00:00:34,960 --> 00:00:35,519\n",
339 |             "That is how I know you go on\n",
340 |             "\n",
341 |             "11\n",
342 |             "00:00:40,040 --> 00:00:41,679\n",
343 |             "Faracrossthedistance\n",
344 |             "\n",
345 |             "12\n",
346 |             "00:00:41,680 --> 00:00:42,919\n",
347 |             "Faracross the distance\n",
348 |             "\n",
349 |             "13\n",
350 |             "00:00:42,920 --> 00:00:43,439\n",
351 |             "Faracross thelistance\n",
352 |             "\n",
353 |             "14\n",
354 |             "00:00:43,440 --> 00:00:43,799\n",
355 |             "Faracrosshe Mistance\n",
356 |             "\n",
357 |             "15\n",
358 |             "00:00:44,920 --> 00:00:45,359\n",
359 |             "and Spaces between us\n",
360 |             "\n",
361 |             "16\n",
362 |             "00:00:45,360 --> 00:00:47,799\n",
363 |             "and Spaces between us\n",
364 |             "\n",
365 |             "17\n",
366 |             "00:00:47,800 --> 00:00:48,159\n",
367 |             "and Spaces betweenus\n",
368 |             "\n",
369 |             "18\n",
370 |             "00:00:48,160 --> 00:00:48,559\n",
371 |             "and Spacesbetween us\n",
372 |             "\n",
373 |             "19\n",
374 |             "00:00:50,520 --> 00:00:54,079\n",
375 |             "u havecometo showyou go on\n",
376 |             "\n",
377 |             "20\n",
378 |             "00:00:54,080 --> 00:00:55,799\n",
379 |             "You have come to show vou go on\n",
380 |             "\n",
381 |             "21\n",
382 |             "00:00:56,000 --> 00:00:56,359\n",
383 |             "You haveoreto show y\n",
384 |             "\n",
385 |             "23\n",
386 |             "00:01:00,120 --> 00:01:00,759\n",
387 |             "Near\n",
388 |             "\n",
389 |             "24\n",
390 |             "00:01:02,160 --> 00:01:02,919\n",
391 |             "far\n",
392 |             "\n",
393 |             "25\n",
394 |             "00:01:04,320 --> 00:01:06,759\n",
395 |             "reveryouare\n",
396 |             "\n",
397 |             "26\n",
398 |             "00:01:08,120 --> 00:01:08,679\n",
399 |             "I belieye that the heart does go\n",
400 |             "on\n",
401 |             "\n",
402 |             "27\n",
403 |             "00:01:08,680 --> 00:01:13,119\n",
404 |             "I believe that the heart does go\n",
405 |             "on\n",
406 |             "\n",
407 |             "28\n",
408 |             "00:01:13,120 --> 00:01:13,799\n",
409 |             "I believe that the heart does go\n",
410 |             "on\n",
411 |             "\n",
412 |             "29\n",
413 |             "00:01:13,800 --> 00:01:16,079\n",
414 |             "I believe that the heart does go\n",
415 |             "on\n",
416 |             "\n",
417 |             "30\n",
418 |             "00:01:16,080 --> 00:01:17,039\n",
419 |             "I believe that the heart does go\n",
420 |             "on\n",
421 |             "\n",
422 |             "31\n",
423 |             "00:01:19,320 --> 00:01:20,359\n",
424 |             "Once more\n",
425 |             "\n",
426 |             "32\n",
427 |             "00:01:20,360 --> 00:01:21,919\n",
428 |             "Once more\n",
429 |             "\n",
430 |             "33\n",
431 |             "00:01:23,120 --> 00:01:23,559\n",
432 |             "thedoor\n",
433 |             "you\n",
434 |             "\n",
435 |             "34\n",
436 |             "00:01:23,560 --> 00:01:24,959\n",
437 |             "you open the door\n",
438 |             "\n",
439 |             "35\n",
440 |             "00:01:24,960 --> 00:01:25,559\n",
441 |             "youopen thedoor\n",
442 |             "\n",
443 |             "36\n",
444 |             "00:01:25,560 --> 00:01:26,159\n",
445 |             "you open the door\n",
446 |             "\n",
447 |             "37\n",
448 |             "00:01:27,560 --> 00:01:27,919\n",
449 |             "And you're here in my heart\n",
450 |             "\n",
451 |             "38\n",
452 |             "00:01:27,920 --> 00:01:30,439\n",
453 |             "And you're here in my heart\n",
454 |             "\n",
455 |             "39\n",
456 |             "00:01:30,440 --> 00:01:31,119\n",
457 |             "And you're here in iny heart\n",
458 |             "\n",
459 |             "40\n",
460 |             "00:01:31,120 --> 00:01:31,599\n",
461 |             "Andyou're here inm heart\n",
462 |             "\n",
463 |             "41\n",
464 |             "00:01:31,600 --> 00:01:32,119\n",
465 |             "Andeinmneart\n",
466 |             "\n",
467 |             "42\n",
468 |             "00:01:33,040 --> 00:01:34,639\n",
469 |             "my heartwim goonand\n",
470 |             "on\n",
471 |             "\n",
472 |             "43\n",
473 |             "00:01:34,640 --> 00:01:38,439\n",
474 |             "my heart will go on and\n",
475 |             "on\n",
476 |             "\n",
477 |             "44\n",
478 |             "00:01:38,440 --> 00:01:38,839\n",
479 |             "my heart will go on and\n",
480 |             "on\n",
481 |             "\n",
482 |             "47\n",
483 |             "00:01:47,920 --> 00:01:48,559\n",
484 |             "Love can touch us onetime\n",
485 |             "\n",
486 |             "48\n",
487 |             "00:01:48,560 --> 00:01:50,679\n",
488 |             "Lovecantouch usonetime\n",
489 |             "\n",
490 |             "49\n",
491 |             "00:01:50,920 --> 00:01:51,319\n",
492 |             "Love cantouch usone time\n",
493 |             "\n",
494 |             "50\n",
495 |             "00:01:52,520 --> 00:01:53,359\n",
496 |             "And lastforaJifetime\n",
497 |             "\n",
498 |             "51\n",
499 |             "00:01:53,360 --> 00:01:53,999\n",
500 |             "And lastfora lifetime\n",
501 |             "\n",
502 |             "52\n",
503 |             "00:01:54,000 --> 00:01:56,159\n",
504 |             "And last fora lifetime\n",
505 |             "\n",
506 |             "53\n",
507 |             "00:01:56,160 --> 00:01:56,959\n",
508 |             "An st for a lifetime\n",
509 |             "\n",
510 |             "54\n",
511 |             "00:01:58,040 --> 00:01:58,399\n",
512 |             "And never let go till\n",
513 |             "\n",
514 |             "55\n",
515 |             "00:01:58,400 --> 00:01:59,639\n",
516 |             "And never let go till\n",
517 |             "\n",
518 |             "56\n",
519 |             "00:01:59,640 --> 00:02:00,479\n",
520 |             "And never let go till\n",
521 |             "\n",
522 |             "57\n",
523 |             "00:02:00,480 --> 00:02:01,039\n",
524 |             "And never let go till\n",
525 |             "\n",
526 |             "58\n",
527 |             "00:02:02,240 --> 00:02:02,799\n",
528 |             "we're gone\n",
529 |             "\n",
530 |             "59\n",
531 |             "00:02:02,800 --> 00:02:04,039\n",
532 |             "we're gone\n",
533 |             "\n",
534 |             "62\n",
535 |             "00:02:11,720 --> 00:02:13,119\n",
536 |             "one true time\n",
537 |             "\n",
538 |             "65\n",
539 |             "00:02:14,720 --> 00:02:15,239\n",
540 |             "Tholdto\n",
541 |             "\n",
542 |             "66\n",
543 |             "00:02:15,240 --> 00:02:15,839\n",
544 |             "I holdto\n",
545 |             "\n",
546 |             "67\n",
547 |             "00:02:17,640 --> 00:02:21,719\n",
548 |             "Imy lifewe'll alwaysgo on\n",
549 |             "\n",
550 |             "68\n",
551 |             "00:02:21,720 --> 00:02:22,639\n",
552 |             "I my life we'll always go on\n",
553 |             "\n",
554 |             "69\n",
555 |             "00:02:26,640 --> 00:02:27,239\n",
556 |             "Near\n",
557 |             "\n",
558 |             "70\n",
559 |             "00:02:27,240 --> 00:02:27,879\n",
560 |             "Near\n",
561 |             "\n",
562 |             "72\n",
563 |             "00:02:29,360 --> 00:02:30,119\n",
564 |             "far\n",
565 |             "\n",
566 |             "74\n",
567 |             "00:02:31,200 --> 00:02:31,599\n",
568 |             "whereveryou are\n",
569 |             "\n",
570 |             "75\n",
571 |             "00:02:31,600 --> 00:02:32,559\n",
572 |             "wherever you-are\n",
573 |             "\n",
574 |             "76\n",
575 |             "00:02:32,560 --> 00:02:33,439\n",
576 |             "whereveryou are\n",
577 |             "\n",
578 |             "77\n",
579 |             "00:02:33,440 --> 00:02:34,079\n",
580 |             "whereveryou are\n",
581 |             "\n",
582 |             "78\n",
583 |             "00:02:35,520 --> 00:02:37,159\n",
584 |             "I believe that the heart does go\n",
585 |             "on\n",
586 |             "\n",
587 |             "79\n",
588 |             "00:02:37,160 --> 00:02:41,959\n",
589 |             "I believe that the heart does go\n",
590 |             "on\n",
591 |             "\n",
592 |             "80\n",
593 |             "00:02:46,440 --> 00:02:47,039\n",
594 |             "Once more\n",
595 |             "\n",
596 |             "81\n",
597 |             "00:02:47,040 --> 00:02:48,199\n",
598 |             "Oncemore\n",
599 |             "\n",
600 |             "82\n",
601 |             "00:02:50,520 --> 00:02:50,879\n",
602 |             "you openthe dooi\n",
603 |             "\n",
604 |             "83\n",
605 |             "00:02:50,880 --> 00:02:53,279\n",
606 |             "you open the door\n",
607 |             "\n",
608 |             "84\n",
609 |             "00:02:54,840 --> 00:02:56,679\n",
610 |             "And you're here in my heart\n",
611 |             "\n",
612 |             "85\n",
613 |             "00:02:56,680 --> 00:02:57,519\n",
614 |             "And you're here in my heart\n",
615 |             "\n",
616 |             "86\n",
617 |             "00:02:57,520 --> 00:02:57,999\n",
618 |             "And you're here in my heart\n",
619 |             "\n",
620 |             "87\n",
621 |             "00:03:00,360 --> 00:03:00,919\n",
622 |             "heartwill goonand\n",
623 |             "\n",
624 |             "88\n",
625 |             "00:03:00,920 --> 00:03:01,519\n",
626 |             "my heart will go on and\n",
627 |             "on\n",
628 |             "\n",
629 |             "89\n",
630 |             "00:03:01,560 --> 00:03:04,599\n",
631 |             "my heart will go on and\n",
632 |             "on\n",
633 |             "on\n",
634 |             "my heart will go on and\n",
635 |             "\n",
636 |             "90\n",
637 |             "00:03:04,600 --> 00:03:05,879\n",
638 |             "on\n",
639 |             "\n",
640 |             "91\n",
641 |             "00:03:05,880 --> 00:03:06,759\n",
642 |             "my heart will go on and\n",
643 |             "on\n",
644 |             "\n",
645 |             "102\n",
646 |             "00:03:24,920 --> 00:03:25,639\n",
647 |             "You're here\n",
648 |             "\n",
649 |             "103\n",
650 |             "00:03:25,640 --> 00:03:27,119\n",
651 |             "You're here\n",
652 |             "\n",
653 |             "104\n",
654 |             "00:03:27,120 --> 00:03:27,999\n",
655 |             "You're here\n",
656 |             "\n",
657 |             "105\n",
658 |             "00:03:29,520 --> 00:03:30,039\n",
659 |             "there's nothing I fear\n",
660 |             "\n",
661 |             "106\n",
662 |             "00:03:30,120 --> 00:03:30,759\n",
663 |             "there's nothing Ifear\n",
664 |             "\n",
665 |             "107\n",
666 |             "00:03:30,760 --> 00:03:31,639\n",
667 |             "there's nothing Ifear\n",
668 |             "\n",
669 |             "108\n",
670 |             "00:03:31,640 --> 00:03:31,959\n",
671 |             "there's nothing I fear\n",
672 |             "\n",
673 |             "109\n",
674 |             "00:03:31,960 --> 00:03:32,319\n",
675 |             "there nothigIfear\n",
676 |             "\n",
677 |             "110\n",
678 |             "00:03:33,680 --> 00:03:33,999\n",
679 |             "AndIknow\n",
680 |             "\n",
681 |             "111\n",
682 |             "00:03:34,000 --> 00:03:34,599\n",
683 |             "AndIknow\n",
684 |             "\n",
685 |             "112\n",
686 |             "00:03:34,600 --> 00:03:35,399\n",
687 |             "AndIknow\n",
688 |             "\n",
689 |             "113\n",
690 |             "00:03:35,520 --> 00:03:37,959\n",
691 |             "that my heart will you go on\n",
692 |             "\n",
693 |             "114\n",
694 |             "00:03:38,400 --> 00:03:38,879\n",
695 |             "that my heart will you go on\n",
696 |             "\n",
697 |             "115\n",
698 |             "00:03:38,880 --> 00:03:39,439\n",
699 |             "that my heart will you go on\n",
700 |             "\n",
701 |             "116\n",
702 |             "00:03:39,440 --> 00:03:39,919\n",
703 |             "that my heart will you go on\n",
704 |             "\n",
705 |             "117\n",
706 |             "00:03:40,160 --> 00:03:40,599\n",
707 |             "that my heart will you go on\n",
708 |             "\n",
709 |             "118\n",
710 |             "00:03:40,600 --> 00:03:40,919\n",
711 |             "that my heart will you go on\n",
712 |             "\n",
713 |             "119\n",
714 |             "00:03:40,920 --> 00:03:41,399\n",
715 |             "that my heart will you go on\n",
716 |             "\n",
717 |             "120\n",
718 |             "00:03:44,240 --> 00:03:44,679\n",
719 |             "Weill stay foreverthsway\n",
720 |             "\n",
721 |             "121\n",
722 |             "00:03:44,680 --> 00:03:44,999\n",
723 |             "We'll stayforever this way\n",
724 |             "\n",
725 |             "122\n",
726 |             "00:03:45,000 --> 00:03:49,239\n",
727 |             "We'll stay forever this way\n",
728 |             "\n",
729 |             "123\n",
730 |             "00:03:49,240 --> 00:03:50,799\n",
731 |             "We'll stay forever this way\n",
732 |             "\n",
733 |             "124\n",
734 |             "00:03:50,840 --> 00:03:51,199\n",
735 |             "We'll stay forever this way\n",
736 |             "\n",
737 |             "125\n",
738 |             "00:03:51,200 --> 00:03:51,599\n",
739 |             "We'll stay forever this way\n",
740 |             "\n",
741 |             "126\n",
742 |             "00:03:57,240 --> 00:03:57,919\n",
743 |             "You are And in my heart\n",
744 |             "\n",
745 |             "127\n",
746 |             "00:03:58,440 --> 00:03:59,199\n",
747 |             "my heart will go on and\n",
748 |             "on\n",
749 |             "\n",
750 |             "128\n",
751 |             "00:03:59,200 --> 00:04:05,279\n",
752 |             "my heart will go on and\n",
753 |             "on\n",
754 |             "\n",
755 |             "129\n",
756 |             "00:04:05,280 --> 00:04:06,919\n",
757 |             "my heart will go on and\n",
758 |             "on\n",
759 |             "\n"
760 |           ]
761 |         }
762 |       ],
763 |       "source": [
764 |         "!cat result/result.srt"
765 |       ]
766 |     },
767 |     {
768 |       "cell_type": "code",
769 |       "execution_count": null,
770 |       "metadata": {
771 |         "id": "cNjpqvivs1ZA"
772 |       },
773 |       "outputs": [],
774 |       "source": []
775 |     }
776 |   ],
777 |   "metadata": {
778 |     "colab": {
779 |       "provenance": []
780 |     },
781 |     "kernelspec": {
782 |       "display_name": "Python 3",
783 |       "name": "python3"
784 |     }
785 |   },
786 |   "nbformat": 4,
787 |   "nbformat_minor": 0
788 | }
789 | 


--------------------------------------------------------------------------------
/assets/colab-badge.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="117" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="117" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h30v20H0z"/><path fill="#007ec6" d="M30 0h87v20H30z"/><path fill="url(#b)" d="M0 0h117v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><svg x="4px" y="0px" width="22px" height="20px" viewBox="-2 0 28 24" style="background-color: #fff;border-radius: 1px;"><path style="fill:#e8710a;" d="M1.977,16.77c-2.667-2.277-2.605-7.079,0-9.357C2.919,8.057,3.522,9.075,4.49,9.691c-1.152,1.6-1.146,3.201-0.004,4.803C3.522,15.111,2.918,16.126,1.977,16.77z"/><path style="fill:#f9ab00;" d="M12.257,17.114c-1.767-1.633-2.485-3.658-2.118-6.02c0.451-2.91,2.139-4.893,4.946-5.678c2.565-0.718,4.964-0.217,6.878,1.819c-0.884,0.743-1.707,1.547-2.434,2.446C18.488,8.827,17.319,8.435,16,8.856c-2.404,0.767-3.046,3.241-1.494,5.644c-0.241,0.275-0.493,0.541-0.721,0.826C13.295,15.939,12.511,16.3,12.257,17.114z"/><path style="fill:#e8710a;" d="M19.529,9.682c0.727-0.899,1.55-1.703,2.434-2.446c2.703,2.783,2.701,7.031-0.005,9.764c-2.648,2.674-6.936,2.725-9.701,0.115c0.254-0.814,1.038-1.175,1.528-1.788c0.228-0.285,0.48-0.552,0.721-0.826c1.053,0.916,2.254,1.268,3.6,0.83C20.502,14.551,21.151,11.927,19.529,9.682z"/><path style="fill:#f9ab00;" d="M4.49,9.691C3.522,9.075,2.919,8.057,1.977,7.413c2.209-2.398,5.721-2.942,8.476-1.355c0.555,0.32,0.719,0.606,0.285,1.128c-0.157,0.188-0.258,0.422-0.391,0.631c-0.299,0.47-0.509,1.067-0.929,1.371C8.933,9.539,8.523,8.847,8.021,8.746C6.673,8.475,5.509,8.787,4.49,9.691z"/><path style="fill:#f9ab00;" d="M1.977,16.77c0.941-0.644,1.545-1.659,2.509-2.277c1.373,1.152,2.85,1.433,4.45,0.499c0.332-0.194,0.503-0.088,0.673,0.19c0.386,0.635,0.753,1.285,1.181,1.89c0.34,0.48,0.222,0.715-0.253,1.006C7.84,19.73,4.205,19.188,1.977,16.77z"/></svg><text x="245" y="140" transform="scale(.1)" textLength="30"> </text><text x="725" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="770">Open in Colab</text><text x="725" y="140" transform="scale(.1)" textLength="770">Open in Colab</text></g> </svg>
2 | 


--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/assets/logo.png


--------------------------------------------------------------------------------
/cliff.toml:
--------------------------------------------------------------------------------
  1 | # git-cliff ~ configuration file
  2 | # https://git-cliff.org/docs/configuration
  3 | 
  4 | [changelog]
  5 | # A Tera template to be rendered as the changelog's footer.
  6 | # See https://keats.github.io/tera/docs/#introduction
  7 | # header = """
  8 | # # Changelog\n
  9 | # All notable changes to this project will be documented in this file. See [conventional commits](https://www.conventionalcommits.org/) for commit guidelines.\n
 10 | # """
 11 | # A Tera template to be rendered for each release in the changelog.
 12 | # See https://keats.github.io/tera/docs/#introduction
 13 | body = """
 14 | {% for group, commits in commits | group_by(attribute="group") %}
 15 |     ### {{ group | striptags | trim | upper_first }}
 16 |     {% for commit in commits
 17 |     | filter(attribute="scope")
 18 |     | sort(attribute="scope") %}
 19 |         - **({{commit.scope}})**{% if commit.breaking %} [**breaking**]{% endif %} \
 20 |             {{ commit.message }} by [@{{ commit.author.name }}](https://github.com/{{ commit.author.name }}) in [{{ commit.id | truncate(length=7, end="") }}]($REPO/commit/{{ commit.id }})
 21 |     {%- endfor -%}
 22 |     {% raw %}\n{% endraw %}\
 23 |     {%- for commit in commits %}
 24 |         {%- if commit.scope -%}
 25 |         {% else -%}
 26 |             - {% if commit.breaking %} [**breaking**]{% endif %}\
 27 |                 {{ commit.message }} by [@{{ commit.author.name }}](https://github.com/{{ commit.author.name }}) in [{{ commit.id | truncate(length=7, end="") }}]($REPO/commit/{{ commit.id }})
 28 |         {% endif -%}
 29 |     {% endfor -%}
 30 | {% endfor %}
 31 | 
 32 | 
 33 | {% if github.contributors | length > 0 %}
 34 | ### 🎉 Contributors
 35 | 
 36 | {% for contributor in github.contributors %}
 37 |   - [@{{ contributor.username }}](https://github.com/{{ contributor.username }})
 38 | {%- endfor -%}
 39 | {% endif %}
 40 | 
 41 | 
 42 | {% if version %}
 43 |     {% if previous.version %}\
 44 |         **Full Changelog**:  [{{ version | trim_start_matches(pat="v") }}]($REPO/compare/{{ previous.version }}..{{ version }})
 45 |     {% else %}\
 46 |         **Full Changelog**:  [{{ version | trim_start_matches(pat="v") }}]
 47 |     {% endif %}\
 48 | {% else %}\
 49 |     ## [unreleased]
 50 | {% endif %}
 51 | """
 52 | # A Tera template to be rendered as the changelog's footer.
 53 | # See https://keats.github.io/tera/docs/#introduction
 54 | 
 55 | footer = """
 56 | 
 57 | """
 58 | 
 59 | # Remove leading and trailing whitespaces from the changelog's body.
 60 | trim = true
 61 | # postprocessors
 62 | postprocessors = [
 63 |     # Replace the placeholder `<REPO>` with a URL.
 64 |     { pattern = '\$REPO', replace = "https://github.com/SWHL/RapidVideOCR" }, # replace repository URL
 65 | ]
 66 | 
 67 | [git]
 68 | # Parse commits according to the conventional commits specification.
 69 | # See https://www.conventionalcommits.org
 70 | conventional_commits = true
 71 | # Exclude commits that do not match the conventional commits specification.
 72 | filter_unconventional = true
 73 | # Split commits on newlines, treating each line as an individual commit.
 74 | split_commits = false
 75 | # An array of regex based parsers to modify commit messages prior to further processing.
 76 | commit_preprocessors = [
 77 |     # Replace issue numbers with link templates to be updated in `changelog.postprocessors`.
 78 |     #{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](https://github.com/orhun/git-cliff/issues/${2}))"},
 79 | ]
 80 | # An array of regex based parsers for extracting data from the commit message.
 81 | # Assigns commits to groups.
 82 | # Optionally sets the commit's scope and can decide to exclude commits from further processing.
 83 | commit_parsers = [
 84 |   { message = "^feat", group = "<!-- 0 -->🚀 Features" },
 85 |   { message = "^fix", group = "<!-- 1 -->🐛 Bug Fixes" },
 86 |   { message = "^doc", group = "<!-- 3 -->📚 Documentation" },
 87 |   { message = "^perf", group = "<!-- 4 -->⚡ Performance" },
 88 |   { message = "^refactor", group = "<!-- 2 -->🚜 Refactor" },
 89 |   { message = "^style", group = "<!-- 5 -->🎨 Styling" },
 90 |   { message = "^test", group = "<!-- 6 -->🧪 Testing" },
 91 |   { message = "^chore\\(release\\): prepare for", skip = true },
 92 |   { message = "^chore\\(deps.*\\)", skip = true },
 93 |   { message = "^chore\\(pr\\)", skip = true },
 94 |   { message = "^chore\\(pull\\)", skip = true },
 95 |   { message = "^chore|^ci", group = "<!-- 7 -->⚙️ Miscellaneous Tasks" },
 96 |   { body = ".*security", group = "<!-- 8 -->🛡️ Security" },
 97 |   { message = "^revert", group = "<!-- 9 -->◀️ Revert" },
 98 |   { message = ".*", group = "<!-- 10 -->💼 Other" },
 99 | ]
100 | # Exclude commits that are not matched by any commit parser.
101 | filter_commits = false
102 | # Order releases topologically instead of chronologically.
103 | topo_order = false
104 | # Order of commits in each group/release within the changelog.
105 | # Allowed values: newest, oldest
106 | sort_commits = "newest"


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | # @Author: SWHL
 3 | # @Contact: liekkaskono@163.com
 4 | 
 5 | # 提取 + 识别
 6 | from rapid_videocr import RapidVideOCRInput, RapidVideoSubFinderOCR, VideoSubFinderInput
 7 | 
 8 | vsf_exe_path = (
 9 |     r"G:\ProgramFiles\VideoSubFinder_6.10_x64\Release_x64\VideoSubFinderWXW.exe"
10 | )
11 | vsf_input_params = VideoSubFinderInput(vsf_exe_path=vsf_exe_path)
12 | ocr_input_params = RapidVideOCRInput(is_batch_rec=False)
13 | vsf_ocr = RapidVideoSubFinderOCR(vsf_input_params, ocr_input_params)
14 | 
15 | # video_path可以是目录或者具体video路径
16 | video_path = "test_files/tiny/2.mp4"
17 | save_dir = "outputs"
18 | vsf_ocr(video_path, save_dir)
19 | 
20 | 
21 | # # 只识别
22 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput
23 | 
24 | ocr_input_params = RapidVideOCRInput(is_batch_rec=False)
25 | extractor = RapidVideOCR(ocr_input_params)
26 | 
27 | rgb_dir = "tests/test_files/RGBImages"
28 | save_dir = "outputs"
29 | save_name = "a"
30 | 
31 | # outputs/a.srt  outputs/a.ass  outputs/a.txt
32 | extractor(rgb_dir, save_dir, save_name=save_name)
33 | 


--------------------------------------------------------------------------------
/docs/README_zh.md:
--------------------------------------------------------------------------------
 1 | <div align="center">
 2 |   <img src="https://raw.githubusercontent.com/SWHL/RapidVideOCR/main/assets/logo.png" width="55%" height="55%"/>
 3 | 
 4 | <div>&nbsp;</div>
 5 | 
 6 | <a href="https://www.modelscope.cn/studios/liekkas/RapidVideOCR/summary" target="_blank"><img src="https://img.shields.io/badge/ModelScope-Demo-blue"></a>
 7 | <a href="https://colab.research.google.com/github/SWHL/RapidVideOCR/blob/main/assets/RapidVideOCRDemo.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg"></a>
 8 | <a href=""><img src="https://img.shields.io/badge/Python->=3.6-aff.svg"></a>
 9 | <a href=""><img src="https://img.shields.io/badge/OS-Linux%2C%20Win%2C%20Mac-pink.svg"></a>
10 | <a href="https://pypi.org/project/rapid-videocr/"><img alt="PyPI" src="https://img.shields.io/pypi/v/rapid_videocr"></a>
11 | <a href="https://github.com/SWHL/RapidVideOCR/stargazers"><img src="https://img.shields.io/github/stars/SWHL/RapidVideOCR?color=ccf"></a>
12 | <a href="https://pepy.tech/project/rapid-videocr"><img src="https://static.pepy.tech/personalized-badge/rapid-videocr?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads"></a>
13 | <a href="https://semver.org/"><img alt="SemVer2.0" src="https://img.shields.io/badge/SemVer-2.0-brightgreen"></a>
14 | <a href="https://github.com/psf/black"><img src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
15 | 
16 | 简体中文 | [English](https://github.com/SWHL/RapidVideOCR)
17 | </div>
18 | 
19 | ### 简介
20 | 
21 | - 视频硬字幕提取，自动生成对应`srt | ass | txt`文件。
22 | - 支持字幕语言：中文 | 英文 （其他可以支持的语言参见：[支持语种列表](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99))
23 | - 优势如下：
24 |     - **提取更快**：与[VideoSubFinder](https://sourceforge.net/projects/videosubfinder/)软件结合使用，提取关键字幕帧更快。
25 |     - **识别更准**：采用[RapidOCR](https://github.com/RapidAI/RapidOCR)作为识别库。
26 |     - **使用更方便**：pip直接安装即可使用。
27 | 
28 | - 桌面EXE版，请移步[RapidVideOCRDesktop](https://github.com/SWHL/RapidVideOCRDesktop)
29 | - 如果有帮助到您的话，请给个小星星⭐。
30 | 
31 | ### [在线Demo](https://www.modelscope.cn/studios/liekkas/RapidVideOCR/summary)
32 | 
33 | <div align="center">
34 |     <img src="https://github.com/SWHL/RapidVideOCR/releases/download/v2.0.1/OnlineDemo.gif" alt="Demo" width="100%" height="100%">
35 | </div>
36 | 
37 | ### 整体框架
38 | 
39 | ```mermaid
40 | flowchart LR
41 |     A[/Video/] --Extract subtitle key frame--> B(VideoSubFinder) --OCR-->C(RapidVideOCR)
42 |     C --Convert--> D[/"SRT | ASS | TXT"/]
43 | ```
44 | 
45 | ### 安装
46 | 
47 | ```bash
48 | pip install rapid_videocr
49 | ```
50 | 
51 | ### 使用
52 | 
53 | > [!NOTE]
54 | >
55 | > `rapid_videocr`输入图像路径必须是**VideoSubFinder**软件输出的RGBImages或TXTImages的路径。
56 | 
57 | ```bash
58 | rapid_videocr -i RGBImages
59 | ```
60 | 
61 | ### 文档
62 | 
63 | 完整文档请移步：[docs](https://swhl.github.io/RapidVideOCR/docs)
64 | 
65 | ### 贡献者
66 | 
67 | <p align="left">
68 |   <a href="https://github.com/SWHL/RapidVideOCR/graphs/contributors">
69 |     <img src="https://contrib.rocks/image?repo=SWHL/RapidVideOCR" width="20%"/>
70 |   </a>
71 | </p>
72 | 
73 | ### 贡献指南
74 | 
75 | 我们感谢所有的贡献者为改进和提升 RapidVideOCR 所作出的努力。
76 | 
77 | - 欢迎提交请求。对于重大更改，请先打开issue讨论您想要改变的内容。
78 | - 请确保适当更新测试。
79 | 
80 | ### 加入我们
81 | 
82 | - 微信扫描以下二维码，关注**RapidAI公众号**，回复video即可加入RapidVideOCR微信交流群：
83 |     <div align="center">
84 |         <img src="https://raw.githubusercontent.com/RapidAI/.github/main/assets/RapidAI_WeChatAccount_round_corner.png" width="25%" height="25%" align="center">
85 |     </div>
86 | 
87 | - 扫码加入QQ群（706807542）：
88 |     <div align="center">
89 |         <img src="https://github.com/SWHL/RapidVideOCR/releases/download/v2.0.1/QQGroup.png" width="25%" height="25%" align="center">
90 |     </div>
91 | 
92 | ### [赞助](https://swhl.github.io/RapidVideOCR/docs/sponsor/)
93 | 
94 | 如果您想要赞助该项目，可直接点击当前页最上面的Sponsor按钮，请写好备注(**您的Github账号名称**)，方便添加到赞助列表中。
95 | 
96 | ### 开源许可证
97 | 
98 | 该项目采用 [Apache 2.0 license](../LICENSE) 开源许可证。
99 | 


--------------------------------------------------------------------------------
/docs/doc_whl.md:
--------------------------------------------------------------------------------
1 | ### See [Documentation](https://swhl.github.io/RapidVideOCR/docs/)
2 | 


--------------------------------------------------------------------------------
/rapid_videocr/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import RapidVideOCR, RapidVideOCRExeception, RapidVideOCRInput
5 | from .vsf_ocr_cli import RapidVideoSubFinderOCR
6 | 


--------------------------------------------------------------------------------
/rapid_videocr/export.py:
--------------------------------------------------------------------------------
  1 | # -*- encoding: utf-8 -*-
  2 | # @Author: SWHL
  3 | # @Contact: liekkaskono@163.com
  4 | from abc import ABC, abstractmethod
  5 | from enum import Enum
  6 | from pathlib import Path
  7 | from typing import List
  8 | 
  9 | from .utils.utils import write_txt
 10 | 
 11 | 
 12 | class OutputFormat(Enum):
 13 |     TXT = "txt"
 14 |     SRT = "srt"
 15 |     ASS = "ass"
 16 |     ALL = "all"
 17 | 
 18 | 
 19 | class ExportStrategy(ABC):
 20 |     @abstractmethod
 21 |     def export(
 22 |         self,
 23 |         save_dir: Path,
 24 |         save_name: str,
 25 |         srt_result: List[str],
 26 |         ass_result: List[str],
 27 |         txt_result: List[str],
 28 |     ):
 29 |         pass
 30 | 
 31 | 
 32 | class TxtExportStrategy(ExportStrategy):
 33 |     def export(
 34 |         self,
 35 |         save_dir: Path,
 36 |         save_name: str,
 37 |         srt_result: List[str],
 38 |         ass_result: List[str],
 39 |         txt_result: List[str],
 40 |     ):
 41 |         file_path = save_dir / f"{save_name}.txt"
 42 |         write_txt(file_path, txt_result)
 43 | 
 44 | 
 45 | class SrtExportStrategy(ExportStrategy):
 46 |     def export(
 47 |         self,
 48 |         save_dir: Path,
 49 |         save_name: str,
 50 |         srt_result: List[str],
 51 |         ass_result: List[str],
 52 |         txt_result: List[str],
 53 |     ):
 54 |         file_path = save_dir / f"{save_name}.srt"
 55 |         write_txt(file_path, srt_result)
 56 | 
 57 | 
 58 | class AssExportStrategy(ExportStrategy):
 59 |     def export(
 60 |         self,
 61 |         save_dir: Path,
 62 |         save_name: str,
 63 |         srt_result: List[str],    # unused here but kept for signature
 64 |         ass_result: List[str],
 65 |         txt_result: List[str],
 66 |     ):
 67 |         header = [
 68 |             "[Script Info]",
 69 |             "; Script generated by RapidVideOCR",
 70 |             "ScriptType: v4.00+",
 71 |             "PlayResX: 1920",
 72 |             "PlayResY: 1080",
 73 |             "",
 74 |             "[V4+ Styles]",
 75 |             "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, "
 76 |             "Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, "
 77 |             "Alignment, MarginL, MarginR, MarginV, Encoding",
 78 |             "Style: Default,Arial,54,&H00FFFFFF,&H0000FFFF,&H00000000,&H64000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1",
 79 |             "",
 80 |             "[Events]",
 81 |             "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
 82 |         ]
 83 | 
 84 |         file_path = save_dir / f"{save_name}.ass"
 85 |         write_txt(file_path, header + [""] + ass_result)
 86 | 
 87 | 
 88 | class AllExportStrategy(ExportStrategy):
 89 |     def export(
 90 |         self,
 91 |         save_dir: Path,
 92 |         save_name: str,
 93 |         srt_result: List[str],
 94 |         ass_result: List[str],
 95 |         txt_result: List[str],
 96 |     ):
 97 |         txt_export = TxtExportStrategy()
 98 |         srt_export = SrtExportStrategy()
 99 |         ass_export = AssExportStrategy()
100 | 
101 |         txt_export.export(save_dir, save_name, srt_result, ass_result, txt_result)
102 |         srt_export.export(save_dir, save_name, srt_result, ass_result, txt_result)
103 |         ass_export.export(save_dir, save_name, srt_result, ass_result, txt_result)
104 | 
105 | 
106 | class ExportStrategyFactory:
107 |     @staticmethod
108 |     def create_strategy(out_format: str = OutputFormat.ALL.value) -> ExportStrategy:
109 |         strategies = {
110 |             OutputFormat.TXT.value: TxtExportStrategy(),
111 |             OutputFormat.SRT.value: SrtExportStrategy(),
112 |             OutputFormat.ASS.value: AssExportStrategy(),
113 |             OutputFormat.ALL.value: AllExportStrategy(),
114 |         }
115 | 
116 |         if strategy := strategies.get(out_format):
117 |             return strategy
118 |         raise ValueError(f"Unsupported output format: {out_format}")
119 | 


--------------------------------------------------------------------------------
/rapid_videocr/main.py:
--------------------------------------------------------------------------------
  1 | # -*- encoding: utf-8 -*-
  2 | # @Author: SWHL
  3 | # @Contact: liekkaskono@163.com
  4 | import argparse
  5 | from dataclasses import dataclass
  6 | from pathlib import Path
  7 | from typing import Any, Dict, List, Optional, Union
  8 | 
  9 | from .export import ExportStrategyFactory, OutputFormat
 10 | from .ocr_processor import OCRProcessor
 11 | from .utils.crop_by_project import CropByProject
 12 | from .utils.logger import Logger
 13 | from .utils.utils import mkdir
 14 | 
 15 | IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"}
 16 | 
 17 | 
 18 | @dataclass
 19 | class RapidVideOCRInput:
 20 |     is_batch_rec: bool = False
 21 |     batch_size: int = 10
 22 |     out_format: str = OutputFormat.ALL.value
 23 |     ocr_params: Optional[Dict[str, Any]] = None
 24 | 
 25 | 
 26 | class RapidVideOCR:
 27 |     def __init__(self, input_params: RapidVideOCRInput):
 28 |         self.logger = Logger(logger_name=__name__).get_log()
 29 | 
 30 |         self.ocr_processor = OCRProcessor(
 31 |             input_params.ocr_params, input_params.batch_size
 32 |         )
 33 | 
 34 |         self.cropper = CropByProject()
 35 | 
 36 |         self.is_batch_rec = input_params.is_batch_rec
 37 |         self.out_format = input_params.out_format
 38 | 
 39 |     def __call__(
 40 |         self,
 41 |         vsf_dir: Union[str, Path],
 42 |         save_dir: Union[str, Path],
 43 |         save_name: str = "result",
 44 |     ) -> List[str]:
 45 |         vsf_dir = Path(vsf_dir)
 46 |         if not vsf_dir.exists():
 47 |             raise RapidVideOCRExeception(f"{vsf_dir} does not exist.")
 48 | 
 49 |         img_list = self.get_img_list(vsf_dir)
 50 |         srt_result, ass_result, txt_result = self.ocr_processor(
 51 |             img_list, self.is_batch_rec, self.is_txt_dir(vsf_dir)
 52 |         )
 53 | 
 54 |         self.export_file(Path(save_dir), save_name, srt_result, ass_result, txt_result)
 55 |         return txt_result
 56 | 
 57 |     def get_img_list(self, vsf_dir: Path) -> List[Path]:
 58 |         def get_sort_key(x: Path) -> int:
 59 |             return int("".join(str(x.stem).split("_")[:4]))
 60 | 
 61 |         img_list = []
 62 |         for v in vsf_dir.glob("*.*"):
 63 |             if not v.is_file():
 64 |                 continue
 65 | 
 66 |             if v.suffix.lower() not in IMAGE_EXTENSIONS:
 67 |                 continue
 68 | 
 69 |             img_list.append(v)
 70 | 
 71 |         if not img_list:
 72 |             raise RapidVideOCRExeception(f"{vsf_dir} does not have valid images")
 73 | 
 74 |         img_list = sorted(img_list, key=get_sort_key)
 75 |         return img_list
 76 | 
 77 |     @staticmethod
 78 |     def is_txt_dir(vsf_dir: Path) -> bool:
 79 |         return "TXTImages" in vsf_dir.name
 80 | 
 81 |     def export_file(
 82 |         self,
 83 |         save_dir: Path,
 84 |         save_name: str,
 85 |         srt_result: List[str],
 86 |         ass_result: List[str],
 87 |         txt_result: List[str],
 88 |     ):
 89 |         try:
 90 |             strategy = ExportStrategyFactory.create_strategy(self.out_format)
 91 | 
 92 |             mkdir(save_dir)
 93 |             strategy.export(save_dir, save_name, srt_result, ass_result, txt_result)
 94 |             self.logger.info("[OCR] Results saved to directory: %s", save_dir)
 95 |         except ValueError as e:
 96 |             self.logger.error("Export failed: %s", str(e))
 97 |             raise
 98 | 
 99 |     def print_console(self, txt_result: List):
100 |         for v in txt_result:
101 |             print(v.strip())
102 | 
103 | 
104 | class RapidVideOCRExeception(Exception):
105 |     pass
106 | 
107 | 
108 | def main():
109 |     parser = argparse.ArgumentParser()
110 |     parser.add_argument(
111 |         "-i",
112 |         "--img_dir",
113 |         type=str,
114 |         required=True,
115 |         help="The full path of RGBImages or TXTImages.",
116 |     )
117 |     parser.add_argument(
118 |         "-s",
119 |         "--save_dir",
120 |         type=str,
121 |         default="outputs",
122 |         help='The path of saving the recognition result. Default is "outputs" under the current directory.',
123 |     )
124 |     parser.add_argument(
125 |         "-f",
126 |         "--file_name",
127 |         type=str,
128 |         default="result",
129 |         help='The name of the resulting file name. Default is "result".',
130 |     )
131 |     parser.add_argument(
132 |         "-o",
133 |         "--out_format",
134 |         type=str,
135 |         default=OutputFormat.ALL.value,
136 |         choices=[v.value for v in OutputFormat],
137 |         help='Output file format. Default is "all".',
138 |     )
139 |     parser.add_argument(
140 |         "--is_batch_rec",
141 |         action="store_true",
142 |         default=False,
143 |         help="Which mode to run (concat recognition or single recognition). Default is False.",
144 |     )
145 |     parser.add_argument(
146 |         "-b",
147 |         "--batch_size",
148 |         type=int,
149 |         default=10,
150 |         help="The batch of concating image nums in concat recognition mode. Default is 10.",
151 |     )
152 |     args = parser.parse_args()
153 | 
154 |     ocr_input_params = RapidVideOCRInput(
155 |         is_batch_rec=args.is_batch_rec,
156 |         batch_size=args.batch_size,
157 |         out_format=args.out_format,
158 |     )
159 |     extractor = RapidVideOCR(ocr_input_params)
160 |     extractor(args.img_dir, args.save_dir, args.file_name)
161 | 
162 | 
163 | if __name__ == "__main__":
164 |     main()
165 | 


--------------------------------------------------------------------------------
/rapid_videocr/ocr_processor.py:
--------------------------------------------------------------------------------
  1 | # -*- encoding: utf-8 -*-
  2 | # @Author: SWHL
  3 | # @Contact: liekkaskono@163.com
  4 | from pathlib import Path
  5 | from typing import Dict, List, Optional, Tuple
  6 | 
  7 | import cv2
  8 | import numpy as np
  9 | from rapidocr import RapidOCR
 10 | from tqdm import tqdm
 11 | 
 12 | from .utils.logger import Logger
 13 | from .utils.utils import (
 14 |     compute_centroid,
 15 |     compute_poly_iou,
 16 |     is_inclusive_each_other,
 17 |     padding_img,
 18 |     read_img,
 19 | )
 20 | 
 21 | 
 22 | class OCRProcessor:
 23 |     def __init__(self, ocr_params: Optional[Dict] = None, batch_size: int = 10):
 24 |         self.logger = Logger(logger_name=__name__).get_log()
 25 |         self.ocr_engine = self._init_ocr_engine(ocr_params)
 26 |         self.batch_size = batch_size
 27 | 
 28 |     def _init_ocr_engine(self, ocr_params: Optional[Dict] = None) -> RapidOCR:
 29 |         return RapidOCR(params=ocr_params)
 30 | 
 31 |     def __call__(
 32 |         self, img_list: List[Path], is_batch_rec: bool, is_txt_dir: bool
 33 |     ) -> Tuple[List[str], List[str], List[str]]:
 34 |         self.is_txt_dir = is_txt_dir
 35 |         process_func = self.batch_rec if is_batch_rec else self.single_rec
 36 |         rec_results = process_func(img_list)
 37 |         srt_results = self._generate_srt_results(rec_results)
 38 |         ass_results = self._generate_ass_results(rec_results)
 39 |         txt_results = self._generate_txt_result(rec_results)
 40 |         return srt_results, ass_results, txt_results
 41 | 
 42 |     def single_rec(self, img_list: List[Path]) -> List[Tuple[int, str, str, str]]:
 43 |         self.logger.info("[OCR] Running with single recognition.")
 44 | 
 45 |         rec_results = []
 46 |         for i, img_path in enumerate(tqdm(img_list, desc="OCR")):
 47 |             time_str = self._get_srt_timestamp(img_path)
 48 |             ass_time_str = self._get_ass_timestamp(img_path)
 49 |             img = self._preprocess_image(img_path)
 50 | 
 51 |             dt_boxes, rec_res = self.get_ocr_result(img)
 52 |             txts = (
 53 |                 self.process_same_line(dt_boxes, rec_res)
 54 |                 if dt_boxes is not None
 55 |                 else ""
 56 |             )
 57 |             rec_results.append([i, time_str, txts, ass_time_str])
 58 |         return rec_results
 59 | 
 60 |     @staticmethod
 61 |     def _get_srt_timestamp(file_path: Path) -> str:
 62 |         """0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg"""
 63 | 
 64 |         def format_time(time_parts):
 65 |             time_parts[0] = f"{time_parts[0]:0>2}"
 66 |             return ":".join(time_parts[:3]) + f",{time_parts[3]}"
 67 | 
 68 |         split_paths = file_path.stem.split("_")
 69 |         start_time = split_paths[:4]
 70 |         end_time = split_paths[5:9]
 71 |         return f"{format_time(start_time)} --> {format_time(end_time)}"
 72 | 
 73 |     @staticmethod
 74 |     def _get_ass_timestamp(file_path: Path) -> str:
 75 |         s = file_path.stem
 76 | 
 77 |         h1   = int(s[0:1])
 78 |         m1   = int(s[2:4])
 79 |         sec1 = int(s[5:7])
 80 |         ms1  = int(s[8:11])
 81 | 
 82 |         h2   = int(s[13:14])
 83 |         m2   = int(s[15:17])
 84 |         sec2 = int(s[18:20])
 85 |         ms2  = int(s[21:24])
 86 | 
 87 |         # compute absolute times in milliseconds
 88 |         bt = (h1 * 3600 + m1 * 60 + sec1) * 1000 + ms1
 89 |         et = (h2 * 3600 + m2 * 60 + sec2) * 1000 + ms2
 90 | 
 91 |         def to_ass(ts_ms: int) -> str:
 92 |             # centiseconds (drop the last digit, no rounding)
 93 |             cs_total = ts_ms // 10  
 94 |             cs = cs_total % 100
 95 |             total_s = ts_ms // 1000
 96 |             s = total_s % 60
 97 |             total_m = total_s // 60
 98 |             m = total_m % 60
 99 |             h = total_m // 60
100 |             # H:MM:SS.CC
101 |             return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
102 | 
103 |         return f"{to_ass(bt)},{to_ass(et)}"
104 | 
105 |     @staticmethod
106 |     def _preprocess_image(img_path: Path) -> np.ndarray:
107 |         img = read_img(img_path)
108 |         img = padding_img(img, (img.shape[0], img.shape[0], 0, 0))
109 |         return img
110 | 
111 |     @staticmethod
112 |     def _generate_srt_results(rec_results: List[Tuple[int, str, str, str]]) -> List[str]:
113 |         return [f"{i+1}\n{time_str}\n{txt}\n" for i, time_str, txt, _ in rec_results]
114 | 
115 |     @staticmethod
116 |     def _generate_ass_results(rec_results: List[Tuple[int, str, str, str]]) -> List[str]:
117 |         return [f"Dialogue: 0,{ass_time_str},Default,,0,0,0,,{txt}" for _, _, txt, ass_time_str in rec_results]
118 | 
119 |     @staticmethod
120 |     def _generate_txt_result(rec_results: List[Tuple[int, str, str, str]]) -> List[str]:
121 |         return [f"{txt}\n" for _, _, txt, _ in rec_results]
122 | 
123 |     def batch_rec(self, img_list: List[Path]) -> List[Tuple[int, str, str, str]]:
124 |         self.logger.info("[OCR] Running with concat recognition.")
125 | 
126 |         img_nums = len(img_list)
127 |         rec_results = []
128 |         for start_i in tqdm(range(0, img_nums, self.batch_size), desc="Concat Rec"):
129 |             end_i = min(img_nums, start_i + self.batch_size)
130 | 
131 |             concat_img, img_coordinates, img_paths = self._prepare_batch(
132 |                 img_list[start_i:end_i]
133 |             )
134 |             dt_boxes, rec_res = self.get_ocr_result(concat_img)
135 |             if rec_res is None or dt_boxes is None:
136 |                 continue
137 | 
138 |             one_batch_rec_results = self._process_batch_results(
139 |                 start_i, img_coordinates, dt_boxes, rec_res, img_paths
140 |             )
141 |             rec_results.extend(one_batch_rec_results)
142 |         return rec_results
143 | 
144 |     def _prepare_batch(
145 |         self, img_list: List[Path]
146 |     ) -> Tuple[np.ndarray, np.ndarray, List[Path]]:
147 |         padding_value = 10
148 |         array_img_list, img_coordinates = [], []
149 |         for i, img_path in enumerate(img_list):
150 |             img = read_img(img_path)
151 |             if self.is_txt_dir:
152 |                 img = cv2.resize(img, None, fx=0.25, fy=0.25)
153 | 
154 |             pad_img = padding_img(img, (0, padding_value, 0, 0))
155 |             array_img_list.append(pad_img)
156 | 
157 |             h, w = img.shape[:2]
158 |             x0, y0 = 0, i * (h + padding_value)
159 |             x1, y1 = w, (i + 1) * (h + padding_value)
160 |             img_coordinates.append([(x0, y0), (x1, y0), (x1, y1), (x0, y1)])
161 | 
162 |         return np.vstack(array_img_list), np.array(img_coordinates), img_list
163 | 
164 |     def _process_batch_results(
165 |         self,
166 |         start_i: int,
167 |         img_coordinates: np.ndarray,
168 |         dt_boxes: np.ndarray,
169 |         rec_res: Tuple[str],
170 |         img_paths: List[Path],
171 |     ) -> List[Tuple[int, str, str, str]]:
172 |         match_dict = self._match_boxes_to_images(
173 |             img_coordinates, dt_boxes, rec_res, img_paths
174 |         )
175 | 
176 |         results = []
177 |         for k, v in match_dict.items():
178 |             cur_frame_idx = start_i + k
179 |             if v:
180 |                 img_path, boxes, recs = list(zip(*v))
181 |                 time_str = self._get_srt_timestamp(img_path[0])
182 |                 ass_time_str = self._get_ass_timestamp(img_path[0])
183 |                 txts = self.process_same_line(boxes, recs)
184 |             else:
185 |                 time_str = self._get_srt_timestamp(img_paths[k])
186 |                 ass_time_str = self._get_ass_timestamp(img_paths[k])
187 |                 txts = ""
188 | 
189 |             results.append([cur_frame_idx, time_str, txts, ass_time_str])
190 |         return results
191 | 
192 |     def _match_boxes_to_images(
193 |         self,
194 |         img_coordinates: np.ndarray,
195 |         dt_boxes: np.ndarray,
196 |         rec_res: List[str],
197 |         img_paths: List[Path],
198 |     ) -> Dict[int, List[Tuple[Path, np.ndarray, str]]]:
199 |         """将检测框匹配到对应图像"""
200 |         match_dict = {k: [] for k in range(len(img_coordinates))}
201 |         visited_idx = set()
202 | 
203 |         for i, frame_boxes in enumerate(img_coordinates):
204 |             for idx, (dt_box, txt) in enumerate(zip(dt_boxes, rec_res)):
205 |                 if idx in visited_idx:
206 |                     continue
207 | 
208 |                 if self._is_box_matched(frame_boxes, dt_box):
209 |                     match_dict[i].append((img_paths[i], dt_box, txt))
210 |                     visited_idx.add(idx)
211 | 
212 |         return match_dict
213 | 
214 |     def _is_box_matched(self, frame_boxes: np.ndarray, dt_box: np.ndarray) -> bool:
215 |         """判断检测框是否匹配到图像"""
216 |         box_iou = compute_poly_iou(frame_boxes, dt_box)
217 |         return is_inclusive_each_other(frame_boxes, dt_box) or box_iou > 0.1
218 | 
219 |     def get_ocr_result(
220 |         self, img: np.ndarray
221 |     ) -> Tuple[Optional[np.ndarray], Optional[Tuple[str]]]:
222 |         ocr_result = self.ocr_engine(img)
223 |         if ocr_result.boxes is None:
224 |             return None, None
225 |         return ocr_result.boxes, ocr_result.txts
226 | 
227 |     def process_same_line(self, dt_boxes: np.ndarray, rec_res: List[str]) -> str:
228 |         if len(rec_res) == 1:
229 |             return rec_res[0]
230 | 
231 |         y_centroids = [compute_centroid(box)[1] for box in dt_boxes]
232 |         line_groups = self._group_by_lines(y_centroids)
233 |         return self._merge_line_text(line_groups, rec_res)
234 | 
235 |     def _group_by_lines(self, y_centroids: List[float]) -> List[List[int]]:
236 |         """将文本框按行分组"""
237 | 
238 |         @staticmethod
239 |         def is_same_line(points: List) -> List[bool]:
240 |             threshold = 5
241 | 
242 |             align_points = list(zip(points, points[1:]))
243 |             bool_res = [False] * len(align_points)
244 |             for i, point in enumerate(align_points):
245 |                 y0, y1 = point
246 |                 if abs(y0 - y1) <= threshold:
247 |                     bool_res[i] = True
248 |             return bool_res
249 | 
250 |         bool_res = is_same_line(y_centroids)
251 |         groups = []
252 |         current_group = [0]
253 |         for i, is_same in enumerate(bool_res, 1):
254 |             if is_same:
255 |                 current_group.append(i)
256 |             else:
257 |                 groups.append(current_group)
258 |                 current_group = [i]
259 | 
260 |         groups.append(current_group)
261 |         return groups
262 | 
263 |     def _merge_line_text(self, line_groups: List[List[int]], rec_res: List[str]) -> str:
264 |         lines = []
265 |         for group in line_groups:
266 |             line_text = " ".join(rec_res[i] for i in group)
267 |             lines.append(line_text)
268 |         return "\n".join(lines)
269 | 


--------------------------------------------------------------------------------
/rapid_videocr/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .logger import Logger
5 | 


--------------------------------------------------------------------------------
/rapid_videocr/utils/crop_by_project.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | # @Author: SWHL
 3 | # @Contact: liekkaskono@163.com
 4 | import cv2
 5 | import numpy as np
 6 | 
 7 | 
 8 | class CropByProject:
 9 |     """投影法裁剪"""
10 | 
11 |     def __init__(self, threshold=250):
12 |         self.threshold = threshold
13 | 
14 |     def __call__(self, origin_img):
15 |         image = cv2.cvtColor(origin_img, cv2.COLOR_BGR2GRAY)
16 | 
17 |         # 将图片二值化
18 |         retval, img = cv2.threshold(image, self.threshold, 255, cv2.THRESH_BINARY_INV)
19 | 
20 |         # 使文字增长成块
21 |         closed = cv2.dilate(img, None, iterations=1)
22 | 
23 |         # 水平投影
24 |         x0, x1 = self.get_project_loc(closed, direction="width")
25 | 
26 |         # 竖直投影
27 |         y0, y1 = self.get_project_loc(closed, direction="height")
28 | 
29 |         return origin_img[y0:y1, x0:x1]
30 | 
31 |     @staticmethod
32 |     def get_project_loc(img, direction):
33 |         """获得裁剪的起始和终点索引位置
34 |         Args:
35 |             img (ndarray): 二值化后得到的图像
36 |             direction (str): 'width/height'
37 |         Raises:
38 |             ValueError: 不支持的求和方向
39 |         Returns:
40 |             tuple: 起始索引位置
41 |         """
42 |         if direction == "width":
43 |             axis = 0
44 |         elif direction == "height":
45 |             axis = 1
46 |         else:
47 |             raise ValueError(f"direction {direction} is not supported!")
48 | 
49 |         loc_sum = np.sum(img == 255, axis=axis)
50 |         loc_range = np.argwhere(loc_sum > 0)
51 |         i0, i1 = loc_range[0][0], loc_range[-1][0]
52 |         return i0, i1
53 | 


--------------------------------------------------------------------------------
/rapid_videocr/utils/logger.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | # @Author: SWHL
 3 | # @Contact: liekkaskono@163.com
 4 | import logging
 5 | 
 6 | import colorlog
 7 | 
 8 | 
 9 | class Logger:
10 |     def __init__(self, log_level=logging.DEBUG, logger_name=None):
11 |         self.logger = logging.getLogger(logger_name)
12 |         self.logger.setLevel(log_level)
13 |         self.logger.propagate = False
14 | 
15 |         formatter = colorlog.ColoredFormatter(
16 |             "%(log_color)s[%(levelname)s] %(asctime)s [RapidVideOCR] %(filename)s:%(lineno)d: %(message)s",
17 |             log_colors={
18 |                 "DEBUG": "cyan",
19 |                 "INFO": "green",
20 |                 "WARNING": "yellow",
21 |                 "ERROR": "red",
22 |                 "CRITICAL": "red,bg_white",
23 |             },
24 |         )
25 | 
26 |         if not self.logger.handlers:
27 |             console_handler = logging.StreamHandler()
28 |             console_handler.setFormatter(formatter)
29 | 
30 |             for handler in self.logger.handlers:
31 |                 self.logger.removeHandler(handler)
32 | 
33 |             console_handler.setLevel(log_level)
34 |             self.logger.addHandler(console_handler)
35 | 
36 |     def get_log(self):
37 |         return self.logger
38 | 


--------------------------------------------------------------------------------
/rapid_videocr/utils/utils.py:
--------------------------------------------------------------------------------
  1 | # -*- encoding: utf-8 -*-
  2 | # @Author: SWHL
  3 | # @Contact: liekkaskono@163.com
  4 | import argparse
  5 | from pathlib import Path
  6 | from typing import List, Tuple, Union
  7 | 
  8 | import cv2
  9 | import numpy as np
 10 | import shapely
 11 | from shapely.geometry import MultiPoint, Polygon
 12 | 
 13 | 
 14 | def compute_centroid(points: np.ndarray) -> List:
 15 |     """计算所给框的质心坐标
 16 | 
 17 |     :param points ([type]): (4, 2)
 18 |     :return: [description]
 19 |     """
 20 |     x_min, x_max = np.min(points[:, 0]), np.max(points[:, 0])
 21 |     y_min, y_max = np.min(points[:, 1]), np.max(points[:, 1])
 22 |     return [(x_min + x_max) / 2, (y_min + y_max) / 2]
 23 | 
 24 | 
 25 | def write_txt(
 26 |     save_path: Union[str, Path], contents: Union[List[str], str], mode: str = "w"
 27 | ) -> None:
 28 |     if not isinstance(contents, list):
 29 |         contents = [contents]
 30 | 
 31 |     with open(save_path, mode, encoding="utf-8") as f:
 32 |         for value in contents:
 33 |             f.write(f"{value}\n")
 34 | 
 35 | 
 36 | def read_img(img_path: Union[str, Path]) -> np.ndarray:
 37 |     img = cv2.imdecode(np.fromfile(str(img_path), dtype=np.uint8), 1)
 38 |     return img
 39 | 
 40 | 
 41 | def padding_img(
 42 |     img: np.ndarray,
 43 |     padding_value: Tuple[int, int, int, int],
 44 |     padding_color: Tuple[int, int, int] = (0, 0, 0),
 45 | ) -> np.ndarray:
 46 |     padded_img = cv2.copyMakeBorder(
 47 |         img,
 48 |         padding_value[0],
 49 |         padding_value[1],
 50 |         padding_value[2],
 51 |         padding_value[3],
 52 |         cv2.BORDER_CONSTANT,
 53 |         value=padding_color,
 54 |     )
 55 |     return padded_img
 56 | 
 57 | 
 58 | def mkdir(dir_path):
 59 |     Path(dir_path).mkdir(parents=True, exist_ok=True)
 60 | 
 61 | 
 62 | def read_txt(txt_path: Union[str, Path]) -> List[str]:
 63 |     if not isinstance(txt_path, str):
 64 |         txt_path = str(txt_path)
 65 | 
 66 |     with open(txt_path, "r", encoding="utf-8") as f:
 67 |         data = list(map(lambda x: x.rstrip("\n"), f))
 68 |     return data
 69 | 
 70 | 
 71 | def compute_poly_iou(a: np.ndarray, b: np.ndarray) -> float:
 72 |     """计算两个多边形的IOU
 73 | 
 74 |     Args:
 75 |         poly1 (np.ndarray): (4, 2)
 76 |         poly2 (np.ndarray): (4, 2)
 77 | 
 78 |     Returns:
 79 |         float: iou
 80 |     """
 81 |     poly1 = Polygon(a).convex_hull
 82 |     poly2 = Polygon(b).convex_hull
 83 | 
 84 |     union_poly = np.concatenate((a, b))
 85 | 
 86 |     if not poly1.intersects(poly2):
 87 |         return 0.0
 88 | 
 89 |     try:
 90 |         inter_area = poly1.intersection(poly2).area
 91 |         union_area = MultiPoint(union_poly).convex_hull.area
 92 |     except shapely.geos.TopologicalError:
 93 |         print("shapely.geos.TopologicalError occured, iou set to 0")
 94 |         return 0.0
 95 | 
 96 |     if union_area == 0:
 97 |         return 0.0
 98 | 
 99 |     return float(inter_area) / union_area
100 | 
101 | 
102 | def is_inclusive_each_other(box1: np.ndarray, box2: np.ndarray) -> bool:
103 |     """判断两个多边形框是否存在包含关系
104 | 
105 |     Args:
106 |         box1 (np.ndarray): (4, 2)
107 |         box2 (np.ndarray): (4, 2)
108 | 
109 |     Returns:
110 |         bool: 是否存在包含关系
111 |     """
112 |     poly1 = Polygon(box1)
113 |     poly2 = Polygon(box2)
114 | 
115 |     poly1_area = poly1.convex_hull.area
116 |     poly2_area = poly2.convex_hull.area
117 | 
118 |     if poly1_area > poly2_area:
119 |         box_max = box1
120 |         box_min = box2
121 |     else:
122 |         box_max = box2
123 |         box_min = box1
124 | 
125 |     x0, y0 = np.min(box_min[:, 0]), np.min(box_min[:, 1])
126 |     x1, y1 = np.max(box_min[:, 0]), np.max(box_min[:, 1])
127 | 
128 |     edge_x0, edge_y0 = np.min(box_max[:, 0]), np.min(box_max[:, 1])
129 |     edge_x1, edge_y1 = np.max(box_max[:, 0]), np.max(box_max[:, 1])
130 | 
131 |     if x0 >= edge_x0 and y0 >= edge_y0 and x1 <= edge_x1 and y1 <= edge_y1:
132 |         return True
133 |     return False
134 | 
135 | 
136 | def float_range(mini, maxi):
137 |     """Return function handle of an argument type function for
138 |     ArgumentParser checking a float range: mini <= arg <= maxi
139 |       mini - minimum acceptable argument
140 |       maxi - maximum acceptable argument"""
141 | 
142 |     # Define the function with default arguments
143 |     def float_range_checker(arg):
144 |         """New Type function for argparse - a float within predefined range."""
145 | 
146 |         try:
147 |             f = float(arg)
148 |         except ValueError as exc:
149 |             raise argparse.ArgumentTypeError("must be a floating point number") from exc
150 | 
151 |         if f < mini or f > maxi:
152 |             raise argparse.ArgumentTypeError(
153 |                 "must be in range [" + str(mini) + " .. " + str(maxi) + "]"
154 |             )
155 |         return f
156 | 
157 |     # Return function handle to checking function
158 |     return float_range_checker
159 | 


--------------------------------------------------------------------------------
/rapid_videocr/vsf_cli.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | # @Author: SWHL
 3 | # @Contact: liekkaskono@163.com
 4 | import subprocess
 5 | from dataclasses import asdict, dataclass
 6 | from typing import Optional
 7 | 
 8 | 
 9 | @dataclass
10 | class VideoSubFinderInput:
11 |     vsf_exe_path: str
12 |     clear_dirs: bool = True
13 |     run_search: bool = True
14 |     create_cleared_text_images: bool = True
15 |     create_empty_sub: Optional[str] = None
16 |     create_sub_from_cleared_txt_images: Optional[str] = None
17 |     create_sub_from_txt_results: Optional[str] = None
18 |     open_video_opencv: bool = True
19 |     open_video_ffmpeg: bool = False
20 |     use_cuda: bool = False
21 |     start_time: Optional[str] = None
22 |     end_time: Optional[str] = None
23 |     top_video_image_percent_end: float = 0.2
24 |     bottom_video_image_percent_end: float = 0.0
25 |     left_video_image_percent_end: float = 0.0
26 |     right_video_image_percent_end: float = 1.0
27 |     general_settings: Optional[str] = None
28 |     num_threads: int = 2
29 |     num_ocr_threads: int = 1
30 | 
31 | 
32 | class VideoSubFinder:
33 |     def __init__(self, input_params: VideoSubFinderInput):
34 |         param_dict = asdict(input_params)
35 |         run_list = [input_params.vsf_exe_path]
36 |         for k, v in param_dict.items():
37 |             if v is None or str(v) == "False":
38 |                 continue
39 | 
40 |             run_list.append(f"--{str(k)}" if str(v) == "True" else f"--{k} {v}")
41 |         self.run_list = run_list
42 | 
43 |     def __call__(self, video_path: str, output_dir: str) -> str:
44 |         self.run_list.extend(["--input_video", video_path, "--output_dir", output_dir])
45 |         try:
46 |             subprocess.run(self.run_list, check=False)
47 |             return output_dir
48 |         except Exception as e:
49 |             raise e
50 | 


--------------------------------------------------------------------------------
/rapid_videocr/vsf_ocr_cli.py:
--------------------------------------------------------------------------------
  1 | # -*- encoding: utf-8 -*-
  2 | # @Author: SWHL
  3 | # @Contact: liekkaskono@163.com
  4 | import argparse
  5 | from enum import Enum
  6 | from pathlib import Path
  7 | 
  8 | from .main import OutputFormat, RapidVideOCR, RapidVideOCRInput
  9 | from .utils.logger import Logger
 10 | from .utils.utils import float_range
 11 | from .vsf_cli import VideoSubFinder, VideoSubFinderInput
 12 | 
 13 | 
 14 | class VideoFormat(Enum):
 15 |     MP4 = ".mp4"
 16 |     AVI = ".avi"
 17 |     MOV = ".mov"
 18 |     MKV = ".mkv"
 19 | 
 20 | 
 21 | class RapidVideoSubFinderOCR:
 22 |     def __init__(
 23 |         self,
 24 |         vsf_input_params: VideoSubFinderInput,
 25 |         ocr_input_params: RapidVideOCRInput,
 26 |     ):
 27 |         self.logger = Logger(logger_name=__name__).get_log()
 28 |         self.vsf = VideoSubFinder(vsf_input_params)
 29 |         self.video_ocr = RapidVideOCR(ocr_input_params)
 30 |         self.video_formats = [VideoFormat[v].value for v in VideoFormat.__members__]
 31 | 
 32 |     def __call__(self, video_path: str, output_dir: str = "outputs"):
 33 |         if Path(video_path).is_dir():
 34 |             video_list = Path(video_path).rglob("*.*")
 35 |             video_list = [
 36 |                 v for v in video_list if v.suffix.lower() in self.video_formats
 37 |             ]
 38 |         else:
 39 |             video_list = [video_path]
 40 | 
 41 |         self.logger.info(
 42 |             "Extracting subtitle images with VideoSubFinder (takes quite a long time) ..."
 43 |         )
 44 |         video_num = len(video_list)
 45 |         for i, one_video in enumerate(video_list):
 46 |             self.logger.info(
 47 |                 "[%s/%s] Starting to extract %s key frame", i + 1, video_num, one_video
 48 |             )
 49 | 
 50 |             save_name = Path(one_video).stem
 51 |             save_dir = Path(output_dir) / save_name
 52 |             save_vsf_dir = save_dir / "VSF_Results"
 53 | 
 54 |             try:
 55 |                 self.vsf(str(one_video), str(save_vsf_dir))
 56 |             except Exception as e:
 57 |                 self.logger.error("Extract %s error, %s, skip", one_video, e)
 58 |                 continue
 59 | 
 60 |             self.logger.info(
 61 |                 "[%s/%s] Starting to run %s ocr", i + 1, video_num, one_video
 62 |             )
 63 | 
 64 |             rgb_dir = Path(save_vsf_dir) / "RGBImages"
 65 |             if not list(rgb_dir.iterdir()):
 66 |                 self.logger.warning("Extracting frames from %s is 0, skip", one_video)
 67 |                 continue
 68 |             self.video_ocr(rgb_dir, save_dir, save_name=save_name)
 69 | 
 70 | 
 71 | def main():
 72 |     parser = argparse.ArgumentParser()
 73 | 
 74 |     videocr_param_group = parser.add_argument_group(title="VideOCRParameters")
 75 |     videocr_param_group.add_argument(
 76 |         "-video_dir",
 77 |         "--video_dir",
 78 |         type=str,
 79 |         default=None,
 80 |         help="The full path of video or the path of video directory.",
 81 |     )
 82 |     videocr_param_group.add_argument(
 83 |         "-i",
 84 |         "--img_dir",
 85 |         type=str,
 86 |         default=None,
 87 |         help="The full path of RGBImages or TXTImages.",
 88 |     )
 89 |     videocr_param_group.add_argument(
 90 |         "-s",
 91 |         "--save_dir",
 92 |         type=str,
 93 |         default="outputs",
 94 |         help='The path of saving the recognition result. Default is "outputs" under the current directory.',
 95 |     )
 96 |     videocr_param_group.add_argument(
 97 |         "-o",
 98 |         "--out_format",
 99 |         type=str,
100 |         default=OutputFormat.ALL.value,
101 |         choices=[OutputFormat[v].value for v in OutputFormat.__members__],
102 |         help='Output file format. Default is "all".',
103 |     )
104 |     videocr_param_group.add_argument(
105 |         "--is_batch_rec",
106 |         action="store_true",
107 |         default=False,
108 |         help="Which mode to run (concat recognition or single recognition). Default is False.",
109 |     )
110 |     videocr_param_group.add_argument(
111 |         "-b",
112 |         "--batch_size",
113 |         type=int,
114 |         default=10,
115 |         help="The batch of concating image nums in concat recognition mode. Default is 10.",
116 |     )
117 | 
118 |     vsf_param_group = parser.add_argument_group(title="VSFParameters")
119 |     vsf_param_group.add_argument(
120 |         "-vsf",
121 |         "--vsf_exe_path",
122 |         type=str,
123 |         default=None,
124 |         help="The full path of VideoSubFinderWXW.exe.",
125 |     )
126 |     vsf_param_group.add_argument(
127 |         "-c",
128 |         "--clear_dirs",
129 |         action="store_false",
130 |         default=True,
131 |         help="Clear Folders (remove all images), performed before any other steps. Default is True",
132 |     )
133 |     vsf_param_group.add_argument(
134 |         "-r",
135 |         "--run_search",
136 |         action="store_false",
137 |         default=True,
138 |         help="Run Search (find frames with hardcoded text (hardsub) on video) Default is True",
139 |     )
140 |     vsf_param_group.add_argument(
141 |         "-ccti",
142 |         "--create_cleared_text_images",
143 |         action="store_true",
144 |         default=False,
145 |         help="Create Cleared Text Images. Default is True",
146 |     )
147 |     vsf_param_group.add_argument(
148 |         "-ces",
149 |         "--create_empty_sub",
150 |         type=str,
151 |         default=None,
152 |         help="Create Empty Sub With Provided Output File Name (*.ass or *.srt)",
153 |     )
154 |     vsf_param_group.add_argument(
155 |         "-cscti",
156 |         "--create_sub_from_cleared_txt_images",
157 |         type=str,
158 |         default=None,
159 |         help="Create Sub From Cleared TXT Images With Provided Output File Name (*.ass or *.srt)",
160 |     )
161 |     vsf_param_group.add_argument(
162 |         "-cstxt",
163 |         "--create_sub_from_txt_results",
164 |         type=str,
165 |         default=None,
166 |         help="Create Sub From TXT Results With Provided Output File Name (*.ass or *.srt)",
167 |     )
168 |     vsf_param_group.add_argument(
169 |         "-ovocv",
170 |         "--open_video_opencv",
171 |         action="store_false",
172 |         default=True,
173 |         help="open video by OpenCV (default). Default is True",
174 |     )
175 |     vsf_param_group.add_argument(
176 |         "-ovffmpeg",
177 |         "--open_video_ffmpeg",
178 |         action="store_true",
179 |         default=False,
180 |         help="open video by FFMPEG",
181 |     )
182 |     vsf_param_group.add_argument(
183 |         "-uc", "--use_cuda", action="store_true", default=False, help="use cuda"
184 |     )
185 |     vsf_param_group.add_argument(
186 |         "--start_time",
187 |         type=str,
188 |         default="0:00:00:000",
189 |         help="start time, default = 0:00:00:000 (in format hour:min:sec:milisec)",
190 |     )
191 |     vsf_param_group.add_argument(
192 |         "--end_time",
193 |         type=str,
194 |         default=None,
195 |         help="end time, default = video length",
196 |     )
197 |     vsf_param_group.add_argument(
198 |         "-te",
199 |         "--top_video_image_percent_end",
200 |         type=float_range(0, 1.0),
201 |         default=0.2,
202 |         help="top video image percent offset from image bottom, can be in range [0.0,1.0], default = 1.0",
203 |     )
204 |     vsf_param_group.add_argument(
205 |         "-be",
206 |         "--bottom_video_image_percent_end",
207 |         type=float_range(0, 1.0),
208 |         default=0.0,
209 |         help="bottom video image percent offset from image bottom, can be in range [0.0,1.0], default = 0.0",
210 |     )
211 |     vsf_param_group.add_argument(
212 |         "-le",
213 |         "--left_video_image_percent_end",
214 |         type=float_range(0, 1.0),
215 |         default=0.0,
216 |         help="left video image percent end, can be in range [0.0,1.0], default = 0.0",
217 |     )
218 |     vsf_param_group.add_argument(
219 |         "-re",
220 |         "--right_video_image_percent_end",
221 |         type=float_range(0, 1.0),
222 |         default=1.0,
223 |         help="right video image percent end, can be in range [0.0,1.0], default = 1.0",
224 |     )
225 |     vsf_param_group.add_argument(
226 |         "-gs",
227 |         "--general_settings",
228 |         default=None,
229 |         help="general settings (path to general settings *.cfg file, default = settings/general.cfg)",
230 |     )
231 |     vsf_param_group.add_argument(
232 |         "-nthr",
233 |         "--num_threads",
234 |         type=int,
235 |         default=1,
236 |         help="number of threads used for Run Search",
237 |     )
238 |     vsf_param_group.add_argument(
239 |         "-nocrthr",
240 |         "--num_ocr_threads",
241 |         type=int,
242 |         default=1,
243 |         help="number of threads used for Create Cleared TXT Images",
244 |     )
245 |     args = parser.parse_args()
246 | 
247 |     ocr_input_params = RapidVideOCRInput(
248 |         is_batch_rec=args.is_batch_rec,
249 |         batch_size=args.batch_size,
250 |         out_format=args.out_format,
251 |     )
252 | 
253 |     if args.vsf_exe_path and args.video_dir:
254 |         vsf_input_params = VideoSubFinderInput(**vars(args))
255 |         extractor = RapidVideoSubFinderOCR(vsf_input_params, ocr_input_params)
256 |         extractor(args.video_dir, args.save_dir)
257 |     elif args.img_dir:
258 |         extractor = RapidVideOCR(ocr_input_params)
259 |         extractor(args.img_dir, args.save_dir)
260 |     else:
261 |         pass
262 | 
263 | 
264 | if __name__ == "__main__":
265 |     main()
266 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tqdm
2 | rapidocr
3 | onnxruntime
4 | colorlog
5 | tqdm


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | # @Author: SWHL
 3 | # @Contact: liekkaskono@163.com
 4 | import sys
 5 | from pathlib import Path
 6 | from typing import List
 7 | 
 8 | import setuptools
 9 | from get_pypi_latest_version import GetPyPiLatestVersion
10 | 
11 | 
12 | def read_txt(txt_path: str) -> List:
13 |     if not isinstance(txt_path, str):
14 |         txt_path = str(txt_path)
15 | 
16 |     with open(txt_path, "r", encoding="utf-8") as f:
17 |         data = list(map(lambda x: x.rstrip("\n"), f))
18 |     return data
19 | 
20 | 
21 | def get_readme() -> str:
22 |     root_dir = Path(__file__).resolve().parent
23 |     readme_path = str(root_dir / "docs" / "doc_whl.md")
24 |     with open(readme_path, "r", encoding="utf-8") as f:
25 |         readme = f.read()
26 |     return readme
27 | 
28 | 
29 | MODULE_NAME = "rapid_videocr"
30 | 
31 | obtainer = GetPyPiLatestVersion()
32 | latest_version = obtainer(MODULE_NAME)
33 | VERSION_NUM = obtainer.version_add_one(latest_version)
34 | 
35 | # 优先提取commit message中的语义化版本号，如无，则自动加1
36 | if len(sys.argv) > 2:
37 |     match_str = " ".join(sys.argv[2:])
38 |     matched_versions = obtainer.extract_version(match_str)
39 |     if matched_versions:
40 |         VERSION_NUM = matched_versions
41 | sys.argv = sys.argv[:2]
42 | 
43 | setuptools.setup(
44 |     name=MODULE_NAME,
45 |     version=VERSION_NUM,
46 |     platforms="Any",
47 |     description="Tool for extracting hard subtitles from videos.",
48 |     long_description=get_readme(),
49 |     long_description_content_type="text/markdown",
50 |     author="SWHL",
51 |     author_email="liekkaskono@163.com",
52 |     url="https://github.com/SWHL/RapidVideOCR.git",
53 |     license="Apache-2.0",
54 |     include_package_data=True,
55 |     install_requires=read_txt("requirements.txt"),
56 |     packages=setuptools.find_packages(),
57 |     keywords=["rapidocr,videocr,subtitle"],
58 |     classifiers=[
59 |         "Programming Language :: Python :: 3.6",
60 |         "Programming Language :: Python :: 3.7",
61 |         "Programming Language :: Python :: 3.8",
62 |         "Programming Language :: Python :: 3.9",
63 |         "Programming Language :: Python :: 3.10",
64 |         "Programming Language :: Python :: 3.11",
65 |         "Programming Language :: Python :: 3.12",
66 |         "Programming Language :: Python :: 3.13",
67 |     ],
68 |     python_requires=">=3.6",
69 |     entry_points={
70 |         "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"],
71 |     },
72 | )
73 | 


--------------------------------------------------------------------------------
/tests/test_files/2.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/2.mp4


--------------------------------------------------------------------------------
/tests/test_files/RGBImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg


--------------------------------------------------------------------------------
/tests/test_files/RGBImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg


--------------------------------------------------------------------------------
/tests/test_files/RGBImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg


--------------------------------------------------------------------------------
/tests/test_files/RGBImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg


--------------------------------------------------------------------------------
/tests/test_files/TXTImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg


--------------------------------------------------------------------------------
/tests/test_files/TXTImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg


--------------------------------------------------------------------------------
/tests/test_files/TXTImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg


--------------------------------------------------------------------------------
/tests/test_files/TXTImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg


--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
  1 | # -*- encoding: utf-8 -*-
  2 | # @Author: SWHL
  3 | # @Contact: liekkaskono@163.com
  4 | import shutil
  5 | import sys
  6 | from pathlib import Path
  7 | 
  8 | import pytest
  9 | 
 10 | cur_dir = Path(__file__).resolve().parent
 11 | root_dir = cur_dir.parent
 12 | 
 13 | sys.path.append(str(root_dir))
 14 | 
 15 | from rapid_videocr import RapidVideOCR, RapidVideOCRExeception, RapidVideOCRInput
 16 | from rapid_videocr.utils.utils import mkdir, read_txt
 17 | 
 18 | test_dir = cur_dir / "test_files"
 19 | 
 20 | 
 21 | @pytest.fixture
 22 | def setup_and_teardown():
 23 |     save_dir = test_dir / "tmp"
 24 |     mkdir(save_dir)
 25 | 
 26 |     srt_path = save_dir / "result.srt"
 27 |     ass_path = save_dir / "result.ass"
 28 |     txt_path = save_dir / "result.txt"
 29 | 
 30 |     yield save_dir, srt_path, ass_path, txt_path
 31 | 
 32 |     shutil.rmtree(save_dir)
 33 | 
 34 | 
 35 | @pytest.mark.parametrize(
 36 |     "img_dir",
 37 |     [test_dir / "RGBImages", test_dir / "TXTImages"],
 38 | )
 39 | def test_single_rec(setup_and_teardown, img_dir):
 40 |     save_dir, srt_path, ass_path, txt_path = setup_and_teardown
 41 | 
 42 |     extractor = RapidVideOCR(RapidVideOCRInput())
 43 |     extractor(img_dir, save_dir)
 44 | 
 45 |     srt_data = read_txt(srt_path)
 46 |     assert len(srt_data) == 16
 47 |     assert srt_data[2] == "空间里面他绝对赢不了的"
 48 |     assert srt_data[-2] == "你们接着善后"
 49 | 
 50 |     ass_data = read_txt(ass_path)
 51 |     assert len(ass_data) == 17
 52 |     assert ass_data[13].split(",", 9)[-1] == "空间里面他绝对赢不了的"
 53 |     assert ass_data[-1].split(",", 9)[-1] == "你们接着善后"
 54 | 
 55 |     txt_data = read_txt(txt_path)
 56 |     assert len(txt_data) == 8
 57 |     assert txt_data[-2] == "你们接着善后"
 58 | 
 59 | 
 60 | @pytest.mark.parametrize("img_dir", [test_dir / "RGBImages"])
 61 | def test_concat_rec(setup_and_teardown, img_dir):
 62 |     save_dir, srt_path, ass_path, txt_path = setup_and_teardown
 63 | 
 64 |     input_param = RapidVideOCRInput(is_batch_rec=True)
 65 |     extractor = RapidVideOCR(input_param)
 66 |     extractor(img_dir, save_dir)
 67 | 
 68 |     srt_data = read_txt(srt_path)
 69 |     assert len(srt_data) == 16
 70 |     assert srt_data[2] == "空间里面他绝对赢不了的"
 71 |     assert srt_data[-2] == "你们接着善后"
 72 | 
 73 |     ass_data = read_txt(ass_path)
 74 |     assert len(ass_data) == 17
 75 |     assert ass_data[13].split(",", 9)[-1] == "空间里面他绝对赢不了的"
 76 |     assert ass_data[-1].split(",", 9)[-1] == "你们接着善后"
 77 | 
 78 |     txt_data = read_txt(txt_path)
 79 |     assert len(txt_data) == 8
 80 |     assert txt_data[-2] == "你们接着善后"
 81 | 
 82 | 
 83 | @pytest.mark.parametrize(
 84 |     "img_dir",
 85 |     [test_dir / "RGBImage", test_dir / "TXTImage"],
 86 | )
 87 | def test_empty_dir(img_dir):
 88 |     extractor = RapidVideOCR(RapidVideOCRInput())
 89 |     mkdir(img_dir)
 90 | 
 91 |     with pytest.raises(RapidVideOCRExeception) as exc_info:
 92 |         extractor(img_dir, test_dir)
 93 |     assert exc_info.type is RapidVideOCRExeception
 94 | 
 95 |     shutil.rmtree(img_dir)
 96 | 
 97 | 
 98 | @pytest.mark.parametrize(
 99 |     "img_dir",
100 |     [test_dir / "RGBImage", test_dir / "TXTImage"],
101 | )
102 | def test_nothing_dir(img_dir):
103 |     extractor = RapidVideOCR(RapidVideOCRInput())
104 |     mkdir(img_dir)
105 |     with pytest.raises(RapidVideOCRExeception) as exc_info:
106 |         extractor(img_dir, test_dir)
107 |     assert exc_info.type is RapidVideOCRExeception
108 | 
109 |     shutil.rmtree(img_dir)
110 | 
111 | 
112 | def test_out_only_srt(setup_and_teardown):
113 |     save_dir, srt_path, ass_path, txt_path = setup_and_teardown
114 | 
115 |     img_dir = test_dir / "RGBImages"
116 |     input_param = RapidVideOCRInput(is_batch_rec=True, out_format="srt")
117 |     extractor = RapidVideOCR(input_param)
118 |     extractor(img_dir, save_dir)
119 | 
120 |     srt_data = read_txt(srt_path)
121 |     assert len(srt_data) == 16
122 |     assert srt_data[2] == "空间里面他绝对赢不了的"
123 |     assert srt_data[-2] == "你们接着善后"
124 | 
125 | 
126 | def test_out_only_ass(setup_and_teardown):
127 |     save_dir, srt_path, ass_path, txt_path = setup_and_teardown
128 | 
129 |     img_dir = test_dir / "RGBImages"
130 |     input_param = RapidVideOCRInput(is_batch_rec=True, out_format="ass")
131 |     extractor = RapidVideOCR(input_param)
132 |     extractor(img_dir, save_dir)
133 | 
134 |     ass_data = read_txt(ass_path)
135 |     assert len(ass_data) == 17
136 |     assert ass_data[13].split(",", 9)[-1] == "空间里面他绝对赢不了的"
137 |     assert ass_data[-1].split(",", 9)[-1] == "你们接着善后"
138 | 
139 | 
140 | def test_out_only_txt(setup_and_teardown):
141 |     save_dir, srt_path, ass_path, txt_path = setup_and_teardown
142 | 
143 |     img_dir = test_dir / "RGBImages"
144 |     input_param = RapidVideOCRInput(is_batch_rec=True, out_format="txt")
145 |     extractor = RapidVideOCR(input_param)
146 |     extractor(img_dir, save_dir)
147 | 
148 |     txt_data = read_txt(txt_path)
149 |     assert len(txt_data) == 8
150 |     assert txt_data[-2] == "你们接着善后"
151 | 


--------------------------------------------------------------------------------