├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug.md │ └── config.yml └── workflows │ ├── AutoPushToPypi.yml │ └── SyncToGitee.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── assets ├── RapidVideOCRDemo.ipynb ├── colab-badge.svg └── logo.png ├── cliff.toml ├── demo.py ├── docs ├── README_zh.md └── doc_whl.md ├── rapid_videocr ├── __init__.py ├── export.py ├── main.py ├── ocr_processor.py ├── utils │ ├── __init__.py │ ├── crop_by_project.py │ ├── logger.py │ └── utils.py ├── vsf_cli.py └── vsf_ocr_cli.py ├── requirements.txt ├── setup.py └── tests ├── test_files ├── 2.mp4 ├── RGBImages │ ├── 0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg │ ├── 0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg │ ├── 0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg │ └── 0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg └── TXTImages │ ├── 0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg │ ├── 0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg │ ├── 0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg │ └── 0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg └── test_main.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: https://raw.githubusercontent.com/RapidAI/.github/6db6b6b9273f3151094a462a61fbc8e88564562c/assets/Sponsor.png 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐞 Bug 3 | about: Bug 4 | title: 'Bug' 5 | labels: 'Bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 请提供下述完整信息以便快速定位问题 11 | (Please provide the following information to quickly locate the problem) 12 | - **系统环境/System Environment**: 13 | - **使用的是哪门语言的程序/Which programing language**: 14 | - **所使用语言相关版本信息/Version**: 15 | - **OnnxRuntime版本/OnnxRuntime Version**: 16 | - **使用当前库的版本/Use version**: 17 | - **可复现问题的demo和文件/Demo of reproducible problems**: 18 | - **完整报错/Complete Error Message**: 19 | - **可能的解决方案/Possible solutions**: -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: ❓ Questions 4 | url: https://github.com/SWHL/RapidVideOCR/discussions/categories/q-a 5 | about: Please use the community forum for help and questions regarding RapidVideOCR. 6 | - name: 💡 Feature requests and ideas 7 | url: https://github.com/SWHL/RapidVideOCR/discussions/categories/ideas 8 | about: Please vote for and post new feature ideas in the community forum. 
9 | - name: 📖 Documentation 10 | url: https://swhl.github.io/RapidVideOCR/docs 11 | about: A great place to find instructions and answers about RapidVideOCR. 12 | -------------------------------------------------------------------------------- /.github/workflows/AutoPushToPypi.yml: -------------------------------------------------------------------------------- 1 | name: Push rapid_videocr to pypi 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | UnitTesting: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Pull latest code 13 | uses: actions/checkout@v4 14 | 15 | - name: Set up Python 3.10 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: '3.10' 19 | architecture: 'x64' 20 | 21 | - name: Unit testings 22 | run: | 23 | pip install -r requirements.txt --break-system-packages 24 | pip install pytest six --break-system-packages 25 | pytest -s tests/test_*.py 26 | 27 | GenerateWHL_PushPyPi: 28 | needs: UnitTesting 29 | runs-on: ubuntu-latest 30 | 31 | steps: 32 | - uses: actions/checkout@v4 33 | 34 | - name: Run setup.py 35 | run: | 36 | pip install wheel get_pypi_latest_version --break-system-packages 37 | 38 | python -m pip install --upgrade pip --break-system-packages 39 | python setup.py bdist_wheel ${{ github.ref_name }} 40 | 41 | - name: Publish distribution 📦 to PyPI 42 | uses: pypa/gh-action-pypi-publish@v1.5.0 43 | with: 44 | password: ${{ secrets.PYPI_API_TOKEN }} 45 | packages_dir: dist/ 46 | -------------------------------------------------------------------------------- /.github/workflows/SyncToGitee.yml: -------------------------------------------------------------------------------- 1 | name: syncToGitee 2 | on: 3 | push: 4 | branches: 5 | - '**' 6 | jobs: 7 | repo-sync: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout source codes 11 | uses: actions/checkout@v2 12 | 13 | - name: Mirror the Github organization repos to Gitee. 14 | uses: Yikun/hub-mirror-action@master 15 | with: 16 | src: 'github/SWHL' 17 | dst: 'gitee/SWHL' 18 | dst_key: ${{ secrets.GITEE_PRIVATE_KEY }} 19 | dst_token: ${{ secrets.GITEE_TOKEN }} 20 | force_update: true 21 | # only sync this repo 22 | static_list: "RapidVideOCR" 23 | debug: true 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | rapid_videocr/video_sub_finder/libs 2 | *.vscode 3 | outputs 4 | 5 | *.pyc 6 | 7 | *.onnx 8 | 9 | temp/ 10 | test_files/ 11 | 12 | .DS_Store 13 | 14 | *.bin 15 | 16 | .mypy_cache 17 | 18 | # Created by .ignore support plugin (hsz.mobi) 19 | ### Python template 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | .pytest_cache 25 | 26 | # C extensions 27 | *.so 28 | 29 | # Distribution / packaging 30 | .Python 31 | build/ 32 | develop-eggs/ 33 | dist/ 34 | downloads/ 35 | eggs/ 36 | .eggs/ 37 | lib/ 38 | lib64/ 39 | parts/ 40 | sdist/ 41 | var/ 42 | wheels/ 43 | pip-wheel-metadata/ 44 | share/python-wheels/ 45 | *.egg-info/ 46 | .installed.cfg 47 | *.egg 48 | MANIFEST 49 | 50 | # PyInstaller 51 | # Usually these files are written by a python script from a template 52 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
53 | # *.manifest 54 | # *.spec 55 | *.res 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .nox/ 65 | .coverage 66 | .coverage.* 67 | .cache 68 | nosetests.xml 69 | coverage.xml 70 | *.cover 71 | *.py,cover 72 | .hypothesis/ 73 | .pytest_cache/ 74 | 75 | # Translations 76 | *.mo 77 | *.pot 78 | 79 | # Django stuff: 80 | *.log 81 | local_settings.py 82 | db.sqlite3 83 | db.sqlite3-journal 84 | 85 | # Flask stuff: 86 | instance/ 87 | .webassets-cache 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # Jupyter Notebook 99 | .ipynb_checkpoints 100 | 101 | # IPython 102 | profile_default/ 103 | ipython_config.py 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # pipenv 109 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 110 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 111 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 112 | # install all needed dependencies. 113 | #Pipfile.lock 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | #idea 153 | .vs 154 | .vscode 155 | .idea 156 | /images 157 | /models 158 | 159 | #models 160 | *.onnx 161 | 162 | *.ttf 163 | *.ttc 164 | 165 | long1.jpg 166 | 167 | *.bin 168 | *.mapping 169 | *.xml 170 | 171 | *.pdiparams 172 | *.pdiparams.info 173 | *.pdmodel 174 | 175 | .DS_Store -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/myint/autoflake 3 | rev: v2.1.1 4 | hooks: 5 | - id: autoflake 6 | args: 7 | [ 8 | "--recursive", 9 | "--in-place", 10 | "--remove-all-unused-imports", 11 | "--remove-unused-variable", 12 | "--ignore-init-module-imports", 13 | ] 14 | - repo: https://github.com/psf/black 15 | rev: 23.1.0 16 | hooks: 17 | - id: black 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
 
5 | 6 | 7 | 8 | 9 | 10 | PyPI 11 | 12 | 13 | SemVer2.0 14 | 15 | 16 | [简体中文](https://github.com/SWHL/RapidVideOCR/blob/main/docs/README_zh.md) | English 17 |
18 | 19 | ### Introduction 20 | 21 | - Video hard subtitle extraction, automatically generate the corresponding `srt | ass | txt` file. 22 | - Supported subtitle languages: Chinese | English (For other supported languages, see: [List of supported languages](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)) 23 | - The advantages are as follows: 24 | - **Faster extraction**: Used in conjunction with [VideoSubFinder](https://sourceforge.net/projects/videosubfinder/) software to extract key subtitle frames faster. 25 | - **More accurate recognition**: Use [RapidOCR](https://github.com/RapidAI/RapidOCR) as the recognition library. 26 | - **More convenient to use**: pip can be installed directly and used. 27 | 28 | - For desktop EXE version, please go to [RapidVideOCRDesktop](https://github.com/SWHL/RapidVideOCRDesktop). 29 | - If it helps you, please give a star ⭐. 30 | 31 | ### [Online Demo](https://huggingface.co/spaces/SWHL/RapidVideOCR) 32 | 33 |
34 | Demo 35 |
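### Use as a Python library

Besides the `rapid_videocr` command line shown in the Usage section below, the pipeline can also be driven from Python. The snippet below is a minimal sketch adapted from the repository's `demo.py`, covering only the recognition step; it assumes VideoSubFinder has already extracted the key subtitle frames into an `RGBImages` directory.

```python
from rapid_videocr import RapidVideOCR, RapidVideOCRInput

# Configure recognition; is_batch_rec=False recognizes the key frames one by one.
ocr_input_params = RapidVideOCRInput(is_batch_rec=False)
extractor = RapidVideOCR(ocr_input_params)

# Key-frame directory produced by VideoSubFinder.
rgb_dir = "tests/test_files/RGBImages"
save_dir = "outputs"

# Writes outputs/result.srt, outputs/result.ass and outputs/result.txt.
extractor(rgb_dir, save_dir, save_name="result")
```

`demo.py` also shows the full flow (`RapidVideoSubFinderOCR` with `VideoSubFinderInput`), which runs VideoSubFinder and the OCR step in a single call.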
36 | 37 | ### Overall framework 38 | 39 | ```mermaid 40 | flowchart LR 41 | A[/Video/] --Extract subtitle key frame--> B(VideoSubFinder) --OCR-->C(RapidVideOCR) 42 | C --Convert--> D[/"SRT | ASS | TXT"/] 43 | ``` 44 | 45 | ### Installation 46 | 47 | ```bash 48 | pip install rapid_videocr 49 | ``` 50 | 51 | ### Usage 52 | 53 | > [!NOTE] 54 | > 55 | > The input image path of `rapid_videocr` must be the path of **RGBImages** or **TXTImages** output by **VideoSubFinder** software. 56 | 57 | ```bash 58 | rapid_videocr -i RGBImages 59 | ``` 60 | 61 | ### Documentation 62 | 63 | Full documentation can be found on [docs](https://swhl.github.io/RapidVideOCR/docs), in Chinese. 64 | 65 | ### Code Contributors 66 | 67 |

68 | 69 | 70 | 71 |

72 | 73 | ### Contributing 74 | 75 | - Pull requests are welcome. For major changes, please open an issue first 76 | to discuss what you would like to change. 77 | - Please make sure to update tests as appropriate. 78 | 79 | ### [Sponsor](https://swhl.github.io/RapidVideOCR/docs/sponsor/) 80 | 81 | If you want to sponsor the project, you can directly click the **Buy me a coffee** image, please write a note (e.g. your github account name) to facilitate adding to the sponsorship list below. 82 | 83 |
84 | 85 |
86 | 87 | ### License 88 | 89 | This project is released under the [Apache 2.0 license](./LICENSE). 90 | -------------------------------------------------------------------------------- /assets/RapidVideOCRDemo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "YBjLERcEsTES" 7 | }, 8 | "source": [ 9 | "## [RapidVideOCR Demo](https://github.com/SWHL/RapidVideOCR)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "IPBSdGqbjNpc" 16 | }, 17 | "source": [ 18 | "#### Require:\n", 19 | "- The RGBImages of [Video](https://www.youtube.com/watch?v=Z2Bg_usMYiA) from the VideoSubFinder software.\n", 20 | "- Install the RapidVideOCR" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "IT1t_86aq4QU" 27 | }, 28 | "source": [ 29 | "#### Download the RGBImages.zip and unzip it." 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 33, 35 | "metadata": { 36 | "colab": { 37 | "base_uri": "https://localhost:8080/" 38 | }, 39 | "id": "qWiWiKJWjcH1", 40 | "outputId": "9b5c8098-061a-4f85-b7a8-822e1f26b166" 41 | }, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "--2023-04-09 01:47:40-- https://github.com/SWHL/RapidVideOCR/files/11184614/RGBImages.zip\n", 48 | "Resolving github.com (github.com)... 140.82.112.4\n", 49 | "Connecting to github.com (github.com)|140.82.112.4|:443... connected.\n", 50 | "HTTP request sent, awaiting response... 302 Found\n", 51 | "Location: https://objects.githubusercontent.com/github-production-repository-file-5c1aeb/405589029/11184614?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230409%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230409T014740Z&X-Amz-Expires=300&X-Amz-Signature=fa9c0cb7ec18b1113504c94f60ed8bd6c8250cd040d056396c0dc6caf5184dea&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=405589029&response-content-disposition=attachment%3Bfilename%3DRGBImages.zip&response-content-type=application%2Fx-zip-compressed [following]\n", 52 | "--2023-04-09 01:47:40-- https://objects.githubusercontent.com/github-production-repository-file-5c1aeb/405589029/11184614?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230409%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230409T014740Z&X-Amz-Expires=300&X-Amz-Signature=fa9c0cb7ec18b1113504c94f60ed8bd6c8250cd040d056396c0dc6caf5184dea&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=405589029&response-content-disposition=attachment%3Bfilename%3DRGBImages.zip&response-content-type=application%2Fx-zip-compressed\n", 53 | "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", 54 | "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.108.133|:443... connected.\n", 55 | "HTTP request sent, awaiting response... 
200 OK\n", 56 | "Length: 8314498 (7.9M) [application/x-zip-compressed]\n", 57 | "Saving to: ‘RGBImages.zip’\n", 58 | "\n", 59 | "RGBImages.zip 100%[===================>] 7.93M 43.3MB/s in 0.2s \n", 60 | "\n", 61 | "2023-04-09 01:47:40 (43.3 MB/s) - ‘RGBImages.zip’ saved [8314498/8314498]\n", 62 | "\n", 63 | "Archive: RGBImages.zip\n", 64 | " creating: RGBImages/\n", 65 | " inflating: RGBImages/0_00_20_640__0_00_23_999_0055800000012800072001280.jpeg \n", 66 | " inflating: RGBImages/0_00_25_120__0_00_25_999_0055800000012800072001280.jpeg \n", 67 | " inflating: RGBImages/0_00_26_000__0_00_26_599_0055800000012800072001280.jpeg \n", 68 | " inflating: RGBImages/0_00_27_760__0_00_28_999_0055800000012800072001280.jpeg \n", 69 | " inflating: RGBImages/0_00_30_280__0_00_30_599_0055800000012800072001280.jpeg \n", 70 | " inflating: RGBImages/0_00_30_600__0_00_32_199_0055800000012800072001280.jpeg \n", 71 | " inflating: RGBImages/0_00_32_800__0_00_33_199_0055800000012800072001280.jpeg \n", 72 | " inflating: RGBImages/0_00_33_200__0_00_34_959_0055800000012800072001280.jpeg \n", 73 | " inflating: RGBImages/0_00_34_960__0_00_35_519_0055800000012800072001280.jpeg \n", 74 | " inflating: RGBImages/0_00_39_040__0_00_39_479_0055800000012800072001280.jpeg \n", 75 | " inflating: RGBImages/0_00_40_040__0_00_41_679_0055800000012800072001280.jpeg \n", 76 | " inflating: RGBImages/0_00_41_680__0_00_42_919_0055800000012800072001280.jpeg \n", 77 | " inflating: RGBImages/0_00_42_920__0_00_43_439_0055800000012800072001280.jpeg \n", 78 | " inflating: RGBImages/0_00_43_440__0_00_43_799_0055800000012800072001280.jpeg \n", 79 | " inflating: RGBImages/0_00_44_920__0_00_45_359_0055800000012800072001280.jpeg \n", 80 | " inflating: RGBImages/0_00_45_360__0_00_47_799_0055800000012800072001280.jpeg \n", 81 | " inflating: RGBImages/0_00_47_800__0_00_48_159_0055800000012800072001280.jpeg \n", 82 | " inflating: RGBImages/0_00_48_160__0_00_48_559_0055800000012800072001280.jpeg \n", 83 | " inflating: RGBImages/0_00_50_520__0_00_54_079_0055800000012800072001280.jpeg \n", 84 | " inflating: RGBImages/0_00_54_080__0_00_55_799_0055800000012800072001280.jpeg \n", 85 | " inflating: RGBImages/0_00_56_000__0_00_56_359_0055800000012800072001280.jpeg \n", 86 | " inflating: RGBImages/0_00_57_680__0_00_57_999_0055800000012800072001280.jpeg \n", 87 | " inflating: RGBImages/0_01_00_120__0_01_00_759_0055800000012800072001280.jpeg \n", 88 | " inflating: RGBImages/0_01_02_160__0_01_02_919_0055800000012800072001280.jpeg \n", 89 | " inflating: RGBImages/0_01_04_320__0_01_06_759_0055800000012800072001280.jpeg \n", 90 | " inflating: RGBImages/0_01_08_120__0_01_08_679_0055800000012800072001280.jpeg \n", 91 | " inflating: RGBImages/0_01_08_680__0_01_13_119_0055800000012800072001280.jpeg \n", 92 | " inflating: RGBImages/0_01_13_120__0_01_13_799_0055800000012800072001280.jpeg \n", 93 | " inflating: RGBImages/0_01_13_800__0_01_16_079_0055800000012800072001280.jpeg \n", 94 | " inflating: RGBImages/0_01_16_080__0_01_17_039_0055800000012800072001280.jpeg \n", 95 | " inflating: RGBImages/0_01_19_320__0_01_20_359_0055800000012800072001280.jpeg \n", 96 | " inflating: RGBImages/0_01_20_360__0_01_21_919_0055800000012800072001280.jpeg \n", 97 | " inflating: RGBImages/0_01_23_120__0_01_23_559_0055800000012800072001280.jpeg \n", 98 | " inflating: RGBImages/0_01_23_560__0_01_24_959_0055800000012800072001280.jpeg \n", 99 | " inflating: RGBImages/0_01_24_960__0_01_25_559_0055800000012800072001280.jpeg \n", 100 | " inflating: 
RGBImages/0_01_25_560__0_01_26_159_0055800000012800072001280.jpeg \n", 101 | " inflating: RGBImages/0_01_27_560__0_01_27_919_0055800000012800072001280.jpeg \n", 102 | " inflating: RGBImages/0_01_27_920__0_01_30_439_0055800000012800072001280.jpeg \n", 103 | " inflating: RGBImages/0_01_30_440__0_01_31_119_0055800000012800072001280.jpeg \n", 104 | " inflating: RGBImages/0_01_31_120__0_01_31_599_0055800000012800072001280.jpeg \n", 105 | " inflating: RGBImages/0_01_31_600__0_01_32_119_0055800000012800072001280.jpeg \n", 106 | " inflating: RGBImages/0_01_33_040__0_01_34_639_0055800000012800072001280.jpeg \n", 107 | " inflating: RGBImages/0_01_34_640__0_01_38_439_0055800000012800072001280.jpeg \n", 108 | " inflating: RGBImages/0_01_38_440__0_01_38_839_0055800000012800072001280.jpeg \n", 109 | " inflating: RGBImages/0_01_39_960__0_01_40_279_0055800000012800072001280.jpeg \n", 110 | " inflating: RGBImages/0_01_40_280__0_01_40_879_0055800000012800072001280.jpeg \n", 111 | " inflating: RGBImages/0_01_47_920__0_01_48_559_0055800000012800072001280.jpeg \n", 112 | " inflating: RGBImages/0_01_48_560__0_01_50_679_0055800000012800072001280.jpeg \n", 113 | " inflating: RGBImages/0_01_50_920__0_01_51_319_0055800000012800072001280.jpeg \n", 114 | " inflating: RGBImages/0_01_52_520__0_01_53_359_0055800000012800072001280.jpeg \n", 115 | " inflating: RGBImages/0_01_53_360__0_01_53_999_0055800000012800072001280.jpeg \n", 116 | " inflating: RGBImages/0_01_54_000__0_01_56_159_0055800000012800072001280.jpeg \n", 117 | " inflating: RGBImages/0_01_56_160__0_01_56_959_0055800000012800072001280.jpeg \n", 118 | " inflating: RGBImages/0_01_58_040__0_01_58_399_0055800000012800072001280.jpeg \n", 119 | " inflating: RGBImages/0_01_58_400__0_01_59_639_0055800000012800072001280.jpeg \n", 120 | " inflating: RGBImages/0_01_59_640__0_02_00_479_0055800000012800072001280.jpeg \n", 121 | " inflating: RGBImages/0_02_00_480__0_02_01_039_0055800000012800072001280.jpeg \n", 122 | " inflating: RGBImages/0_02_02_240__0_02_02_799_0055800000012800072001280.jpeg \n", 123 | " inflating: RGBImages/0_02_02_800__0_02_04_039_0055800000012800072001280.jpeg \n", 124 | " inflating: RGBImages/0_02_08_000__0_02_09_038_0055800000012800072001280.jpeg \n", 125 | " inflating: RGBImages/0_02_09_039__0_02_10_198_0055800000012800072001280.jpeg \n", 126 | " inflating: RGBImages/0_02_11_720__0_02_13_119_0055800000012800072001280.jpeg \n", 127 | " inflating: RGBImages/0_02_13_280__0_02_13_799_0055800000012800072001280.jpeg \n", 128 | " inflating: RGBImages/0_02_13_800__0_02_14_719_0055800000012800072001280.jpeg \n", 129 | " inflating: RGBImages/0_02_14_720__0_02_15_239_0055800000012800072001280.jpeg \n", 130 | " inflating: RGBImages/0_02_15_240__0_02_15_839_0055800000012800072001280.jpeg \n", 131 | " inflating: RGBImages/0_02_17_640__0_02_21_719_0055800000012800072001280.jpeg \n", 132 | " inflating: RGBImages/0_02_21_720__0_02_22_639_0055800000012800072001280.jpeg \n", 133 | " inflating: RGBImages/0_02_26_640__0_02_27_239_0055800000012800072001280.jpeg \n", 134 | " inflating: RGBImages/0_02_27_240__0_02_27_879_0055800000012800072001280.jpeg \n", 135 | " inflating: RGBImages/0_02_27_920__0_02_28_479_0055800000012800072001280.jpeg \n", 136 | " inflating: RGBImages/0_02_29_360__0_02_30_119_0055800000012800072001280.jpeg \n", 137 | " inflating: RGBImages/0_02_30_240__0_02_30_639_0055800000012800072001280.jpeg \n", 138 | " inflating: RGBImages/0_02_31_200__0_02_31_599_0055800000012800072001280.jpeg \n", 139 | " inflating: 
RGBImages/0_02_31_600__0_02_32_559_0055800000012800072001280.jpeg \n", 140 | " inflating: RGBImages/0_02_32_560__0_02_33_439_0055800000012800072001280.jpeg \n", 141 | " inflating: RGBImages/0_02_33_440__0_02_34_079_0055800000012800072001280.jpeg \n", 142 | " inflating: RGBImages/0_02_35_520__0_02_37_159_0055800000012800072001280.jpeg \n", 143 | " inflating: RGBImages/0_02_37_160__0_02_41_959_0055800000012800072001280.jpeg \n", 144 | " inflating: RGBImages/0_02_46_440__0_02_47_039_0055800000012800072001280.jpeg \n", 145 | " inflating: RGBImages/0_02_47_040__0_02_48_199_0055800000012800072001280.jpeg \n", 146 | " inflating: RGBImages/0_02_50_520__0_02_50_879_0055800000012800072001280.jpeg \n", 147 | " inflating: RGBImages/0_02_50_880__0_02_53_279_0055800000012800072001280.jpeg \n", 148 | " inflating: RGBImages/0_02_54_840__0_02_56_679_0055800000012800072001280.jpeg \n", 149 | " inflating: RGBImages/0_02_56_680__0_02_57_519_0055800000012800072001280.jpeg \n", 150 | " inflating: RGBImages/0_02_57_520__0_02_57_999_0055800000012800072001280.jpeg \n", 151 | " inflating: RGBImages/0_03_00_360__0_03_00_919_0055800000012800072001280.jpeg \n", 152 | " inflating: RGBImages/0_03_00_920__0_03_01_519_0055800000012800072001280.jpeg \n", 153 | " inflating: RGBImages/0_03_01_560__0_03_04_599_0055800000012800072001280.jpeg \n", 154 | " inflating: RGBImages/0_03_04_600__0_03_05_879_0055800000012800072001280.jpeg \n", 155 | " inflating: RGBImages/0_03_05_880__0_03_06_759_0055800000012800072001280.jpeg \n", 156 | " inflating: RGBImages/0_03_10_160__0_03_10_559_0055800000012800072001280.jpeg \n", 157 | " inflating: RGBImages/0_03_11_680__0_03_11_999_0055800000012800072001280.jpeg \n", 158 | " inflating: RGBImages/0_03_12_040__0_03_12_399_0055800000012800072001280.jpeg \n", 159 | " inflating: RGBImages/0_03_12_400__0_03_12_919_0055800000012800072001280.jpeg \n", 160 | " inflating: RGBImages/0_03_12_920__0_03_13_239_0055800000012800072001280.jpeg \n", 161 | " inflating: RGBImages/0_03_13_240__0_03_13_599_0055800000012800072001280.jpeg \n", 162 | " inflating: RGBImages/0_03_21_000__0_03_21_479_0055800000012800072001280.jpeg \n", 163 | " inflating: RGBImages/0_03_21_600__0_03_21_919_0055800000012800072001280.jpeg \n", 164 | " inflating: RGBImages/0_03_21_920__0_03_22_239_0055800000012800072001280.jpeg \n", 165 | " inflating: RGBImages/0_03_24_480__0_03_24_919_0055800000012800072001280.jpeg \n", 166 | " inflating: RGBImages/0_03_24_920__0_03_25_639_0055800000012800072001280.jpeg \n", 167 | " inflating: RGBImages/0_03_25_640__0_03_27_119_0055800000012800072001280.jpeg \n", 168 | " inflating: RGBImages/0_03_27_120__0_03_27_999_0055800000012800072001280.jpeg \n", 169 | " inflating: RGBImages/0_03_29_520__0_03_30_039_0055800000012800072001280.jpeg \n", 170 | " inflating: RGBImages/0_03_30_120__0_03_30_759_0055800000012800072001280.jpeg \n", 171 | " inflating: RGBImages/0_03_30_760__0_03_31_639_0055800000012800072001280.jpeg \n", 172 | " inflating: RGBImages/0_03_31_640__0_03_31_959_0055800000012800072001280.jpeg \n", 173 | " inflating: RGBImages/0_03_31_960__0_03_32_319_0055800000012800072001280.jpeg \n", 174 | " inflating: RGBImages/0_03_33_680__0_03_33_999_0055800000012800072001280.jpeg \n", 175 | " inflating: RGBImages/0_03_34_000__0_03_34_599_0055800000012800072001280.jpeg \n", 176 | " inflating: RGBImages/0_03_34_600__0_03_35_399_0055800000012800072001280.jpeg \n", 177 | " inflating: RGBImages/0_03_35_520__0_03_37_959_0055800000012800072001280.jpeg \n", 178 | " inflating: 
RGBImages/0_03_38_400__0_03_38_879_0055800000012800072001280.jpeg \n", 179 | " inflating: RGBImages/0_03_38_880__0_03_39_439_0055800000012800072001280.jpeg \n", 180 | " inflating: RGBImages/0_03_39_440__0_03_39_919_0055800000012800072001280.jpeg \n", 181 | " inflating: RGBImages/0_03_40_160__0_03_40_599_0055800000012800072001280.jpeg \n", 182 | " inflating: RGBImages/0_03_40_600__0_03_40_919_0055800000012800072001280.jpeg \n", 183 | " inflating: RGBImages/0_03_40_920__0_03_41_399_0055800000012800072001280.jpeg \n", 184 | " inflating: RGBImages/0_03_44_240__0_03_44_679_0055800000012800072001280.jpeg \n", 185 | " inflating: RGBImages/0_03_44_680__0_03_44_999_0055800000012800072001280.jpeg \n", 186 | " inflating: RGBImages/0_03_45_000__0_03_49_239_0055800000012800072001280.jpeg \n", 187 | " inflating: RGBImages/0_03_49_240__0_03_50_799_0055800000012800072001280.jpeg \n", 188 | " inflating: RGBImages/0_03_50_840__0_03_51_199_0055800000012800072001280.jpeg \n", 189 | " inflating: RGBImages/0_03_51_200__0_03_51_599_0055800000012800072001280.jpeg \n", 190 | " inflating: RGBImages/0_03_57_240__0_03_57_919_0055800000012800072001280.jpeg \n", 191 | " inflating: RGBImages/0_03_58_440__0_03_59_199_0055800000012800072001280.jpeg \n", 192 | " inflating: RGBImages/0_03_59_200__0_04_05_279_0055800000012800072001280.jpeg \n", 193 | " inflating: RGBImages/0_04_05_280__0_04_06_919_0055800000012800072001280.jpeg \n", 194 | " inflating: RGBImages/0_04_20_840__0_04_21_159_0055800000012800072001280.jpeg \n", 195 | " inflating: RGBImages/0_04_34_720__0_04_35_879_0055800000012800072001280.jpeg \n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "!wget https://github.com/SWHL/RapidVideOCR/files/11184614/RGBImages.zip\n", 201 | "!unzip RGBImages.zip\n", 202 | "!rm RGBImages.zip" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": { 208 | "id": "1cTofr4Zq_WB" 209 | }, 210 | "source": [ 211 | "#### Install the RapidVideOCR package." 
212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 34, 217 | "metadata": { 218 | "colab": { 219 | "base_uri": "https://localhost:8080/" 220 | }, 221 | "id": "5URYsomEqnuh", 222 | "outputId": "3a6093db-bfaa-4069-e92e-2e7bab97f987" 223 | }, 224 | "outputs": [ 225 | { 226 | "name": "stdout", 227 | "output_type": "stream", 228 | "text": [ 229 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 230 | "Requirement already satisfied: rapid_videocr in /usr/local/lib/python3.9/dist-packages (2.1.6)\n", 231 | "Requirement already satisfied: rapidocr-onnxruntime>=1.2.2 in /usr/local/lib/python3.9/dist-packages (from rapid_videocr) (1.2.5)\n", 232 | "Requirement already satisfied: tqdm>=4.52.0 in /usr/local/lib/python3.9/dist-packages (from rapid_videocr) (4.65.0)\n", 233 | "Requirement already satisfied: PyYAML in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (6.0)\n", 234 | "Requirement already satisfied: Pillow in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (8.4.0)\n", 235 | "Requirement already satisfied: onnxruntime>=1.7.0 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.14.1)\n", 236 | "Requirement already satisfied: Shapely>=1.7.1 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (2.0.1)\n", 237 | "Requirement already satisfied: pyclipper>=1.2.1 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.3.0.post4)\n", 238 | "Requirement already satisfied: numpy>=1.19.3 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.22.4)\n", 239 | "Requirement already satisfied: opencv-python>=4.5.1.48 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (4.7.0.72)\n", 240 | "Requirement already satisfied: six>=1.15.0 in /usr/local/lib/python3.9/dist-packages (from rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.16.0)\n", 241 | "Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.11.1)\n", 242 | "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (23.3.3)\n", 243 | "Requirement already satisfied: protobuf in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (3.20.3)\n", 244 | "Requirement already satisfied: packaging in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (23.0)\n", 245 | "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.9/dist-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (15.0.1)\n", 246 | "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.9/dist-packages (from coloredlogs->onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (10.0)\n", 247 | "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->onnxruntime>=1.7.0->rapidocr-onnxruntime>=1.2.2->rapid_videocr) (1.3.0)\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "!pip install rapid_videocr" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 35, 258 | "metadata": { 259 | "colab": { 260 | 
"base_uri": "https://localhost:8080/" 261 | }, 262 | "id": "13GXToLcrFl8", 263 | "outputId": "d18fb2c0-79ae-4e29-9b27-de7f7e980707" 264 | }, 265 | "outputs": [ 266 | { 267 | "name": "stdout", 268 | "output_type": "stream", 269 | "text": [ 270 | "Running with concat recognition.\n", 271 | "OCR: 100% 14/14 [00:28<00:00, 2.07s/it]\n", 272 | "The file has been saved in the result/result.srt\n", 273 | "The result has been saved to result directory.\n" 274 | ] 275 | } 276 | ], 277 | "source": [ 278 | "!rapid_videocr -i /content/RGBImages -s result -o srt" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": { 284 | "id": "DR8RbHFisLZK" 285 | }, 286 | "source": [ 287 | "#### Look the result." 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 36, 293 | "metadata": { 294 | "colab": { 295 | "base_uri": "https://localhost:8080/" 296 | }, 297 | "id": "g2PdZnGJrsdx", 298 | "outputId": "c2107dd0-f099-464c-91a0-247294e69c60" 299 | }, 300 | "outputs": [ 301 | { 302 | "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "1\n", 306 | "00:00:20,640 --> 00:00:23,999\n", 307 | "Eyelyinightinmydreams\n", 308 | "\n", 309 | "2\n", 310 | "00:00:25,120 --> 00:00:25,999\n", 311 | "Iseeyou\n", 312 | "\n", 313 | "3\n", 314 | "00:00:26,000 --> 00:00:26,599\n", 315 | "Iseeyou\n", 316 | "\n", 317 | "4\n", 318 | "00:00:27,760 --> 00:00:28,999\n", 319 | "Ifell you\n", 320 | "\n", 321 | "5\n", 322 | "00:00:30,280 --> 00:00:30,599\n", 323 | "That is hiow I know you go on\n", 324 | "\n", 325 | "6\n", 326 | "00:00:30,600 --> 00:00:32,199\n", 327 | "howIknowyougoon\n", 328 | "\n", 329 | "7\n", 330 | "00:00:32,800 --> 00:00:33,199\n", 331 | "That is howIkhowyougo.on\n", 332 | "\n", 333 | "8\n", 334 | "00:00:33,200 --> 00:00:34,959\n", 335 | "That is how I know you go on\n", 336 | "\n", 337 | "9\n", 338 | "00:00:34,960 --> 00:00:35,519\n", 339 | "That is how I know you go on\n", 340 | "\n", 341 | "11\n", 342 | "00:00:40,040 --> 00:00:41,679\n", 343 | "Faracrossthedistance\n", 344 | "\n", 345 | "12\n", 346 | "00:00:41,680 --> 00:00:42,919\n", 347 | "Faracross the distance\n", 348 | "\n", 349 | "13\n", 350 | "00:00:42,920 --> 00:00:43,439\n", 351 | "Faracross thelistance\n", 352 | "\n", 353 | "14\n", 354 | "00:00:43,440 --> 00:00:43,799\n", 355 | "Faracrosshe Mistance\n", 356 | "\n", 357 | "15\n", 358 | "00:00:44,920 --> 00:00:45,359\n", 359 | "and Spaces between us\n", 360 | "\n", 361 | "16\n", 362 | "00:00:45,360 --> 00:00:47,799\n", 363 | "and Spaces between us\n", 364 | "\n", 365 | "17\n", 366 | "00:00:47,800 --> 00:00:48,159\n", 367 | "and Spaces betweenus\n", 368 | "\n", 369 | "18\n", 370 | "00:00:48,160 --> 00:00:48,559\n", 371 | "and Spacesbetween us\n", 372 | "\n", 373 | "19\n", 374 | "00:00:50,520 --> 00:00:54,079\n", 375 | "u havecometo showyou go on\n", 376 | "\n", 377 | "20\n", 378 | "00:00:54,080 --> 00:00:55,799\n", 379 | "You have come to show vou go on\n", 380 | "\n", 381 | "21\n", 382 | "00:00:56,000 --> 00:00:56,359\n", 383 | "You haveoreto show y\n", 384 | "\n", 385 | "23\n", 386 | "00:01:00,120 --> 00:01:00,759\n", 387 | "Near\n", 388 | "\n", 389 | "24\n", 390 | "00:01:02,160 --> 00:01:02,919\n", 391 | "far\n", 392 | "\n", 393 | "25\n", 394 | "00:01:04,320 --> 00:01:06,759\n", 395 | "reveryouare\n", 396 | "\n", 397 | "26\n", 398 | "00:01:08,120 --> 00:01:08,679\n", 399 | "I belieye that the heart does go\n", 400 | "on\n", 401 | "\n", 402 | "27\n", 403 | "00:01:08,680 --> 00:01:13,119\n", 404 | "I believe that the heart does go\n", 405 | "on\n", 
406 | "\n", 407 | "28\n", 408 | "00:01:13,120 --> 00:01:13,799\n", 409 | "I believe that the heart does go\n", 410 | "on\n", 411 | "\n", 412 | "29\n", 413 | "00:01:13,800 --> 00:01:16,079\n", 414 | "I believe that the heart does go\n", 415 | "on\n", 416 | "\n", 417 | "30\n", 418 | "00:01:16,080 --> 00:01:17,039\n", 419 | "I believe that the heart does go\n", 420 | "on\n", 421 | "\n", 422 | "31\n", 423 | "00:01:19,320 --> 00:01:20,359\n", 424 | "Once more\n", 425 | "\n", 426 | "32\n", 427 | "00:01:20,360 --> 00:01:21,919\n", 428 | "Once more\n", 429 | "\n", 430 | "33\n", 431 | "00:01:23,120 --> 00:01:23,559\n", 432 | "thedoor\n", 433 | "you\n", 434 | "\n", 435 | "34\n", 436 | "00:01:23,560 --> 00:01:24,959\n", 437 | "you open the door\n", 438 | "\n", 439 | "35\n", 440 | "00:01:24,960 --> 00:01:25,559\n", 441 | "youopen thedoor\n", 442 | "\n", 443 | "36\n", 444 | "00:01:25,560 --> 00:01:26,159\n", 445 | "you open the door\n", 446 | "\n", 447 | "37\n", 448 | "00:01:27,560 --> 00:01:27,919\n", 449 | "And you're here in my heart\n", 450 | "\n", 451 | "38\n", 452 | "00:01:27,920 --> 00:01:30,439\n", 453 | "And you're here in my heart\n", 454 | "\n", 455 | "39\n", 456 | "00:01:30,440 --> 00:01:31,119\n", 457 | "And you're here in iny heart\n", 458 | "\n", 459 | "40\n", 460 | "00:01:31,120 --> 00:01:31,599\n", 461 | "Andyou're here inm heart\n", 462 | "\n", 463 | "41\n", 464 | "00:01:31,600 --> 00:01:32,119\n", 465 | "Andeinmneart\n", 466 | "\n", 467 | "42\n", 468 | "00:01:33,040 --> 00:01:34,639\n", 469 | "my heartwim goonand\n", 470 | "on\n", 471 | "\n", 472 | "43\n", 473 | "00:01:34,640 --> 00:01:38,439\n", 474 | "my heart will go on and\n", 475 | "on\n", 476 | "\n", 477 | "44\n", 478 | "00:01:38,440 --> 00:01:38,839\n", 479 | "my heart will go on and\n", 480 | "on\n", 481 | "\n", 482 | "47\n", 483 | "00:01:47,920 --> 00:01:48,559\n", 484 | "Love can touch us onetime\n", 485 | "\n", 486 | "48\n", 487 | "00:01:48,560 --> 00:01:50,679\n", 488 | "Lovecantouch usonetime\n", 489 | "\n", 490 | "49\n", 491 | "00:01:50,920 --> 00:01:51,319\n", 492 | "Love cantouch usone time\n", 493 | "\n", 494 | "50\n", 495 | "00:01:52,520 --> 00:01:53,359\n", 496 | "And lastforaJifetime\n", 497 | "\n", 498 | "51\n", 499 | "00:01:53,360 --> 00:01:53,999\n", 500 | "And lastfora lifetime\n", 501 | "\n", 502 | "52\n", 503 | "00:01:54,000 --> 00:01:56,159\n", 504 | "And last fora lifetime\n", 505 | "\n", 506 | "53\n", 507 | "00:01:56,160 --> 00:01:56,959\n", 508 | "An st for a lifetime\n", 509 | "\n", 510 | "54\n", 511 | "00:01:58,040 --> 00:01:58,399\n", 512 | "And never let go till\n", 513 | "\n", 514 | "55\n", 515 | "00:01:58,400 --> 00:01:59,639\n", 516 | "And never let go till\n", 517 | "\n", 518 | "56\n", 519 | "00:01:59,640 --> 00:02:00,479\n", 520 | "And never let go till\n", 521 | "\n", 522 | "57\n", 523 | "00:02:00,480 --> 00:02:01,039\n", 524 | "And never let go till\n", 525 | "\n", 526 | "58\n", 527 | "00:02:02,240 --> 00:02:02,799\n", 528 | "we're gone\n", 529 | "\n", 530 | "59\n", 531 | "00:02:02,800 --> 00:02:04,039\n", 532 | "we're gone\n", 533 | "\n", 534 | "62\n", 535 | "00:02:11,720 --> 00:02:13,119\n", 536 | "one true time\n", 537 | "\n", 538 | "65\n", 539 | "00:02:14,720 --> 00:02:15,239\n", 540 | "Tholdto\n", 541 | "\n", 542 | "66\n", 543 | "00:02:15,240 --> 00:02:15,839\n", 544 | "I holdto\n", 545 | "\n", 546 | "67\n", 547 | "00:02:17,640 --> 00:02:21,719\n", 548 | "Imy lifewe'll alwaysgo on\n", 549 | "\n", 550 | "68\n", 551 | "00:02:21,720 --> 00:02:22,639\n", 552 | "I my life we'll always go 
on\n", 553 | "\n", 554 | "69\n", 555 | "00:02:26,640 --> 00:02:27,239\n", 556 | "Near\n", 557 | "\n", 558 | "70\n", 559 | "00:02:27,240 --> 00:02:27,879\n", 560 | "Near\n", 561 | "\n", 562 | "72\n", 563 | "00:02:29,360 --> 00:02:30,119\n", 564 | "far\n", 565 | "\n", 566 | "74\n", 567 | "00:02:31,200 --> 00:02:31,599\n", 568 | "whereveryou are\n", 569 | "\n", 570 | "75\n", 571 | "00:02:31,600 --> 00:02:32,559\n", 572 | "wherever you-are\n", 573 | "\n", 574 | "76\n", 575 | "00:02:32,560 --> 00:02:33,439\n", 576 | "whereveryou are\n", 577 | "\n", 578 | "77\n", 579 | "00:02:33,440 --> 00:02:34,079\n", 580 | "whereveryou are\n", 581 | "\n", 582 | "78\n", 583 | "00:02:35,520 --> 00:02:37,159\n", 584 | "I believe that the heart does go\n", 585 | "on\n", 586 | "\n", 587 | "79\n", 588 | "00:02:37,160 --> 00:02:41,959\n", 589 | "I believe that the heart does go\n", 590 | "on\n", 591 | "\n", 592 | "80\n", 593 | "00:02:46,440 --> 00:02:47,039\n", 594 | "Once more\n", 595 | "\n", 596 | "81\n", 597 | "00:02:47,040 --> 00:02:48,199\n", 598 | "Oncemore\n", 599 | "\n", 600 | "82\n", 601 | "00:02:50,520 --> 00:02:50,879\n", 602 | "you openthe dooi\n", 603 | "\n", 604 | "83\n", 605 | "00:02:50,880 --> 00:02:53,279\n", 606 | "you open the door\n", 607 | "\n", 608 | "84\n", 609 | "00:02:54,840 --> 00:02:56,679\n", 610 | "And you're here in my heart\n", 611 | "\n", 612 | "85\n", 613 | "00:02:56,680 --> 00:02:57,519\n", 614 | "And you're here in my heart\n", 615 | "\n", 616 | "86\n", 617 | "00:02:57,520 --> 00:02:57,999\n", 618 | "And you're here in my heart\n", 619 | "\n", 620 | "87\n", 621 | "00:03:00,360 --> 00:03:00,919\n", 622 | "heartwill goonand\n", 623 | "\n", 624 | "88\n", 625 | "00:03:00,920 --> 00:03:01,519\n", 626 | "my heart will go on and\n", 627 | "on\n", 628 | "\n", 629 | "89\n", 630 | "00:03:01,560 --> 00:03:04,599\n", 631 | "my heart will go on and\n", 632 | "on\n", 633 | "on\n", 634 | "my heart will go on and\n", 635 | "\n", 636 | "90\n", 637 | "00:03:04,600 --> 00:03:05,879\n", 638 | "on\n", 639 | "\n", 640 | "91\n", 641 | "00:03:05,880 --> 00:03:06,759\n", 642 | "my heart will go on and\n", 643 | "on\n", 644 | "\n", 645 | "102\n", 646 | "00:03:24,920 --> 00:03:25,639\n", 647 | "You're here\n", 648 | "\n", 649 | "103\n", 650 | "00:03:25,640 --> 00:03:27,119\n", 651 | "You're here\n", 652 | "\n", 653 | "104\n", 654 | "00:03:27,120 --> 00:03:27,999\n", 655 | "You're here\n", 656 | "\n", 657 | "105\n", 658 | "00:03:29,520 --> 00:03:30,039\n", 659 | "there's nothing I fear\n", 660 | "\n", 661 | "106\n", 662 | "00:03:30,120 --> 00:03:30,759\n", 663 | "there's nothing Ifear\n", 664 | "\n", 665 | "107\n", 666 | "00:03:30,760 --> 00:03:31,639\n", 667 | "there's nothing Ifear\n", 668 | "\n", 669 | "108\n", 670 | "00:03:31,640 --> 00:03:31,959\n", 671 | "there's nothing I fear\n", 672 | "\n", 673 | "109\n", 674 | "00:03:31,960 --> 00:03:32,319\n", 675 | "there nothigIfear\n", 676 | "\n", 677 | "110\n", 678 | "00:03:33,680 --> 00:03:33,999\n", 679 | "AndIknow\n", 680 | "\n", 681 | "111\n", 682 | "00:03:34,000 --> 00:03:34,599\n", 683 | "AndIknow\n", 684 | "\n", 685 | "112\n", 686 | "00:03:34,600 --> 00:03:35,399\n", 687 | "AndIknow\n", 688 | "\n", 689 | "113\n", 690 | "00:03:35,520 --> 00:03:37,959\n", 691 | "that my heart will you go on\n", 692 | "\n", 693 | "114\n", 694 | "00:03:38,400 --> 00:03:38,879\n", 695 | "that my heart will you go on\n", 696 | "\n", 697 | "115\n", 698 | "00:03:38,880 --> 00:03:39,439\n", 699 | "that my heart will you go on\n", 700 | "\n", 701 | "116\n", 702 | "00:03:39,440 --> 
00:03:39,919\n", 703 | "that my heart will you go on\n", 704 | "\n", 705 | "117\n", 706 | "00:03:40,160 --> 00:03:40,599\n", 707 | "that my heart will you go on\n", 708 | "\n", 709 | "118\n", 710 | "00:03:40,600 --> 00:03:40,919\n", 711 | "that my heart will you go on\n", 712 | "\n", 713 | "119\n", 714 | "00:03:40,920 --> 00:03:41,399\n", 715 | "that my heart will you go on\n", 716 | "\n", 717 | "120\n", 718 | "00:03:44,240 --> 00:03:44,679\n", 719 | "Weill stay foreverthsway\n", 720 | "\n", 721 | "121\n", 722 | "00:03:44,680 --> 00:03:44,999\n", 723 | "We'll stayforever this way\n", 724 | "\n", 725 | "122\n", 726 | "00:03:45,000 --> 00:03:49,239\n", 727 | "We'll stay forever this way\n", 728 | "\n", 729 | "123\n", 730 | "00:03:49,240 --> 00:03:50,799\n", 731 | "We'll stay forever this way\n", 732 | "\n", 733 | "124\n", 734 | "00:03:50,840 --> 00:03:51,199\n", 735 | "We'll stay forever this way\n", 736 | "\n", 737 | "125\n", 738 | "00:03:51,200 --> 00:03:51,599\n", 739 | "We'll stay forever this way\n", 740 | "\n", 741 | "126\n", 742 | "00:03:57,240 --> 00:03:57,919\n", 743 | "You are And in my heart\n", 744 | "\n", 745 | "127\n", 746 | "00:03:58,440 --> 00:03:59,199\n", 747 | "my heart will go on and\n", 748 | "on\n", 749 | "\n", 750 | "128\n", 751 | "00:03:59,200 --> 00:04:05,279\n", 752 | "my heart will go on and\n", 753 | "on\n", 754 | "\n", 755 | "129\n", 756 | "00:04:05,280 --> 00:04:06,919\n", 757 | "my heart will go on and\n", 758 | "on\n", 759 | "\n" 760 | ] 761 | } 762 | ], 763 | "source": [ 764 | "!cat result/result.srt" 765 | ] 766 | }, 767 | { 768 | "cell_type": "code", 769 | "execution_count": null, 770 | "metadata": { 771 | "id": "cNjpqvivs1ZA" 772 | }, 773 | "outputs": [], 774 | "source": [] 775 | } 776 | ], 777 | "metadata": { 778 | "colab": { 779 | "provenance": [] 780 | }, 781 | "kernelspec": { 782 | "display_name": "Python 3", 783 | "name": "python3" 784 | } 785 | }, 786 | "nbformat": 4, 787 | "nbformat_minor": 0 788 | } 789 | -------------------------------------------------------------------------------- /assets/colab-badge.svg: -------------------------------------------------------------------------------- 1 | Open in ColabOpen in Colab 2 | -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/assets/logo.png -------------------------------------------------------------------------------- /cliff.toml: -------------------------------------------------------------------------------- 1 | # git-cliff ~ configuration file 2 | # https://git-cliff.org/docs/configuration 3 | 4 | [changelog] 5 | # A Tera template to be rendered as the changelog's footer. 6 | # See https://keats.github.io/tera/docs/#introduction 7 | # header = """ 8 | # # Changelog\n 9 | # All notable changes to this project will be documented in this file. See [conventional commits](https://www.conventionalcommits.org/) for commit guidelines.\n 10 | # """ 11 | # A Tera template to be rendered for each release in the changelog. 
12 | # See https://keats.github.io/tera/docs/#introduction 13 | body = """ 14 | {% for group, commits in commits | group_by(attribute="group") %} 15 | ### {{ group | striptags | trim | upper_first }} 16 | {% for commit in commits 17 | | filter(attribute="scope") 18 | | sort(attribute="scope") %} 19 | - **({{commit.scope}})**{% if commit.breaking %} [**breaking**]{% endif %} \ 20 | {{ commit.message }} by [@{{ commit.author.name }}](https://github.com/{{ commit.author.name }}) in [{{ commit.id | truncate(length=7, end="") }}]($REPO/commit/{{ commit.id }}) 21 | {%- endfor -%} 22 | {% raw %}\n{% endraw %}\ 23 | {%- for commit in commits %} 24 | {%- if commit.scope -%} 25 | {% else -%} 26 | - {% if commit.breaking %} [**breaking**]{% endif %}\ 27 | {{ commit.message }} by [@{{ commit.author.name }}](https://github.com/{{ commit.author.name }}) in [{{ commit.id | truncate(length=7, end="") }}]($REPO/commit/{{ commit.id }}) 28 | {% endif -%} 29 | {% endfor -%} 30 | {% endfor %} 31 | 32 | 33 | {% if github.contributors | length > 0 %} 34 | ### 🎉 Contributors 35 | 36 | {% for contributor in github.contributors %} 37 | - [@{{ contributor.username }}](https://github.com/{{ contributor.username }}) 38 | {%- endfor -%} 39 | {% endif %} 40 | 41 | 42 | {% if version %} 43 | {% if previous.version %}\ 44 | **Full Changelog**: [{{ version | trim_start_matches(pat="v") }}]($REPO/compare/{{ previous.version }}..{{ version }}) 45 | {% else %}\ 46 | **Full Changelog**: [{{ version | trim_start_matches(pat="v") }}] 47 | {% endif %}\ 48 | {% else %}\ 49 | ## [unreleased] 50 | {% endif %} 51 | """ 52 | # A Tera template to be rendered as the changelog's footer. 53 | # See https://keats.github.io/tera/docs/#introduction 54 | 55 | footer = """ 56 | 57 | """ 58 | 59 | # Remove leading and trailing whitespaces from the changelog's body. 60 | trim = true 61 | # postprocessors 62 | postprocessors = [ 63 | # Replace the placeholder `` with a URL. 64 | { pattern = '\$REPO', replace = "https://github.com/SWHL/RapidVideOCR" }, # replace repository URL 65 | ] 66 | 67 | [git] 68 | # Parse commits according to the conventional commits specification. 69 | # See https://www.conventionalcommits.org 70 | conventional_commits = true 71 | # Exclude commits that do not match the conventional commits specification. 72 | filter_unconventional = true 73 | # Split commits on newlines, treating each line as an individual commit. 74 | split_commits = false 75 | # An array of regex based parsers to modify commit messages prior to further processing. 76 | commit_preprocessors = [ 77 | # Replace issue numbers with link templates to be updated in `changelog.postprocessors`. 78 | #{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](https://github.com/orhun/git-cliff/issues/${2}))"}, 79 | ] 80 | # An array of regex based parsers for extracting data from the commit message. 81 | # Assigns commits to groups. 82 | # Optionally sets the commit's scope and can decide to exclude commits from further processing. 
83 | commit_parsers = [ 84 | { message = "^feat", group = "🚀 Features" }, 85 | { message = "^fix", group = "🐛 Bug Fixes" }, 86 | { message = "^doc", group = "📚 Documentation" }, 87 | { message = "^perf", group = "⚡ Performance" }, 88 | { message = "^refactor", group = "🚜 Refactor" }, 89 | { message = "^style", group = "🎨 Styling" }, 90 | { message = "^test", group = "🧪 Testing" }, 91 | { message = "^chore\\(release\\): prepare for", skip = true }, 92 | { message = "^chore\\(deps.*\\)", skip = true }, 93 | { message = "^chore\\(pr\\)", skip = true }, 94 | { message = "^chore\\(pull\\)", skip = true }, 95 | { message = "^chore|^ci", group = "⚙️ Miscellaneous Tasks" }, 96 | { body = ".*security", group = "🛡️ Security" }, 97 | { message = "^revert", group = "◀️ Revert" }, 98 | { message = ".*", group = "💼 Other" }, 99 | ] 100 | # Exclude commits that are not matched by any commit parser. 101 | filter_commits = false 102 | # Order releases topologically instead of chronologically. 103 | topo_order = false 104 | # Order of commits in each group/release within the changelog. 105 | # Allowed values: newest, oldest 106 | sort_commits = "newest" -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | 5 | # 提取 + 识别 6 | from rapid_videocr import RapidVideOCRInput, RapidVideoSubFinderOCR, VideoSubFinderInput 7 | 8 | vsf_exe_path = ( 9 | r"G:\ProgramFiles\VideoSubFinder_6.10_x64\Release_x64\VideoSubFinderWXW.exe" 10 | ) 11 | vsf_input_params = VideoSubFinderInput(vsf_exe_path=vsf_exe_path) 12 | ocr_input_params = RapidVideOCRInput(is_batch_rec=False) 13 | vsf_ocr = RapidVideoSubFinderOCR(vsf_input_params, ocr_input_params) 14 | 15 | # video_path可以是目录或者具体video路径 16 | video_path = "test_files/tiny/2.mp4" 17 | save_dir = "outputs" 18 | vsf_ocr(video_path, save_dir) 19 | 20 | 21 | # # 只识别 22 | from rapid_videocr import RapidVideOCR, RapidVideOCRInput 23 | 24 | ocr_input_params = RapidVideOCRInput(is_batch_rec=False) 25 | extractor = RapidVideOCR(ocr_input_params) 26 | 27 | rgb_dir = "tests/test_files/RGBImages" 28 | save_dir = "outputs" 29 | save_name = "a" 30 | 31 | # outputs/a.srt outputs/a.ass outputs/a.txt 32 | extractor(rgb_dir, save_dir, save_name=save_name) 33 | -------------------------------------------------------------------------------- /docs/README_zh.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
 
5 | 6 | 7 | 8 | 9 | 10 | PyPI 11 | 12 | 13 | SemVer2.0 14 | 15 | 16 | 简体中文 | [English](https://github.com/SWHL/RapidVideOCR) 17 |
18 | 19 | ### 简介 20 | 21 | - 视频硬字幕提取,自动生成对应`srt | ass | txt`文件。 22 | - 支持字幕语言:中文 | 英文 (其他可以支持的语言参见:[支持语种列表](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)) 23 | - 优势如下: 24 | - **提取更快**:与[VideoSubFinder](https://sourceforge.net/projects/videosubfinder/)软件结合使用,提取关键字幕帧更快。 25 | - **识别更准**:采用[RapidOCR](https://github.com/RapidAI/RapidOCR)作为识别库。 26 | - **使用更方便**:pip直接安装即可使用。 27 | 28 | - 桌面EXE版,请移步[RapidVideOCRDesktop](https://github.com/SWHL/RapidVideOCRDesktop) 29 | - 如果有帮助到您的话,请给个小星星⭐。 30 | 31 | ### [在线Demo](https://www.modelscope.cn/studios/liekkas/RapidVideOCR/summary) 32 | 33 |
34 | Demo 35 |
36 | 37 | ### 整体框架 38 | 39 | ```mermaid 40 | flowchart LR 41 | A[/Video/] --Extract subtitle key frame--> B(VideoSubFinder) --OCR-->C(RapidVideOCR) 42 | C --Convert--> D[/"SRT | ASS | TXT"/] 43 | ``` 44 | 45 | ### 安装 46 | 47 | ```bash 48 | pip install rapid_videocr 49 | ``` 50 | 51 | ### 使用 52 | 53 | > [!NOTE] 54 | > 55 | > `rapid_videocr`输入图像路径必须是**VideoSubFinder**软件输出的RGBImages或TXTImages的路径。 56 | 57 | ```bash 58 | rapid_videocr -i RGBImages 59 | ``` 60 | 61 | ### 文档 62 | 63 | 完整文档请移步:[docs](https://swhl.github.io/RapidVideOCR/docs) 64 | 65 | ### 贡献者 66 | 67 |

68 | 69 | 70 | 71 |

72 | 73 | ### 贡献指南 74 | 75 | 我们感谢所有的贡献者为改进和提升 RapidVideOCR 所作出的努力。 76 | 77 | - 欢迎提交请求。对于重大更改,请先打开issue讨论您想要改变的内容。 78 | - 请确保适当更新测试。 79 | 80 | ### 加入我们 81 | 82 | - 微信扫描以下二维码,关注**RapidAI公众号**,回复video即可加入RapidVideOCR微信交流群: 83 |
84 | 85 |
86 | 87 | - 扫码加入QQ群(706807542): 88 |
89 | 90 |
91 | 92 | ### [赞助](https://swhl.github.io/RapidVideOCR/docs/sponsor/) 93 | 94 | 如果您想要赞助该项目,可直接点击当前页最上面的Sponsor按钮,请写好备注(**您的Github账号名称**),方便添加到赞助列表中。 95 | 96 | ### 开源许可证 97 | 98 | 该项目采用 [Apache 2.0 license](../LICENSE) 开源许可证。 99 | -------------------------------------------------------------------------------- /docs/doc_whl.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://swhl.github.io/RapidVideOCR/docs/) 2 | -------------------------------------------------------------------------------- /rapid_videocr/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidVideOCR, RapidVideOCRExeception, RapidVideOCRInput 5 | from .vsf_ocr_cli import RapidVideoSubFinderOCR 6 | -------------------------------------------------------------------------------- /rapid_videocr/export.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from abc import ABC, abstractmethod 5 | from enum import Enum 6 | from pathlib import Path 7 | from typing import List 8 | 9 | from .utils.utils import write_txt 10 | 11 | 12 | class OutputFormat(Enum): 13 | TXT = "txt" 14 | SRT = "srt" 15 | ASS = "ass" 16 | ALL = "all" 17 | 18 | 19 | class ExportStrategy(ABC): 20 | @abstractmethod 21 | def export( 22 | self, 23 | save_dir: Path, 24 | save_name: str, 25 | srt_result: List[str], 26 | ass_result: List[str], 27 | txt_result: List[str], 28 | ): 29 | pass 30 | 31 | 32 | class TxtExportStrategy(ExportStrategy): 33 | def export( 34 | self, 35 | save_dir: Path, 36 | save_name: str, 37 | srt_result: List[str], 38 | ass_result: List[str], 39 | txt_result: List[str], 40 | ): 41 | file_path = save_dir / f"{save_name}.txt" 42 | write_txt(file_path, txt_result) 43 | 44 | 45 | class SrtExportStrategy(ExportStrategy): 46 | def export( 47 | self, 48 | save_dir: Path, 49 | save_name: str, 50 | srt_result: List[str], 51 | ass_result: List[str], 52 | txt_result: List[str], 53 | ): 54 | file_path = save_dir / f"{save_name}.srt" 55 | write_txt(file_path, srt_result) 56 | 57 | 58 | class AssExportStrategy(ExportStrategy): 59 | def export( 60 | self, 61 | save_dir: Path, 62 | save_name: str, 63 | srt_result: List[str], # unused here but kept for signature 64 | ass_result: List[str], 65 | txt_result: List[str], 66 | ): 67 | header = [ 68 | "[Script Info]", 69 | "; Script generated by RapidVideOCR", 70 | "ScriptType: v4.00+", 71 | "PlayResX: 1920", 72 | "PlayResY: 1080", 73 | "", 74 | "[V4+ Styles]", 75 | "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, " 76 | "Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, " 77 | "Alignment, MarginL, MarginR, MarginV, Encoding", 78 | "Style: Default,Arial,54,&H00FFFFFF,&H0000FFFF,&H00000000,&H64000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1", 79 | "", 80 | "[Events]", 81 | "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text", 82 | ] 83 | 84 | file_path = save_dir / f"{save_name}.ass" 85 | write_txt(file_path, header + [""] + ass_result) 86 | 87 | 88 | class AllExportStrategy(ExportStrategy): 89 | def export( 90 | self, 91 | save_dir: Path, 92 | save_name: str, 93 | srt_result: List[str], 94 | ass_result: List[str], 95 | txt_result: List[str], 96 | ): 97 | 
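# The "all" strategy simply delegates to the three single-format strategies,
# so a single call writes {save_name}.txt, {save_name}.srt and {save_name}.ass
# into save_dir together.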
txt_export = TxtExportStrategy() 98 | srt_export = SrtExportStrategy() 99 | ass_export = AssExportStrategy() 100 | 101 | txt_export.export(save_dir, save_name, srt_result, ass_result, txt_result) 102 | srt_export.export(save_dir, save_name, srt_result, ass_result, txt_result) 103 | ass_export.export(save_dir, save_name, srt_result, ass_result, txt_result) 104 | 105 | 106 | class ExportStrategyFactory: 107 | @staticmethod 108 | def create_strategy(out_format: str = OutputFormat.ALL.value) -> ExportStrategy: 109 | strategies = { 110 | OutputFormat.TXT.value: TxtExportStrategy(), 111 | OutputFormat.SRT.value: SrtExportStrategy(), 112 | OutputFormat.ASS.value: AssExportStrategy(), 113 | OutputFormat.ALL.value: AllExportStrategy(), 114 | } 115 | 116 | if strategy := strategies.get(out_format): 117 | return strategy 118 | raise ValueError(f"Unsupported output format: {out_format}") 119 | -------------------------------------------------------------------------------- /rapid_videocr/main.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import argparse 5 | from dataclasses import dataclass 6 | from pathlib import Path 7 | from typing import Any, Dict, List, Optional, Union 8 | 9 | from .export import ExportStrategyFactory, OutputFormat 10 | from .ocr_processor import OCRProcessor 11 | from .utils.crop_by_project import CropByProject 12 | from .utils.logger import Logger 13 | from .utils.utils import mkdir 14 | 15 | IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"} 16 | 17 | 18 | @dataclass 19 | class RapidVideOCRInput: 20 | is_batch_rec: bool = False 21 | batch_size: int = 10 22 | out_format: str = OutputFormat.ALL.value 23 | ocr_params: Optional[Dict[str, Any]] = None 24 | 25 | 26 | class RapidVideOCR: 27 | def __init__(self, input_params: RapidVideOCRInput): 28 | self.logger = Logger(logger_name=__name__).get_log() 29 | 30 | self.ocr_processor = OCRProcessor( 31 | input_params.ocr_params, input_params.batch_size 32 | ) 33 | 34 | self.cropper = CropByProject() 35 | 36 | self.is_batch_rec = input_params.is_batch_rec 37 | self.out_format = input_params.out_format 38 | 39 | def __call__( 40 | self, 41 | vsf_dir: Union[str, Path], 42 | save_dir: Union[str, Path], 43 | save_name: str = "result", 44 | ) -> List[str]: 45 | vsf_dir = Path(vsf_dir) 46 | if not vsf_dir.exists(): 47 | raise RapidVideOCRExeception(f"{vsf_dir} does not exist.") 48 | 49 | img_list = self.get_img_list(vsf_dir) 50 | srt_result, ass_result, txt_result = self.ocr_processor( 51 | img_list, self.is_batch_rec, self.is_txt_dir(vsf_dir) 52 | ) 53 | 54 | self.export_file(Path(save_dir), save_name, srt_result, ass_result, txt_result) 55 | return txt_result 56 | 57 | def get_img_list(self, vsf_dir: Path) -> List[Path]: 58 | def get_sort_key(x: Path) -> int: 59 | return int("".join(str(x.stem).split("_")[:4])) 60 | 61 | img_list = [] 62 | for v in vsf_dir.glob("*.*"): 63 | if not v.is_file(): 64 | continue 65 | 66 | if v.suffix.lower() not in IMAGE_EXTENSIONS: 67 | continue 68 | 69 | img_list.append(v) 70 | 71 | if not img_list: 72 | raise RapidVideOCRExeception(f"{vsf_dir} does not have valid images") 73 | 74 | img_list = sorted(img_list, key=get_sort_key) 75 | return img_list 76 | 77 | @staticmethod 78 | def is_txt_dir(vsf_dir: Path) -> bool: 79 | return "TXTImages" in vsf_dir.name 80 | 81 | def export_file( 82 | self, 83 | save_dir: Path, 84 | save_name: str, 85 | srt_result: 
List[str], 86 | ass_result: List[str], 87 | txt_result: List[str], 88 | ): 89 | try: 90 | strategy = ExportStrategyFactory.create_strategy(self.out_format) 91 | 92 | mkdir(save_dir) 93 | strategy.export(save_dir, save_name, srt_result, ass_result, txt_result) 94 | self.logger.info("[OCR] Results saved to directory: %s", save_dir) 95 | except ValueError as e: 96 | self.logger.error("Export failed: %s", str(e)) 97 | raise 98 | 99 | def print_console(self, txt_result: List): 100 | for v in txt_result: 101 | print(v.strip()) 102 | 103 | 104 | class RapidVideOCRExeception(Exception): 105 | pass 106 | 107 | 108 | def main(): 109 | parser = argparse.ArgumentParser() 110 | parser.add_argument( 111 | "-i", 112 | "--img_dir", 113 | type=str, 114 | required=True, 115 | help="The full path of RGBImages or TXTImages.", 116 | ) 117 | parser.add_argument( 118 | "-s", 119 | "--save_dir", 120 | type=str, 121 | default="outputs", 122 | help='The path of saving the recognition result. Default is "outputs" under the current directory.', 123 | ) 124 | parser.add_argument( 125 | "-f", 126 | "--file_name", 127 | type=str, 128 | default="result", 129 | help='The name of the resulting file name. Default is "result".', 130 | ) 131 | parser.add_argument( 132 | "-o", 133 | "--out_format", 134 | type=str, 135 | default=OutputFormat.ALL.value, 136 | choices=[v.value for v in OutputFormat], 137 | help='Output file format. Default is "all".', 138 | ) 139 | parser.add_argument( 140 | "--is_batch_rec", 141 | action="store_true", 142 | default=False, 143 | help="Which mode to run (concat recognition or single recognition). Default is False.", 144 | ) 145 | parser.add_argument( 146 | "-b", 147 | "--batch_size", 148 | type=int, 149 | default=10, 150 | help="The batch of concating image nums in concat recognition mode. 
Default is 10.", 151 | ) 152 | args = parser.parse_args() 153 | 154 | ocr_input_params = RapidVideOCRInput( 155 | is_batch_rec=args.is_batch_rec, 156 | batch_size=args.batch_size, 157 | out_format=args.out_format, 158 | ) 159 | extractor = RapidVideOCR(ocr_input_params) 160 | extractor(args.img_dir, args.save_dir, args.file_name) 161 | 162 | 163 | if __name__ == "__main__": 164 | main() 165 | -------------------------------------------------------------------------------- /rapid_videocr/ocr_processor.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import Dict, List, Optional, Tuple 6 | 7 | import cv2 8 | import numpy as np 9 | from rapidocr import RapidOCR 10 | from tqdm import tqdm 11 | 12 | from .utils.logger import Logger 13 | from .utils.utils import ( 14 | compute_centroid, 15 | compute_poly_iou, 16 | is_inclusive_each_other, 17 | padding_img, 18 | read_img, 19 | ) 20 | 21 | 22 | class OCRProcessor: 23 | def __init__(self, ocr_params: Optional[Dict] = None, batch_size: int = 10): 24 | self.logger = Logger(logger_name=__name__).get_log() 25 | self.ocr_engine = self._init_ocr_engine(ocr_params) 26 | self.batch_size = batch_size 27 | 28 | def _init_ocr_engine(self, ocr_params: Optional[Dict] = None) -> RapidOCR: 29 | return RapidOCR(params=ocr_params) 30 | 31 | def __call__( 32 | self, img_list: List[Path], is_batch_rec: bool, is_txt_dir: bool 33 | ) -> Tuple[List[str], List[str], List[str]]: 34 | self.is_txt_dir = is_txt_dir 35 | process_func = self.batch_rec if is_batch_rec else self.single_rec 36 | rec_results = process_func(img_list) 37 | srt_results = self._generate_srt_results(rec_results) 38 | ass_results = self._generate_ass_results(rec_results) 39 | txt_results = self._generate_txt_result(rec_results) 40 | return srt_results, ass_results, txt_results 41 | 42 | def single_rec(self, img_list: List[Path]) -> List[Tuple[int, str, str, str]]: 43 | self.logger.info("[OCR] Running with single recognition.") 44 | 45 | rec_results = [] 46 | for i, img_path in enumerate(tqdm(img_list, desc="OCR")): 47 | time_str = self._get_srt_timestamp(img_path) 48 | ass_time_str = self._get_ass_timestamp(img_path) 49 | img = self._preprocess_image(img_path) 50 | 51 | dt_boxes, rec_res = self.get_ocr_result(img) 52 | txts = ( 53 | self.process_same_line(dt_boxes, rec_res) 54 | if dt_boxes is not None 55 | else "" 56 | ) 57 | rec_results.append([i, time_str, txts, ass_time_str]) 58 | return rec_results 59 | 60 | @staticmethod 61 | def _get_srt_timestamp(file_path: Path) -> str: 62 | """0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg""" 63 | 64 | def format_time(time_parts): 65 | time_parts[0] = f"{time_parts[0]:0>2}" 66 | return ":".join(time_parts[:3]) + f",{time_parts[3]}" 67 | 68 | split_paths = file_path.stem.split("_") 69 | start_time = split_paths[:4] 70 | end_time = split_paths[5:9] 71 | return f"{format_time(start_time)} --> {format_time(end_time)}" 72 | 73 | @staticmethod 74 | def _get_ass_timestamp(file_path: Path) -> str: 75 | s = file_path.stem 76 | 77 | h1 = int(s[0:1]) 78 | m1 = int(s[2:4]) 79 | sec1 = int(s[5:7]) 80 | ms1 = int(s[8:11]) 81 | 82 | h2 = int(s[13:14]) 83 | m2 = int(s[15:17]) 84 | sec2 = int(s[18:20]) 85 | ms2 = int(s[21:24]) 86 | 87 | # compute absolute times in milliseconds 88 | bt = (h1 * 3600 + m1 * 60 + sec1) * 1000 + ms1 89 | et = (h2 * 3600 + m2 * 60 + sec2) * 1000 + ms2 90 | 91 | def 
to_ass(ts_ms: int) -> str: 92 | # centiseconds (drop the last digit, no rounding) 93 | cs_total = ts_ms // 10 94 | cs = cs_total % 100 95 | total_s = ts_ms // 1000 96 | s = total_s % 60 97 | total_m = total_s // 60 98 | m = total_m % 60 99 | h = total_m // 60 100 | # H:MM:SS.CC 101 | return f"{h}:{m:02d}:{s:02d}.{cs:02d}" 102 | 103 | return f"{to_ass(bt)},{to_ass(et)}" 104 | 105 | @staticmethod 106 | def _preprocess_image(img_path: Path) -> np.ndarray: 107 | img = read_img(img_path) 108 | img = padding_img(img, (img.shape[0], img.shape[0], 0, 0)) 109 | return img 110 | 111 | @staticmethod 112 | def _generate_srt_results(rec_results: List[Tuple[int, str, str, str]]) -> List[str]: 113 | return [f"{i+1}\n{time_str}\n{txt}\n" for i, time_str, txt, _ in rec_results] 114 | 115 | @staticmethod 116 | def _generate_ass_results(rec_results: List[Tuple[int, str, str, str]]) -> List[str]: 117 | return [f"Dialogue: 0,{ass_time_str},Default,,0,0,0,,{txt}" for _, _, txt, ass_time_str in rec_results] 118 | 119 | @staticmethod 120 | def _generate_txt_result(rec_results: List[Tuple[int, str, str, str]]) -> List[str]: 121 | return [f"{txt}\n" for _, _, txt, _ in rec_results] 122 | 123 | def batch_rec(self, img_list: List[Path]) -> List[Tuple[int, str, str, str]]: 124 | self.logger.info("[OCR] Running with concat recognition.") 125 | 126 | img_nums = len(img_list) 127 | rec_results = [] 128 | for start_i in tqdm(range(0, img_nums, self.batch_size), desc="Concat Rec"): 129 | end_i = min(img_nums, start_i + self.batch_size) 130 | 131 | concat_img, img_coordinates, img_paths = self._prepare_batch( 132 | img_list[start_i:end_i] 133 | ) 134 | dt_boxes, rec_res = self.get_ocr_result(concat_img) 135 | if rec_res is None or dt_boxes is None: 136 | continue 137 | 138 | one_batch_rec_results = self._process_batch_results( 139 | start_i, img_coordinates, dt_boxes, rec_res, img_paths 140 | ) 141 | rec_results.extend(one_batch_rec_results) 142 | return rec_results 143 | 144 | def _prepare_batch( 145 | self, img_list: List[Path] 146 | ) -> Tuple[np.ndarray, np.ndarray, List[Path]]: 147 | padding_value = 10 148 | array_img_list, img_coordinates = [], [] 149 | for i, img_path in enumerate(img_list): 150 | img = read_img(img_path) 151 | if self.is_txt_dir: 152 | img = cv2.resize(img, None, fx=0.25, fy=0.25) 153 | 154 | pad_img = padding_img(img, (0, padding_value, 0, 0)) 155 | array_img_list.append(pad_img) 156 | 157 | h, w = img.shape[:2] 158 | x0, y0 = 0, i * (h + padding_value) 159 | x1, y1 = w, (i + 1) * (h + padding_value) 160 | img_coordinates.append([(x0, y0), (x1, y0), (x1, y1), (x0, y1)]) 161 | 162 | return np.vstack(array_img_list), np.array(img_coordinates), img_list 163 | 164 | def _process_batch_results( 165 | self, 166 | start_i: int, 167 | img_coordinates: np.ndarray, 168 | dt_boxes: np.ndarray, 169 | rec_res: Tuple[str], 170 | img_paths: List[Path], 171 | ) -> List[Tuple[int, str, str, str]]: 172 | match_dict = self._match_boxes_to_images( 173 | img_coordinates, dt_boxes, rec_res, img_paths 174 | ) 175 | 176 | results = [] 177 | for k, v in match_dict.items(): 178 | cur_frame_idx = start_i + k 179 | if v: 180 | img_path, boxes, recs = list(zip(*v)) 181 | time_str = self._get_srt_timestamp(img_path[0]) 182 | ass_time_str = self._get_ass_timestamp(img_path[0]) 183 | txts = self.process_same_line(boxes, recs) 184 | else: 185 | time_str = self._get_srt_timestamp(img_paths[k]) 186 | ass_time_str = self._get_ass_timestamp(img_paths[k]) 187 | txts = "" 188 | 189 | results.append([cur_frame_idx, time_str, txts, 
ass_time_str]) 190 | return results 191 | 192 | def _match_boxes_to_images( 193 | self, 194 | img_coordinates: np.ndarray, 195 | dt_boxes: np.ndarray, 196 | rec_res: List[str], 197 | img_paths: List[Path], 198 | ) -> Dict[int, List[Tuple[Path, np.ndarray, str]]]: 199 | """将检测框匹配到对应图像""" 200 | match_dict = {k: [] for k in range(len(img_coordinates))} 201 | visited_idx = set() 202 | 203 | for i, frame_boxes in enumerate(img_coordinates): 204 | for idx, (dt_box, txt) in enumerate(zip(dt_boxes, rec_res)): 205 | if idx in visited_idx: 206 | continue 207 | 208 | if self._is_box_matched(frame_boxes, dt_box): 209 | match_dict[i].append((img_paths[i], dt_box, txt)) 210 | visited_idx.add(idx) 211 | 212 | return match_dict 213 | 214 | def _is_box_matched(self, frame_boxes: np.ndarray, dt_box: np.ndarray) -> bool: 215 | """判断检测框是否匹配到图像""" 216 | box_iou = compute_poly_iou(frame_boxes, dt_box) 217 | return is_inclusive_each_other(frame_boxes, dt_box) or box_iou > 0.1 218 | 219 | def get_ocr_result( 220 | self, img: np.ndarray 221 | ) -> Tuple[Optional[np.ndarray], Optional[Tuple[str]]]: 222 | ocr_result = self.ocr_engine(img) 223 | if ocr_result.boxes is None: 224 | return None, None 225 | return ocr_result.boxes, ocr_result.txts 226 | 227 | def process_same_line(self, dt_boxes: np.ndarray, rec_res: List[str]) -> str: 228 | if len(rec_res) == 1: 229 | return rec_res[0] 230 | 231 | y_centroids = [compute_centroid(box)[1] for box in dt_boxes] 232 | line_groups = self._group_by_lines(y_centroids) 233 | return self._merge_line_text(line_groups, rec_res) 234 | 235 | def _group_by_lines(self, y_centroids: List[float]) -> List[List[int]]: 236 | """将文本框按行分组""" 237 | 238 | @staticmethod 239 | def is_same_line(points: List) -> List[bool]: 240 | threshold = 5 241 | 242 | align_points = list(zip(points, points[1:])) 243 | bool_res = [False] * len(align_points) 244 | for i, point in enumerate(align_points): 245 | y0, y1 = point 246 | if abs(y0 - y1) <= threshold: 247 | bool_res[i] = True 248 | return bool_res 249 | 250 | bool_res = is_same_line(y_centroids) 251 | groups = [] 252 | current_group = [0] 253 | for i, is_same in enumerate(bool_res, 1): 254 | if is_same: 255 | current_group.append(i) 256 | else: 257 | groups.append(current_group) 258 | current_group = [i] 259 | 260 | groups.append(current_group) 261 | return groups 262 | 263 | def _merge_line_text(self, line_groups: List[List[int]], rec_res: List[str]) -> str: 264 | lines = [] 265 | for group in line_groups: 266 | line_text = " ".join(rec_res[i] for i in group) 267 | lines.append(line_text) 268 | return "\n".join(lines) 269 | -------------------------------------------------------------------------------- /rapid_videocr/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .logger import Logger 5 | -------------------------------------------------------------------------------- /rapid_videocr/utils/crop_by_project.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import cv2 5 | import numpy as np 6 | 7 | 8 | class CropByProject: 9 | """投影法裁剪""" 10 | 11 | def __init__(self, threshold=250): 12 | self.threshold = threshold 13 | 14 | def __call__(self, origin_img): 15 | image = cv2.cvtColor(origin_img, cv2.COLOR_BGR2GRAY) 16 | 17 | # 将图片二值化 18 | retval, img = cv2.threshold(image, self.threshold, 
255, cv2.THRESH_BINARY_INV) 19 | 20 | # 使文字增长成块 21 | closed = cv2.dilate(img, None, iterations=1) 22 | 23 | # 水平投影 24 | x0, x1 = self.get_project_loc(closed, direction="width") 25 | 26 | # 竖直投影 27 | y0, y1 = self.get_project_loc(closed, direction="height") 28 | 29 | return origin_img[y0:y1, x0:x1] 30 | 31 | @staticmethod 32 | def get_project_loc(img, direction): 33 | """获得裁剪的起始和终点索引位置 34 | Args: 35 | img (ndarray): 二值化后得到的图像 36 | direction (str): 'width/height' 37 | Raises: 38 | ValueError: 不支持的求和方向 39 | Returns: 40 | tuple: 起始索引位置 41 | """ 42 | if direction == "width": 43 | axis = 0 44 | elif direction == "height": 45 | axis = 1 46 | else: 47 | raise ValueError(f"direction {direction} is not supported!") 48 | 49 | loc_sum = np.sum(img == 255, axis=axis) 50 | loc_range = np.argwhere(loc_sum > 0) 51 | i0, i1 = loc_range[0][0], loc_range[-1][0] 52 | return i0, i1 53 | -------------------------------------------------------------------------------- /rapid_videocr/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | 6 | import colorlog 7 | 8 | 9 | class Logger: 10 | def __init__(self, log_level=logging.DEBUG, logger_name=None): 11 | self.logger = logging.getLogger(logger_name) 12 | self.logger.setLevel(log_level) 13 | self.logger.propagate = False 14 | 15 | formatter = colorlog.ColoredFormatter( 16 | "%(log_color)s[%(levelname)s] %(asctime)s [RapidVideOCR] %(filename)s:%(lineno)d: %(message)s", 17 | log_colors={ 18 | "DEBUG": "cyan", 19 | "INFO": "green", 20 | "WARNING": "yellow", 21 | "ERROR": "red", 22 | "CRITICAL": "red,bg_white", 23 | }, 24 | ) 25 | 26 | if not self.logger.handlers: 27 | console_handler = logging.StreamHandler() 28 | console_handler.setFormatter(formatter) 29 | 30 | for handler in self.logger.handlers: 31 | self.logger.removeHandler(handler) 32 | 33 | console_handler.setLevel(log_level) 34 | self.logger.addHandler(console_handler) 35 | 36 | def get_log(self): 37 | return self.logger 38 | -------------------------------------------------------------------------------- /rapid_videocr/utils/utils.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import argparse 5 | from pathlib import Path 6 | from typing import List, Tuple, Union 7 | 8 | import cv2 9 | import numpy as np 10 | import shapely 11 | from shapely.geometry import MultiPoint, Polygon 12 | 13 | 14 | def compute_centroid(points: np.ndarray) -> List: 15 | """计算所给框的质心坐标 16 | 17 | :param points ([type]): (4, 2) 18 | :return: [description] 19 | """ 20 | x_min, x_max = np.min(points[:, 0]), np.max(points[:, 0]) 21 | y_min, y_max = np.min(points[:, 1]), np.max(points[:, 1]) 22 | return [(x_min + x_max) / 2, (y_min + y_max) / 2] 23 | 24 | 25 | def write_txt( 26 | save_path: Union[str, Path], contents: Union[List[str], str], mode: str = "w" 27 | ) -> None: 28 | if not isinstance(contents, list): 29 | contents = [contents] 30 | 31 | with open(save_path, mode, encoding="utf-8") as f: 32 | for value in contents: 33 | f.write(f"{value}\n") 34 | 35 | 36 | def read_img(img_path: Union[str, Path]) -> np.ndarray: 37 | img = cv2.imdecode(np.fromfile(str(img_path), dtype=np.uint8), 1) 38 | return img 39 | 40 | 41 | def padding_img( 42 | img: np.ndarray, 43 | padding_value: Tuple[int, int, int, int], 44 | padding_color: Tuple[int, int, int] = (0, 0, 0), 45 | ) 
-> np.ndarray: 46 | padded_img = cv2.copyMakeBorder( 47 | img, 48 | padding_value[0], 49 | padding_value[1], 50 | padding_value[2], 51 | padding_value[3], 52 | cv2.BORDER_CONSTANT, 53 | value=padding_color, 54 | ) 55 | return padded_img 56 | 57 | 58 | def mkdir(dir_path): 59 | Path(dir_path).mkdir(parents=True, exist_ok=True) 60 | 61 | 62 | def read_txt(txt_path: Union[str, Path]) -> List[str]: 63 | if not isinstance(txt_path, str): 64 | txt_path = str(txt_path) 65 | 66 | with open(txt_path, "r", encoding="utf-8") as f: 67 | data = list(map(lambda x: x.rstrip("\n"), f)) 68 | return data 69 | 70 | 71 | def compute_poly_iou(a: np.ndarray, b: np.ndarray) -> float: 72 | """计算两个多边形的IOU 73 | 74 | Args: 75 | poly1 (np.ndarray): (4, 2) 76 | poly2 (np.ndarray): (4, 2) 77 | 78 | Returns: 79 | float: iou 80 | """ 81 | poly1 = Polygon(a).convex_hull 82 | poly2 = Polygon(b).convex_hull 83 | 84 | union_poly = np.concatenate((a, b)) 85 | 86 | if not poly1.intersects(poly2): 87 | return 0.0 88 | 89 | try: 90 | inter_area = poly1.intersection(poly2).area 91 | union_area = MultiPoint(union_poly).convex_hull.area 92 | except shapely.geos.TopologicalError: 93 | print("shapely.geos.TopologicalError occured, iou set to 0") 94 | return 0.0 95 | 96 | if union_area == 0: 97 | return 0.0 98 | 99 | return float(inter_area) / union_area 100 | 101 | 102 | def is_inclusive_each_other(box1: np.ndarray, box2: np.ndarray) -> bool: 103 | """判断两个多边形框是否存在包含关系 104 | 105 | Args: 106 | box1 (np.ndarray): (4, 2) 107 | box2 (np.ndarray): (4, 2) 108 | 109 | Returns: 110 | bool: 是否存在包含关系 111 | """ 112 | poly1 = Polygon(box1) 113 | poly2 = Polygon(box2) 114 | 115 | poly1_area = poly1.convex_hull.area 116 | poly2_area = poly2.convex_hull.area 117 | 118 | if poly1_area > poly2_area: 119 | box_max = box1 120 | box_min = box2 121 | else: 122 | box_max = box2 123 | box_min = box1 124 | 125 | x0, y0 = np.min(box_min[:, 0]), np.min(box_min[:, 1]) 126 | x1, y1 = np.max(box_min[:, 0]), np.max(box_min[:, 1]) 127 | 128 | edge_x0, edge_y0 = np.min(box_max[:, 0]), np.min(box_max[:, 1]) 129 | edge_x1, edge_y1 = np.max(box_max[:, 0]), np.max(box_max[:, 1]) 130 | 131 | if x0 >= edge_x0 and y0 >= edge_y0 and x1 <= edge_x1 and y1 <= edge_y1: 132 | return True 133 | return False 134 | 135 | 136 | def float_range(mini, maxi): 137 | """Return function handle of an argument type function for 138 | ArgumentParser checking a float range: mini <= arg <= maxi 139 | mini - minimum acceptable argument 140 | maxi - maximum acceptable argument""" 141 | 142 | # Define the function with default arguments 143 | def float_range_checker(arg): 144 | """New Type function for argparse - a float within predefined range.""" 145 | 146 | try: 147 | f = float(arg) 148 | except ValueError as exc: 149 | raise argparse.ArgumentTypeError("must be a floating point number") from exc 150 | 151 | if f < mini or f > maxi: 152 | raise argparse.ArgumentTypeError( 153 | "must be in range [" + str(mini) + " .. 
" + str(maxi) + "]" 154 | ) 155 | return f 156 | 157 | # Return function handle to checking function 158 | return float_range_checker 159 | -------------------------------------------------------------------------------- /rapid_videocr/vsf_cli.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import subprocess 5 | from dataclasses import asdict, dataclass 6 | from typing import Optional 7 | 8 | 9 | @dataclass 10 | class VideoSubFinderInput: 11 | vsf_exe_path: str 12 | clear_dirs: bool = True 13 | run_search: bool = True 14 | create_cleared_text_images: bool = True 15 | create_empty_sub: Optional[str] = None 16 | create_sub_from_cleared_txt_images: Optional[str] = None 17 | create_sub_from_txt_results: Optional[str] = None 18 | open_video_opencv: bool = True 19 | open_video_ffmpeg: bool = False 20 | use_cuda: bool = False 21 | start_time: Optional[str] = None 22 | end_time: Optional[str] = None 23 | top_video_image_percent_end: float = 0.2 24 | bottom_video_image_percent_end: float = 0.0 25 | left_video_image_percent_end: float = 0.0 26 | right_video_image_percent_end: float = 1.0 27 | general_settings: Optional[str] = None 28 | num_threads: int = 2 29 | num_ocr_threads: int = 1 30 | 31 | 32 | class VideoSubFinder: 33 | def __init__(self, input_params: VideoSubFinderInput): 34 | param_dict = asdict(input_params) 35 | run_list = [input_params.vsf_exe_path] 36 | for k, v in param_dict.items(): 37 | if v is None or str(v) == "False": 38 | continue 39 | 40 | run_list.append(f"--{str(k)}" if str(v) == "True" else f"--{k} {v}") 41 | self.run_list = run_list 42 | 43 | def __call__(self, video_path: str, output_dir: str) -> str: 44 | self.run_list.extend(["--input_video", video_path, "--output_dir", output_dir]) 45 | try: 46 | subprocess.run(self.run_list, check=False) 47 | return output_dir 48 | except Exception as e: 49 | raise e 50 | -------------------------------------------------------------------------------- /rapid_videocr/vsf_ocr_cli.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import argparse 5 | from enum import Enum 6 | from pathlib import Path 7 | 8 | from .main import OutputFormat, RapidVideOCR, RapidVideOCRInput 9 | from .utils.logger import Logger 10 | from .utils.utils import float_range 11 | from .vsf_cli import VideoSubFinder, VideoSubFinderInput 12 | 13 | 14 | class VideoFormat(Enum): 15 | MP4 = ".mp4" 16 | AVI = ".avi" 17 | MOV = ".mov" 18 | MKV = ".mkv" 19 | 20 | 21 | class RapidVideoSubFinderOCR: 22 | def __init__( 23 | self, 24 | vsf_input_params: VideoSubFinderInput, 25 | ocr_input_params: RapidVideOCRInput, 26 | ): 27 | self.logger = Logger(logger_name=__name__).get_log() 28 | self.vsf = VideoSubFinder(vsf_input_params) 29 | self.video_ocr = RapidVideOCR(ocr_input_params) 30 | self.video_formats = [VideoFormat[v].value for v in VideoFormat.__members__] 31 | 32 | def __call__(self, video_path: str, output_dir: str = "outputs"): 33 | if Path(video_path).is_dir(): 34 | video_list = Path(video_path).rglob("*.*") 35 | video_list = [ 36 | v for v in video_list if v.suffix.lower() in self.video_formats 37 | ] 38 | else: 39 | video_list = [video_path] 40 | 41 | self.logger.info( 42 | "Extracting subtitle images with VideoSubFinder (takes quite a long time) ..." 
43 | ) 44 | video_num = len(video_list) 45 | for i, one_video in enumerate(video_list): 46 | self.logger.info( 47 | "[%s/%s] Starting to extract %s key frame", i + 1, video_num, one_video 48 | ) 49 | 50 | save_name = Path(one_video).stem 51 | save_dir = Path(output_dir) / save_name 52 | save_vsf_dir = save_dir / "VSF_Results" 53 | 54 | try: 55 | self.vsf(str(one_video), str(save_vsf_dir)) 56 | except Exception as e: 57 | self.logger.error("Extract %s error, %s, skip", one_video, e) 58 | continue 59 | 60 | self.logger.info( 61 | "[%s/%s] Starting to run %s ocr", i + 1, video_num, one_video 62 | ) 63 | 64 | rgb_dir = Path(save_vsf_dir) / "RGBImages" 65 | if not list(rgb_dir.iterdir()): 66 | self.logger.warning("Extracting frames from %s is 0, skip", one_video) 67 | continue 68 | self.video_ocr(rgb_dir, save_dir, save_name=save_name) 69 | 70 | 71 | def main(): 72 | parser = argparse.ArgumentParser() 73 | 74 | videocr_param_group = parser.add_argument_group(title="VideOCRParameters") 75 | videocr_param_group.add_argument( 76 | "-video_dir", 77 | "--video_dir", 78 | type=str, 79 | default=None, 80 | help="The full path of video or the path of video directory.", 81 | ) 82 | videocr_param_group.add_argument( 83 | "-i", 84 | "--img_dir", 85 | type=str, 86 | default=None, 87 | help="The full path of RGBImages or TXTImages.", 88 | ) 89 | videocr_param_group.add_argument( 90 | "-s", 91 | "--save_dir", 92 | type=str, 93 | default="outputs", 94 | help='The path of saving the recognition result. Default is "outputs" under the current directory.', 95 | ) 96 | videocr_param_group.add_argument( 97 | "-o", 98 | "--out_format", 99 | type=str, 100 | default=OutputFormat.ALL.value, 101 | choices=[OutputFormat[v].value for v in OutputFormat.__members__], 102 | help='Output file format. Default is "all".', 103 | ) 104 | videocr_param_group.add_argument( 105 | "--is_batch_rec", 106 | action="store_true", 107 | default=False, 108 | help="Which mode to run (concat recognition or single recognition). Default is False.", 109 | ) 110 | videocr_param_group.add_argument( 111 | "-b", 112 | "--batch_size", 113 | type=int, 114 | default=10, 115 | help="The batch of concating image nums in concat recognition mode. Default is 10.", 116 | ) 117 | 118 | vsf_param_group = parser.add_argument_group(title="VSFParameters") 119 | vsf_param_group.add_argument( 120 | "-vsf", 121 | "--vsf_exe_path", 122 | type=str, 123 | default=None, 124 | help="The full path of VideoSubFinderWXW.exe.", 125 | ) 126 | vsf_param_group.add_argument( 127 | "-c", 128 | "--clear_dirs", 129 | action="store_false", 130 | default=True, 131 | help="Clear Folders (remove all images), performed before any other steps. Default is True", 132 | ) 133 | vsf_param_group.add_argument( 134 | "-r", 135 | "--run_search", 136 | action="store_false", 137 | default=True, 138 | help="Run Search (find frames with hardcoded text (hardsub) on video) Default is True", 139 | ) 140 | vsf_param_group.add_argument( 141 | "-ccti", 142 | "--create_cleared_text_images", 143 | action="store_true", 144 | default=False, 145 | help="Create Cleared Text Images. 
Default is True", 146 | ) 147 | vsf_param_group.add_argument( 148 | "-ces", 149 | "--create_empty_sub", 150 | type=str, 151 | default=None, 152 | help="Create Empty Sub With Provided Output File Name (*.ass or *.srt)", 153 | ) 154 | vsf_param_group.add_argument( 155 | "-cscti", 156 | "--create_sub_from_cleared_txt_images", 157 | type=str, 158 | default=None, 159 | help="Create Sub From Cleared TXT Images With Provided Output File Name (*.ass or *.srt)", 160 | ) 161 | vsf_param_group.add_argument( 162 | "-cstxt", 163 | "--create_sub_from_txt_results", 164 | type=str, 165 | default=None, 166 | help="Create Sub From TXT Results With Provided Output File Name (*.ass or *.srt)", 167 | ) 168 | vsf_param_group.add_argument( 169 | "-ovocv", 170 | "--open_video_opencv", 171 | action="store_false", 172 | default=True, 173 | help="open video by OpenCV (default). Default is True", 174 | ) 175 | vsf_param_group.add_argument( 176 | "-ovffmpeg", 177 | "--open_video_ffmpeg", 178 | action="store_true", 179 | default=False, 180 | help="open video by FFMPEG", 181 | ) 182 | vsf_param_group.add_argument( 183 | "-uc", "--use_cuda", action="store_true", default=False, help="use cuda" 184 | ) 185 | vsf_param_group.add_argument( 186 | "--start_time", 187 | type=str, 188 | default="0:00:00:000", 189 | help="start time, default = 0:00:00:000 (in format hour:min:sec:milisec)", 190 | ) 191 | vsf_param_group.add_argument( 192 | "--end_time", 193 | type=str, 194 | default=None, 195 | help="end time, default = video length", 196 | ) 197 | vsf_param_group.add_argument( 198 | "-te", 199 | "--top_video_image_percent_end", 200 | type=float_range(0, 1.0), 201 | default=0.2, 202 | help="top video image percent offset from image bottom, can be in range [0.0,1.0], default = 1.0", 203 | ) 204 | vsf_param_group.add_argument( 205 | "-be", 206 | "--bottom_video_image_percent_end", 207 | type=float_range(0, 1.0), 208 | default=0.0, 209 | help="bottom video image percent offset from image bottom, can be in range [0.0,1.0], default = 0.0", 210 | ) 211 | vsf_param_group.add_argument( 212 | "-le", 213 | "--left_video_image_percent_end", 214 | type=float_range(0, 1.0), 215 | default=0.0, 216 | help="left video image percent end, can be in range [0.0,1.0], default = 0.0", 217 | ) 218 | vsf_param_group.add_argument( 219 | "-re", 220 | "--right_video_image_percent_end", 221 | type=float_range(0, 1.0), 222 | default=1.0, 223 | help="right video image percent end, can be in range [0.0,1.0], default = 1.0", 224 | ) 225 | vsf_param_group.add_argument( 226 | "-gs", 227 | "--general_settings", 228 | default=None, 229 | help="general settings (path to general settings *.cfg file, default = settings/general.cfg)", 230 | ) 231 | vsf_param_group.add_argument( 232 | "-nthr", 233 | "--num_threads", 234 | type=int, 235 | default=1, 236 | help="number of threads used for Run Search", 237 | ) 238 | vsf_param_group.add_argument( 239 | "-nocrthr", 240 | "--num_ocr_threads", 241 | type=int, 242 | default=1, 243 | help="number of threads used for Create Cleared TXT Images", 244 | ) 245 | args = parser.parse_args() 246 | 247 | ocr_input_params = RapidVideOCRInput( 248 | is_batch_rec=args.is_batch_rec, 249 | batch_size=args.batch_size, 250 | out_format=args.out_format, 251 | ) 252 | 253 | if args.vsf_exe_path and args.video_dir: 254 | vsf_input_params = VideoSubFinderInput(**vars(args)) 255 | extractor = RapidVideoSubFinderOCR(vsf_input_params, ocr_input_params) 256 | extractor(args.video_dir, args.save_dir) 257 | elif args.img_dir: 258 | extractor = 
RapidVideOCR(ocr_input_params) 259 | extractor(args.img_dir, args.save_dir) 260 | else: 261 | pass 262 | 263 | 264 | if __name__ == "__main__": 265 | main() 266 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | rapidocr 3 | onnxruntime 4 | colorlog 5 | tqdm -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: str) -> List: 13 | if not isinstance(txt_path, str): 14 | txt_path = str(txt_path) 15 | 16 | with open(txt_path, "r", encoding="utf-8") as f: 17 | data = list(map(lambda x: x.rstrip("\n"), f)) 18 | return data 19 | 20 | 21 | def get_readme() -> str: 22 | root_dir = Path(__file__).resolve().parent 23 | readme_path = str(root_dir / "docs" / "doc_whl.md") 24 | with open(readme_path, "r", encoding="utf-8") as f: 25 | readme = f.read() 26 | return readme 27 | 28 | 29 | MODULE_NAME = "rapid_videocr" 30 | 31 | obtainer = GetPyPiLatestVersion() 32 | latest_version = obtainer(MODULE_NAME) 33 | VERSION_NUM = obtainer.version_add_one(latest_version) 34 | 35 | # 优先提取commit message中的语义化版本号,如无,则自动加1 36 | if len(sys.argv) > 2: 37 | match_str = " ".join(sys.argv[2:]) 38 | matched_versions = obtainer.extract_version(match_str) 39 | if matched_versions: 40 | VERSION_NUM = matched_versions 41 | sys.argv = sys.argv[:2] 42 | 43 | setuptools.setup( 44 | name=MODULE_NAME, 45 | version=VERSION_NUM, 46 | platforms="Any", 47 | description="Tool for extracting hard subtitles from videos.", 48 | long_description=get_readme(), 49 | long_description_content_type="text/markdown", 50 | author="SWHL", 51 | author_email="liekkaskono@163.com", 52 | url="https://github.com/SWHL/RapidVideOCR.git", 53 | license="Apache-2.0", 54 | include_package_data=True, 55 | install_requires=read_txt("requirements.txt"), 56 | packages=setuptools.find_packages(), 57 | keywords=["rapidocr,videocr,subtitle"], 58 | classifiers=[ 59 | "Programming Language :: Python :: 3.6", 60 | "Programming Language :: Python :: 3.7", 61 | "Programming Language :: Python :: 3.8", 62 | "Programming Language :: Python :: 3.9", 63 | "Programming Language :: Python :: 3.10", 64 | "Programming Language :: Python :: 3.11", 65 | "Programming Language :: Python :: 3.12", 66 | "Programming Language :: Python :: 3.13", 67 | ], 68 | python_requires=">=3.6", 69 | entry_points={ 70 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 71 | }, 72 | ) 73 | -------------------------------------------------------------------------------- /tests/test_files/2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/2.mp4 -------------------------------------------------------------------------------- /tests/test_files/RGBImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/RGBImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/RGBImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/RGBImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/RGBImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/TXTImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_00_041__0_00_00_415_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/TXTImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_00_416__0_00_01_165_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/TXTImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_01_166__0_00_01_540_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_files/TXTImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SWHL/RapidVideOCR/c80949b372e5d552571be1946da826fab9ddffc5/tests/test_files/TXTImages/0_00_01_541__0_00_02_540_0070000000019200080001920.jpeg -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import shutil 5 | import sys 6 | from pathlib import Path 7 | 8 | import pytest 9 | 10 | cur_dir = Path(__file__).resolve().parent 11 | root_dir 
= cur_dir.parent 12 | 13 | sys.path.append(str(root_dir)) 14 | 15 | from rapid_videocr import RapidVideOCR, RapidVideOCRExeception, RapidVideOCRInput 16 | from rapid_videocr.utils.utils import mkdir, read_txt 17 | 18 | test_dir = cur_dir / "test_files" 19 | 20 | 21 | @pytest.fixture 22 | def setup_and_teardown(): 23 | save_dir = test_dir / "tmp" 24 | mkdir(save_dir) 25 | 26 | srt_path = save_dir / "result.srt" 27 | ass_path = save_dir / "result.ass" 28 | txt_path = save_dir / "result.txt" 29 | 30 | yield save_dir, srt_path, ass_path, txt_path 31 | 32 | shutil.rmtree(save_dir) 33 | 34 | 35 | @pytest.mark.parametrize( 36 | "img_dir", 37 | [test_dir / "RGBImages", test_dir / "TXTImages"], 38 | ) 39 | def test_single_rec(setup_and_teardown, img_dir): 40 | save_dir, srt_path, ass_path, txt_path = setup_and_teardown 41 | 42 | extractor = RapidVideOCR(RapidVideOCRInput()) 43 | extractor(img_dir, save_dir) 44 | 45 | srt_data = read_txt(srt_path) 46 | assert len(srt_data) == 16 47 | assert srt_data[2] == "空间里面他绝对赢不了的" 48 | assert srt_data[-2] == "你们接着善后" 49 | 50 | ass_data = read_txt(ass_path) 51 | assert len(ass_data) == 17 52 | assert ass_data[13].split(",", 9)[-1] == "空间里面他绝对赢不了的" 53 | assert ass_data[-1].split(",", 9)[-1] == "你们接着善后" 54 | 55 | txt_data = read_txt(txt_path) 56 | assert len(txt_data) == 8 57 | assert txt_data[-2] == "你们接着善后" 58 | 59 | 60 | @pytest.mark.parametrize("img_dir", [test_dir / "RGBImages"]) 61 | def test_concat_rec(setup_and_teardown, img_dir): 62 | save_dir, srt_path, ass_path, txt_path = setup_and_teardown 63 | 64 | input_param = RapidVideOCRInput(is_batch_rec=True) 65 | extractor = RapidVideOCR(input_param) 66 | extractor(img_dir, save_dir) 67 | 68 | srt_data = read_txt(srt_path) 69 | assert len(srt_data) == 16 70 | assert srt_data[2] == "空间里面他绝对赢不了的" 71 | assert srt_data[-2] == "你们接着善后" 72 | 73 | ass_data = read_txt(ass_path) 74 | assert len(ass_data) == 17 75 | assert ass_data[13].split(",", 9)[-1] == "空间里面他绝对赢不了的" 76 | assert ass_data[-1].split(",", 9)[-1] == "你们接着善后" 77 | 78 | txt_data = read_txt(txt_path) 79 | assert len(txt_data) == 8 80 | assert txt_data[-2] == "你们接着善后" 81 | 82 | 83 | @pytest.mark.parametrize( 84 | "img_dir", 85 | [test_dir / "RGBImage", test_dir / "TXTImage"], 86 | ) 87 | def test_empty_dir(img_dir): 88 | extractor = RapidVideOCR(RapidVideOCRInput()) 89 | mkdir(img_dir) 90 | 91 | with pytest.raises(RapidVideOCRExeception) as exc_info: 92 | extractor(img_dir, test_dir) 93 | assert exc_info.type is RapidVideOCRExeception 94 | 95 | shutil.rmtree(img_dir) 96 | 97 | 98 | @pytest.mark.parametrize( 99 | "img_dir", 100 | [test_dir / "RGBImage", test_dir / "TXTImage"], 101 | ) 102 | def test_nothing_dir(img_dir): 103 | extractor = RapidVideOCR(RapidVideOCRInput()) 104 | mkdir(img_dir) 105 | with pytest.raises(RapidVideOCRExeception) as exc_info: 106 | extractor(img_dir, test_dir) 107 | assert exc_info.type is RapidVideOCRExeception 108 | 109 | shutil.rmtree(img_dir) 110 | 111 | 112 | def test_out_only_srt(setup_and_teardown): 113 | save_dir, srt_path, ass_path, txt_path = setup_and_teardown 114 | 115 | img_dir = test_dir / "RGBImages" 116 | input_param = RapidVideOCRInput(is_batch_rec=True, out_format="srt") 117 | extractor = RapidVideOCR(input_param) 118 | extractor(img_dir, save_dir) 119 | 120 | srt_data = read_txt(srt_path) 121 | assert len(srt_data) == 16 122 | assert srt_data[2] == "空间里面他绝对赢不了的" 123 | assert srt_data[-2] == "你们接着善后" 124 | 125 | 126 | def test_out_only_ass(setup_and_teardown): 127 | save_dir, srt_path, ass_path, txt_path = 
setup_and_teardown 128 | 129 | img_dir = test_dir / "RGBImages" 130 | input_param = RapidVideOCRInput(is_batch_rec=True, out_format="ass") 131 | extractor = RapidVideOCR(input_param) 132 | extractor(img_dir, save_dir) 133 | 134 | ass_data = read_txt(ass_path) 135 | assert len(ass_data) == 17 136 | assert ass_data[13].split(",", 9)[-1] == "空间里面他绝对赢不了的" 137 | assert ass_data[-1].split(",", 9)[-1] == "你们接着善后" 138 | 139 | 140 | def test_out_only_txt(setup_and_teardown): 141 | save_dir, srt_path, ass_path, txt_path = setup_and_teardown 142 | 143 | img_dir = test_dir / "RGBImages" 144 | input_param = RapidVideOCRInput(is_batch_rec=True, out_format="txt") 145 | extractor = RapidVideOCR(input_param) 146 | extractor(img_dir, save_dir) 147 | 148 | txt_data = read_txt(txt_path) 149 | assert len(txt_data) == 8 150 | assert txt_data[-2] == "你们接着善后" 151 | --------------------------------------------------------------------------------