├── .gitattributes ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug.md │ └── config.yml └── workflows │ ├── SyncToGitee.yml │ ├── gen_whl_to_pypi_rapidocr.yml │ ├── gen_whl_to_pypi_rapidocr_ort.yml │ ├── gen_whl_to_pypi_rapidocr_paddle.yml │ ├── gen_whl_to_pypi_rapidocr_torch.yml │ ├── gen_whl_to_pypi_rapidocr_vino.yml │ ├── gen_whl_to_pypi_rapidocr_web.yml │ └── package_ocrweb.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── android └── README.md ├── api └── README.md ├── assets ├── RapidOCRDemo.ipynb ├── RapidOCR_LOGO.png └── colab-badge.svg ├── cliff.toml ├── cpp └── README.md ├── docs ├── README_zh.md ├── doc_whl_rapidocr.md ├── doc_whl_rapidocr_ort.md ├── doc_whl_rapidocr_paddle.md ├── doc_whl_rapidocr_vino.md └── doc_whl_rapidocr_web.md ├── dotnet └── README.md ├── ios └── README.md ├── jvm └── README.md ├── ocrweb ├── README.md ├── rapidocr_web │ ├── __init__.py │ ├── ocrweb.py │ ├── ocrweb.spec │ ├── static │ │ ├── css │ │ │ ├── favicon.ico │ │ │ └── main.css │ │ └── js │ │ │ └── jquery-3.0.0.min.js │ ├── task.py │ └── templates │ │ └── index.html ├── requirements.txt └── setup.py ├── ocrweb_multi ├── README.md ├── assets │ └── ocr_web_multi.jpg ├── build.py ├── config.yaml ├── main.py ├── main.spec ├── models │ └── .gitkeep ├── rapidocr │ ├── __init__.py │ ├── classify.py │ ├── detect.py │ ├── detect_process.py │ ├── main.py │ ├── rapid_ocr_api.py │ └── recognize.py ├── requirements.txt ├── static │ ├── css │ │ └── main.css │ ├── favicon.ico │ ├── hint.svg │ ├── index.html │ └── js │ │ └── jquery-3.0.0.min.js ├── utils │ ├── config.py │ └── utils.py ├── wrapper.c └── wrapper.rc └── python ├── README.md ├── demo.py ├── rapidocr ├── __init__.py ├── cal_rec_boxes │ ├── __init__.py │ └── main.py ├── ch_ppocr_cls │ ├── __init__.py │ ├── main.py │ └── utils.py ├── ch_ppocr_det │ ├── __init__.py │ ├── main.py │ └── utils.py ├── ch_ppocr_rec │ ├── __init__.py │ ├── main.py │ ├── typings.py │ └── utils.py ├── cli.py ├── config.yaml ├── default_models.yaml ├── inference_engine │ ├── __init__.py │ ├── base.py │ ├── onnxruntime.py │ ├── openvino.py │ ├── paddle.py │ └── torch.py ├── main.py ├── models │ └── .gitkeep ├── networks │ ├── __init__.py │ ├── arch_config.yaml │ ├── architectures │ │ ├── __init__.py │ │ └── base_model.py │ ├── backbones │ │ ├── __init__.py │ │ ├── det_mobilenet_v3.py │ │ ├── rec_hgnet.py │ │ ├── rec_lcnetv3.py │ │ ├── rec_mobilenet_v3.py │ │ ├── rec_mv1_enhance.py │ │ └── rec_svtrnet.py │ ├── common.py │ ├── heads │ │ ├── __init__.py │ │ ├── cls_head.py │ │ ├── det_db_head.py │ │ ├── rec_ctc_head.py │ │ └── rec_multi_head.py │ └── necks │ │ ├── __init__.py │ │ ├── db_fpn.py │ │ ├── intracl.py │ │ └── rnn.py └── utils │ ├── __init__.py │ ├── download_file.py │ ├── load_image.py │ ├── logger.py │ ├── output.py │ ├── parse_parameters.py │ ├── process_img.py │ ├── typings.py │ ├── utils.py │ └── vis_res.py ├── rapidocr_onnxruntime ├── __init__.py ├── cal_rec_boxes │ ├── __init__.py │ └── main.py ├── ch_ppocr_cls │ ├── __init__.py │ ├── text_cls.py │ └── utils.py ├── ch_ppocr_det │ ├── __init__.py │ ├── text_detect.py │ └── utils.py ├── ch_ppocr_rec │ ├── __init__.py │ ├── text_recognize.py │ └── utils.py ├── config.yaml ├── main.py ├── models │ └── .gitkeep └── utils │ ├── __init__.py │ ├── infer_engine.py │ ├── load_image.py │ ├── logger.py │ ├── parse_parameters.py │ ├── process_img.py │ └── vis_res.py ├── rapidocr_openvino ├── __init__.py ├── cal_rec_boxes │ ├── __init__.py │ └── main.py ├── ch_ppocr_cls │ ├── __init__.py │ ├── 
text_cls.py │ └── utils.py ├── ch_ppocr_det │ ├── __init__.py │ ├── text_detect.py │ └── utils.py ├── ch_ppocr_rec │ ├── __init__.py │ ├── ppocr_keys_v1.txt │ ├── text_recognize.py │ └── utils.py ├── config.yaml ├── main.py ├── models │ └── .gitkeep └── utils │ ├── __init__.py │ ├── infer_engine.py │ ├── load_image.py │ ├── logger.py │ ├── parse_parameters.py │ ├── process_img.py │ └── vis_res.py ├── rapidocr_paddle ├── __init__.py ├── cal_rec_boxes │ ├── __init__.py │ └── main.py ├── ch_ppocr_cls │ ├── __init__.py │ ├── text_cls.py │ └── utils.py ├── ch_ppocr_det │ ├── __init__.py │ ├── text_detect.py │ └── utils.py ├── ch_ppocr_rec │ ├── __init__.py │ ├── ppocr_keys_v1.txt │ ├── text_recognize.py │ └── utils.py ├── config.yaml ├── main.py ├── models │ └── .gitkeep └── utils │ ├── __init__.py │ ├── infer_engine.py │ ├── load_image.py │ ├── logger.py │ ├── parse_parameters.py │ ├── process_img.py │ └── vis_res.py ├── rapidocr_torch ├── __init__.py ├── arch_config.yaml ├── cal_rec_boxes │ ├── __init__.py │ └── main.py ├── ch_ppocr_cls │ ├── __init__.py │ ├── text_cls.py │ └── utils.py ├── ch_ppocr_det │ ├── __init__.py │ ├── text_detect.py │ └── utils.py ├── ch_ppocr_rec │ ├── __init__.py │ ├── ppocr_keys_v1.txt │ ├── text_recognize.py │ └── utils.py ├── config.yaml ├── main.py ├── modeling │ ├── __init__.py │ ├── architectures │ │ ├── __init__.py │ │ └── base_model.py │ ├── backbones │ │ ├── __init__.py │ │ ├── det_mobilenet_v3.py │ │ ├── rec_hgnet.py │ │ ├── rec_lcnetv3.py │ │ ├── rec_mobilenet_v3.py │ │ └── rec_svtrnet.py │ ├── common.py │ ├── heads │ │ ├── __init__.py │ │ ├── cls_head.py │ │ ├── det_db_head.py │ │ ├── rec_ctc_head.py │ │ └── rec_multi_head.py │ └── necks │ │ ├── __init__.py │ │ ├── db_fpn.py │ │ ├── intracl.py │ │ └── rnn.py ├── models │ └── .gitkeep └── utils │ ├── __init__.py │ ├── infer_engine.py │ ├── load_image.py │ ├── logger.py │ ├── parse_parameters.py │ ├── process_img.py │ └── vis_res.py ├── requirements.txt ├── requirements_ort.txt ├── requirements_paddle.txt ├── requirements_torch.txt ├── requirements_vino.txt ├── setup.py ├── setup_onnxruntime.py ├── setup_openvino.py ├── setup_paddle.py ├── setup_torch.py └── tests ├── __init__.py ├── base_module.py ├── test_files ├── black_font_color_transparent.png ├── ch_doc_server.png ├── ch_en_num.jpg ├── devanagari.jpg ├── empty_black.jpg ├── en.jpg ├── issue_170.png ├── japan.jpg ├── korean.jpg ├── short.png ├── test_letterbox_like.jpg ├── test_without_det.jpg ├── text_cls.jpg ├── text_det.jpg ├── text_rec.jpg ├── text_vertical_words.png ├── two_dim_image.npy └── white_font_color_transparent.png ├── test_main.py ├── test_ort.py ├── test_paddle.py ├── test_torch.py └── test_vino.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Explicitly declare text files you want to always be normalized and converted 5 | # to native line endings on checkout. 6 | *.c text 7 | *.h text 8 | *.py text 9 | *.md text 10 | *.js text 11 | *.cpp text 12 | 13 | # Declare files that will always have CRLF line endings on checkout. 14 | *.sln text eol=crlf 15 | 16 | # Denote all files that are truly binary and should not be modified. 
17 | *.png binary 18 | *.jpg binary 19 | *.pdf binary -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: https://rapidai.github.io/RapidOCRDocs/sponsor/ 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐞 Bug 3 | about: Bug 4 | title: 'Bug' 5 | labels: 'Bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | #### 问题描述 / Problem Description 11 | 12 | 13 | #### 运行环境 / Runtime Environment 14 | 15 | 16 | #### 复现代码 / Reproduction Code 17 | ```python 18 | 19 | ``` 20 | 21 | #### 可能解决方案 / Possible solutions 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: ❓ Questions 4 | url: https://github.com/RapidAI/RapidOCR/discussions/categories/q-a 5 | about: Please use the community forum for help and questions regarding RapidOCR. 6 | - name: 💡 Feature requests and ideas 7 | url: https://github.com/RapidAI/RapidOCR/discussions/categories/ideas 8 | about: Please vote for and post new feature ideas in the community forum. 9 | - name: 📖 Documentation 10 | url: https://rapidai.github.io/RapidOCRDocs/docs/ 11 | about: A great place to find instructions and answers about RapidOCR. 12 | -------------------------------------------------------------------------------- /.github/workflows/SyncToGitee.yml: -------------------------------------------------------------------------------- 1 | name: SyncToGitee 2 | on: 3 | push: 4 | branches: 5 | - main 6 | jobs: 7 | repo-sync: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout source codes 11 | uses: actions/checkout@v4 12 | 13 | - name: Mirror the Github organization repos to Gitee. 14 | uses: Yikun/hub-mirror-action@v1.4 15 | with: 16 | src: 'github/RapidAI' 17 | dst: 'gitee/RapidAI' 18 | dst_key: ${{ secrets.GITEE_PRIVATE_KEY }} 19 | dst_token: ${{ secrets.GITEE_TOKEN }} 20 | force_update: true 21 | # only sync this repo 22 | static_list: "RapidOCR" 23 | debug: true 24 | 25 | - name: Mirror the Github organization repos to Gitee. 
26 | uses: Yikun/hub-mirror-action@v1.4 27 | with: 28 | src: 'github/RapidAI' 29 | dst: 'gitee/openKylin' 30 | dst_key: ${{ secrets.GITEE_PRIVATE_KEY }} 31 | dst_token: ${{ secrets.GITEE_TOKEN }} 32 | force_update: true 33 | # only sync this repo 34 | static_list: "RapidOCR" 35 | debug: true -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr to pypi 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | env: 9 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v3.0.0.zip 10 | 11 | jobs: 12 | UnitTesting: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Pull latest code 16 | uses: actions/checkout@v4 17 | 18 | - name: Set up Python 3.10 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: '3.10' 22 | architecture: 'x64' 23 | 24 | - name: Display Python version 25 | run: python -c "import sys; print(sys.version)" 26 | 27 | - name: Unit testings 28 | run: | 29 | cd python 30 | pip install -r requirements.txt 31 | pip install pytest wheel get_pypi_latest_version openvino==2023.3.0 onnxruntime 32 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 33 | python -m pip install paddlepaddle==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/ 34 | 35 | pytest tests/test_main.py 36 | 37 | GenerateWHL_PushPyPi: 38 | needs: UnitTesting 39 | runs-on: ubuntu-latest 40 | 41 | steps: 42 | - uses: actions/checkout@v4 43 | 44 | - name: Set up Python 3.10 45 | uses: actions/setup-python@v4 46 | with: 47 | python-version: '3.10' 48 | architecture: 'x64' 49 | 50 | - name: Download models 51 | run: | 52 | cd python 53 | wget $RESOURCES_URL 54 | ZIP_NAME=${RESOURCES_URL##*/} 55 | DIR_NAME=${ZIP_NAME%.*} 56 | unzip $ZIP_NAME 57 | cp $DIR_NAME/resources/models/*.* rapidocr/models 58 | 59 | - name: Run setup.py 60 | run: | 61 | cd python 62 | pip install setuptools get_pypi_latest_version wheel 63 | mkdir rapidocr_t 64 | mv rapidocr rapidocr_t 65 | mv rapidocr_t rapidocr 66 | cd rapidocr 67 | echo "from .rapidocr.main import RapidOCR, VisRes" > __init__.py 68 | 69 | cd .. 
70 | python -m pip install --upgrade pip 71 | python setup.py bdist_wheel ${{ github.ref_name }} 72 | mv dist ../ 73 | 74 | - name: Publish distribution 📦 to PyPI 75 | uses: pypa/gh-action-pypi-publish@v1.5.0 76 | with: 77 | password: ${{ secrets.RAPIDOCR }} 78 | packages_dir: dist/ 79 | -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr_onnxruntime to pypi 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - 'python/rapidocr_onnxruntime/**' 8 | - 'docs/doc_whl_rapidocr_ort.md' 9 | - 'python/setup_onnxruntime.py' 10 | # - '.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml' 11 | 12 | 13 | env: 14 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip 15 | 16 | jobs: 17 | UnitTesting: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - name: Pull latest code 21 | uses: actions/checkout@v4 22 | 23 | - name: Set up Python 3.10 24 | uses: actions/setup-python@v4 25 | with: 26 | python-version: '3.10' 27 | architecture: 'x64' 28 | 29 | - name: Display Python version 30 | run: python -c "import sys; print(sys.version)" 31 | 32 | - name: Unit testings 33 | run: | 34 | wget $RESOURCES_URL 35 | ZIP_NAME=${RESOURCES_URL##*/} 36 | DIR_NAME=${ZIP_NAME%.*} 37 | unzip $DIR_NAME 38 | cp $DIR_NAME/resources/models/*.onnx python/rapidocr_onnxruntime/models/ 39 | 40 | cd python 41 | pip install -r requirements_ort.txt 42 | pip install pytest wheel get_pypi_latest_version openvino 43 | 44 | cd tests 45 | pytest test_ort.py 46 | 47 | GenerateWHL_PushPyPi: 48 | needs: UnitTesting 49 | runs-on: ubuntu-latest 50 | 51 | steps: 52 | - uses: actions/checkout@v4 53 | 54 | - name: Set up Python 3.10 55 | uses: actions/setup-python@v4 56 | with: 57 | python-version: '3.10' 58 | architecture: 'x64' 59 | 60 | - name: Download models 61 | run: | 62 | cd python 63 | wget $RESOURCES_URL 64 | ZIP_NAME=${RESOURCES_URL##*/} 65 | DIR_NAME=${ZIP_NAME%.*} 66 | unzip $ZIP_NAME 67 | cp $DIR_NAME/resources/models/*.onnx rapidocr_onnxruntime/models 68 | 69 | - name: Run setup_onnxruntime.py 70 | run: | 71 | cd python 72 | pip install setuptools get_pypi_latest_version wheel 73 | mkdir rapidocr_onnxruntime_t 74 | mv rapidocr_onnxruntime rapidocr_onnxruntime_t 75 | mv rapidocr_onnxruntime_t rapidocr_onnxruntime 76 | cd rapidocr_onnxruntime 77 | echo "from .rapidocr_onnxruntime.main import RapidOCR, VisRes" > __init__.py 78 | 79 | cd .. 
80 | python -m pip install --upgrade pip 81 | python setup_onnxruntime.py bdist_wheel ${{ github.ref_name }} 82 | mv dist ../ 83 | 84 | - name: Publish distribution 📦 to PyPI 85 | uses: pypa/gh-action-pypi-publish@v1.5.0 86 | with: 87 | password: ${{ secrets.PYPI_API_TOKEN }} 88 | packages_dir: dist/ 89 | -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr_paddle.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr_paddle to pypi 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - 'python/rapidocr_paddle/**' 8 | - 'docs/doc_whl_rapidocr_paddle.md' 9 | - 'python/setup_paddle.py' 10 | - '.github/workflows/gen_whl_to_pypi_rapidocr_paddle.yml' 11 | - 'python/requirements_paddle.txt' 12 | # tags: 13 | # - v* 14 | 15 | env: 16 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/paddle_models_v4.zip 17 | 18 | jobs: 19 | UnitTesting: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Pull latest code 23 | uses: actions/checkout@v4 24 | 25 | - name: Set up Python 3.10 26 | uses: actions/setup-python@v4 27 | with: 28 | python-version: '3.10' 29 | architecture: 'x64' 30 | 31 | - name: Display Python version 32 | run: python -c "import sys; print(sys.version)" 33 | 34 | - name: Unit testings 35 | run: | 36 | wget $RESOURCES_URL 37 | ZIP_NAME=${RESOURCES_URL##*/} 38 | DIR_NAME=${ZIP_NAME%.*} 39 | unzip $DIR_NAME 40 | cp -r models/* python/rapidocr_paddle/models/ 41 | cd python 42 | pip install -r requirements_paddle.txt 43 | pip install pytest wheel get_pypi_latest_version 44 | pip install paddlepaddle==3.0.0rc0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/ 45 | cd tests 46 | pytest test_paddle.py 47 | 48 | GenerateWHL_PushPyPi: 49 | needs: UnitTesting 50 | runs-on: ubuntu-latest 51 | 52 | steps: 53 | - uses: actions/checkout@v4 54 | 55 | - name: Set up Python 3.10 56 | uses: actions/setup-python@v4 57 | with: 58 | python-version: '3.10' 59 | architecture: 'x64' 60 | 61 | - name: Download models 62 | run: | 63 | cd python 64 | wget $RESOURCES_URL 65 | ZIP_NAME=${RESOURCES_URL##*/} 66 | DIR_NAME=${ZIP_NAME%.*} 67 | unzip $ZIP_NAME 68 | cp -r models/* rapidocr_paddle/models 69 | 70 | - name: Run setup_paddle.py 71 | run: | 72 | cd python 73 | pip install setuptools get_pypi_latest_version wheel 74 | 75 | mkdir rapidocr_paddle_t 76 | mv rapidocr_paddle rapidocr_paddle_t 77 | mv rapidocr_paddle_t rapidocr_paddle 78 | cd rapidocr_paddle 79 | echo "from .rapidocr_paddle.main import RapidOCR, VisRes" > __init__.py 80 | 81 | cd .. 
82 | python -m pip install --upgrade pip 83 | 84 | echo "${{ github.event.head_commit.message }}" 85 | python setup_paddle.py bdist_wheel "${{ github.event.head_commit.message }}" 86 | mv dist ../ 87 | 88 | - name: Publish distribution 📦 to PyPI 89 | uses: pypa/gh-action-pypi-publish@v1.5.0 90 | with: 91 | password: ${{ secrets.RAPIDOCR_OPENVINO }} 92 | packages_dir: dist/ 93 | -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr_torch.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr_torch to pypi 2 | 3 | on: 4 | push: 5 | # branches: [ main ] 6 | # paths: 7 | # - 'python/rapidocr_torch/**' 8 | # - 'python/setup_torch.py' 9 | # - '.github/workflows/gen_whl_to_pypi_rapidocr_torch.yml' 10 | # - 'python/requirements_torch.txt' 11 | tags: 12 | - torch_v* 13 | 14 | env: 15 | RESOURCES_URL: https://github.com/Joker1212/RapidOCR/releases/download/v0.0.0/torch_test.zip 16 | 17 | jobs: 18 | UnitTesting: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Pull latest code 22 | uses: actions/checkout@v4 23 | 24 | - name: Set up Python 3.7 25 | uses: actions/setup-python@v4 26 | with: 27 | python-version: '3.7' 28 | architecture: 'x64' 29 | 30 | - name: Display Python version 31 | run: python -c "import sys; print(sys.version)" 32 | 33 | - name: Unit testings 34 | run: | 35 | wget $RESOURCES_URL 36 | ZIP_NAME=${RESOURCES_URL##*/} 37 | DIR_NAME=${ZIP_NAME%.*} 38 | unzip $DIR_NAME 39 | cp $DIR_NAME/resources/models/*.pth python/rapidocr_torch/models/ 40 | 41 | cd python 42 | pip install -r requirements_torch.txt 43 | pip install pytest wheel get_pypi_latest_version 44 | 45 | cd tests 46 | pytest test_torch.py 47 | 48 | GenerateWHL_PushPyPi: 49 | needs: UnitTesting 50 | runs-on: ubuntu-latest 51 | 52 | steps: 53 | - uses: actions/checkout@v4 54 | 55 | - name: Set up Python 3.7 56 | uses: actions/setup-python@v4 57 | with: 58 | python-version: '3.7' 59 | architecture: 'x64' 60 | 61 | # - name: Set SSH Environment 62 | # env: 63 | # DEPLOY_KEYS: ${{ secrets.GEN_PYTHON_SDK }} 64 | # run: | 65 | # mkdir -p ~/.ssh/ 66 | # echo "$DEPLOY_KEYS" > ~/.ssh/id_rsa 67 | # chmod 600 ~/.ssh/id_rsa 68 | # chmod 700 ~/.ssh && chmod 600 ~/.ssh/* 69 | 70 | - name: Download models 71 | run: | 72 | cd python 73 | wget $RESOURCES_URL 74 | ZIP_NAME=${RESOURCES_URL##*/} 75 | DIR_NAME=${ZIP_NAME%.*} 76 | unzip $ZIP_NAME 77 | cp $DIR_NAME/resources/models/*.pth rapidocr_torch/models/ 78 | 79 | - name: Run setup_torch.py 80 | run: | 81 | cd python 82 | pip install setuptools get_pypi_latest_version wheel 83 | mkdir rapidocr_torch_t 84 | mv rapidocr_torch rapidocr_torch_t 85 | mv rapidocr_torch_t rapidocr_torch 86 | cd rapidocr_torch 87 | echo "from .rapidocr_torch.main import RapidOCR, VisRes" > __init__.py 88 | 89 | cd .. 
90 | python -m pip install --upgrade pip 91 | python setup_torch.py bdist_wheel ${{ github.ref_name }} 92 | mv dist ../ 93 | 94 | # - name: Publish distribution 📦 to PyPI 95 | # uses: pypa/gh-action-pypi-publish@v1.5.0 96 | # with: 97 | # password: ${{ secrets.PYPI_API_TOKEN }} 98 | # packages_dir: dist/ 99 | - name: Publish distribution 📦 to Test PyPI 100 | uses: pypa/gh-action-pypi-publish@v1.5.0 101 | with: 102 | username: TEST_PYPI_API_TOKEN 103 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 104 | repository_url: https://test.pypi.org/legacy/ 105 | packages_dir: dist/ 106 | -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr_vino.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr_openvino to pypi 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - 'python/rapidocr_openvino/**' 8 | - 'docs/doc_whl_rapidocr_vino.md' 9 | - 'python/setup_openvino.py' 10 | - '.github/workflows/gen_whl_to_pypi_rapidocr_vino.yml' 11 | - 'python/requirements_vino.txt' 12 | 13 | 14 | env: 15 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip 16 | 17 | jobs: 18 | UnitTesting: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Pull latest code 22 | uses: actions/checkout@v4 23 | 24 | - name: Set up Python 3.10 25 | uses: actions/setup-python@v4 26 | with: 27 | python-version: '3.10' 28 | architecture: 'x64' 29 | 30 | - name: Display Python version 31 | run: python -c "import sys; print(sys.version)" 32 | 33 | - name: Unit testings 34 | run: | 35 | wget $RESOURCES_URL 36 | ZIP_NAME=${RESOURCES_URL##*/} 37 | DIR_NAME=${ZIP_NAME%.*} 38 | unzip $DIR_NAME 39 | cp $DIR_NAME/resources/models/*.onnx python/rapidocr_openvino/models/ 40 | cd python 41 | pip install -r requirements_vino.txt 42 | pip install pytest wheel get_pypi_latest_version onnxruntime 43 | cd tests 44 | pytest test_vino.py 45 | 46 | GenerateWHL_PushPyPi: 47 | needs: UnitTesting 48 | runs-on: ubuntu-latest 49 | 50 | steps: 51 | - uses: actions/checkout@v4 52 | 53 | - name: Set up Python 3.10 54 | uses: actions/setup-python@v4 55 | with: 56 | python-version: '3.10' 57 | architecture: 'x64' 58 | 59 | - name: Download models 60 | run: | 61 | cd python 62 | wget $RESOURCES_URL 63 | ZIP_NAME=${RESOURCES_URL##*/} 64 | DIR_NAME=${ZIP_NAME%.*} 65 | unzip $ZIP_NAME 66 | cp $DIR_NAME/resources/models/*.onnx rapidocr_openvino/models 67 | 68 | - name: Run setup_openvino.py 69 | run: | 70 | cd python 71 | pip install setuptools get_pypi_latest_version wheel 72 | mkdir rapidocr_openvino_t 73 | mv rapidocr_openvino rapidocr_openvino_t 74 | mv rapidocr_openvino_t rapidocr_openvino 75 | cd rapidocr_openvino 76 | echo "from .rapidocr_openvino.main import RapidOCR, VisRes" > __init__.py 77 | 78 | cd .. 
79 | python -m pip install --upgrade pip 80 | python setup_openvino.py bdist_wheel "${{ github.event.head_commit.message }}" 81 | mv dist ../ 82 | 83 | - name: Publish distribution 📦 to PyPI 84 | uses: pypa/gh-action-pypi-publish@v1.5.0 85 | with: 86 | password: ${{ secrets.RAPIDOCR_OPENVINO }} 87 | packages_dir: dist/ 88 | -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr_web.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr_web to pypi 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - 'ocrweb/rapidocr_web/**' 8 | - '!ocrweb/rapidocr_web/ocr_web.spec' 9 | - 'docs/doc_whl_rapidocr_web.md' 10 | - 'ocrweb/setup.py' 11 | - '.github/workflows/gen_whl_to_pypi_rapidocr_web.yml' 12 | 13 | jobs: 14 | GenerateWHL_PushPyPi: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up Python 3.7 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: '3.7' 24 | architecture: 'x64' 25 | 26 | - name: Set SSH Environment 27 | env: 28 | DEPLOY_KEYS: ${{ secrets.GEN_PYTHON_SDK }} 29 | run: | 30 | mkdir -p ~/.ssh/ 31 | echo "$DEPLOY_KEYS" > ~/.ssh/id_rsa 32 | chmod 600 ~/.ssh/id_rsa 33 | chmod 700 ~/.ssh && chmod 600 ~/.ssh/* 34 | 35 | - name: Run setup.py 36 | run: | 37 | cd ocrweb 38 | pip install -r requirements.txt 39 | 40 | python -m pip install --upgrade pip 41 | python setup.py bdist_wheel "${{ github.event.head_commit.message }}" 42 | 43 | - name: Publish distribution 📦 to PyPI 44 | uses: pypa/gh-action-pypi-publish@v1.5.0 45 | with: 46 | password: ${{ secrets.RAPIDOCR_OPENVINO }} 47 | packages_dir: ocrweb/dist/ 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | .pytest_cache 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | # *.manifest 37 | # *.spec 38 | *.res 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | #idea 136 | .vs 137 | .vscode 138 | .idea 139 | /images 140 | /models 141 | 142 | #models 143 | *.onnx 144 | 145 | *.ttf 146 | *.ttc 147 | 148 | long1.jpg 149 | 150 | *.bin 151 | *.mapping 152 | *.xml 153 | 154 | *.pdiparams 155 | *.pdiparams.info 156 | *.pdmodel 157 | 158 | .DS_Store 159 | *.npy -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://gitee.com/SWHL/autoflake 3 | rev: v2.1.1 4 | hooks: 5 | - id: autoflake 6 | args: 7 | [ 8 | "--recursive", 9 | "--in-place", 10 | "--remove-all-unused-imports", 11 | "--ignore-init-module-imports", 12 | ] 13 | files: \.py$ 14 | - repo: https://gitee.com/SWHL/black 15 | rev: 23.1.0 16 | hooks: 17 | - id: black 18 | files: \.py$ 19 | -------------------------------------------------------------------------------- /android/README.md: -------------------------------------------------------------------------------- 1 | See [RapidOcrAndroidOnnx](https://github.com/RapidAI/RapidOcrAndroidOnnx) for details. 
2 | -------------------------------------------------------------------------------- /api/README.md: -------------------------------------------------------------------------------- 1 | ### See [RapidOCRAPI](https://github.com/RapidAI/RapidOCRAPI) for details 2 | 3 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_api/usage/) 4 | -------------------------------------------------------------------------------- /assets/RapidOCR_LOGO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/assets/RapidOCR_LOGO.png -------------------------------------------------------------------------------- /assets/colab-badge.svg: -------------------------------------------------------------------------------- 1 | Open in ColabOpen in Colab 2 | -------------------------------------------------------------------------------- /cpp/README.md: -------------------------------------------------------------------------------- 1 | See [RapidOcrNcnn](https://github.com/RapidAI/RapidOcrNcnn) for details. 2 | 3 | See [RapidOcrOnnx](https://github.com/RapidAI/RapidOcrOnnx) for details. 4 | -------------------------------------------------------------------------------- /docs/doc_whl_rapidocr.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs) 2 | -------------------------------------------------------------------------------- /docs/doc_whl_rapidocr_ort.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr/usage/) 2 | -------------------------------------------------------------------------------- /docs/doc_whl_rapidocr_paddle.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_paddle/usage/) 2 | -------------------------------------------------------------------------------- /docs/doc_whl_rapidocr_vino.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr/usage/) 2 | -------------------------------------------------------------------------------- /docs/doc_whl_rapidocr_web.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_web/rapidocr_web/) 2 | -------------------------------------------------------------------------------- /dotnet/README.md: -------------------------------------------------------------------------------- 1 | See [RapidOCRCSharp](https://github.com/RapidAI/RapidOCRCSharp) for details. 2 | -------------------------------------------------------------------------------- /ios/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | A volunteer contributor is needed; please get in touch directly via the QQ group: 887298230 4 | 5 | A contributor is wanted. 6 | -------------------------------------------------------------------------------- /jvm/README.md: -------------------------------------------------------------------------------- 1 | See [RapidOcrNcnnJvm](https://github.com/RapidAI/RapidOcrNcnnJvm) for details. 
2 | 3 | See [RapidOcrOnnxJvm](https://github.com/RapidAI/RapidOcrOnnxJvm) for details. 4 | -------------------------------------------------------------------------------- /ocrweb/README.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_web/usage/) 2 | -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/ocrweb.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import argparse 5 | from pathlib import Path 6 | from wsgiref.simple_server import make_server 7 | 8 | from flask import Flask, render_template, request 9 | 10 | try: 11 | from rapidocr_web.task import OCRWebUtils 12 | except: 13 | from task import OCRWebUtils 14 | 15 | root_dir = Path(__file__).resolve().parent 16 | 17 | app = Flask(__name__, template_folder="templates") 18 | app.config["MAX_CONTENT_LENGTH"] = 3 * 1024 * 1024 19 | processor = OCRWebUtils() 20 | 21 | 22 | @app.route("/") 23 | def index(): 24 | return render_template("index.html") 25 | 26 | 27 | @app.route("/ocr", methods=["POST"]) 28 | def ocr(): 29 | if request.method == "POST": 30 | img_str = request.get_json().get("file", None) 31 | ocr_res = processor(img_str) 32 | return ocr_res 33 | 34 | 35 | def main(): 36 | parser = argparse.ArgumentParser("rapidocr_web") 37 | parser.add_argument("-ip", "--ip", type=str, default="0.0.0.0", help="IP Address") 38 | parser.add_argument("-p", "--port", type=int, default=9003, help="IP port") 39 | args = parser.parse_args() 40 | 41 | print(f"Successfully launched and visit http://{args.ip}:{args.port} to view.") 42 | server = make_server(args.ip, args.port, app) 43 | server.serve_forever() 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/ocrweb.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | from pathlib import Path 3 | 4 | import rapidocr_onnxruntime 5 | 6 | block_cipher = None 7 | 8 | package_name = 'rapidocr_onnxruntime' 9 | install_dir = Path(rapidocr_onnxruntime.__file__).resolve().parent 10 | 11 | onnx_paths = list(install_dir.rglob('*.onnx')) 12 | yaml_paths = list(install_dir.rglob('*.yaml')) 13 | 14 | onnx_add_data = [(str(v.parent), f'{package_name}/{v.parent.name}') 15 | for v in onnx_paths] 16 | 17 | yaml_add_data = [] 18 | for v in yaml_paths: 19 | if package_name == v.parent.name: 20 | yaml_add_data.append((str(v.parent / '*.yaml'), package_name)) 21 | else: 22 | yaml_add_data.append( 23 | (str(v.parent / '*.yaml'), f'{package_name}/{v.parent.name}')) 24 | 25 | add_data = list(set(yaml_add_data + onnx_add_data)) 26 | 27 | 28 | a = Analysis( 29 | ['ocrweb.py'], 30 | pathex=[], 31 | binaries=[], 32 | datas=add_data, 33 | hiddenimports=[], 34 | hookspath=[], 35 | hooksconfig={}, 36 | runtime_hooks=[], 37 | excludes=[], 38 | win_no_prefer_redirects=False, 39 | win_private_assemblies=False, 40 | cipher=block_cipher, 41 | noarchive=False, 42 | ) 43 | pyz = 
PYZ(a.pure, a.zipped_data, cipher=block_cipher) 44 | 45 | exe = EXE( 46 | pyz, 47 | a.scripts, 48 | [], 49 | exclude_binaries=True, 50 | name='RapidOCRWeb', 51 | debug=False, 52 | bootloader_ignore_signals=False, 53 | strip=False, 54 | upx=True, 55 | console=True, 56 | disable_windowed_traceback=False, 57 | argv_emulation=False, 58 | target_arch=None, 59 | codesign_identity=None, 60 | entitlements_file=None, 61 | icon=['./static/css/favicon.ico'], 62 | ) 63 | coll = COLLECT( 64 | exe, 65 | a.binaries, 66 | a.zipfiles, 67 | a.datas, 68 | strip=False, 69 | upx=True, 70 | upx_exclude=[], 71 | name='RapidOCRWeb', 72 | ) 73 | -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/static/css/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb/rapidocr_web/static/css/favicon.ico -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/static/css/main.css: -------------------------------------------------------------------------------- 1 | body{ 2 | background-color:#ebedef; 3 | min-height: 100%; 4 | margin: 0; 5 | } 6 | .btn-gen { 7 | /* position: absolute;top:540px; left: 500px; */ 8 | background-color: #00a1d6; 9 | text-align: center; 10 | border-radius: 18px; 11 | margin-top: 5px; 12 | font-size: 15px; 13 | padding: 5px 10px; 14 | height: 20px; 15 | width: 120px; 16 | text-transform: uppercase; 17 | color: #fff; 18 | border:none; 19 | } 20 | .btn-gen:hover, 21 | .btn-gen:focus { 22 | border-color: #23AAEE; 23 | background-color: #23AAEE; 24 | color: white; 25 | cursor: pointer; 26 | } 27 | .area{ 28 | text-align: center; 29 | height: auto; 30 | margin: auto; 31 | } 32 | .leftarea{ 33 | float: left; 34 | width: 50%; 35 | height: auto; 36 | position: relative; 37 | } 38 | 39 | .rightarea{ 40 | float: left; 41 | width: 50%; 42 | height: auto; 43 | } 44 | .table{ 45 | width: auto; 46 | height: auto; 47 | margin: 0 auto; 48 | 49 | } 50 | .span_title{ 51 | width: 98%; 52 | height: 36px; 53 | margin-top: 4px; 54 | line-height: 32px; 55 | background-color: #00a1d6; 56 | border: 1px solid #00a1d6; 57 | border-radius: 20px; 58 | color: #fff; 59 | display: inline-block; 60 | text-align: center; 61 | font-size: 22px; 62 | transition: .3s; 63 | box-sizing: border-box; 64 | } 65 | .uplodNote{ 66 | font-size: 10px; 67 | color:#A1A1A1; 68 | } 69 | 70 | a{ 71 | text-decoration:none; 72 | } 73 | 74 | /* wrapper */ 75 | .leftarea>#wrapper { 76 | position: absolute; top: 45px; left:0px; 77 | width: 98%; 78 | height: 100%; 79 | background: 80 | linear-gradient(#1a98ca, #1a98ca), 81 | linear-gradient(90deg, #ffffff33 1px,transparent 0,transparent 19px), 82 | linear-gradient( #ffffff33 1px,transparent 0,transparent 19px), 83 | linear-gradient(transparent, #1a98ca); 84 | background-size:100% 1.5%, 10% 100%,100% 8%, 100% 100%; 85 | background-repeat:no-repeat, repeat, repeat, no-repeat; 86 | background-position: 0% 100%, 0 0, 0 0, 0 0; 87 | /* 初始位置 */ 88 | clip-path: polygon(0% 0%, 100% 0%, 100% 1.5%, 0% 1.5%); 89 | /* 添加动画效果 */ 90 | animation: move 1s infinite linear; 91 | } 92 | 93 | @keyframes move{ 94 | to{ 95 | background-position: 0 100%,0 0, 0 0, 0 0; 96 | /* 终止位置 */ 97 | clip-path: polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%); 98 | } 99 | } -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/task.py: 
-------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import base64 5 | import copy 6 | import json 7 | from collections import namedtuple 8 | from functools import reduce 9 | from typing import List, Tuple, Union 10 | 11 | import cv2 12 | import numpy as np 13 | from rapidocr_onnxruntime import RapidOCR 14 | 15 | 16 | class OCRWebUtils: 17 | def __init__(self) -> None: 18 | self.ocr = RapidOCR() 19 | self.WebReturn = namedtuple( 20 | "WebReturn", 21 | ["image", "total_elapse", "elapse_part", "rec_res", "det_boxes"], 22 | ) 23 | 24 | def __call__(self, img_content: str) -> namedtuple: 25 | if img_content is None: 26 | raise ValueError("img is None") 27 | img = self.prepare_img(img_content) 28 | ocr_res, elapse = self.ocr(img) 29 | return self.get_web_result(img, ocr_res, elapse) 30 | 31 | def prepare_img(self, img_str: str) -> np.ndarray: 32 | img_str = img_str.split(",")[1] 33 | image = base64.b64decode(img_str + "=" * (-len(img_str) % 4)) 34 | nparr = np.frombuffer(image, np.uint8) 35 | image = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 36 | if image.ndim == 2: 37 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) 38 | return image 39 | 40 | def get_web_result( 41 | self, img: np.ndarray, ocr_res: List, elapse: List 42 | ) -> Tuple[Union[str, List, str, str]]: 43 | if ocr_res is None: 44 | total_elapse, elapse_part = 0, "" 45 | img_str = self.img_to_base64(img) 46 | rec_res = json.dumps([], indent=2, ensure_ascii=False) 47 | boxes = "" 48 | else: 49 | boxes, txts, scores = list(zip(*ocr_res)) 50 | scores = [f"{v:.4f}" for v in scores] 51 | rec_res = list(zip(range(len(txts)), txts, scores)) 52 | rec_res = json.dumps(rec_res, indent=2, ensure_ascii=False) 53 | 54 | det_im = self.draw_text_det_res(np.array(boxes), img) 55 | img_str = self.img_to_base64(det_im) 56 | 57 | total_elapse = reduce(lambda x, y: float(x) + float(y), elapse) 58 | elapse_part = ",".join([f"{x:.4f}" for x in elapse]) 59 | 60 | web_return = self.WebReturn( 61 | image=img_str, 62 | total_elapse=f"{total_elapse:.4f}", 63 | elapse_part=elapse_part, 64 | rec_res=rec_res, 65 | det_boxes=boxes, 66 | ) 67 | return json.dumps(web_return._asdict()) 68 | 69 | @staticmethod 70 | def img_to_base64(img) -> str: 71 | img = cv2.imencode(".png", img)[1] 72 | img_str = str(base64.b64encode(img))[2:-1] 73 | return img_str 74 | 75 | @staticmethod 76 | def draw_text_det_res(dt_boxes: np.ndarray, raw_im: np.ndarray) -> np.ndarray: 77 | src_im = copy.deepcopy(raw_im) 78 | for i, box in enumerate(dt_boxes): 79 | box = np.array(box).astype(np.int32).reshape(-1, 2) 80 | cv2.polylines(src_im, [box], True, color=(0, 0, 255), thickness=1) 81 | cv2.putText( 82 | src_im, 83 | str(i), 84 | (int(box[0][0]), int(box[0][1])), 85 | cv2.FONT_HERSHEY_SIMPLEX, 86 | 0.5, 87 | (0, 0, 0), 88 | 2, 89 | ) 90 | return src_im 91 | -------------------------------------------------------------------------------- /ocrweb/requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow<=10.0.0 2 | requests 3 | Flask>=2.1.0, <=3.0.0 4 | rapidocr_onnxruntime>=1.3.0,<=2.0.0 5 | get_pypi_latest_version 6 | wheel 7 | -------------------------------------------------------------------------------- /ocrweb/setup.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import 
Path 6 | 7 | import setuptools 8 | from get_pypi_latest_version import GetPyPiLatestVersion 9 | 10 | 11 | def get_readme(): 12 | root_dir = Path(__file__).resolve().parent.parent 13 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_web.md") 14 | with open(readme_path, "r", encoding="utf-8") as f: 15 | readme = f.read() 16 | return readme 17 | 18 | 19 | MODULE_NAME = "rapidocr_web" 20 | 21 | obtainer = GetPyPiLatestVersion() 22 | latest_version = obtainer(MODULE_NAME) 23 | VERSION_NUM = obtainer.version_add_one(latest_version) 24 | 25 | # 优先提取commit message中的语义化版本号,如无,则自动加1 26 | if len(sys.argv) > 2: 27 | match_str = " ".join(sys.argv[2:]) 28 | matched_versions = obtainer.extract_version(match_str) 29 | if matched_versions: 30 | VERSION_NUM = matched_versions 31 | sys.argv = sys.argv[:2] 32 | 33 | setuptools.setup( 34 | name=MODULE_NAME, 35 | version=VERSION_NUM, 36 | platforms="Any", 37 | description="A cross platform OCR Library based on OnnxRuntime.", 38 | long_description=get_readme(), 39 | long_description_content_type="text/markdown", 40 | author="SWHL", 41 | author_email="liekkaskono@163.com", 42 | url="https://github.com/RapidAI/RapidOCR", 43 | download_url="https://github.com/RapidAI/RapidOCR.git", 44 | license="Apache-2.0", 45 | include_package_data=True, 46 | install_requires=["requests", "Flask>=2.1.0", "rapidocr_onnxruntime"], 47 | packages=[ 48 | MODULE_NAME, 49 | f"{MODULE_NAME}.static.css", 50 | f"{MODULE_NAME}.static.js", 51 | f"{MODULE_NAME}.templates", 52 | ], 53 | package_data={"": ["*.ico", "*.css", "*.js", "*.html"]}, 54 | keywords=[ 55 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr" 56 | ], 57 | classifiers=[ 58 | "Programming Language :: Python :: 3.6", 59 | "Programming Language :: Python :: 3.7", 60 | "Programming Language :: Python :: 3.8", 61 | "Programming Language :: Python :: 3.9", 62 | "Programming Language :: Python :: 3.10", 63 | "Programming Language :: Python :: 3.11", 64 | "Programming Language :: Python :: 3.12", 65 | ], 66 | python_requires=">=3.6,<3.13", 67 | entry_points={ 68 | "console_scripts": [ 69 | f"{MODULE_NAME}={MODULE_NAME}.ocrweb:main", 70 | ], 71 | }, 72 | ) 73 | -------------------------------------------------------------------------------- /ocrweb_multi/README.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_web/ocrweb_multi/) 2 | -------------------------------------------------------------------------------- /ocrweb_multi/assets/ocr_web_multi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/assets/ocr_web_multi.jpg -------------------------------------------------------------------------------- /ocrweb_multi/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | print("Compile ocrweb") 5 | os.system("pyinstaller -y main.spec") 6 | 7 | print("Compile wrapper") 8 | os.system("windres .\wrapper.rc -O coff -o wrapper.res") 9 | os.system("gcc .\wrapper.c wrapper.res -o dist/ocrweb.exe") 10 | 11 | print("Copy config.yaml") 12 | shutil.copy2("config.yaml", "dist/config.yaml") 13 | 14 | print("Copy models") 15 | shutil.copytree("models", "dist/models", dirs_exist_ok=True) 16 | os.remove("dist/models/.gitkeep") 17 | 18 | print("Pack to ocrweb.zip") 19 | 
shutil.make_archive("ocrweb", "zip", "dist") 20 | 21 | print("Done") 22 | -------------------------------------------------------------------------------- /ocrweb_multi/config.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | host: 127.0.0.1 3 | port: 8001 4 | # OCR接口Token, 为null时将跳过Token验证 5 | token: null 6 | 7 | global: 8 | use_cuda: false 9 | verbose: false 10 | cuda_provider: 11 | device_id: 0 12 | arena_extend_strategy: kNextPowerOfTwo 13 | cudnn_conv_algo_search: EXHAUSTIVE 14 | do_copy_in_default_stream: true 15 | 16 | # 模型配置 17 | models: 18 | # 位置检测模型 19 | detect: 20 | det_en: 21 | path: models/en_PP-OCRv3_det_infer.onnx 22 | config: &detectConfig 23 | pre_process: 24 | - class: DetResizeForTest 25 | limit_side_len: 736 26 | limit_type: min 27 | - class: NormalizeImage 28 | std: [0.229, 0.224, 0.225] 29 | mean: [0.485, 0.456, 0.406] 30 | # 1 / 255 31 | scale: 0.00392156862745098 32 | order: hwc 33 | - class: ToCHWImage 34 | - class: KeepKeys 35 | keep_keys: ["image", "shape"] 36 | post_process: 37 | thresh: 0.3 38 | box_thresh: 0.5 39 | max_candidates: 1000 40 | unclip_ratio: 1.6 41 | use_dilation: true 42 | det_ch: 43 | path: models/ch_PP-OCRv3_det_infer.onnx 44 | config: *detectConfig 45 | det_ml: 46 | path: models/ch_PP-OCRv3_det_infer.onnx 47 | config: *detectConfig 48 | # 方向检测模型 49 | classify: 50 | cls_ml: 51 | path: models/ch_ppocr_mobile_v2.0_cls_infer.meta.onnx 52 | config: 53 | batch_size: 8 54 | score_thresh: 0.9 55 | # 文字识别模型 56 | recognize: 57 | rec_ch: 58 | path: models/ch_PP-OCRv3_rec_infer.meta.onnx 59 | config: &recognizeConfig 60 | batch_size: 8 61 | rec_cht: 62 | path: models/chinese_cht_PP-OCRv3_rec_infer.meta.onnx 63 | config: *recognizeConfig 64 | rec_en: 65 | path: models/en_PP-OCRv3_rec_infer.meta.onnx 66 | config: *recognizeConfig 67 | rec_ja: 68 | path: models/japan_PP-OCRv3_rec_infer.meta.onnx 69 | config: *recognizeConfig 70 | 71 | # 多语言配置 72 | languages: 73 | ch: 74 | name: 中文 75 | models: 76 | detect: det_ch 77 | classify: cls_ml 78 | recognize: rec_ch 79 | config: &languageConfig 80 | text_score: 0.5 81 | use_angle_cls: true 82 | verbose: false 83 | min_height: 30 84 | cht: 85 | name: 繁体中文 86 | models: 87 | detect: det_ch 88 | classify: cls_ml 89 | recognize: rec_cht 90 | config: *languageConfig 91 | ja: 92 | name: 日文 93 | models: 94 | detect: det_ch 95 | classify: cls_ml 96 | recognize: rec_ja 97 | config: *languageConfig 98 | en: 99 | name: 英文 100 | models: 101 | detect: det_en 102 | classify: cls_ml 103 | recognize: rec_en 104 | config: *languageConfig 105 | -------------------------------------------------------------------------------- /ocrweb_multi/main.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | import cv2 6 | import numpy as np 7 | from flask import Flask, send_file, request, make_response 8 | from waitress import serve 9 | 10 | 11 | from rapidocr.main import detect_recognize 12 | from utils.config import conf 13 | from utils.utils import tojson, parse_bool 14 | 15 | app = Flask(__name__) 16 | log = logging.getLogger("app") 17 | # 设置上传文件大小 18 | app.config["MAX_CONTENT_LENGTH"] = 3 * 1024 * 1024 19 | 20 | 21 | @app.route("/") 22 | def index(): 23 | return send_file("static/index.html") 24 | 25 | 26 | def json_response(data, status=200): 27 | return make_response(tojson(data), status, {"content-type": "application/json"}) 28 | 29 | 30 | 
@app.route("/lang") 31 | def get_languages(): 32 | """返回可用语言列表""" 33 | data = [ 34 | {"code": key, "name": val["name"]} for key, val in conf["languages"].items() 35 | ] 36 | result = {"msg": "OK", "data": data} 37 | log.info("Send langs: %s", data) 38 | return json_response(result) 39 | 40 | 41 | @app.route("/ocr", methods=["POST", "GET"]) 42 | def ocr(): 43 | """执行文字识别""" 44 | if conf["server"].get("token"): 45 | if request.values.get("token") != conf["server"]["token"]: 46 | return json_response({"msg": "invalid token"}, status=403) 47 | 48 | lang = request.values.get("lang") or "ch" 49 | detect = parse_bool(request.values.get("detect") or "true") 50 | classify = parse_bool(request.values.get("classify") or "true") 51 | 52 | image_file = request.files.get("image") 53 | if not image_file: 54 | return json_response({"msg": "no image"}, 400) 55 | nparr = np.frombuffer(image_file.stream.read(), np.uint8) 56 | image = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 57 | log.info( 58 | "Input: image %s, lang=%s, detect=%s, classify=%s", 59 | image.shape, 60 | lang, 61 | detect, 62 | classify, 63 | ) 64 | if image.ndim == 2: 65 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) 66 | result = detect_recognize(image, lang=lang, detect=detect, classify=classify) 67 | log.info("OCR Done %s %s", result["ts"], len(result["results"])) 68 | return json_response({"msg": "OK", "data": result}) 69 | 70 | 71 | if __name__ == "__main__": 72 | logging.basicConfig(level="INFO") 73 | logging.getLogger("waitress").setLevel(logging.INFO) 74 | if parse_bool(conf.get("debug", "0")): 75 | # Debug 76 | app.run(host=conf["server"]["host"], port=conf["server"]["port"], debug=True) 77 | else: 78 | # Deploy with waitress 79 | serve(app, host=conf["server"]["host"], port=conf["server"]["port"]) 80 | -------------------------------------------------------------------------------- /ocrweb_multi/main.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | 4 | block_cipher = None 5 | 6 | 7 | a = Analysis( 8 | ['main.py'], 9 | pathex=[], 10 | binaries=[], 11 | datas=[ 12 | ('static', 'static'), 13 | ], 14 | hiddenimports=[], 15 | hookspath=[], 16 | hooksconfig={}, 17 | runtime_hooks=[], 18 | excludes=[], 19 | win_no_prefer_redirects=False, 20 | win_private_assemblies=False, 21 | cipher=block_cipher, 22 | noarchive=False, 23 | ) 24 | pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) 25 | 26 | exe = EXE( 27 | pyz, 28 | a.scripts, 29 | [], 30 | exclude_binaries=True, 31 | name='main', 32 | debug=False, 33 | bootloader_ignore_signals=False, 34 | strip=False, 35 | upx=True, 36 | console=True, 37 | disable_windowed_traceback=False, 38 | argv_emulation=False, 39 | target_arch=None, 40 | codesign_identity=None, 41 | entitlements_file=None, 42 | ) 43 | coll = COLLECT( 44 | exe, 45 | a.binaries, 46 | a.zipfiles, 47 | a.datas, 48 | strip=False, 49 | upx=True, 50 | upx_exclude=[], 51 | name='ocrweb', 52 | ) 53 | -------------------------------------------------------------------------------- /ocrweb_multi/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/models/.gitkeep -------------------------------------------------------------------------------- /ocrweb_multi/rapidocr/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/rapidocr/__init__.py -------------------------------------------------------------------------------- /ocrweb_multi/rapidocr/detect.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # -*- encoding: utf-8 -*- 15 | # @Author: SWHL 16 | # @Contact: liekkaskono@163.com 17 | 18 | import numpy as np 19 | 20 | from utils.utils import OrtInferSession 21 | from .detect_process import DBPostProcess, create_operators, transform 22 | 23 | 24 | class TextDetector: 25 | def __init__(self, path, config): 26 | self.preprocess_op = create_operators(config["pre_process"]) 27 | self.postprocess_op = DBPostProcess(**config["post_process"]) 28 | 29 | session_instance = OrtInferSession(path) 30 | self.session = session_instance.session 31 | self.input_name = session_instance.get_input_name() 32 | 33 | def __call__(self, img): 34 | if img is None: 35 | raise ValueError("img is None") 36 | 37 | ori_im_shape = img.shape[:2] 38 | 39 | data = {"image": img} 40 | data = transform(data, self.preprocess_op) 41 | img, shape_list = data 42 | if img is None: 43 | return None, 0 44 | 45 | img = np.expand_dims(img, axis=0).astype(np.float32) 46 | shape_list = np.expand_dims(shape_list, axis=0) 47 | 48 | preds = self.session.run(None, {self.input_name: img}) 49 | 50 | post_result = self.postprocess_op(preds[0], shape_list) 51 | 52 | dt_boxes = post_result[0]["points"] 53 | dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im_shape) 54 | return dt_boxes 55 | 56 | def order_points_clockwise(self, pts): 57 | """ 58 | reference from: 59 | https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py 60 | sort the points based on their x-coordinates 61 | """ 62 | xSorted = pts[np.argsort(pts[:, 0]), :] 63 | 64 | # grab the left-most and right-most points from the sorted 65 | # x-roodinate points 66 | leftMost = xSorted[:2, :] 67 | rightMost = xSorted[2:, :] 68 | 69 | # now, sort the left-most coordinates according to their 70 | # y-coordinates so we can grab the top-left and bottom-left 71 | # points, respectively 72 | leftMost = leftMost[np.argsort(leftMost[:, 1]), :] 73 | (tl, bl) = leftMost 74 | 75 | rightMost = rightMost[np.argsort(rightMost[:, 1]), :] 76 | (tr, br) = rightMost 77 | 78 | rect = np.array([tl, tr, br, bl], dtype="float32") 79 | return rect 80 | 81 | def clip_det_res(self, points, img_height, img_width): 82 | for pno in range(points.shape[0]): 83 | points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) 84 | points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) 85 | return points 86 | 87 | def filter_tag_det_res(self, dt_boxes, image_shape): 88 | """对检测结果进行过滤""" 89 | img_height, img_width = image_shape[:2] 90 | dt_boxes_new = [] 91 | for box in dt_boxes: 92 | box = 
self.order_points_clockwise(box) 93 | box = self.clip_det_res(box, img_height, img_width) 94 | rect_width = int(np.linalg.norm(box[0] - box[1])) 95 | rect_height = int(np.linalg.norm(box[0] - box[3])) 96 | if rect_width <= 3 or rect_height <= 3: 97 | continue 98 | dt_boxes_new.append(box) 99 | return dt_boxes_new 100 | -------------------------------------------------------------------------------- /ocrweb_multi/rapidocr/main.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import copy 5 | from functools import lru_cache 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | import cv2 10 | 11 | from utils.config import conf 12 | from rapidocr.rapid_ocr_api import RapidOCR 13 | 14 | 15 | @lru_cache(maxsize=None) 16 | def load_language_model(lang="ch"): 17 | models = conf["languages"][lang] 18 | print("model", models) 19 | return RapidOCR(models) 20 | 21 | 22 | def detect_recognize(image, lang="ch", detect=True, classify=True): 23 | model = load_language_model(lang) 24 | results, ts = model(image, detect=detect, classify=classify) 25 | ts["total"] = sum(ts.values()) 26 | return {"ts": ts, "results": results} 27 | 28 | 29 | def check_and_read_gif(img_path): 30 | if Path(img_path).suffix.lower() == "gif": 31 | gif = cv2.VideoCapture(img_path) 32 | ret, frame = gif.read() 33 | if not ret: 34 | print("Cannot read {}. This gif image maybe corrupted.") 35 | return None, False 36 | if len(frame.shape) == 2 or frame.shape[-1] == 1: 37 | frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) 38 | imgvalue = frame[:, :, ::-1] 39 | return imgvalue, True 40 | return None, False 41 | 42 | 43 | def draw_text_det_res(dt_boxes, raw_im): 44 | src_im = copy.deepcopy(raw_im) 45 | for i, box in enumerate(dt_boxes): 46 | box = np.array(box).astype(np.int32).reshape(-1, 2) 47 | cv2.polylines(src_im, [box], True, color=(0, 0, 255), thickness=1) 48 | cv2.putText( 49 | src_im, 50 | str(i), 51 | (int(box[0][0]), int(box[0][1])), 52 | cv2.FONT_HERSHEY_SIMPLEX, 53 | 0.5, 54 | (0, 0, 0), 55 | 2, 56 | ) 57 | return src_im 58 | -------------------------------------------------------------------------------- /ocrweb_multi/requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime>=1.7.0 2 | opencv-python-headless==4.5.4.60 3 | six>=1.15.0 4 | pyclipper>=1.2.1 5 | numpy>=1.19.1 6 | Shapely>=1.7.1 7 | Flask>=2.1.2 8 | PyYAML 9 | waitress 10 | -------------------------------------------------------------------------------- /ocrweb_multi/static/css/main.css: -------------------------------------------------------------------------------- 1 | html { 2 | height: 100%; 3 | margin: 0; 4 | } 5 | 6 | body { 7 | background-color: #ebedef; 8 | min-height: 100%; 9 | margin: 0; 10 | } 11 | 12 | .btn-gen { 13 | background-color: #00a1d6; 14 | text-align: center; 15 | border-radius: 18px; 16 | margin: 0 5px 0 5px; 17 | font-size: 15px; 18 | padding: 5px 10px; 19 | height: 20px; 20 | min-width: 120px; 21 | text-transform: uppercase; 22 | color: #fff; 23 | border: none; 24 | } 25 | 26 | .btn-gen:hover, 27 | .btn-gen:focus { 28 | border-color: #23AAEE; 29 | background-color: #23AAEE; 30 | color: white; 31 | cursor: pointer; 32 | } 33 | 34 | .row { 35 | margin: 15px; 36 | } 37 | 38 | .small { 39 | font-size: 0.8em; 40 | } 41 | 42 | .verysmall { 43 | font-size: 0.5em; 44 | } 45 | 46 | .area { 47 | text-align: center; 48 | height: auto; 49 | margin: auto; 50 | } 51 | 
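/* Side-by-side layout: .leftarea and .rightarea below each float at 50% width; the right pane scrolls (overflow-y: auto) when the result table overflows. */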
52 | .leftarea { 53 | float: left; 54 | width: 50%; 55 | height: auto; 56 | position: relative; 57 | } 58 | 59 | .rightarea { 60 | float: left; 61 | width: 50%; 62 | height: auto; 63 | overflow-y: auto; 64 | } 65 | 66 | .table { 67 | width: auto; 68 | height: auto; 69 | margin: 0 auto; 70 | 71 | } 72 | 73 | .span_title { 74 | width: 98%; 75 | height: 36px; 76 | margin-top: 4px; 77 | line-height: 32px; 78 | background-color: #00a1d6; 79 | border: 1px solid #00a1d6; 80 | border-radius: 20px; 81 | color: #fff; 82 | display: inline-block; 83 | text-align: center; 84 | font-size: 22px; 85 | transition: .3s; 86 | box-sizing: border-box; 87 | cursor: default; 88 | } 89 | 90 | .uplodNote { 91 | font-size: 10px; 92 | color: #A1A1A1; 93 | } 94 | 95 | a { 96 | text-decoration: none; 97 | } 98 | 99 | #input-hint { 100 | margin: auto; 101 | cursor: pointer; 102 | 103 | } 104 | 105 | #result_view { 106 | position: relative; 107 | width: 95%; 108 | margin: auto; 109 | } 110 | 111 | #result_view canvas { 112 | width: 100%; 113 | height: 100%; 114 | } 115 | 116 | /* wrapper */ 117 | #wrapper { 118 | position: absolute; 119 | top: 0; 120 | left: 0; 121 | width: 100%; 122 | height: 100%; 123 | background: 124 | linear-gradient(#1a98ca, #1a98ca), 125 | linear-gradient(90deg, #ffffff33 1px, transparent 0, transparent 19px), 126 | linear-gradient(#ffffff33 1px, transparent 0, transparent 19px), 127 | linear-gradient(transparent, #1a98ca); 128 | background-size: 100% 1.5%, 10% 100%, 100% 8%, 100% 100%; 129 | background-repeat: no-repeat, repeat, repeat, no-repeat; 130 | background-position: 0% 100%, 0 0, 0 0, 0 0; 131 | /* 初始位置 */ 132 | clip-path: polygon(0% 0%, 100% 0%, 100% 1.5%, 0% 1.5%); 133 | /* 添加动画效果 */ 134 | animation: move 1s infinite linear; 135 | } 136 | 137 | @keyframes move { 138 | to { 139 | background-position: 0 100%, 0 0, 0 0, 0 0; 140 | /* 终止位置 */ 141 | clip-path: polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%); 142 | } 143 | } -------------------------------------------------------------------------------- /ocrweb_multi/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/static/favicon.ico -------------------------------------------------------------------------------- /ocrweb_multi/static/hint.svg: -------------------------------------------------------------------------------- 1 | 3 | 未选择图片 4 | -------------------------------------------------------------------------------- /ocrweb_multi/utils/config.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | import sys 3 | from pathlib import Path 4 | import yaml 5 | 6 | root_dir = Path(__file__).parent.parent 7 | 8 | 9 | def get_resource_path(name: str): 10 | """依次检查资源文件的多个可能路径, 返回首个存在的路径""" 11 | for path in [ 12 | # wrapper.exe 所在目录 13 | Path(root_dir.parent, name), 14 | # main.exe 所在目录 / main.py 所在目录 15 | Path(root_dir, name), 16 | # main.exe 所在目录 17 | Path(sys.argv[0]).parent / name, 18 | # 工作目录 19 | Path(name), 20 | ]: 21 | if path.exists(): 22 | print("Loaded:", path) 23 | return path 24 | raise FileNotFoundError(name) 25 | 26 | 27 | conf = yaml.safe_load(get_resource_path("config.yaml").read_text(encoding="utf-8")) 28 | -------------------------------------------------------------------------------- /ocrweb_multi/utils/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 
import time 3 | import warnings 4 | 5 | from onnxruntime import ( 6 | get_available_providers, 7 | get_device, 8 | SessionOptions, 9 | InferenceSession, 10 | ) 11 | from utils.config import conf, get_resource_path 12 | 13 | 14 | def parse_bool(val): 15 | if not isinstance(val, str): 16 | return bool(val) 17 | return val.lower() in ("1", "true", "yes") 18 | 19 | 20 | def default(obj): 21 | if hasattr(obj, "tolist"): 22 | return obj.tolist() 23 | return obj 24 | 25 | 26 | def tojson(obj, **kws): 27 | return json.dumps(obj, default=default, ensure_ascii=False, **kws) + "\n" 28 | 29 | 30 | class OrtInferSession: 31 | def __init__(self, model_path): 32 | ort_conf = conf["global"] 33 | sess_opt = SessionOptions() 34 | sess_opt.log_severity_level = 4 35 | sess_opt.enable_cpu_mem_arena = False 36 | 37 | cuda_ep = "CUDAExecutionProvider" 38 | cpu_ep = "CPUExecutionProvider" 39 | 40 | providers = [] 41 | if ( 42 | ort_conf["use_cuda"] 43 | and get_device() == "GPU" 44 | and cuda_ep in get_available_providers() 45 | ): 46 | providers = [(cuda_ep, ort_conf[cuda_ep])] 47 | 48 | providers.append(cpu_ep) 49 | 50 | self.session = InferenceSession( 51 | str(get_resource_path(model_path)), 52 | sess_options=sess_opt, 53 | providers=providers, 54 | ) 55 | 56 | if ort_conf["use_cuda"] and cuda_ep not in self.session.get_providers(): 57 | warnings.warn( 58 | f"{cuda_ep} is not avaiable for current env, the inference part is automatically shifted to be executed under {cpu_ep}.\n" 59 | "Please ensure the installed onnxruntime-gpu version matches your cuda and cudnn version, " 60 | "you can check their relations from the offical web site: " 61 | "https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html", 62 | RuntimeWarning, 63 | ) 64 | 65 | def get_input_name(self, input_idx=0): 66 | return self.session.get_inputs()[input_idx].name 67 | 68 | def get_output_name(self, output_idx=0): 69 | return self.session.get_outputs()[output_idx].name 70 | 71 | 72 | class Ticker: 73 | def __init__(self, reset=True) -> None: 74 | self.ts = time.perf_counter() 75 | self.reset = reset 76 | self.maps = {} 77 | 78 | def tick(self, name, reset=None): 79 | ts = time.perf_counter() 80 | if reset is None: 81 | reset = self.reset 82 | dt = ts - self.ts 83 | if reset: 84 | self.ts = ts 85 | self.maps[name] = dt 86 | return dt 87 | -------------------------------------------------------------------------------- /ocrweb_multi/wrapper.c: -------------------------------------------------------------------------------- 1 | /* 2 | 针对Pyinstaller目录下文件过多的问题, 使用外部exe+system调用的方式实现资源文件/依赖库分离 3 | */ 4 | #include 5 | #include 6 | 7 | void combine(char *destination, const char *path1, const char *path2) 8 | { 9 | if (path1 == NULL && path2 == NULL) 10 | { 11 | strcpy(destination, ""); 12 | } 13 | else if (path2 == NULL || strlen(path2) == 0) 14 | { 15 | strcpy(destination, path1); 16 | } 17 | else if (path1 == NULL || strlen(path1) == 0) 18 | { 19 | strcpy(destination, path2); 20 | } 21 | else 22 | { 23 | strcpy(destination, path1); 24 | 25 | size_t idx = 0, sepIdx = 0; 26 | size_t size1 = strlen(path1); 27 | while (idx < size1) 28 | { 29 | idx++; 30 | if (destination[idx] == '\\' || destination[idx] == '/') 31 | { 32 | sepIdx = idx; 33 | } 34 | } 35 | // Trim destination: delete from last separator to end. 
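        // sepIdx ends up pointing at the last '\\' or '/' found in the loop above,
        // so everything up to and including that separator is kept and path2 is appended to it.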
36 | destination[sepIdx + 1] = '\0'; 37 | strcat(destination, path2); 38 | } 39 | } 40 | 41 | void main() 42 | { 43 | // Set title 44 | system("title Rapid OCR Server"); 45 | // Get wrapper exe path 46 | TCHAR path[MAX_PATH]; 47 | GetModuleFileName(NULL, path, MAX_PATH); 48 | 49 | TCHAR exe_path[MAX_PATH]; 50 | // Get real exe path from wrapper exe path 51 | combine(exe_path, path, "ocrweb\\main.exe"); 52 | printf("Run real exe: %s\n", exe_path); 53 | // Run real exe 54 | system(exe_path); 55 | } 56 | -------------------------------------------------------------------------------- /ocrweb_multi/wrapper.rc: -------------------------------------------------------------------------------- 1 | id ICON "static/favicon.ico" 2 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr/install/) 2 | -------------------------------------------------------------------------------- /python/demo.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from rapidocr import EngineType, ModelType, OCRVersion, RapidOCR 5 | 6 | engine = RapidOCR( 7 | params={ 8 | "Rec.ocr_version": OCRVersion.PPOCRV5, 9 | "Rec.engine_type": EngineType.PADDLE, 10 | "Rec.model_type": ModelType.MOBILE, 11 | } 12 | ) 13 | 14 | img_url = "https://img1.baidu.com/it/u=3619974146,1266987475&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=516" 15 | result = engine(img_url) 16 | print(result) 17 | 18 | result.vis("vis_result.jpg") 19 | -------------------------------------------------------------------------------- /python/rapidocr/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidOCR 5 | from .utils import LoadImageError, VisRes 6 | from .utils.typings import EngineType, LangCls, LangDet, LangRec, ModelType, OCRVersion 7 | -------------------------------------------------------------------------------- /python/rapidocr/cal_rec_boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import CalRecBoxes 5 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_cls/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import TextClassifier 5 | from .utils import TextClsOutput 6 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_cls/main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import copy 15 | import math 16 | import time 17 | from typing import Any, Dict, List, Union 18 | 19 | import cv2 20 | import numpy as np 21 | 22 | from rapidocr.inference_engine.base import get_engine 23 | 24 | from .utils import ClsPostProcess, TextClsOutput 25 | 26 | 27 | class TextClassifier: 28 | def __init__(self, cfg: Dict[str, Any]): 29 | self.cls_image_shape = cfg["cls_image_shape"] 30 | self.cls_batch_num = cfg["cls_batch_num"] 31 | self.cls_thresh = cfg["cls_thresh"] 32 | self.postprocess_op = ClsPostProcess(cfg["label_list"]) 33 | 34 | self.session = get_engine(cfg.engine_type)(cfg) 35 | 36 | def __call__(self, img_list: Union[np.ndarray, List[np.ndarray]]) -> TextClsOutput: 37 | if isinstance(img_list, np.ndarray): 38 | img_list = [img_list] 39 | 40 | img_list = copy.deepcopy(img_list) 41 | 42 | # Calculate the aspect ratio of all text bars 43 | width_list = [img.shape[1] / float(img.shape[0]) for img in img_list] 44 | 45 | # Sorting can speed up the cls process 46 | indices = np.argsort(np.array(width_list)) 47 | 48 | img_num = len(img_list) 49 | cls_res = [("", 0.0)] * img_num 50 | batch_num = self.cls_batch_num 51 | elapse = 0 52 | for beg_img_no in range(0, img_num, batch_num): 53 | end_img_no = min(img_num, beg_img_no + batch_num) 54 | 55 | norm_img_batch = [] 56 | for ino in range(beg_img_no, end_img_no): 57 | norm_img = self.resize_norm_img(img_list[indices[ino]]) 58 | norm_img = norm_img[np.newaxis, :] 59 | norm_img_batch.append(norm_img) 60 | norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32) 61 | 62 | starttime = time.time() 63 | prob_out = self.session(norm_img_batch) 64 | cls_result = self.postprocess_op(prob_out) 65 | elapse += time.time() - starttime 66 | 67 | for rno, (label, score) in enumerate(cls_result): 68 | cls_res[indices[beg_img_no + rno]] = (label, score) 69 | if "180" in label and score > self.cls_thresh: 70 | img_list[indices[beg_img_no + rno]] = cv2.rotate( 71 | img_list[indices[beg_img_no + rno]], 1 72 | ) 73 | return TextClsOutput(img_list=img_list, cls_res=cls_res, elapse=elapse) 74 | 75 | def resize_norm_img(self, img: np.ndarray) -> np.ndarray: 76 | img_c, img_h, img_w = self.cls_image_shape 77 | h, w = img.shape[:2] 78 | ratio = w / float(h) 79 | if math.ceil(img_h * ratio) > img_w: 80 | resized_w = img_w 81 | else: 82 | resized_w = int(math.ceil(img_h * ratio)) 83 | 84 | resized_image = cv2.resize(img, (resized_w, img_h)) 85 | resized_image = resized_image.astype("float32") 86 | if img_c == 1: 87 | resized_image = resized_image / 255 88 | resized_image = resized_image[np.newaxis, :] 89 | else: 90 | resized_image = resized_image.transpose((2, 0, 1)) / 255 91 | 92 | resized_image -= 0.5 93 | resized_image /= 0.5 94 | padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32) 95 | padding_im[:, :, :resized_w] = resized_image 96 | return padding_im 97 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_cls/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 
PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass 15 | from pathlib import Path 16 | from typing import List, Optional, Tuple, Union 17 | 18 | import numpy as np 19 | 20 | from ..utils.logger import Logger 21 | from ..utils.utils import save_img 22 | from ..utils.vis_res import VisRes 23 | 24 | logger = Logger(logger_name=__name__).get_log() 25 | 26 | 27 | @dataclass 28 | class TextClsOutput: 29 | img_list: Optional[List[np.ndarray]] = None 30 | cls_res: Optional[List[Tuple[str, float]]] = None 31 | elapse: Optional[float] = None 32 | 33 | def __len__(self): 34 | if self.img_list is None: 35 | return 0 36 | return len(self.img_list) 37 | 38 | def vis(self, save_path: Optional[Union[str, Path]] = None) -> Optional[np.ndarray]: 39 | if self.img_list is None or self.cls_res is None: 40 | logger.warning("No image or txts to visualize.") 41 | return None 42 | 43 | txts = [f"{txt} {score:.2f}" for txt, score in self.cls_res] 44 | scores = [score for _, score in self.cls_res] 45 | 46 | vis = VisRes() 47 | vis_img = vis.draw_rec_res(self.img_list, txts, scores) 48 | 49 | if save_path is not None: 50 | save_img(save_path, vis_img) 51 | logger.info("Visualization saved as %s", save_path) 52 | return vis_img 53 | 54 | 55 | class ClsPostProcess: 56 | def __init__(self, label_list: List[str]): 57 | self.label_list = label_list 58 | 59 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]: 60 | pred_idxs = preds.argmax(axis=1) 61 | decode_out = [ 62 | (self.label_list[int(idx)], preds[i, int(idx)]) 63 | for i, idx in enumerate(pred_idxs) 64 | ] 65 | return decode_out 66 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_det/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import TextDetector 5 | from .utils import TextDetOutput 6 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_det/main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
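# Text detector: picks limit_side_len from the input image size, runs the configured
# inference engine, applies DB post-processing, then sorts boxes top-to-bottom, left-to-right.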
14 | # -*- encoding: utf-8 -*- 15 | # @Author: SWHL 16 | # @Contact: liekkaskono@163.com 17 | import time 18 | from typing import Any, Dict, List 19 | 20 | import numpy as np 21 | 22 | from rapidocr.inference_engine.base import get_engine 23 | 24 | from .utils import DBPostProcess, DetPreProcess, TextDetOutput 25 | 26 | 27 | class TextDetector: 28 | def __init__(self, cfg: Dict[str, Any]): 29 | self.limit_side_len = cfg.get("limit_side_len") 30 | self.limit_type = cfg.get("limit_type") 31 | self.mean = cfg.get("mean") 32 | self.std = cfg.get("std") 33 | self.preprocess_op = None 34 | 35 | post_process = { 36 | "thresh": cfg.get("thresh", 0.3), 37 | "box_thresh": cfg.get("box_thresh", 0.5), 38 | "max_candidates": cfg.get("max_candidates", 1000), 39 | "unclip_ratio": cfg.get("unclip_ratio", 1.6), 40 | "use_dilation": cfg.get("use_dilation", True), 41 | "score_mode": cfg.get("score_mode", "fast"), 42 | } 43 | self.postprocess_op = DBPostProcess(**post_process) 44 | 45 | self.session = get_engine(cfg.engine_type)(cfg) 46 | 47 | def __call__(self, img: np.ndarray) -> TextDetOutput: 48 | start_time = time.perf_counter() 49 | 50 | if img is None: 51 | raise ValueError("img is None") 52 | 53 | ori_img_shape = img.shape[0], img.shape[1] 54 | self.preprocess_op = self.get_preprocess(max(img.shape[0], img.shape[1])) 55 | prepro_img = self.preprocess_op(img) 56 | if prepro_img is None: 57 | return TextDetOutput() 58 | 59 | preds = self.session(prepro_img) 60 | boxes, scores = self.postprocess_op(preds, ori_img_shape) 61 | if len(boxes) < 1: 62 | return TextDetOutput() 63 | 64 | boxes = self.sorted_boxes(boxes) 65 | elapse = time.perf_counter() - start_time 66 | return TextDetOutput(img, boxes, scores, elapse=elapse) 67 | 68 | def get_preprocess(self, max_wh: int) -> DetPreProcess: 69 | if self.limit_type == "min": 70 | limit_side_len = self.limit_side_len 71 | elif max_wh < 960: 72 | limit_side_len = 960 73 | elif max_wh < 1500: 74 | limit_side_len = 1500 75 | else: 76 | limit_side_len = 2000 77 | return DetPreProcess(limit_side_len, self.limit_type, self.mean, self.std) 78 | 79 | @staticmethod 80 | def sorted_boxes(dt_boxes: np.ndarray) -> List[np.ndarray]: 81 | """ 82 | Sort text boxes in order from top to bottom, left to right 83 | args: 84 | dt_boxes(array):detected text boxes with shape [4, 2] 85 | return: 86 | sorted boxes(array) with shape [4, 2] 87 | """ 88 | num_boxes = dt_boxes.shape[0] 89 | sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) 90 | _boxes = list(sorted_boxes) 91 | 92 | for i in range(num_boxes - 1): 93 | for j in range(i, -1, -1): 94 | if ( 95 | abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 96 | and _boxes[j + 1][0][0] < _boxes[j][0][0] 97 | ): 98 | tmp = _boxes[j] 99 | _boxes[j] = _boxes[j + 1] 100 | _boxes[j + 1] = tmp 101 | else: 102 | break 103 | return _boxes 104 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_rec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import TextRecognizer 5 | from .typings import TextRecInput, TextRecOutput 6 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_rec/typings.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from dataclasses import 
dataclass, field 5 | from enum import Enum 6 | from pathlib import Path 7 | from typing import List, Optional, Tuple, Union 8 | 9 | import numpy as np 10 | 11 | from ..utils.logger import Logger 12 | from ..utils.utils import save_img 13 | from ..utils.vis_res import VisRes 14 | 15 | logger = Logger(logger_name=__name__).get_log() 16 | 17 | 18 | @dataclass 19 | class TextRecConfig: 20 | intra_op_num_threads: int = -1 21 | inter_op_num_threads: int = -1 22 | use_cuda: bool = False 23 | use_dml: bool = False 24 | model_path: Union[str, Path, None] = None 25 | 26 | rec_batch_num: int = 6 27 | rec_img_shape: Tuple[int, int, int] = (3, 48, 320) 28 | rec_keys_path: Union[str, Path, None] = None 29 | 30 | 31 | @dataclass 32 | class TextRecInput: 33 | img: Union[np.ndarray, List[np.ndarray], None] = None 34 | return_word_box: bool = False 35 | 36 | 37 | @dataclass 38 | class TextRecOutput: 39 | imgs: Optional[List[np.ndarray]] = None 40 | txts: Optional[Tuple[str]] = None 41 | scores: Tuple[float] = (1.0,) 42 | word_results: Tuple[Tuple[str, float, Optional[List[List[int]]]]] = ( 43 | ("", 1.0, None), 44 | ) 45 | elapse: Optional[float] = None 46 | lang_type: Optional[str] = None 47 | 48 | def __len__(self): 49 | if self.txts is None: 50 | return 0 51 | return len(self.txts) 52 | 53 | def vis(self, save_path: Optional[Union[str, Path]] = None) -> Optional[np.ndarray]: 54 | if self.imgs is None or self.txts is None: 55 | logger.warning("No image or txts to visualize.") 56 | return None 57 | 58 | vis = VisRes() 59 | vis_img = vis.draw_rec_res( 60 | self.imgs, self.txts, self.scores, lang_type=self.lang_type 61 | ) 62 | 63 | if save_path is not None: 64 | save_img(save_path, vis_img) 65 | logger.info("Visualization saved as %s", save_path) 66 | return vis_img 67 | 68 | 69 | class WordType(Enum): 70 | CN = "cn" 71 | EN = "en" 72 | NUM = "num" 73 | EN_NUM = "en&num" 74 | 75 | 76 | @dataclass 77 | class WordInfo: 78 | words: List[List[str]] = field(default_factory=list) 79 | word_cols: List[List[int]] = field(default_factory=list) 80 | word_types: List[WordType] = field(default_factory=list) 81 | line_txt_len: float = 0.0 82 | confs: List[float] = field(default_factory=list) 83 | -------------------------------------------------------------------------------- /python/rapidocr/cli.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import shutil 5 | from pathlib import Path 6 | 7 | 8 | root_dir = Path(__file__).resolve().parent 9 | DEFAULT_CFG_PATH = root_dir / "config.yaml" 10 | 11 | 12 | def generate_cfg(args): 13 | if args.save_cfg_file is None: 14 | args.save_cfg_file = "./default_rapidocr.yaml" 15 | 16 | shutil.copyfile(DEFAULT_CFG_PATH, args.save_cfg_file) 17 | print(f"The config file has saved in {args.save_cfg_file}") 18 | 19 | 20 | def check_install(ocr_engine): 21 | img_url = "https://github.com/RapidAI/RapidOCR/blob/a9bb7c1f44b6e00556ada90ac588f020d7637c4b/python/tests/test_files/ch_en_num.jpg?raw=true" 22 | result = ocr_engine(img_url) 23 | 24 | if result.txts is None or result.txts[0] != "正品促销": 25 | raise ValueError("The installation is incorrect!") 26 | 27 | print("Success! 
rapidocr is installed correctly!") 28 | -------------------------------------------------------------------------------- /python/rapidocr/config.yaml: -------------------------------------------------------------------------------- 1 | Global: 2 | text_score: 0.5 3 | 4 | use_det: true 5 | use_cls: true 6 | use_rec: true 7 | 8 | min_height: 30 9 | width_height_ratio: 8 10 | max_side_len: 2000 11 | min_side_len: 30 12 | 13 | return_word_box: false 14 | 15 | font_path: null 16 | 17 | EngineConfig: 18 | onnxruntime: 19 | intra_op_num_threads: -1 20 | inter_op_num_threads: -1 21 | enable_cpu_mem_arena: false 22 | use_cuda: false 23 | use_dml: false 24 | 25 | openvino: 26 | inference_num_threads: -1 27 | 28 | paddle: 29 | cpu_math_library_num_threads: -1 30 | use_cuda: false 31 | gpu_id: 0 32 | gpu_mem: 500 33 | 34 | torch: 35 | use_cuda: false 36 | gpu_id: 0 37 | 38 | Det: 39 | engine_type: 'onnxruntime' 40 | lang_type: 'ch' 41 | model_type: 'mobile' 42 | ocr_version: 'PP-OCRv4' 43 | 44 | task_type: 'det' 45 | 46 | model_path: null 47 | model_dir: null 48 | 49 | limit_side_len: 736 50 | limit_type: min 51 | std: [ 0.5, 0.5, 0.5 ] 52 | mean: [ 0.5, 0.5, 0.5 ] 53 | 54 | thresh: 0.3 55 | box_thresh: 0.5 56 | max_candidates: 1000 57 | unclip_ratio: 1.6 58 | use_dilation: true 59 | score_mode: fast 60 | 61 | Cls: 62 | engine_type: 'onnxruntime' 63 | lang_type: 'ch' 64 | model_type: 'mobile' 65 | ocr_version: 'PP-OCRv4' 66 | 67 | task_type: 'cls' 68 | 69 | model_path: null 70 | model_dir: null 71 | 72 | cls_image_shape: [3, 48, 192] 73 | cls_batch_num: 6 74 | cls_thresh: 0.9 75 | label_list: ['0', '180'] 76 | 77 | Rec: 78 | engine_type: 'onnxruntime' 79 | lang_type: 'ch' 80 | model_type: 'mobile' 81 | ocr_version: 'PP-OCRv4' 82 | 83 | task_type: 'rec' 84 | 85 | model_path: null 86 | model_dir: null 87 | 88 | rec_keys_path: null 89 | rec_img_shape: [3, 48, 320] 90 | rec_batch_num: 6 91 | -------------------------------------------------------------------------------- /python/rapidocr/inference_engine/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | -------------------------------------------------------------------------------- /python/rapidocr/inference_engine/base.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import abc 5 | from dataclasses import dataclass 6 | from enum import Enum 7 | from pathlib import Path 8 | from typing import Dict, Union 9 | 10 | import numpy as np 11 | from omegaconf import OmegaConf 12 | 13 | from ..utils.logger import Logger 14 | from ..utils.typings import EngineType, ModelType, OCRVersion, TaskType 15 | from ..utils.utils import import_package 16 | 17 | cur_dir = Path(__file__).resolve().parent.parent 18 | MODEL_URL_PATH = cur_dir / "default_models.yaml" 19 | 20 | logger = Logger(logger_name=__name__).get_log() 21 | 22 | 23 | def get_engine(engine_type: EngineType): 24 | logger.info("Using engine_name: %s", engine_type.value) 25 | 26 | if engine_type == EngineType.ONNXRUNTIME: 27 | if not import_package(engine_type.value): 28 | raise ImportError(f"{engine_type.value} is not installed.") 29 | 30 | from .onnxruntime import OrtInferSession 31 | 32 | return OrtInferSession 33 | 34 | if engine_type == EngineType.OPENVINO: 35 | if not import_package(engine_type.value): 36 | raise ImportError(f"{engine_type.value} is 
not installed") 37 | 38 | from .openvino import OpenVINOInferSession 39 | 40 | return OpenVINOInferSession 41 | 42 | if engine_type == EngineType.PADDLE: 43 | if not import_package(engine_type.value): 44 | raise ImportError(f"{engine_type.value} is not installed") 45 | 46 | from .paddle import PaddleInferSession 47 | 48 | return PaddleInferSession 49 | 50 | if engine_type == EngineType.TORCH: 51 | if not import_package(engine_type.value): 52 | raise ImportError(f"{engine_type.value} is not installed") 53 | 54 | from .torch import TorchInferSession 55 | 56 | return TorchInferSession 57 | 58 | raise ValueError(f"Unsupported engine: {engine_type.value}") 59 | 60 | 61 | @dataclass 62 | class FileInfo: 63 | engine_type: EngineType 64 | ocr_version: OCRVersion 65 | task_type: TaskType 66 | lang_type: Enum 67 | model_type: ModelType 68 | 69 | 70 | class InferSession(abc.ABC): 71 | model_info = OmegaConf.load(MODEL_URL_PATH) 72 | DEFAULT_MODEL_PATH = cur_dir / "models" 73 | logger = Logger(logger_name=__name__).get_log() 74 | 75 | @abc.abstractmethod 76 | def __init__(self, config): 77 | pass 78 | 79 | @abc.abstractmethod 80 | def __call__(self, input_content: np.ndarray) -> np.ndarray: 81 | pass 82 | 83 | @staticmethod 84 | def _verify_model(model_path: Union[str, Path, None]): 85 | if model_path is None: 86 | raise ValueError("model_path is None!") 87 | 88 | model_path = Path(model_path) 89 | if not model_path.exists(): 90 | raise FileNotFoundError(f"{model_path} does not exists.") 91 | 92 | if not model_path.is_file(): 93 | raise FileExistsError(f"{model_path} is not a file.") 94 | 95 | @abc.abstractmethod 96 | def have_key(self, key: str = "character") -> bool: 97 | pass 98 | 99 | @classmethod 100 | def get_model_url(cls, file_info: FileInfo) -> Dict[str, str]: 101 | model_dict = OmegaConf.select( 102 | cls.model_info, 103 | f"{file_info.engine_type.value}.{file_info.ocr_version.value}.{file_info.task_type.value}", 104 | ) 105 | 106 | # 优先查找 server 模型 107 | if file_info.model_type == ModelType.SERVER: 108 | for k in model_dict: 109 | if ( 110 | k.startswith(file_info.lang_type.value) 111 | and file_info.model_type.value in k 112 | ): 113 | return model_dict[k] 114 | 115 | for k in model_dict: 116 | if k.startswith(file_info.lang_type.value): 117 | return model_dict[k] 118 | 119 | raise KeyError("File not found") 120 | 121 | @classmethod 122 | def get_dict_key_url(cls, file_info: FileInfo) -> str: 123 | model_dict = cls.get_model_url(file_info) 124 | return model_dict["dict_url"] 125 | -------------------------------------------------------------------------------- /python/rapidocr/inference_engine/openvino.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import os 5 | import traceback 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | from omegaconf import DictConfig 10 | from openvino.runtime import Core 11 | 12 | from ..utils import Logger 13 | from ..utils.download_file import DownloadFile, DownloadFileInput 14 | from .base import FileInfo, InferSession 15 | 16 | 17 | class OpenVINOInferSession(InferSession): 18 | def __init__(self, cfg: DictConfig): 19 | super().__init__(cfg) 20 | self.logger = Logger(logger_name=__name__).get_log() 21 | 22 | core = Core() 23 | 24 | model_path = cfg.get("model_path", None) 25 | if model_path is None: 26 | model_info = self.get_model_url( 27 | FileInfo( 28 | engine_type=cfg.engine_type, 29 | ocr_version=cfg.ocr_version, 30 | 
task_type=cfg.task_type, 31 | lang_type=cfg.lang_type, 32 | model_type=cfg.model_type, 33 | ) 34 | ) 35 | model_path = self.DEFAULT_MODEL_PATH / Path(model_info["model_dir"]).name 36 | download_params = DownloadFileInput( 37 | file_url=model_info["model_dir"], 38 | sha256=model_info["SHA256"], 39 | save_path=model_path, 40 | logger=self.logger, 41 | ) 42 | DownloadFile.run(download_params) 43 | 44 | self.logger.info(f"Using {model_path}") 45 | model_path = Path(model_path) 46 | self._verify_model(model_path) 47 | 48 | cpu_nums = os.cpu_count() 49 | infer_num_threads = cfg.get("inference_num_threads", -1) 50 | if infer_num_threads != -1 and 1 <= infer_num_threads <= cpu_nums: 51 | core.set_property("CPU", {"INFERENCE_NUM_THREADS": str(infer_num_threads)}) 52 | 53 | model_onnx = core.read_model(model_path) 54 | compile_model = core.compile_model(model=model_onnx, device_name="CPU") 55 | self.session = compile_model.create_infer_request() 56 | 57 | def __call__(self, input_content: np.ndarray) -> np.ndarray: 58 | try: 59 | self.session.infer(inputs=[input_content]) 60 | return self.session.get_output_tensor().data 61 | except Exception as e: 62 | error_info = traceback.format_exc() 63 | raise OpenVIONError(error_info) from e 64 | 65 | def have_key(self, key: str = "character") -> bool: 66 | return False 67 | 68 | 69 | class OpenVIONError(Exception): 70 | pass 71 | -------------------------------------------------------------------------------- /python/rapidocr/inference_engine/torch.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import torch 8 | from omegaconf import OmegaConf 9 | 10 | from ..networks.architectures.base_model import BaseModel 11 | from ..utils.download_file import DownloadFile, DownloadFileInput 12 | from ..utils.logger import Logger 13 | from .base import FileInfo, InferSession 14 | 15 | root_dir = Path(__file__).resolve().parent.parent 16 | DEFAULT_CFG_PATH = root_dir / "networks" / "arch_config.yaml" 17 | 18 | 19 | class TorchInferSession(InferSession): 20 | def __init__(self, cfg) -> None: 21 | self.logger = Logger(logger_name=__name__).get_log() 22 | 23 | model_path = cfg.get("model_path", None) 24 | if model_path is None: 25 | model_info = self.get_model_url( 26 | FileInfo( 27 | engine_type=cfg.engine_type, 28 | ocr_version=cfg.ocr_version, 29 | task_type=cfg.task_type, 30 | lang_type=cfg.lang_type, 31 | model_type=cfg.model_type, 32 | ) 33 | ) 34 | default_model_url = model_info["model_dir"] 35 | model_path = self.DEFAULT_MODEL_PATH / Path(default_model_url).name 36 | DownloadFile.run( 37 | DownloadFileInput( 38 | file_url=default_model_url, 39 | sha256=model_info["SHA256"], 40 | save_path=model_path, 41 | logger=self.logger, 42 | ) 43 | ) 44 | 45 | self.logger.info(f"Using {model_path}") 46 | model_path = Path(model_path) 47 | self._verify_model(model_path) 48 | 49 | all_arch_config = OmegaConf.load(DEFAULT_CFG_PATH) 50 | file_name = model_path.stem 51 | if file_name not in all_arch_config: 52 | raise ValueError(f"architecture {file_name} is not in arch_config.yaml") 53 | 54 | arch_config = all_arch_config.get(file_name) 55 | self.predictor = BaseModel(arch_config) 56 | self.predictor.load_state_dict(torch.load(model_path, weights_only=True)) 57 | self.predictor.eval() 58 | 59 | self.use_gpu = False 60 | if cfg.engine_cfg.use_cuda: 61 | self.device = 
torch.device(f"cuda:{cfg.engine_cfg.gpu_id}") 62 | self.predictor.to(self.device) 63 | self.use_gpu = True 64 | 65 | def __call__(self, img: np.ndarray): 66 | with torch.no_grad(): 67 | inp = torch.from_numpy(img) 68 | if self.use_gpu: 69 | inp = inp.to(self.device) 70 | 71 | # 适配跟onnx对齐取值逻辑 72 | outputs = self.predictor(inp).cpu().numpy() 73 | return outputs 74 | 75 | def have_key(self, key: str = "character") -> bool: 76 | return False 77 | 78 | 79 | class TorchInferError(Exception): 80 | pass 81 | -------------------------------------------------------------------------------- /python/rapidocr/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr/models/.gitkeep -------------------------------------------------------------------------------- /python/rapidocr/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr/networks/__init__.py -------------------------------------------------------------------------------- /python/rapidocr/networks/architectures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import copy 16 | 17 | __all__ = ["build_model"] 18 | 19 | 20 | def build_model(config, **kwargs): 21 | from .base_model import BaseModel 22 | 23 | config = copy.deepcopy(config) 24 | module_class = BaseModel(config, **kwargs) 25 | return module_class 26 | -------------------------------------------------------------------------------- /python/rapidocr/networks/architectures/base_model.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ..backbones import build_backbone 4 | from ..heads import build_head 5 | from ..necks import build_neck 6 | 7 | 8 | class BaseModel(nn.Module): 9 | def __init__(self, config, **kwargs): 10 | """ 11 | the module for OCR. 12 | args: 13 | config (dict): the super parameters for module. 14 | """ 15 | super(BaseModel, self).__init__() 16 | 17 | in_channels = config.get("in_channels", 3) 18 | model_type = config["model_type"] 19 | # build backbone, backbone is need for del, rec and cls 20 | if "Backbone" not in config or config["Backbone"] is None: 21 | self.use_backbone = False 22 | else: 23 | self.use_backbone = True 24 | config["Backbone"]["in_channels"] = in_channels 25 | self.backbone = build_backbone(config["Backbone"], model_type) 26 | in_channels = self.backbone.out_channels 27 | 28 | # build neck 29 | # for rec, neck can be cnn,rnn or reshape(None) 30 | # for det, neck can be FPN, BIFPN and so on. 
31 | # for cls, neck should be none 32 | if "Neck" not in config or config["Neck"] is None: 33 | self.use_neck = False 34 | else: 35 | self.use_neck = True 36 | config["Neck"]["in_channels"] = in_channels 37 | self.neck = build_neck(config["Neck"]) 38 | in_channels = self.neck.out_channels 39 | 40 | # # build head, head is need for det, rec and cls 41 | if "Head" not in config or config["Head"] is None: 42 | self.use_head = False 43 | else: 44 | self.use_head = True 45 | config["Head"]["in_channels"] = in_channels 46 | self.head = build_head(config["Head"], **kwargs) 47 | 48 | self.return_all_feats = config.get("return_all_feats", False) 49 | 50 | self._initialize_weights() 51 | 52 | def _initialize_weights(self): 53 | # weight initialization 54 | for m in self.modules(): 55 | if isinstance(m, nn.Conv2d): 56 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 57 | if m.bias is not None: 58 | nn.init.zeros_(m.bias) 59 | elif isinstance(m, nn.BatchNorm2d): 60 | nn.init.ones_(m.weight) 61 | nn.init.zeros_(m.bias) 62 | elif isinstance(m, nn.Linear): 63 | nn.init.normal_(m.weight, 0, 0.01) 64 | if m.bias is not None: 65 | nn.init.zeros_(m.bias) 66 | elif isinstance(m, nn.ConvTranspose2d): 67 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 68 | if m.bias is not None: 69 | nn.init.zeros_(m.bias) 70 | 71 | def forward(self, x): 72 | y = dict() 73 | if self.use_backbone: 74 | x = self.backbone(x) 75 | if isinstance(x, dict): 76 | y.update(x) 77 | else: 78 | y["backbone_out"] = x 79 | final_name = "backbone_out" 80 | if self.use_neck: 81 | x = self.neck(x) 82 | if isinstance(x, dict): 83 | y.update(x) 84 | else: 85 | y["neck_out"] = x 86 | final_name = "neck_out" 87 | if self.use_head: 88 | x = self.head(x) 89 | # for multi head, save ctc neck out for udml 90 | if isinstance(x, dict) and "ctc_nect" in x.keys(): 91 | y["neck_out"] = x["ctc_neck"] 92 | y["head_out"] = x 93 | elif isinstance(x, dict): 94 | y.update(x) 95 | else: 96 | y["head_out"] = x 97 | if self.return_all_feats: 98 | if self.training: 99 | return y 100 | elif isinstance(x, dict): 101 | return x 102 | else: 103 | return {final_name: x} 104 | else: 105 | return x 106 | -------------------------------------------------------------------------------- /python/rapidocr/networks/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
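# Backbone factory: the set of supported backbones depends on model_type
# ("det" vs "rec"/"cls"); the class named in config["name"] is instantiated below.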
14 | 15 | __all__ = ["build_backbone"] 16 | 17 | 18 | def build_backbone(config, model_type): 19 | if model_type == "det": 20 | from .det_mobilenet_v3 import MobileNetV3 21 | from .rec_hgnet import PPHGNet_small 22 | from .rec_lcnetv3 import PPLCNetV3 23 | 24 | support_dict = [ 25 | "MobileNetV3", 26 | "ResNet", 27 | "ResNet_vd", 28 | "ResNet_SAST", 29 | "PPLCNetV3", 30 | "PPHGNet_small", 31 | ] 32 | elif model_type == "rec" or model_type == "cls": 33 | from .rec_hgnet import PPHGNet_small 34 | from .rec_lcnetv3 import PPLCNetV3 35 | from .rec_mobilenet_v3 import MobileNetV3 36 | from .rec_svtrnet import SVTRNet 37 | from .rec_mv1_enhance import MobileNetV1Enhance 38 | 39 | support_dict = [ 40 | "MobileNetV1Enhance", 41 | "MobileNetV3", 42 | "ResNet", 43 | "ResNetFPN", 44 | "MTB", 45 | "ResNet31", 46 | "SVTRNet", 47 | "ViTSTR", 48 | "DenseNet", 49 | "PPLCNetV3", 50 | "PPHGNet_small", 51 | ] 52 | else: 53 | raise NotImplementedError 54 | 55 | module_name = config.pop("name") 56 | assert module_name in support_dict, Exception( 57 | "when model typs is {}, backbone only support {}".format( 58 | model_type, support_dict 59 | ) 60 | ) 61 | module_class = eval(module_name)(**config) 62 | return module_class 63 | -------------------------------------------------------------------------------- /python/rapidocr/networks/common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | 6 | class Hswish(nn.Module): 7 | def __init__(self, inplace=True): 8 | super(Hswish, self).__init__() 9 | self.inplace = inplace 10 | 11 | def forward(self, x): 12 | return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0 13 | 14 | 15 | # out = max(0, min(1, slop*x+offset)) 16 | # paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None) 17 | class Hsigmoid(nn.Module): 18 | def __init__(self, inplace=True): 19 | super(Hsigmoid, self).__init__() 20 | self.inplace = inplace 21 | 22 | def forward(self, x): 23 | # torch: F.relu6(x + 3., inplace=self.inplace) / 6. 24 | # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6. 
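        # relu6(1.2 * x + 3) / 6 == clip(0.2 * x + 0.5, 0, 1), i.e. Paddle's
        # hard_sigmoid with slope=0.2 and offset=0.5 (see the note above).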
25 | return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0 26 | 27 | 28 | class GELU(nn.Module): 29 | def __init__(self, inplace=True): 30 | super(GELU, self).__init__() 31 | self.inplace = inplace 32 | 33 | def forward(self, x): 34 | return torch.nn.functional.gelu(x) 35 | 36 | 37 | class Swish(nn.Module): 38 | def __init__(self, inplace=True): 39 | super(Swish, self).__init__() 40 | self.inplace = inplace 41 | 42 | def forward(self, x): 43 | if self.inplace: 44 | x.mul_(torch.sigmoid(x)) 45 | return x 46 | else: 47 | return x * torch.sigmoid(x) 48 | 49 | 50 | class Activation(nn.Module): 51 | def __init__(self, act_type, inplace=True): 52 | super(Activation, self).__init__() 53 | act_type = act_type.lower() 54 | if act_type == "relu": 55 | self.act = nn.ReLU(inplace=inplace) 56 | elif act_type == "relu6": 57 | self.act = nn.ReLU6(inplace=inplace) 58 | elif act_type == "sigmoid": 59 | raise NotImplementedError 60 | elif act_type == "hard_sigmoid": 61 | self.act = Hsigmoid( 62 | inplace 63 | ) # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)# 64 | elif act_type == "hard_swish" or act_type == "hswish": 65 | self.act = Hswish(inplace=inplace) 66 | elif act_type == "leakyrelu": 67 | self.act = nn.LeakyReLU(inplace=inplace) 68 | elif act_type == "gelu": 69 | self.act = GELU(inplace=inplace) 70 | elif act_type == "swish": 71 | self.act = Swish(inplace=inplace) 72 | else: 73 | raise NotImplementedError 74 | 75 | def forward(self, inputs): 76 | return self.act(inputs) 77 | -------------------------------------------------------------------------------- /python/rapidocr/networks/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
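# Head factory: build_head() resolves config["name"] to one of the det/rec/cls
# head classes below and instantiates it with the remaining config entries.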
14 | 15 | __all__ = ["build_head"] 16 | 17 | 18 | def build_head(config, **kwargs): 19 | # det head 20 | from .det_db_head import DBHead, PFHeadLocal 21 | 22 | # rec head 23 | from .rec_ctc_head import CTCHead 24 | from .rec_multi_head import MultiHead 25 | 26 | # cls head 27 | from .cls_head import ClsHead 28 | 29 | support_dict = [ 30 | "DBHead", 31 | "CTCHead", 32 | "ClsHead", 33 | "MultiHead", 34 | "PFHeadLocal", 35 | ] 36 | 37 | module_name = config.pop("name") 38 | char_num = config.pop("char_num", 6625) 39 | assert module_name in support_dict, Exception( 40 | "head only support {}".format(support_dict) 41 | ) 42 | module_class = eval(module_name)(**config, **kwargs) 43 | return module_class 44 | -------------------------------------------------------------------------------- /python/rapidocr/networks/heads/cls_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | 6 | class ClsHead(nn.Module): 7 | """ 8 | Class orientation 9 | Args: 10 | params(dict): super parameters for build Class network 11 | """ 12 | 13 | def __init__(self, in_channels, class_dim, **kwargs): 14 | super(ClsHead, self).__init__() 15 | self.pool = nn.AdaptiveAvgPool2d(1) 16 | self.fc = nn.Linear(in_channels, class_dim, bias=True) 17 | 18 | def forward(self, x): 19 | x = self.pool(x) 20 | x = torch.reshape(x, shape=[x.shape[0], x.shape[1]]) 21 | x = self.fc(x) 22 | x = F.softmax(x, dim=1) 23 | return x 24 | -------------------------------------------------------------------------------- /python/rapidocr/networks/heads/rec_ctc_head.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | from torch import nn 3 | 4 | 5 | class CTCHead(nn.Module): 6 | def __init__( 7 | self, 8 | in_channels, 9 | out_channels=6625, 10 | fc_decay=0.0004, 11 | mid_channels=None, 12 | return_feats=False, 13 | **kwargs 14 | ): 15 | super(CTCHead, self).__init__() 16 | if mid_channels is None: 17 | self.fc = nn.Linear( 18 | in_channels, 19 | out_channels, 20 | bias=True, 21 | ) 22 | else: 23 | self.fc1 = nn.Linear( 24 | in_channels, 25 | mid_channels, 26 | bias=True, 27 | ) 28 | self.fc2 = nn.Linear( 29 | mid_channels, 30 | out_channels, 31 | bias=True, 32 | ) 33 | 34 | self.out_channels = out_channels 35 | self.mid_channels = mid_channels 36 | self.return_feats = return_feats 37 | 38 | def forward(self, x, labels=None): 39 | if self.mid_channels is None: 40 | predicts = self.fc(x) 41 | else: 42 | x = self.fc1(x) 43 | predicts = self.fc2(x) 44 | 45 | if self.return_feats: 46 | result = (x, predicts) 47 | else: 48 | result = predicts 49 | 50 | if not self.training: 51 | predicts = F.softmax(predicts, dim=2) 52 | result = predicts 53 | 54 | return result 55 | -------------------------------------------------------------------------------- /python/rapidocr/networks/heads/rec_multi_head.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ..necks.rnn import Im2Seq, SequenceEncoder 4 | from .rec_ctc_head import CTCHead 5 | 6 | 7 | class FCTranspose(nn.Module): 8 | def __init__(self, in_channels, out_channels, only_transpose=False): 9 | super().__init__() 10 | self.only_transpose = only_transpose 11 | if not self.only_transpose: 12 | self.fc = nn.Linear(in_channels, out_channels, bias=False) 13 | 14 | def forward(self, x): 15 | if self.only_transpose: 16 | return x.permute([0, 2, 1]) 17 | 
else: 18 | return self.fc(x.permute([0, 2, 1])) 19 | 20 | 21 | class MultiHead(nn.Module): 22 | def __init__(self, in_channels, out_channels_list, **kwargs): 23 | super().__init__() 24 | self.head_list = kwargs.pop("head_list") 25 | 26 | self.gtc_head = "sar" 27 | assert len(self.head_list) >= 2 28 | for idx, head_name in enumerate(self.head_list): 29 | name = list(head_name)[0] 30 | if name == "SARHead": 31 | pass 32 | 33 | elif name == "NRTRHead": 34 | pass 35 | elif name == "CTCHead": 36 | # ctc neck 37 | self.encoder_reshape = Im2Seq(in_channels) 38 | neck_args = self.head_list[idx][name]["Neck"] 39 | encoder_type = neck_args.pop("name") 40 | self.ctc_encoder = SequenceEncoder( 41 | in_channels=in_channels, encoder_type=encoder_type, **neck_args 42 | ) 43 | # ctc head 44 | head_args = self.head_list[idx][name].get("Head", {}) 45 | if head_args is None: 46 | head_args = {} 47 | 48 | self.ctc_head = CTCHead( 49 | in_channels=self.ctc_encoder.out_channels, 50 | out_channels=out_channels_list["CTCLabelDecode"], 51 | **head_args, 52 | ) 53 | else: 54 | raise NotImplementedError(f"{name} is not supported in MultiHead yet") 55 | 56 | def forward(self, x, data=None): 57 | ctc_encoder = self.ctc_encoder(x) 58 | return self.ctc_head(ctc_encoder) 59 | -------------------------------------------------------------------------------- /python/rapidocr/networks/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
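# Neck factory: resolves config["name"] to DBFPN/RSEFPN/LKPAN (detection) or
# SequenceEncoder (recognition) and instantiates it with the rest of the config.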
14 | 15 | __all__ = ["build_neck"] 16 | 17 | 18 | def build_neck(config): 19 | from .db_fpn import DBFPN, LKPAN, RSEFPN 20 | from .rnn import SequenceEncoder 21 | 22 | support_dict = ["DBFPN", "SequenceEncoder", "RSEFPN", "LKPAN"] 23 | 24 | module_name = config.pop("name") 25 | assert module_name in support_dict, Exception( 26 | "neck only support {}".format(support_dict) 27 | ) 28 | module_class = eval(module_name)(**config) 29 | return module_class 30 | -------------------------------------------------------------------------------- /python/rapidocr/networks/necks/intracl.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class IntraCLBlock(nn.Module): 5 | def __init__(self, in_channels=96, reduce_factor=4): 6 | super(IntraCLBlock, self).__init__() 7 | self.channels = in_channels 8 | self.rf = reduce_factor 9 | self.conv1x1_reduce_channel = nn.Conv2d( 10 | self.channels, self.channels // self.rf, kernel_size=1, stride=1, padding=0 11 | ) 12 | self.conv1x1_return_channel = nn.Conv2d( 13 | self.channels // self.rf, self.channels, kernel_size=1, stride=1, padding=0 14 | ) 15 | 16 | self.v_layer_7x1 = nn.Conv2d( 17 | self.channels // self.rf, 18 | self.channels // self.rf, 19 | kernel_size=(7, 1), 20 | stride=(1, 1), 21 | padding=(3, 0), 22 | ) 23 | self.v_layer_5x1 = nn.Conv2d( 24 | self.channels // self.rf, 25 | self.channels // self.rf, 26 | kernel_size=(5, 1), 27 | stride=(1, 1), 28 | padding=(2, 0), 29 | ) 30 | self.v_layer_3x1 = nn.Conv2d( 31 | self.channels // self.rf, 32 | self.channels // self.rf, 33 | kernel_size=(3, 1), 34 | stride=(1, 1), 35 | padding=(1, 0), 36 | ) 37 | 38 | self.q_layer_1x7 = nn.Conv2d( 39 | self.channels // self.rf, 40 | self.channels // self.rf, 41 | kernel_size=(1, 7), 42 | stride=(1, 1), 43 | padding=(0, 3), 44 | ) 45 | self.q_layer_1x5 = nn.Conv2d( 46 | self.channels // self.rf, 47 | self.channels // self.rf, 48 | kernel_size=(1, 5), 49 | stride=(1, 1), 50 | padding=(0, 2), 51 | ) 52 | self.q_layer_1x3 = nn.Conv2d( 53 | self.channels // self.rf, 54 | self.channels // self.rf, 55 | kernel_size=(1, 3), 56 | stride=(1, 1), 57 | padding=(0, 1), 58 | ) 59 | 60 | # base 61 | self.c_layer_7x7 = nn.Conv2d( 62 | self.channels // self.rf, 63 | self.channels // self.rf, 64 | kernel_size=(7, 7), 65 | stride=(1, 1), 66 | padding=(3, 3), 67 | ) 68 | self.c_layer_5x5 = nn.Conv2d( 69 | self.channels // self.rf, 70 | self.channels // self.rf, 71 | kernel_size=(5, 5), 72 | stride=(1, 1), 73 | padding=(2, 2), 74 | ) 75 | self.c_layer_3x3 = nn.Conv2d( 76 | self.channels // self.rf, 77 | self.channels // self.rf, 78 | kernel_size=(3, 3), 79 | stride=(1, 1), 80 | padding=(1, 1), 81 | ) 82 | 83 | self.bn = nn.BatchNorm2d(self.channels) 84 | self.relu = nn.ReLU() 85 | 86 | def forward(self, x): 87 | x_new = self.conv1x1_reduce_channel(x) 88 | 89 | x_7_c = self.c_layer_7x7(x_new) 90 | x_7_v = self.v_layer_7x1(x_new) 91 | x_7_q = self.q_layer_1x7(x_new) 92 | x_7 = x_7_c + x_7_v + x_7_q 93 | 94 | x_5_c = self.c_layer_5x5(x_7) 95 | x_5_v = self.v_layer_5x1(x_7) 96 | x_5_q = self.q_layer_1x5(x_7) 97 | x_5 = x_5_c + x_5_v + x_5_q 98 | 99 | x_3_c = self.c_layer_3x3(x_5) 100 | x_3_v = self.v_layer_3x1(x_5) 101 | x_3_q = self.q_layer_1x3(x_5) 102 | x_3 = x_3_c + x_3_v + x_3_q 103 | 104 | x_relation = self.conv1x1_return_channel(x_3) 105 | 106 | x_relation = self.bn(x_relation) 107 | x_relation = self.relu(x_relation) 108 | 109 | return x + x_relation 110 | 111 | 112 | def build_intraclblock_list(num_block): 113 | 
IntraCLBlock_list = nn.ModuleList() 114 | for i in range(num_block): 115 | IntraCLBlock_list.append(IntraCLBlock()) 116 | 117 | return IntraCLBlock_list 118 | -------------------------------------------------------------------------------- /python/rapidocr/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .download_file import DownloadFile, DownloadFileException, DownloadFileInput 5 | from .load_image import LoadImage, LoadImageError 6 | from .logger import Logger 7 | from .output import RapidOCROutput 8 | from .parse_parameters import ParseParams 9 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side 10 | from .vis_res import VisRes 11 | -------------------------------------------------------------------------------- /python/rapidocr/utils/download_file.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | import sys 6 | from dataclasses import dataclass 7 | from pathlib import Path 8 | from typing import Optional, Union 9 | 10 | import requests 11 | from tqdm import tqdm 12 | 13 | from .utils import get_file_sha256 14 | 15 | 16 | @dataclass 17 | class DownloadFileInput: 18 | file_url: str 19 | save_path: Union[str, Path] 20 | logger: logging.Logger 21 | sha256: Optional[str] = None 22 | 23 | 24 | class DownloadFile: 25 | BLOCK_SIZE = 1024 # 1 KiB 26 | REQUEST_TIMEOUT = 60 27 | 28 | @classmethod 29 | def run(cls, input_params: DownloadFileInput): 30 | save_path = Path(input_params.save_path) 31 | 32 | logger = input_params.logger 33 | cls._ensure_parent_dir_exists(save_path) 34 | if cls._should_skip_download(save_path, input_params.sha256, logger): 35 | return 36 | 37 | response = cls._make_http_request(input_params.file_url, logger) 38 | cls._save_response_with_progress(response, save_path, logger) 39 | 40 | @staticmethod 41 | def _ensure_parent_dir_exists(path: Path): 42 | path.parent.mkdir(parents=True, exist_ok=True) 43 | 44 | @classmethod 45 | def _should_skip_download( 46 | cls, path: Path, expected_sha256: Optional[str], logger: logging.Logger 47 | ) -> bool: 48 | if not path.exists(): 49 | return False 50 | 51 | if expected_sha256 is None: 52 | logger.info("File exists (no checksum verification): %s", path) 53 | return True 54 | 55 | if cls.check_file_sha256(path, expected_sha256): 56 | logger.info("File exists and is valid: %s", path) 57 | return True 58 | 59 | logger.warning("File exists but is invalid, redownloading: %s", path) 60 | return False 61 | 62 | @classmethod 63 | def _make_http_request(cls, url: str, logger: logging.Logger) -> requests.Response: 64 | logger.info("Initiating download: %s", url) 65 | try: 66 | response = requests.get(url, stream=True, timeout=cls.REQUEST_TIMEOUT) 67 | response.raise_for_status() # Raises HTTPError for 4XX/5XX 68 | return response 69 | except requests.RequestException as e: 70 | logger.error("Download failed: %s", url) 71 | raise DownloadFileException(f"Failed to download {url}") from e 72 | 73 | @classmethod 74 | def _save_response_with_progress( 75 | cls, response: requests.Response, save_path: Path, logger: logging.Logger 76 | ) -> None: 77 | total_size = int(response.headers.get("content-length", 0)) 78 | logger.info("Download size: %.2fMB", total_size / 1024 / 1024) 79 | 80 | with ( 81 | tqdm( 82 | total=total_size, 83 | 
unit="iB", 84 | unit_scale=True, 85 | disable=not cls.check_is_atty(), 86 | ) as progress_bar, 87 | open(save_path, "wb") as output_file, 88 | ): 89 | for chunk in response.iter_content(chunk_size=cls.BLOCK_SIZE): 90 | progress_bar.update(len(chunk)) 91 | output_file.write(chunk) 92 | 93 | logger.info("Successfully saved to: %s", save_path) 94 | 95 | @staticmethod 96 | def check_file_sha256(file_path: Union[str, Path], gt_sha256: str) -> bool: 97 | return get_file_sha256(file_path) == gt_sha256 98 | 99 | @staticmethod 100 | def check_is_atty() -> bool: 101 | try: 102 | is_interactive = sys.stderr.isatty() 103 | except AttributeError: 104 | return False 105 | return is_interactive 106 | 107 | 108 | class DownloadFileException(Exception): 109 | pass 110 | -------------------------------------------------------------------------------- /python/rapidocr/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | 6 | import colorlog 7 | 8 | 9 | class Logger: 10 | def __init__(self, log_level=logging.DEBUG, logger_name=None): 11 | self.logger = logging.getLogger(logger_name) 12 | self.logger.setLevel(log_level) 13 | self.logger.propagate = False 14 | 15 | formatter = colorlog.ColoredFormatter( 16 | "%(log_color)s[%(levelname)s] %(asctime)s [RapidOCR] %(filename)s:%(lineno)d: %(message)s", 17 | log_colors={ 18 | "DEBUG": "cyan", 19 | "INFO": "green", 20 | "WARNING": "yellow", 21 | "ERROR": "red", 22 | "CRITICAL": "red,bg_white", 23 | }, 24 | ) 25 | 26 | if not self.logger.handlers: 27 | console_handler = logging.StreamHandler() 28 | console_handler.setFormatter(formatter) 29 | 30 | for handler in self.logger.handlers: 31 | self.logger.removeHandler(handler) 32 | 33 | console_handler.setLevel(log_level) 34 | self.logger.addHandler(console_handler) 35 | 36 | def get_log(self): 37 | return self.logger 38 | -------------------------------------------------------------------------------- /python/rapidocr/utils/output.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from dataclasses import dataclass, field 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import numpy as np 8 | 9 | from .logger import Logger 10 | from .utils import save_img 11 | from .vis_res import VisRes 12 | 13 | logger = Logger(logger_name=__name__).get_log() 14 | 15 | 16 | @dataclass 17 | class RapidOCROutput: 18 | img: Optional[np.ndarray] = None 19 | boxes: Optional[np.ndarray] = None 20 | txts: Optional[Tuple[str]] = None 21 | scores: Optional[Tuple[float]] = None 22 | word_results: Tuple[Tuple[str, float, Optional[List[List[int]]]]] = ( 23 | ("", 1.0, None), 24 | ) 25 | elapse_list: List[Union[float, None]] = field(default_factory=list) 26 | elapse: float = field(init=False) 27 | lang_type: Optional[str] = None 28 | 29 | def __post_init__(self): 30 | self.elapse = sum(v for v in self.elapse_list if isinstance(v, float)) 31 | 32 | def __len__(self): 33 | if self.txts is None: 34 | return 0 35 | return len(self.txts) 36 | 37 | def to_json(self): 38 | pass 39 | 40 | def vis(self, save_path: Optional[str] = None, font_path: Optional[str] = None): 41 | if self.img is None or self.boxes is None: 42 | logger.warning("No image or boxes to visualize.") 43 | return 44 | 45 | vis = VisRes() 46 | if all(v is None for v in self.word_results): 47 | vis_img = vis( 
48 | self.img, 49 | self.boxes, 50 | self.txts, 51 | self.scores, 52 | font_path=font_path, 53 | lang_type=self.lang_type, 54 | ) 55 | 56 | if save_path is not None: 57 | save_img(save_path, vis_img) 58 | logger.info("Visualization saved as %s", save_path) 59 | return vis_img 60 | 61 | # single word vis 62 | words_results = self.word_results 63 | words, words_scores, words_boxes = list(zip(*words_results)) 64 | vis_img = vis( 65 | self.img, 66 | words_boxes, 67 | words, 68 | words_scores, 69 | font_path=font_path, 70 | lang_type=self.lang_type, 71 | ) 72 | 73 | if save_path is not None: 74 | save_img(save_path, vis_img) 75 | logger.info("Single word visualization saved as %s", save_path) 76 | return vis_img 77 | -------------------------------------------------------------------------------- /python/rapidocr/utils/parse_parameters.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from enum import Enum 5 | from pathlib import Path 6 | from typing import Any, Dict, Union 7 | 8 | from omegaconf import DictConfig, OmegaConf 9 | 10 | from .typings import ( 11 | EngineType, 12 | LangCls, 13 | LangDet, 14 | LangRec, 15 | ModelType, 16 | OCRVersion, 17 | TaskType, 18 | ) 19 | 20 | 21 | class ParseParams(OmegaConf): 22 | def __init__(self): 23 | pass 24 | 25 | @classmethod 26 | def load(cls, file_path: Union[str, Path]): 27 | cfg = OmegaConf.load(file_path) 28 | 29 | cfg.Det = cls._convert_value_to_enum(cfg.Det) 30 | cfg.Cls = cls._convert_value_to_enum(cfg.Cls) 31 | cfg.Rec = cls._convert_value_to_enum(cfg.Rec) 32 | return cfg 33 | 34 | @classmethod 35 | def update_batch(cls, cfg: DictConfig, params: Dict[str, Any]) -> DictConfig: 36 | global_keys = list(OmegaConf.to_container(cfg.Global).keys()) 37 | enum_params = [ 38 | "engine_type", 39 | "model_type", 40 | "ocr_version", 41 | "lang_type", 42 | "task_type", 43 | ] 44 | for k, v in params.items(): 45 | if k.startswith("Global") and k.split(".")[1] not in global_keys: 46 | raise ValueError(f"{k} is not a valid key.") 47 | 48 | if k.split(".")[1] in enum_params and not isinstance(v, Enum): 49 | raise TypeError(f"The value of {k} must be Enum Type.") 50 | 51 | cls.update(cfg, k, v) 52 | return cfg 53 | 54 | @classmethod 55 | def _convert_value_to_enum(cls, cfg: DictConfig): 56 | cfg.engine_type = EngineType(cfg.engine_type) 57 | cfg.model_type = ModelType(cfg.model_type) 58 | cfg.ocr_version = OCRVersion(cfg.ocr_version) 59 | cfg.task_type = TaskType(cfg.task_type) 60 | cfg.lang_type = cls.LangType(cfg.task_type, cfg.lang_type) 61 | return cfg 62 | 63 | @staticmethod 64 | def LangType(task_type: TaskType, lang_type: str): 65 | if task_type == TaskType.DET: 66 | return LangDet(lang_type) 67 | 68 | if task_type == TaskType.CLS: 69 | return LangCls(lang_type) 70 | 71 | if task_type == TaskType.REC: 72 | return LangRec(lang_type) 73 | 74 | raise ValueError(f"task_type {task_type.value} is not in [Det, Cls, Rec]") 75 | -------------------------------------------------------------------------------- /python/rapidocr/utils/process_img.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from typing import Tuple 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def reduce_max_side( 11 | img: np.ndarray, max_side_len: int = 2000 12 | ) -> Tuple[np.ndarray, float, float]: 13 | h, w = img.shape[:2] 14 | 15 | 
ratio = 1.0 16 | if max(h, w) > max_side_len: 17 | if h > w: 18 | ratio = float(max_side_len) / h 19 | else: 20 | ratio = float(max_side_len) / w 21 | 22 | resize_h = int(h * ratio) 23 | resize_w = int(w * ratio) 24 | 25 | resize_h = int(round(resize_h / 32) * 32) 26 | resize_w = int(round(resize_w / 32) * 32) 27 | 28 | try: 29 | if int(resize_w) <= 0 or int(resize_h) <= 0: 30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 31 | img = cv2.resize(img, (resize_w, resize_h)) 32 | except Exception as exc: 33 | raise ResizeImgError() from exc 34 | 35 | ratio_h = h / resize_h 36 | ratio_w = w / resize_w 37 | return img, ratio_h, ratio_w 38 | 39 | 40 | def increase_min_side( 41 | img: np.ndarray, min_side_len: int = 30 42 | ) -> Tuple[np.ndarray, float, float]: 43 | h, w = img.shape[:2] 44 | 45 | ratio = 1.0 46 | if min(h, w) < min_side_len: 47 | if h < w: 48 | ratio = float(min_side_len) / h 49 | else: 50 | ratio = float(min_side_len) / w 51 | 52 | resize_h = int(h * ratio) 53 | resize_w = int(w * ratio) 54 | 55 | resize_h = int(round(resize_h / 32) * 32) 56 | resize_w = int(round(resize_w / 32) * 32) 57 | 58 | try: 59 | if int(resize_w) <= 0 or int(resize_h) <= 0: 60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 61 | img = cv2.resize(img, (resize_w, resize_h)) 62 | except Exception as exc: 63 | raise ResizeImgError() from exc 64 | 65 | ratio_h = h / resize_h 66 | ratio_w = w / resize_w 67 | return img, ratio_h, ratio_w 68 | 69 | 70 | def add_round_letterbox( 71 | img: np.ndarray, 72 | padding_tuple: Tuple[int, int, int, int], 73 | ) -> np.ndarray: 74 | padded_img = cv2.copyMakeBorder( 75 | img, 76 | padding_tuple[0], 77 | padding_tuple[1], 78 | padding_tuple[2], 79 | padding_tuple[3], 80 | cv2.BORDER_CONSTANT, 81 | value=(0, 0, 0), 82 | ) 83 | return padded_img 84 | 85 | 86 | class ResizeImgError(Exception): 87 | pass 88 | -------------------------------------------------------------------------------- /python/rapidocr/utils/typings.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from enum import Enum 5 | 6 | 7 | class LangDet(Enum): 8 | CH = "ch" 9 | EN = "en" 10 | MULTI = "multi" 11 | 12 | 13 | class LangCls(Enum): 14 | CH = "ch" 15 | 16 | 17 | class LangRec(Enum): 18 | CH = "ch" 19 | CH_DOC = "ch_doc" 20 | EN = "en" 21 | ARABIC = "arabic" 22 | CHINESE_CHT = "chinese_cht" 23 | CYRILLIC = "cyrillic" 24 | DEVANAGARI = "devanagari" 25 | JAPAN = "japan" 26 | KOREAN = "korean" 27 | KA = "ka" 28 | LATIN = "latin" 29 | TA = "ta" 30 | TE = "te" 31 | 32 | 33 | class OCRVersion(Enum): 34 | PPOCRV4 = "PP-OCRv4" 35 | PPOCRV5 = "PP-OCRv5" 36 | 37 | 38 | class EngineType(Enum): 39 | ONNXRUNTIME = "onnxruntime" 40 | OPENVINO = "openvino" 41 | PADDLE = "paddle" 42 | TORCH = "torch" 43 | 44 | 45 | class ModelType(Enum): 46 | MOBILE = "mobile" 47 | SERVER = "server" 48 | 49 | 50 | class TaskType(Enum): 51 | DET = "det" 52 | CLS = "cls" 53 | REC = "rec" 54 | -------------------------------------------------------------------------------- /python/rapidocr/utils/utils.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import hashlib 5 | import importlib 6 | from pathlib import Path 7 | from typing import Tuple, Union 8 | from urllib.parse import urlparse 9 | 10 | import cv2 11 | import numpy as np 12 | 13 | 14 | 
def quads_to_rect_bbox(bbox: np.ndarray) -> Tuple[float, float, float, float]: 15 | if bbox.ndim != 3: 16 | raise ValueError("bbox ndim must be 3") 17 | 18 | if bbox.shape[1] != 4 or bbox.shape[2] != 2: 19 | raise ValueError("bbox shape must be (N, 4, 2)") 20 | 21 | all_x, all_y = (bbox[:, :, 0].flatten(), bbox[:, :, 1].flatten()) 22 | x_min, y_min = np.min(all_x), np.min(all_y) 23 | x_max, y_max = np.max(all_x), np.max(all_y) 24 | return float(x_min), float(y_min), float(x_max), float(y_max) 25 | 26 | 27 | def has_chinese_char(text: str) -> bool: 28 | return any("\u4e00" <= ch <= "\u9fff" for ch in text) 29 | 30 | 31 | def get_file_sha256(file_path: Union[str, Path], chunk_size: int = 65536) -> str: 32 | with open(file_path, "rb") as file: 33 | sha_signature = hashlib.sha256() 34 | while True: 35 | chunk = file.read(chunk_size) 36 | if not chunk: 37 | break 38 | sha_signature.update(chunk) 39 | 40 | return sha_signature.hexdigest() 41 | 42 | 43 | def save_img(save_path: Union[str, Path], img: np.ndarray): 44 | if not Path(save_path).parent.exists(): 45 | Path(save_path).parent.mkdir(parents=True, exist_ok=True) 46 | 47 | cv2.imwrite(str(save_path), img) 48 | 49 | 50 | def is_url(url: str) -> bool: 51 | try: 52 | result = urlparse(url) 53 | return all([result.scheme, result.netloc]) 54 | except Exception: 55 | return False 56 | 57 | 58 | def import_package(name, package=None): 59 | try: 60 | module = importlib.import_module(name, package=package) 61 | return module 62 | except ModuleNotFoundError: 63 | return None 64 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidOCR 5 | from .utils import LoadImageError, VisRes 6 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/cal_rec_boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import CalRecBoxes 5 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/ch_ppocr_cls/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_cls import TextClassifier 5 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/ch_ppocr_cls/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from typing import List, Tuple 15 | 16 | import numpy as np 17 | 18 | 19 | class ClsPostProcess: 20 | def __init__(self, label_list: List[str]): 21 | self.label_list = label_list 22 | 23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]: 24 | pred_idxs = preds.argmax(axis=1) 25 | decode_out = [ 26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs) 27 | ] 28 | return decode_out 29 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/ch_ppocr_det/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_detect import TextDetector 5 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/ch_ppocr_rec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_recognize import TextRecognizer 5 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/config.yaml: -------------------------------------------------------------------------------- 1 | Global: 2 | text_score: 0.5 3 | use_det: true 4 | use_cls: true 5 | use_rec: true 6 | print_verbose: false 7 | min_height: 30 8 | width_height_ratio: 8 9 | max_side_len: 2000 10 | min_side_len: 30 11 | return_word_box: false 12 | 13 | intra_op_num_threads: &intra_nums -1 14 | inter_op_num_threads: &inter_nums -1 15 | 16 | Det: 17 | intra_op_num_threads: *intra_nums 18 | inter_op_num_threads: *inter_nums 19 | 20 | use_cuda: false 21 | use_dml: false 22 | 23 | model_path: models/ch_PP-OCRv4_det_infer.onnx 24 | 25 | limit_side_len: 736 26 | limit_type: min 27 | std: [ 0.5, 0.5, 0.5 ] 28 | mean: [ 0.5, 0.5, 0.5 ] 29 | 30 | thresh: 0.3 31 | box_thresh: 0.5 32 | max_candidates: 1000 33 | unclip_ratio: 1.6 34 | use_dilation: true 35 | score_mode: fast 36 | 37 | Cls: 38 | intra_op_num_threads: *intra_nums 39 | inter_op_num_threads: *inter_nums 40 | 41 | use_cuda: false 42 | use_dml: false 43 | 44 | model_path: models/ch_ppocr_mobile_v2.0_cls_infer.onnx 45 | 46 | cls_image_shape: [3, 48, 192] 47 | cls_batch_num: 6 48 | cls_thresh: 0.9 49 | label_list: ['0', '180'] 50 | 51 | Rec: 52 | intra_op_num_threads: *intra_nums 53 | inter_op_num_threads: *inter_nums 54 | 55 | use_cuda: false 56 | use_dml: false 57 | 58 | model_path: models/ch_PP-OCRv4_rec_infer.onnx 59 | 60 | rec_img_shape: [3, 48, 320] 61 | rec_batch_num: 6 62 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_onnxruntime/models/.gitkeep -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import Dict, Union 6 | 7 | import yaml 8 | 9 | from .infer_engine import OrtInferSession 10 | from .load_image import LoadImage, LoadImageError 11 | from .logger import get_logger 12 | from 
.parse_parameters import UpdateParameters, init_args, update_model_path 13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side 14 | from .vis_res import VisRes 15 | 16 | 17 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]: 18 | with open(yaml_path, "rb") as f: 19 | data = yaml.load(f, Loader=yaml.Loader) 20 | return data 21 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | from functools import lru_cache 6 | 7 | 8 | @lru_cache(maxsize=32) 9 | def get_logger(name: str) -> logging.Logger: 10 | logger = logging.getLogger(name) 11 | logger.setLevel(logging.DEBUG) 12 | 13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s" 14 | format_str = logging.Formatter(fmt) 15 | 16 | sh = logging.StreamHandler() 17 | sh.setLevel(logging.DEBUG) 18 | 19 | logger.addHandler(sh) 20 | sh.setFormatter(format_str) 21 | return logger 22 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/utils/process_img.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from typing import Tuple 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def reduce_max_side( 11 | img: np.ndarray, max_side_len: int = 2000 12 | ) -> Tuple[np.ndarray, float, float]: 13 | h, w = img.shape[:2] 14 | 15 | ratio = 1.0 16 | if max(h, w) > max_side_len: 17 | if h > w: 18 | ratio = float(max_side_len) / h 19 | else: 20 | ratio = float(max_side_len) / w 21 | 22 | resize_h = int(h * ratio) 23 | resize_w = int(w * ratio) 24 | 25 | resize_h = int(round(resize_h / 32) * 32) 26 | resize_w = int(round(resize_w / 32) * 32) 27 | 28 | try: 29 | if int(resize_w) <= 0 or int(resize_h) <= 0: 30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 31 | img = cv2.resize(img, (resize_w, resize_h)) 32 | except Exception as exc: 33 | raise ResizeImgError() from exc 34 | 35 | ratio_h = h / resize_h 36 | ratio_w = w / resize_w 37 | return img, ratio_h, ratio_w 38 | 39 | 40 | def increase_min_side( 41 | img: np.ndarray, min_side_len: int = 30 42 | ) -> Tuple[np.ndarray, float, float]: 43 | h, w = img.shape[:2] 44 | 45 | ratio = 1.0 46 | if min(h, w) < min_side_len: 47 | if h < w: 48 | ratio = float(min_side_len) / h 49 | else: 50 | ratio = float(min_side_len) / w 51 | 52 | resize_h = int(h * ratio) 53 | resize_w = int(w * ratio) 54 | 55 | resize_h = int(round(resize_h / 32) * 32) 56 | resize_w = int(round(resize_w / 32) * 32) 57 | 58 | try: 59 | if int(resize_w) <= 0 or int(resize_h) <= 0: 60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 61 | img = cv2.resize(img, (resize_w, resize_h)) 62 | except Exception as exc: 63 | raise ResizeImgError() from exc 64 | 65 | ratio_h = h / resize_h 66 | ratio_w = w / resize_w 67 | return img, ratio_h, ratio_w 68 | 69 | 70 | def add_round_letterbox( 71 | img: np.ndarray, 72 | padding_tuple: Tuple[int, int, int, int], 73 | ) -> np.ndarray: 74 | padded_img = cv2.copyMakeBorder( 75 | img, 76 | padding_tuple[0], 77 | padding_tuple[1], 78 | padding_tuple[2], 79 | padding_tuple[3], 80 | cv2.BORDER_CONSTANT, 81 | value=(0, 0, 0), 82 | ) 83 | return padded_img 84 | 85 | 86 | class 
ResizeImgError(Exception): 87 | pass 88 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidOCR 5 | from .utils import LoadImageError, VisRes 6 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/cal_rec_boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import CalRecBoxes 5 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/ch_ppocr_cls/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_cls import TextClassifier 5 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/ch_ppocr_cls/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from typing import List, Tuple 15 | 16 | import numpy as np 17 | 18 | 19 | class ClsPostProcess: 20 | def __init__(self, label_list: List[str]): 21 | self.label_list = label_list 22 | 23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]: 24 | pred_idxs = preds.argmax(axis=1) 25 | decode_out = [ 26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs) 27 | ] 28 | return decode_out 29 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/ch_ppocr_det/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_detect import TextDetector 5 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/ch_ppocr_rec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_recognize import TextRecognizer 5 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/config.yaml: -------------------------------------------------------------------------------- 1 | Global: 2 | text_score: 0.5 3 | use_det: true 4 | use_cls: true 5 | use_rec: true 6 | print_verbose: false 7 | min_height: 30 8 | width_height_ratio: 8 9 | max_side_len: 2000 10 | min_side_len: 30 11 | return_word_box: false 12 | 13 | inference_num_threads: &infer_num_threads -1 14 | 15 | Det: 16 | inference_num_threads: *infer_num_threads 17 | 18 | use_cuda: false 19 | 20 | model_path: models/ch_PP-OCRv4_det_infer.onnx 21 | 22 | limit_side_len: 736 23 | limit_type: min 24 | std: [ 0.5, 0.5, 0.5 ] 25 | mean: [ 0.5, 0.5, 0.5 ] 26 | 27 | thresh: 0.3 28 | box_thresh: 0.5 29 | max_candidates: 1000 30 | unclip_ratio: 1.6 31 | use_dilation: true 32 | score_mode: fast 33 | 34 | Cls: 35 | inference_num_threads: *infer_num_threads 36 | 37 | use_cuda: false 38 | 39 | model_path: models/ch_ppocr_mobile_v2.0_cls_infer.onnx 40 | 41 | cls_image_shape: [3, 48, 192] 42 | cls_batch_num: 6 43 | cls_thresh: 0.9 44 | label_list: ['0', '180'] 45 | 46 | Rec: 47 | inference_num_threads: *infer_num_threads 48 | 49 | use_cuda: false 50 | 51 | model_path: models/ch_PP-OCRv4_rec_infer.onnx 52 | 53 | rec_img_shape: [3, 48, 320] 54 | rec_batch_num: 6 55 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_openvino/models/.gitkeep -------------------------------------------------------------------------------- /python/rapidocr_openvino/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import Dict, Union 6 | 7 | import yaml 8 | 9 | from .infer_engine import OpenVINOInferSession 10 | from .load_image import LoadImage, LoadImageError 11 | from .logger import get_logger 12 | from .parse_parameters import UpdateParameters, init_args, update_model_path 13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side 14 | from .vis_res import VisRes 15 | 16 | 17 | 
def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]: 18 | with open(yaml_path, "rb") as f: 19 | data = yaml.load(f, Loader=yaml.Loader) 20 | return data 21 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/utils/infer_engine.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import os 5 | import traceback 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | from openvino.runtime import Core 10 | 11 | 12 | class OpenVINOInferSession: 13 | def __init__(self, config): 14 | core = Core() 15 | 16 | self._verify_model(config["model_path"]) 17 | model_onnx = core.read_model(config["model_path"]) 18 | 19 | cpu_nums = os.cpu_count() 20 | infer_num_threads = config.get("inference_num_threads", -1) 21 | if infer_num_threads != -1 and 1 <= infer_num_threads <= cpu_nums: 22 | core.set_property("CPU", {"INFERENCE_NUM_THREADS": str(infer_num_threads)}) 23 | 24 | compile_model = core.compile_model(model=model_onnx, device_name="CPU") 25 | self.session = compile_model.create_infer_request() 26 | 27 | def __call__(self, input_content: np.ndarray) -> np.ndarray: 28 | try: 29 | self.session.infer(inputs=[input_content]) 30 | return self.session.get_output_tensor().data 31 | except Exception as e: 32 | error_info = traceback.format_exc() 33 | raise OpenVIONError(error_info) from e 34 | 35 | @staticmethod 36 | def _verify_model(model_path): 37 | model_path = Path(model_path) 38 | if not model_path.exists(): 39 | raise FileNotFoundError(f"{model_path} does not exists.") 40 | if not model_path.is_file(): 41 | raise FileExistsError(f"{model_path} is not a file.") 42 | 43 | 44 | class OpenVIONError(Exception): 45 | pass 46 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | from functools import lru_cache 6 | 7 | 8 | @lru_cache(maxsize=32) 9 | def get_logger(name: str): 10 | logger = logging.getLogger(name) 11 | logger.setLevel(logging.DEBUG) 12 | 13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s" 14 | format_str = logging.Formatter(fmt) 15 | 16 | sh = logging.StreamHandler() 17 | sh.setLevel(logging.DEBUG) 18 | 19 | logger.addHandler(sh) 20 | sh.setFormatter(format_str) 21 | return logger 22 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/utils/process_img.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from typing import Tuple 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def reduce_max_side( 11 | img: np.ndarray, max_side_len: int = 2000 12 | ) -> Tuple[np.ndarray, float, float]: 13 | h, w = img.shape[:2] 14 | 15 | ratio = 1.0 16 | if max(h, w) > max_side_len: 17 | if h > w: 18 | ratio = float(max_side_len) / h 19 | else: 20 | ratio = float(max_side_len) / w 21 | 22 | resize_h = int(h * ratio) 23 | resize_w = int(w * ratio) 24 | 25 | resize_h = int(round(resize_h / 32) * 32) 26 | resize_w = int(round(resize_w / 32) * 32) 27 | 28 | try: 29 | if int(resize_w) <= 0 or int(resize_h) <= 0: 30 | raise ResizeImgError("resize_w or resize_h is less than or 
equal to 0") 31 | img = cv2.resize(img, (resize_w, resize_h)) 32 | except Exception as exc: 33 | raise ResizeImgError() from exc 34 | 35 | ratio_h = h / resize_h 36 | ratio_w = w / resize_w 37 | return img, ratio_h, ratio_w 38 | 39 | 40 | def increase_min_side( 41 | img: np.ndarray, min_side_len: int = 30 42 | ) -> Tuple[np.ndarray, float, float]: 43 | h, w = img.shape[:2] 44 | 45 | ratio = 1.0 46 | if min(h, w) < min_side_len: 47 | if h < w: 48 | ratio = float(min_side_len) / h 49 | else: 50 | ratio = float(min_side_len) / w 51 | 52 | resize_h = int(h * ratio) 53 | resize_w = int(w * ratio) 54 | 55 | resize_h = int(round(resize_h / 32) * 32) 56 | resize_w = int(round(resize_w / 32) * 32) 57 | 58 | try: 59 | if int(resize_w) <= 0 or int(resize_h) <= 0: 60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 61 | img = cv2.resize(img, (resize_w, resize_h)) 62 | except Exception as exc: 63 | raise ResizeImgError() from exc 64 | 65 | ratio_h = h / resize_h 66 | ratio_w = w / resize_w 67 | return img, ratio_h, ratio_w 68 | 69 | 70 | def add_round_letterbox( 71 | img: np.ndarray, 72 | padding_tuple: Tuple[int, int, int, int], 73 | ) -> np.ndarray: 74 | padded_img = cv2.copyMakeBorder( 75 | img, 76 | padding_tuple[0], 77 | padding_tuple[1], 78 | padding_tuple[2], 79 | padding_tuple[3], 80 | cv2.BORDER_CONSTANT, 81 | value=(0, 0, 0), 82 | ) 83 | return padded_img 84 | 85 | 86 | class ResizeImgError(Exception): 87 | pass 88 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidOCR 5 | from .utils import LoadImageError, VisRes 6 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/cal_rec_boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import CalRecBoxes 5 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/ch_ppocr_cls/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_cls import TextClassifier 5 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/ch_ppocr_cls/text_cls.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import copy 15 | import math 16 | import time 17 | from typing import List 18 | 19 | import cv2 20 | import numpy as np 21 | 22 | from rapidocr_paddle.utils import PaddleInferSession 23 | 24 | from .utils import ClsPostProcess 25 | 26 | 27 | class TextClassifier: 28 | def __init__(self, config): 29 | self.cls_image_shape = config["cls_image_shape"] 30 | self.cls_batch_num = config["cls_batch_num"] 31 | self.cls_thresh = config["cls_thresh"] 32 | self.postprocess_op = ClsPostProcess(config["label_list"]) 33 | 34 | self.infer = PaddleInferSession(config) 35 | 36 | def __call__(self, img_list: List[np.ndarray]): 37 | if isinstance(img_list, np.ndarray): 38 | img_list = [img_list] 39 | 40 | img_list = copy.deepcopy(img_list) 41 | 42 | # Calculate the aspect ratio of all text bars 43 | width_list = [img.shape[1] / float(img.shape[0]) for img in img_list] 44 | 45 | # Sorting can speed up the cls process 46 | indices = np.argsort(np.array(width_list)) 47 | 48 | img_num = len(img_list) 49 | cls_res = [["", 0.0]] * img_num 50 | batch_num = self.cls_batch_num 51 | elapse = 0 52 | for beg_img_no in range(0, img_num, batch_num): 53 | end_img_no = min(img_num, beg_img_no + batch_num) 54 | 55 | norm_img_batch = [] 56 | for ino in range(beg_img_no, end_img_no): 57 | norm_img = self.resize_norm_img(img_list[indices[ino]]) 58 | norm_img = norm_img[np.newaxis, :] 59 | norm_img_batch.append(norm_img) 60 | norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32) 61 | 62 | starttime = time.time() 63 | prob_out = self.infer(norm_img_batch)[0] 64 | cls_result = self.postprocess_op(prob_out) 65 | elapse += time.time() - starttime 66 | 67 | for rno in range(len(cls_result)): 68 | label, score = cls_result[rno] 69 | cls_res[indices[beg_img_no + rno]] = [label, score] 70 | if "180" in label and score > self.cls_thresh: 71 | img_list[indices[beg_img_no + rno]] = cv2.rotate( 72 | img_list[indices[beg_img_no + rno]], 1 73 | ) 74 | return img_list, cls_res, elapse 75 | 76 | def resize_norm_img(self, img): 77 | img_c, img_h, img_w = self.cls_image_shape 78 | h, w = img.shape[:2] 79 | ratio = w / float(h) 80 | if math.ceil(img_h * ratio) > img_w: 81 | resized_w = img_w 82 | else: 83 | resized_w = int(math.ceil(img_h * ratio)) 84 | 85 | resized_image = cv2.resize(img, (resized_w, img_h)) 86 | resized_image = resized_image.astype("float32") 87 | if img_c == 1: 88 | resized_image = resized_image / 255 89 | resized_image = resized_image[np.newaxis, :] 90 | else: 91 | resized_image = resized_image.transpose((2, 0, 1)) / 255 92 | 93 | resized_image -= 0.5 94 | resized_image /= 0.5 95 | padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32) 96 | padding_im[:, :, :resized_w] = resized_image 97 | return padding_im 98 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/ch_ppocr_cls/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import List, Tuple 15 | 16 | import numpy as np 17 | 18 | 19 | class ClsPostProcess: 20 | def __init__(self, label_list: List[str]): 21 | self.label_list = label_list 22 | 23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]: 24 | pred_idxs = preds.argmax(axis=1) 25 | decode_out = [ 26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs) 27 | ] 28 | return decode_out 29 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/ch_ppocr_det/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_detect import TextDetector 5 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/ch_ppocr_rec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_recognize import TextRecognizer 5 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/config.yaml: -------------------------------------------------------------------------------- 1 | Global: 2 | text_score: 0.5 3 | use_det: true 4 | use_cls: true 5 | use_rec: true 6 | print_verbose: false 7 | min_height: 30 8 | width_height_ratio: 8 9 | max_side_len: 2000 10 | min_side_len: 30 11 | return_word_box: false 12 | 13 | cpu_math_library_num_threads: &infer_num_threads -1 14 | 15 | Det: 16 | use_cuda: false 17 | gpu_id: 0 18 | gpu_mem: 500 19 | 20 | cpu_math_library_num_threads: *infer_num_threads 21 | 22 | model_path: models/ch_PP-OCRv4_det_infer 23 | 24 | limit_side_len: 736 25 | limit_type: min 26 | std: [ 0.5, 0.5, 0.5 ] 27 | mean: [ 0.5, 0.5, 0.5 ] 28 | 29 | thresh: 0.3 30 | box_thresh: 0.5 31 | max_candidates: 1000 32 | unclip_ratio: 1.6 33 | use_dilation: true 34 | score_mode: fast 35 | 36 | Cls: 37 | use_cuda: false 38 | gpu_id: 0 39 | gpu_mem: 500 40 | 41 | cpu_math_library_num_threads: *infer_num_threads 42 | 43 | model_path: models/ch_ppocr_mobile_v2_cls_infer 44 | 45 | cls_image_shape: [3, 48, 192] 46 | cls_batch_num: 6 47 | cls_thresh: 0.9 48 | label_list: ['0', '180'] 49 | 50 | Rec: 51 | use_cuda: false 52 | gpu_id: 0 53 | gpu_mem: 500 54 | 55 | cpu_math_library_num_threads: *infer_num_threads 56 | 57 | model_path: models/ch_PP-OCRv4_rec_infer 58 | 59 | rec_img_shape: [3, 48, 320] 60 | rec_batch_num: 6 61 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_paddle/models/.gitkeep -------------------------------------------------------------------------------- /python/rapidocr_paddle/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import Dict, Union 6 | 7 | import yaml 8 | 9 | from .infer_engine import PaddleInferSession 10 | from .load_image import LoadImage, LoadImageError 11 | from .logger import get_logger 12 | 
from .parse_parameters import UpdateParameters, init_args, update_model_path 13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side 14 | from .vis_res import VisRes 15 | 16 | 17 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]: 18 | with open(yaml_path, "rb") as f: 19 | data = yaml.load(f, Loader=yaml.Loader) 20 | return data 21 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | from functools import lru_cache 6 | 7 | 8 | @lru_cache(maxsize=32) 9 | def get_logger(name: str): 10 | logger = logging.getLogger(name) 11 | logger.setLevel(logging.DEBUG) 12 | 13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s" 14 | format_str = logging.Formatter(fmt) 15 | 16 | sh = logging.StreamHandler() 17 | sh.setLevel(logging.DEBUG) 18 | 19 | logger.addHandler(sh) 20 | sh.setFormatter(format_str) 21 | return logger 22 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/utils/process_img.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from typing import Tuple 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def reduce_max_side( 11 | img: np.ndarray, max_side_len: int = 2000 12 | ) -> Tuple[np.ndarray, float, float]: 13 | h, w = img.shape[:2] 14 | 15 | ratio = 1.0 16 | if max(h, w) > max_side_len: 17 | if h > w: 18 | ratio = float(max_side_len) / h 19 | else: 20 | ratio = float(max_side_len) / w 21 | 22 | resize_h = int(h * ratio) 23 | resize_w = int(w * ratio) 24 | 25 | resize_h = int(round(resize_h / 32) * 32) 26 | resize_w = int(round(resize_w / 32) * 32) 27 | 28 | try: 29 | if int(resize_w) <= 0 or int(resize_h) <= 0: 30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 31 | img = cv2.resize(img, (resize_w, resize_h)) 32 | except Exception as exc: 33 | raise ResizeImgError() from exc 34 | 35 | ratio_h = h / resize_h 36 | ratio_w = w / resize_w 37 | return img, ratio_h, ratio_w 38 | 39 | 40 | def increase_min_side( 41 | img: np.ndarray, min_side_len: int = 30 42 | ) -> Tuple[np.ndarray, float, float]: 43 | h, w = img.shape[:2] 44 | 45 | ratio = 1.0 46 | if min(h, w) < min_side_len: 47 | if h < w: 48 | ratio = float(min_side_len) / h 49 | else: 50 | ratio = float(min_side_len) / w 51 | 52 | resize_h = int(h * ratio) 53 | resize_w = int(w * ratio) 54 | 55 | resize_h = int(round(resize_h / 32) * 32) 56 | resize_w = int(round(resize_w / 32) * 32) 57 | 58 | try: 59 | if int(resize_w) <= 0 or int(resize_h) <= 0: 60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 61 | img = cv2.resize(img, (resize_w, resize_h)) 62 | except Exception as exc: 63 | raise ResizeImgError() from exc 64 | 65 | ratio_h = h / resize_h 66 | ratio_w = w / resize_w 67 | return img, ratio_h, ratio_w 68 | 69 | 70 | def add_round_letterbox( 71 | img: np.ndarray, 72 | padding_tuple: Tuple[int, int, int, int], 73 | ) -> np.ndarray: 74 | padded_img = cv2.copyMakeBorder( 75 | img, 76 | padding_tuple[0], 77 | padding_tuple[1], 78 | padding_tuple[2], 79 | padding_tuple[3], 80 | cv2.BORDER_CONSTANT, 81 | value=(0, 0, 0), 82 | ) 83 | return padded_img 84 | 85 | 86 | class ResizeImgError(Exception): 87 | 
pass 88 | -------------------------------------------------------------------------------- /python/rapidocr_torch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidOCR 5 | from .utils import LoadImageError, VisRes 6 | -------------------------------------------------------------------------------- /python/rapidocr_torch/arch_config.yaml: -------------------------------------------------------------------------------- 1 | ch_ptocr_mobile_v2.0_cls_infer: 2 | model_type: cls 3 | algorithm: CLS 4 | Transform: 5 | Backbone: 6 | name: MobileNetV3 7 | scale: 0.35 8 | model_name: small 9 | Neck: 10 | Head: 11 | name: ClsHead 12 | class_dim: 2 13 | 14 | ch_PP-OCRv4_det_infer: 15 | model_type: det 16 | algorithm: DB 17 | Transform: null 18 | Backbone: 19 | name: PPLCNetV3 20 | scale: 0.75 21 | det: True 22 | Neck: 23 | name: RSEFPN 24 | out_channels: 96 25 | shortcut: True 26 | Head: 27 | name: DBHead 28 | k: 50 29 | 30 | 31 | ch_PP-OCRv4_det_server_infer: 32 | model_type: det 33 | algorithm: DB 34 | Transform: null 35 | Backbone: 36 | name: PPHGNet_small 37 | det: True 38 | Neck: 39 | name: LKPAN 40 | out_channels: 256 41 | intracl: true 42 | Head: 43 | name: PFHeadLocal 44 | k: 50 45 | mode: "large" 46 | 47 | 48 | ch_PP-OCRv4_rec_infer: 49 | model_type: rec 50 | algorithm: SVTR_LCNet 51 | Transform: 52 | Backbone: 53 | name: PPLCNetV3 54 | scale: 0.95 55 | Head: 56 | name: MultiHead 57 | out_channels_list: 58 | CTCLabelDecode: 6625 #'blank' + ...(6623) + ' ' 59 | head_list: 60 | - CTCHead: 61 | Neck: 62 | name: svtr 63 | dims: 120 64 | depth: 2 65 | hidden_dims: 120 66 | kernel_size: [ 1, 3 ] 67 | use_guide: True 68 | Head: 69 | fc_decay: 0.00001 70 | - NRTRHead: 71 | nrtr_dim: 384 72 | max_text_length: 25 73 | 74 | 75 | ch_PP-OCRv4_rec_server_infer: 76 | model_type: rec 77 | algorithm: SVTR_HGNet 78 | Transform: 79 | Backbone: 80 | name: PPHGNet_small 81 | Head: 82 | name: MultiHead 83 | out_channels_list: 84 | CTCLabelDecode: 6625 #'blank' + ...(6623) + ' ' 85 | head_list: 86 | - CTCHead: 87 | Neck: 88 | name: svtr 89 | dims: 120 90 | depth: 2 91 | hidden_dims: 120 92 | kernel_size: [ 1, 3 ] 93 | use_guide: True 94 | Head: 95 | fc_decay: 0.00001 96 | - NRTRHead: 97 | nrtr_dim: 384 98 | max_text_length: 25 -------------------------------------------------------------------------------- /python/rapidocr_torch/cal_rec_boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import CalRecBoxes 5 | -------------------------------------------------------------------------------- /python/rapidocr_torch/ch_ppocr_cls/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_cls import TextClassifier 5 | -------------------------------------------------------------------------------- /python/rapidocr_torch/ch_ppocr_cls/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import List, Tuple 15 | 16 | import numpy as np 17 | 18 | 19 | class ClsPostProcess: 20 | def __init__(self, label_list: List[str]): 21 | self.label_list = label_list 22 | 23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]: 24 | pred_idxs = preds.argmax(axis=1) 25 | decode_out = [ 26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs) 27 | ] 28 | return decode_out 29 | -------------------------------------------------------------------------------- /python/rapidocr_torch/ch_ppocr_det/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_detect import TextDetector 5 | -------------------------------------------------------------------------------- /python/rapidocr_torch/ch_ppocr_rec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_recognize import TextRecognizer 5 | -------------------------------------------------------------------------------- /python/rapidocr_torch/config.yaml: -------------------------------------------------------------------------------- 1 | Global: 2 | text_score: 0.5 3 | use_det: true 4 | use_cls: true 5 | use_rec: true 6 | print_verbose: false 7 | min_height: 30 8 | width_height_ratio: 8 9 | max_side_len: 2000 10 | min_side_len: 30 11 | return_word_box: false 12 | 13 | intra_op_num_threads: &intra_nums -1 14 | inter_op_num_threads: &inter_nums -1 15 | 16 | Det: 17 | intra_op_num_threads: *intra_nums 18 | inter_op_num_threads: *inter_nums 19 | 20 | use_cuda: false 21 | use_dml: false 22 | 23 | model_path: models/ch_PP-OCRv4_det_infer.pth 24 | 25 | limit_side_len: 736 26 | limit_type: min 27 | 28 | thresh: 0.3 29 | box_thresh: 0.5 30 | max_candidates: 1000 31 | unclip_ratio: 1.5 32 | use_dilation: true 33 | score_mode: fast 34 | 35 | Cls: 36 | intra_op_num_threads: *intra_nums 37 | inter_op_num_threads: *inter_nums 38 | 39 | use_cuda: false 40 | use_dml: false 41 | 42 | model_path: models/ch_ptocr_mobile_v2.0_cls_infer.pth 43 | 44 | cls_image_shape: [3, 48, 192] 45 | cls_batch_num: 6 46 | cls_thresh: 0.9 47 | label_list: ['0', '180'] 48 | 49 | Rec: 50 | intra_op_num_threads: *intra_nums 51 | inter_op_num_threads: *inter_nums 52 | 53 | use_cuda: false 54 | use_dml: false 55 | 56 | model_path: models/ch_PP-OCRv4_rec_infer.pth 57 | 58 | rec_img_shape: [3, 48, 320] 59 | rec_batch_num: 6 60 | -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_torch/modeling/__init__.py -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/architectures/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import copy 16 | 17 | __all__ = ['build_model'] 18 | 19 | 20 | def build_model(config, **kwargs): 21 | from .base_model import BaseModel 22 | 23 | config = copy.deepcopy(config) 24 | module_class = BaseModel(config, **kwargs) 25 | return module_class -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/architectures/base_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from rapidocr_torch.modeling.backbones import build_backbone 3 | from rapidocr_torch.modeling.heads import build_head 4 | from rapidocr_torch.modeling.necks import build_neck 5 | 6 | 7 | class BaseModel(nn.Module): 8 | def __init__(self, config, **kwargs): 9 | """ 10 | the module for OCR. 11 | args: 12 | config (dict): the hyperparameters for the module. 13 | """ 14 | super(BaseModel, self).__init__() 15 | 16 | in_channels = config.get('in_channels', 3) 17 | model_type = config['model_type'] 18 | # build backbone, backbone is needed for det, rec and cls 19 | if 'Backbone' not in config or config['Backbone'] is None: 20 | self.use_backbone = False 21 | else: 22 | self.use_backbone = True 23 | config["Backbone"]['in_channels'] = in_channels 24 | self.backbone = build_backbone(config["Backbone"], model_type) 25 | in_channels = self.backbone.out_channels 26 | 27 | # build neck 28 | # for rec, neck can be cnn, rnn or reshape(None) 29 | # for det, neck can be FPN, BIFPN and so on. 
30 | # for cls, the neck should be None 31 | if 'Neck' not in config or config['Neck'] is None: 32 | self.use_neck = False 33 | else: 34 | self.use_neck = True 35 | config['Neck']['in_channels'] = in_channels 36 | self.neck = build_neck(config['Neck']) 37 | in_channels = self.neck.out_channels 38 | 39 | # build head; a head is needed for det, rec and cls 40 | if 'Head' not in config or config['Head'] is None: 41 | self.use_head = False 42 | else: 43 | self.use_head = True 44 | config["Head"]['in_channels'] = in_channels 45 | self.head = build_head(config["Head"], **kwargs) 46 | 47 | self.return_all_feats = config.get("return_all_feats", False) 48 | 49 | self._initialize_weights() 50 | 51 | def _initialize_weights(self): 52 | # weight initialization 53 | for m in self.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 56 | if m.bias is not None: 57 | nn.init.zeros_(m.bias) 58 | elif isinstance(m, nn.BatchNorm2d): 59 | nn.init.ones_(m.weight) 60 | nn.init.zeros_(m.bias) 61 | elif isinstance(m, nn.Linear): 62 | nn.init.normal_(m.weight, 0, 0.01) 63 | if m.bias is not None: 64 | nn.init.zeros_(m.bias) 65 | elif isinstance(m, nn.ConvTranspose2d): 66 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 67 | if m.bias is not None: 68 | nn.init.zeros_(m.bias) 69 | 70 | 71 | def forward(self, x): 72 | y = dict() 73 | if self.use_backbone: 74 | x = self.backbone(x) 75 | if isinstance(x, dict): 76 | y.update(x) 77 | else: 78 | y["backbone_out"] = x 79 | final_name = "backbone_out" 80 | if self.use_neck: 81 | x = self.neck(x) 82 | if isinstance(x, dict): 83 | y.update(x) 84 | else: 85 | y["neck_out"] = x 86 | final_name = "neck_out" 87 | if self.use_head: 88 | x = self.head(x) 89 | # for multi head, save ctc neck out for udml 90 | if isinstance(x, dict) and 'ctc_neck' in x.keys(): 91 | y['neck_out'] = x['ctc_neck'] 92 | y['head_out'] = x 93 | elif isinstance(x, dict): 94 | y.update(x) 95 | else: 96 | y["head_out"] = x 97 | if self.return_all_feats: 98 | if self.training: 99 | return y 100 | elif isinstance(x, dict): 101 | return x 102 | else: 103 | return {final_name: x} 104 | else: 105 | return x -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | __all__ = ['build_backbone'] 16 | 17 | 18 | def build_backbone(config, model_type): 19 | if model_type == 'det': 20 | from .det_mobilenet_v3 import MobileNetV3 21 | from .rec_lcnetv3 import PPLCNetV3 22 | from .rec_hgnet import PPHGNet_small 23 | support_dict = ['MobileNetV3', 'ResNet', 'ResNet_vd', 'ResNet_SAST', 'PPLCNetV3', 'PPHGNet_small'] 24 | elif model_type == 'rec' or model_type == 'cls': 25 | from .rec_mobilenet_v3 import MobileNetV3 26 | from .rec_svtrnet import SVTRNet 27 | from .rec_lcnetv3 import PPLCNetV3 28 | from .rec_hgnet import PPHGNet_small 29 | support_dict = ['MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB', 30 | 'ResNet31', 'SVTRNet', 'ViTSTR', 'DenseNet', 'PPLCNetV3', 'PPHGNet_small'] 31 | else: 32 | raise NotImplementedError 33 | 34 | module_name = config.pop('name') 35 | assert module_name in support_dict, Exception( 36 | 'when model type is {}, backbone only supports {}'.format(model_type, 37 | support_dict)) 38 | module_class = eval(module_name)(**config) 39 | return module_class -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Hswish(nn.Module): 7 | def __init__(self, inplace=True): 8 | super(Hswish, self).__init__() 9 | self.inplace = inplace 10 | 11 | def forward(self, x): 12 | return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0 13 | 14 | 15 | # out = max(0, min(1, slope*x+offset)) 16 | # paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None) 17 | class Hsigmoid(nn.Module): 18 | def __init__(self, inplace=True): 19 | super(Hsigmoid, self).__init__() 20 | self.inplace = inplace 21 | 22 | def forward(self, x): 23 | # torch: F.relu6(x + 3., inplace=self.inplace) / 6. 24 | # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6.
25 | return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0 26 | 27 | 28 | class GELU(nn.Module): 29 | def __init__(self, inplace=True): 30 | super(GELU, self).__init__() 31 | self.inplace = inplace 32 | 33 | def forward(self, x): 34 | return torch.nn.functional.gelu(x) 35 | 36 | 37 | class Swish(nn.Module): 38 | def __init__(self, inplace=True): 39 | super(Swish, self).__init__() 40 | self.inplace = inplace 41 | 42 | def forward(self, x): 43 | if self.inplace: 44 | x.mul_(torch.sigmoid(x)) 45 | return x 46 | else: 47 | return x * torch.sigmoid(x) 48 | 49 | 50 | class Activation(nn.Module): 51 | def __init__(self, act_type, inplace=True): 52 | super(Activation, self).__init__() 53 | act_type = act_type.lower() 54 | if act_type == "relu": 55 | self.act = nn.ReLU(inplace=inplace) 56 | elif act_type == "relu6": 57 | self.act = nn.ReLU6(inplace=inplace) 58 | elif act_type == "sigmoid": 59 | raise NotImplementedError 60 | elif act_type == "hard_sigmoid": 61 | self.act = Hsigmoid( 62 | inplace 63 | ) # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)# 64 | elif act_type == "hard_swish" or act_type == "hswish": 65 | self.act = Hswish(inplace=inplace) 66 | elif act_type == "leakyrelu": 67 | self.act = nn.LeakyReLU(inplace=inplace) 68 | elif act_type == "gelu": 69 | self.act = GELU(inplace=inplace) 70 | elif act_type == "swish": 71 | self.act = Swish(inplace=inplace) 72 | else: 73 | raise NotImplementedError 74 | 75 | def forward(self, inputs): 76 | return self.act(inputs) 77 | -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | __all__ = ['build_head'] 16 | 17 | 18 | def build_head(config, **kwargs): 19 | # det head 20 | from .det_db_head import DBHead, PFHeadLocal 21 | # rec head 22 | from .rec_ctc_head import CTCHead 23 | from .rec_multi_head import MultiHead 24 | 25 | # cls head 26 | from .cls_head import ClsHead 27 | support_dict = [ 28 | 'DBHead', 'CTCHead', 'ClsHead', 'MultiHead', 'PFHeadLocal', 29 | ] 30 | 31 | module_name = config.pop('name') 32 | char_num = config.pop('char_num', 6625) 33 | assert module_name in support_dict, Exception('head only support {}'.format( 34 | support_dict)) 35 | module_class = eval(module_name)(**config, **kwargs) 36 | return module_class -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/heads/cls_head.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | class ClsHead(nn.Module): 7 | """ 8 | Class orientation 9 | Args: 10 | params(dict): super parameters for build Class network 11 | """ 12 | 13 | def __init__(self, in_channels, class_dim, **kwargs): 14 | super(ClsHead, self).__init__() 15 | self.pool = nn.AdaptiveAvgPool2d(1) 16 | self.fc = nn.Linear( 17 | in_channels, 18 | class_dim, 19 | bias=True) 20 | 21 | def forward(self, x): 22 | x = self.pool(x) 23 | x = torch.reshape(x, shape=[x.shape[0], x.shape[1]]) 24 | x = self.fc(x) 25 | x = F.softmax(x, dim=1) 26 | return x -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/heads/rec_ctc_head.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | class CTCHead(nn.Module): 7 | def __init__(self, 8 | in_channels, 9 | out_channels=6625, 10 | fc_decay=0.0004, 11 | mid_channels=None, 12 | return_feats=False, 13 | **kwargs): 14 | super(CTCHead, self).__init__() 15 | if mid_channels is None: 16 | self.fc = nn.Linear( 17 | in_channels, 18 | out_channels, 19 | bias=True,) 20 | else: 21 | self.fc1 = nn.Linear( 22 | in_channels, 23 | mid_channels, 24 | bias=True, 25 | ) 26 | self.fc2 = nn.Linear( 27 | mid_channels, 28 | out_channels, 29 | bias=True, 30 | ) 31 | 32 | self.out_channels = out_channels 33 | self.mid_channels = mid_channels 34 | self.return_feats = return_feats 35 | 36 | 37 | def forward(self, x, labels=None): 38 | if self.mid_channels is None: 39 | predicts = self.fc(x) 40 | else: 41 | x = self.fc1(x) 42 | predicts = self.fc2(x) 43 | 44 | if self.return_feats: 45 | result = (x, predicts) 46 | else: 47 | result = predicts 48 | 49 | if not self.training: 50 | predicts = F.softmax(predicts, dim=2) 51 | result = predicts 52 | 53 | return result -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/heads/rec_multi_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from rapidocr_torch.modeling.necks.rnn import Im2Seq, SequenceEncoder 5 | from .rec_ctc_head import CTCHead 6 | 7 | class FCTranspose(nn.Module): 8 | def __init__(self, in_channels, out_channels, only_transpose=False): 9 | super().__init__() 10 | self.only_transpose = only_transpose 11 | if not self.only_transpose: 12 | self.fc = nn.Linear(in_channels, out_channels, bias=False) 13 | 14 | def forward(self, x): 15 | if 
self.only_transpose: 16 | return x.permute([0, 2, 1]) 17 | else: 18 | return self.fc(x.permute([0, 2, 1])) 19 | 20 | 21 | class MultiHead(nn.Module): 22 | def __init__(self, in_channels, out_channels_list, **kwargs): 23 | super().__init__() 24 | self.head_list = kwargs.pop('head_list') 25 | 26 | self.gtc_head = 'sar' 27 | assert len(self.head_list) >= 2 28 | for idx, head_name in enumerate(self.head_list): 29 | name = list(head_name)[0] 30 | if name == 'SARHead': 31 | pass 32 | 33 | elif name == 'NRTRHead': 34 | pass 35 | elif name == 'CTCHead': 36 | # ctc neck 37 | self.encoder_reshape = Im2Seq(in_channels) 38 | neck_args = self.head_list[idx][name]['Neck'] 39 | encoder_type = neck_args.pop('name') 40 | self.ctc_encoder = SequenceEncoder(in_channels=in_channels, \ 41 | encoder_type=encoder_type, **neck_args) 42 | # ctc head 43 | head_args = self.head_list[idx][name].get('Head', {}) 44 | if head_args is None: 45 | head_args = {} 46 | self.ctc_head = eval(name)(in_channels=self.ctc_encoder.out_channels, \ 47 | out_channels=out_channels_list['CTCLabelDecode'], **head_args) 48 | else: 49 | raise NotImplementedError( 50 | '{} is not supported in MultiHead yet'.format(name)) 51 | 52 | def forward(self, x, data=None): 53 | ctc_encoder = self.ctc_encoder(x) 54 | return self.ctc_head(ctc_encoder) 55 | 56 | -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | __all__ = ['build_neck'] 16 | 17 | 18 | def build_neck(config): 19 | from .db_fpn import DBFPN, RSEFPN, LKPAN 20 | from .rnn import SequenceEncoder 21 | support_dict = ['DBFPN', 'SequenceEncoder', 'RSEFPN', 'LKPAN'] 22 | 23 | module_name = config.pop('name') 24 | assert module_name in support_dict, Exception('neck only support {}'.format( 25 | support_dict)) 26 | module_class = eval(module_name)(**config) 27 | return module_class 28 | -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/necks/intracl.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class IntraCLBlock(nn.Module): 5 | def __init__(self, in_channels=96, reduce_factor=4): 6 | super(IntraCLBlock, self).__init__() 7 | self.channels = in_channels 8 | self.rf = reduce_factor 9 | self.conv1x1_reduce_channel = nn.Conv2d( 10 | self.channels, self.channels // self.rf, kernel_size=1, stride=1, padding=0 11 | ) 12 | self.conv1x1_return_channel = nn.Conv2d( 13 | self.channels // self.rf, self.channels, kernel_size=1, stride=1, padding=0 14 | ) 15 | 16 | self.v_layer_7x1 = nn.Conv2d( 17 | self.channels // self.rf, 18 | self.channels // self.rf, 19 | kernel_size=(7, 1), 20 | stride=(1, 1), 21 | padding=(3, 0), 22 | ) 23 | self.v_layer_5x1 = nn.Conv2d( 24 | self.channels // self.rf, 25 | self.channels // self.rf, 26 | kernel_size=(5, 1), 27 | stride=(1, 1), 28 | padding=(2, 0), 29 | ) 30 | self.v_layer_3x1 = nn.Conv2d( 31 | self.channels // self.rf, 32 | self.channels // self.rf, 33 | kernel_size=(3, 1), 34 | stride=(1, 1), 35 | padding=(1, 0), 36 | ) 37 | 38 | self.q_layer_1x7 = nn.Conv2d( 39 | self.channels // self.rf, 40 | self.channels // self.rf, 41 | kernel_size=(1, 7), 42 | stride=(1, 1), 43 | padding=(0, 3), 44 | ) 45 | self.q_layer_1x5 = nn.Conv2d( 46 | self.channels // self.rf, 47 | self.channels // self.rf, 48 | kernel_size=(1, 5), 49 | stride=(1, 1), 50 | padding=(0, 2), 51 | ) 52 | self.q_layer_1x3 = nn.Conv2d( 53 | self.channels // self.rf, 54 | self.channels // self.rf, 55 | kernel_size=(1, 3), 56 | stride=(1, 1), 57 | padding=(0, 1), 58 | ) 59 | 60 | # base 61 | self.c_layer_7x7 = nn.Conv2d( 62 | self.channels // self.rf, 63 | self.channels // self.rf, 64 | kernel_size=(7, 7), 65 | stride=(1, 1), 66 | padding=(3, 3), 67 | ) 68 | self.c_layer_5x5 = nn.Conv2d( 69 | self.channels // self.rf, 70 | self.channels // self.rf, 71 | kernel_size=(5, 5), 72 | stride=(1, 1), 73 | padding=(2, 2), 74 | ) 75 | self.c_layer_3x3 = nn.Conv2d( 76 | self.channels // self.rf, 77 | self.channels // self.rf, 78 | kernel_size=(3, 3), 79 | stride=(1, 1), 80 | padding=(1, 1), 81 | ) 82 | 83 | self.bn = nn.BatchNorm2d(self.channels) 84 | self.relu = nn.ReLU() 85 | 86 | def forward(self, x): 87 | x_new = self.conv1x1_reduce_channel(x) 88 | 89 | x_7_c = self.c_layer_7x7(x_new) 90 | x_7_v = self.v_layer_7x1(x_new) 91 | x_7_q = self.q_layer_1x7(x_new) 92 | x_7 = x_7_c + x_7_v + x_7_q 93 | 94 | x_5_c = self.c_layer_5x5(x_7) 95 | x_5_v = self.v_layer_5x1(x_7) 96 | x_5_q = self.q_layer_1x5(x_7) 97 | x_5 = x_5_c + x_5_v + x_5_q 98 | 99 | x_3_c = self.c_layer_3x3(x_5) 100 | x_3_v = self.v_layer_3x1(x_5) 101 | x_3_q = self.q_layer_1x3(x_5) 102 | x_3 = x_3_c + x_3_v + x_3_q 103 | 104 | x_relation = self.conv1x1_return_channel(x_3) 105 | 106 | x_relation = self.bn(x_relation) 107 | x_relation = self.relu(x_relation) 108 | 109 | return x + x_relation 110 | 111 | 112 | def build_intraclblock_list(num_block): 113 | 
IntraCLBlock_list = nn.ModuleList() 114 | for i in range(num_block): 115 | IntraCLBlock_list.append(IntraCLBlock()) 116 | 117 | return IntraCLBlock_list 118 | -------------------------------------------------------------------------------- /python/rapidocr_torch/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_torch/models/.gitkeep -------------------------------------------------------------------------------- /python/rapidocr_torch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import Dict, Union 6 | 7 | import yaml 8 | 9 | from .infer_engine import TorchInferSession 10 | from .load_image import LoadImage, LoadImageError 11 | from .logger import get_logger 12 | from .parse_parameters import UpdateParameters, init_args, update_model_path 13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side 14 | from .vis_res import VisRes 15 | 16 | 17 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]: 18 | with open(yaml_path, "rb") as f: 19 | data = yaml.load(f, Loader=yaml.Loader) 20 | return data 21 | -------------------------------------------------------------------------------- /python/rapidocr_torch/utils/infer_engine.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import Dict, Optional, Union 6 | 7 | import numpy as np 8 | import torch 9 | import yaml 10 | 11 | root_dir = Path(__file__).resolve().parent.parent 12 | DEFAULT_CFG_PATH = root_dir / "arch_config.yaml" 13 | 14 | 15 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]: 16 | with open(yaml_path, "rb") as f: 17 | data = yaml.load(f, Loader=yaml.Loader) 18 | return data 19 | 20 | 21 | from rapidocr_torch.modeling.architectures.base_model import BaseModel 22 | 23 | from .logger import get_logger 24 | 25 | 26 | class TorchInferSession: 27 | def __init__(self, config, mode: Optional[str] = None) -> None: 28 | all_arch_config = read_yaml(DEFAULT_CFG_PATH) 29 | 30 | self.logger = get_logger("TorchInferSession") 31 | self.mode = mode 32 | model_path = Path(config["model_path"]) 33 | self._verify_model(model_path) 34 | file_name = model_path.stem 35 | if file_name not in all_arch_config: 36 | raise ValueError(f"architecture {file_name} is not in arch_config.yaml") 37 | arch_config = all_arch_config[file_name] 38 | self.predictor = BaseModel(arch_config) 39 | self.predictor.load_state_dict(torch.load(model_path, weights_only=True)) 40 | self.predictor.eval() 41 | self.use_gpu = False 42 | if config["use_cuda"]: 43 | self.predictor.cuda() 44 | self.use_gpu = True 45 | 46 | def __call__(self, img: np.ndarray): 47 | with torch.no_grad(): 48 | inp = torch.from_numpy(img) 49 | if self.use_gpu: 50 | inp = inp.cuda() 51 | # unsqueeze so the output indexing stays aligned with the onnx inference path 52 | outputs = self.predictor(inp).unsqueeze(0) 53 | return outputs.cpu().numpy() 54 | 55 | @staticmethod 56 | def _verify_model(model_path): 57 | model_path = Path(model_path) 58 | if not model_path.exists(): 59 | raise FileNotFoundError(f"{model_path} does not exist.") 60 | if not model_path.is_file(): 61 | raise FileExistsError(f"{model_path} is not a file.") 62 | 63
| 64 | class TorchInferError(Exception): 65 | pass 66 | -------------------------------------------------------------------------------- /python/rapidocr_torch/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | from functools import lru_cache 6 | 7 | 8 | @lru_cache(maxsize=32) 9 | def get_logger(name: str) -> logging.Logger: 10 | logger = logging.getLogger(name) 11 | logger.setLevel(logging.DEBUG) 12 | 13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s" 14 | format_str = logging.Formatter(fmt) 15 | 16 | sh = logging.StreamHandler() 17 | sh.setLevel(logging.DEBUG) 18 | 19 | logger.addHandler(sh) 20 | sh.setFormatter(format_str) 21 | return logger 22 | -------------------------------------------------------------------------------- /python/rapidocr_torch/utils/process_img.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from typing import Tuple 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def reduce_max_side( 11 | img: np.ndarray, max_side_len: int = 2000 12 | ) -> Tuple[np.ndarray, float, float]: 13 | h, w = img.shape[:2] 14 | 15 | ratio = 1.0 16 | if max(h, w) > max_side_len: 17 | if h > w: 18 | ratio = float(max_side_len) / h 19 | else: 20 | ratio = float(max_side_len) / w 21 | 22 | resize_h = int(h * ratio) 23 | resize_w = int(w * ratio) 24 | 25 | resize_h = int(round(resize_h / 32) * 32) 26 | resize_w = int(round(resize_w / 32) * 32) 27 | 28 | try: 29 | if int(resize_w) <= 0 or int(resize_h) <= 0: 30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 31 | img = cv2.resize(img, (resize_w, resize_h)) 32 | except Exception as exc: 33 | raise ResizeImgError() from exc 34 | 35 | ratio_h = h / resize_h 36 | ratio_w = w / resize_w 37 | return img, ratio_h, ratio_w 38 | 39 | 40 | def increase_min_side( 41 | img: np.ndarray, min_side_len: int = 30 42 | ) -> Tuple[np.ndarray, float, float]: 43 | h, w = img.shape[:2] 44 | 45 | ratio = 1.0 46 | if min(h, w) < min_side_len: 47 | if h < w: 48 | ratio = float(min_side_len) / h 49 | else: 50 | ratio = float(min_side_len) / w 51 | 52 | resize_h = int(h * ratio) 53 | resize_w = int(w * ratio) 54 | 55 | resize_h = int(round(resize_h / 32) * 32) 56 | resize_w = int(round(resize_w / 32) * 32) 57 | 58 | try: 59 | if int(resize_w) <= 0 or int(resize_h) <= 0: 60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 61 | img = cv2.resize(img, (resize_w, resize_h)) 62 | except Exception as exc: 63 | raise ResizeImgError() from exc 64 | 65 | ratio_h = h / resize_h 66 | ratio_w = w / resize_w 67 | return img, ratio_h, ratio_w 68 | 69 | 70 | def add_round_letterbox( 71 | img: np.ndarray, 72 | padding_tuple: Tuple[int, int, int, int], 73 | ) -> np.ndarray: 74 | padded_img = cv2.copyMakeBorder( 75 | img, 76 | padding_tuple[0], 77 | padding_tuple[1], 78 | padding_tuple[2], 79 | padding_tuple[3], 80 | cv2.BORDER_CONSTANT, 81 | value=(0, 0, 0), 82 | ) 83 | return padded_img 84 | 85 | 86 | class ResizeImgError(Exception): 87 | pass 88 | -------------------------------------------------------------------------------- /python/requirements.txt: -------------------------------------------------------------------------------- 1 | pyclipper>=1.2.0 2 | opencv_python>=4.5.1.48 3 | numpy>=1.19.5,<3.0.0 4 | six>=1.15.0 5 | 
Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug 6 | PyYAML 7 | Pillow 8 | tqdm 9 | omegaconf 10 | requests 11 | colorlog -------------------------------------------------------------------------------- /python/requirements_ort.txt: -------------------------------------------------------------------------------- 1 | pyclipper>=1.2.0 2 | opencv_python>=4.5.1.48 3 | numpy>=1.19.5,<3.0.0 4 | six>=1.15.0 5 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug 6 | PyYAML 7 | Pillow 8 | onnxruntime>=1.7.0 9 | tqdm -------------------------------------------------------------------------------- /python/requirements_paddle.txt: -------------------------------------------------------------------------------- 1 | pyclipper>=1.2.0 2 | opencv_python>=4.5.1.48 3 | numpy>=1.19.5,<3.0.0 4 | six>=1.15.0 5 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug 6 | PyYAML 7 | Pillow 8 | tqdm -------------------------------------------------------------------------------- /python/requirements_torch.txt: -------------------------------------------------------------------------------- 1 | pyclipper>=1.2.0 2 | opencv_python>=4.5.1.48 3 | numpy>=1.19.5,<3.0.0 4 | six>=1.15.0 5 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug 6 | PyYAML 7 | Pillow 8 | tqdm 9 | torch 10 | torchvision -------------------------------------------------------------------------------- /python/requirements_vino.txt: -------------------------------------------------------------------------------- 1 | pyclipper>=1.2.0 2 | openvino>=2022.2.0,<=2024.0.0 3 | opencv_python>=4.5.1.48 4 | numpy>=1.19.5,<3.0.0 5 | six>=1.15.0 6 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug 7 | PyYAML 8 | Pillow 9 | tqdm -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: Union[Path, str]) -> List[str]: 13 | with open(txt_path, "r", encoding="utf-8") as f: 14 | data = [v.rstrip("\n") for v in f] 15 | return data 16 | 17 | 18 | def get_readme(): 19 | root_dir = Path(__file__).resolve().parent.parent 20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr.md") 21 | print(readme_path) 22 | with open(readme_path, "r", encoding="utf-8") as f: 23 | readme = f.read() 24 | return readme 25 | 26 | 27 | MODULE_NAME = "rapidocr" 28 | 29 | obtainer = GetPyPiLatestVersion() 30 | try: 31 | latest_version = obtainer(MODULE_NAME) 32 | except Exception as e: 33 | latest_version = "0.0.0" 34 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True) 35 | 36 | if len(sys.argv) > 2: 37 | match_str = " ".join(sys.argv[2:]) 38 | matched_versions = obtainer.extract_version(match_str) 39 | if matched_versions: 40 | VERSION_NUM = matched_versions 41 | sys.argv = sys.argv[:2] 42 | 43 | project_urls = { 44 | "Documentation": "https://rapidai.github.io/RapidOCRDocs", 45 | "Changelog": "https://github.com/RapidAI/RapidOCR/releases", 46 | } 47 | 48 | setuptools.setup( 49 | name=MODULE_NAME, 50 | version=VERSION_NUM, 51 | platforms="Any", 52 | description="Awesome OCR Library", 53 | long_description=get_readme(), 54 | long_description_content_type="text/markdown", 55 | author="SWHL", 56 | author_email="liekkaskono@163.com", 57 | url="https://github.com/RapidAI/RapidOCR", 58 | 
project_urls=project_urls, 59 | license="Apache-2.0", 60 | include_package_data=True, 61 | install_requires=read_txt("requirements.txt"), 62 | package_dir={"": MODULE_NAME}, 63 | packages=setuptools.find_namespace_packages(where=MODULE_NAME), 64 | package_data={"": ["*.onnx", "*.yaml", "*.txt"]}, 65 | keywords=[ 66 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr" 67 | ], 68 | classifiers=[ 69 | "Programming Language :: Python :: 3.6", 70 | "Programming Language :: Python :: 3.7", 71 | "Programming Language :: Python :: 3.8", 72 | "Programming Language :: Python :: 3.9", 73 | "Programming Language :: Python :: 3.10", 74 | "Programming Language :: Python :: 3.11", 75 | "Programming Language :: Python :: 3.12", 76 | ], 77 | python_requires=">=3.6,<4", 78 | entry_points={ 79 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 80 | }, 81 | ) 82 | -------------------------------------------------------------------------------- /python/setup_onnxruntime.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: Union[Path, str]) -> List[str]: 13 | with open(txt_path, "r", encoding="utf-8") as f: 14 | data = [v.rstrip("\n") for v in f] 15 | return data 16 | 17 | 18 | def get_readme(): 19 | root_dir = Path(__file__).resolve().parent.parent 20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_ort.md") 21 | print(readme_path) 22 | with open(readme_path, "r", encoding="utf-8") as f: 23 | readme = f.read() 24 | return readme 25 | 26 | 27 | MODULE_NAME = "rapidocr_onnxruntime" 28 | 29 | obtainer = GetPyPiLatestVersion() 30 | latest_version = obtainer(MODULE_NAME) 31 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True) 32 | 33 | if len(sys.argv) > 2: 34 | match_str = " ".join(sys.argv[2:]) 35 | matched_versions = obtainer.extract_version(match_str) 36 | if matched_versions: 37 | VERSION_NUM = matched_versions 38 | sys.argv = sys.argv[:2] 39 | 40 | project_urls = { 41 | "Documentation": "https://rapidai.github.io/RapidOCRDocs", 42 | "Changelog": "https://github.com/RapidAI/RapidOCR/releases", 43 | } 44 | 45 | setuptools.setup( 46 | name=MODULE_NAME, 47 | version=VERSION_NUM, 48 | platforms="Any", 49 | description="A cross platform OCR Library based on OnnxRuntime.", 50 | long_description=get_readme(), 51 | long_description_content_type="text/markdown", 52 | author="SWHL", 53 | author_email="liekkaskono@163.com", 54 | url="https://github.com/RapidAI/RapidOCR", 55 | project_urls=project_urls, 56 | license="Apache-2.0", 57 | include_package_data=True, 58 | install_requires=read_txt("requirements_ort.txt"), 59 | package_dir={"": MODULE_NAME}, 60 | packages=setuptools.find_namespace_packages(where=MODULE_NAME), 61 | package_data={"": ["*.onnx", "*.yaml"]}, 62 | keywords=[ 63 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr" 64 | ], 65 | classifiers=[ 66 | "Programming Language :: Python :: 3.6", 67 | "Programming Language :: Python :: 3.7", 68 | "Programming Language :: Python :: 3.8", 69 | "Programming Language :: Python :: 3.9", 70 | "Programming Language :: Python :: 3.10", 71 | "Programming Language :: Python :: 3.11", 72 | "Programming Language :: Python :: 3.12", 73 | ], 74 | 
python_requires=">=3.6,<3.13", 75 | entry_points={ 76 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 77 | }, 78 | ) 79 | -------------------------------------------------------------------------------- /python/setup_openvino.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: Union[Path, str]) -> List[str]: 13 | with open(txt_path, "r", encoding="utf-8") as f: 14 | data = [v.rstrip("\n") for v in f] 15 | return data 16 | 17 | 18 | def get_readme(): 19 | root_dir = Path(__file__).resolve().parent.parent 20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_vino.md") 21 | print(readme_path) 22 | with open(readme_path, "r", encoding="utf-8") as f: 23 | readme = f.read() 24 | return readme 25 | 26 | 27 | MODULE_NAME = "rapidocr_openvino" 28 | 29 | obtainer = GetPyPiLatestVersion() 30 | latest_version = obtainer(MODULE_NAME) 31 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True) 32 | 33 | if len(sys.argv) > 2: 34 | match_str = " ".join(sys.argv[2:]) 35 | matched_versions = obtainer.extract_version(match_str) 36 | if matched_versions: 37 | VERSION_NUM = matched_versions 38 | sys.argv = sys.argv[:2] 39 | 40 | setuptools.setup( 41 | name=MODULE_NAME, 42 | version=VERSION_NUM, 43 | platforms="Any", 44 | description="A cross platform OCR Library based on OpenVINO.", 45 | long_description=get_readme(), 46 | long_description_content_type="text/markdown", 47 | author="SWHL", 48 | author_email="liekkaskono@163.com", 49 | url="https://github.com/RapidAI/RapidOCR", 50 | license="Apache-2.0", 51 | include_package_data=True, 52 | install_requires=read_txt("requirements_vino.txt"), 53 | package_dir={"": MODULE_NAME}, 54 | packages=setuptools.find_namespace_packages(where=MODULE_NAME), 55 | package_data={"": ["*.onnx", "*.yaml", "*.txt"]}, 56 | keywords=[ 57 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr" 58 | ], 59 | classifiers=[ 60 | "Programming Language :: Python :: 3.6", 61 | "Programming Language :: Python :: 3.7", 62 | "Programming Language :: Python :: 3.8", 63 | "Programming Language :: Python :: 3.9", 64 | "Programming Language :: Python :: 3.10", 65 | "Programming Language :: Python :: 3.11", 66 | "Programming Language :: Python :: 3.12", 67 | ], 68 | python_requires=">=3.6,<3.13", 69 | entry_points={ 70 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 71 | }, 72 | ) 73 | -------------------------------------------------------------------------------- /python/setup_paddle.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: Union[Path, str]) -> List[str]: 13 | with open(txt_path, "r", encoding="utf-8") as f: 14 | data = [v.rstrip("\n") for v in f] 15 | return data 16 | 17 | 18 | def get_readme(): 19 | root_dir = Path(__file__).resolve().parent.parent 20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_paddle.md") 21 | print(readme_path) 22 | with 
open(readme_path, "r", encoding="utf-8") as f: 23 | readme = f.read() 24 | return readme 25 | 26 | 27 | MODULE_NAME = "rapidocr_paddle" 28 | 29 | obtainer = GetPyPiLatestVersion() 30 | try: 31 | latest_version = obtainer(MODULE_NAME) 32 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True) 33 | except: 34 | VERSION_NUM = "0.0.1" 35 | 36 | if len(sys.argv) > 2: 37 | match_str = " ".join(sys.argv[2:]) 38 | matched_versions = obtainer.extract_version(match_str) 39 | if matched_versions: 40 | VERSION_NUM = matched_versions 41 | sys.argv = sys.argv[:2] 42 | 43 | setuptools.setup( 44 | name=MODULE_NAME, 45 | version=VERSION_NUM, 46 | platforms="Any", 47 | description="A cross platform OCR Library based on PaddlePaddle.", 48 | long_description=get_readme(), 49 | long_description_content_type="text/markdown", 50 | author="SWHL", 51 | author_email="liekkaskono@163.com", 52 | url="https://github.com/RapidAI/RapidOCR", 53 | license="Apache-2.0", 54 | include_package_data=True, 55 | install_requires=read_txt("requirements_paddle.txt"), 56 | package_dir={"": MODULE_NAME}, 57 | packages=setuptools.find_namespace_packages(where=MODULE_NAME), 58 | package_data={ 59 | "": ["*.txt", "*.yaml", "*.pdiparams", "*.pdiparams.info", "*.pdmodel"] 60 | }, 61 | keywords=[ 62 | "ocr,text_detection,text_recognition,dbnet,paddlepaddle,paddleocr,rapidocr" 63 | ], 64 | classifiers=[ 65 | "Programming Language :: Python :: 3.6", 66 | "Programming Language :: Python :: 3.7", 67 | "Programming Language :: Python :: 3.8", 68 | "Programming Language :: Python :: 3.9", 69 | "Programming Language :: Python :: 3.10", 70 | "Programming Language :: Python :: 3.11", 71 | "Programming Language :: Python :: 3.12", 72 | ], 73 | python_requires=">=3.6,<3.13", 74 | entry_points={ 75 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 76 | }, 77 | extras_require={ 78 | "cpu": ["paddlepaddle"], 79 | "gpu": ["paddlepaddle-gpu"], 80 | }, 81 | ) 82 | -------------------------------------------------------------------------------- /python/setup_torch.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: Union[Path, str]) -> List[str]: 13 | with open(txt_path, "r", encoding="utf-8") as f: 14 | data = [v.rstrip("\n") for v in f] 15 | return data 16 | 17 | 18 | def get_readme(): 19 | root_dir = Path(__file__).resolve().parent.parent 20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_ort.md") 21 | print(readme_path) 22 | with open(readme_path, "r", encoding="utf-8") as f: 23 | readme = f.read() 24 | return readme 25 | 26 | 27 | MODULE_NAME = "rapidocr_torch" 28 | 29 | obtainer = GetPyPiLatestVersion() 30 | try: 31 | latest_version = obtainer(MODULE_NAME) 32 | except Exception: 33 | latest_version = "0.0.0" 34 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True) 35 | 36 | if len(sys.argv) > 2: 37 | match_str = " ".join(sys.argv[2:]) 38 | matched_versions = obtainer.extract_version(match_str) 39 | if matched_versions: 40 | VERSION_NUM = matched_versions 41 | sys.argv = sys.argv[:2] 42 | 43 | project_urls = { 44 | "Documentation": "https://rapidai.github.io/RapidOCRDocs", 45 | "Changelog": "https://github.com/RapidAI/RapidOCR/releases", 46 | } 47 | 48 | setuptools.setup( 49 
| name=MODULE_NAME, 50 | version=VERSION_NUM, 51 | platforms="Any", 52 | description="A cross platform OCR Library based on pytorch.", 53 | long_description=get_readme(), 54 | long_description_content_type="text/markdown", 55 | author="SWHL", 56 | author_email="liekkaskono@163.com", 57 | url="https://github.com/RapidAI/RapidOCR", 58 | project_urls=project_urls, 59 | license="Apache-2.0", 60 | include_package_data=True, 61 | install_requires=read_txt("requirements_torch.txt"), 62 | package_dir={"": MODULE_NAME}, 63 | packages=setuptools.find_namespace_packages(where=MODULE_NAME), 64 | package_data={"": ["*.pth", "*.yaml", "*.txt"]}, 65 | keywords=[ 66 | "ocr,text_detection,text_recognition,db,onnxruntime,pytorch,paddleocr,openvino,rapidocr" 67 | ], 68 | classifiers=[ 69 | "Programming Language :: Python :: 3.10", 70 | "Programming Language :: Python :: 3.11", 71 | "Programming Language :: Python :: 3.12", 72 | ], 73 | python_requires=">=3.10,<3.13", 74 | entry_points={ 75 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 76 | }, 77 | ) 78 | -------------------------------------------------------------------------------- /python/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | -------------------------------------------------------------------------------- /python/tests/base_module.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import importlib 5 | import sys 6 | from dataclasses import dataclass 7 | from pathlib import Path 8 | from typing import Optional, Union 9 | 10 | import requests 11 | import yaml 12 | from tqdm import tqdm 13 | 14 | 15 | class BaseModule: 16 | def __init__(self, package_name: str = "rapidocr_onnxruntime"): 17 | self.package_name = package_name 18 | self.root_dir = Path(__file__).resolve().parent.parent 19 | self.package_dir = self.root_dir / self.package_name 20 | self.tests_dir = self.root_dir / "tests" 21 | 22 | sys.path.append(str(self.root_dir)) 23 | sys.path.append(str(self.package_dir)) 24 | 25 | def init_module(self, module_name: str, class_name: Optional[str] = None): 26 | if class_name is None: 27 | module_part = importlib.import_module(f"{self.package_name}") 28 | return module_part 29 | module_part = importlib.import_module(f"{self.package_name}.{module_name}") 30 | return getattr(module_part, class_name) 31 | 32 | @staticmethod 33 | def read_yaml(yaml_path: str): 34 | with open(yaml_path, "rb") as f: 35 | data = yaml.load(f, Loader=yaml.Loader) 36 | return data 37 | 38 | 39 | def download_file(url: str, save_path: Union[str, Path]): 40 | response = requests.get(url, stream=True, timeout=60) 41 | status_code = response.status_code 42 | 43 | if status_code != 200: 44 | raise DownloadModelError("Something went wrong while downloading models") 45 | 46 | total_size_in_bytes = int(response.headers.get("content-length", 1)) 47 | block_size = 1024 # 1 Kibibyte 48 | with tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) as pb: 49 | with open(save_path, "wb") as file: 50 | for data in response.iter_content(block_size): 51 | pb.update(len(data)) 52 | file.write(data) 53 | 54 | 55 | class DownloadModelError(Exception): 56 | pass 57 | 58 | 59 | @dataclass 60 | class Platform: 61 | mac: str = "Darwin" 62 | windows: str = "Windows" 63 | linux: str = "Linux" 64 | 
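The tests' BaseModule above resolves which backend package is under test, imports its classes by name, and reads its YAML config, while download_file streams a file with a tqdm progress bar. A minimal sketch of how these helpers might be combined in a test setup follows; the model URL is a placeholder and the use of the Det section of config.yaml is an assumption for illustration, not something the tests themselves pin down:

from base_module import BaseModule, download_file

# point the helper at the package we want to exercise
base = BaseModule(package_name="rapidocr_onnxruntime")
config = base.read_yaml(base.package_dir / "config.yaml")

# hypothetical model URL (assumption); the real tests obtain models elsewhere
model_url = "https://example.com/ch_PP-OCRv4_det_infer.onnx"
model_path = base.package_dir / "models" / "ch_PP-OCRv4_det_infer.onnx"
if not model_path.exists():
    download_file(model_url, model_path)

# lazily import the detector class from the package under test
TextDetector = base.init_module("ch_ppocr_det", "TextDetector")
detector = TextDetector(config["Det"])  # assumes the Det section carries model_path and thresholds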
-------------------------------------------------------------------------------- /python/tests/test_files/black_font_color_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/black_font_color_transparent.png -------------------------------------------------------------------------------- /python/tests/test_files/ch_doc_server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/ch_doc_server.png -------------------------------------------------------------------------------- /python/tests/test_files/ch_en_num.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/ch_en_num.jpg -------------------------------------------------------------------------------- /python/tests/test_files/devanagari.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/devanagari.jpg -------------------------------------------------------------------------------- /python/tests/test_files/empty_black.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/empty_black.jpg -------------------------------------------------------------------------------- /python/tests/test_files/en.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/en.jpg -------------------------------------------------------------------------------- /python/tests/test_files/issue_170.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/issue_170.png -------------------------------------------------------------------------------- /python/tests/test_files/japan.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/japan.jpg -------------------------------------------------------------------------------- /python/tests/test_files/korean.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/korean.jpg -------------------------------------------------------------------------------- /python/tests/test_files/short.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/short.png -------------------------------------------------------------------------------- /python/tests/test_files/test_letterbox_like.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/test_letterbox_like.jpg -------------------------------------------------------------------------------- /python/tests/test_files/test_without_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/test_without_det.jpg -------------------------------------------------------------------------------- /python/tests/test_files/text_cls.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_cls.jpg -------------------------------------------------------------------------------- /python/tests/test_files/text_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_det.jpg -------------------------------------------------------------------------------- /python/tests/test_files/text_rec.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_rec.jpg -------------------------------------------------------------------------------- /python/tests/test_files/text_vertical_words.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_vertical_words.png -------------------------------------------------------------------------------- /python/tests/test_files/two_dim_image.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/two_dim_image.npy -------------------------------------------------------------------------------- /python/tests/test_files/white_font_color_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/white_font_color_transparent.png --------------------------------------------------------------------------------
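Taken together, the setup_*.py scripts above publish each inference backend as its own wheel, all wired to the same console entry point pattern ({package}.main:main) and exercised by the tests against the images listed here. A hedged usage sketch against the onnxruntime wheel, mirroring the project's documented examples (the return value is assumed to be a list of [box, text, score] triples plus per-stage timings, or None when nothing is detected):

from rapidocr_onnxruntime import RapidOCR

# picks up the packaged config.yaml defaults: use_det, use_cls and use_rec all enabled
engine = RapidOCR()
result, elapse = engine("tests/test_files/ch_en_num.jpg")
if result is not None:
    for box, text, score in result:
        print(text, score)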