├── .gitattributes ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug.md │ └── config.yml └── workflows │ ├── SyncToGitee.yml │ ├── gen_whl_to_pypi_rapidocr.yml │ ├── gen_whl_to_pypi_rapidocr_ort.yml │ ├── gen_whl_to_pypi_rapidocr_paddle.yml │ ├── gen_whl_to_pypi_rapidocr_torch.yml │ ├── gen_whl_to_pypi_rapidocr_vino.yml │ ├── gen_whl_to_pypi_rapidocr_web.yml │ └── package_ocrweb.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── android └── README.md ├── api └── README.md ├── assets ├── RapidOCRDemo.ipynb ├── RapidOCR_LOGO.png └── colab-badge.svg ├── cliff.toml ├── cpp └── README.md ├── docs ├── README_zh.md ├── doc_whl_rapidocr.md ├── doc_whl_rapidocr_ort.md ├── doc_whl_rapidocr_paddle.md ├── doc_whl_rapidocr_vino.md └── doc_whl_rapidocr_web.md ├── dotnet └── README.md ├── ios └── README.md ├── jvm └── README.md ├── ocrweb ├── README.md ├── rapidocr_web │ ├── __init__.py │ ├── ocrweb.py │ ├── ocrweb.spec │ ├── static │ │ ├── css │ │ │ ├── favicon.ico │ │ │ └── main.css │ │ └── js │ │ │ └── jquery-3.0.0.min.js │ ├── task.py │ └── templates │ │ └── index.html ├── requirements.txt └── setup.py ├── ocrweb_multi ├── README.md ├── assets │ └── ocr_web_multi.jpg ├── build.py ├── config.yaml ├── main.py ├── main.spec ├── models │ └── .gitkeep ├── rapidocr │ ├── __init__.py │ ├── classify.py │ ├── detect.py │ ├── detect_process.py │ ├── main.py │ ├── rapid_ocr_api.py │ └── recognize.py ├── requirements.txt ├── static │ ├── css │ │ └── main.css │ ├── favicon.ico │ ├── hint.svg │ ├── index.html │ └── js │ │ └── jquery-3.0.0.min.js ├── utils │ ├── config.py │ └── utils.py ├── wrapper.c └── wrapper.rc └── python ├── README.md ├── demo.py ├── rapidocr ├── __init__.py ├── cal_rec_boxes │ ├── __init__.py │ └── main.py ├── ch_ppocr_cls │ ├── __init__.py │ ├── main.py │ └── utils.py ├── ch_ppocr_det │ ├── __init__.py │ ├── main.py │ └── utils.py ├── ch_ppocr_rec │ ├── __init__.py │ ├── main.py │ ├── typings.py │ └── utils.py ├── cli.py ├── config.yaml ├── default_models.yaml ├── inference_engine │ ├── __init__.py │ ├── base.py │ ├── onnxruntime.py │ ├── openvino.py │ ├── paddle.py │ └── torch.py ├── main.py ├── models │ └── .gitkeep ├── networks │ ├── __init__.py │ ├── arch_config.yaml │ ├── architectures │ │ ├── __init__.py │ │ └── base_model.py │ ├── backbones │ │ ├── __init__.py │ │ ├── det_mobilenet_v3.py │ │ ├── rec_hgnet.py │ │ ├── rec_lcnetv3.py │ │ ├── rec_mobilenet_v3.py │ │ ├── rec_mv1_enhance.py │ │ └── rec_svtrnet.py │ ├── common.py │ ├── heads │ │ ├── __init__.py │ │ ├── cls_head.py │ │ ├── det_db_head.py │ │ ├── rec_ctc_head.py │ │ └── rec_multi_head.py │ └── necks │ │ ├── __init__.py │ │ ├── db_fpn.py │ │ ├── intracl.py │ │ └── rnn.py └── utils │ ├── __init__.py │ ├── download_file.py │ ├── load_image.py │ ├── logger.py │ ├── output.py │ ├── parse_parameters.py │ ├── process_img.py │ ├── typings.py │ ├── utils.py │ └── vis_res.py ├── rapidocr_onnxruntime ├── __init__.py ├── cal_rec_boxes │ ├── __init__.py │ └── main.py ├── ch_ppocr_cls │ ├── __init__.py │ ├── text_cls.py │ └── utils.py ├── ch_ppocr_det │ ├── __init__.py │ ├── text_detect.py │ └── utils.py ├── ch_ppocr_rec │ ├── __init__.py │ ├── text_recognize.py │ └── utils.py ├── config.yaml ├── main.py ├── models │ └── .gitkeep └── utils │ ├── __init__.py │ ├── infer_engine.py │ ├── load_image.py │ ├── logger.py │ ├── parse_parameters.py │ ├── process_img.py │ └── vis_res.py ├── rapidocr_openvino ├── __init__.py ├── cal_rec_boxes │ ├── __init__.py │ └── main.py ├── ch_ppocr_cls │ ├── __init__.py │ ├── 
text_cls.py │ └── utils.py ├── ch_ppocr_det │ ├── __init__.py │ ├── text_detect.py │ └── utils.py ├── ch_ppocr_rec │ ├── __init__.py │ ├── ppocr_keys_v1.txt │ ├── text_recognize.py │ └── utils.py ├── config.yaml ├── main.py ├── models │ └── .gitkeep └── utils │ ├── __init__.py │ ├── infer_engine.py │ ├── load_image.py │ ├── logger.py │ ├── parse_parameters.py │ ├── process_img.py │ └── vis_res.py ├── rapidocr_paddle ├── __init__.py ├── cal_rec_boxes │ ├── __init__.py │ └── main.py ├── ch_ppocr_cls │ ├── __init__.py │ ├── text_cls.py │ └── utils.py ├── ch_ppocr_det │ ├── __init__.py │ ├── text_detect.py │ └── utils.py ├── ch_ppocr_rec │ ├── __init__.py │ ├── ppocr_keys_v1.txt │ ├── text_recognize.py │ └── utils.py ├── config.yaml ├── main.py ├── models │ └── .gitkeep └── utils │ ├── __init__.py │ ├── infer_engine.py │ ├── load_image.py │ ├── logger.py │ ├── parse_parameters.py │ ├── process_img.py │ └── vis_res.py ├── rapidocr_torch ├── __init__.py ├── arch_config.yaml ├── cal_rec_boxes │ ├── __init__.py │ └── main.py ├── ch_ppocr_cls │ ├── __init__.py │ ├── text_cls.py │ └── utils.py ├── ch_ppocr_det │ ├── __init__.py │ ├── text_detect.py │ └── utils.py ├── ch_ppocr_rec │ ├── __init__.py │ ├── ppocr_keys_v1.txt │ ├── text_recognize.py │ └── utils.py ├── config.yaml ├── main.py ├── modeling │ ├── __init__.py │ ├── architectures │ │ ├── __init__.py │ │ └── base_model.py │ ├── backbones │ │ ├── __init__.py │ │ ├── det_mobilenet_v3.py │ │ ├── rec_hgnet.py │ │ ├── rec_lcnetv3.py │ │ ├── rec_mobilenet_v3.py │ │ └── rec_svtrnet.py │ ├── common.py │ ├── heads │ │ ├── __init__.py │ │ ├── cls_head.py │ │ ├── det_db_head.py │ │ ├── rec_ctc_head.py │ │ └── rec_multi_head.py │ └── necks │ │ ├── __init__.py │ │ ├── db_fpn.py │ │ ├── intracl.py │ │ └── rnn.py ├── models │ └── .gitkeep └── utils │ ├── __init__.py │ ├── infer_engine.py │ ├── load_image.py │ ├── logger.py │ ├── parse_parameters.py │ ├── process_img.py │ └── vis_res.py ├── requirements.txt ├── requirements_ort.txt ├── requirements_paddle.txt ├── requirements_torch.txt ├── requirements_vino.txt ├── setup.py ├── setup_onnxruntime.py ├── setup_openvino.py ├── setup_paddle.py ├── setup_torch.py └── tests ├── __init__.py ├── base_module.py ├── test_files ├── black_font_color_transparent.png ├── ch_doc_server.png ├── ch_en_num.jpg ├── devanagari.jpg ├── empty_black.jpg ├── en.jpg ├── issue_170.png ├── japan.jpg ├── korean.jpg ├── short.png ├── test_letterbox_like.jpg ├── test_without_det.jpg ├── text_cls.jpg ├── text_det.jpg ├── text_rec.jpg ├── text_vertical_words.png ├── two_dim_image.npy └── white_font_color_transparent.png ├── test_main.py ├── test_ort.py ├── test_paddle.py ├── test_torch.py └── test_vino.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Explicitly declare text files you want to always be normalized and converted 5 | # to native line endings on checkout. 6 | *.c text 7 | *.h text 8 | *.py text 9 | *.md text 10 | *.js text 11 | *.cpp text 12 | 13 | # Declare files that will always have CRLF line endings on checkout. 14 | *.sln text eol=crlf 15 | 16 | # Denote all files that are truly binary and should not be modified. 
17 | *.png binary 18 | *.jpg binary 19 | *.pdf binary -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: https://rapidai.github.io/RapidOCRDocs/sponsor/ 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐞 Bug 3 | about: Bug 4 | title: 'Bug' 5 | labels: 'Bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | #### 问题描述 / Problem Description 11 | 12 | 13 | #### 运行环境 / Runtime Environment 14 | 15 | 16 | #### 复现代码 / Reproduction Code 17 | ```python 18 | 19 | ``` 20 | 21 | #### 可能解决方案 / Possible solutions 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: ❓ Questions 4 | url: https://github.com/RapidAI/RapidOCR/discussions/categories/q-a 5 | about: Please use the community forum for help and questions regarding RapidOCR. 6 | - name: 💡 Feature requests and ideas 7 | url: https://github.com/RapidAI/RapidOCR/discussions/categories/ideas 8 | about: Please vote for and post new feature ideas in the community forum. 9 | - name: 📖 Documentation 10 | url: https://rapidai.github.io/RapidOCRDocs/docs/ 11 | about: A great place to find instructions and answers about RapidOCR. 12 | -------------------------------------------------------------------------------- /.github/workflows/SyncToGitee.yml: -------------------------------------------------------------------------------- 1 | name: SyncToGitee 2 | on: 3 | push: 4 | branches: 5 | - main 6 | jobs: 7 | repo-sync: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout source codes 11 | uses: actions/checkout@v4 12 | 13 | - name: Mirror the Github organization repos to Gitee. 14 | uses: Yikun/hub-mirror-action@v1.4 15 | with: 16 | src: 'github/RapidAI' 17 | dst: 'gitee/RapidAI' 18 | dst_key: ${{ secrets.GITEE_PRIVATE_KEY }} 19 | dst_token: ${{ secrets.GITEE_TOKEN }} 20 | force_update: true 21 | # only sync this repo 22 | static_list: "RapidOCR" 23 | debug: true 24 | 25 | - name: Mirror the Github organization repos to Gitee. 
26 | uses: Yikun/hub-mirror-action@v1.4 27 | with: 28 | src: 'github/RapidAI' 29 | dst: 'gitee/openKylin' 30 | dst_key: ${{ secrets.GITEE_PRIVATE_KEY }} 31 | dst_token: ${{ secrets.GITEE_TOKEN }} 32 | force_update: true 33 | # only sync this repo 34 | static_list: "RapidOCR" 35 | debug: true -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr to pypi 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | env: 9 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v3.0.0.zip 10 | 11 | jobs: 12 | UnitTesting: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Pull latest code 16 | uses: actions/checkout@v4 17 | 18 | - name: Set up Python 3.10 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: '3.10' 22 | architecture: 'x64' 23 | 24 | - name: Display Python version 25 | run: python -c "import sys; print(sys.version)" 26 | 27 | - name: Unit testings 28 | run: | 29 | cd python 30 | pip install -r requirements.txt 31 | pip install pytest wheel get_pypi_latest_version openvino==2023.3.0 onnxruntime 32 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 33 | python -m pip install paddlepaddle==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/ 34 | 35 | pytest tests/test_main.py 36 | 37 | GenerateWHL_PushPyPi: 38 | needs: UnitTesting 39 | runs-on: ubuntu-latest 40 | 41 | steps: 42 | - uses: actions/checkout@v4 43 | 44 | - name: Set up Python 3.10 45 | uses: actions/setup-python@v4 46 | with: 47 | python-version: '3.10' 48 | architecture: 'x64' 49 | 50 | - name: Download models 51 | run: | 52 | cd python 53 | wget $RESOURCES_URL 54 | ZIP_NAME=${RESOURCES_URL##*/} 55 | DIR_NAME=${ZIP_NAME%.*} 56 | unzip $ZIP_NAME 57 | cp $DIR_NAME/resources/models/*.* rapidocr/models 58 | 59 | - name: Run setup.py 60 | run: | 61 | cd python 62 | pip install setuptools get_pypi_latest_version wheel 63 | mkdir rapidocr_t 64 | mv rapidocr rapidocr_t 65 | mv rapidocr_t rapidocr 66 | cd rapidocr 67 | echo "from .rapidocr.main import RapidOCR, VisRes" > __init__.py 68 | 69 | cd .. 
70 | python -m pip install --upgrade pip 71 | python setup.py bdist_wheel ${{ github.ref_name }} 72 | mv dist ../ 73 | 74 | - name: Publish distribution 📦 to PyPI 75 | uses: pypa/gh-action-pypi-publish@v1.5.0 76 | with: 77 | password: ${{ secrets.RAPIDOCR }} 78 | packages_dir: dist/ 79 | -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr_onnxruntime to pypi 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - 'python/rapidocr_onnxruntime/**' 8 | - 'docs/doc_whl_rapidocr_ort.md' 9 | - 'python/setup_onnxruntime.py' 10 | # - '.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml' 11 | 12 | 13 | env: 14 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip 15 | 16 | jobs: 17 | UnitTesting: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - name: Pull latest code 21 | uses: actions/checkout@v4 22 | 23 | - name: Set up Python 3.10 24 | uses: actions/setup-python@v4 25 | with: 26 | python-version: '3.10' 27 | architecture: 'x64' 28 | 29 | - name: Display Python version 30 | run: python -c "import sys; print(sys.version)" 31 | 32 | - name: Unit testings 33 | run: | 34 | wget $RESOURCES_URL 35 | ZIP_NAME=${RESOURCES_URL##*/} 36 | DIR_NAME=${ZIP_NAME%.*} 37 | unzip $DIR_NAME 38 | cp $DIR_NAME/resources/models/*.onnx python/rapidocr_onnxruntime/models/ 39 | 40 | cd python 41 | pip install -r requirements_ort.txt 42 | pip install pytest wheel get_pypi_latest_version openvino 43 | 44 | cd tests 45 | pytest test_ort.py 46 | 47 | GenerateWHL_PushPyPi: 48 | needs: UnitTesting 49 | runs-on: ubuntu-latest 50 | 51 | steps: 52 | - uses: actions/checkout@v4 53 | 54 | - name: Set up Python 3.10 55 | uses: actions/setup-python@v4 56 | with: 57 | python-version: '3.10' 58 | architecture: 'x64' 59 | 60 | - name: Download models 61 | run: | 62 | cd python 63 | wget $RESOURCES_URL 64 | ZIP_NAME=${RESOURCES_URL##*/} 65 | DIR_NAME=${ZIP_NAME%.*} 66 | unzip $ZIP_NAME 67 | cp $DIR_NAME/resources/models/*.onnx rapidocr_onnxruntime/models 68 | 69 | - name: Run setup_onnxruntime.py 70 | run: | 71 | cd python 72 | pip install setuptools get_pypi_latest_version wheel 73 | mkdir rapidocr_onnxruntime_t 74 | mv rapidocr_onnxruntime rapidocr_onnxruntime_t 75 | mv rapidocr_onnxruntime_t rapidocr_onnxruntime 76 | cd rapidocr_onnxruntime 77 | echo "from .rapidocr_onnxruntime.main import RapidOCR, VisRes" > __init__.py 78 | 79 | cd .. 
80 | python -m pip install --upgrade pip 81 | python setup_onnxruntime.py bdist_wheel ${{ github.ref_name }} 82 | mv dist ../ 83 | 84 | - name: Publish distribution 📦 to PyPI 85 | uses: pypa/gh-action-pypi-publish@v1.5.0 86 | with: 87 | password: ${{ secrets.PYPI_API_TOKEN }} 88 | packages_dir: dist/ 89 | -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr_paddle.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr_paddle to pypi 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - 'python/rapidocr_paddle/**' 8 | - 'docs/doc_whl_rapidocr_paddle.md' 9 | - 'python/setup_paddle.py' 10 | - '.github/workflows/gen_whl_to_pypi_rapidocr_paddle.yml' 11 | - 'python/requirements_paddle.txt' 12 | # tags: 13 | # - v* 14 | 15 | env: 16 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/paddle_models_v4.zip 17 | 18 | jobs: 19 | UnitTesting: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Pull latest code 23 | uses: actions/checkout@v4 24 | 25 | - name: Set up Python 3.10 26 | uses: actions/setup-python@v4 27 | with: 28 | python-version: '3.10' 29 | architecture: 'x64' 30 | 31 | - name: Display Python version 32 | run: python -c "import sys; print(sys.version)" 33 | 34 | - name: Unit testings 35 | run: | 36 | wget $RESOURCES_URL 37 | ZIP_NAME=${RESOURCES_URL##*/} 38 | DIR_NAME=${ZIP_NAME%.*} 39 | unzip $DIR_NAME 40 | cp -r models/* python/rapidocr_paddle/models/ 41 | cd python 42 | pip install -r requirements_paddle.txt 43 | pip install pytest wheel get_pypi_latest_version 44 | pip install paddlepaddle==3.0.0rc0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/ 45 | cd tests 46 | pytest test_paddle.py 47 | 48 | GenerateWHL_PushPyPi: 49 | needs: UnitTesting 50 | runs-on: ubuntu-latest 51 | 52 | steps: 53 | - uses: actions/checkout@v4 54 | 55 | - name: Set up Python 3.10 56 | uses: actions/setup-python@v4 57 | with: 58 | python-version: '3.10' 59 | architecture: 'x64' 60 | 61 | - name: Download models 62 | run: | 63 | cd python 64 | wget $RESOURCES_URL 65 | ZIP_NAME=${RESOURCES_URL##*/} 66 | DIR_NAME=${ZIP_NAME%.*} 67 | unzip $ZIP_NAME 68 | cp -r models/* rapidocr_paddle/models 69 | 70 | - name: Run setup_paddle.py 71 | run: | 72 | cd python 73 | pip install setuptools get_pypi_latest_version wheel 74 | 75 | mkdir rapidocr_paddle_t 76 | mv rapidocr_paddle rapidocr_paddle_t 77 | mv rapidocr_paddle_t rapidocr_paddle 78 | cd rapidocr_paddle 79 | echo "from .rapidocr_paddle.main import RapidOCR, VisRes" > __init__.py 80 | 81 | cd .. 
82 | python -m pip install --upgrade pip 83 | 84 | echo "${{ github.event.head_commit.message }}" 85 | python setup_paddle.py bdist_wheel "${{ github.event.head_commit.message }}" 86 | mv dist ../ 87 | 88 | - name: Publish distribution 📦 to PyPI 89 | uses: pypa/gh-action-pypi-publish@v1.5.0 90 | with: 91 | password: ${{ secrets.RAPIDOCR_OPENVINO }} 92 | packages_dir: dist/ 93 | -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr_torch.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr_torch to pypi 2 | 3 | on: 4 | push: 5 | # branches: [ main ] 6 | # paths: 7 | # - 'python/rapidocr_torch/**' 8 | # - 'python/setup_torch.py' 9 | # - '.github/workflows/gen_whl_to_pypi_rapidocr_torch.yml' 10 | # - 'python/requirements_torch.txt' 11 | tags: 12 | - torch_v* 13 | 14 | env: 15 | RESOURCES_URL: https://github.com/Joker1212/RapidOCR/releases/download/v0.0.0/torch_test.zip 16 | 17 | jobs: 18 | UnitTesting: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Pull latest code 22 | uses: actions/checkout@v4 23 | 24 | - name: Set up Python 3.7 25 | uses: actions/setup-python@v4 26 | with: 27 | python-version: '3.7' 28 | architecture: 'x64' 29 | 30 | - name: Display Python version 31 | run: python -c "import sys; print(sys.version)" 32 | 33 | - name: Unit testings 34 | run: | 35 | wget $RESOURCES_URL 36 | ZIP_NAME=${RESOURCES_URL##*/} 37 | DIR_NAME=${ZIP_NAME%.*} 38 | unzip $DIR_NAME 39 | cp $DIR_NAME/resources/models/*.pth python/rapidocr_torch/models/ 40 | 41 | cd python 42 | pip install -r requirements_torch.txt 43 | pip install pytest wheel get_pypi_latest_version 44 | 45 | cd tests 46 | pytest test_torch.py 47 | 48 | GenerateWHL_PushPyPi: 49 | needs: UnitTesting 50 | runs-on: ubuntu-latest 51 | 52 | steps: 53 | - uses: actions/checkout@v4 54 | 55 | - name: Set up Python 3.7 56 | uses: actions/setup-python@v4 57 | with: 58 | python-version: '3.7' 59 | architecture: 'x64' 60 | 61 | # - name: Set SSH Environment 62 | # env: 63 | # DEPLOY_KEYS: ${{ secrets.GEN_PYTHON_SDK }} 64 | # run: | 65 | # mkdir -p ~/.ssh/ 66 | # echo "$DEPLOY_KEYS" > ~/.ssh/id_rsa 67 | # chmod 600 ~/.ssh/id_rsa 68 | # chmod 700 ~/.ssh && chmod 600 ~/.ssh/* 69 | 70 | - name: Download models 71 | run: | 72 | cd python 73 | wget $RESOURCES_URL 74 | ZIP_NAME=${RESOURCES_URL##*/} 75 | DIR_NAME=${ZIP_NAME%.*} 76 | unzip $ZIP_NAME 77 | cp $DIR_NAME/resources/models/*.pth rapidocr_torch/models/ 78 | 79 | - name: Run setup_torch.py 80 | run: | 81 | cd python 82 | pip install setuptools get_pypi_latest_version wheel 83 | mkdir rapidocr_torch_t 84 | mv rapidocr_torch rapidocr_torch_t 85 | mv rapidocr_torch_t rapidocr_torch 86 | cd rapidocr_torch 87 | echo "from .rapidocr_torch.main import RapidOCR, VisRes" > __init__.py 88 | 89 | cd .. 
90 | python -m pip install --upgrade pip 91 | python setup_torch.py bdist_wheel ${{ github.ref_name }} 92 | mv dist ../ 93 | 94 | # - name: Publish distribution 📦 to PyPI 95 | # uses: pypa/gh-action-pypi-publish@v1.5.0 96 | # with: 97 | # password: ${{ secrets.PYPI_API_TOKEN }} 98 | # packages_dir: dist/ 99 | - name: Publish distribution 📦 to Test PyPI 100 | uses: pypa/gh-action-pypi-publish@v1.5.0 101 | with: 102 | username: TEST_PYPI_API_TOKEN 103 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 104 | repository_url: https://test.pypi.org/legacy/ 105 | packages_dir: dist/ 106 | -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr_vino.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr_openvino to pypi 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - 'python/rapidocr_openvino/**' 8 | - 'docs/doc_whl_rapidocr_vino.md' 9 | - 'python/setup_openvino.py' 10 | - '.github/workflows/gen_whl_to_pypi_rapidocr_vino.yml' 11 | - 'python/requirements_vino.txt' 12 | 13 | 14 | env: 15 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip 16 | 17 | jobs: 18 | UnitTesting: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Pull latest code 22 | uses: actions/checkout@v4 23 | 24 | - name: Set up Python 3.10 25 | uses: actions/setup-python@v4 26 | with: 27 | python-version: '3.10' 28 | architecture: 'x64' 29 | 30 | - name: Display Python version 31 | run: python -c "import sys; print(sys.version)" 32 | 33 | - name: Unit testings 34 | run: | 35 | wget $RESOURCES_URL 36 | ZIP_NAME=${RESOURCES_URL##*/} 37 | DIR_NAME=${ZIP_NAME%.*} 38 | unzip $DIR_NAME 39 | cp $DIR_NAME/resources/models/*.onnx python/rapidocr_openvino/models/ 40 | cd python 41 | pip install -r requirements_vino.txt 42 | pip install pytest wheel get_pypi_latest_version onnxruntime 43 | cd tests 44 | pytest test_vino.py 45 | 46 | GenerateWHL_PushPyPi: 47 | needs: UnitTesting 48 | runs-on: ubuntu-latest 49 | 50 | steps: 51 | - uses: actions/checkout@v4 52 | 53 | - name: Set up Python 3.10 54 | uses: actions/setup-python@v4 55 | with: 56 | python-version: '3.10' 57 | architecture: 'x64' 58 | 59 | - name: Download models 60 | run: | 61 | cd python 62 | wget $RESOURCES_URL 63 | ZIP_NAME=${RESOURCES_URL##*/} 64 | DIR_NAME=${ZIP_NAME%.*} 65 | unzip $ZIP_NAME 66 | cp $DIR_NAME/resources/models/*.onnx rapidocr_openvino/models 67 | 68 | - name: Run setup_openvino.py 69 | run: | 70 | cd python 71 | pip install setuptools get_pypi_latest_version wheel 72 | mkdir rapidocr_openvino_t 73 | mv rapidocr_openvino rapidocr_openvino_t 74 | mv rapidocr_openvino_t rapidocr_openvino 75 | cd rapidocr_openvino 76 | echo "from .rapidocr_openvino.main import RapidOCR, VisRes" > __init__.py 77 | 78 | cd .. 
79 | python -m pip install --upgrade pip 80 | python setup_openvino.py bdist_wheel "${{ github.event.head_commit.message }}" 81 | mv dist ../ 82 | 83 | - name: Publish distribution 📦 to PyPI 84 | uses: pypa/gh-action-pypi-publish@v1.5.0 85 | with: 86 | password: ${{ secrets.RAPIDOCR_OPENVINO }} 87 | packages_dir: dist/ 88 | -------------------------------------------------------------------------------- /.github/workflows/gen_whl_to_pypi_rapidocr_web.yml: -------------------------------------------------------------------------------- 1 | name: Push rapidocr_web to pypi 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - 'ocrweb/rapidocr_web/**' 8 | - '!ocrweb/rapidocr_web/ocr_web.spec' 9 | - 'docs/doc_whl_rapidocr_web.md' 10 | - 'ocrweb/setup.py' 11 | - '.github/workflows/gen_whl_to_pypi_rapidocr_web.yml' 12 | 13 | jobs: 14 | GenerateWHL_PushPyPi: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up Python 3.7 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: '3.7' 24 | architecture: 'x64' 25 | 26 | - name: Set SSH Environment 27 | env: 28 | DEPLOY_KEYS: ${{ secrets.GEN_PYTHON_SDK }} 29 | run: | 30 | mkdir -p ~/.ssh/ 31 | echo "$DEPLOY_KEYS" > ~/.ssh/id_rsa 32 | chmod 600 ~/.ssh/id_rsa 33 | chmod 700 ~/.ssh && chmod 600 ~/.ssh/* 34 | 35 | - name: Run setup.py 36 | run: | 37 | cd ocrweb 38 | pip install -r requirements.txt 39 | 40 | python -m pip install --upgrade pip 41 | python setup.py bdist_wheel "${{ github.event.head_commit.message }}" 42 | 43 | - name: Publish distribution 📦 to PyPI 44 | uses: pypa/gh-action-pypi-publish@v1.5.0 45 | with: 46 | password: ${{ secrets.RAPIDOCR_OPENVINO }} 47 | packages_dir: ocrweb/dist/ 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | .pytest_cache 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | # *.manifest 37 | # *.spec 38 | *.res 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | #idea 136 | .vs 137 | .vscode 138 | .idea 139 | /images 140 | /models 141 | 142 | #models 143 | *.onnx 144 | 145 | *.ttf 146 | *.ttc 147 | 148 | long1.jpg 149 | 150 | *.bin 151 | *.mapping 152 | *.xml 153 | 154 | *.pdiparams 155 | *.pdiparams.info 156 | *.pdmodel 157 | 158 | .DS_Store 159 | *.npy -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://gitee.com/SWHL/autoflake 3 | rev: v2.1.1 4 | hooks: 5 | - id: autoflake 6 | args: 7 | [ 8 | "--recursive", 9 | "--in-place", 10 | "--remove-all-unused-imports", 11 | "--ignore-init-module-imports", 12 | ] 13 | files: \.py$ 14 | - repo: https://gitee.com/SWHL/black 15 | rev: 23.1.0 16 | hooks: 17 | - id: black 18 | files: \.py$ 19 | -------------------------------------------------------------------------------- /android/README.md: -------------------------------------------------------------------------------- 1 | See [RapidOcrAndroidOnnx](https://github.com/RapidAI/RapidOcrAndroidOnnx) for details. 
2 | -------------------------------------------------------------------------------- /api/README.md: -------------------------------------------------------------------------------- 1 | ### See [RapidOCRAPI](https://github.com/RapidAI/RapidOCRAPI) for details 2 | 3 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_api/usage/) 4 | -------------------------------------------------------------------------------- /assets/RapidOCR_LOGO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/assets/RapidOCR_LOGO.png -------------------------------------------------------------------------------- /assets/colab-badge.svg: -------------------------------------------------------------------------------- 1 | Open in ColabOpen in Colab 2 | -------------------------------------------------------------------------------- /cpp/README.md: -------------------------------------------------------------------------------- 1 | See [RapidOcrNcnn](https://github.com/RapidAI/RapidOcrNcnn) for details. 2 | 3 | See [RapidOcrOnnx](https://github.com/RapidAI/RapidOcrOnnx) for details. 4 | -------------------------------------------------------------------------------- /docs/doc_whl_rapidocr.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs) 2 | -------------------------------------------------------------------------------- /docs/doc_whl_rapidocr_ort.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr/usage/) 2 | -------------------------------------------------------------------------------- /docs/doc_whl_rapidocr_paddle.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_paddle/usage/) 2 | -------------------------------------------------------------------------------- /docs/doc_whl_rapidocr_vino.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr/usage/) 2 | -------------------------------------------------------------------------------- /docs/doc_whl_rapidocr_web.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_web/rapidocr_web/) 2 | -------------------------------------------------------------------------------- /dotnet/README.md: -------------------------------------------------------------------------------- 1 | See [RapidOCRCSharp](https://github.com/RapidAI/RapidOCRCSharp) for details. 2 | -------------------------------------------------------------------------------- /ios/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | A volunteer contributor is needed; please get in touch directly via the QQ group: 887298230 4 | 5 | A contributor is wanted. 6 | -------------------------------------------------------------------------------- /jvm/README.md: -------------------------------------------------------------------------------- 1 | See [RapidOcrNcnnJvm](https://github.com/RapidAI/RapidOcrNcnnJvm) for details. 
2 | 3 | See [RapidOcrOnnxJvm](https://github.com/RapidAI/RapidOcrOnnxJvm) for details. 4 | -------------------------------------------------------------------------------- /ocrweb/README.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_web/usage/) 2 | -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/ocrweb.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import argparse 5 | from pathlib import Path 6 | from wsgiref.simple_server import make_server 7 | 8 | from flask import Flask, render_template, request 9 | 10 | try: 11 | from rapidocr_web.task import OCRWebUtils 12 | except: 13 | from task import OCRWebUtils 14 | 15 | root_dir = Path(__file__).resolve().parent 16 | 17 | app = Flask(__name__, template_folder="templates") 18 | app.config["MAX_CONTENT_LENGTH"] = 3 * 1024 * 1024 19 | processor = OCRWebUtils() 20 | 21 | 22 | @app.route("/") 23 | def index(): 24 | return render_template("index.html") 25 | 26 | 27 | @app.route("/ocr", methods=["POST"]) 28 | def ocr(): 29 | if request.method == "POST": 30 | img_str = request.get_json().get("file", None) 31 | ocr_res = processor(img_str) 32 | return ocr_res 33 | 34 | 35 | def main(): 36 | parser = argparse.ArgumentParser("rapidocr_web") 37 | parser.add_argument("-ip", "--ip", type=str, default="0.0.0.0", help="IP Address") 38 | parser.add_argument("-p", "--port", type=int, default=9003, help="IP port") 39 | args = parser.parse_args() 40 | 41 | print(f"Successfully launched and visit http://{args.ip}:{args.port} to view.") 42 | server = make_server(args.ip, args.port, app) 43 | server.serve_forever() 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/ocrweb.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | from pathlib import Path 3 | 4 | import rapidocr_onnxruntime 5 | 6 | block_cipher = None 7 | 8 | package_name = 'rapidocr_onnxruntime' 9 | install_dir = Path(rapidocr_onnxruntime.__file__).resolve().parent 10 | 11 | onnx_paths = list(install_dir.rglob('*.onnx')) 12 | yaml_paths = list(install_dir.rglob('*.yaml')) 13 | 14 | onnx_add_data = [(str(v.parent), f'{package_name}/{v.parent.name}') 15 | for v in onnx_paths] 16 | 17 | yaml_add_data = [] 18 | for v in yaml_paths: 19 | if package_name == v.parent.name: 20 | yaml_add_data.append((str(v.parent / '*.yaml'), package_name)) 21 | else: 22 | yaml_add_data.append( 23 | (str(v.parent / '*.yaml'), f'{package_name}/{v.parent.name}')) 24 | 25 | add_data = list(set(yaml_add_data + onnx_add_data)) 26 | 27 | 28 | a = Analysis( 29 | ['ocrweb.py'], 30 | pathex=[], 31 | binaries=[], 32 | datas=add_data, 33 | hiddenimports=[], 34 | hookspath=[], 35 | hooksconfig={}, 36 | runtime_hooks=[], 37 | excludes=[], 38 | win_no_prefer_redirects=False, 39 | win_private_assemblies=False, 40 | cipher=block_cipher, 41 | noarchive=False, 42 | ) 43 | pyz = 
PYZ(a.pure, a.zipped_data, cipher=block_cipher) 44 | 45 | exe = EXE( 46 | pyz, 47 | a.scripts, 48 | [], 49 | exclude_binaries=True, 50 | name='RapidOCRWeb', 51 | debug=False, 52 | bootloader_ignore_signals=False, 53 | strip=False, 54 | upx=True, 55 | console=True, 56 | disable_windowed_traceback=False, 57 | argv_emulation=False, 58 | target_arch=None, 59 | codesign_identity=None, 60 | entitlements_file=None, 61 | icon=['./static/css/favicon.ico'], 62 | ) 63 | coll = COLLECT( 64 | exe, 65 | a.binaries, 66 | a.zipfiles, 67 | a.datas, 68 | strip=False, 69 | upx=True, 70 | upx_exclude=[], 71 | name='RapidOCRWeb', 72 | ) 73 | -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/static/css/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb/rapidocr_web/static/css/favicon.ico -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/static/css/main.css: -------------------------------------------------------------------------------- 1 | body{ 2 | background-color:#ebedef; 3 | min-height: 100%; 4 | margin: 0; 5 | } 6 | .btn-gen { 7 | /* position: absolute;top:540px; left: 500px; */ 8 | background-color: #00a1d6; 9 | text-align: center; 10 | border-radius: 18px; 11 | margin-top: 5px; 12 | font-size: 15px; 13 | padding: 5px 10px; 14 | height: 20px; 15 | width: 120px; 16 | text-transform: uppercase; 17 | color: #fff; 18 | border:none; 19 | } 20 | .btn-gen:hover, 21 | .btn-gen:focus { 22 | border-color: #23AAEE; 23 | background-color: #23AAEE; 24 | color: white; 25 | cursor: pointer; 26 | } 27 | .area{ 28 | text-align: center; 29 | height: auto; 30 | margin: auto; 31 | } 32 | .leftarea{ 33 | float: left; 34 | width: 50%; 35 | height: auto; 36 | position: relative; 37 | } 38 | 39 | .rightarea{ 40 | float: left; 41 | width: 50%; 42 | height: auto; 43 | } 44 | .table{ 45 | width: auto; 46 | height: auto; 47 | margin: 0 auto; 48 | 49 | } 50 | .span_title{ 51 | width: 98%; 52 | height: 36px; 53 | margin-top: 4px; 54 | line-height: 32px; 55 | background-color: #00a1d6; 56 | border: 1px solid #00a1d6; 57 | border-radius: 20px; 58 | color: #fff; 59 | display: inline-block; 60 | text-align: center; 61 | font-size: 22px; 62 | transition: .3s; 63 | box-sizing: border-box; 64 | } 65 | .uplodNote{ 66 | font-size: 10px; 67 | color:#A1A1A1; 68 | } 69 | 70 | a{ 71 | text-decoration:none; 72 | } 73 | 74 | /* wrapper */ 75 | .leftarea>#wrapper { 76 | position: absolute; top: 45px; left:0px; 77 | width: 98%; 78 | height: 100%; 79 | background: 80 | linear-gradient(#1a98ca, #1a98ca), 81 | linear-gradient(90deg, #ffffff33 1px,transparent 0,transparent 19px), 82 | linear-gradient( #ffffff33 1px,transparent 0,transparent 19px), 83 | linear-gradient(transparent, #1a98ca); 84 | background-size:100% 1.5%, 10% 100%,100% 8%, 100% 100%; 85 | background-repeat:no-repeat, repeat, repeat, no-repeat; 86 | background-position: 0% 100%, 0 0, 0 0, 0 0; 87 | /* 初始位置 */ 88 | clip-path: polygon(0% 0%, 100% 0%, 100% 1.5%, 0% 1.5%); 89 | /* 添加动画效果 */ 90 | animation: move 1s infinite linear; 91 | } 92 | 93 | @keyframes move{ 94 | to{ 95 | background-position: 0 100%,0 0, 0 0, 0 0; 96 | /* 终止位置 */ 97 | clip-path: polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%); 98 | } 99 | } -------------------------------------------------------------------------------- /ocrweb/rapidocr_web/task.py: 
-------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import base64 5 | import copy 6 | import json 7 | from collections import namedtuple 8 | from functools import reduce 9 | from typing import List, Tuple, Union 10 | 11 | import cv2 12 | import numpy as np 13 | from rapidocr_onnxruntime import RapidOCR 14 | 15 | 16 | class OCRWebUtils: 17 | def __init__(self) -> None: 18 | self.ocr = RapidOCR() 19 | self.WebReturn = namedtuple( 20 | "WebReturn", 21 | ["image", "total_elapse", "elapse_part", "rec_res", "det_boxes"], 22 | ) 23 | 24 | def __call__(self, img_content: str) -> namedtuple: 25 | if img_content is None: 26 | raise ValueError("img is None") 27 | img = self.prepare_img(img_content) 28 | ocr_res, elapse = self.ocr(img) 29 | return self.get_web_result(img, ocr_res, elapse) 30 | 31 | def prepare_img(self, img_str: str) -> np.ndarray: 32 | img_str = img_str.split(",")[1] 33 | image = base64.b64decode(img_str + "=" * (-len(img_str) % 4)) 34 | nparr = np.frombuffer(image, np.uint8) 35 | image = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 36 | if image.ndim == 2: 37 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) 38 | return image 39 | 40 | def get_web_result( 41 | self, img: np.ndarray, ocr_res: List, elapse: List 42 | ) -> Tuple[Union[str, List, str, str]]: 43 | if ocr_res is None: 44 | total_elapse, elapse_part = 0, "" 45 | img_str = self.img_to_base64(img) 46 | rec_res = json.dumps([], indent=2, ensure_ascii=False) 47 | boxes = "" 48 | else: 49 | boxes, txts, scores = list(zip(*ocr_res)) 50 | scores = [f"{v:.4f}" for v in scores] 51 | rec_res = list(zip(range(len(txts)), txts, scores)) 52 | rec_res = json.dumps(rec_res, indent=2, ensure_ascii=False) 53 | 54 | det_im = self.draw_text_det_res(np.array(boxes), img) 55 | img_str = self.img_to_base64(det_im) 56 | 57 | total_elapse = reduce(lambda x, y: float(x) + float(y), elapse) 58 | elapse_part = ",".join([f"{x:.4f}" for x in elapse]) 59 | 60 | web_return = self.WebReturn( 61 | image=img_str, 62 | total_elapse=f"{total_elapse:.4f}", 63 | elapse_part=elapse_part, 64 | rec_res=rec_res, 65 | det_boxes=boxes, 66 | ) 67 | return json.dumps(web_return._asdict()) 68 | 69 | @staticmethod 70 | def img_to_base64(img) -> str: 71 | img = cv2.imencode(".png", img)[1] 72 | img_str = str(base64.b64encode(img))[2:-1] 73 | return img_str 74 | 75 | @staticmethod 76 | def draw_text_det_res(dt_boxes: np.ndarray, raw_im: np.ndarray) -> np.ndarray: 77 | src_im = copy.deepcopy(raw_im) 78 | for i, box in enumerate(dt_boxes): 79 | box = np.array(box).astype(np.int32).reshape(-1, 2) 80 | cv2.polylines(src_im, [box], True, color=(0, 0, 255), thickness=1) 81 | cv2.putText( 82 | src_im, 83 | str(i), 84 | (int(box[0][0]), int(box[0][1])), 85 | cv2.FONT_HERSHEY_SIMPLEX, 86 | 0.5, 87 | (0, 0, 0), 88 | 2, 89 | ) 90 | return src_im 91 | -------------------------------------------------------------------------------- /ocrweb/requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow<=10.0.0 2 | requests 3 | Flask>=2.1.0, <=3.0.0 4 | rapidocr_onnxruntime>=1.3.0,<=2.0.0 5 | get_pypi_latest_version 6 | wheel 7 | -------------------------------------------------------------------------------- /ocrweb/setup.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import 
Path 6 | 7 | import setuptools 8 | from get_pypi_latest_version import GetPyPiLatestVersion 9 | 10 | 11 | def get_readme(): 12 | root_dir = Path(__file__).resolve().parent.parent 13 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_web.md") 14 | with open(readme_path, "r", encoding="utf-8") as f: 15 | readme = f.read() 16 | return readme 17 | 18 | 19 | MODULE_NAME = "rapidocr_web" 20 | 21 | obtainer = GetPyPiLatestVersion() 22 | latest_version = obtainer(MODULE_NAME) 23 | VERSION_NUM = obtainer.version_add_one(latest_version) 24 | 25 | # 优先提取commit message中的语义化版本号,如无,则自动加1 26 | if len(sys.argv) > 2: 27 | match_str = " ".join(sys.argv[2:]) 28 | matched_versions = obtainer.extract_version(match_str) 29 | if matched_versions: 30 | VERSION_NUM = matched_versions 31 | sys.argv = sys.argv[:2] 32 | 33 | setuptools.setup( 34 | name=MODULE_NAME, 35 | version=VERSION_NUM, 36 | platforms="Any", 37 | description="A cross platform OCR Library based on OnnxRuntime.", 38 | long_description=get_readme(), 39 | long_description_content_type="text/markdown", 40 | author="SWHL", 41 | author_email="liekkaskono@163.com", 42 | url="https://github.com/RapidAI/RapidOCR", 43 | download_url="https://github.com/RapidAI/RapidOCR.git", 44 | license="Apache-2.0", 45 | include_package_data=True, 46 | install_requires=["requests", "Flask>=2.1.0", "rapidocr_onnxruntime"], 47 | packages=[ 48 | MODULE_NAME, 49 | f"{MODULE_NAME}.static.css", 50 | f"{MODULE_NAME}.static.js", 51 | f"{MODULE_NAME}.templates", 52 | ], 53 | package_data={"": ["*.ico", "*.css", "*.js", "*.html"]}, 54 | keywords=[ 55 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr" 56 | ], 57 | classifiers=[ 58 | "Programming Language :: Python :: 3.6", 59 | "Programming Language :: Python :: 3.7", 60 | "Programming Language :: Python :: 3.8", 61 | "Programming Language :: Python :: 3.9", 62 | "Programming Language :: Python :: 3.10", 63 | "Programming Language :: Python :: 3.11", 64 | "Programming Language :: Python :: 3.12", 65 | ], 66 | python_requires=">=3.6,<3.13", 67 | entry_points={ 68 | "console_scripts": [ 69 | f"{MODULE_NAME}={MODULE_NAME}.ocrweb:main", 70 | ], 71 | }, 72 | ) 73 | -------------------------------------------------------------------------------- /ocrweb_multi/README.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_web/ocrweb_multi/) 2 | -------------------------------------------------------------------------------- /ocrweb_multi/assets/ocr_web_multi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/assets/ocr_web_multi.jpg -------------------------------------------------------------------------------- /ocrweb_multi/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | print("Compile ocrweb") 5 | os.system("pyinstaller -y main.spec") 6 | 7 | print("Compile wrapper") 8 | os.system("windres .\wrapper.rc -O coff -o wrapper.res") 9 | os.system("gcc .\wrapper.c wrapper.res -o dist/ocrweb.exe") 10 | 11 | print("Copy config.yaml") 12 | shutil.copy2("config.yaml", "dist/config.yaml") 13 | 14 | print("Copy models") 15 | shutil.copytree("models", "dist/models", dirs_exist_ok=True) 16 | os.remove("dist/models/.gitkeep") 17 | 18 | print("Pack to ocrweb.zip") 19 | 
shutil.make_archive("ocrweb", "zip", "dist") 20 | 21 | print("Done") 22 | -------------------------------------------------------------------------------- /ocrweb_multi/config.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | host: 127.0.0.1 3 | port: 8001 4 | # OCR接口Token, 为null时将跳过Token验证 5 | token: null 6 | 7 | global: 8 | use_cuda: false 9 | verbose: false 10 | cuda_provider: 11 | device_id: 0 12 | arena_extend_strategy: kNextPowerOfTwo 13 | cudnn_conv_algo_search: EXHAUSTIVE 14 | do_copy_in_default_stream: true 15 | 16 | # 模型配置 17 | models: 18 | # 位置检测模型 19 | detect: 20 | det_en: 21 | path: models/en_PP-OCRv3_det_infer.onnx 22 | config: &detectConfig 23 | pre_process: 24 | - class: DetResizeForTest 25 | limit_side_len: 736 26 | limit_type: min 27 | - class: NormalizeImage 28 | std: [0.229, 0.224, 0.225] 29 | mean: [0.485, 0.456, 0.406] 30 | # 1 / 255 31 | scale: 0.00392156862745098 32 | order: hwc 33 | - class: ToCHWImage 34 | - class: KeepKeys 35 | keep_keys: ["image", "shape"] 36 | post_process: 37 | thresh: 0.3 38 | box_thresh: 0.5 39 | max_candidates: 1000 40 | unclip_ratio: 1.6 41 | use_dilation: true 42 | det_ch: 43 | path: models/ch_PP-OCRv3_det_infer.onnx 44 | config: *detectConfig 45 | det_ml: 46 | path: models/ch_PP-OCRv3_det_infer.onnx 47 | config: *detectConfig 48 | # 方向检测模型 49 | classify: 50 | cls_ml: 51 | path: models/ch_ppocr_mobile_v2.0_cls_infer.meta.onnx 52 | config: 53 | batch_size: 8 54 | score_thresh: 0.9 55 | # 文字识别模型 56 | recognize: 57 | rec_ch: 58 | path: models/ch_PP-OCRv3_rec_infer.meta.onnx 59 | config: &recognizeConfig 60 | batch_size: 8 61 | rec_cht: 62 | path: models/chinese_cht_PP-OCRv3_rec_infer.meta.onnx 63 | config: *recognizeConfig 64 | rec_en: 65 | path: models/en_PP-OCRv3_rec_infer.meta.onnx 66 | config: *recognizeConfig 67 | rec_ja: 68 | path: models/japan_PP-OCRv3_rec_infer.meta.onnx 69 | config: *recognizeConfig 70 | 71 | # 多语言配置 72 | languages: 73 | ch: 74 | name: 中文 75 | models: 76 | detect: det_ch 77 | classify: cls_ml 78 | recognize: rec_ch 79 | config: &languageConfig 80 | text_score: 0.5 81 | use_angle_cls: true 82 | verbose: false 83 | min_height: 30 84 | cht: 85 | name: 繁体中文 86 | models: 87 | detect: det_ch 88 | classify: cls_ml 89 | recognize: rec_cht 90 | config: *languageConfig 91 | ja: 92 | name: 日文 93 | models: 94 | detect: det_ch 95 | classify: cls_ml 96 | recognize: rec_ja 97 | config: *languageConfig 98 | en: 99 | name: 英文 100 | models: 101 | detect: det_en 102 | classify: cls_ml 103 | recognize: rec_en 104 | config: *languageConfig 105 | -------------------------------------------------------------------------------- /ocrweb_multi/main.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | import cv2 6 | import numpy as np 7 | from flask import Flask, send_file, request, make_response 8 | from waitress import serve 9 | 10 | 11 | from rapidocr.main import detect_recognize 12 | from utils.config import conf 13 | from utils.utils import tojson, parse_bool 14 | 15 | app = Flask(__name__) 16 | log = logging.getLogger("app") 17 | # 设置上传文件大小 18 | app.config["MAX_CONTENT_LENGTH"] = 3 * 1024 * 1024 19 | 20 | 21 | @app.route("/") 22 | def index(): 23 | return send_file("static/index.html") 24 | 25 | 26 | def json_response(data, status=200): 27 | return make_response(tojson(data), status, {"content-type": "application/json"}) 28 | 29 | 30 | 
@app.route("/lang") 31 | def get_languages(): 32 | """返回可用语言列表""" 33 | data = [ 34 | {"code": key, "name": val["name"]} for key, val in conf["languages"].items() 35 | ] 36 | result = {"msg": "OK", "data": data} 37 | log.info("Send langs: %s", data) 38 | return json_response(result) 39 | 40 | 41 | @app.route("/ocr", methods=["POST", "GET"]) 42 | def ocr(): 43 | """执行文字识别""" 44 | if conf["server"].get("token"): 45 | if request.values.get("token") != conf["server"]["token"]: 46 | return json_response({"msg": "invalid token"}, status=403) 47 | 48 | lang = request.values.get("lang") or "ch" 49 | detect = parse_bool(request.values.get("detect") or "true") 50 | classify = parse_bool(request.values.get("classify") or "true") 51 | 52 | image_file = request.files.get("image") 53 | if not image_file: 54 | return json_response({"msg": "no image"}, 400) 55 | nparr = np.frombuffer(image_file.stream.read(), np.uint8) 56 | image = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 57 | log.info( 58 | "Input: image %s, lang=%s, detect=%s, classify=%s", 59 | image.shape, 60 | lang, 61 | detect, 62 | classify, 63 | ) 64 | if image.ndim == 2: 65 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) 66 | result = detect_recognize(image, lang=lang, detect=detect, classify=classify) 67 | log.info("OCR Done %s %s", result["ts"], len(result["results"])) 68 | return json_response({"msg": "OK", "data": result}) 69 | 70 | 71 | if __name__ == "__main__": 72 | logging.basicConfig(level="INFO") 73 | logging.getLogger("waitress").setLevel(logging.INFO) 74 | if parse_bool(conf.get("debug", "0")): 75 | # Debug 76 | app.run(host=conf["server"]["host"], port=conf["server"]["port"], debug=True) 77 | else: 78 | # Deploy with waitress 79 | serve(app, host=conf["server"]["host"], port=conf["server"]["port"]) 80 | -------------------------------------------------------------------------------- /ocrweb_multi/main.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | 4 | block_cipher = None 5 | 6 | 7 | a = Analysis( 8 | ['main.py'], 9 | pathex=[], 10 | binaries=[], 11 | datas=[ 12 | ('static', 'static'), 13 | ], 14 | hiddenimports=[], 15 | hookspath=[], 16 | hooksconfig={}, 17 | runtime_hooks=[], 18 | excludes=[], 19 | win_no_prefer_redirects=False, 20 | win_private_assemblies=False, 21 | cipher=block_cipher, 22 | noarchive=False, 23 | ) 24 | pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) 25 | 26 | exe = EXE( 27 | pyz, 28 | a.scripts, 29 | [], 30 | exclude_binaries=True, 31 | name='main', 32 | debug=False, 33 | bootloader_ignore_signals=False, 34 | strip=False, 35 | upx=True, 36 | console=True, 37 | disable_windowed_traceback=False, 38 | argv_emulation=False, 39 | target_arch=None, 40 | codesign_identity=None, 41 | entitlements_file=None, 42 | ) 43 | coll = COLLECT( 44 | exe, 45 | a.binaries, 46 | a.zipfiles, 47 | a.datas, 48 | strip=False, 49 | upx=True, 50 | upx_exclude=[], 51 | name='ocrweb', 52 | ) 53 | -------------------------------------------------------------------------------- /ocrweb_multi/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/models/.gitkeep -------------------------------------------------------------------------------- /ocrweb_multi/rapidocr/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/rapidocr/__init__.py -------------------------------------------------------------------------------- /ocrweb_multi/rapidocr/detect.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # -*- encoding: utf-8 -*- 15 | # @Author: SWHL 16 | # @Contact: liekkaskono@163.com 17 | 18 | import numpy as np 19 | 20 | from utils.utils import OrtInferSession 21 | from .detect_process import DBPostProcess, create_operators, transform 22 | 23 | 24 | class TextDetector: 25 | def __init__(self, path, config): 26 | self.preprocess_op = create_operators(config["pre_process"]) 27 | self.postprocess_op = DBPostProcess(**config["post_process"]) 28 | 29 | session_instance = OrtInferSession(path) 30 | self.session = session_instance.session 31 | self.input_name = session_instance.get_input_name() 32 | 33 | def __call__(self, img): 34 | if img is None: 35 | raise ValueError("img is None") 36 | 37 | ori_im_shape = img.shape[:2] 38 | 39 | data = {"image": img} 40 | data = transform(data, self.preprocess_op) 41 | img, shape_list = data 42 | if img is None: 43 | return None, 0 44 | 45 | img = np.expand_dims(img, axis=0).astype(np.float32) 46 | shape_list = np.expand_dims(shape_list, axis=0) 47 | 48 | preds = self.session.run(None, {self.input_name: img}) 49 | 50 | post_result = self.postprocess_op(preds[0], shape_list) 51 | 52 | dt_boxes = post_result[0]["points"] 53 | dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im_shape) 54 | return dt_boxes 55 | 56 | def order_points_clockwise(self, pts): 57 | """ 58 | reference from: 59 | https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py 60 | sort the points based on their x-coordinates 61 | """ 62 | xSorted = pts[np.argsort(pts[:, 0]), :] 63 | 64 | # grab the left-most and right-most points from the sorted 65 | # x-roodinate points 66 | leftMost = xSorted[:2, :] 67 | rightMost = xSorted[2:, :] 68 | 69 | # now, sort the left-most coordinates according to their 70 | # y-coordinates so we can grab the top-left and bottom-left 71 | # points, respectively 72 | leftMost = leftMost[np.argsort(leftMost[:, 1]), :] 73 | (tl, bl) = leftMost 74 | 75 | rightMost = rightMost[np.argsort(rightMost[:, 1]), :] 76 | (tr, br) = rightMost 77 | 78 | rect = np.array([tl, tr, br, bl], dtype="float32") 79 | return rect 80 | 81 | def clip_det_res(self, points, img_height, img_width): 82 | for pno in range(points.shape[0]): 83 | points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) 84 | points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) 85 | return points 86 | 87 | def filter_tag_det_res(self, dt_boxes, image_shape): 88 | """对检测结果进行过滤""" 89 | img_height, img_width = image_shape[:2] 90 | dt_boxes_new = [] 91 | for box in dt_boxes: 92 | box = 
self.order_points_clockwise(box) 93 | box = self.clip_det_res(box, img_height, img_width) 94 | rect_width = int(np.linalg.norm(box[0] - box[1])) 95 | rect_height = int(np.linalg.norm(box[0] - box[3])) 96 | if rect_width <= 3 or rect_height <= 3: 97 | continue 98 | dt_boxes_new.append(box) 99 | return dt_boxes_new 100 | -------------------------------------------------------------------------------- /ocrweb_multi/rapidocr/main.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import copy 5 | from functools import lru_cache 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | import cv2 10 | 11 | from utils.config import conf 12 | from rapidocr.rapid_ocr_api import RapidOCR 13 | 14 | 15 | @lru_cache(maxsize=None) 16 | def load_language_model(lang="ch"): 17 | models = conf["languages"][lang] 18 | print("model", models) 19 | return RapidOCR(models) 20 | 21 | 22 | def detect_recognize(image, lang="ch", detect=True, classify=True): 23 | model = load_language_model(lang) 24 | results, ts = model(image, detect=detect, classify=classify) 25 | ts["total"] = sum(ts.values()) 26 | return {"ts": ts, "results": results} 27 | 28 | 29 | def check_and_read_gif(img_path): 30 | if Path(img_path).suffix.lower() == "gif": 31 | gif = cv2.VideoCapture(img_path) 32 | ret, frame = gif.read() 33 | if not ret: 34 | print("Cannot read {}. This gif image maybe corrupted.") 35 | return None, False 36 | if len(frame.shape) == 2 or frame.shape[-1] == 1: 37 | frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) 38 | imgvalue = frame[:, :, ::-1] 39 | return imgvalue, True 40 | return None, False 41 | 42 | 43 | def draw_text_det_res(dt_boxes, raw_im): 44 | src_im = copy.deepcopy(raw_im) 45 | for i, box in enumerate(dt_boxes): 46 | box = np.array(box).astype(np.int32).reshape(-1, 2) 47 | cv2.polylines(src_im, [box], True, color=(0, 0, 255), thickness=1) 48 | cv2.putText( 49 | src_im, 50 | str(i), 51 | (int(box[0][0]), int(box[0][1])), 52 | cv2.FONT_HERSHEY_SIMPLEX, 53 | 0.5, 54 | (0, 0, 0), 55 | 2, 56 | ) 57 | return src_im 58 | -------------------------------------------------------------------------------- /ocrweb_multi/requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime>=1.7.0 2 | opencv-python-headless==4.5.4.60 3 | six>=1.15.0 4 | pyclipper>=1.2.1 5 | numpy>=1.19.1 6 | Shapely>=1.7.1 7 | Flask>=2.1.2 8 | PyYAML 9 | waitress 10 | -------------------------------------------------------------------------------- /ocrweb_multi/static/css/main.css: -------------------------------------------------------------------------------- 1 | html { 2 | height: 100%; 3 | margin: 0; 4 | } 5 | 6 | body { 7 | background-color: #ebedef; 8 | min-height: 100%; 9 | margin: 0; 10 | } 11 | 12 | .btn-gen { 13 | background-color: #00a1d6; 14 | text-align: center; 15 | border-radius: 18px; 16 | margin: 0 5px 0 5px; 17 | font-size: 15px; 18 | padding: 5px 10px; 19 | height: 20px; 20 | min-width: 120px; 21 | text-transform: uppercase; 22 | color: #fff; 23 | border: none; 24 | } 25 | 26 | .btn-gen:hover, 27 | .btn-gen:focus { 28 | border-color: #23AAEE; 29 | background-color: #23AAEE; 30 | color: white; 31 | cursor: pointer; 32 | } 33 | 34 | .row { 35 | margin: 15px; 36 | } 37 | 38 | .small { 39 | font-size: 0.8em; 40 | } 41 | 42 | .verysmall { 43 | font-size: 0.5em; 44 | } 45 | 46 | .area { 47 | text-align: center; 48 | height: auto; 49 | margin: auto; 50 | } 51 | 
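/* Side-by-side layout: .leftarea and .rightarea below each float at 50% width; the right pane scrolls (overflow-y: auto) when the result table overflows. */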
52 | .leftarea { 53 | float: left; 54 | width: 50%; 55 | height: auto; 56 | position: relative; 57 | } 58 | 59 | .rightarea { 60 | float: left; 61 | width: 50%; 62 | height: auto; 63 | overflow-y: auto; 64 | } 65 | 66 | .table { 67 | width: auto; 68 | height: auto; 69 | margin: 0 auto; 70 | 71 | } 72 | 73 | .span_title { 74 | width: 98%; 75 | height: 36px; 76 | margin-top: 4px; 77 | line-height: 32px; 78 | background-color: #00a1d6; 79 | border: 1px solid #00a1d6; 80 | border-radius: 20px; 81 | color: #fff; 82 | display: inline-block; 83 | text-align: center; 84 | font-size: 22px; 85 | transition: .3s; 86 | box-sizing: border-box; 87 | cursor: default; 88 | } 89 | 90 | .uplodNote { 91 | font-size: 10px; 92 | color: #A1A1A1; 93 | } 94 | 95 | a { 96 | text-decoration: none; 97 | } 98 | 99 | #input-hint { 100 | margin: auto; 101 | cursor: pointer; 102 | 103 | } 104 | 105 | #result_view { 106 | position: relative; 107 | width: 95%; 108 | margin: auto; 109 | } 110 | 111 | #result_view canvas { 112 | width: 100%; 113 | height: 100%; 114 | } 115 | 116 | /* wrapper */ 117 | #wrapper { 118 | position: absolute; 119 | top: 0; 120 | left: 0; 121 | width: 100%; 122 | height: 100%; 123 | background: 124 | linear-gradient(#1a98ca, #1a98ca), 125 | linear-gradient(90deg, #ffffff33 1px, transparent 0, transparent 19px), 126 | linear-gradient(#ffffff33 1px, transparent 0, transparent 19px), 127 | linear-gradient(transparent, #1a98ca); 128 | background-size: 100% 1.5%, 10% 100%, 100% 8%, 100% 100%; 129 | background-repeat: no-repeat, repeat, repeat, no-repeat; 130 | background-position: 0% 100%, 0 0, 0 0, 0 0; 131 | /* 初始位置 */ 132 | clip-path: polygon(0% 0%, 100% 0%, 100% 1.5%, 0% 1.5%); 133 | /* 添加动画效果 */ 134 | animation: move 1s infinite linear; 135 | } 136 | 137 | @keyframes move { 138 | to { 139 | background-position: 0 100%, 0 0, 0 0, 0 0; 140 | /* 终止位置 */ 141 | clip-path: polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%); 142 | } 143 | } -------------------------------------------------------------------------------- /ocrweb_multi/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/static/favicon.ico -------------------------------------------------------------------------------- /ocrweb_multi/static/hint.svg: -------------------------------------------------------------------------------- 1 | 3 | 未选择图片 4 | -------------------------------------------------------------------------------- /ocrweb_multi/utils/config.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | import sys 3 | from pathlib import Path 4 | import yaml 5 | 6 | root_dir = Path(__file__).parent.parent 7 | 8 | 9 | def get_resource_path(name: str): 10 | """依次检查资源文件的多个可能路径, 返回首个存在的路径""" 11 | for path in [ 12 | # wrapper.exe 所在目录 13 | Path(root_dir.parent, name), 14 | # main.exe 所在目录 / main.py 所在目录 15 | Path(root_dir, name), 16 | # main.exe 所在目录 17 | Path(sys.argv[0]).parent / name, 18 | # 工作目录 19 | Path(name), 20 | ]: 21 | if path.exists(): 22 | print("Loaded:", path) 23 | return path 24 | raise FileNotFoundError(name) 25 | 26 | 27 | conf = yaml.safe_load(get_resource_path("config.yaml").read_text(encoding="utf-8")) 28 | -------------------------------------------------------------------------------- /ocrweb_multi/utils/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 
import time 3 | import warnings 4 | 5 | from onnxruntime import ( 6 | get_available_providers, 7 | get_device, 8 | SessionOptions, 9 | InferenceSession, 10 | ) 11 | from utils.config import conf, get_resource_path 12 | 13 | 14 | def parse_bool(val): 15 | if not isinstance(val, str): 16 | return bool(val) 17 | return val.lower() in ("1", "true", "yes") 18 | 19 | 20 | def default(obj): 21 | if hasattr(obj, "tolist"): 22 | return obj.tolist() 23 | return obj 24 | 25 | 26 | def tojson(obj, **kws): 27 | return json.dumps(obj, default=default, ensure_ascii=False, **kws) + "\n" 28 | 29 | 30 | class OrtInferSession: 31 | def __init__(self, model_path): 32 | ort_conf = conf["global"] 33 | sess_opt = SessionOptions() 34 | sess_opt.log_severity_level = 4 35 | sess_opt.enable_cpu_mem_arena = False 36 | 37 | cuda_ep = "CUDAExecutionProvider" 38 | cpu_ep = "CPUExecutionProvider" 39 | 40 | providers = [] 41 | if ( 42 | ort_conf["use_cuda"] 43 | and get_device() == "GPU" 44 | and cuda_ep in get_available_providers() 45 | ): 46 | providers = [(cuda_ep, ort_conf[cuda_ep])] 47 | 48 | providers.append(cpu_ep) 49 | 50 | self.session = InferenceSession( 51 | str(get_resource_path(model_path)), 52 | sess_options=sess_opt, 53 | providers=providers, 54 | ) 55 | 56 | if ort_conf["use_cuda"] and cuda_ep not in self.session.get_providers(): 57 | warnings.warn( 58 | f"{cuda_ep} is not avaiable for current env, the inference part is automatically shifted to be executed under {cpu_ep}.\n" 59 | "Please ensure the installed onnxruntime-gpu version matches your cuda and cudnn version, " 60 | "you can check their relations from the offical web site: " 61 | "https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html", 62 | RuntimeWarning, 63 | ) 64 | 65 | def get_input_name(self, input_idx=0): 66 | return self.session.get_inputs()[input_idx].name 67 | 68 | def get_output_name(self, output_idx=0): 69 | return self.session.get_outputs()[output_idx].name 70 | 71 | 72 | class Ticker: 73 | def __init__(self, reset=True) -> None: 74 | self.ts = time.perf_counter() 75 | self.reset = reset 76 | self.maps = {} 77 | 78 | def tick(self, name, reset=None): 79 | ts = time.perf_counter() 80 | if reset is None: 81 | reset = self.reset 82 | dt = ts - self.ts 83 | if reset: 84 | self.ts = ts 85 | self.maps[name] = dt 86 | return dt 87 | -------------------------------------------------------------------------------- /ocrweb_multi/wrapper.c: -------------------------------------------------------------------------------- 1 | /* 2 | 针对Pyinstaller目录下文件过多的问题, 使用外部exe+system调用的方式实现资源文件/依赖库分离 3 | */ 4 | #include 5 | #include 6 | 7 | void combine(char *destination, const char *path1, const char *path2) 8 | { 9 | if (path1 == NULL && path2 == NULL) 10 | { 11 | strcpy(destination, ""); 12 | } 13 | else if (path2 == NULL || strlen(path2) == 0) 14 | { 15 | strcpy(destination, path1); 16 | } 17 | else if (path1 == NULL || strlen(path1) == 0) 18 | { 19 | strcpy(destination, path2); 20 | } 21 | else 22 | { 23 | strcpy(destination, path1); 24 | 25 | size_t idx = 0, sepIdx = 0; 26 | size_t size1 = strlen(path1); 27 | while (idx < size1) 28 | { 29 | idx++; 30 | if (destination[idx] == '\\' || destination[idx] == '/') 31 | { 32 | sepIdx = idx; 33 | } 34 | } 35 | // Trim destination: delete from last separator to end. 
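        // sepIdx ends up pointing at the last '\\' or '/' found in the loop above,
        // so everything up to and including that separator is kept and path2 is appended to it.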
36 | destination[sepIdx + 1] = '\0'; 37 | strcat(destination, path2); 38 | } 39 | } 40 | 41 | void main() 42 | { 43 | // Set title 44 | system("title Rapid OCR Server"); 45 | // Get wrapper exe path 46 | TCHAR path[MAX_PATH]; 47 | GetModuleFileName(NULL, path, MAX_PATH); 48 | 49 | TCHAR exe_path[MAX_PATH]; 50 | // Get real exe path from wrapper exe path 51 | combine(exe_path, path, "ocrweb\\main.exe"); 52 | printf("Run real exe: %s\n", exe_path); 53 | // Run real exe 54 | system(exe_path); 55 | } 56 | -------------------------------------------------------------------------------- /ocrweb_multi/wrapper.rc: -------------------------------------------------------------------------------- 1 | id ICON "static/favicon.ico" 2 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr/install/) 2 | -------------------------------------------------------------------------------- /python/demo.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from rapidocr import EngineType, ModelType, OCRVersion, RapidOCR 5 | 6 | engine = RapidOCR( 7 | params={ 8 | "Rec.ocr_version": OCRVersion.PPOCRV5, 9 | "Rec.engine_type": EngineType.PADDLE, 10 | "Rec.model_type": ModelType.MOBILE, 11 | } 12 | ) 13 | 14 | img_url = "https://img1.baidu.com/it/u=3619974146,1266987475&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=516" 15 | result = engine(img_url) 16 | print(result) 17 | 18 | result.vis("vis_result.jpg") 19 | -------------------------------------------------------------------------------- /python/rapidocr/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidOCR 5 | from .utils import LoadImageError, VisRes 6 | from .utils.typings import EngineType, LangCls, LangDet, LangRec, ModelType, OCRVersion 7 | -------------------------------------------------------------------------------- /python/rapidocr/cal_rec_boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import CalRecBoxes 5 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_cls/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import TextClassifier 5 | from .utils import TextClsOutput 6 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_cls/main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import copy 15 | import math 16 | import time 17 | from typing import Any, Dict, List, Union 18 | 19 | import cv2 20 | import numpy as np 21 | 22 | from rapidocr.inference_engine.base import get_engine 23 | 24 | from .utils import ClsPostProcess, TextClsOutput 25 | 26 | 27 | class TextClassifier: 28 | def __init__(self, cfg: Dict[str, Any]): 29 | self.cls_image_shape = cfg["cls_image_shape"] 30 | self.cls_batch_num = cfg["cls_batch_num"] 31 | self.cls_thresh = cfg["cls_thresh"] 32 | self.postprocess_op = ClsPostProcess(cfg["label_list"]) 33 | 34 | self.session = get_engine(cfg.engine_type)(cfg) 35 | 36 | def __call__(self, img_list: Union[np.ndarray, List[np.ndarray]]) -> TextClsOutput: 37 | if isinstance(img_list, np.ndarray): 38 | img_list = [img_list] 39 | 40 | img_list = copy.deepcopy(img_list) 41 | 42 | # Calculate the aspect ratio of all text bars 43 | width_list = [img.shape[1] / float(img.shape[0]) for img in img_list] 44 | 45 | # Sorting can speed up the cls process 46 | indices = np.argsort(np.array(width_list)) 47 | 48 | img_num = len(img_list) 49 | cls_res = [("", 0.0)] * img_num 50 | batch_num = self.cls_batch_num 51 | elapse = 0 52 | for beg_img_no in range(0, img_num, batch_num): 53 | end_img_no = min(img_num, beg_img_no + batch_num) 54 | 55 | norm_img_batch = [] 56 | for ino in range(beg_img_no, end_img_no): 57 | norm_img = self.resize_norm_img(img_list[indices[ino]]) 58 | norm_img = norm_img[np.newaxis, :] 59 | norm_img_batch.append(norm_img) 60 | norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32) 61 | 62 | starttime = time.time() 63 | prob_out = self.session(norm_img_batch) 64 | cls_result = self.postprocess_op(prob_out) 65 | elapse += time.time() - starttime 66 | 67 | for rno, (label, score) in enumerate(cls_result): 68 | cls_res[indices[beg_img_no + rno]] = (label, score) 69 | if "180" in label and score > self.cls_thresh: 70 | img_list[indices[beg_img_no + rno]] = cv2.rotate( 71 | img_list[indices[beg_img_no + rno]], 1 72 | ) 73 | return TextClsOutput(img_list=img_list, cls_res=cls_res, elapse=elapse) 74 | 75 | def resize_norm_img(self, img: np.ndarray) -> np.ndarray: 76 | img_c, img_h, img_w = self.cls_image_shape 77 | h, w = img.shape[:2] 78 | ratio = w / float(h) 79 | if math.ceil(img_h * ratio) > img_w: 80 | resized_w = img_w 81 | else: 82 | resized_w = int(math.ceil(img_h * ratio)) 83 | 84 | resized_image = cv2.resize(img, (resized_w, img_h)) 85 | resized_image = resized_image.astype("float32") 86 | if img_c == 1: 87 | resized_image = resized_image / 255 88 | resized_image = resized_image[np.newaxis, :] 89 | else: 90 | resized_image = resized_image.transpose((2, 0, 1)) / 255 91 | 92 | resized_image -= 0.5 93 | resized_image /= 0.5 94 | padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32) 95 | padding_im[:, :, :resized_w] = resized_image 96 | return padding_im 97 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_cls/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 
PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass 15 | from pathlib import Path 16 | from typing import List, Optional, Tuple, Union 17 | 18 | import numpy as np 19 | 20 | from ..utils.logger import Logger 21 | from ..utils.utils import save_img 22 | from ..utils.vis_res import VisRes 23 | 24 | logger = Logger(logger_name=__name__).get_log() 25 | 26 | 27 | @dataclass 28 | class TextClsOutput: 29 | img_list: Optional[List[np.ndarray]] = None 30 | cls_res: Optional[List[Tuple[str, float]]] = None 31 | elapse: Optional[float] = None 32 | 33 | def __len__(self): 34 | if self.img_list is None: 35 | return 0 36 | return len(self.img_list) 37 | 38 | def vis(self, save_path: Optional[Union[str, Path]] = None) -> Optional[np.ndarray]: 39 | if self.img_list is None or self.cls_res is None: 40 | logger.warning("No image or txts to visualize.") 41 | return None 42 | 43 | txts = [f"{txt} {score:.2f}" for txt, score in self.cls_res] 44 | scores = [score for _, score in self.cls_res] 45 | 46 | vis = VisRes() 47 | vis_img = vis.draw_rec_res(self.img_list, txts, scores) 48 | 49 | if save_path is not None: 50 | save_img(save_path, vis_img) 51 | logger.info("Visualization saved as %s", save_path) 52 | return vis_img 53 | 54 | 55 | class ClsPostProcess: 56 | def __init__(self, label_list: List[str]): 57 | self.label_list = label_list 58 | 59 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]: 60 | pred_idxs = preds.argmax(axis=1) 61 | decode_out = [ 62 | (self.label_list[int(idx)], preds[i, int(idx)]) 63 | for i, idx in enumerate(pred_idxs) 64 | ] 65 | return decode_out 66 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_det/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import TextDetector 5 | from .utils import TextDetOutput 6 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_det/main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
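# Text detector: picks limit_side_len from the input image size, runs the configured
# inference engine, applies DB post-processing, then sorts boxes top-to-bottom, left-to-right.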
14 | # -*- encoding: utf-8 -*- 15 | # @Author: SWHL 16 | # @Contact: liekkaskono@163.com 17 | import time 18 | from typing import Any, Dict, List 19 | 20 | import numpy as np 21 | 22 | from rapidocr.inference_engine.base import get_engine 23 | 24 | from .utils import DBPostProcess, DetPreProcess, TextDetOutput 25 | 26 | 27 | class TextDetector: 28 | def __init__(self, cfg: Dict[str, Any]): 29 | self.limit_side_len = cfg.get("limit_side_len") 30 | self.limit_type = cfg.get("limit_type") 31 | self.mean = cfg.get("mean") 32 | self.std = cfg.get("std") 33 | self.preprocess_op = None 34 | 35 | post_process = { 36 | "thresh": cfg.get("thresh", 0.3), 37 | "box_thresh": cfg.get("box_thresh", 0.5), 38 | "max_candidates": cfg.get("max_candidates", 1000), 39 | "unclip_ratio": cfg.get("unclip_ratio", 1.6), 40 | "use_dilation": cfg.get("use_dilation", True), 41 | "score_mode": cfg.get("score_mode", "fast"), 42 | } 43 | self.postprocess_op = DBPostProcess(**post_process) 44 | 45 | self.session = get_engine(cfg.engine_type)(cfg) 46 | 47 | def __call__(self, img: np.ndarray) -> TextDetOutput: 48 | start_time = time.perf_counter() 49 | 50 | if img is None: 51 | raise ValueError("img is None") 52 | 53 | ori_img_shape = img.shape[0], img.shape[1] 54 | self.preprocess_op = self.get_preprocess(max(img.shape[0], img.shape[1])) 55 | prepro_img = self.preprocess_op(img) 56 | if prepro_img is None: 57 | return TextDetOutput() 58 | 59 | preds = self.session(prepro_img) 60 | boxes, scores = self.postprocess_op(preds, ori_img_shape) 61 | if len(boxes) < 1: 62 | return TextDetOutput() 63 | 64 | boxes = self.sorted_boxes(boxes) 65 | elapse = time.perf_counter() - start_time 66 | return TextDetOutput(img, boxes, scores, elapse=elapse) 67 | 68 | def get_preprocess(self, max_wh: int) -> DetPreProcess: 69 | if self.limit_type == "min": 70 | limit_side_len = self.limit_side_len 71 | elif max_wh < 960: 72 | limit_side_len = 960 73 | elif max_wh < 1500: 74 | limit_side_len = 1500 75 | else: 76 | limit_side_len = 2000 77 | return DetPreProcess(limit_side_len, self.limit_type, self.mean, self.std) 78 | 79 | @staticmethod 80 | def sorted_boxes(dt_boxes: np.ndarray) -> List[np.ndarray]: 81 | """ 82 | Sort text boxes in order from top to bottom, left to right 83 | args: 84 | dt_boxes(array):detected text boxes with shape [4, 2] 85 | return: 86 | sorted boxes(array) with shape [4, 2] 87 | """ 88 | num_boxes = dt_boxes.shape[0] 89 | sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) 90 | _boxes = list(sorted_boxes) 91 | 92 | for i in range(num_boxes - 1): 93 | for j in range(i, -1, -1): 94 | if ( 95 | abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 96 | and _boxes[j + 1][0][0] < _boxes[j][0][0] 97 | ): 98 | tmp = _boxes[j] 99 | _boxes[j] = _boxes[j + 1] 100 | _boxes[j + 1] = tmp 101 | else: 102 | break 103 | return _boxes 104 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_rec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import TextRecognizer 5 | from .typings import TextRecInput, TextRecOutput 6 | -------------------------------------------------------------------------------- /python/rapidocr/ch_ppocr_rec/typings.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from dataclasses import 
dataclass, field 5 | from enum import Enum 6 | from pathlib import Path 7 | from typing import List, Optional, Tuple, Union 8 | 9 | import numpy as np 10 | 11 | from ..utils.logger import Logger 12 | from ..utils.utils import save_img 13 | from ..utils.vis_res import VisRes 14 | 15 | logger = Logger(logger_name=__name__).get_log() 16 | 17 | 18 | @dataclass 19 | class TextRecConfig: 20 | intra_op_num_threads: int = -1 21 | inter_op_num_threads: int = -1 22 | use_cuda: bool = False 23 | use_dml: bool = False 24 | model_path: Union[str, Path, None] = None 25 | 26 | rec_batch_num: int = 6 27 | rec_img_shape: Tuple[int, int, int] = (3, 48, 320) 28 | rec_keys_path: Union[str, Path, None] = None 29 | 30 | 31 | @dataclass 32 | class TextRecInput: 33 | img: Union[np.ndarray, List[np.ndarray], None] = None 34 | return_word_box: bool = False 35 | 36 | 37 | @dataclass 38 | class TextRecOutput: 39 | imgs: Optional[List[np.ndarray]] = None 40 | txts: Optional[Tuple[str]] = None 41 | scores: Tuple[float] = (1.0,) 42 | word_results: Tuple[Tuple[str, float, Optional[List[List[int]]]]] = ( 43 | ("", 1.0, None), 44 | ) 45 | elapse: Optional[float] = None 46 | lang_type: Optional[str] = None 47 | 48 | def __len__(self): 49 | if self.txts is None: 50 | return 0 51 | return len(self.txts) 52 | 53 | def vis(self, save_path: Optional[Union[str, Path]] = None) -> Optional[np.ndarray]: 54 | if self.imgs is None or self.txts is None: 55 | logger.warning("No image or txts to visualize.") 56 | return None 57 | 58 | vis = VisRes() 59 | vis_img = vis.draw_rec_res( 60 | self.imgs, self.txts, self.scores, lang_type=self.lang_type 61 | ) 62 | 63 | if save_path is not None: 64 | save_img(save_path, vis_img) 65 | logger.info("Visualization saved as %s", save_path) 66 | return vis_img 67 | 68 | 69 | class WordType(Enum): 70 | CN = "cn" 71 | EN = "en" 72 | NUM = "num" 73 | EN_NUM = "en&num" 74 | 75 | 76 | @dataclass 77 | class WordInfo: 78 | words: List[List[str]] = field(default_factory=list) 79 | word_cols: List[List[int]] = field(default_factory=list) 80 | word_types: List[WordType] = field(default_factory=list) 81 | line_txt_len: float = 0.0 82 | confs: List[float] = field(default_factory=list) 83 | -------------------------------------------------------------------------------- /python/rapidocr/cli.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import shutil 5 | from pathlib import Path 6 | 7 | 8 | root_dir = Path(__file__).resolve().parent 9 | DEFAULT_CFG_PATH = root_dir / "config.yaml" 10 | 11 | 12 | def generate_cfg(args): 13 | if args.save_cfg_file is None: 14 | args.save_cfg_file = "./default_rapidocr.yaml" 15 | 16 | shutil.copyfile(DEFAULT_CFG_PATH, args.save_cfg_file) 17 | print(f"The config file has saved in {args.save_cfg_file}") 18 | 19 | 20 | def check_install(ocr_engine): 21 | img_url = "https://github.com/RapidAI/RapidOCR/blob/a9bb7c1f44b6e00556ada90ac588f020d7637c4b/python/tests/test_files/ch_en_num.jpg?raw=true" 22 | result = ocr_engine(img_url) 23 | 24 | if result.txts is None or result.txts[0] != "正品促销": 25 | raise ValueError("The installation is incorrect!") 26 | 27 | print("Success! 
rapidocr is installed correctly!") 28 | -------------------------------------------------------------------------------- /python/rapidocr/config.yaml: -------------------------------------------------------------------------------- 1 | Global: 2 | text_score: 0.5 3 | 4 | use_det: true 5 | use_cls: true 6 | use_rec: true 7 | 8 | min_height: 30 9 | width_height_ratio: 8 10 | max_side_len: 2000 11 | min_side_len: 30 12 | 13 | return_word_box: false 14 | 15 | font_path: null 16 | 17 | EngineConfig: 18 | onnxruntime: 19 | intra_op_num_threads: -1 20 | inter_op_num_threads: -1 21 | enable_cpu_mem_arena: false 22 | use_cuda: false 23 | use_dml: false 24 | 25 | openvino: 26 | inference_num_threads: -1 27 | 28 | paddle: 29 | cpu_math_library_num_threads: -1 30 | use_cuda: false 31 | gpu_id: 0 32 | gpu_mem: 500 33 | 34 | torch: 35 | use_cuda: false 36 | gpu_id: 0 37 | 38 | Det: 39 | engine_type: 'onnxruntime' 40 | lang_type: 'ch' 41 | model_type: 'mobile' 42 | ocr_version: 'PP-OCRv4' 43 | 44 | task_type: 'det' 45 | 46 | model_path: null 47 | model_dir: null 48 | 49 | limit_side_len: 736 50 | limit_type: min 51 | std: [ 0.5, 0.5, 0.5 ] 52 | mean: [ 0.5, 0.5, 0.5 ] 53 | 54 | thresh: 0.3 55 | box_thresh: 0.5 56 | max_candidates: 1000 57 | unclip_ratio: 1.6 58 | use_dilation: true 59 | score_mode: fast 60 | 61 | Cls: 62 | engine_type: 'onnxruntime' 63 | lang_type: 'ch' 64 | model_type: 'mobile' 65 | ocr_version: 'PP-OCRv4' 66 | 67 | task_type: 'cls' 68 | 69 | model_path: null 70 | model_dir: null 71 | 72 | cls_image_shape: [3, 48, 192] 73 | cls_batch_num: 6 74 | cls_thresh: 0.9 75 | label_list: ['0', '180'] 76 | 77 | Rec: 78 | engine_type: 'onnxruntime' 79 | lang_type: 'ch' 80 | model_type: 'mobile' 81 | ocr_version: 'PP-OCRv4' 82 | 83 | task_type: 'rec' 84 | 85 | model_path: null 86 | model_dir: null 87 | 88 | rec_keys_path: null 89 | rec_img_shape: [3, 48, 320] 90 | rec_batch_num: 6 91 | -------------------------------------------------------------------------------- /python/rapidocr/inference_engine/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | -------------------------------------------------------------------------------- /python/rapidocr/inference_engine/base.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import abc 5 | from dataclasses import dataclass 6 | from enum import Enum 7 | from pathlib import Path 8 | from typing import Dict, Union 9 | 10 | import numpy as np 11 | from omegaconf import OmegaConf 12 | 13 | from ..utils.logger import Logger 14 | from ..utils.typings import EngineType, ModelType, OCRVersion, TaskType 15 | from ..utils.utils import import_package 16 | 17 | cur_dir = Path(__file__).resolve().parent.parent 18 | MODEL_URL_PATH = cur_dir / "default_models.yaml" 19 | 20 | logger = Logger(logger_name=__name__).get_log() 21 | 22 | 23 | def get_engine(engine_type: EngineType): 24 | logger.info("Using engine_name: %s", engine_type.value) 25 | 26 | if engine_type == EngineType.ONNXRUNTIME: 27 | if not import_package(engine_type.value): 28 | raise ImportError(f"{engine_type.value} is not installed.") 29 | 30 | from .onnxruntime import OrtInferSession 31 | 32 | return OrtInferSession 33 | 34 | if engine_type == EngineType.OPENVINO: 35 | if not import_package(engine_type.value): 36 | raise ImportError(f"{engine_type.value} is 
not installed") 37 | 38 | from .openvino import OpenVINOInferSession 39 | 40 | return OpenVINOInferSession 41 | 42 | if engine_type == EngineType.PADDLE: 43 | if not import_package(engine_type.value): 44 | raise ImportError(f"{engine_type.value} is not installed") 45 | 46 | from .paddle import PaddleInferSession 47 | 48 | return PaddleInferSession 49 | 50 | if engine_type == EngineType.TORCH: 51 | if not import_package(engine_type.value): 52 | raise ImportError(f"{engine_type.value} is not installed") 53 | 54 | from .torch import TorchInferSession 55 | 56 | return TorchInferSession 57 | 58 | raise ValueError(f"Unsupported engine: {engine_type.value}") 59 | 60 | 61 | @dataclass 62 | class FileInfo: 63 | engine_type: EngineType 64 | ocr_version: OCRVersion 65 | task_type: TaskType 66 | lang_type: Enum 67 | model_type: ModelType 68 | 69 | 70 | class InferSession(abc.ABC): 71 | model_info = OmegaConf.load(MODEL_URL_PATH) 72 | DEFAULT_MODEL_PATH = cur_dir / "models" 73 | logger = Logger(logger_name=__name__).get_log() 74 | 75 | @abc.abstractmethod 76 | def __init__(self, config): 77 | pass 78 | 79 | @abc.abstractmethod 80 | def __call__(self, input_content: np.ndarray) -> np.ndarray: 81 | pass 82 | 83 | @staticmethod 84 | def _verify_model(model_path: Union[str, Path, None]): 85 | if model_path is None: 86 | raise ValueError("model_path is None!") 87 | 88 | model_path = Path(model_path) 89 | if not model_path.exists(): 90 | raise FileNotFoundError(f"{model_path} does not exists.") 91 | 92 | if not model_path.is_file(): 93 | raise FileExistsError(f"{model_path} is not a file.") 94 | 95 | @abc.abstractmethod 96 | def have_key(self, key: str = "character") -> bool: 97 | pass 98 | 99 | @classmethod 100 | def get_model_url(cls, file_info: FileInfo) -> Dict[str, str]: 101 | model_dict = OmegaConf.select( 102 | cls.model_info, 103 | f"{file_info.engine_type.value}.{file_info.ocr_version.value}.{file_info.task_type.value}", 104 | ) 105 | 106 | # 优先查找 server 模型 107 | if file_info.model_type == ModelType.SERVER: 108 | for k in model_dict: 109 | if ( 110 | k.startswith(file_info.lang_type.value) 111 | and file_info.model_type.value in k 112 | ): 113 | return model_dict[k] 114 | 115 | for k in model_dict: 116 | if k.startswith(file_info.lang_type.value): 117 | return model_dict[k] 118 | 119 | raise KeyError("File not found") 120 | 121 | @classmethod 122 | def get_dict_key_url(cls, file_info: FileInfo) -> str: 123 | model_dict = cls.get_model_url(file_info) 124 | return model_dict["dict_url"] 125 | -------------------------------------------------------------------------------- /python/rapidocr/inference_engine/openvino.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import os 5 | import traceback 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | from omegaconf import DictConfig 10 | from openvino.runtime import Core 11 | 12 | from ..utils import Logger 13 | from ..utils.download_file import DownloadFile, DownloadFileInput 14 | from .base import FileInfo, InferSession 15 | 16 | 17 | class OpenVINOInferSession(InferSession): 18 | def __init__(self, cfg: DictConfig): 19 | super().__init__(cfg) 20 | self.logger = Logger(logger_name=__name__).get_log() 21 | 22 | core = Core() 23 | 24 | model_path = cfg.get("model_path", None) 25 | if model_path is None: 26 | model_info = self.get_model_url( 27 | FileInfo( 28 | engine_type=cfg.engine_type, 29 | ocr_version=cfg.ocr_version, 30 | 
task_type=cfg.task_type, 31 | lang_type=cfg.lang_type, 32 | model_type=cfg.model_type, 33 | ) 34 | ) 35 | model_path = self.DEFAULT_MODEL_PATH / Path(model_info["model_dir"]).name 36 | download_params = DownloadFileInput( 37 | file_url=model_info["model_dir"], 38 | sha256=model_info["SHA256"], 39 | save_path=model_path, 40 | logger=self.logger, 41 | ) 42 | DownloadFile.run(download_params) 43 | 44 | self.logger.info(f"Using {model_path}") 45 | model_path = Path(model_path) 46 | self._verify_model(model_path) 47 | 48 | cpu_nums = os.cpu_count() 49 | infer_num_threads = cfg.get("inference_num_threads", -1) 50 | if infer_num_threads != -1 and 1 <= infer_num_threads <= cpu_nums: 51 | core.set_property("CPU", {"INFERENCE_NUM_THREADS": str(infer_num_threads)}) 52 | 53 | model_onnx = core.read_model(model_path) 54 | compile_model = core.compile_model(model=model_onnx, device_name="CPU") 55 | self.session = compile_model.create_infer_request() 56 | 57 | def __call__(self, input_content: np.ndarray) -> np.ndarray: 58 | try: 59 | self.session.infer(inputs=[input_content]) 60 | return self.session.get_output_tensor().data 61 | except Exception as e: 62 | error_info = traceback.format_exc() 63 | raise OpenVIONError(error_info) from e 64 | 65 | def have_key(self, key: str = "character") -> bool: 66 | return False 67 | 68 | 69 | class OpenVIONError(Exception): 70 | pass 71 | -------------------------------------------------------------------------------- /python/rapidocr/inference_engine/torch.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import torch 8 | from omegaconf import OmegaConf 9 | 10 | from ..networks.architectures.base_model import BaseModel 11 | from ..utils.download_file import DownloadFile, DownloadFileInput 12 | from ..utils.logger import Logger 13 | from .base import FileInfo, InferSession 14 | 15 | root_dir = Path(__file__).resolve().parent.parent 16 | DEFAULT_CFG_PATH = root_dir / "networks" / "arch_config.yaml" 17 | 18 | 19 | class TorchInferSession(InferSession): 20 | def __init__(self, cfg) -> None: 21 | self.logger = Logger(logger_name=__name__).get_log() 22 | 23 | model_path = cfg.get("model_path", None) 24 | if model_path is None: 25 | model_info = self.get_model_url( 26 | FileInfo( 27 | engine_type=cfg.engine_type, 28 | ocr_version=cfg.ocr_version, 29 | task_type=cfg.task_type, 30 | lang_type=cfg.lang_type, 31 | model_type=cfg.model_type, 32 | ) 33 | ) 34 | default_model_url = model_info["model_dir"] 35 | model_path = self.DEFAULT_MODEL_PATH / Path(default_model_url).name 36 | DownloadFile.run( 37 | DownloadFileInput( 38 | file_url=default_model_url, 39 | sha256=model_info["SHA256"], 40 | save_path=model_path, 41 | logger=self.logger, 42 | ) 43 | ) 44 | 45 | self.logger.info(f"Using {model_path}") 46 | model_path = Path(model_path) 47 | self._verify_model(model_path) 48 | 49 | all_arch_config = OmegaConf.load(DEFAULT_CFG_PATH) 50 | file_name = model_path.stem 51 | if file_name not in all_arch_config: 52 | raise ValueError(f"architecture {file_name} is not in arch_config.yaml") 53 | 54 | arch_config = all_arch_config.get(file_name) 55 | self.predictor = BaseModel(arch_config) 56 | self.predictor.load_state_dict(torch.load(model_path, weights_only=True)) 57 | self.predictor.eval() 58 | 59 | self.use_gpu = False 60 | if cfg.engine_cfg.use_cuda: 61 | self.device = 
torch.device(f"cuda:{cfg.engine_cfg.gpu_id}") 62 | self.predictor.to(self.device) 63 | self.use_gpu = True 64 | 65 | def __call__(self, img: np.ndarray): 66 | with torch.no_grad(): 67 | inp = torch.from_numpy(img) 68 | if self.use_gpu: 69 | inp = inp.to(self.device) 70 | 71 | # 适配跟onnx对齐取值逻辑 72 | outputs = self.predictor(inp).cpu().numpy() 73 | return outputs 74 | 75 | def have_key(self, key: str = "character") -> bool: 76 | return False 77 | 78 | 79 | class TorchInferError(Exception): 80 | pass 81 | -------------------------------------------------------------------------------- /python/rapidocr/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr/models/.gitkeep -------------------------------------------------------------------------------- /python/rapidocr/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr/networks/__init__.py -------------------------------------------------------------------------------- /python/rapidocr/networks/architectures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import copy 16 | 17 | __all__ = ["build_model"] 18 | 19 | 20 | def build_model(config, **kwargs): 21 | from .base_model import BaseModel 22 | 23 | config = copy.deepcopy(config) 24 | module_class = BaseModel(config, **kwargs) 25 | return module_class 26 | -------------------------------------------------------------------------------- /python/rapidocr/networks/architectures/base_model.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ..backbones import build_backbone 4 | from ..heads import build_head 5 | from ..necks import build_neck 6 | 7 | 8 | class BaseModel(nn.Module): 9 | def __init__(self, config, **kwargs): 10 | """ 11 | the module for OCR. 12 | args: 13 | config (dict): the super parameters for module. 14 | """ 15 | super(BaseModel, self).__init__() 16 | 17 | in_channels = config.get("in_channels", 3) 18 | model_type = config["model_type"] 19 | # build backbone, backbone is need for del, rec and cls 20 | if "Backbone" not in config or config["Backbone"] is None: 21 | self.use_backbone = False 22 | else: 23 | self.use_backbone = True 24 | config["Backbone"]["in_channels"] = in_channels 25 | self.backbone = build_backbone(config["Backbone"], model_type) 26 | in_channels = self.backbone.out_channels 27 | 28 | # build neck 29 | # for rec, neck can be cnn,rnn or reshape(None) 30 | # for det, neck can be FPN, BIFPN and so on. 
31 | # for cls, neck should be none 32 | if "Neck" not in config or config["Neck"] is None: 33 | self.use_neck = False 34 | else: 35 | self.use_neck = True 36 | config["Neck"]["in_channels"] = in_channels 37 | self.neck = build_neck(config["Neck"]) 38 | in_channels = self.neck.out_channels 39 | 40 | # # build head, head is need for det, rec and cls 41 | if "Head" not in config or config["Head"] is None: 42 | self.use_head = False 43 | else: 44 | self.use_head = True 45 | config["Head"]["in_channels"] = in_channels 46 | self.head = build_head(config["Head"], **kwargs) 47 | 48 | self.return_all_feats = config.get("return_all_feats", False) 49 | 50 | self._initialize_weights() 51 | 52 | def _initialize_weights(self): 53 | # weight initialization 54 | for m in self.modules(): 55 | if isinstance(m, nn.Conv2d): 56 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 57 | if m.bias is not None: 58 | nn.init.zeros_(m.bias) 59 | elif isinstance(m, nn.BatchNorm2d): 60 | nn.init.ones_(m.weight) 61 | nn.init.zeros_(m.bias) 62 | elif isinstance(m, nn.Linear): 63 | nn.init.normal_(m.weight, 0, 0.01) 64 | if m.bias is not None: 65 | nn.init.zeros_(m.bias) 66 | elif isinstance(m, nn.ConvTranspose2d): 67 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 68 | if m.bias is not None: 69 | nn.init.zeros_(m.bias) 70 | 71 | def forward(self, x): 72 | y = dict() 73 | if self.use_backbone: 74 | x = self.backbone(x) 75 | if isinstance(x, dict): 76 | y.update(x) 77 | else: 78 | y["backbone_out"] = x 79 | final_name = "backbone_out" 80 | if self.use_neck: 81 | x = self.neck(x) 82 | if isinstance(x, dict): 83 | y.update(x) 84 | else: 85 | y["neck_out"] = x 86 | final_name = "neck_out" 87 | if self.use_head: 88 | x = self.head(x) 89 | # for multi head, save ctc neck out for udml 90 | if isinstance(x, dict) and "ctc_nect" in x.keys(): 91 | y["neck_out"] = x["ctc_neck"] 92 | y["head_out"] = x 93 | elif isinstance(x, dict): 94 | y.update(x) 95 | else: 96 | y["head_out"] = x 97 | if self.return_all_feats: 98 | if self.training: 99 | return y 100 | elif isinstance(x, dict): 101 | return x 102 | else: 103 | return {final_name: x} 104 | else: 105 | return x 106 | -------------------------------------------------------------------------------- /python/rapidocr/networks/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
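# Backbone factory: the set of supported backbones depends on model_type
# ("det" vs "rec"/"cls"); the class named in config["name"] is instantiated below.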
14 | 15 | __all__ = ["build_backbone"] 16 | 17 | 18 | def build_backbone(config, model_type): 19 | if model_type == "det": 20 | from .det_mobilenet_v3 import MobileNetV3 21 | from .rec_hgnet import PPHGNet_small 22 | from .rec_lcnetv3 import PPLCNetV3 23 | 24 | support_dict = [ 25 | "MobileNetV3", 26 | "ResNet", 27 | "ResNet_vd", 28 | "ResNet_SAST", 29 | "PPLCNetV3", 30 | "PPHGNet_small", 31 | ] 32 | elif model_type == "rec" or model_type == "cls": 33 | from .rec_hgnet import PPHGNet_small 34 | from .rec_lcnetv3 import PPLCNetV3 35 | from .rec_mobilenet_v3 import MobileNetV3 36 | from .rec_svtrnet import SVTRNet 37 | from .rec_mv1_enhance import MobileNetV1Enhance 38 | 39 | support_dict = [ 40 | "MobileNetV1Enhance", 41 | "MobileNetV3", 42 | "ResNet", 43 | "ResNetFPN", 44 | "MTB", 45 | "ResNet31", 46 | "SVTRNet", 47 | "ViTSTR", 48 | "DenseNet", 49 | "PPLCNetV3", 50 | "PPHGNet_small", 51 | ] 52 | else: 53 | raise NotImplementedError 54 | 55 | module_name = config.pop("name") 56 | assert module_name in support_dict, Exception( 57 | "when model typs is {}, backbone only support {}".format( 58 | model_type, support_dict 59 | ) 60 | ) 61 | module_class = eval(module_name)(**config) 62 | return module_class 63 | -------------------------------------------------------------------------------- /python/rapidocr/networks/common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | 6 | class Hswish(nn.Module): 7 | def __init__(self, inplace=True): 8 | super(Hswish, self).__init__() 9 | self.inplace = inplace 10 | 11 | def forward(self, x): 12 | return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0 13 | 14 | 15 | # out = max(0, min(1, slop*x+offset)) 16 | # paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None) 17 | class Hsigmoid(nn.Module): 18 | def __init__(self, inplace=True): 19 | super(Hsigmoid, self).__init__() 20 | self.inplace = inplace 21 | 22 | def forward(self, x): 23 | # torch: F.relu6(x + 3., inplace=self.inplace) / 6. 24 | # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6. 
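        # relu6(1.2 * x + 3) / 6 == clip(0.2 * x + 0.5, 0, 1), i.e. Paddle's
        # hard_sigmoid with slope=0.2 and offset=0.5 (see the note above).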
25 | return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0 26 | 27 | 28 | class GELU(nn.Module): 29 | def __init__(self, inplace=True): 30 | super(GELU, self).__init__() 31 | self.inplace = inplace 32 | 33 | def forward(self, x): 34 | return torch.nn.functional.gelu(x) 35 | 36 | 37 | class Swish(nn.Module): 38 | def __init__(self, inplace=True): 39 | super(Swish, self).__init__() 40 | self.inplace = inplace 41 | 42 | def forward(self, x): 43 | if self.inplace: 44 | x.mul_(torch.sigmoid(x)) 45 | return x 46 | else: 47 | return x * torch.sigmoid(x) 48 | 49 | 50 | class Activation(nn.Module): 51 | def __init__(self, act_type, inplace=True): 52 | super(Activation, self).__init__() 53 | act_type = act_type.lower() 54 | if act_type == "relu": 55 | self.act = nn.ReLU(inplace=inplace) 56 | elif act_type == "relu6": 57 | self.act = nn.ReLU6(inplace=inplace) 58 | elif act_type == "sigmoid": 59 | raise NotImplementedError 60 | elif act_type == "hard_sigmoid": 61 | self.act = Hsigmoid( 62 | inplace 63 | ) # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)# 64 | elif act_type == "hard_swish" or act_type == "hswish": 65 | self.act = Hswish(inplace=inplace) 66 | elif act_type == "leakyrelu": 67 | self.act = nn.LeakyReLU(inplace=inplace) 68 | elif act_type == "gelu": 69 | self.act = GELU(inplace=inplace) 70 | elif act_type == "swish": 71 | self.act = Swish(inplace=inplace) 72 | else: 73 | raise NotImplementedError 74 | 75 | def forward(self, inputs): 76 | return self.act(inputs) 77 | -------------------------------------------------------------------------------- /python/rapidocr/networks/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
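# Head factory: build_head() resolves config["name"] to one of the det/rec/cls
# head classes below and instantiates it with the remaining config entries.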
14 | 15 | __all__ = ["build_head"] 16 | 17 | 18 | def build_head(config, **kwargs): 19 | # det head 20 | from .det_db_head import DBHead, PFHeadLocal 21 | 22 | # rec head 23 | from .rec_ctc_head import CTCHead 24 | from .rec_multi_head import MultiHead 25 | 26 | # cls head 27 | from .cls_head import ClsHead 28 | 29 | support_dict = [ 30 | "DBHead", 31 | "CTCHead", 32 | "ClsHead", 33 | "MultiHead", 34 | "PFHeadLocal", 35 | ] 36 | 37 | module_name = config.pop("name") 38 | char_num = config.pop("char_num", 6625) 39 | assert module_name in support_dict, Exception( 40 | "head only support {}".format(support_dict) 41 | ) 42 | module_class = eval(module_name)(**config, **kwargs) 43 | return module_class 44 | -------------------------------------------------------------------------------- /python/rapidocr/networks/heads/cls_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | 6 | class ClsHead(nn.Module): 7 | """ 8 | Class orientation 9 | Args: 10 | params(dict): super parameters for build Class network 11 | """ 12 | 13 | def __init__(self, in_channels, class_dim, **kwargs): 14 | super(ClsHead, self).__init__() 15 | self.pool = nn.AdaptiveAvgPool2d(1) 16 | self.fc = nn.Linear(in_channels, class_dim, bias=True) 17 | 18 | def forward(self, x): 19 | x = self.pool(x) 20 | x = torch.reshape(x, shape=[x.shape[0], x.shape[1]]) 21 | x = self.fc(x) 22 | x = F.softmax(x, dim=1) 23 | return x 24 | -------------------------------------------------------------------------------- /python/rapidocr/networks/heads/rec_ctc_head.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | from torch import nn 3 | 4 | 5 | class CTCHead(nn.Module): 6 | def __init__( 7 | self, 8 | in_channels, 9 | out_channels=6625, 10 | fc_decay=0.0004, 11 | mid_channels=None, 12 | return_feats=False, 13 | **kwargs 14 | ): 15 | super(CTCHead, self).__init__() 16 | if mid_channels is None: 17 | self.fc = nn.Linear( 18 | in_channels, 19 | out_channels, 20 | bias=True, 21 | ) 22 | else: 23 | self.fc1 = nn.Linear( 24 | in_channels, 25 | mid_channels, 26 | bias=True, 27 | ) 28 | self.fc2 = nn.Linear( 29 | mid_channels, 30 | out_channels, 31 | bias=True, 32 | ) 33 | 34 | self.out_channels = out_channels 35 | self.mid_channels = mid_channels 36 | self.return_feats = return_feats 37 | 38 | def forward(self, x, labels=None): 39 | if self.mid_channels is None: 40 | predicts = self.fc(x) 41 | else: 42 | x = self.fc1(x) 43 | predicts = self.fc2(x) 44 | 45 | if self.return_feats: 46 | result = (x, predicts) 47 | else: 48 | result = predicts 49 | 50 | if not self.training: 51 | predicts = F.softmax(predicts, dim=2) 52 | result = predicts 53 | 54 | return result 55 | -------------------------------------------------------------------------------- /python/rapidocr/networks/heads/rec_multi_head.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ..necks.rnn import Im2Seq, SequenceEncoder 4 | from .rec_ctc_head import CTCHead 5 | 6 | 7 | class FCTranspose(nn.Module): 8 | def __init__(self, in_channels, out_channels, only_transpose=False): 9 | super().__init__() 10 | self.only_transpose = only_transpose 11 | if not self.only_transpose: 12 | self.fc = nn.Linear(in_channels, out_channels, bias=False) 13 | 14 | def forward(self, x): 15 | if self.only_transpose: 16 | return x.permute([0, 2, 1]) 17 | 
else: 18 | return self.fc(x.permute([0, 2, 1])) 19 | 20 | 21 | class MultiHead(nn.Module): 22 | def __init__(self, in_channels, out_channels_list, **kwargs): 23 | super().__init__() 24 | self.head_list = kwargs.pop("head_list") 25 | 26 | self.gtc_head = "sar" 27 | assert len(self.head_list) >= 2 28 | for idx, head_name in enumerate(self.head_list): 29 | name = list(head_name)[0] 30 | if name == "SARHead": 31 | pass 32 | 33 | elif name == "NRTRHead": 34 | pass 35 | elif name == "CTCHead": 36 | # ctc neck 37 | self.encoder_reshape = Im2Seq(in_channels) 38 | neck_args = self.head_list[idx][name]["Neck"] 39 | encoder_type = neck_args.pop("name") 40 | self.ctc_encoder = SequenceEncoder( 41 | in_channels=in_channels, encoder_type=encoder_type, **neck_args 42 | ) 43 | # ctc head 44 | head_args = self.head_list[idx][name].get("Head", {}) 45 | if head_args is None: 46 | head_args = {} 47 | 48 | self.ctc_head = CTCHead( 49 | in_channels=self.ctc_encoder.out_channels, 50 | out_channels=out_channels_list["CTCLabelDecode"], 51 | **head_args, 52 | ) 53 | else: 54 | raise NotImplementedError(f"{name} is not supported in MultiHead yet") 55 | 56 | def forward(self, x, data=None): 57 | ctc_encoder = self.ctc_encoder(x) 58 | return self.ctc_head(ctc_encoder) 59 | -------------------------------------------------------------------------------- /python/rapidocr/networks/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
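# Neck factory: resolves config["name"] to DBFPN/RSEFPN/LKPAN (detection) or
# SequenceEncoder (recognition) and instantiates it with the rest of the config.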
14 | 15 | __all__ = ["build_neck"] 16 | 17 | 18 | def build_neck(config): 19 | from .db_fpn import DBFPN, LKPAN, RSEFPN 20 | from .rnn import SequenceEncoder 21 | 22 | support_dict = ["DBFPN", "SequenceEncoder", "RSEFPN", "LKPAN"] 23 | 24 | module_name = config.pop("name") 25 | assert module_name in support_dict, Exception( 26 | "neck only support {}".format(support_dict) 27 | ) 28 | module_class = eval(module_name)(**config) 29 | return module_class 30 | -------------------------------------------------------------------------------- /python/rapidocr/networks/necks/intracl.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class IntraCLBlock(nn.Module): 5 | def __init__(self, in_channels=96, reduce_factor=4): 6 | super(IntraCLBlock, self).__init__() 7 | self.channels = in_channels 8 | self.rf = reduce_factor 9 | self.conv1x1_reduce_channel = nn.Conv2d( 10 | self.channels, self.channels // self.rf, kernel_size=1, stride=1, padding=0 11 | ) 12 | self.conv1x1_return_channel = nn.Conv2d( 13 | self.channels // self.rf, self.channels, kernel_size=1, stride=1, padding=0 14 | ) 15 | 16 | self.v_layer_7x1 = nn.Conv2d( 17 | self.channels // self.rf, 18 | self.channels // self.rf, 19 | kernel_size=(7, 1), 20 | stride=(1, 1), 21 | padding=(3, 0), 22 | ) 23 | self.v_layer_5x1 = nn.Conv2d( 24 | self.channels // self.rf, 25 | self.channels // self.rf, 26 | kernel_size=(5, 1), 27 | stride=(1, 1), 28 | padding=(2, 0), 29 | ) 30 | self.v_layer_3x1 = nn.Conv2d( 31 | self.channels // self.rf, 32 | self.channels // self.rf, 33 | kernel_size=(3, 1), 34 | stride=(1, 1), 35 | padding=(1, 0), 36 | ) 37 | 38 | self.q_layer_1x7 = nn.Conv2d( 39 | self.channels // self.rf, 40 | self.channels // self.rf, 41 | kernel_size=(1, 7), 42 | stride=(1, 1), 43 | padding=(0, 3), 44 | ) 45 | self.q_layer_1x5 = nn.Conv2d( 46 | self.channels // self.rf, 47 | self.channels // self.rf, 48 | kernel_size=(1, 5), 49 | stride=(1, 1), 50 | padding=(0, 2), 51 | ) 52 | self.q_layer_1x3 = nn.Conv2d( 53 | self.channels // self.rf, 54 | self.channels // self.rf, 55 | kernel_size=(1, 3), 56 | stride=(1, 1), 57 | padding=(0, 1), 58 | ) 59 | 60 | # base 61 | self.c_layer_7x7 = nn.Conv2d( 62 | self.channels // self.rf, 63 | self.channels // self.rf, 64 | kernel_size=(7, 7), 65 | stride=(1, 1), 66 | padding=(3, 3), 67 | ) 68 | self.c_layer_5x5 = nn.Conv2d( 69 | self.channels // self.rf, 70 | self.channels // self.rf, 71 | kernel_size=(5, 5), 72 | stride=(1, 1), 73 | padding=(2, 2), 74 | ) 75 | self.c_layer_3x3 = nn.Conv2d( 76 | self.channels // self.rf, 77 | self.channels // self.rf, 78 | kernel_size=(3, 3), 79 | stride=(1, 1), 80 | padding=(1, 1), 81 | ) 82 | 83 | self.bn = nn.BatchNorm2d(self.channels) 84 | self.relu = nn.ReLU() 85 | 86 | def forward(self, x): 87 | x_new = self.conv1x1_reduce_channel(x) 88 | 89 | x_7_c = self.c_layer_7x7(x_new) 90 | x_7_v = self.v_layer_7x1(x_new) 91 | x_7_q = self.q_layer_1x7(x_new) 92 | x_7 = x_7_c + x_7_v + x_7_q 93 | 94 | x_5_c = self.c_layer_5x5(x_7) 95 | x_5_v = self.v_layer_5x1(x_7) 96 | x_5_q = self.q_layer_1x5(x_7) 97 | x_5 = x_5_c + x_5_v + x_5_q 98 | 99 | x_3_c = self.c_layer_3x3(x_5) 100 | x_3_v = self.v_layer_3x1(x_5) 101 | x_3_q = self.q_layer_1x3(x_5) 102 | x_3 = x_3_c + x_3_v + x_3_q 103 | 104 | x_relation = self.conv1x1_return_channel(x_3) 105 | 106 | x_relation = self.bn(x_relation) 107 | x_relation = self.relu(x_relation) 108 | 109 | return x + x_relation 110 | 111 | 112 | def build_intraclblock_list(num_block): 113 | 
IntraCLBlock_list = nn.ModuleList() 114 | for i in range(num_block): 115 | IntraCLBlock_list.append(IntraCLBlock()) 116 | 117 | return IntraCLBlock_list 118 | -------------------------------------------------------------------------------- /python/rapidocr/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .download_file import DownloadFile, DownloadFileException, DownloadFileInput 5 | from .load_image import LoadImage, LoadImageError 6 | from .logger import Logger 7 | from .output import RapidOCROutput 8 | from .parse_parameters import ParseParams 9 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side 10 | from .vis_res import VisRes 11 | -------------------------------------------------------------------------------- /python/rapidocr/utils/download_file.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | import sys 6 | from dataclasses import dataclass 7 | from pathlib import Path 8 | from typing import Optional, Union 9 | 10 | import requests 11 | from tqdm import tqdm 12 | 13 | from .utils import get_file_sha256 14 | 15 | 16 | @dataclass 17 | class DownloadFileInput: 18 | file_url: str 19 | save_path: Union[str, Path] 20 | logger: logging.Logger 21 | sha256: Optional[str] = None 22 | 23 | 24 | class DownloadFile: 25 | BLOCK_SIZE = 1024 # 1 KiB 26 | REQUEST_TIMEOUT = 60 27 | 28 | @classmethod 29 | def run(cls, input_params: DownloadFileInput): 30 | save_path = Path(input_params.save_path) 31 | 32 | logger = input_params.logger 33 | cls._ensure_parent_dir_exists(save_path) 34 | if cls._should_skip_download(save_path, input_params.sha256, logger): 35 | return 36 | 37 | response = cls._make_http_request(input_params.file_url, logger) 38 | cls._save_response_with_progress(response, save_path, logger) 39 | 40 | @staticmethod 41 | def _ensure_parent_dir_exists(path: Path): 42 | path.parent.mkdir(parents=True, exist_ok=True) 43 | 44 | @classmethod 45 | def _should_skip_download( 46 | cls, path: Path, expected_sha256: Optional[str], logger: logging.Logger 47 | ) -> bool: 48 | if not path.exists(): 49 | return False 50 | 51 | if expected_sha256 is None: 52 | logger.info("File exists (no checksum verification): %s", path) 53 | return True 54 | 55 | if cls.check_file_sha256(path, expected_sha256): 56 | logger.info("File exists and is valid: %s", path) 57 | return True 58 | 59 | logger.warning("File exists but is invalid, redownloading: %s", path) 60 | return False 61 | 62 | @classmethod 63 | def _make_http_request(cls, url: str, logger: logging.Logger) -> requests.Response: 64 | logger.info("Initiating download: %s", url) 65 | try: 66 | response = requests.get(url, stream=True, timeout=cls.REQUEST_TIMEOUT) 67 | response.raise_for_status() # Raises HTTPError for 4XX/5XX 68 | return response 69 | except requests.RequestException as e: 70 | logger.error("Download failed: %s", url) 71 | raise DownloadFileException(f"Failed to download {url}") from e 72 | 73 | @classmethod 74 | def _save_response_with_progress( 75 | cls, response: requests.Response, save_path: Path, logger: logging.Logger 76 | ) -> None: 77 | total_size = int(response.headers.get("content-length", 0)) 78 | logger.info("Download size: %.2fMB", total_size / 1024 / 1024) 79 | 80 | with ( 81 | tqdm( 82 | total=total_size, 83 | 
unit="iB", 84 | unit_scale=True, 85 | disable=not cls.check_is_atty(), 86 | ) as progress_bar, 87 | open(save_path, "wb") as output_file, 88 | ): 89 | for chunk in response.iter_content(chunk_size=cls.BLOCK_SIZE): 90 | progress_bar.update(len(chunk)) 91 | output_file.write(chunk) 92 | 93 | logger.info("Successfully saved to: %s", save_path) 94 | 95 | @staticmethod 96 | def check_file_sha256(file_path: Union[str, Path], gt_sha256: str) -> bool: 97 | return get_file_sha256(file_path) == gt_sha256 98 | 99 | @staticmethod 100 | def check_is_atty() -> bool: 101 | try: 102 | is_interactive = sys.stderr.isatty() 103 | except AttributeError: 104 | return False 105 | return is_interactive 106 | 107 | 108 | class DownloadFileException(Exception): 109 | pass 110 | -------------------------------------------------------------------------------- /python/rapidocr/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | 6 | import colorlog 7 | 8 | 9 | class Logger: 10 | def __init__(self, log_level=logging.DEBUG, logger_name=None): 11 | self.logger = logging.getLogger(logger_name) 12 | self.logger.setLevel(log_level) 13 | self.logger.propagate = False 14 | 15 | formatter = colorlog.ColoredFormatter( 16 | "%(log_color)s[%(levelname)s] %(asctime)s [RapidOCR] %(filename)s:%(lineno)d: %(message)s", 17 | log_colors={ 18 | "DEBUG": "cyan", 19 | "INFO": "green", 20 | "WARNING": "yellow", 21 | "ERROR": "red", 22 | "CRITICAL": "red,bg_white", 23 | }, 24 | ) 25 | 26 | if not self.logger.handlers: 27 | console_handler = logging.StreamHandler() 28 | console_handler.setFormatter(formatter) 29 | 30 | for handler in self.logger.handlers: 31 | self.logger.removeHandler(handler) 32 | 33 | console_handler.setLevel(log_level) 34 | self.logger.addHandler(console_handler) 35 | 36 | def get_log(self): 37 | return self.logger 38 | -------------------------------------------------------------------------------- /python/rapidocr/utils/output.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from dataclasses import dataclass, field 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import numpy as np 8 | 9 | from .logger import Logger 10 | from .utils import save_img 11 | from .vis_res import VisRes 12 | 13 | logger = Logger(logger_name=__name__).get_log() 14 | 15 | 16 | @dataclass 17 | class RapidOCROutput: 18 | img: Optional[np.ndarray] = None 19 | boxes: Optional[np.ndarray] = None 20 | txts: Optional[Tuple[str]] = None 21 | scores: Optional[Tuple[float]] = None 22 | word_results: Tuple[Tuple[str, float, Optional[List[List[int]]]]] = ( 23 | ("", 1.0, None), 24 | ) 25 | elapse_list: List[Union[float, None]] = field(default_factory=list) 26 | elapse: float = field(init=False) 27 | lang_type: Optional[str] = None 28 | 29 | def __post_init__(self): 30 | self.elapse = sum(v for v in self.elapse_list if isinstance(v, float)) 31 | 32 | def __len__(self): 33 | if self.txts is None: 34 | return 0 35 | return len(self.txts) 36 | 37 | def to_json(self): 38 | pass 39 | 40 | def vis(self, save_path: Optional[str] = None, font_path: Optional[str] = None): 41 | if self.img is None or self.boxes is None: 42 | logger.warning("No image or boxes to visualize.") 43 | return 44 | 45 | vis = VisRes() 46 | if all(v is None for v in self.word_results): 47 | vis_img = vis( 
48 | self.img, 49 | self.boxes, 50 | self.txts, 51 | self.scores, 52 | font_path=font_path, 53 | lang_type=self.lang_type, 54 | ) 55 | 56 | if save_path is not None: 57 | save_img(save_path, vis_img) 58 | logger.info("Visualization saved as %s", save_path) 59 | return vis_img 60 | 61 | # single word vis 62 | words_results = self.word_results 63 | words, words_scores, words_boxes = list(zip(*words_results)) 64 | vis_img = vis( 65 | self.img, 66 | words_boxes, 67 | words, 68 | words_scores, 69 | font_path=font_path, 70 | lang_type=self.lang_type, 71 | ) 72 | 73 | if save_path is not None: 74 | save_img(save_path, vis_img) 75 | logger.info("Single word visualization saved as %s", save_path) 76 | return vis_img 77 | -------------------------------------------------------------------------------- /python/rapidocr/utils/parse_parameters.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from enum import Enum 5 | from pathlib import Path 6 | from typing import Any, Dict, Union 7 | 8 | from omegaconf import DictConfig, OmegaConf 9 | 10 | from .typings import ( 11 | EngineType, 12 | LangCls, 13 | LangDet, 14 | LangRec, 15 | ModelType, 16 | OCRVersion, 17 | TaskType, 18 | ) 19 | 20 | 21 | class ParseParams(OmegaConf): 22 | def __init__(self): 23 | pass 24 | 25 | @classmethod 26 | def load(cls, file_path: Union[str, Path]): 27 | cfg = OmegaConf.load(file_path) 28 | 29 | cfg.Det = cls._convert_value_to_enum(cfg.Det) 30 | cfg.Cls = cls._convert_value_to_enum(cfg.Cls) 31 | cfg.Rec = cls._convert_value_to_enum(cfg.Rec) 32 | return cfg 33 | 34 | @classmethod 35 | def update_batch(cls, cfg: DictConfig, params: Dict[str, Any]) -> DictConfig: 36 | global_keys = list(OmegaConf.to_container(cfg.Global).keys()) 37 | enum_params = [ 38 | "engine_type", 39 | "model_type", 40 | "ocr_version", 41 | "lang_type", 42 | "task_type", 43 | ] 44 | for k, v in params.items(): 45 | if k.startswith("Global") and k.split(".")[1] not in global_keys: 46 | raise ValueError(f"{k} is not a valid key.") 47 | 48 | if k.split(".")[1] in enum_params and not isinstance(v, Enum): 49 | raise TypeError(f"The value of {k} must be Enum Type.") 50 | 51 | cls.update(cfg, k, v) 52 | return cfg 53 | 54 | @classmethod 55 | def _convert_value_to_enum(cls, cfg: DictConfig): 56 | cfg.engine_type = EngineType(cfg.engine_type) 57 | cfg.model_type = ModelType(cfg.model_type) 58 | cfg.ocr_version = OCRVersion(cfg.ocr_version) 59 | cfg.task_type = TaskType(cfg.task_type) 60 | cfg.lang_type = cls.LangType(cfg.task_type, cfg.lang_type) 61 | return cfg 62 | 63 | @staticmethod 64 | def LangType(task_type: TaskType, lang_type: str): 65 | if task_type == TaskType.DET: 66 | return LangDet(lang_type) 67 | 68 | if task_type == TaskType.CLS: 69 | return LangCls(lang_type) 70 | 71 | if task_type == TaskType.REC: 72 | return LangRec(lang_type) 73 | 74 | raise ValueError(f"task_type {task_type.value} is not in [Det, Cls, Rec]") 75 | -------------------------------------------------------------------------------- /python/rapidocr/utils/process_img.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from typing import Tuple 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def reduce_max_side( 11 | img: np.ndarray, max_side_len: int = 2000 12 | ) -> Tuple[np.ndarray, float, float]: 13 | h, w = img.shape[:2] 14 | 15 | 
ratio = 1.0 16 | if max(h, w) > max_side_len: 17 | if h > w: 18 | ratio = float(max_side_len) / h 19 | else: 20 | ratio = float(max_side_len) / w 21 | 22 | resize_h = int(h * ratio) 23 | resize_w = int(w * ratio) 24 | 25 | resize_h = int(round(resize_h / 32) * 32) 26 | resize_w = int(round(resize_w / 32) * 32) 27 | 28 | try: 29 | if int(resize_w) <= 0 or int(resize_h) <= 0: 30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 31 | img = cv2.resize(img, (resize_w, resize_h)) 32 | except Exception as exc: 33 | raise ResizeImgError() from exc 34 | 35 | ratio_h = h / resize_h 36 | ratio_w = w / resize_w 37 | return img, ratio_h, ratio_w 38 | 39 | 40 | def increase_min_side( 41 | img: np.ndarray, min_side_len: int = 30 42 | ) -> Tuple[np.ndarray, float, float]: 43 | h, w = img.shape[:2] 44 | 45 | ratio = 1.0 46 | if min(h, w) < min_side_len: 47 | if h < w: 48 | ratio = float(min_side_len) / h 49 | else: 50 | ratio = float(min_side_len) / w 51 | 52 | resize_h = int(h * ratio) 53 | resize_w = int(w * ratio) 54 | 55 | resize_h = int(round(resize_h / 32) * 32) 56 | resize_w = int(round(resize_w / 32) * 32) 57 | 58 | try: 59 | if int(resize_w) <= 0 or int(resize_h) <= 0: 60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 61 | img = cv2.resize(img, (resize_w, resize_h)) 62 | except Exception as exc: 63 | raise ResizeImgError() from exc 64 | 65 | ratio_h = h / resize_h 66 | ratio_w = w / resize_w 67 | return img, ratio_h, ratio_w 68 | 69 | 70 | def add_round_letterbox( 71 | img: np.ndarray, 72 | padding_tuple: Tuple[int, int, int, int], 73 | ) -> np.ndarray: 74 | padded_img = cv2.copyMakeBorder( 75 | img, 76 | padding_tuple[0], 77 | padding_tuple[1], 78 | padding_tuple[2], 79 | padding_tuple[3], 80 | cv2.BORDER_CONSTANT, 81 | value=(0, 0, 0), 82 | ) 83 | return padded_img 84 | 85 | 86 | class ResizeImgError(Exception): 87 | pass 88 | -------------------------------------------------------------------------------- /python/rapidocr/utils/typings.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from enum import Enum 5 | 6 | 7 | class LangDet(Enum): 8 | CH = "ch" 9 | EN = "en" 10 | MULTI = "multi" 11 | 12 | 13 | class LangCls(Enum): 14 | CH = "ch" 15 | 16 | 17 | class LangRec(Enum): 18 | CH = "ch" 19 | CH_DOC = "ch_doc" 20 | EN = "en" 21 | ARABIC = "arabic" 22 | CHINESE_CHT = "chinese_cht" 23 | CYRILLIC = "cyrillic" 24 | DEVANAGARI = "devanagari" 25 | JAPAN = "japan" 26 | KOREAN = "korean" 27 | KA = "ka" 28 | LATIN = "latin" 29 | TA = "ta" 30 | TE = "te" 31 | 32 | 33 | class OCRVersion(Enum): 34 | PPOCRV4 = "PP-OCRv4" 35 | PPOCRV5 = "PP-OCRv5" 36 | 37 | 38 | class EngineType(Enum): 39 | ONNXRUNTIME = "onnxruntime" 40 | OPENVINO = "openvino" 41 | PADDLE = "paddle" 42 | TORCH = "torch" 43 | 44 | 45 | class ModelType(Enum): 46 | MOBILE = "mobile" 47 | SERVER = "server" 48 | 49 | 50 | class TaskType(Enum): 51 | DET = "det" 52 | CLS = "cls" 53 | REC = "rec" 54 | -------------------------------------------------------------------------------- /python/rapidocr/utils/utils.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import hashlib 5 | import importlib 6 | from pathlib import Path 7 | from typing import Tuple, Union 8 | from urllib.parse import urlparse 9 | 10 | import cv2 11 | import numpy as np 12 | 13 | 14 | 
def quads_to_rect_bbox(bbox: np.ndarray) -> Tuple[float, float, float, float]: 15 | if bbox.ndim != 3: 16 | raise ValueError("bbox ndim must be 3") 17 | 18 | if bbox.shape[1] != 4 or bbox.shape[2] != 2: 19 | raise ValueError("bbox shape must be (N, 4, 2)") 20 | 21 | all_x, all_y = (bbox[:, :, 0].flatten(), bbox[:, :, 1].flatten()) 22 | x_min, y_min = np.min(all_x), np.min(all_y) 23 | x_max, y_max = np.max(all_x), np.max(all_y) 24 | return float(x_min), float(y_min), float(x_max), float(y_max) 25 | 26 | 27 | def has_chinese_char(text: str) -> bool: 28 | return any("\u4e00" <= ch <= "\u9fff" for ch in text) 29 | 30 | 31 | def get_file_sha256(file_path: Union[str, Path], chunk_size: int = 65536) -> str: 32 | with open(file_path, "rb") as file: 33 | sha_signature = hashlib.sha256() 34 | while True: 35 | chunk = file.read(chunk_size) 36 | if not chunk: 37 | break 38 | sha_signature.update(chunk) 39 | 40 | return sha_signature.hexdigest() 41 | 42 | 43 | def save_img(save_path: Union[str, Path], img: np.ndarray): 44 | if not Path(save_path).parent.exists(): 45 | Path(save_path).parent.mkdir(parents=True, exist_ok=True) 46 | 47 | cv2.imwrite(str(save_path), img) 48 | 49 | 50 | def is_url(url: str) -> bool: 51 | try: 52 | result = urlparse(url) 53 | return all([result.scheme, result.netloc]) 54 | except Exception: 55 | return False 56 | 57 | 58 | def import_package(name, package=None): 59 | try: 60 | module = importlib.import_module(name, package=package) 61 | return module 62 | except ModuleNotFoundError: 63 | return None 64 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidOCR 5 | from .utils import LoadImageError, VisRes 6 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/cal_rec_boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import CalRecBoxes 5 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/ch_ppocr_cls/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_cls import TextClassifier 5 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/ch_ppocr_cls/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from typing import List, Tuple 15 | 16 | import numpy as np 17 | 18 | 19 | class ClsPostProcess: 20 | def __init__(self, label_list: List[str]): 21 | self.label_list = label_list 22 | 23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]: 24 | pred_idxs = preds.argmax(axis=1) 25 | decode_out = [ 26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs) 27 | ] 28 | return decode_out 29 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/ch_ppocr_det/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_detect import TextDetector 5 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/ch_ppocr_rec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_recognize import TextRecognizer 5 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/config.yaml: -------------------------------------------------------------------------------- 1 | Global: 2 | text_score: 0.5 3 | use_det: true 4 | use_cls: true 5 | use_rec: true 6 | print_verbose: false 7 | min_height: 30 8 | width_height_ratio: 8 9 | max_side_len: 2000 10 | min_side_len: 30 11 | return_word_box: false 12 | 13 | intra_op_num_threads: &intra_nums -1 14 | inter_op_num_threads: &inter_nums -1 15 | 16 | Det: 17 | intra_op_num_threads: *intra_nums 18 | inter_op_num_threads: *inter_nums 19 | 20 | use_cuda: false 21 | use_dml: false 22 | 23 | model_path: models/ch_PP-OCRv4_det_infer.onnx 24 | 25 | limit_side_len: 736 26 | limit_type: min 27 | std: [ 0.5, 0.5, 0.5 ] 28 | mean: [ 0.5, 0.5, 0.5 ] 29 | 30 | thresh: 0.3 31 | box_thresh: 0.5 32 | max_candidates: 1000 33 | unclip_ratio: 1.6 34 | use_dilation: true 35 | score_mode: fast 36 | 37 | Cls: 38 | intra_op_num_threads: *intra_nums 39 | inter_op_num_threads: *inter_nums 40 | 41 | use_cuda: false 42 | use_dml: false 43 | 44 | model_path: models/ch_ppocr_mobile_v2.0_cls_infer.onnx 45 | 46 | cls_image_shape: [3, 48, 192] 47 | cls_batch_num: 6 48 | cls_thresh: 0.9 49 | label_list: ['0', '180'] 50 | 51 | Rec: 52 | intra_op_num_threads: *intra_nums 53 | inter_op_num_threads: *inter_nums 54 | 55 | use_cuda: false 56 | use_dml: false 57 | 58 | model_path: models/ch_PP-OCRv4_rec_infer.onnx 59 | 60 | rec_img_shape: [3, 48, 320] 61 | rec_batch_num: 6 62 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_onnxruntime/models/.gitkeep -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import Dict, Union 6 | 7 | import yaml 8 | 9 | from .infer_engine import OrtInferSession 10 | from .load_image import LoadImage, LoadImageError 11 | from .logger import get_logger 12 | from 
.parse_parameters import UpdateParameters, init_args, update_model_path 13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side 14 | from .vis_res import VisRes 15 | 16 | 17 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]: 18 | with open(yaml_path, "rb") as f: 19 | data = yaml.load(f, Loader=yaml.Loader) 20 | return data 21 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | from functools import lru_cache 6 | 7 | 8 | @lru_cache(maxsize=32) 9 | def get_logger(name: str) -> logging.Logger: 10 | logger = logging.getLogger(name) 11 | logger.setLevel(logging.DEBUG) 12 | 13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s" 14 | format_str = logging.Formatter(fmt) 15 | 16 | sh = logging.StreamHandler() 17 | sh.setLevel(logging.DEBUG) 18 | 19 | logger.addHandler(sh) 20 | sh.setFormatter(format_str) 21 | return logger 22 | -------------------------------------------------------------------------------- /python/rapidocr_onnxruntime/utils/process_img.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from typing import Tuple 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def reduce_max_side( 11 | img: np.ndarray, max_side_len: int = 2000 12 | ) -> Tuple[np.ndarray, float, float]: 13 | h, w = img.shape[:2] 14 | 15 | ratio = 1.0 16 | if max(h, w) > max_side_len: 17 | if h > w: 18 | ratio = float(max_side_len) / h 19 | else: 20 | ratio = float(max_side_len) / w 21 | 22 | resize_h = int(h * ratio) 23 | resize_w = int(w * ratio) 24 | 25 | resize_h = int(round(resize_h / 32) * 32) 26 | resize_w = int(round(resize_w / 32) * 32) 27 | 28 | try: 29 | if int(resize_w) <= 0 or int(resize_h) <= 0: 30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 31 | img = cv2.resize(img, (resize_w, resize_h)) 32 | except Exception as exc: 33 | raise ResizeImgError() from exc 34 | 35 | ratio_h = h / resize_h 36 | ratio_w = w / resize_w 37 | return img, ratio_h, ratio_w 38 | 39 | 40 | def increase_min_side( 41 | img: np.ndarray, min_side_len: int = 30 42 | ) -> Tuple[np.ndarray, float, float]: 43 | h, w = img.shape[:2] 44 | 45 | ratio = 1.0 46 | if min(h, w) < min_side_len: 47 | if h < w: 48 | ratio = float(min_side_len) / h 49 | else: 50 | ratio = float(min_side_len) / w 51 | 52 | resize_h = int(h * ratio) 53 | resize_w = int(w * ratio) 54 | 55 | resize_h = int(round(resize_h / 32) * 32) 56 | resize_w = int(round(resize_w / 32) * 32) 57 | 58 | try: 59 | if int(resize_w) <= 0 or int(resize_h) <= 0: 60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 61 | img = cv2.resize(img, (resize_w, resize_h)) 62 | except Exception as exc: 63 | raise ResizeImgError() from exc 64 | 65 | ratio_h = h / resize_h 66 | ratio_w = w / resize_w 67 | return img, ratio_h, ratio_w 68 | 69 | 70 | def add_round_letterbox( 71 | img: np.ndarray, 72 | padding_tuple: Tuple[int, int, int, int], 73 | ) -> np.ndarray: 74 | padded_img = cv2.copyMakeBorder( 75 | img, 76 | padding_tuple[0], 77 | padding_tuple[1], 78 | padding_tuple[2], 79 | padding_tuple[3], 80 | cv2.BORDER_CONSTANT, 81 | value=(0, 0, 0), 82 | ) 83 | return padded_img 84 | 85 | 86 | class 
ResizeImgError(Exception): 87 | pass 88 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidOCR 5 | from .utils import LoadImageError, VisRes 6 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/cal_rec_boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import CalRecBoxes 5 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/ch_ppocr_cls/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_cls import TextClassifier 5 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/ch_ppocr_cls/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from typing import List, Tuple 15 | 16 | import numpy as np 17 | 18 | 19 | class ClsPostProcess: 20 | def __init__(self, label_list: List[str]): 21 | self.label_list = label_list 22 | 23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]: 24 | pred_idxs = preds.argmax(axis=1) 25 | decode_out = [ 26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs) 27 | ] 28 | return decode_out 29 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/ch_ppocr_det/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_detect import TextDetector 5 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/ch_ppocr_rec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_recognize import TextRecognizer 5 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/config.yaml: -------------------------------------------------------------------------------- 1 | Global: 2 | text_score: 0.5 3 | use_det: true 4 | use_cls: true 5 | use_rec: true 6 | print_verbose: false 7 | min_height: 30 8 | width_height_ratio: 8 9 | max_side_len: 2000 10 | min_side_len: 30 11 | return_word_box: false 12 | 13 | inference_num_threads: &infer_num_threads -1 14 | 15 | Det: 16 | inference_num_threads: *infer_num_threads 17 | 18 | use_cuda: false 19 | 20 | model_path: models/ch_PP-OCRv4_det_infer.onnx 21 | 22 | limit_side_len: 736 23 | limit_type: min 24 | std: [ 0.5, 0.5, 0.5 ] 25 | mean: [ 0.5, 0.5, 0.5 ] 26 | 27 | thresh: 0.3 28 | box_thresh: 0.5 29 | max_candidates: 1000 30 | unclip_ratio: 1.6 31 | use_dilation: true 32 | score_mode: fast 33 | 34 | Cls: 35 | inference_num_threads: *infer_num_threads 36 | 37 | use_cuda: false 38 | 39 | model_path: models/ch_ppocr_mobile_v2.0_cls_infer.onnx 40 | 41 | cls_image_shape: [3, 48, 192] 42 | cls_batch_num: 6 43 | cls_thresh: 0.9 44 | label_list: ['0', '180'] 45 | 46 | Rec: 47 | inference_num_threads: *infer_num_threads 48 | 49 | use_cuda: false 50 | 51 | model_path: models/ch_PP-OCRv4_rec_infer.onnx 52 | 53 | rec_img_shape: [3, 48, 320] 54 | rec_batch_num: 6 55 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_openvino/models/.gitkeep -------------------------------------------------------------------------------- /python/rapidocr_openvino/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import Dict, Union 6 | 7 | import yaml 8 | 9 | from .infer_engine import OpenVINOInferSession 10 | from .load_image import LoadImage, LoadImageError 11 | from .logger import get_logger 12 | from .parse_parameters import UpdateParameters, init_args, update_model_path 13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side 14 | from .vis_res import VisRes 15 | 16 | 17 | 
def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]: 18 | with open(yaml_path, "rb") as f: 19 | data = yaml.load(f, Loader=yaml.Loader) 20 | return data 21 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/utils/infer_engine.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import os 5 | import traceback 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | from openvino.runtime import Core 10 | 11 | 12 | class OpenVINOInferSession: 13 | def __init__(self, config): 14 | core = Core() 15 | 16 | self._verify_model(config["model_path"]) 17 | model_onnx = core.read_model(config["model_path"]) 18 | 19 | cpu_nums = os.cpu_count() 20 | infer_num_threads = config.get("inference_num_threads", -1) 21 | if infer_num_threads != -1 and 1 <= infer_num_threads <= cpu_nums: 22 | core.set_property("CPU", {"INFERENCE_NUM_THREADS": str(infer_num_threads)}) 23 | 24 | compile_model = core.compile_model(model=model_onnx, device_name="CPU") 25 | self.session = compile_model.create_infer_request() 26 | 27 | def __call__(self, input_content: np.ndarray) -> np.ndarray: 28 | try: 29 | self.session.infer(inputs=[input_content]) 30 | return self.session.get_output_tensor().data 31 | except Exception as e: 32 | error_info = traceback.format_exc() 33 | raise OpenVIONError(error_info) from e 34 | 35 | @staticmethod 36 | def _verify_model(model_path): 37 | model_path = Path(model_path) 38 | if not model_path.exists(): 39 | raise FileNotFoundError(f"{model_path} does not exists.") 40 | if not model_path.is_file(): 41 | raise FileExistsError(f"{model_path} is not a file.") 42 | 43 | 44 | class OpenVIONError(Exception): 45 | pass 46 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | from functools import lru_cache 6 | 7 | 8 | @lru_cache(maxsize=32) 9 | def get_logger(name: str): 10 | logger = logging.getLogger(name) 11 | logger.setLevel(logging.DEBUG) 12 | 13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s" 14 | format_str = logging.Formatter(fmt) 15 | 16 | sh = logging.StreamHandler() 17 | sh.setLevel(logging.DEBUG) 18 | 19 | logger.addHandler(sh) 20 | sh.setFormatter(format_str) 21 | return logger 22 | -------------------------------------------------------------------------------- /python/rapidocr_openvino/utils/process_img.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from typing import Tuple 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def reduce_max_side( 11 | img: np.ndarray, max_side_len: int = 2000 12 | ) -> Tuple[np.ndarray, float, float]: 13 | h, w = img.shape[:2] 14 | 15 | ratio = 1.0 16 | if max(h, w) > max_side_len: 17 | if h > w: 18 | ratio = float(max_side_len) / h 19 | else: 20 | ratio = float(max_side_len) / w 21 | 22 | resize_h = int(h * ratio) 23 | resize_w = int(w * ratio) 24 | 25 | resize_h = int(round(resize_h / 32) * 32) 26 | resize_w = int(round(resize_w / 32) * 32) 27 | 28 | try: 29 | if int(resize_w) <= 0 or int(resize_h) <= 0: 30 | raise ResizeImgError("resize_w or resize_h is less than or 
equal to 0") 31 | img = cv2.resize(img, (resize_w, resize_h)) 32 | except Exception as exc: 33 | raise ResizeImgError() from exc 34 | 35 | ratio_h = h / resize_h 36 | ratio_w = w / resize_w 37 | return img, ratio_h, ratio_w 38 | 39 | 40 | def increase_min_side( 41 | img: np.ndarray, min_side_len: int = 30 42 | ) -> Tuple[np.ndarray, float, float]: 43 | h, w = img.shape[:2] 44 | 45 | ratio = 1.0 46 | if min(h, w) < min_side_len: 47 | if h < w: 48 | ratio = float(min_side_len) / h 49 | else: 50 | ratio = float(min_side_len) / w 51 | 52 | resize_h = int(h * ratio) 53 | resize_w = int(w * ratio) 54 | 55 | resize_h = int(round(resize_h / 32) * 32) 56 | resize_w = int(round(resize_w / 32) * 32) 57 | 58 | try: 59 | if int(resize_w) <= 0 or int(resize_h) <= 0: 60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 61 | img = cv2.resize(img, (resize_w, resize_h)) 62 | except Exception as exc: 63 | raise ResizeImgError() from exc 64 | 65 | ratio_h = h / resize_h 66 | ratio_w = w / resize_w 67 | return img, ratio_h, ratio_w 68 | 69 | 70 | def add_round_letterbox( 71 | img: np.ndarray, 72 | padding_tuple: Tuple[int, int, int, int], 73 | ) -> np.ndarray: 74 | padded_img = cv2.copyMakeBorder( 75 | img, 76 | padding_tuple[0], 77 | padding_tuple[1], 78 | padding_tuple[2], 79 | padding_tuple[3], 80 | cv2.BORDER_CONSTANT, 81 | value=(0, 0, 0), 82 | ) 83 | return padded_img 84 | 85 | 86 | class ResizeImgError(Exception): 87 | pass 88 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidOCR 5 | from .utils import LoadImageError, VisRes 6 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/cal_rec_boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import CalRecBoxes 5 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/ch_ppocr_cls/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_cls import TextClassifier 5 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/ch_ppocr_cls/text_cls.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import copy 15 | import math 16 | import time 17 | from typing import List 18 | 19 | import cv2 20 | import numpy as np 21 | 22 | from rapidocr_paddle.utils import PaddleInferSession 23 | 24 | from .utils import ClsPostProcess 25 | 26 | 27 | class TextClassifier: 28 | def __init__(self, config): 29 | self.cls_image_shape = config["cls_image_shape"] 30 | self.cls_batch_num = config["cls_batch_num"] 31 | self.cls_thresh = config["cls_thresh"] 32 | self.postprocess_op = ClsPostProcess(config["label_list"]) 33 | 34 | self.infer = PaddleInferSession(config) 35 | 36 | def __call__(self, img_list: List[np.ndarray]): 37 | if isinstance(img_list, np.ndarray): 38 | img_list = [img_list] 39 | 40 | img_list = copy.deepcopy(img_list) 41 | 42 | # Calculate the aspect ratio of all text bars 43 | width_list = [img.shape[1] / float(img.shape[0]) for img in img_list] 44 | 45 | # Sorting can speed up the cls process 46 | indices = np.argsort(np.array(width_list)) 47 | 48 | img_num = len(img_list) 49 | cls_res = [["", 0.0]] * img_num 50 | batch_num = self.cls_batch_num 51 | elapse = 0 52 | for beg_img_no in range(0, img_num, batch_num): 53 | end_img_no = min(img_num, beg_img_no + batch_num) 54 | 55 | norm_img_batch = [] 56 | for ino in range(beg_img_no, end_img_no): 57 | norm_img = self.resize_norm_img(img_list[indices[ino]]) 58 | norm_img = norm_img[np.newaxis, :] 59 | norm_img_batch.append(norm_img) 60 | norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32) 61 | 62 | starttime = time.time() 63 | prob_out = self.infer(norm_img_batch)[0] 64 | cls_result = self.postprocess_op(prob_out) 65 | elapse += time.time() - starttime 66 | 67 | for rno in range(len(cls_result)): 68 | label, score = cls_result[rno] 69 | cls_res[indices[beg_img_no + rno]] = [label, score] 70 | if "180" in label and score > self.cls_thresh: 71 | img_list[indices[beg_img_no + rno]] = cv2.rotate( 72 | img_list[indices[beg_img_no + rno]], 1 73 | ) 74 | return img_list, cls_res, elapse 75 | 76 | def resize_norm_img(self, img): 77 | img_c, img_h, img_w = self.cls_image_shape 78 | h, w = img.shape[:2] 79 | ratio = w / float(h) 80 | if math.ceil(img_h * ratio) > img_w: 81 | resized_w = img_w 82 | else: 83 | resized_w = int(math.ceil(img_h * ratio)) 84 | 85 | resized_image = cv2.resize(img, (resized_w, img_h)) 86 | resized_image = resized_image.astype("float32") 87 | if img_c == 1: 88 | resized_image = resized_image / 255 89 | resized_image = resized_image[np.newaxis, :] 90 | else: 91 | resized_image = resized_image.transpose((2, 0, 1)) / 255 92 | 93 | resized_image -= 0.5 94 | resized_image /= 0.5 95 | padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32) 96 | padding_im[:, :, :resized_w] = resized_image 97 | return padding_im 98 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/ch_ppocr_cls/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import List, Tuple 15 | 16 | import numpy as np 17 | 18 | 19 | class ClsPostProcess: 20 | def __init__(self, label_list: List[str]): 21 | self.label_list = label_list 22 | 23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]: 24 | pred_idxs = preds.argmax(axis=1) 25 | decode_out = [ 26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs) 27 | ] 28 | return decode_out 29 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/ch_ppocr_det/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_detect import TextDetector 5 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/ch_ppocr_rec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_recognize import TextRecognizer 5 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/config.yaml: -------------------------------------------------------------------------------- 1 | Global: 2 | text_score: 0.5 3 | use_det: true 4 | use_cls: true 5 | use_rec: true 6 | print_verbose: false 7 | min_height: 30 8 | width_height_ratio: 8 9 | max_side_len: 2000 10 | min_side_len: 30 11 | return_word_box: false 12 | 13 | cpu_math_library_num_threads: &infer_num_threads -1 14 | 15 | Det: 16 | use_cuda: false 17 | gpu_id: 0 18 | gpu_mem: 500 19 | 20 | cpu_math_library_num_threads: *infer_num_threads 21 | 22 | model_path: models/ch_PP-OCRv4_det_infer 23 | 24 | limit_side_len: 736 25 | limit_type: min 26 | std: [ 0.5, 0.5, 0.5 ] 27 | mean: [ 0.5, 0.5, 0.5 ] 28 | 29 | thresh: 0.3 30 | box_thresh: 0.5 31 | max_candidates: 1000 32 | unclip_ratio: 1.6 33 | use_dilation: true 34 | score_mode: fast 35 | 36 | Cls: 37 | use_cuda: false 38 | gpu_id: 0 39 | gpu_mem: 500 40 | 41 | cpu_math_library_num_threads: *infer_num_threads 42 | 43 | model_path: models/ch_ppocr_mobile_v2_cls_infer 44 | 45 | cls_image_shape: [3, 48, 192] 46 | cls_batch_num: 6 47 | cls_thresh: 0.9 48 | label_list: ['0', '180'] 49 | 50 | Rec: 51 | use_cuda: false 52 | gpu_id: 0 53 | gpu_mem: 500 54 | 55 | cpu_math_library_num_threads: *infer_num_threads 56 | 57 | model_path: models/ch_PP-OCRv4_rec_infer 58 | 59 | rec_img_shape: [3, 48, 320] 60 | rec_batch_num: 6 61 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_paddle/models/.gitkeep -------------------------------------------------------------------------------- /python/rapidocr_paddle/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import Dict, Union 6 | 7 | import yaml 8 | 9 | from .infer_engine import PaddleInferSession 10 | from .load_image import LoadImage, LoadImageError 11 | from .logger import get_logger 12 | 
from .parse_parameters import UpdateParameters, init_args, update_model_path 13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side 14 | from .vis_res import VisRes 15 | 16 | 17 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]: 18 | with open(yaml_path, "rb") as f: 19 | data = yaml.load(f, Loader=yaml.Loader) 20 | return data 21 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | from functools import lru_cache 6 | 7 | 8 | @lru_cache(maxsize=32) 9 | def get_logger(name: str): 10 | logger = logging.getLogger(name) 11 | logger.setLevel(logging.DEBUG) 12 | 13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s" 14 | format_str = logging.Formatter(fmt) 15 | 16 | sh = logging.StreamHandler() 17 | sh.setLevel(logging.DEBUG) 18 | 19 | logger.addHandler(sh) 20 | sh.setFormatter(format_str) 21 | return logger 22 | -------------------------------------------------------------------------------- /python/rapidocr_paddle/utils/process_img.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from typing import Tuple 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def reduce_max_side( 11 | img: np.ndarray, max_side_len: int = 2000 12 | ) -> Tuple[np.ndarray, float, float]: 13 | h, w = img.shape[:2] 14 | 15 | ratio = 1.0 16 | if max(h, w) > max_side_len: 17 | if h > w: 18 | ratio = float(max_side_len) / h 19 | else: 20 | ratio = float(max_side_len) / w 21 | 22 | resize_h = int(h * ratio) 23 | resize_w = int(w * ratio) 24 | 25 | resize_h = int(round(resize_h / 32) * 32) 26 | resize_w = int(round(resize_w / 32) * 32) 27 | 28 | try: 29 | if int(resize_w) <= 0 or int(resize_h) <= 0: 30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 31 | img = cv2.resize(img, (resize_w, resize_h)) 32 | except Exception as exc: 33 | raise ResizeImgError() from exc 34 | 35 | ratio_h = h / resize_h 36 | ratio_w = w / resize_w 37 | return img, ratio_h, ratio_w 38 | 39 | 40 | def increase_min_side( 41 | img: np.ndarray, min_side_len: int = 30 42 | ) -> Tuple[np.ndarray, float, float]: 43 | h, w = img.shape[:2] 44 | 45 | ratio = 1.0 46 | if min(h, w) < min_side_len: 47 | if h < w: 48 | ratio = float(min_side_len) / h 49 | else: 50 | ratio = float(min_side_len) / w 51 | 52 | resize_h = int(h * ratio) 53 | resize_w = int(w * ratio) 54 | 55 | resize_h = int(round(resize_h / 32) * 32) 56 | resize_w = int(round(resize_w / 32) * 32) 57 | 58 | try: 59 | if int(resize_w) <= 0 or int(resize_h) <= 0: 60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 61 | img = cv2.resize(img, (resize_w, resize_h)) 62 | except Exception as exc: 63 | raise ResizeImgError() from exc 64 | 65 | ratio_h = h / resize_h 66 | ratio_w = w / resize_w 67 | return img, ratio_h, ratio_w 68 | 69 | 70 | def add_round_letterbox( 71 | img: np.ndarray, 72 | padding_tuple: Tuple[int, int, int, int], 73 | ) -> np.ndarray: 74 | padded_img = cv2.copyMakeBorder( 75 | img, 76 | padding_tuple[0], 77 | padding_tuple[1], 78 | padding_tuple[2], 79 | padding_tuple[3], 80 | cv2.BORDER_CONSTANT, 81 | value=(0, 0, 0), 82 | ) 83 | return padded_img 84 | 85 | 86 | class ResizeImgError(Exception): 87 | 
pass 88 | -------------------------------------------------------------------------------- /python/rapidocr_torch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidOCR 5 | from .utils import LoadImageError, VisRes 6 | -------------------------------------------------------------------------------- /python/rapidocr_torch/arch_config.yaml: -------------------------------------------------------------------------------- 1 | ch_ptocr_mobile_v2.0_cls_infer: 2 | model_type: cls 3 | algorithm: CLS 4 | Transform: 5 | Backbone: 6 | name: MobileNetV3 7 | scale: 0.35 8 | model_name: small 9 | Neck: 10 | Head: 11 | name: ClsHead 12 | class_dim: 2 13 | 14 | ch_PP-OCRv4_det_infer: 15 | model_type: det 16 | algorithm: DB 17 | Transform: null 18 | Backbone: 19 | name: PPLCNetV3 20 | scale: 0.75 21 | det: True 22 | Neck: 23 | name: RSEFPN 24 | out_channels: 96 25 | shortcut: True 26 | Head: 27 | name: DBHead 28 | k: 50 29 | 30 | 31 | ch_PP-OCRv4_det_server_infer: 32 | model_type: det 33 | algorithm: DB 34 | Transform: null 35 | Backbone: 36 | name: PPHGNet_small 37 | det: True 38 | Neck: 39 | name: LKPAN 40 | out_channels: 256 41 | intracl: true 42 | Head: 43 | name: PFHeadLocal 44 | k: 50 45 | mode: "large" 46 | 47 | 48 | ch_PP-OCRv4_rec_infer: 49 | model_type: rec 50 | algorithm: SVTR_LCNet 51 | Transform: 52 | Backbone: 53 | name: PPLCNetV3 54 | scale: 0.95 55 | Head: 56 | name: MultiHead 57 | out_channels_list: 58 | CTCLabelDecode: 6625 #'blank' + ...(6623) + ' ' 59 | head_list: 60 | - CTCHead: 61 | Neck: 62 | name: svtr 63 | dims: 120 64 | depth: 2 65 | hidden_dims: 120 66 | kernel_size: [ 1, 3 ] 67 | use_guide: True 68 | Head: 69 | fc_decay: 0.00001 70 | - NRTRHead: 71 | nrtr_dim: 384 72 | max_text_length: 25 73 | 74 | 75 | ch_PP-OCRv4_rec_server_infer: 76 | model_type: rec 77 | algorithm: SVTR_HGNet 78 | Transform: 79 | Backbone: 80 | name: PPHGNet_small 81 | Head: 82 | name: MultiHead 83 | out_channels_list: 84 | CTCLabelDecode: 6625 #'blank' + ...(6623) + ' ' 85 | head_list: 86 | - CTCHead: 87 | Neck: 88 | name: svtr 89 | dims: 120 90 | depth: 2 91 | hidden_dims: 120 92 | kernel_size: [ 1, 3 ] 93 | use_guide: True 94 | Head: 95 | fc_decay: 0.00001 96 | - NRTRHead: 97 | nrtr_dim: 384 98 | max_text_length: 25 -------------------------------------------------------------------------------- /python/rapidocr_torch/cal_rec_boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import CalRecBoxes 5 | -------------------------------------------------------------------------------- /python/rapidocr_torch/ch_ppocr_cls/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_cls import TextClassifier 5 | -------------------------------------------------------------------------------- /python/rapidocr_torch/ch_ppocr_cls/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import List, Tuple 15 | 16 | import numpy as np 17 | 18 | 19 | class ClsPostProcess: 20 | def __init__(self, label_list: List[str]): 21 | self.label_list = label_list 22 | 23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]: 24 | pred_idxs = preds.argmax(axis=1) 25 | decode_out = [ 26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs) 27 | ] 28 | return decode_out 29 | -------------------------------------------------------------------------------- /python/rapidocr_torch/ch_ppocr_det/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_detect import TextDetector 5 | -------------------------------------------------------------------------------- /python/rapidocr_torch/ch_ppocr_rec/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .text_recognize import TextRecognizer 5 | -------------------------------------------------------------------------------- /python/rapidocr_torch/config.yaml: -------------------------------------------------------------------------------- 1 | Global: 2 | text_score: 0.5 3 | use_det: true 4 | use_cls: true 5 | use_rec: true 6 | print_verbose: false 7 | min_height: 30 8 | width_height_ratio: 8 9 | max_side_len: 2000 10 | min_side_len: 30 11 | return_word_box: false 12 | 13 | intra_op_num_threads: &intra_nums -1 14 | inter_op_num_threads: &inter_nums -1 15 | 16 | Det: 17 | intra_op_num_threads: *intra_nums 18 | inter_op_num_threads: *inter_nums 19 | 20 | use_cuda: false 21 | use_dml: false 22 | 23 | model_path: models/ch_PP-OCRv4_det_infer.pth 24 | 25 | limit_side_len: 736 26 | limit_type: min 27 | 28 | thresh: 0.3 29 | box_thresh: 0.5 30 | max_candidates: 1000 31 | unclip_ratio: 1.5 32 | use_dilation: true 33 | score_mode: fast 34 | 35 | Cls: 36 | intra_op_num_threads: *intra_nums 37 | inter_op_num_threads: *inter_nums 38 | 39 | use_cuda: false 40 | use_dml: false 41 | 42 | model_path: models/ch_ptocr_mobile_v2.0_cls_infer.pth 43 | 44 | cls_image_shape: [3, 48, 192] 45 | cls_batch_num: 6 46 | cls_thresh: 0.9 47 | label_list: ['0', '180'] 48 | 49 | Rec: 50 | intra_op_num_threads: *intra_nums 51 | inter_op_num_threads: *inter_nums 52 | 53 | use_cuda: false 54 | use_dml: false 55 | 56 | model_path: models/ch_PP-OCRv4_rec_infer.pth 57 | 58 | rec_img_shape: [3, 48, 320] 59 | rec_batch_num: 6 60 | -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_torch/modeling/__init__.py -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/architectures/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import copy 16 | 17 | __all__ = ['build_model'] 18 | 19 | 20 | def build_model(config, **kwargs): 21 | from .base_model import BaseModel 22 | 23 | config = copy.deepcopy(config) 24 | module_class = BaseModel(config, **kwargs) 25 | return module_class -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/architectures/base_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from rapidocr_torch.modeling.backbones import build_backbone 3 | from rapidocr_torch.modeling.heads import build_head 4 | from rapidocr_torch.modeling.necks import build_neck 5 | 6 | 7 | class BaseModel(nn.Module): 8 | def __init__(self, config, **kwargs): 9 | """ 10 | the module for OCR. 11 | args: 12 | config (dict): the hyperparameters for the module. 13 | """ 14 | super(BaseModel, self).__init__() 15 | 16 | in_channels = config.get('in_channels', 3) 17 | model_type = config['model_type'] 18 | # build backbone, backbone is needed for det, rec and cls 19 | if 'Backbone' not in config or config['Backbone'] is None: 20 | self.use_backbone = False 21 | else: 22 | self.use_backbone = True 23 | config["Backbone"]['in_channels'] = in_channels 24 | self.backbone = build_backbone(config["Backbone"], model_type) 25 | in_channels = self.backbone.out_channels 26 | 27 | # build neck 28 | # for rec, neck can be cnn, rnn or reshape(None) 29 | # for det, neck can be FPN, BIFPN and so on. 
30 | # for cls, the neck should be None 31 | if 'Neck' not in config or config['Neck'] is None: 32 | self.use_neck = False 33 | else: 34 | self.use_neck = True 35 | config['Neck']['in_channels'] = in_channels 36 | self.neck = build_neck(config['Neck']) 37 | in_channels = self.neck.out_channels 38 | 39 | # build head; a head is needed for det, rec and cls 40 | if 'Head' not in config or config['Head'] is None: 41 | self.use_head = False 42 | else: 43 | self.use_head = True 44 | config["Head"]['in_channels'] = in_channels 45 | self.head = build_head(config["Head"], **kwargs) 46 | 47 | self.return_all_feats = config.get("return_all_feats", False) 48 | 49 | self._initialize_weights() 50 | 51 | def _initialize_weights(self): 52 | # weight initialization 53 | for m in self.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 56 | if m.bias is not None: 57 | nn.init.zeros_(m.bias) 58 | elif isinstance(m, nn.BatchNorm2d): 59 | nn.init.ones_(m.weight) 60 | nn.init.zeros_(m.bias) 61 | elif isinstance(m, nn.Linear): 62 | nn.init.normal_(m.weight, 0, 0.01) 63 | if m.bias is not None: 64 | nn.init.zeros_(m.bias) 65 | elif isinstance(m, nn.ConvTranspose2d): 66 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 67 | if m.bias is not None: 68 | nn.init.zeros_(m.bias) 69 | 70 | 71 | def forward(self, x): 72 | y = dict() 73 | if self.use_backbone: 74 | x = self.backbone(x) 75 | if isinstance(x, dict): 76 | y.update(x) 77 | else: 78 | y["backbone_out"] = x 79 | final_name = "backbone_out" 80 | if self.use_neck: 81 | x = self.neck(x) 82 | if isinstance(x, dict): 83 | y.update(x) 84 | else: 85 | y["neck_out"] = x 86 | final_name = "neck_out" 87 | if self.use_head: 88 | x = self.head(x) 89 | # for multi head, save ctc neck out for udml 90 | if isinstance(x, dict) and 'ctc_neck' in x.keys(): 91 | y['neck_out'] = x['ctc_neck'] 92 | y['head_out'] = x 93 | elif isinstance(x, dict): 94 | y.update(x) 95 | else: 96 | y["head_out"] = x 97 | if self.return_all_feats: 98 | if self.training: 99 | return y 100 | elif isinstance(x, dict): 101 | return x 102 | else: 103 | return {final_name: x} 104 | else: 105 | return x -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | __all__ = ['build_backbone'] 16 | 17 | 18 | def build_backbone(config, model_type): 19 | if model_type == 'det': 20 | from .det_mobilenet_v3 import MobileNetV3 21 | from .rec_lcnetv3 import PPLCNetV3 22 | from .rec_hgnet import PPHGNet_small 23 | support_dict = ['MobileNetV3', 'ResNet', 'ResNet_vd', 'ResNet_SAST', 'PPLCNetV3', 'PPHGNet_small'] 24 | elif model_type == 'rec' or model_type == 'cls': 25 | from .rec_mobilenet_v3 import MobileNetV3 26 | from .rec_svtrnet import SVTRNet 27 | from .rec_lcnetv3 import PPLCNetV3 28 | from .rec_hgnet import PPHGNet_small 29 | support_dict = ['MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB', 30 | 'ResNet31', 'SVTRNet', 'ViTSTR', 'DenseNet', 'PPLCNetV3', 'PPHGNet_small'] 31 | else: 32 | raise NotImplementedError 33 | 34 | module_name = config.pop('name') 35 | assert module_name in support_dict, Exception( 36 | 'when model type is {}, backbone only supports {}'.format(model_type, 37 | support_dict)) 38 | module_class = eval(module_name)(**config) 39 | return module_class -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Hswish(nn.Module): 7 | def __init__(self, inplace=True): 8 | super(Hswish, self).__init__() 9 | self.inplace = inplace 10 | 11 | def forward(self, x): 12 | return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0 13 | 14 | 15 | # out = max(0, min(1, slope*x+offset)) 16 | # paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None) 17 | class Hsigmoid(nn.Module): 18 | def __init__(self, inplace=True): 19 | super(Hsigmoid, self).__init__() 20 | self.inplace = inplace 21 | 22 | def forward(self, x): 23 | # torch: F.relu6(x + 3., inplace=self.inplace) / 6. 24 | # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6.
25 | return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0 26 | 27 | 28 | class GELU(nn.Module): 29 | def __init__(self, inplace=True): 30 | super(GELU, self).__init__() 31 | self.inplace = inplace 32 | 33 | def forward(self, x): 34 | return torch.nn.functional.gelu(x) 35 | 36 | 37 | class Swish(nn.Module): 38 | def __init__(self, inplace=True): 39 | super(Swish, self).__init__() 40 | self.inplace = inplace 41 | 42 | def forward(self, x): 43 | if self.inplace: 44 | x.mul_(torch.sigmoid(x)) 45 | return x 46 | else: 47 | return x * torch.sigmoid(x) 48 | 49 | 50 | class Activation(nn.Module): 51 | def __init__(self, act_type, inplace=True): 52 | super(Activation, self).__init__() 53 | act_type = act_type.lower() 54 | if act_type == "relu": 55 | self.act = nn.ReLU(inplace=inplace) 56 | elif act_type == "relu6": 57 | self.act = nn.ReLU6(inplace=inplace) 58 | elif act_type == "sigmoid": 59 | raise NotImplementedError 60 | elif act_type == "hard_sigmoid": 61 | self.act = Hsigmoid( 62 | inplace 63 | ) # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)# 64 | elif act_type == "hard_swish" or act_type == "hswish": 65 | self.act = Hswish(inplace=inplace) 66 | elif act_type == "leakyrelu": 67 | self.act = nn.LeakyReLU(inplace=inplace) 68 | elif act_type == "gelu": 69 | self.act = GELU(inplace=inplace) 70 | elif act_type == "swish": 71 | self.act = Swish(inplace=inplace) 72 | else: 73 | raise NotImplementedError 74 | 75 | def forward(self, inputs): 76 | return self.act(inputs) 77 | -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | __all__ = ['build_head'] 16 | 17 | 18 | def build_head(config, **kwargs): 19 | # det head 20 | from .det_db_head import DBHead, PFHeadLocal 21 | # rec head 22 | from .rec_ctc_head import CTCHead 23 | from .rec_multi_head import MultiHead 24 | 25 | # cls head 26 | from .cls_head import ClsHead 27 | support_dict = [ 28 | 'DBHead', 'CTCHead', 'ClsHead', 'MultiHead', 'PFHeadLocal', 29 | ] 30 | 31 | module_name = config.pop('name') 32 | char_num = config.pop('char_num', 6625) 33 | assert module_name in support_dict, Exception('head only support {}'.format( 34 | support_dict)) 35 | module_class = eval(module_name)(**config, **kwargs) 36 | return module_class -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/heads/cls_head.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | class ClsHead(nn.Module): 7 | """ 8 | Class orientation 9 | Args: 10 | params(dict): super parameters for build Class network 11 | """ 12 | 13 | def __init__(self, in_channels, class_dim, **kwargs): 14 | super(ClsHead, self).__init__() 15 | self.pool = nn.AdaptiveAvgPool2d(1) 16 | self.fc = nn.Linear( 17 | in_channels, 18 | class_dim, 19 | bias=True) 20 | 21 | def forward(self, x): 22 | x = self.pool(x) 23 | x = torch.reshape(x, shape=[x.shape[0], x.shape[1]]) 24 | x = self.fc(x) 25 | x = F.softmax(x, dim=1) 26 | return x -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/heads/rec_ctc_head.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | class CTCHead(nn.Module): 7 | def __init__(self, 8 | in_channels, 9 | out_channels=6625, 10 | fc_decay=0.0004, 11 | mid_channels=None, 12 | return_feats=False, 13 | **kwargs): 14 | super(CTCHead, self).__init__() 15 | if mid_channels is None: 16 | self.fc = nn.Linear( 17 | in_channels, 18 | out_channels, 19 | bias=True,) 20 | else: 21 | self.fc1 = nn.Linear( 22 | in_channels, 23 | mid_channels, 24 | bias=True, 25 | ) 26 | self.fc2 = nn.Linear( 27 | mid_channels, 28 | out_channels, 29 | bias=True, 30 | ) 31 | 32 | self.out_channels = out_channels 33 | self.mid_channels = mid_channels 34 | self.return_feats = return_feats 35 | 36 | 37 | def forward(self, x, labels=None): 38 | if self.mid_channels is None: 39 | predicts = self.fc(x) 40 | else: 41 | x = self.fc1(x) 42 | predicts = self.fc2(x) 43 | 44 | if self.return_feats: 45 | result = (x, predicts) 46 | else: 47 | result = predicts 48 | 49 | if not self.training: 50 | predicts = F.softmax(predicts, dim=2) 51 | result = predicts 52 | 53 | return result -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/heads/rec_multi_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from rapidocr_torch.modeling.necks.rnn import Im2Seq, SequenceEncoder 5 | from .rec_ctc_head import CTCHead 6 | 7 | class FCTranspose(nn.Module): 8 | def __init__(self, in_channels, out_channels, only_transpose=False): 9 | super().__init__() 10 | self.only_transpose = only_transpose 11 | if not self.only_transpose: 12 | self.fc = nn.Linear(in_channels, out_channels, bias=False) 13 | 14 | def forward(self, x): 15 | if 
self.only_transpose: 16 | return x.permute([0, 2, 1]) 17 | else: 18 | return self.fc(x.permute([0, 2, 1])) 19 | 20 | 21 | class MultiHead(nn.Module): 22 | def __init__(self, in_channels, out_channels_list, **kwargs): 23 | super().__init__() 24 | self.head_list = kwargs.pop('head_list') 25 | 26 | self.gtc_head = 'sar' 27 | assert len(self.head_list) >= 2 28 | for idx, head_name in enumerate(self.head_list): 29 | name = list(head_name)[0] 30 | if name == 'SARHead': 31 | pass 32 | 33 | elif name == 'NRTRHead': 34 | pass 35 | elif name == 'CTCHead': 36 | # ctc neck 37 | self.encoder_reshape = Im2Seq(in_channels) 38 | neck_args = self.head_list[idx][name]['Neck'] 39 | encoder_type = neck_args.pop('name') 40 | self.ctc_encoder = SequenceEncoder(in_channels=in_channels, \ 41 | encoder_type=encoder_type, **neck_args) 42 | # ctc head 43 | head_args = self.head_list[idx][name].get('Head', {}) 44 | if head_args is None: 45 | head_args = {} 46 | self.ctc_head = eval(name)(in_channels=self.ctc_encoder.out_channels, \ 47 | out_channels=out_channels_list['CTCLabelDecode'], **head_args) 48 | else: 49 | raise NotImplementedError( 50 | '{} is not supported in MultiHead yet'.format(name)) 51 | 52 | def forward(self, x, data=None): 53 | ctc_encoder = self.ctc_encoder(x) 54 | return self.ctc_head(ctc_encoder) 55 | 56 | -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | __all__ = ['build_neck'] 16 | 17 | 18 | def build_neck(config): 19 | from .db_fpn import DBFPN, RSEFPN, LKPAN 20 | from .rnn import SequenceEncoder 21 | support_dict = ['DBFPN', 'SequenceEncoder', 'RSEFPN', 'LKPAN'] 22 | 23 | module_name = config.pop('name') 24 | assert module_name in support_dict, Exception('neck only support {}'.format( 25 | support_dict)) 26 | module_class = eval(module_name)(**config) 27 | return module_class 28 | -------------------------------------------------------------------------------- /python/rapidocr_torch/modeling/necks/intracl.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class IntraCLBlock(nn.Module): 5 | def __init__(self, in_channels=96, reduce_factor=4): 6 | super(IntraCLBlock, self).__init__() 7 | self.channels = in_channels 8 | self.rf = reduce_factor 9 | self.conv1x1_reduce_channel = nn.Conv2d( 10 | self.channels, self.channels // self.rf, kernel_size=1, stride=1, padding=0 11 | ) 12 | self.conv1x1_return_channel = nn.Conv2d( 13 | self.channels // self.rf, self.channels, kernel_size=1, stride=1, padding=0 14 | ) 15 | 16 | self.v_layer_7x1 = nn.Conv2d( 17 | self.channels // self.rf, 18 | self.channels // self.rf, 19 | kernel_size=(7, 1), 20 | stride=(1, 1), 21 | padding=(3, 0), 22 | ) 23 | self.v_layer_5x1 = nn.Conv2d( 24 | self.channels // self.rf, 25 | self.channels // self.rf, 26 | kernel_size=(5, 1), 27 | stride=(1, 1), 28 | padding=(2, 0), 29 | ) 30 | self.v_layer_3x1 = nn.Conv2d( 31 | self.channels // self.rf, 32 | self.channels // self.rf, 33 | kernel_size=(3, 1), 34 | stride=(1, 1), 35 | padding=(1, 0), 36 | ) 37 | 38 | self.q_layer_1x7 = nn.Conv2d( 39 | self.channels // self.rf, 40 | self.channels // self.rf, 41 | kernel_size=(1, 7), 42 | stride=(1, 1), 43 | padding=(0, 3), 44 | ) 45 | self.q_layer_1x5 = nn.Conv2d( 46 | self.channels // self.rf, 47 | self.channels // self.rf, 48 | kernel_size=(1, 5), 49 | stride=(1, 1), 50 | padding=(0, 2), 51 | ) 52 | self.q_layer_1x3 = nn.Conv2d( 53 | self.channels // self.rf, 54 | self.channels // self.rf, 55 | kernel_size=(1, 3), 56 | stride=(1, 1), 57 | padding=(0, 1), 58 | ) 59 | 60 | # base 61 | self.c_layer_7x7 = nn.Conv2d( 62 | self.channels // self.rf, 63 | self.channels // self.rf, 64 | kernel_size=(7, 7), 65 | stride=(1, 1), 66 | padding=(3, 3), 67 | ) 68 | self.c_layer_5x5 = nn.Conv2d( 69 | self.channels // self.rf, 70 | self.channels // self.rf, 71 | kernel_size=(5, 5), 72 | stride=(1, 1), 73 | padding=(2, 2), 74 | ) 75 | self.c_layer_3x3 = nn.Conv2d( 76 | self.channels // self.rf, 77 | self.channels // self.rf, 78 | kernel_size=(3, 3), 79 | stride=(1, 1), 80 | padding=(1, 1), 81 | ) 82 | 83 | self.bn = nn.BatchNorm2d(self.channels) 84 | self.relu = nn.ReLU() 85 | 86 | def forward(self, x): 87 | x_new = self.conv1x1_reduce_channel(x) 88 | 89 | x_7_c = self.c_layer_7x7(x_new) 90 | x_7_v = self.v_layer_7x1(x_new) 91 | x_7_q = self.q_layer_1x7(x_new) 92 | x_7 = x_7_c + x_7_v + x_7_q 93 | 94 | x_5_c = self.c_layer_5x5(x_7) 95 | x_5_v = self.v_layer_5x1(x_7) 96 | x_5_q = self.q_layer_1x5(x_7) 97 | x_5 = x_5_c + x_5_v + x_5_q 98 | 99 | x_3_c = self.c_layer_3x3(x_5) 100 | x_3_v = self.v_layer_3x1(x_5) 101 | x_3_q = self.q_layer_1x3(x_5) 102 | x_3 = x_3_c + x_3_v + x_3_q 103 | 104 | x_relation = self.conv1x1_return_channel(x_3) 105 | 106 | x_relation = self.bn(x_relation) 107 | x_relation = self.relu(x_relation) 108 | 109 | return x + x_relation 110 | 111 | 112 | def build_intraclblock_list(num_block): 113 | 
IntraCLBlock_list = nn.ModuleList() 114 | for i in range(num_block): 115 | IntraCLBlock_list.append(IntraCLBlock()) 116 | 117 | return IntraCLBlock_list 118 | -------------------------------------------------------------------------------- /python/rapidocr_torch/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_torch/models/.gitkeep -------------------------------------------------------------------------------- /python/rapidocr_torch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import Dict, Union 6 | 7 | import yaml 8 | 9 | from .infer_engine import TorchInferSession 10 | from .load_image import LoadImage, LoadImageError 11 | from .logger import get_logger 12 | from .parse_parameters import UpdateParameters, init_args, update_model_path 13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side 14 | from .vis_res import VisRes 15 | 16 | 17 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]: 18 | with open(yaml_path, "rb") as f: 19 | data = yaml.load(f, Loader=yaml.Loader) 20 | return data 21 | -------------------------------------------------------------------------------- /python/rapidocr_torch/utils/infer_engine.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import Dict, Optional, Union 6 | 7 | import numpy as np 8 | import torch 9 | import yaml 10 | 11 | root_dir = Path(__file__).resolve().parent.parent 12 | DEFAULT_CFG_PATH = root_dir / "arch_config.yaml" 13 | 14 | 15 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]: 16 | with open(yaml_path, "rb") as f: 17 | data = yaml.load(f, Loader=yaml.Loader) 18 | return data 19 | 20 | 21 | from rapidocr_torch.modeling.architectures.base_model import BaseModel 22 | 23 | from .logger import get_logger 24 | 25 | 26 | class TorchInferSession: 27 | def __init__(self, config, mode: Optional[str] = None) -> None: 28 | all_arch_config = read_yaml(DEFAULT_CFG_PATH) 29 | 30 | self.logger = get_logger("TorchInferSession") 31 | self.mode = mode 32 | model_path = Path(config["model_path"]) 33 | self._verify_model(model_path) 34 | file_name = model_path.stem 35 | if file_name not in all_arch_config: 36 | raise ValueError(f"architecture {file_name} is not in arch_config.yaml") 37 | arch_config = all_arch_config[file_name] 38 | self.predictor = BaseModel(arch_config) 39 | self.predictor.load_state_dict(torch.load(model_path, weights_only=True)) 40 | self.predictor.eval() 41 | self.use_gpu = False 42 | if config["use_cuda"]: 43 | self.predictor.cuda() 44 | self.use_gpu = True 45 | 46 | def __call__(self, img: np.ndarray): 47 | with torch.no_grad(): 48 | inp = torch.from_numpy(img) 49 | if self.use_gpu: 50 | inp = inp.cuda() 51 | # unsqueeze so the output indexing stays aligned with the onnx inference path 52 | outputs = self.predictor(inp).unsqueeze(0) 53 | return outputs.cpu().numpy() 54 | 55 | @staticmethod 56 | def _verify_model(model_path): 57 | model_path = Path(model_path) 58 | if not model_path.exists(): 59 | raise FileNotFoundError(f"{model_path} does not exist.") 60 | if not model_path.is_file(): 61 | raise FileExistsError(f"{model_path} is not a file.") 62 | 63
| 64 | class TorchInferError(Exception): 65 | pass 66 | -------------------------------------------------------------------------------- /python/rapidocr_torch/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import logging 5 | from functools import lru_cache 6 | 7 | 8 | @lru_cache(maxsize=32) 9 | def get_logger(name: str) -> logging.Logger: 10 | logger = logging.getLogger(name) 11 | logger.setLevel(logging.DEBUG) 12 | 13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s" 14 | format_str = logging.Formatter(fmt) 15 | 16 | sh = logging.StreamHandler() 17 | sh.setLevel(logging.DEBUG) 18 | 19 | logger.addHandler(sh) 20 | sh.setFormatter(format_str) 21 | return logger 22 | -------------------------------------------------------------------------------- /python/rapidocr_torch/utils/process_img.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from typing import Tuple 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def reduce_max_side( 11 | img: np.ndarray, max_side_len: int = 2000 12 | ) -> Tuple[np.ndarray, float, float]: 13 | h, w = img.shape[:2] 14 | 15 | ratio = 1.0 16 | if max(h, w) > max_side_len: 17 | if h > w: 18 | ratio = float(max_side_len) / h 19 | else: 20 | ratio = float(max_side_len) / w 21 | 22 | resize_h = int(h * ratio) 23 | resize_w = int(w * ratio) 24 | 25 | resize_h = int(round(resize_h / 32) * 32) 26 | resize_w = int(round(resize_w / 32) * 32) 27 | 28 | try: 29 | if int(resize_w) <= 0 or int(resize_h) <= 0: 30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 31 | img = cv2.resize(img, (resize_w, resize_h)) 32 | except Exception as exc: 33 | raise ResizeImgError() from exc 34 | 35 | ratio_h = h / resize_h 36 | ratio_w = w / resize_w 37 | return img, ratio_h, ratio_w 38 | 39 | 40 | def increase_min_side( 41 | img: np.ndarray, min_side_len: int = 30 42 | ) -> Tuple[np.ndarray, float, float]: 43 | h, w = img.shape[:2] 44 | 45 | ratio = 1.0 46 | if min(h, w) < min_side_len: 47 | if h < w: 48 | ratio = float(min_side_len) / h 49 | else: 50 | ratio = float(min_side_len) / w 51 | 52 | resize_h = int(h * ratio) 53 | resize_w = int(w * ratio) 54 | 55 | resize_h = int(round(resize_h / 32) * 32) 56 | resize_w = int(round(resize_w / 32) * 32) 57 | 58 | try: 59 | if int(resize_w) <= 0 or int(resize_h) <= 0: 60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0") 61 | img = cv2.resize(img, (resize_w, resize_h)) 62 | except Exception as exc: 63 | raise ResizeImgError() from exc 64 | 65 | ratio_h = h / resize_h 66 | ratio_w = w / resize_w 67 | return img, ratio_h, ratio_w 68 | 69 | 70 | def add_round_letterbox( 71 | img: np.ndarray, 72 | padding_tuple: Tuple[int, int, int, int], 73 | ) -> np.ndarray: 74 | padded_img = cv2.copyMakeBorder( 75 | img, 76 | padding_tuple[0], 77 | padding_tuple[1], 78 | padding_tuple[2], 79 | padding_tuple[3], 80 | cv2.BORDER_CONSTANT, 81 | value=(0, 0, 0), 82 | ) 83 | return padded_img 84 | 85 | 86 | class ResizeImgError(Exception): 87 | pass 88 | -------------------------------------------------------------------------------- /python/requirements.txt: -------------------------------------------------------------------------------- 1 | pyclipper>=1.2.0 2 | opencv_python>=4.5.1.48 3 | numpy>=1.19.5,<3.0.0 4 | six>=1.15.0 5 | 
Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug 6 | PyYAML 7 | Pillow 8 | tqdm 9 | omegaconf 10 | requests 11 | colorlog -------------------------------------------------------------------------------- /python/requirements_ort.txt: -------------------------------------------------------------------------------- 1 | pyclipper>=1.2.0 2 | opencv_python>=4.5.1.48 3 | numpy>=1.19.5,<3.0.0 4 | six>=1.15.0 5 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug 6 | PyYAML 7 | Pillow 8 | onnxruntime>=1.7.0 9 | tqdm -------------------------------------------------------------------------------- /python/requirements_paddle.txt: -------------------------------------------------------------------------------- 1 | pyclipper>=1.2.0 2 | opencv_python>=4.5.1.48 3 | numpy>=1.19.5,<3.0.0 4 | six>=1.15.0 5 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug 6 | PyYAML 7 | Pillow 8 | tqdm -------------------------------------------------------------------------------- /python/requirements_torch.txt: -------------------------------------------------------------------------------- 1 | pyclipper>=1.2.0 2 | opencv_python>=4.5.1.48 3 | numpy>=1.19.5,<3.0.0 4 | six>=1.15.0 5 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug 6 | PyYAML 7 | Pillow 8 | tqdm 9 | torch 10 | torchvision -------------------------------------------------------------------------------- /python/requirements_vino.txt: -------------------------------------------------------------------------------- 1 | pyclipper>=1.2.0 2 | openvino>=2022.2.0,<=2024.0.0 3 | opencv_python>=4.5.1.48 4 | numpy>=1.19.5,<3.0.0 5 | six>=1.15.0 6 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug 7 | PyYAML 8 | Pillow 9 | tqdm -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: Union[Path, str]) -> List[str]: 13 | with open(txt_path, "r", encoding="utf-8") as f: 14 | data = [v.rstrip("\n") for v in f] 15 | return data 16 | 17 | 18 | def get_readme(): 19 | root_dir = Path(__file__).resolve().parent.parent 20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr.md") 21 | print(readme_path) 22 | with open(readme_path, "r", encoding="utf-8") as f: 23 | readme = f.read() 24 | return readme 25 | 26 | 27 | MODULE_NAME = "rapidocr" 28 | 29 | obtainer = GetPyPiLatestVersion() 30 | try: 31 | latest_version = obtainer(MODULE_NAME) 32 | except Exception as e: 33 | latest_version = "0.0.0" 34 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True) 35 | 36 | if len(sys.argv) > 2: 37 | match_str = " ".join(sys.argv[2:]) 38 | matched_versions = obtainer.extract_version(match_str) 39 | if matched_versions: 40 | VERSION_NUM = matched_versions 41 | sys.argv = sys.argv[:2] 42 | 43 | project_urls = { 44 | "Documentation": "https://rapidai.github.io/RapidOCRDocs", 45 | "Changelog": "https://github.com/RapidAI/RapidOCR/releases", 46 | } 47 | 48 | setuptools.setup( 49 | name=MODULE_NAME, 50 | version=VERSION_NUM, 51 | platforms="Any", 52 | description="Awesome OCR Library", 53 | long_description=get_readme(), 54 | long_description_content_type="text/markdown", 55 | author="SWHL", 56 | author_email="liekkaskono@163.com", 57 | url="https://github.com/RapidAI/RapidOCR", 58 | 
project_urls=project_urls, 59 | license="Apache-2.0", 60 | include_package_data=True, 61 | install_requires=read_txt("requirements.txt"), 62 | package_dir={"": MODULE_NAME}, 63 | packages=setuptools.find_namespace_packages(where=MODULE_NAME), 64 | package_data={"": ["*.onnx", "*.yaml", "*.txt"]}, 65 | keywords=[ 66 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr" 67 | ], 68 | classifiers=[ 69 | "Programming Language :: Python :: 3.6", 70 | "Programming Language :: Python :: 3.7", 71 | "Programming Language :: Python :: 3.8", 72 | "Programming Language :: Python :: 3.9", 73 | "Programming Language :: Python :: 3.10", 74 | "Programming Language :: Python :: 3.11", 75 | "Programming Language :: Python :: 3.12", 76 | ], 77 | python_requires=">=3.6,<4", 78 | entry_points={ 79 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 80 | }, 81 | ) 82 | -------------------------------------------------------------------------------- /python/setup_onnxruntime.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: Union[Path, str]) -> List[str]: 13 | with open(txt_path, "r", encoding="utf-8") as f: 14 | data = [v.rstrip("\n") for v in f] 15 | return data 16 | 17 | 18 | def get_readme(): 19 | root_dir = Path(__file__).resolve().parent.parent 20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_ort.md") 21 | print(readme_path) 22 | with open(readme_path, "r", encoding="utf-8") as f: 23 | readme = f.read() 24 | return readme 25 | 26 | 27 | MODULE_NAME = "rapidocr_onnxruntime" 28 | 29 | obtainer = GetPyPiLatestVersion() 30 | latest_version = obtainer(MODULE_NAME) 31 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True) 32 | 33 | if len(sys.argv) > 2: 34 | match_str = " ".join(sys.argv[2:]) 35 | matched_versions = obtainer.extract_version(match_str) 36 | if matched_versions: 37 | VERSION_NUM = matched_versions 38 | sys.argv = sys.argv[:2] 39 | 40 | project_urls = { 41 | "Documentation": "https://rapidai.github.io/RapidOCRDocs", 42 | "Changelog": "https://github.com/RapidAI/RapidOCR/releases", 43 | } 44 | 45 | setuptools.setup( 46 | name=MODULE_NAME, 47 | version=VERSION_NUM, 48 | platforms="Any", 49 | description="A cross platform OCR Library based on OnnxRuntime.", 50 | long_description=get_readme(), 51 | long_description_content_type="text/markdown", 52 | author="SWHL", 53 | author_email="liekkaskono@163.com", 54 | url="https://github.com/RapidAI/RapidOCR", 55 | project_urls=project_urls, 56 | license="Apache-2.0", 57 | include_package_data=True, 58 | install_requires=read_txt("requirements_ort.txt"), 59 | package_dir={"": MODULE_NAME}, 60 | packages=setuptools.find_namespace_packages(where=MODULE_NAME), 61 | package_data={"": ["*.onnx", "*.yaml"]}, 62 | keywords=[ 63 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr" 64 | ], 65 | classifiers=[ 66 | "Programming Language :: Python :: 3.6", 67 | "Programming Language :: Python :: 3.7", 68 | "Programming Language :: Python :: 3.8", 69 | "Programming Language :: Python :: 3.9", 70 | "Programming Language :: Python :: 3.10", 71 | "Programming Language :: Python :: 3.11", 72 | "Programming Language :: Python :: 3.12", 73 | ], 74 | 
python_requires=">=3.6,<3.13", 75 | entry_points={ 76 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 77 | }, 78 | ) 79 | -------------------------------------------------------------------------------- /python/setup_openvino.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: Union[Path, str]) -> List[str]: 13 | with open(txt_path, "r", encoding="utf-8") as f: 14 | data = [v.rstrip("\n") for v in f] 15 | return data 16 | 17 | 18 | def get_readme(): 19 | root_dir = Path(__file__).resolve().parent.parent 20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_vino.md") 21 | print(readme_path) 22 | with open(readme_path, "r", encoding="utf-8") as f: 23 | readme = f.read() 24 | return readme 25 | 26 | 27 | MODULE_NAME = "rapidocr_openvino" 28 | 29 | obtainer = GetPyPiLatestVersion() 30 | latest_version = obtainer(MODULE_NAME) 31 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True) 32 | 33 | if len(sys.argv) > 2: 34 | match_str = " ".join(sys.argv[2:]) 35 | matched_versions = obtainer.extract_version(match_str) 36 | if matched_versions: 37 | VERSION_NUM = matched_versions 38 | sys.argv = sys.argv[:2] 39 | 40 | setuptools.setup( 41 | name=MODULE_NAME, 42 | version=VERSION_NUM, 43 | platforms="Any", 44 | description="A cross platform OCR Library based on OpenVINO.", 45 | long_description=get_readme(), 46 | long_description_content_type="text/markdown", 47 | author="SWHL", 48 | author_email="liekkaskono@163.com", 49 | url="https://github.com/RapidAI/RapidOCR", 50 | license="Apache-2.0", 51 | include_package_data=True, 52 | install_requires=read_txt("requirements_vino.txt"), 53 | package_dir={"": MODULE_NAME}, 54 | packages=setuptools.find_namespace_packages(where=MODULE_NAME), 55 | package_data={"": ["*.onnx", "*.yaml", "*.txt"]}, 56 | keywords=[ 57 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr" 58 | ], 59 | classifiers=[ 60 | "Programming Language :: Python :: 3.6", 61 | "Programming Language :: Python :: 3.7", 62 | "Programming Language :: Python :: 3.8", 63 | "Programming Language :: Python :: 3.9", 64 | "Programming Language :: Python :: 3.10", 65 | "Programming Language :: Python :: 3.11", 66 | "Programming Language :: Python :: 3.12", 67 | ], 68 | python_requires=">=3.6,<3.13", 69 | entry_points={ 70 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 71 | }, 72 | ) 73 | -------------------------------------------------------------------------------- /python/setup_paddle.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: Union[Path, str]) -> List[str]: 13 | with open(txt_path, "r", encoding="utf-8") as f: 14 | data = [v.rstrip("\n") for v in f] 15 | return data 16 | 17 | 18 | def get_readme(): 19 | root_dir = Path(__file__).resolve().parent.parent 20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_paddle.md") 21 | print(readme_path) 22 | with 
open(readme_path, "r", encoding="utf-8") as f: 23 | readme = f.read() 24 | return readme 25 | 26 | 27 | MODULE_NAME = "rapidocr_paddle" 28 | 29 | obtainer = GetPyPiLatestVersion() 30 | try: 31 | latest_version = obtainer(MODULE_NAME) 32 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True) 33 | except: 34 | VERSION_NUM = "0.0.1" 35 | 36 | if len(sys.argv) > 2: 37 | match_str = " ".join(sys.argv[2:]) 38 | matched_versions = obtainer.extract_version(match_str) 39 | if matched_versions: 40 | VERSION_NUM = matched_versions 41 | sys.argv = sys.argv[:2] 42 | 43 | setuptools.setup( 44 | name=MODULE_NAME, 45 | version=VERSION_NUM, 46 | platforms="Any", 47 | description="A cross platform OCR Library based on PaddlePaddle.", 48 | long_description=get_readme(), 49 | long_description_content_type="text/markdown", 50 | author="SWHL", 51 | author_email="liekkaskono@163.com", 52 | url="https://github.com/RapidAI/RapidOCR", 53 | license="Apache-2.0", 54 | include_package_data=True, 55 | install_requires=read_txt("requirements_paddle.txt"), 56 | package_dir={"": MODULE_NAME}, 57 | packages=setuptools.find_namespace_packages(where=MODULE_NAME), 58 | package_data={ 59 | "": ["*.txt", "*.yaml", "*.pdiparams", "*.pdiparams.info", "*.pdmodel"] 60 | }, 61 | keywords=[ 62 | "ocr,text_detection,text_recognition,dbnet,paddlepaddle,paddleocr,rapidocr" 63 | ], 64 | classifiers=[ 65 | "Programming Language :: Python :: 3.6", 66 | "Programming Language :: Python :: 3.7", 67 | "Programming Language :: Python :: 3.8", 68 | "Programming Language :: Python :: 3.9", 69 | "Programming Language :: Python :: 3.10", 70 | "Programming Language :: Python :: 3.11", 71 | "Programming Language :: Python :: 3.12", 72 | ], 73 | python_requires=">=3.6,<3.13", 74 | entry_points={ 75 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 76 | }, 77 | extras_require={ 78 | "cpu": ["paddlepaddle"], 79 | "gpu": ["paddlepaddle-gpu"], 80 | }, 81 | ) 82 | -------------------------------------------------------------------------------- /python/setup_torch.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: Union[Path, str]) -> List[str]: 13 | with open(txt_path, "r", encoding="utf-8") as f: 14 | data = [v.rstrip("\n") for v in f] 15 | return data 16 | 17 | 18 | def get_readme(): 19 | root_dir = Path(__file__).resolve().parent.parent 20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_ort.md") 21 | print(readme_path) 22 | with open(readme_path, "r", encoding="utf-8") as f: 23 | readme = f.read() 24 | return readme 25 | 26 | 27 | MODULE_NAME = "rapidocr_torch" 28 | 29 | obtainer = GetPyPiLatestVersion() 30 | try: 31 | latest_version = obtainer(MODULE_NAME) 32 | except Exception: 33 | latest_version = "0.0.0" 34 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True) 35 | 36 | if len(sys.argv) > 2: 37 | match_str = " ".join(sys.argv[2:]) 38 | matched_versions = obtainer.extract_version(match_str) 39 | if matched_versions: 40 | VERSION_NUM = matched_versions 41 | sys.argv = sys.argv[:2] 42 | 43 | project_urls = { 44 | "Documentation": "https://rapidai.github.io/RapidOCRDocs", 45 | "Changelog": "https://github.com/RapidAI/RapidOCR/releases", 46 | } 47 | 48 | setuptools.setup( 49 
| name=MODULE_NAME, 50 | version=VERSION_NUM, 51 | platforms="Any", 52 | description="A cross platform OCR Library based on pytorch.", 53 | long_description=get_readme(), 54 | long_description_content_type="text/markdown", 55 | author="SWHL", 56 | author_email="liekkaskono@163.com", 57 | url="https://github.com/RapidAI/RapidOCR", 58 | project_urls=project_urls, 59 | license="Apache-2.0", 60 | include_package_data=True, 61 | install_requires=read_txt("requirements_torch.txt"), 62 | package_dir={"": MODULE_NAME}, 63 | packages=setuptools.find_namespace_packages(where=MODULE_NAME), 64 | package_data={"": ["*.pth", "*.yaml", "*.txt"]}, 65 | keywords=[ 66 | "ocr,text_detection,text_recognition,db,onnxruntime,pytorch,paddleocr,openvino,rapidocr" 67 | ], 68 | classifiers=[ 69 | "Programming Language :: Python :: 3.10", 70 | "Programming Language :: Python :: 3.11", 71 | "Programming Language :: Python :: 3.12", 72 | ], 73 | python_requires=">=3.10,<3.13", 74 | entry_points={ 75 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 76 | }, 77 | ) 78 | -------------------------------------------------------------------------------- /python/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | -------------------------------------------------------------------------------- /python/tests/base_module.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import importlib 5 | import sys 6 | from dataclasses import dataclass 7 | from pathlib import Path 8 | from typing import Optional, Union 9 | 10 | import requests 11 | import yaml 12 | from tqdm import tqdm 13 | 14 | 15 | class BaseModule: 16 | def __init__(self, package_name: str = "rapidocr_onnxruntime"): 17 | self.package_name = package_name 18 | self.root_dir = Path(__file__).resolve().parent.parent 19 | self.package_dir = self.root_dir / self.package_name 20 | self.tests_dir = self.root_dir / "tests" 21 | 22 | sys.path.append(str(self.root_dir)) 23 | sys.path.append(str(self.package_dir)) 24 | 25 | def init_module(self, module_name: str, class_name: Optional[str] = None): 26 | if class_name is None: 27 | module_part = importlib.import_module(f"{self.package_name}") 28 | return module_part 29 | module_part = importlib.import_module(f"{self.package_name}.{module_name}") 30 | return getattr(module_part, class_name) 31 | 32 | @staticmethod 33 | def read_yaml(yaml_path: str): 34 | with open(yaml_path, "rb") as f: 35 | data = yaml.load(f, Loader=yaml.Loader) 36 | return data 37 | 38 | 39 | def download_file(url: str, save_path: Union[str, Path]): 40 | response = requests.get(url, stream=True, timeout=60) 41 | status_code = response.status_code 42 | 43 | if status_code != 200: 44 | raise DownloadModelError("Something went wrong while downloading models") 45 | 46 | total_size_in_bytes = int(response.headers.get("content-length", 1)) 47 | block_size = 1024 # 1 Kibibyte 48 | with tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) as pb: 49 | with open(save_path, "wb") as file: 50 | for data in response.iter_content(block_size): 51 | pb.update(len(data)) 52 | file.write(data) 53 | 54 | 55 | class DownloadModelError(Exception): 56 | pass 57 | 58 | 59 | @dataclass 60 | class Platform: 61 | mac: str = "Darwin" 62 | windows: str = "Windows" 63 | linux: str = "Linux" 64 | 
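The tests' BaseModule above resolves which backend package is under test, imports its classes by name, and reads its YAML config, while download_file streams a file with a tqdm progress bar. A minimal sketch of how these helpers might be combined in a test setup follows; the model URL is a placeholder and the use of the Det section of config.yaml is an assumption for illustration, not something the tests themselves pin down:

from base_module import BaseModule, download_file

# point the helper at the package we want to exercise
base = BaseModule(package_name="rapidocr_onnxruntime")
config = base.read_yaml(base.package_dir / "config.yaml")

# hypothetical model URL (assumption); the real tests obtain models elsewhere
model_url = "https://example.com/ch_PP-OCRv4_det_infer.onnx"
model_path = base.package_dir / "models" / "ch_PP-OCRv4_det_infer.onnx"
if not model_path.exists():
    download_file(model_url, model_path)

# lazily import the detector class from the package under test
TextDetector = base.init_module("ch_ppocr_det", "TextDetector")
detector = TextDetector(config["Det"])  # assumes the Det section carries model_path and thresholds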
-------------------------------------------------------------------------------- /python/tests/test_files/black_font_color_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/black_font_color_transparent.png -------------------------------------------------------------------------------- /python/tests/test_files/ch_doc_server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/ch_doc_server.png -------------------------------------------------------------------------------- /python/tests/test_files/ch_en_num.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/ch_en_num.jpg -------------------------------------------------------------------------------- /python/tests/test_files/devanagari.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/devanagari.jpg -------------------------------------------------------------------------------- /python/tests/test_files/empty_black.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/empty_black.jpg -------------------------------------------------------------------------------- /python/tests/test_files/en.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/en.jpg -------------------------------------------------------------------------------- /python/tests/test_files/issue_170.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/issue_170.png -------------------------------------------------------------------------------- /python/tests/test_files/japan.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/japan.jpg -------------------------------------------------------------------------------- /python/tests/test_files/korean.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/korean.jpg -------------------------------------------------------------------------------- /python/tests/test_files/short.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/short.png -------------------------------------------------------------------------------- /python/tests/test_files/test_letterbox_like.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/test_letterbox_like.jpg -------------------------------------------------------------------------------- /python/tests/test_files/test_without_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/test_without_det.jpg -------------------------------------------------------------------------------- /python/tests/test_files/text_cls.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_cls.jpg -------------------------------------------------------------------------------- /python/tests/test_files/text_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_det.jpg -------------------------------------------------------------------------------- /python/tests/test_files/text_rec.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_rec.jpg -------------------------------------------------------------------------------- /python/tests/test_files/text_vertical_words.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_vertical_words.png -------------------------------------------------------------------------------- /python/tests/test_files/two_dim_image.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/two_dim_image.npy -------------------------------------------------------------------------------- /python/tests/test_files/white_font_color_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/white_font_color_transparent.png --------------------------------------------------------------------------------
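Taken together, the setup_*.py scripts above publish each inference backend as its own wheel, all wired to the same console entry point pattern ({package}.main:main) and exercised by the tests against the images listed here. A hedged usage sketch against the onnxruntime wheel, mirroring the project's documented examples (the return value is assumed to be a list of [box, text, score] triples plus per-stage timings, or None when nothing is detected):

from rapidocr_onnxruntime import RapidOCR

# picks up the packaged config.yaml defaults: use_det, use_cls and use_rec all enabled
engine = RapidOCR()
result, elapse = engine("tests/test_files/ch_en_num.jpg")
if result is not None:
    for box, text, score in result:
        print(text, score)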