├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug.md
│   │   └── config.yml
│   └── workflows
│       ├── SyncToGitee.yml
│       ├── gen_whl_to_pypi_rapidocr.yml
│       ├── gen_whl_to_pypi_rapidocr_ort.yml
│       ├── gen_whl_to_pypi_rapidocr_paddle.yml
│       ├── gen_whl_to_pypi_rapidocr_torch.yml
│       ├── gen_whl_to_pypi_rapidocr_vino.yml
│       ├── gen_whl_to_pypi_rapidocr_web.yml
│       └── package_ocrweb.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── android
│   └── README.md
├── api
│   └── README.md
├── assets
│   ├── RapidOCRDemo.ipynb
│   ├── RapidOCR_LOGO.png
│   └── colab-badge.svg
├── cliff.toml
├── cpp
│   └── README.md
├── docs
│   ├── README_zh.md
│   ├── doc_whl_rapidocr.md
│   ├── doc_whl_rapidocr_ort.md
│   ├── doc_whl_rapidocr_paddle.md
│   ├── doc_whl_rapidocr_vino.md
│   └── doc_whl_rapidocr_web.md
├── dotnet
│   └── README.md
├── ios
│   └── README.md
├── jvm
│   └── README.md
├── ocrweb
│   ├── README.md
│   ├── rapidocr_web
│   │   ├── __init__.py
│   │   ├── ocrweb.py
│   │   ├── ocrweb.spec
│   │   ├── static
│   │   │   ├── css
│   │   │   │   ├── favicon.ico
│   │   │   │   └── main.css
│   │   │   └── js
│   │   │       └── jquery-3.0.0.min.js
│   │   ├── task.py
│   │   └── templates
│   │       └── index.html
│   ├── requirements.txt
│   └── setup.py
├── ocrweb_multi
│   ├── README.md
│   ├── assets
│   │   └── ocr_web_multi.jpg
│   ├── build.py
│   ├── config.yaml
│   ├── main.py
│   ├── main.spec
│   ├── models
│   │   └── .gitkeep
│   ├── rapidocr
│   │   ├── __init__.py
│   │   ├── classify.py
│   │   ├── detect.py
│   │   ├── detect_process.py
│   │   ├── main.py
│   │   ├── rapid_ocr_api.py
│   │   └── recognize.py
│   ├── requirements.txt
│   ├── static
│   │   ├── css
│   │   │   └── main.css
│   │   ├── favicon.ico
│   │   ├── hint.svg
│   │   ├── index.html
│   │   └── js
│   │       └── jquery-3.0.0.min.js
│   ├── utils
│   │   ├── config.py
│   │   └── utils.py
│   ├── wrapper.c
│   └── wrapper.rc
└── python
    ├── README.md
    ├── demo.py
    ├── rapidocr
    │   ├── __init__.py
    │   ├── cal_rec_boxes
    │   │   ├── __init__.py
    │   │   └── main.py
    │   ├── ch_ppocr_cls
    │   │   ├── __init__.py
    │   │   ├── main.py
    │   │   └── utils.py
    │   ├── ch_ppocr_det
    │   │   ├── __init__.py
    │   │   ├── main.py
    │   │   └── utils.py
    │   ├── ch_ppocr_rec
    │   │   ├── __init__.py
    │   │   ├── main.py
    │   │   ├── typings.py
    │   │   └── utils.py
    │   ├── cli.py
    │   ├── config.yaml
    │   ├── default_models.yaml
    │   ├── inference_engine
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── onnxruntime.py
    │   │   ├── openvino.py
    │   │   ├── paddle.py
    │   │   └── torch.py
    │   ├── main.py
    │   ├── models
    │   │   └── .gitkeep
    │   ├── networks
    │   │   ├── __init__.py
    │   │   ├── arch_config.yaml
    │   │   ├── architectures
    │   │   │   ├── __init__.py
    │   │   │   └── base_model.py
    │   │   ├── backbones
    │   │   │   ├── __init__.py
    │   │   │   ├── det_mobilenet_v3.py
    │   │   │   ├── rec_hgnet.py
    │   │   │   ├── rec_lcnetv3.py
    │   │   │   ├── rec_mobilenet_v3.py
    │   │   │   ├── rec_mv1_enhance.py
    │   │   │   └── rec_svtrnet.py
    │   │   ├── common.py
    │   │   ├── heads
    │   │   │   ├── __init__.py
    │   │   │   ├── cls_head.py
    │   │   │   ├── det_db_head.py
    │   │   │   ├── rec_ctc_head.py
    │   │   │   └── rec_multi_head.py
    │   │   └── necks
    │   │       ├── __init__.py
    │   │       ├── db_fpn.py
    │   │       ├── intracl.py
    │   │       └── rnn.py
    │   └── utils
    │       ├── __init__.py
    │       ├── download_file.py
    │       ├── load_image.py
    │       ├── logger.py
    │       ├── output.py
    │       ├── parse_parameters.py
    │       ├── process_img.py
    │       ├── typings.py
    │       ├── utils.py
    │       └── vis_res.py
    ├── rapidocr_onnxruntime
    │   ├── __init__.py
    │   ├── cal_rec_boxes
    │   │   ├── __init__.py
    │   │   └── main.py
    │   ├── ch_ppocr_cls
    │   │   ├── __init__.py
    │   │   ├── text_cls.py
    │   │   └── utils.py
    │   ├── ch_ppocr_det
    │   │   ├── __init__.py
    │   │   ├── text_detect.py
    │   │   └── utils.py
    │   ├── ch_ppocr_rec
    │   │   ├── __init__.py
    │   │   ├── text_recognize.py
    │   │   └── utils.py
    │   ├── config.yaml
    │   ├── main.py
    │   ├── models
    │   │   └── .gitkeep
    │   └── utils
    │       ├── __init__.py
    │       ├── infer_engine.py
    │       ├── load_image.py
    │       ├── logger.py
    │       ├── parse_parameters.py
    │       ├── process_img.py
    │       └── vis_res.py
    ├── rapidocr_openvino
    │   ├── __init__.py
    │   ├── cal_rec_boxes
    │   │   ├── __init__.py
    │   │   └── main.py
    │   ├── ch_ppocr_cls
    │   │   ├── __init__.py
    │   │   ├── text_cls.py
    │   │   └── utils.py
    │   ├── ch_ppocr_det
    │   │   ├── __init__.py
    │   │   ├── text_detect.py
    │   │   └── utils.py
    │   ├── ch_ppocr_rec
    │   │   ├── __init__.py
    │   │   ├── ppocr_keys_v1.txt
    │   │   ├── text_recognize.py
    │   │   └── utils.py
    │   ├── config.yaml
    │   ├── main.py
    │   ├── models
    │   │   └── .gitkeep
    │   └── utils
    │       ├── __init__.py
    │       ├── infer_engine.py
    │       ├── load_image.py
    │       ├── logger.py
    │       ├── parse_parameters.py
    │       ├── process_img.py
    │       └── vis_res.py
    ├── rapidocr_paddle
    │   ├── __init__.py
    │   ├── cal_rec_boxes
    │   │   ├── __init__.py
    │   │   └── main.py
    │   ├── ch_ppocr_cls
    │   │   ├── __init__.py
    │   │   ├── text_cls.py
    │   │   └── utils.py
    │   ├── ch_ppocr_det
    │   │   ├── __init__.py
    │   │   ├── text_detect.py
    │   │   └── utils.py
    │   ├── ch_ppocr_rec
    │   │   ├── __init__.py
    │   │   ├── ppocr_keys_v1.txt
    │   │   ├── text_recognize.py
    │   │   └── utils.py
    │   ├── config.yaml
    │   ├── main.py
    │   ├── models
    │   │   └── .gitkeep
    │   └── utils
    │       ├── __init__.py
    │       ├── infer_engine.py
    │       ├── load_image.py
    │       ├── logger.py
    │       ├── parse_parameters.py
    │       ├── process_img.py
    │       └── vis_res.py
    ├── rapidocr_torch
    │   ├── __init__.py
    │   ├── arch_config.yaml
    │   ├── cal_rec_boxes
    │   │   ├── __init__.py
    │   │   └── main.py
    │   ├── ch_ppocr_cls
    │   │   ├── __init__.py
    │   │   ├── text_cls.py
    │   │   └── utils.py
    │   ├── ch_ppocr_det
    │   │   ├── __init__.py
    │   │   ├── text_detect.py
    │   │   └── utils.py
    │   ├── ch_ppocr_rec
    │   │   ├── __init__.py
    │   │   ├── ppocr_keys_v1.txt
    │   │   ├── text_recognize.py
    │   │   └── utils.py
    │   ├── config.yaml
    │   ├── main.py
    │   ├── modeling
    │   │   ├── __init__.py
    │   │   ├── architectures
    │   │   │   ├── __init__.py
    │   │   │   └── base_model.py
    │   │   ├── backbones
    │   │   │   ├── __init__.py
    │   │   │   ├── det_mobilenet_v3.py
    │   │   │   ├── rec_hgnet.py
    │   │   │   ├── rec_lcnetv3.py
    │   │   │   ├── rec_mobilenet_v3.py
    │   │   │   └── rec_svtrnet.py
    │   │   ├── common.py
    │   │   ├── heads
    │   │   │   ├── __init__.py
    │   │   │   ├── cls_head.py
    │   │   │   ├── det_db_head.py
    │   │   │   ├── rec_ctc_head.py
    │   │   │   └── rec_multi_head.py
    │   │   └── necks
    │   │       ├── __init__.py
    │   │       ├── db_fpn.py
    │   │       ├── intracl.py
    │   │       └── rnn.py
    │   ├── models
    │   │   └── .gitkeep
    │   └── utils
    │       ├── __init__.py
    │       ├── infer_engine.py
    │       ├── load_image.py
    │       ├── logger.py
    │       ├── parse_parameters.py
    │       ├── process_img.py
    │       └── vis_res.py
    ├── requirements.txt
    ├── requirements_ort.txt
    ├── requirements_paddle.txt
    ├── requirements_torch.txt
    ├── requirements_vino.txt
    ├── setup.py
    ├── setup_onnxruntime.py
    ├── setup_openvino.py
    ├── setup_paddle.py
    ├── setup_torch.py
    └── tests
        ├── __init__.py
        ├── base_module.py
        ├── test_files
        │   ├── black_font_color_transparent.png
        │   ├── ch_doc_server.png
        │   ├── ch_en_num.jpg
        │   ├── devanagari.jpg
        │   ├── empty_black.jpg
        │   ├── en.jpg
        │   ├── issue_170.png
        │   ├── japan.jpg
        │   ├── korean.jpg
        │   ├── short.png
        │   ├── test_letterbox_like.jpg
        │   ├── test_without_det.jpg
        │   ├── text_cls.jpg
        │   ├── text_det.jpg
        │   ├── text_rec.jpg
        │   ├── text_vertical_words.png
        │   ├── two_dim_image.npy
        │   └── white_font_color_transparent.png
        ├── test_main.py
        ├── test_ort.py
        ├── test_paddle.py
        ├── test_torch.py
        └── test_vino.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Set the default behavior, in case people don't have core.autocrlf set.
2 | * text=auto
3 |
4 | # Explicitly declare text files you want to always be normalized and converted
5 | # to native line endings on checkout.
6 | *.c text
7 | *.h text
8 | *.py text
9 | *.md text
10 | *.js text
11 | *.cpp text
12 |
13 | # Declare files that will always have CRLF line endings on checkout.
14 | *.sln text eol=crlf
15 |
16 | # Denote all files that are truly binary and should not be modified.
17 | *.png binary
18 | *.jpg binary
19 | *.pdf binary
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: https://rapidai.github.io/RapidOCRDocs/sponsor/
14 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: 🐞 Bug
3 | about: Bug
4 | title: 'Bug'
5 | labels: 'Bug'
6 | assignees: ''
7 |
8 | ---
9 |
10 | #### 问题描述 / Problem Description
11 |
12 |
13 | #### 运行环境 / Runtime Environment
14 |
15 |
16 | #### 复现代码 / Reproduction Code
17 | ```python
18 |
19 | ```
20 |
21 | #### 可能解决方案 / Possible solutions
22 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 | - name: ❓ Questions
4 | url: https://github.com/RapidAI/RapidOCR/discussions/categories/q-a
5 | about: Please use the community forum for help and questions regarding RapidOCR.
6 | - name: 💡 Feature requests and ideas
7 | url: https://github.com/RapidAI/RapidOCR/discussions/categories/ideas
8 | about: Please vote for and post new feature ideas in the community forum.
9 | - name: 📖 Documentation
10 | url: https://rapidai.github.io/RapidOCRDocs/docs/
11 | about: A great place to find instructions and answers about RapidOCR.
12 |
--------------------------------------------------------------------------------
/.github/workflows/SyncToGitee.yml:
--------------------------------------------------------------------------------
1 | name: SyncToGitee
2 | on:
3 | push:
4 | branches:
5 | - main
6 | jobs:
7 | repo-sync:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - name: Checkout source codes
11 | uses: actions/checkout@v4
12 |
13 | - name: Mirror the Github organization repos to Gitee.
14 | uses: Yikun/hub-mirror-action@v1.4
15 | with:
16 | src: 'github/RapidAI'
17 | dst: 'gitee/RapidAI'
18 | dst_key: ${{ secrets.GITEE_PRIVATE_KEY }}
19 | dst_token: ${{ secrets.GITEE_TOKEN }}
20 | force_update: true
21 | # only sync this repo
22 | static_list: "RapidOCR"
23 | debug: true
24 |
25 | - name: Mirror the Github organization repos to Gitee.
26 | uses: Yikun/hub-mirror-action@v1.4
27 | with:
28 | src: 'github/RapidAI'
29 | dst: 'gitee/openKylin'
30 | dst_key: ${{ secrets.GITEE_PRIVATE_KEY }}
31 | dst_token: ${{ secrets.GITEE_TOKEN }}
32 | force_update: true
33 | # only sync this repo
34 | static_list: "RapidOCR"
35 | debug: true
--------------------------------------------------------------------------------
/.github/workflows/gen_whl_to_pypi_rapidocr.yml:
--------------------------------------------------------------------------------
1 | name: Push rapidocr to pypi
2 |
3 | on:
4 | push:
5 | tags:
6 | - v*
7 |
8 | env:
9 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v3.0.0.zip
10 |
11 | jobs:
12 | UnitTesting:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - name: Pull latest code
16 | uses: actions/checkout@v4
17 |
18 | - name: Set up Python 3.10
19 | uses: actions/setup-python@v4
20 | with:
21 | python-version: '3.10'
22 | architecture: 'x64'
23 |
24 | - name: Display Python version
25 | run: python -c "import sys; print(sys.version)"
26 |
27 | - name: Unit testings
28 | run: |
29 | cd python
30 | pip install -r requirements.txt
31 | pip install pytest wheel get_pypi_latest_version openvino==2023.3.0 onnxruntime
32 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
33 | python -m pip install paddlepaddle==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
34 |
35 | pytest tests/test_main.py
36 |
37 | GenerateWHL_PushPyPi:
38 | needs: UnitTesting
39 | runs-on: ubuntu-latest
40 |
41 | steps:
42 | - uses: actions/checkout@v4
43 |
44 | - name: Set up Python 3.10
45 | uses: actions/setup-python@v4
46 | with:
47 | python-version: '3.10'
48 | architecture: 'x64'
49 |
50 | - name: Download models
51 | run: |
52 | cd python
53 | wget $RESOURCES_URL
54 | ZIP_NAME=${RESOURCES_URL##*/}
55 | DIR_NAME=${ZIP_NAME%.*}
56 | unzip $ZIP_NAME
57 | cp $DIR_NAME/resources/models/*.* rapidocr/models
58 |
59 | - name: Run setup.py
60 | run: |
61 | cd python
62 | pip install setuptools get_pypi_latest_version wheel
63 | mkdir rapidocr_t
64 | mv rapidocr rapidocr_t
65 | mv rapidocr_t rapidocr
66 | cd rapidocr
67 | echo "from .rapidocr.main import RapidOCR, VisRes" > __init__.py
68 |
69 | cd ..
70 | python -m pip install --upgrade pip
71 | python setup.py bdist_wheel ${{ github.ref_name }}
72 | mv dist ../
73 |
74 | - name: Publish distribution 📦 to PyPI
75 | uses: pypa/gh-action-pypi-publish@v1.5.0
76 | with:
77 | password: ${{ secrets.RAPIDOCR }}
78 | packages_dir: dist/
79 |
--------------------------------------------------------------------------------
/.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml:
--------------------------------------------------------------------------------
1 | name: Push rapidocr_onnxruntime to pypi
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | paths:
7 | - 'python/rapidocr_onnxruntime/**'
8 | - 'docs/doc_whl_rapidocr_ort.md'
9 | - 'python/setup_onnxruntime.py'
10 | # - '.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml'
11 |
12 |
13 | env:
14 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip
15 |
16 | jobs:
17 | UnitTesting:
18 | runs-on: ubuntu-latest
19 | steps:
20 | - name: Pull latest code
21 | uses: actions/checkout@v4
22 |
23 | - name: Set up Python 3.10
24 | uses: actions/setup-python@v4
25 | with:
26 | python-version: '3.10'
27 | architecture: 'x64'
28 |
29 | - name: Display Python version
30 | run: python -c "import sys; print(sys.version)"
31 |
32 | - name: Unit testings
33 | run: |
34 | wget $RESOURCES_URL
35 | ZIP_NAME=${RESOURCES_URL##*/}
36 | DIR_NAME=${ZIP_NAME%.*}
37 | unzip $DIR_NAME
38 | cp $DIR_NAME/resources/models/*.onnx python/rapidocr_onnxruntime/models/
39 |
40 | cd python
41 | pip install -r requirements_ort.txt
42 | pip install pytest wheel get_pypi_latest_version openvino
43 |
44 | cd tests
45 | pytest test_ort.py
46 |
47 | GenerateWHL_PushPyPi:
48 | needs: UnitTesting
49 | runs-on: ubuntu-latest
50 |
51 | steps:
52 | - uses: actions/checkout@v4
53 |
54 | - name: Set up Python 3.10
55 | uses: actions/setup-python@v4
56 | with:
57 | python-version: '3.10'
58 | architecture: 'x64'
59 |
60 | - name: Download models
61 | run: |
62 | cd python
63 | wget $RESOURCES_URL
64 | ZIP_NAME=${RESOURCES_URL##*/}
65 | DIR_NAME=${ZIP_NAME%.*}
66 | unzip $ZIP_NAME
67 | cp $DIR_NAME/resources/models/*.onnx rapidocr_onnxruntime/models
68 |
69 | - name: Run setup_onnxruntime.py
70 | run: |
71 | cd python
72 | pip install setuptools get_pypi_latest_version wheel
73 | mkdir rapidocr_onnxruntime_t
74 | mv rapidocr_onnxruntime rapidocr_onnxruntime_t
75 | mv rapidocr_onnxruntime_t rapidocr_onnxruntime
76 | cd rapidocr_onnxruntime
77 | echo "from .rapidocr_onnxruntime.main import RapidOCR, VisRes" > __init__.py
78 |
79 | cd ..
80 | python -m pip install --upgrade pip
81 | python setup_onnxruntime.py bdist_wheel ${{ github.ref_name }}
82 | mv dist ../
83 |
84 | - name: Publish distribution 📦 to PyPI
85 | uses: pypa/gh-action-pypi-publish@v1.5.0
86 | with:
87 | password: ${{ secrets.PYPI_API_TOKEN }}
88 | packages_dir: dist/
89 |
--------------------------------------------------------------------------------
/.github/workflows/gen_whl_to_pypi_rapidocr_paddle.yml:
--------------------------------------------------------------------------------
1 | name: Push rapidocr_paddle to pypi
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | paths:
7 | - 'python/rapidocr_paddle/**'
8 | - 'docs/doc_whl_rapidocr_paddle.md'
9 | - 'python/setup_paddle.py'
10 | - '.github/workflows/gen_whl_to_pypi_rapidocr_paddle.yml'
11 | - 'python/requirements_paddle.txt'
12 | # tags:
13 | # - v*
14 |
15 | env:
16 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/paddle_models_v4.zip
17 |
18 | jobs:
19 | UnitTesting:
20 | runs-on: ubuntu-latest
21 | steps:
22 | - name: Pull latest code
23 | uses: actions/checkout@v4
24 |
25 | - name: Set up Python 3.10
26 | uses: actions/setup-python@v4
27 | with:
28 | python-version: '3.10'
29 | architecture: 'x64'
30 |
31 | - name: Display Python version
32 | run: python -c "import sys; print(sys.version)"
33 |
34 | - name: Unit testings
35 | run: |
36 | wget $RESOURCES_URL
37 | ZIP_NAME=${RESOURCES_URL##*/}
38 | DIR_NAME=${ZIP_NAME%.*}
39 | unzip $DIR_NAME
40 | cp -r models/* python/rapidocr_paddle/models/
41 | cd python
42 | pip install -r requirements_paddle.txt
43 | pip install pytest wheel get_pypi_latest_version
44 | pip install paddlepaddle==3.0.0rc0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
45 | cd tests
46 | pytest test_paddle.py
47 |
48 | GenerateWHL_PushPyPi:
49 | needs: UnitTesting
50 | runs-on: ubuntu-latest
51 |
52 | steps:
53 | - uses: actions/checkout@v4
54 |
55 | - name: Set up Python 3.10
56 | uses: actions/setup-python@v4
57 | with:
58 | python-version: '3.10'
59 | architecture: 'x64'
60 |
61 | - name: Download models
62 | run: |
63 | cd python
64 | wget $RESOURCES_URL
65 | ZIP_NAME=${RESOURCES_URL##*/}
66 | DIR_NAME=${ZIP_NAME%.*}
67 | unzip $ZIP_NAME
68 | cp -r models/* rapidocr_paddle/models
69 |
70 | - name: Run setup_paddle.py
71 | run: |
72 | cd python
73 | pip install setuptools get_pypi_latest_version wheel
74 |
75 | mkdir rapidocr_paddle_t
76 | mv rapidocr_paddle rapidocr_paddle_t
77 | mv rapidocr_paddle_t rapidocr_paddle
78 | cd rapidocr_paddle
79 | echo "from .rapidocr_paddle.main import RapidOCR, VisRes" > __init__.py
80 |
81 | cd ..
82 | python -m pip install --upgrade pip
83 |
84 | echo "${{ github.event.head_commit.message }}"
85 | python setup_paddle.py bdist_wheel "${{ github.event.head_commit.message }}"
86 | mv dist ../
87 |
88 | - name: Publish distribution 📦 to PyPI
89 | uses: pypa/gh-action-pypi-publish@v1.5.0
90 | with:
91 | password: ${{ secrets.RAPIDOCR_OPENVINO }}
92 | packages_dir: dist/
93 |
--------------------------------------------------------------------------------
/.github/workflows/gen_whl_to_pypi_rapidocr_torch.yml:
--------------------------------------------------------------------------------
1 | name: Push rapidocr_torch to pypi
2 |
3 | on:
4 | push:
5 | # branches: [ main ]
6 | # paths:
7 | # - 'python/rapidocr_torch/**'
8 | # - 'python/setup_torch.py'
9 | # - '.github/workflows/gen_whl_to_pypi_rapidocr_torch.yml'
10 | # - 'python/requirements_torch.txt'
11 | tags:
12 | - torch_v*
13 |
14 | env:
15 | RESOURCES_URL: https://github.com/Joker1212/RapidOCR/releases/download/v0.0.0/torch_test.zip
16 |
17 | jobs:
18 | UnitTesting:
19 | runs-on: ubuntu-latest
20 | steps:
21 | - name: Pull latest code
22 | uses: actions/checkout@v4
23 |
24 | - name: Set up Python 3.7
25 | uses: actions/setup-python@v4
26 | with:
27 | python-version: '3.7'
28 | architecture: 'x64'
29 |
30 | - name: Display Python version
31 | run: python -c "import sys; print(sys.version)"
32 |
33 | - name: Unit testings
34 | run: |
35 | wget $RESOURCES_URL
36 | ZIP_NAME=${RESOURCES_URL##*/}
37 | DIR_NAME=${ZIP_NAME%.*}
38 | unzip $DIR_NAME
39 | cp $DIR_NAME/resources/models/*.pth python/rapidocr_torch/models/
40 |
41 | cd python
42 | pip install -r requirements_torch.txt
43 | pip install pytest wheel get_pypi_latest_version
44 |
45 | cd tests
46 | pytest test_torch.py
47 |
48 | GenerateWHL_PushPyPi:
49 | needs: UnitTesting
50 | runs-on: ubuntu-latest
51 |
52 | steps:
53 | - uses: actions/checkout@v4
54 |
55 | - name: Set up Python 3.7
56 | uses: actions/setup-python@v4
57 | with:
58 | python-version: '3.7'
59 | architecture: 'x64'
60 |
61 | # - name: Set SSH Environment
62 | # env:
63 | # DEPLOY_KEYS: ${{ secrets.GEN_PYTHON_SDK }}
64 | # run: |
65 | # mkdir -p ~/.ssh/
66 | # echo "$DEPLOY_KEYS" > ~/.ssh/id_rsa
67 | # chmod 600 ~/.ssh/id_rsa
68 | # chmod 700 ~/.ssh && chmod 600 ~/.ssh/*
69 |
70 | - name: Download models
71 | run: |
72 | cd python
73 | wget $RESOURCES_URL
74 | ZIP_NAME=${RESOURCES_URL##*/}
75 | DIR_NAME=${ZIP_NAME%.*}
76 | unzip $ZIP_NAME
77 | cp $DIR_NAME/resources/models/*.pth rapidocr_torch/models/
78 |
79 | - name: Run setup_torch.py
80 | run: |
81 | cd python
82 | pip install setuptools get_pypi_latest_version wheel
83 | mkdir rapidocr_torch_t
84 | mv rapidocr_torch rapidocr_torch_t
85 | mv rapidocr_torch_t rapidocr_torch
86 | cd rapidocr_torch
87 | echo "from .rapidocr_torch.main import RapidOCR, VisRes" > __init__.py
88 |
89 | cd ..
90 | python -m pip install --upgrade pip
91 | python setup_torch.py bdist_wheel ${{ github.ref_name }}
92 | mv dist ../
93 |
94 | # - name: Publish distribution 📦 to PyPI
95 | # uses: pypa/gh-action-pypi-publish@v1.5.0
96 | # with:
97 | # password: ${{ secrets.PYPI_API_TOKEN }}
98 | # packages_dir: dist/
99 | - name: Publish distribution 📦 to Test PyPI
100 | uses: pypa/gh-action-pypi-publish@v1.5.0
101 | with:
102 | username: TEST_PYPI_API_TOKEN
103 | password: ${{ secrets.TEST_PYPI_API_TOKEN }}
104 | repository_url: https://test.pypi.org/legacy/
105 | packages_dir: dist/
106 |
--------------------------------------------------------------------------------
/.github/workflows/gen_whl_to_pypi_rapidocr_vino.yml:
--------------------------------------------------------------------------------
1 | name: Push rapidocr_openvino to pypi
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | paths:
7 | - 'python/rapidocr_openvino/**'
8 | - 'docs/doc_whl_rapidocr_vino.md'
9 | - 'python/setup_openvino.py'
10 | - '.github/workflows/gen_whl_to_pypi_rapidocr_vino.yml'
11 | - 'python/requirements_vino.txt'
12 |
13 |
14 | env:
15 | RESOURCES_URL: https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip
16 |
17 | jobs:
18 | UnitTesting:
19 | runs-on: ubuntu-latest
20 | steps:
21 | - name: Pull latest code
22 | uses: actions/checkout@v4
23 |
24 | - name: Set up Python 3.10
25 | uses: actions/setup-python@v4
26 | with:
27 | python-version: '3.10'
28 | architecture: 'x64'
29 |
30 | - name: Display Python version
31 | run: python -c "import sys; print(sys.version)"
32 |
33 | - name: Unit testings
34 | run: |
35 | wget $RESOURCES_URL
36 | ZIP_NAME=${RESOURCES_URL##*/}
37 | DIR_NAME=${ZIP_NAME%.*}
38 | unzip $DIR_NAME
39 | cp $DIR_NAME/resources/models/*.onnx python/rapidocr_openvino/models/
40 | cd python
41 | pip install -r requirements_vino.txt
42 | pip install pytest wheel get_pypi_latest_version onnxruntime
43 | cd tests
44 | pytest test_vino.py
45 |
46 | GenerateWHL_PushPyPi:
47 | needs: UnitTesting
48 | runs-on: ubuntu-latest
49 |
50 | steps:
51 | - uses: actions/checkout@v4
52 |
53 | - name: Set up Python 3.10
54 | uses: actions/setup-python@v4
55 | with:
56 | python-version: '3.10'
57 | architecture: 'x64'
58 |
59 | - name: Download models
60 | run: |
61 | cd python
62 | wget $RESOURCES_URL
63 | ZIP_NAME=${RESOURCES_URL##*/}
64 | DIR_NAME=${ZIP_NAME%.*}
65 | unzip $ZIP_NAME
66 | cp $DIR_NAME/resources/models/*.onnx rapidocr_openvino/models
67 |
68 | - name: Run setup_openvino.py
69 | run: |
70 | cd python
71 | pip install setuptools get_pypi_latest_version wheel
72 | mkdir rapidocr_openvino_t
73 | mv rapidocr_openvino rapidocr_openvino_t
74 | mv rapidocr_openvino_t rapidocr_openvino
75 | cd rapidocr_openvino
76 | echo "from .rapidocr_openvino.main import RapidOCR, VisRes" > __init__.py
77 |
78 | cd ..
79 | python -m pip install --upgrade pip
80 | python setup_openvino.py bdist_wheel "${{ github.event.head_commit.message }}"
81 | mv dist ../
82 |
83 | - name: Publish distribution 📦 to PyPI
84 | uses: pypa/gh-action-pypi-publish@v1.5.0
85 | with:
86 | password: ${{ secrets.RAPIDOCR_OPENVINO }}
87 | packages_dir: dist/
88 |
--------------------------------------------------------------------------------
/.github/workflows/gen_whl_to_pypi_rapidocr_web.yml:
--------------------------------------------------------------------------------
1 | name: Push rapidocr_web to pypi
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | paths:
7 | - 'ocrweb/rapidocr_web/**'
8 | - '!ocrweb/rapidocr_web/ocr_web.spec'
9 | - 'docs/doc_whl_rapidocr_web.md'
10 | - 'ocrweb/setup.py'
11 | - '.github/workflows/gen_whl_to_pypi_rapidocr_web.yml'
12 |
13 | jobs:
14 | GenerateWHL_PushPyPi:
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - uses: actions/checkout@v4
19 |
20 | - name: Set up Python 3.7
21 | uses: actions/setup-python@v4
22 | with:
23 | python-version: '3.7'
24 | architecture: 'x64'
25 |
26 | - name: Set SSH Environment
27 | env:
28 | DEPLOY_KEYS: ${{ secrets.GEN_PYTHON_SDK }}
29 | run: |
30 | mkdir -p ~/.ssh/
31 | echo "$DEPLOY_KEYS" > ~/.ssh/id_rsa
32 | chmod 600 ~/.ssh/id_rsa
33 | chmod 700 ~/.ssh && chmod 600 ~/.ssh/*
34 |
35 | - name: Run setup.py
36 | run: |
37 | cd ocrweb
38 | pip install -r requirements.txt
39 |
40 | python -m pip install --upgrade pip
41 | python setup.py bdist_wheel "${{ github.event.head_commit.message }}"
42 |
43 | - name: Publish distribution 📦 to PyPI
44 | uses: pypa/gh-action-pypi-publish@v1.5.0
45 | with:
46 | password: ${{ secrets.RAPIDOCR_OPENVINO }}
47 | packages_dir: ocrweb/dist/
48 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### Python template
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 | .pytest_cache
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | pip-wheel-metadata/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | # *.manifest
37 | # *.spec
38 | *.res
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | *.py,cover
55 | .hypothesis/
56 | .pytest_cache/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
99 | __pypackages__/
100 |
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 |
105 | # SageMath parsed files
106 | *.sage.py
107 |
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 |
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 |
121 | # Rope project settings
122 | .ropeproject
123 |
124 | # mkdocs documentation
125 | /site
126 |
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 |
132 | # Pyre type checker
133 | .pyre/
134 |
135 | #idea
136 | .vs
137 | .vscode
138 | .idea
139 | /images
140 | /models
141 |
142 | #models
143 | *.onnx
144 |
145 | *.ttf
146 | *.ttc
147 |
148 | long1.jpg
149 |
150 | *.bin
151 | *.mapping
152 | *.xml
153 |
154 | *.pdiparams
155 | *.pdiparams.info
156 | *.pdmodel
157 |
158 | .DS_Store
159 | *.npy
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://gitee.com/SWHL/autoflake
3 | rev: v2.1.1
4 | hooks:
5 | - id: autoflake
6 | args:
7 | [
8 | "--recursive",
9 | "--in-place",
10 | "--remove-all-unused-imports",
11 | "--ignore-init-module-imports",
12 | ]
13 | files: \.py$
14 | - repo: https://gitee.com/SWHL/black
15 | rev: 23.1.0
16 | hooks:
17 | - id: black
18 | files: \.py$
19 |
--------------------------------------------------------------------------------
/android/README.md:
--------------------------------------------------------------------------------
1 | See [RapidOcrAndroidOnnx](https://github.com/RapidAI/RapidOcrAndroidOnnx) for details.
2 |
--------------------------------------------------------------------------------
/api/README.md:
--------------------------------------------------------------------------------
1 | ### See [RapidOCRAPI](https://github.com/RapidAI/RapidOCRAPI) for details
2 |
3 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_api/usage/)
4 |
--------------------------------------------------------------------------------
/assets/RapidOCR_LOGO.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/assets/RapidOCR_LOGO.png
--------------------------------------------------------------------------------
/assets/colab-badge.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/cpp/README.md:
--------------------------------------------------------------------------------
1 | See [RapidOcrNcnn](https://github.com/RapidAI/RapidOcrNcnn) for details.
2 |
3 | See [RapidOcrOnnx](https://github.com/RapidAI/RapidOcrOnnx) for details.
4 |
--------------------------------------------------------------------------------
/docs/doc_whl_rapidocr.md:
--------------------------------------------------------------------------------
1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs)
2 |
--------------------------------------------------------------------------------
/docs/doc_whl_rapidocr_ort.md:
--------------------------------------------------------------------------------
1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr/usage/)
2 |
--------------------------------------------------------------------------------
/docs/doc_whl_rapidocr_paddle.md:
--------------------------------------------------------------------------------
1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_paddle/usage/)
2 |
--------------------------------------------------------------------------------
/docs/doc_whl_rapidocr_vino.md:
--------------------------------------------------------------------------------
1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr/usage/)
2 |
--------------------------------------------------------------------------------
/docs/doc_whl_rapidocr_web.md:
--------------------------------------------------------------------------------
1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_web/rapidocr_web/)
2 |
--------------------------------------------------------------------------------
/dotnet/README.md:
--------------------------------------------------------------------------------
1 | See [RapidOCRCSharp](https://github.com/RapidAI/RapidOCRCSharp) for details.
2 |
--------------------------------------------------------------------------------
/ios/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | Volunteer contributors are needed. Please join the QQ group (887298230) and get in touch directly.
4 |
5 | A contributor is wanted.
6 |
--------------------------------------------------------------------------------
/jvm/README.md:
--------------------------------------------------------------------------------
1 | See [RapidOcrNcnnJvm](https://github.com/RapidAI/RapidOcrNcnnJvm) for details.
2 |
3 | See [RapidOcrOnnxJvm](https://github.com/RapidAI/RapidOcrOnnxJvm) for details.
4 |
--------------------------------------------------------------------------------
/ocrweb/README.md:
--------------------------------------------------------------------------------
1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_web/usage/)
2 |
--------------------------------------------------------------------------------
/ocrweb/rapidocr_web/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
--------------------------------------------------------------------------------
/ocrweb/rapidocr_web/ocrweb.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import argparse
5 | from pathlib import Path
6 | from wsgiref.simple_server import make_server
7 |
8 | from flask import Flask, render_template, request
9 |
10 | try:
11 | from rapidocr_web.task import OCRWebUtils
12 | except ImportError:
13 | from task import OCRWebUtils
14 |
15 | root_dir = Path(__file__).resolve().parent
16 |
17 | app = Flask(__name__, template_folder="templates")
18 | app.config["MAX_CONTENT_LENGTH"] = 3 * 1024 * 1024
19 | processor = OCRWebUtils()
20 |
21 |
22 | @app.route("/")
23 | def index():
24 | return render_template("index.html")
25 |
26 |
27 | @app.route("/ocr", methods=["POST"])
28 | def ocr():
29 | if request.method == "POST":
30 | img_str = request.get_json().get("file", None)
31 | ocr_res = processor(img_str)
32 | return ocr_res
33 |
34 |
35 | def main():
36 | parser = argparse.ArgumentParser("rapidocr_web")
37 | parser.add_argument("-ip", "--ip", type=str, default="0.0.0.0", help="IP Address")
38 | parser.add_argument("-p", "--port", type=int, default=9003, help="IP port")
39 | args = parser.parse_args()
40 |
41 |     print(f"Successfully launched. Visit http://{args.ip}:{args.port} to view.")
42 | server = make_server(args.ip, args.port, app)
43 | server.serve_forever()
44 |
45 |
46 | if __name__ == "__main__":
47 | main()
48 |
--------------------------------------------------------------------------------
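
The `/ocr` route above accepts a JSON body whose `file` field carries a base64-encoded image with a data-URL prefix (task.py keeps only the part after the first `,` before decoding). Below is a minimal client sketch, assuming the server runs with the default arguments (port 9003) and that `requests` is installed; `demo.png` is only a placeholder file name.

import base64
import requests

with open("demo.png", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

# task.py splits the string on "," and decodes the part after it,
# so a data-URL style prefix is expected.
payload = {"file": f"data:image/png;base64,{b64}"}

resp = requests.post("http://127.0.0.1:9003/ocr", json=payload, timeout=60)
print(resp.json())  # keys: image, total_elapse, elapse_part, rec_res, det_boxes
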
/ocrweb/rapidocr_web/ocrweb.spec:
--------------------------------------------------------------------------------
1 | # -*- mode: python ; coding: utf-8 -*-
2 | from pathlib import Path
3 |
4 | import rapidocr_onnxruntime
5 |
6 | block_cipher = None
7 |
8 | package_name = 'rapidocr_onnxruntime'
9 | install_dir = Path(rapidocr_onnxruntime.__file__).resolve().parent
10 |
11 | onnx_paths = list(install_dir.rglob('*.onnx'))
12 | yaml_paths = list(install_dir.rglob('*.yaml'))
13 |
14 | onnx_add_data = [(str(v.parent), f'{package_name}/{v.parent.name}')
15 | for v in onnx_paths]
16 |
17 | yaml_add_data = []
18 | for v in yaml_paths:
19 | if package_name == v.parent.name:
20 | yaml_add_data.append((str(v.parent / '*.yaml'), package_name))
21 | else:
22 | yaml_add_data.append(
23 | (str(v.parent / '*.yaml'), f'{package_name}/{v.parent.name}'))
24 |
25 | add_data = list(set(yaml_add_data + onnx_add_data))
26 |
27 |
28 | a = Analysis(
29 | ['ocrweb.py'],
30 | pathex=[],
31 | binaries=[],
32 | datas=add_data,
33 | hiddenimports=[],
34 | hookspath=[],
35 | hooksconfig={},
36 | runtime_hooks=[],
37 | excludes=[],
38 | win_no_prefer_redirects=False,
39 | win_private_assemblies=False,
40 | cipher=block_cipher,
41 | noarchive=False,
42 | )
43 | pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
44 |
45 | exe = EXE(
46 | pyz,
47 | a.scripts,
48 | [],
49 | exclude_binaries=True,
50 | name='RapidOCRWeb',
51 | debug=False,
52 | bootloader_ignore_signals=False,
53 | strip=False,
54 | upx=True,
55 | console=True,
56 | disable_windowed_traceback=False,
57 | argv_emulation=False,
58 | target_arch=None,
59 | codesign_identity=None,
60 | entitlements_file=None,
61 | icon=['./static/css/favicon.ico'],
62 | )
63 | coll = COLLECT(
64 | exe,
65 | a.binaries,
66 | a.zipfiles,
67 | a.datas,
68 | strip=False,
69 | upx=True,
70 | upx_exclude=[],
71 | name='RapidOCRWeb',
72 | )
73 |
--------------------------------------------------------------------------------
/ocrweb/rapidocr_web/static/css/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb/rapidocr_web/static/css/favicon.ico
--------------------------------------------------------------------------------
/ocrweb/rapidocr_web/static/css/main.css:
--------------------------------------------------------------------------------
1 | body{
2 | background-color:#ebedef;
3 | min-height: 100%;
4 | margin: 0;
5 | }
6 | .btn-gen {
7 | /* position: absolute;top:540px; left: 500px; */
8 | background-color: #00a1d6;
9 | text-align: center;
10 | border-radius: 18px;
11 | margin-top: 5px;
12 | font-size: 15px;
13 | padding: 5px 10px;
14 | height: 20px;
15 | width: 120px;
16 | text-transform: uppercase;
17 | color: #fff;
18 | border:none;
19 | }
20 | .btn-gen:hover,
21 | .btn-gen:focus {
22 | border-color: #23AAEE;
23 | background-color: #23AAEE;
24 | color: white;
25 | cursor: pointer;
26 | }
27 | .area{
28 | text-align: center;
29 | height: auto;
30 | margin: auto;
31 | }
32 | .leftarea{
33 | float: left;
34 | width: 50%;
35 | height: auto;
36 | position: relative;
37 | }
38 |
39 | .rightarea{
40 | float: left;
41 | width: 50%;
42 | height: auto;
43 | }
44 | .table{
45 | width: auto;
46 | height: auto;
47 | margin: 0 auto;
48 |
49 | }
50 | .span_title{
51 | width: 98%;
52 | height: 36px;
53 | margin-top: 4px;
54 | line-height: 32px;
55 | background-color: #00a1d6;
56 | border: 1px solid #00a1d6;
57 | border-radius: 20px;
58 | color: #fff;
59 | display: inline-block;
60 | text-align: center;
61 | font-size: 22px;
62 | transition: .3s;
63 | box-sizing: border-box;
64 | }
65 | .uplodNote{
66 | font-size: 10px;
67 | color:#A1A1A1;
68 | }
69 |
70 | a{
71 | text-decoration:none;
72 | }
73 |
74 | /* wrapper */
75 | .leftarea>#wrapper {
76 | position: absolute; top: 45px; left:0px;
77 | width: 98%;
78 | height: 100%;
79 | background:
80 | linear-gradient(#1a98ca, #1a98ca),
81 | linear-gradient(90deg, #ffffff33 1px,transparent 0,transparent 19px),
82 | linear-gradient( #ffffff33 1px,transparent 0,transparent 19px),
83 | linear-gradient(transparent, #1a98ca);
84 | background-size:100% 1.5%, 10% 100%,100% 8%, 100% 100%;
85 | background-repeat:no-repeat, repeat, repeat, no-repeat;
86 | background-position: 0% 100%, 0 0, 0 0, 0 0;
87 |     /* Initial position */
88 |     clip-path: polygon(0% 0%, 100% 0%, 100% 1.5%, 0% 1.5%);
89 |     /* Add the animation */
90 | animation: move 1s infinite linear;
91 | }
92 |
93 | @keyframes move{
94 | to{
95 | background-position: 0 100%,0 0, 0 0, 0 0;
96 |         /* Final position */
97 | clip-path: polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%);
98 | }
99 | }
--------------------------------------------------------------------------------
/ocrweb/rapidocr_web/task.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import base64
5 | import copy
6 | import json
7 | from collections import namedtuple
8 | from functools import reduce
9 | from typing import List
10 |
11 | import cv2
12 | import numpy as np
13 | from rapidocr_onnxruntime import RapidOCR
14 |
15 |
16 | class OCRWebUtils:
17 | def __init__(self) -> None:
18 | self.ocr = RapidOCR()
19 | self.WebReturn = namedtuple(
20 | "WebReturn",
21 | ["image", "total_elapse", "elapse_part", "rec_res", "det_boxes"],
22 | )
23 |
24 |     def __call__(self, img_content: str) -> str:
25 | if img_content is None:
26 | raise ValueError("img is None")
27 | img = self.prepare_img(img_content)
28 | ocr_res, elapse = self.ocr(img)
29 | return self.get_web_result(img, ocr_res, elapse)
30 |
31 | def prepare_img(self, img_str: str) -> np.ndarray:
32 | img_str = img_str.split(",")[1]
33 | image = base64.b64decode(img_str + "=" * (-len(img_str) % 4))
34 | nparr = np.frombuffer(image, np.uint8)
35 | image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
36 | if image.ndim == 2:
37 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
38 | return image
39 |
40 |     def get_web_result(
41 |         self, img: np.ndarray, ocr_res: List, elapse: List
42 |     ) -> str:
43 | if ocr_res is None:
44 | total_elapse, elapse_part = 0, ""
45 | img_str = self.img_to_base64(img)
46 | rec_res = json.dumps([], indent=2, ensure_ascii=False)
47 | boxes = ""
48 | else:
49 | boxes, txts, scores = list(zip(*ocr_res))
50 | scores = [f"{v:.4f}" for v in scores]
51 | rec_res = list(zip(range(len(txts)), txts, scores))
52 | rec_res = json.dumps(rec_res, indent=2, ensure_ascii=False)
53 |
54 | det_im = self.draw_text_det_res(np.array(boxes), img)
55 | img_str = self.img_to_base64(det_im)
56 |
57 | total_elapse = reduce(lambda x, y: float(x) + float(y), elapse)
58 | elapse_part = ",".join([f"{x:.4f}" for x in elapse])
59 |
60 | web_return = self.WebReturn(
61 | image=img_str,
62 | total_elapse=f"{total_elapse:.4f}",
63 | elapse_part=elapse_part,
64 | rec_res=rec_res,
65 | det_boxes=boxes,
66 | )
67 | return json.dumps(web_return._asdict())
68 |
69 | @staticmethod
70 | def img_to_base64(img) -> str:
71 | img = cv2.imencode(".png", img)[1]
72 | img_str = str(base64.b64encode(img))[2:-1]
73 | return img_str
74 |
75 | @staticmethod
76 | def draw_text_det_res(dt_boxes: np.ndarray, raw_im: np.ndarray) -> np.ndarray:
77 | src_im = copy.deepcopy(raw_im)
78 | for i, box in enumerate(dt_boxes):
79 | box = np.array(box).astype(np.int32).reshape(-1, 2)
80 | cv2.polylines(src_im, [box], True, color=(0, 0, 255), thickness=1)
81 | cv2.putText(
82 | src_im,
83 | str(i),
84 | (int(box[0][0]), int(box[0][1])),
85 | cv2.FONT_HERSHEY_SIMPLEX,
86 | 0.5,
87 | (0, 0, 0),
88 | 2,
89 | )
90 | return src_im
91 |
--------------------------------------------------------------------------------
/ocrweb/requirements.txt:
--------------------------------------------------------------------------------
1 | Pillow<=10.0.0
2 | requests
3 | Flask>=2.1.0, <=3.0.0
4 | rapidocr_onnxruntime>=1.3.0,<=2.0.0
5 | get_pypi_latest_version
6 | wheel
7 |
--------------------------------------------------------------------------------
/ocrweb/setup.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import sys
5 | from pathlib import Path
6 |
7 | import setuptools
8 | from get_pypi_latest_version import GetPyPiLatestVersion
9 |
10 |
11 | def get_readme():
12 | root_dir = Path(__file__).resolve().parent.parent
13 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_web.md")
14 | with open(readme_path, "r", encoding="utf-8") as f:
15 | readme = f.read()
16 | return readme
17 |
18 |
19 | MODULE_NAME = "rapidocr_web"
20 |
21 | obtainer = GetPyPiLatestVersion()
22 | latest_version = obtainer(MODULE_NAME)
23 | VERSION_NUM = obtainer.version_add_one(latest_version)
24 |
25 | # Prefer the semantic version found in the commit message; otherwise bump the latest PyPI version by one
26 | if len(sys.argv) > 2:
27 | match_str = " ".join(sys.argv[2:])
28 | matched_versions = obtainer.extract_version(match_str)
29 | if matched_versions:
30 | VERSION_NUM = matched_versions
31 | sys.argv = sys.argv[:2]
32 |
33 | setuptools.setup(
34 | name=MODULE_NAME,
35 | version=VERSION_NUM,
36 | platforms="Any",
37 | description="A cross platform OCR Library based on OnnxRuntime.",
38 | long_description=get_readme(),
39 | long_description_content_type="text/markdown",
40 | author="SWHL",
41 | author_email="liekkaskono@163.com",
42 | url="https://github.com/RapidAI/RapidOCR",
43 | download_url="https://github.com/RapidAI/RapidOCR.git",
44 | license="Apache-2.0",
45 | include_package_data=True,
46 | install_requires=["requests", "Flask>=2.1.0", "rapidocr_onnxruntime"],
47 | packages=[
48 | MODULE_NAME,
49 | f"{MODULE_NAME}.static.css",
50 | f"{MODULE_NAME}.static.js",
51 | f"{MODULE_NAME}.templates",
52 | ],
53 | package_data={"": ["*.ico", "*.css", "*.js", "*.html"]},
54 | keywords=[
55 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr"
56 | ],
57 | classifiers=[
58 | "Programming Language :: Python :: 3.6",
59 | "Programming Language :: Python :: 3.7",
60 | "Programming Language :: Python :: 3.8",
61 | "Programming Language :: Python :: 3.9",
62 | "Programming Language :: Python :: 3.10",
63 | "Programming Language :: Python :: 3.11",
64 | "Programming Language :: Python :: 3.12",
65 | ],
66 | python_requires=">=3.6,<3.13",
67 | entry_points={
68 | "console_scripts": [
69 | f"{MODULE_NAME}={MODULE_NAME}.ocrweb:main",
70 | ],
71 | },
72 | )
73 |
--------------------------------------------------------------------------------
/ocrweb_multi/README.md:
--------------------------------------------------------------------------------
1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr_web/ocrweb_multi/)
2 |
--------------------------------------------------------------------------------
/ocrweb_multi/assets/ocr_web_multi.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/assets/ocr_web_multi.jpg
--------------------------------------------------------------------------------
/ocrweb_multi/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 |
4 | print("Compile ocrweb")
5 | os.system("pyinstaller -y main.spec")
6 |
7 | print("Compile wrapper")
8 | os.system(r"windres .\wrapper.rc -O coff -o wrapper.res")
9 | os.system(r"gcc .\wrapper.c wrapper.res -o dist/ocrweb.exe")
10 |
11 | print("Copy config.yaml")
12 | shutil.copy2("config.yaml", "dist/config.yaml")
13 |
14 | print("Copy models")
15 | shutil.copytree("models", "dist/models", dirs_exist_ok=True)
16 | os.remove("dist/models/.gitkeep")
17 |
18 | print("Pack to ocrweb.zip")
19 | shutil.make_archive("ocrweb", "zip", "dist")
20 |
21 | print("Done")
22 |
--------------------------------------------------------------------------------
/ocrweb_multi/config.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | host: 127.0.0.1
3 | port: 8001
4 |   # Token for the OCR API; when null, token verification is skipped
5 | token: null
6 |
7 | global:
8 | use_cuda: false
9 | verbose: false
10 | cuda_provider:
11 | device_id: 0
12 | arena_extend_strategy: kNextPowerOfTwo
13 | cudnn_conv_algo_search: EXHAUSTIVE
14 | do_copy_in_default_stream: true
15 |
16 | # Model configuration
17 | models:
18 |   # Text detection models
19 | detect:
20 | det_en:
21 | path: models/en_PP-OCRv3_det_infer.onnx
22 | config: &detectConfig
23 | pre_process:
24 | - class: DetResizeForTest
25 | limit_side_len: 736
26 | limit_type: min
27 | - class: NormalizeImage
28 | std: [0.229, 0.224, 0.225]
29 | mean: [0.485, 0.456, 0.406]
30 | # 1 / 255
31 | scale: 0.00392156862745098
32 | order: hwc
33 | - class: ToCHWImage
34 | - class: KeepKeys
35 | keep_keys: ["image", "shape"]
36 | post_process:
37 | thresh: 0.3
38 | box_thresh: 0.5
39 | max_candidates: 1000
40 | unclip_ratio: 1.6
41 | use_dilation: true
42 | det_ch:
43 | path: models/ch_PP-OCRv3_det_infer.onnx
44 | config: *detectConfig
45 | det_ml:
46 | path: models/ch_PP-OCRv3_det_infer.onnx
47 | config: *detectConfig
48 |   # Text direction classification models
49 | classify:
50 | cls_ml:
51 | path: models/ch_ppocr_mobile_v2.0_cls_infer.meta.onnx
52 | config:
53 | batch_size: 8
54 | score_thresh: 0.9
55 |   # Text recognition models
56 | recognize:
57 | rec_ch:
58 | path: models/ch_PP-OCRv3_rec_infer.meta.onnx
59 | config: &recognizeConfig
60 | batch_size: 8
61 | rec_cht:
62 | path: models/chinese_cht_PP-OCRv3_rec_infer.meta.onnx
63 | config: *recognizeConfig
64 | rec_en:
65 | path: models/en_PP-OCRv3_rec_infer.meta.onnx
66 | config: *recognizeConfig
67 | rec_ja:
68 | path: models/japan_PP-OCRv3_rec_infer.meta.onnx
69 | config: *recognizeConfig
70 |
71 | # Per-language configuration
72 | languages:
73 | ch:
74 | name: 中文
75 | models:
76 | detect: det_ch
77 | classify: cls_ml
78 | recognize: rec_ch
79 | config: &languageConfig
80 | text_score: 0.5
81 | use_angle_cls: true
82 | verbose: false
83 | min_height: 30
84 | cht:
85 | name: 繁体中文
86 | models:
87 | detect: det_ch
88 | classify: cls_ml
89 | recognize: rec_cht
90 | config: *languageConfig
91 | ja:
92 | name: 日文
93 | models:
94 | detect: det_ch
95 | classify: cls_ml
96 | recognize: rec_ja
97 | config: *languageConfig
98 | en:
99 | name: 英文
100 | models:
101 | detect: det_en
102 | classify: cls_ml
103 | recognize: rec_en
104 | config: *languageConfig
105 |
--------------------------------------------------------------------------------
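
The config above leans on YAML anchors (`&detectConfig`, `&recognizeConfig`, `&languageConfig`) so several models share one configuration block. A minimal sketch, assuming PyYAML is installed, showing how an alias resolves to the same mapping as its anchor (the snippet is simplified, not the full file):

import yaml

snippet = """
detect:
  det_en:
    config: &detectConfig
      thresh: 0.3
      box_thresh: 0.5
  det_ch:
    config: *detectConfig
"""

conf = yaml.safe_load(snippet)
# The alias expands to the same mapping as the anchored block.
assert conf["detect"]["det_ch"]["config"] == conf["detect"]["det_en"]["config"]
print(conf["detect"]["det_ch"]["config"])  # {'thresh': 0.3, 'box_thresh': 0.5}
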
/ocrweb_multi/main.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import logging
5 | import cv2
6 | import numpy as np
7 | from flask import Flask, send_file, request, make_response
8 | from waitress import serve
9 |
10 |
11 | from rapidocr.main import detect_recognize
12 | from utils.config import conf
13 | from utils.utils import tojson, parse_bool
14 |
15 | app = Flask(__name__)
16 | log = logging.getLogger("app")
17 | # Limit the maximum upload size
18 | app.config["MAX_CONTENT_LENGTH"] = 3 * 1024 * 1024
19 |
20 |
21 | @app.route("/")
22 | def index():
23 | return send_file("static/index.html")
24 |
25 |
26 | def json_response(data, status=200):
27 | return make_response(tojson(data), status, {"content-type": "application/json"})
28 |
29 |
30 | @app.route("/lang")
31 | def get_languages():
32 |     """Return the list of available languages."""
33 | data = [
34 | {"code": key, "name": val["name"]} for key, val in conf["languages"].items()
35 | ]
36 | result = {"msg": "OK", "data": data}
37 | log.info("Send langs: %s", data)
38 | return json_response(result)
39 |
40 |
41 | @app.route("/ocr", methods=["POST", "GET"])
42 | def ocr():
43 |     """Run text recognition."""
44 | if conf["server"].get("token"):
45 | if request.values.get("token") != conf["server"]["token"]:
46 | return json_response({"msg": "invalid token"}, status=403)
47 |
48 | lang = request.values.get("lang") or "ch"
49 | detect = parse_bool(request.values.get("detect") or "true")
50 | classify = parse_bool(request.values.get("classify") or "true")
51 |
52 | image_file = request.files.get("image")
53 | if not image_file:
54 | return json_response({"msg": "no image"}, 400)
55 | nparr = np.frombuffer(image_file.stream.read(), np.uint8)
56 | image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
57 | log.info(
58 | "Input: image %s, lang=%s, detect=%s, classify=%s",
59 | image.shape,
60 | lang,
61 | detect,
62 | classify,
63 | )
64 | if image.ndim == 2:
65 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
66 | result = detect_recognize(image, lang=lang, detect=detect, classify=classify)
67 | log.info("OCR Done %s %s", result["ts"], len(result["results"]))
68 | return json_response({"msg": "OK", "data": result})
69 |
70 |
71 | if __name__ == "__main__":
72 | logging.basicConfig(level="INFO")
73 | logging.getLogger("waitress").setLevel(logging.INFO)
74 | if parse_bool(conf.get("debug", "0")):
75 | # Debug
76 | app.run(host=conf["server"]["host"], port=conf["server"]["port"], debug=True)
77 | else:
78 | # Deploy with waitress
79 | serve(app, host=conf["server"]["host"], port=conf["server"]["port"])
80 |
--------------------------------------------------------------------------------
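
The `/ocr` route above reads `lang`, `detect`, `classify` and an optional `token` from form/query values and the image from a multipart `image` field. A minimal client sketch, assuming the server runs with the shipped config.yaml (http://127.0.0.1:8001, token validation disabled) and that `requests` is installed; `demo.png` is only a placeholder:

import requests

with open("demo.png", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:8001/ocr",
        data={"lang": "ch", "detect": "true", "classify": "true"},
        files={"image": f},
        timeout=60,
    )

body = resp.json()
print(body["msg"])              # "OK" on success
print(body["data"]["ts"])       # per-stage and total elapsed times
print(body["data"]["results"])  # recognized text results
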
/ocrweb_multi/main.spec:
--------------------------------------------------------------------------------
1 | # -*- mode: python ; coding: utf-8 -*-
2 |
3 |
4 | block_cipher = None
5 |
6 |
7 | a = Analysis(
8 | ['main.py'],
9 | pathex=[],
10 | binaries=[],
11 | datas=[
12 | ('static', 'static'),
13 | ],
14 | hiddenimports=[],
15 | hookspath=[],
16 | hooksconfig={},
17 | runtime_hooks=[],
18 | excludes=[],
19 | win_no_prefer_redirects=False,
20 | win_private_assemblies=False,
21 | cipher=block_cipher,
22 | noarchive=False,
23 | )
24 | pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
25 |
26 | exe = EXE(
27 | pyz,
28 | a.scripts,
29 | [],
30 | exclude_binaries=True,
31 | name='main',
32 | debug=False,
33 | bootloader_ignore_signals=False,
34 | strip=False,
35 | upx=True,
36 | console=True,
37 | disable_windowed_traceback=False,
38 | argv_emulation=False,
39 | target_arch=None,
40 | codesign_identity=None,
41 | entitlements_file=None,
42 | )
43 | coll = COLLECT(
44 | exe,
45 | a.binaries,
46 | a.zipfiles,
47 | a.datas,
48 | strip=False,
49 | upx=True,
50 | upx_exclude=[],
51 | name='ocrweb',
52 | )
53 |
--------------------------------------------------------------------------------
/ocrweb_multi/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/models/.gitkeep
--------------------------------------------------------------------------------
/ocrweb_multi/rapidocr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/rapidocr/__init__.py
--------------------------------------------------------------------------------
/ocrweb_multi/rapidocr/detect.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # -*- encoding: utf-8 -*-
15 | # @Author: SWHL
16 | # @Contact: liekkaskono@163.com
17 |
18 | import numpy as np
19 |
20 | from utils.utils import OrtInferSession
21 | from .detect_process import DBPostProcess, create_operators, transform
22 |
23 |
24 | class TextDetector:
25 | def __init__(self, path, config):
26 | self.preprocess_op = create_operators(config["pre_process"])
27 | self.postprocess_op = DBPostProcess(**config["post_process"])
28 |
29 | session_instance = OrtInferSession(path)
30 | self.session = session_instance.session
31 | self.input_name = session_instance.get_input_name()
32 |
33 | def __call__(self, img):
34 | if img is None:
35 | raise ValueError("img is None")
36 |
37 | ori_im_shape = img.shape[:2]
38 |
39 | data = {"image": img}
40 | data = transform(data, self.preprocess_op)
41 | img, shape_list = data
42 | if img is None:
43 | return None, 0
44 |
45 | img = np.expand_dims(img, axis=0).astype(np.float32)
46 | shape_list = np.expand_dims(shape_list, axis=0)
47 |
48 | preds = self.session.run(None, {self.input_name: img})
49 |
50 | post_result = self.postprocess_op(preds[0], shape_list)
51 |
52 | dt_boxes = post_result[0]["points"]
53 | dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im_shape)
54 | return dt_boxes
55 |
56 | def order_points_clockwise(self, pts):
57 | """
58 | reference from:
59 | https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
60 | sort the points based on their x-coordinates
61 | """
62 | xSorted = pts[np.argsort(pts[:, 0]), :]
63 |
64 | # grab the left-most and right-most points from the sorted
65 |         # x-coordinate points
66 | leftMost = xSorted[:2, :]
67 | rightMost = xSorted[2:, :]
68 |
69 | # now, sort the left-most coordinates according to their
70 | # y-coordinates so we can grab the top-left and bottom-left
71 | # points, respectively
72 | leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
73 | (tl, bl) = leftMost
74 |
75 | rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
76 | (tr, br) = rightMost
77 |
78 | rect = np.array([tl, tr, br, bl], dtype="float32")
79 | return rect
80 |
81 | def clip_det_res(self, points, img_height, img_width):
82 | for pno in range(points.shape[0]):
83 | points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
84 | points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
85 | return points
86 |
87 | def filter_tag_det_res(self, dt_boxes, image_shape):
88 |         """Filter the detection results, dropping degenerate boxes"""
89 | img_height, img_width = image_shape[:2]
90 | dt_boxes_new = []
91 | for box in dt_boxes:
92 | box = self.order_points_clockwise(box)
93 | box = self.clip_det_res(box, img_height, img_width)
94 | rect_width = int(np.linalg.norm(box[0] - box[1]))
95 | rect_height = int(np.linalg.norm(box[0] - box[3]))
96 | if rect_width <= 3 or rect_height <= 3:
97 | continue
98 | dt_boxes_new.append(box)
99 | return dt_boxes_new
100 |
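Below is a standalone sketch (dummy coordinates, no model or config needed) of what the box post-processing above does: order the four corners clockwise starting from the top-left, then clip them into the image, mirroring order_points_clockwise and clip_det_res.

```python
import numpy as np

# A quadrilateral whose vertices arrive in arbitrary order; one vertex
# lies slightly outside a 100x200 (h x w) image.
pts = np.array([[205.0, 30.0], [10.0, 95.0], [12.0, 28.0], [198.0, 99.0]])

# Sort by x, split into the two left-most and two right-most points,
# then sort each pair by y: the result is (tl, tr, br, bl).
x_sorted = pts[np.argsort(pts[:, 0])]
left = x_sorted[:2][np.argsort(x_sorted[:2, 1])]
right = x_sorted[2:][np.argsort(x_sorted[2:, 1])]
box = np.array([left[0], right[0], right[1], left[1]], dtype="float32")

# Clip into the image bounds, as clip_det_res does.
box[:, 0] = box[:, 0].clip(0, 200 - 1)
box[:, 1] = box[:, 1].clip(0, 100 - 1)
print(box)  # rows: [12, 28], [199, 30], [198, 99], [10, 95]
```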
--------------------------------------------------------------------------------
/ocrweb_multi/rapidocr/main.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import copy
5 | from functools import lru_cache
6 | from pathlib import Path
7 |
8 | import numpy as np
9 | import cv2
10 |
11 | from utils.config import conf
12 | from rapidocr.rapid_ocr_api import RapidOCR
13 |
14 |
15 | @lru_cache(maxsize=None)
16 | def load_language_model(lang="ch"):
17 | models = conf["languages"][lang]
18 | print("model", models)
19 | return RapidOCR(models)
20 |
21 |
22 | def detect_recognize(image, lang="ch", detect=True, classify=True):
23 | model = load_language_model(lang)
24 | results, ts = model(image, detect=detect, classify=classify)
25 | ts["total"] = sum(ts.values())
26 | return {"ts": ts, "results": results}
27 |
28 |
29 | def check_and_read_gif(img_path):
30 |     if Path(img_path).suffix.lower() == ".gif":
31 | gif = cv2.VideoCapture(img_path)
32 | ret, frame = gif.read()
33 | if not ret:
34 |             print(f"Cannot read {img_path}. This gif image may be corrupted.")
35 | return None, False
36 | if len(frame.shape) == 2 or frame.shape[-1] == 1:
37 | frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
38 | imgvalue = frame[:, :, ::-1]
39 | return imgvalue, True
40 | return None, False
41 |
42 |
43 | def draw_text_det_res(dt_boxes, raw_im):
44 | src_im = copy.deepcopy(raw_im)
45 | for i, box in enumerate(dt_boxes):
46 | box = np.array(box).astype(np.int32).reshape(-1, 2)
47 | cv2.polylines(src_im, [box], True, color=(0, 0, 255), thickness=1)
48 | cv2.putText(
49 | src_im,
50 | str(i),
51 | (int(box[0][0]), int(box[0][1])),
52 | cv2.FONT_HERSHEY_SIMPLEX,
53 | 0.5,
54 | (0, 0, 0),
55 | 2,
56 | )
57 | return src_im
58 |
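detect_recognize above needs the per-language models declared under languages in config.yaml, so it is hard to run stand-alone; the drawing helper only needs OpenCV. A minimal sketch of draw_text_det_res on dummy data, assuming it is run from the ocrweb_multi directory (so the utils.config import can locate config.yaml) with the project's requirements installed:

```python
import cv2
import numpy as np

from rapidocr.main import draw_text_det_res  # ocrweb_multi/rapidocr/main.py

canvas = np.full((200, 400, 3), 255, dtype=np.uint8)   # blank white image
boxes = [
    [[20, 20], [180, 20], [180, 60], [20, 60]],
    [[20, 100], [380, 100], [380, 150], [20, 150]],
]
vis = draw_text_det_res(boxes, canvas)
cv2.imwrite("det_vis.jpg", vis)   # each box outlined in red and numbered
```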
--------------------------------------------------------------------------------
/ocrweb_multi/requirements.txt:
--------------------------------------------------------------------------------
1 | onnxruntime>=1.7.0
2 | opencv-python-headless==4.5.4.60
3 | six>=1.15.0
4 | pyclipper>=1.2.1
5 | numpy>=1.19.1
6 | Shapely>=1.7.1
7 | Flask>=2.1.2
8 | PyYAML
9 | waitress
10 |
--------------------------------------------------------------------------------
/ocrweb_multi/static/css/main.css:
--------------------------------------------------------------------------------
1 | html {
2 | height: 100%;
3 | margin: 0;
4 | }
5 |
6 | body {
7 | background-color: #ebedef;
8 | min-height: 100%;
9 | margin: 0;
10 | }
11 |
12 | .btn-gen {
13 | background-color: #00a1d6;
14 | text-align: center;
15 | border-radius: 18px;
16 | margin: 0 5px 0 5px;
17 | font-size: 15px;
18 | padding: 5px 10px;
19 | height: 20px;
20 | min-width: 120px;
21 | text-transform: uppercase;
22 | color: #fff;
23 | border: none;
24 | }
25 |
26 | .btn-gen:hover,
27 | .btn-gen:focus {
28 | border-color: #23AAEE;
29 | background-color: #23AAEE;
30 | color: white;
31 | cursor: pointer;
32 | }
33 |
34 | .row {
35 | margin: 15px;
36 | }
37 |
38 | .small {
39 | font-size: 0.8em;
40 | }
41 |
42 | .verysmall {
43 | font-size: 0.5em;
44 | }
45 |
46 | .area {
47 | text-align: center;
48 | height: auto;
49 | margin: auto;
50 | }
51 |
52 | .leftarea {
53 | float: left;
54 | width: 50%;
55 | height: auto;
56 | position: relative;
57 | }
58 |
59 | .rightarea {
60 | float: left;
61 | width: 50%;
62 | height: auto;
63 | overflow-y: auto;
64 | }
65 |
66 | .table {
67 | width: auto;
68 | height: auto;
69 | margin: 0 auto;
70 |
71 | }
72 |
73 | .span_title {
74 | width: 98%;
75 | height: 36px;
76 | margin-top: 4px;
77 | line-height: 32px;
78 | background-color: #00a1d6;
79 | border: 1px solid #00a1d6;
80 | border-radius: 20px;
81 | color: #fff;
82 | display: inline-block;
83 | text-align: center;
84 | font-size: 22px;
85 | transition: .3s;
86 | box-sizing: border-box;
87 | cursor: default;
88 | }
89 |
90 | .uplodNote {
91 | font-size: 10px;
92 | color: #A1A1A1;
93 | }
94 |
95 | a {
96 | text-decoration: none;
97 | }
98 |
99 | #input-hint {
100 | margin: auto;
101 | cursor: pointer;
102 |
103 | }
104 |
105 | #result_view {
106 | position: relative;
107 | width: 95%;
108 | margin: auto;
109 | }
110 |
111 | #result_view canvas {
112 | width: 100%;
113 | height: 100%;
114 | }
115 |
116 | /* wrapper */
117 | #wrapper {
118 | position: absolute;
119 | top: 0;
120 | left: 0;
121 | width: 100%;
122 | height: 100%;
123 | background:
124 | linear-gradient(#1a98ca, #1a98ca),
125 | linear-gradient(90deg, #ffffff33 1px, transparent 0, transparent 19px),
126 | linear-gradient(#ffffff33 1px, transparent 0, transparent 19px),
127 | linear-gradient(transparent, #1a98ca);
128 | background-size: 100% 1.5%, 10% 100%, 100% 8%, 100% 100%;
129 | background-repeat: no-repeat, repeat, repeat, no-repeat;
130 | background-position: 0% 100%, 0 0, 0 0, 0 0;
131 |     /* Initial position */
132 | clip-path: polygon(0% 0%, 100% 0%, 100% 1.5%, 0% 1.5%);
133 |     /* Add the animation effect */
134 | animation: move 1s infinite linear;
135 | }
136 |
137 | @keyframes move {
138 | to {
139 | background-position: 0 100%, 0 0, 0 0, 0 0;
140 |       /* Final position */
141 | clip-path: polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%);
142 | }
143 | }
--------------------------------------------------------------------------------
/ocrweb_multi/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/ocrweb_multi/static/favicon.ico
--------------------------------------------------------------------------------
/ocrweb_multi/static/hint.svg:
--------------------------------------------------------------------------------
(SVG markup stripped during extraction; the graphic shows the placeholder text "未选择图片" — "No image selected".)
--------------------------------------------------------------------------------
/ocrweb_multi/utils/config.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | import sys
3 | from pathlib import Path
4 | import yaml
5 |
6 | root_dir = Path(__file__).parent.parent
7 |
8 |
9 | def get_resource_path(name: str):
10 |     """Check the candidate locations of a resource file in order and return the first one that exists"""
11 |     for path in [
12 |         # Directory containing wrapper.exe
13 |         Path(root_dir.parent, name),
14 |         # Directory containing main.exe / main.py
15 |         Path(root_dir, name),
16 |         # Directory containing main.exe
17 |         Path(sys.argv[0]).parent / name,
18 |         # Current working directory
19 | Path(name),
20 | ]:
21 | if path.exists():
22 | print("Loaded:", path)
23 | return path
24 | raise FileNotFoundError(name)
25 |
26 |
27 | conf = yaml.safe_load(get_resource_path("config.yaml").read_text(encoding="utf-8"))
28 |
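A short sketch of how the helpers above are consumed elsewhere in ocrweb_multi, assuming it is run from the ocrweb_multi directory so config.yaml resolves: conf is the parsed config.yaml, and get_resource_path applies the same lookup order to any other bundled file.

```python
from utils.config import conf, get_resource_path

# Engine switches read by OrtInferSession and the per-language model
# table read by load_language_model() in main.py.
print(conf["global"]["use_cuda"])
print(list(conf["languages"]))

# Resolve another bundled resource with the same fallback chain.
keep_file = get_resource_path("models/.gitkeep")
print(keep_file)
```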
--------------------------------------------------------------------------------
/ocrweb_multi/utils/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import time
3 | import warnings
4 |
5 | from onnxruntime import (
6 | get_available_providers,
7 | get_device,
8 | SessionOptions,
9 | InferenceSession,
10 | )
11 | from utils.config import conf, get_resource_path
12 |
13 |
14 | def parse_bool(val):
15 | if not isinstance(val, str):
16 | return bool(val)
17 | return val.lower() in ("1", "true", "yes")
18 |
19 |
20 | def default(obj):
21 | if hasattr(obj, "tolist"):
22 | return obj.tolist()
23 | return obj
24 |
25 |
26 | def tojson(obj, **kws):
27 | return json.dumps(obj, default=default, ensure_ascii=False, **kws) + "\n"
28 |
29 |
30 | class OrtInferSession:
31 | def __init__(self, model_path):
32 | ort_conf = conf["global"]
33 | sess_opt = SessionOptions()
34 | sess_opt.log_severity_level = 4
35 | sess_opt.enable_cpu_mem_arena = False
36 |
37 | cuda_ep = "CUDAExecutionProvider"
38 | cpu_ep = "CPUExecutionProvider"
39 |
40 | providers = []
41 | if (
42 | ort_conf["use_cuda"]
43 | and get_device() == "GPU"
44 | and cuda_ep in get_available_providers()
45 | ):
46 | providers = [(cuda_ep, ort_conf[cuda_ep])]
47 |
48 | providers.append(cpu_ep)
49 |
50 | self.session = InferenceSession(
51 | str(get_resource_path(model_path)),
52 | sess_options=sess_opt,
53 | providers=providers,
54 | )
55 |
56 | if ort_conf["use_cuda"] and cuda_ep not in self.session.get_providers():
57 | warnings.warn(
58 |                 f"{cuda_ep} is not available in the current environment; inference will automatically fall back to {cpu_ep}.\n"
59 |                 "Please ensure the installed onnxruntime-gpu version matches your CUDA and cuDNN versions; "
60 |                 "you can check the compatibility matrix on the official site: "
61 | "https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html",
62 | RuntimeWarning,
63 | )
64 |
65 | def get_input_name(self, input_idx=0):
66 | return self.session.get_inputs()[input_idx].name
67 |
68 | def get_output_name(self, output_idx=0):
69 | return self.session.get_outputs()[output_idx].name
70 |
71 |
72 | class Ticker:
73 | def __init__(self, reset=True) -> None:
74 | self.ts = time.perf_counter()
75 | self.reset = reset
76 | self.maps = {}
77 |
78 | def tick(self, name, reset=None):
79 | ts = time.perf_counter()
80 | if reset is None:
81 | reset = self.reset
82 | dt = ts - self.ts
83 | if reset:
84 | self.ts = ts
85 | self.maps[name] = dt
86 | return dt
87 |
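Ticker is what detect_recognize uses to build the per-stage timing dict it returns. A minimal sketch, assuming onnxruntime is installed and the script is run from the ocrweb_multi directory so the utils.config import works:

```python
import time

from utils.utils import Ticker, tojson

ticker = Ticker()
time.sleep(0.05)      # stand-in for the detection stage
ticker.tick("det")
time.sleep(0.02)      # stand-in for the recognition stage
ticker.tick("rec")

ts = ticker.maps
ts["total"] = sum(ts.values())   # same aggregation as detect_recognize()
print(tojson(ts))                # {"det": 0.05..., "rec": 0.02..., "total": ...}
```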
--------------------------------------------------------------------------------
/ocrweb_multi/wrapper.c:
--------------------------------------------------------------------------------
1 | /*
2 | To address the problem of too many files in the PyInstaller output directory, use an external exe + system() call to keep the resource files / dependency libraries separate.
3 | */
4 | #include <stdio.h>
5 | #include <windows.h>
6 |
7 | void combine(char *destination, const char *path1, const char *path2)
8 | {
9 | if (path1 == NULL && path2 == NULL)
10 | {
11 | strcpy(destination, "");
12 | }
13 | else if (path2 == NULL || strlen(path2) == 0)
14 | {
15 | strcpy(destination, path1);
16 | }
17 | else if (path1 == NULL || strlen(path1) == 0)
18 | {
19 | strcpy(destination, path2);
20 | }
21 | else
22 | {
23 | strcpy(destination, path1);
24 |
25 | size_t idx = 0, sepIdx = 0;
26 | size_t size1 = strlen(path1);
27 | while (idx < size1)
28 | {
29 | idx++;
30 | if (destination[idx] == '\\' || destination[idx] == '/')
31 | {
32 | sepIdx = idx;
33 | }
34 | }
35 | // Trim destination: delete from last separator to end.
36 | destination[sepIdx + 1] = '\0';
37 | strcat(destination, path2);
38 | }
39 | }
40 |
41 | void main()
42 | {
43 | // Set title
44 | system("title Rapid OCR Server");
45 | // Get wrapper exe path
46 | TCHAR path[MAX_PATH];
47 | GetModuleFileName(NULL, path, MAX_PATH);
48 |
49 | TCHAR exe_path[MAX_PATH];
50 | // Get real exe path from wrapper exe path
51 | combine(exe_path, path, "ocrweb\\main.exe");
52 | printf("Run real exe: %s\n", exe_path);
53 | // Run real exe
54 | system(exe_path);
55 | }
56 |
--------------------------------------------------------------------------------
/ocrweb_multi/wrapper.rc:
--------------------------------------------------------------------------------
1 | id ICON "static/favicon.ico"
2 |
--------------------------------------------------------------------------------
/python/README.md:
--------------------------------------------------------------------------------
1 | ### See [Documentation](https://rapidai.github.io/RapidOCRDocs/install_usage/rapidocr/install/)
2 |
--------------------------------------------------------------------------------
/python/demo.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from rapidocr import EngineType, ModelType, OCRVersion, RapidOCR
5 |
6 | engine = RapidOCR(
7 | params={
8 | "Rec.ocr_version": OCRVersion.PPOCRV5,
9 | "Rec.engine_type": EngineType.PADDLE,
10 | "Rec.model_type": ModelType.MOBILE,
11 | }
12 | )
13 |
14 | img_url = "https://img1.baidu.com/it/u=3619974146,1266987475&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=516"
15 | result = engine(img_url)
16 | print(result)
17 |
18 | result.vis("vis_result.jpg")
19 |
--------------------------------------------------------------------------------
/python/rapidocr/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import RapidOCR
5 | from .utils import LoadImageError, VisRes
6 | from .utils.typings import EngineType, LangCls, LangDet, LangRec, ModelType, OCRVersion
7 |
--------------------------------------------------------------------------------
/python/rapidocr/cal_rec_boxes/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import CalRecBoxes
5 |
--------------------------------------------------------------------------------
/python/rapidocr/ch_ppocr_cls/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import TextClassifier
5 | from .utils import TextClsOutput
6 |
--------------------------------------------------------------------------------
/python/rapidocr/ch_ppocr_cls/main.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import copy
15 | import math
16 | import time
17 | from typing import Any, Dict, List, Union
18 |
19 | import cv2
20 | import numpy as np
21 |
22 | from rapidocr.inference_engine.base import get_engine
23 |
24 | from .utils import ClsPostProcess, TextClsOutput
25 |
26 |
27 | class TextClassifier:
28 | def __init__(self, cfg: Dict[str, Any]):
29 | self.cls_image_shape = cfg["cls_image_shape"]
30 | self.cls_batch_num = cfg["cls_batch_num"]
31 | self.cls_thresh = cfg["cls_thresh"]
32 | self.postprocess_op = ClsPostProcess(cfg["label_list"])
33 |
34 | self.session = get_engine(cfg.engine_type)(cfg)
35 |
36 | def __call__(self, img_list: Union[np.ndarray, List[np.ndarray]]) -> TextClsOutput:
37 | if isinstance(img_list, np.ndarray):
38 | img_list = [img_list]
39 |
40 | img_list = copy.deepcopy(img_list)
41 |
42 | # Calculate the aspect ratio of all text bars
43 | width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
44 |
45 | # Sorting can speed up the cls process
46 | indices = np.argsort(np.array(width_list))
47 |
48 | img_num = len(img_list)
49 | cls_res = [("", 0.0)] * img_num
50 | batch_num = self.cls_batch_num
51 | elapse = 0
52 | for beg_img_no in range(0, img_num, batch_num):
53 | end_img_no = min(img_num, beg_img_no + batch_num)
54 |
55 | norm_img_batch = []
56 | for ino in range(beg_img_no, end_img_no):
57 | norm_img = self.resize_norm_img(img_list[indices[ino]])
58 | norm_img = norm_img[np.newaxis, :]
59 | norm_img_batch.append(norm_img)
60 | norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32)
61 |
62 | starttime = time.time()
63 | prob_out = self.session(norm_img_batch)
64 | cls_result = self.postprocess_op(prob_out)
65 | elapse += time.time() - starttime
66 |
67 | for rno, (label, score) in enumerate(cls_result):
68 | cls_res[indices[beg_img_no + rno]] = (label, score)
69 | if "180" in label and score > self.cls_thresh:
70 | img_list[indices[beg_img_no + rno]] = cv2.rotate(
71 | img_list[indices[beg_img_no + rno]], 1
72 | )
73 | return TextClsOutput(img_list=img_list, cls_res=cls_res, elapse=elapse)
74 |
75 | def resize_norm_img(self, img: np.ndarray) -> np.ndarray:
76 | img_c, img_h, img_w = self.cls_image_shape
77 | h, w = img.shape[:2]
78 | ratio = w / float(h)
79 | if math.ceil(img_h * ratio) > img_w:
80 | resized_w = img_w
81 | else:
82 | resized_w = int(math.ceil(img_h * ratio))
83 |
84 | resized_image = cv2.resize(img, (resized_w, img_h))
85 | resized_image = resized_image.astype("float32")
86 | if img_c == 1:
87 | resized_image = resized_image / 255
88 | resized_image = resized_image[np.newaxis, :]
89 | else:
90 | resized_image = resized_image.transpose((2, 0, 1)) / 255
91 |
92 | resized_image -= 0.5
93 | resized_image /= 0.5
94 | padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32)
95 | padding_im[:, :, :resized_w] = resized_image
96 | return padding_im
97 |
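The resize_norm_img step above is easier to follow with concrete numbers. The sketch below repeats the same math on a dummy 32x80 crop with the default cls_image_shape of [3, 48, 192]: resize to height 48 keeping the aspect ratio, rescale to [-1, 1], then right-pad the width to 192.

```python
import math

import cv2
import numpy as np

img_c, img_h, img_w = 3, 48, 192                                  # cls_image_shape
img = np.random.randint(0, 255, (32, 80, 3), dtype=np.uint8)      # dummy text crop

ratio = img.shape[1] / float(img.shape[0])                        # 2.5
resized_w = img_w if math.ceil(img_h * ratio) > img_w else int(math.ceil(img_h * ratio))
resized = cv2.resize(img, (resized_w, img_h)).astype("float32")   # (48, 120, 3)
resized = resized.transpose((2, 0, 1)) / 255                      # CHW, in [0, 1]
resized = (resized - 0.5) / 0.5                                   # now in [-1, 1]

padded = np.zeros((img_c, img_h, img_w), dtype=np.float32)
padded[:, :, :resized_w] = resized                                # zero-pad the width
print(padded.shape)                                               # (3, 48, 192)
```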
--------------------------------------------------------------------------------
/python/rapidocr/ch_ppocr_cls/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from dataclasses import dataclass
15 | from pathlib import Path
16 | from typing import List, Optional, Tuple, Union
17 |
18 | import numpy as np
19 |
20 | from ..utils.logger import Logger
21 | from ..utils.utils import save_img
22 | from ..utils.vis_res import VisRes
23 |
24 | logger = Logger(logger_name=__name__).get_log()
25 |
26 |
27 | @dataclass
28 | class TextClsOutput:
29 | img_list: Optional[List[np.ndarray]] = None
30 | cls_res: Optional[List[Tuple[str, float]]] = None
31 | elapse: Optional[float] = None
32 |
33 | def __len__(self):
34 | if self.img_list is None:
35 | return 0
36 | return len(self.img_list)
37 |
38 | def vis(self, save_path: Optional[Union[str, Path]] = None) -> Optional[np.ndarray]:
39 | if self.img_list is None or self.cls_res is None:
40 | logger.warning("No image or txts to visualize.")
41 | return None
42 |
43 | txts = [f"{txt} {score:.2f}" for txt, score in self.cls_res]
44 | scores = [score for _, score in self.cls_res]
45 |
46 | vis = VisRes()
47 | vis_img = vis.draw_rec_res(self.img_list, txts, scores)
48 |
49 | if save_path is not None:
50 | save_img(save_path, vis_img)
51 | logger.info("Visualization saved as %s", save_path)
52 | return vis_img
53 |
54 |
55 | class ClsPostProcess:
56 | def __init__(self, label_list: List[str]):
57 | self.label_list = label_list
58 |
59 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]:
60 | pred_idxs = preds.argmax(axis=1)
61 | decode_out = [
62 | (self.label_list[int(idx)], preds[i, int(idx)])
63 | for i, idx in enumerate(pred_idxs)
64 | ]
65 | return decode_out
66 |
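ClsPostProcess is self-contained, so its behaviour can be checked directly with a dummy probability matrix (assuming the rapidocr package and its dependencies are installed):

```python
import numpy as np

from rapidocr.ch_ppocr_cls.utils import ClsPostProcess

post = ClsPostProcess(label_list=["0", "180"])
probs = np.array([[0.95, 0.05],    # first crop: upright
                  [0.20, 0.80]])   # second crop: rotated 180 degrees
print(post(probs))                 # -> ('0', 0.95) and ('180', 0.8)
```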
--------------------------------------------------------------------------------
/python/rapidocr/ch_ppocr_det/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import TextDetector
5 | from .utils import TextDetOutput
6 |
--------------------------------------------------------------------------------
/python/rapidocr/ch_ppocr_det/main.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # -*- encoding: utf-8 -*-
15 | # @Author: SWHL
16 | # @Contact: liekkaskono@163.com
17 | import time
18 | from typing import Any, Dict, List
19 |
20 | import numpy as np
21 |
22 | from rapidocr.inference_engine.base import get_engine
23 |
24 | from .utils import DBPostProcess, DetPreProcess, TextDetOutput
25 |
26 |
27 | class TextDetector:
28 | def __init__(self, cfg: Dict[str, Any]):
29 | self.limit_side_len = cfg.get("limit_side_len")
30 | self.limit_type = cfg.get("limit_type")
31 | self.mean = cfg.get("mean")
32 | self.std = cfg.get("std")
33 | self.preprocess_op = None
34 |
35 | post_process = {
36 | "thresh": cfg.get("thresh", 0.3),
37 | "box_thresh": cfg.get("box_thresh", 0.5),
38 | "max_candidates": cfg.get("max_candidates", 1000),
39 | "unclip_ratio": cfg.get("unclip_ratio", 1.6),
40 | "use_dilation": cfg.get("use_dilation", True),
41 | "score_mode": cfg.get("score_mode", "fast"),
42 | }
43 | self.postprocess_op = DBPostProcess(**post_process)
44 |
45 | self.session = get_engine(cfg.engine_type)(cfg)
46 |
47 | def __call__(self, img: np.ndarray) -> TextDetOutput:
48 | start_time = time.perf_counter()
49 |
50 | if img is None:
51 | raise ValueError("img is None")
52 |
53 | ori_img_shape = img.shape[0], img.shape[1]
54 | self.preprocess_op = self.get_preprocess(max(img.shape[0], img.shape[1]))
55 | prepro_img = self.preprocess_op(img)
56 | if prepro_img is None:
57 | return TextDetOutput()
58 |
59 | preds = self.session(prepro_img)
60 | boxes, scores = self.postprocess_op(preds, ori_img_shape)
61 | if len(boxes) < 1:
62 | return TextDetOutput()
63 |
64 | boxes = self.sorted_boxes(boxes)
65 | elapse = time.perf_counter() - start_time
66 | return TextDetOutput(img, boxes, scores, elapse=elapse)
67 |
68 | def get_preprocess(self, max_wh: int) -> DetPreProcess:
69 | if self.limit_type == "min":
70 | limit_side_len = self.limit_side_len
71 | elif max_wh < 960:
72 | limit_side_len = 960
73 | elif max_wh < 1500:
74 | limit_side_len = 1500
75 | else:
76 | limit_side_len = 2000
77 | return DetPreProcess(limit_side_len, self.limit_type, self.mean, self.std)
78 |
79 | @staticmethod
80 | def sorted_boxes(dt_boxes: np.ndarray) -> List[np.ndarray]:
81 | """
82 | Sort text boxes in order from top to bottom, left to right
83 | args:
84 | dt_boxes(array):detected text boxes with shape [4, 2]
85 | return:
86 | sorted boxes(array) with shape [4, 2]
87 | """
88 | num_boxes = dt_boxes.shape[0]
89 | sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
90 | _boxes = list(sorted_boxes)
91 |
92 | for i in range(num_boxes - 1):
93 | for j in range(i, -1, -1):
94 | if (
95 | abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10
96 | and _boxes[j + 1][0][0] < _boxes[j][0][0]
97 | ):
98 | tmp = _boxes[j]
99 | _boxes[j] = _boxes[j + 1]
100 | _boxes[j + 1] = tmp
101 | else:
102 | break
103 | return _boxes
104 |
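sorted_boxes is a static method, so its reading-order sort can be tried without building a detector (assuming rapidocr and its dependencies are installed). Two boxes on roughly the same line are reordered left to right even though their top-left y values differ slightly:

```python
import numpy as np

from rapidocr.ch_ppocr_det import TextDetector

boxes = np.array([
    [[200.0, 12.0], [300, 12], [300, 40], [200, 40]],     # line 1, right
    [[10.0, 15.0], [120, 15], [120, 40], [10, 40]],        # line 1, left
    [[50.0, 80.0], [150, 80], [150, 110], [50, 110]],      # line 2
])
ordered = TextDetector.sorted_boxes(boxes)
print([b[0].tolist() for b in ordered])
# [[10.0, 15.0], [200.0, 12.0], [50.0, 80.0]]
```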
--------------------------------------------------------------------------------
/python/rapidocr/ch_ppocr_rec/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import TextRecognizer
5 | from .typings import TextRecInput, TextRecOutput
6 |
--------------------------------------------------------------------------------
/python/rapidocr/ch_ppocr_rec/typings.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from dataclasses import dataclass, field
5 | from enum import Enum
6 | from pathlib import Path
7 | from typing import List, Optional, Tuple, Union
8 |
9 | import numpy as np
10 |
11 | from ..utils.logger import Logger
12 | from ..utils.utils import save_img
13 | from ..utils.vis_res import VisRes
14 |
15 | logger = Logger(logger_name=__name__).get_log()
16 |
17 |
18 | @dataclass
19 | class TextRecConfig:
20 | intra_op_num_threads: int = -1
21 | inter_op_num_threads: int = -1
22 | use_cuda: bool = False
23 | use_dml: bool = False
24 | model_path: Union[str, Path, None] = None
25 |
26 | rec_batch_num: int = 6
27 | rec_img_shape: Tuple[int, int, int] = (3, 48, 320)
28 | rec_keys_path: Union[str, Path, None] = None
29 |
30 |
31 | @dataclass
32 | class TextRecInput:
33 | img: Union[np.ndarray, List[np.ndarray], None] = None
34 | return_word_box: bool = False
35 |
36 |
37 | @dataclass
38 | class TextRecOutput:
39 | imgs: Optional[List[np.ndarray]] = None
40 | txts: Optional[Tuple[str]] = None
41 | scores: Tuple[float] = (1.0,)
42 | word_results: Tuple[Tuple[str, float, Optional[List[List[int]]]]] = (
43 | ("", 1.0, None),
44 | )
45 | elapse: Optional[float] = None
46 | lang_type: Optional[str] = None
47 |
48 | def __len__(self):
49 | if self.txts is None:
50 | return 0
51 | return len(self.txts)
52 |
53 | def vis(self, save_path: Optional[Union[str, Path]] = None) -> Optional[np.ndarray]:
54 | if self.imgs is None or self.txts is None:
55 | logger.warning("No image or txts to visualize.")
56 | return None
57 |
58 | vis = VisRes()
59 | vis_img = vis.draw_rec_res(
60 | self.imgs, self.txts, self.scores, lang_type=self.lang_type
61 | )
62 |
63 | if save_path is not None:
64 | save_img(save_path, vis_img)
65 | logger.info("Visualization saved as %s", save_path)
66 | return vis_img
67 |
68 |
69 | class WordType(Enum):
70 | CN = "cn"
71 | EN = "en"
72 | NUM = "num"
73 | EN_NUM = "en&num"
74 |
75 |
76 | @dataclass
77 | class WordInfo:
78 | words: List[List[str]] = field(default_factory=list)
79 | word_cols: List[List[int]] = field(default_factory=list)
80 | word_types: List[WordType] = field(default_factory=list)
81 | line_txt_len: float = 0.0
82 | confs: List[float] = field(default_factory=list)
83 |
--------------------------------------------------------------------------------
/python/rapidocr/cli.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import shutil
5 | from pathlib import Path
6 |
7 |
8 | root_dir = Path(__file__).resolve().parent
9 | DEFAULT_CFG_PATH = root_dir / "config.yaml"
10 |
11 |
12 | def generate_cfg(args):
13 | if args.save_cfg_file is None:
14 | args.save_cfg_file = "./default_rapidocr.yaml"
15 |
16 | shutil.copyfile(DEFAULT_CFG_PATH, args.save_cfg_file)
17 |     print(f"The config file has been saved to {args.save_cfg_file}")
18 |
19 |
20 | def check_install(ocr_engine):
21 | img_url = "https://github.com/RapidAI/RapidOCR/blob/a9bb7c1f44b6e00556ada90ac588f020d7637c4b/python/tests/test_files/ch_en_num.jpg?raw=true"
22 | result = ocr_engine(img_url)
23 |
24 | if result.txts is None or result.txts[0] != "正品促销":
25 | raise ValueError("The installation is incorrect!")
26 |
27 | print("Success! rapidocr is installed correctly!")
28 |
--------------------------------------------------------------------------------
/python/rapidocr/config.yaml:
--------------------------------------------------------------------------------
1 | Global:
2 | text_score: 0.5
3 |
4 | use_det: true
5 | use_cls: true
6 | use_rec: true
7 |
8 | min_height: 30
9 | width_height_ratio: 8
10 | max_side_len: 2000
11 | min_side_len: 30
12 |
13 | return_word_box: false
14 |
15 | font_path: null
16 |
17 | EngineConfig:
18 | onnxruntime:
19 | intra_op_num_threads: -1
20 | inter_op_num_threads: -1
21 | enable_cpu_mem_arena: false
22 | use_cuda: false
23 | use_dml: false
24 |
25 | openvino:
26 | inference_num_threads: -1
27 |
28 | paddle:
29 | cpu_math_library_num_threads: -1
30 | use_cuda: false
31 | gpu_id: 0
32 | gpu_mem: 500
33 |
34 | torch:
35 | use_cuda: false
36 | gpu_id: 0
37 |
38 | Det:
39 | engine_type: 'onnxruntime'
40 | lang_type: 'ch'
41 | model_type: 'mobile'
42 | ocr_version: 'PP-OCRv4'
43 |
44 | task_type: 'det'
45 |
46 | model_path: null
47 | model_dir: null
48 |
49 | limit_side_len: 736
50 | limit_type: min
51 | std: [ 0.5, 0.5, 0.5 ]
52 | mean: [ 0.5, 0.5, 0.5 ]
53 |
54 | thresh: 0.3
55 | box_thresh: 0.5
56 | max_candidates: 1000
57 | unclip_ratio: 1.6
58 | use_dilation: true
59 | score_mode: fast
60 |
61 | Cls:
62 | engine_type: 'onnxruntime'
63 | lang_type: 'ch'
64 | model_type: 'mobile'
65 | ocr_version: 'PP-OCRv4'
66 |
67 | task_type: 'cls'
68 |
69 | model_path: null
70 | model_dir: null
71 |
72 | cls_image_shape: [3, 48, 192]
73 | cls_batch_num: 6
74 | cls_thresh: 0.9
75 | label_list: ['0', '180']
76 |
77 | Rec:
78 | engine_type: 'onnxruntime'
79 | lang_type: 'ch'
80 | model_type: 'mobile'
81 | ocr_version: 'PP-OCRv4'
82 |
83 | task_type: 'rec'
84 |
85 | model_path: null
86 | model_dir: null
87 |
88 | rec_keys_path: null
89 | rec_img_shape: [3, 48, 320]
90 | rec_batch_num: 6
91 |
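The section names above double as the dotted override keys accepted by RapidOCR(params=...) in demo.py. The package already depends on OmegaConf, so a quick sketch of inspecting and overriding the same structure directly (file path relative to the python/ directory):

```python
from omegaconf import OmegaConf

cfg = OmegaConf.load("rapidocr/config.yaml")
print(cfg.Det.thresh, cfg.Rec.rec_batch_num)        # 0.3 6

# Roughly what passing params={"Rec.rec_batch_num": 16} to RapidOCR ends up doing.
OmegaConf.update(cfg, "Rec.rec_batch_num", 16)
print(cfg.Rec.rec_batch_num)                        # 16
```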
--------------------------------------------------------------------------------
/python/rapidocr/inference_engine/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 |
--------------------------------------------------------------------------------
/python/rapidocr/inference_engine/base.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import abc
5 | from dataclasses import dataclass
6 | from enum import Enum
7 | from pathlib import Path
8 | from typing import Dict, Union
9 |
10 | import numpy as np
11 | from omegaconf import OmegaConf
12 |
13 | from ..utils.logger import Logger
14 | from ..utils.typings import EngineType, ModelType, OCRVersion, TaskType
15 | from ..utils.utils import import_package
16 |
17 | cur_dir = Path(__file__).resolve().parent.parent
18 | MODEL_URL_PATH = cur_dir / "default_models.yaml"
19 |
20 | logger = Logger(logger_name=__name__).get_log()
21 |
22 |
23 | def get_engine(engine_type: EngineType):
24 | logger.info("Using engine_name: %s", engine_type.value)
25 |
26 | if engine_type == EngineType.ONNXRUNTIME:
27 | if not import_package(engine_type.value):
28 | raise ImportError(f"{engine_type.value} is not installed.")
29 |
30 | from .onnxruntime import OrtInferSession
31 |
32 | return OrtInferSession
33 |
34 | if engine_type == EngineType.OPENVINO:
35 | if not import_package(engine_type.value):
36 | raise ImportError(f"{engine_type.value} is not installed")
37 |
38 | from .openvino import OpenVINOInferSession
39 |
40 | return OpenVINOInferSession
41 |
42 | if engine_type == EngineType.PADDLE:
43 | if not import_package(engine_type.value):
44 | raise ImportError(f"{engine_type.value} is not installed")
45 |
46 | from .paddle import PaddleInferSession
47 |
48 | return PaddleInferSession
49 |
50 | if engine_type == EngineType.TORCH:
51 | if not import_package(engine_type.value):
52 | raise ImportError(f"{engine_type.value} is not installed")
53 |
54 | from .torch import TorchInferSession
55 |
56 | return TorchInferSession
57 |
58 | raise ValueError(f"Unsupported engine: {engine_type.value}")
59 |
60 |
61 | @dataclass
62 | class FileInfo:
63 | engine_type: EngineType
64 | ocr_version: OCRVersion
65 | task_type: TaskType
66 | lang_type: Enum
67 | model_type: ModelType
68 |
69 |
70 | class InferSession(abc.ABC):
71 | model_info = OmegaConf.load(MODEL_URL_PATH)
72 | DEFAULT_MODEL_PATH = cur_dir / "models"
73 | logger = Logger(logger_name=__name__).get_log()
74 |
75 | @abc.abstractmethod
76 | def __init__(self, config):
77 | pass
78 |
79 | @abc.abstractmethod
80 | def __call__(self, input_content: np.ndarray) -> np.ndarray:
81 | pass
82 |
83 | @staticmethod
84 | def _verify_model(model_path: Union[str, Path, None]):
85 | if model_path is None:
86 | raise ValueError("model_path is None!")
87 |
88 | model_path = Path(model_path)
89 | if not model_path.exists():
90 |             raise FileNotFoundError(f"{model_path} does not exist.")
91 |
92 | if not model_path.is_file():
93 | raise FileExistsError(f"{model_path} is not a file.")
94 |
95 | @abc.abstractmethod
96 | def have_key(self, key: str = "character") -> bool:
97 | pass
98 |
99 | @classmethod
100 | def get_model_url(cls, file_info: FileInfo) -> Dict[str, str]:
101 | model_dict = OmegaConf.select(
102 | cls.model_info,
103 | f"{file_info.engine_type.value}.{file_info.ocr_version.value}.{file_info.task_type.value}",
104 | )
105 |
106 |         # Look for the server model first
107 | if file_info.model_type == ModelType.SERVER:
108 | for k in model_dict:
109 | if (
110 | k.startswith(file_info.lang_type.value)
111 | and file_info.model_type.value in k
112 | ):
113 | return model_dict[k]
114 |
115 | for k in model_dict:
116 | if k.startswith(file_info.lang_type.value):
117 | return model_dict[k]
118 |
119 | raise KeyError("File not found")
120 |
121 | @classmethod
122 | def get_dict_key_url(cls, file_info: FileInfo) -> str:
123 | model_dict = cls.get_model_url(file_info)
124 | return model_dict["dict_url"]
125 |
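A sketch of the two entry points above: get_engine resolves the backend class (raising ImportError if the package is missing), and get_model_url looks up the default download info for a model. The enum members below are inferred from the config defaults ('PP-OCRv4', 'det', 'ch', 'mobile') and may need adjusting:

```python
from rapidocr.inference_engine.base import FileInfo, InferSession, get_engine
from rapidocr.utils.typings import (
    EngineType, LangDet, ModelType, OCRVersion, TaskType,
)

session_cls = get_engine(EngineType.ONNXRUNTIME)   # OrtInferSession
print(session_cls.__name__)

info = FileInfo(
    engine_type=EngineType.ONNXRUNTIME,
    ocr_version=OCRVersion.PPOCRV4,     # assumed member for 'PP-OCRv4'
    task_type=TaskType.DET,             # assumed member for 'det'
    lang_type=LangDet.CH,               # assumed member for 'ch'
    model_type=ModelType.MOBILE,
)
print(InferSession.get_model_url(info))  # mapping with model_dir and SHA256 entries
```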
--------------------------------------------------------------------------------
/python/rapidocr/inference_engine/openvino.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import os
5 | import traceback
6 | from pathlib import Path
7 |
8 | import numpy as np
9 | from omegaconf import DictConfig
10 | from openvino.runtime import Core
11 |
12 | from ..utils import Logger
13 | from ..utils.download_file import DownloadFile, DownloadFileInput
14 | from .base import FileInfo, InferSession
15 |
16 |
17 | class OpenVINOInferSession(InferSession):
18 | def __init__(self, cfg: DictConfig):
19 | super().__init__(cfg)
20 | self.logger = Logger(logger_name=__name__).get_log()
21 |
22 | core = Core()
23 |
24 | model_path = cfg.get("model_path", None)
25 | if model_path is None:
26 | model_info = self.get_model_url(
27 | FileInfo(
28 | engine_type=cfg.engine_type,
29 | ocr_version=cfg.ocr_version,
30 | task_type=cfg.task_type,
31 | lang_type=cfg.lang_type,
32 | model_type=cfg.model_type,
33 | )
34 | )
35 | model_path = self.DEFAULT_MODEL_PATH / Path(model_info["model_dir"]).name
36 | download_params = DownloadFileInput(
37 | file_url=model_info["model_dir"],
38 | sha256=model_info["SHA256"],
39 | save_path=model_path,
40 | logger=self.logger,
41 | )
42 | DownloadFile.run(download_params)
43 |
44 | self.logger.info(f"Using {model_path}")
45 | model_path = Path(model_path)
46 | self._verify_model(model_path)
47 |
48 | cpu_nums = os.cpu_count()
49 | infer_num_threads = cfg.get("inference_num_threads", -1)
50 | if infer_num_threads != -1 and 1 <= infer_num_threads <= cpu_nums:
51 | core.set_property("CPU", {"INFERENCE_NUM_THREADS": str(infer_num_threads)})
52 |
53 | model_onnx = core.read_model(model_path)
54 | compile_model = core.compile_model(model=model_onnx, device_name="CPU")
55 | self.session = compile_model.create_infer_request()
56 |
57 | def __call__(self, input_content: np.ndarray) -> np.ndarray:
58 | try:
59 | self.session.infer(inputs=[input_content])
60 | return self.session.get_output_tensor().data
61 | except Exception as e:
62 | error_info = traceback.format_exc()
63 | raise OpenVIONError(error_info) from e
64 |
65 | def have_key(self, key: str = "character") -> bool:
66 | return False
67 |
68 |
69 | class OpenVIONError(Exception):
70 | pass
71 |
--------------------------------------------------------------------------------
/python/rapidocr/inference_engine/torch.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from pathlib import Path
5 |
6 | import numpy as np
7 | import torch
8 | from omegaconf import OmegaConf
9 |
10 | from ..networks.architectures.base_model import BaseModel
11 | from ..utils.download_file import DownloadFile, DownloadFileInput
12 | from ..utils.logger import Logger
13 | from .base import FileInfo, InferSession
14 |
15 | root_dir = Path(__file__).resolve().parent.parent
16 | DEFAULT_CFG_PATH = root_dir / "networks" / "arch_config.yaml"
17 |
18 |
19 | class TorchInferSession(InferSession):
20 | def __init__(self, cfg) -> None:
21 | self.logger = Logger(logger_name=__name__).get_log()
22 |
23 | model_path = cfg.get("model_path", None)
24 | if model_path is None:
25 | model_info = self.get_model_url(
26 | FileInfo(
27 | engine_type=cfg.engine_type,
28 | ocr_version=cfg.ocr_version,
29 | task_type=cfg.task_type,
30 | lang_type=cfg.lang_type,
31 | model_type=cfg.model_type,
32 | )
33 | )
34 | default_model_url = model_info["model_dir"]
35 | model_path = self.DEFAULT_MODEL_PATH / Path(default_model_url).name
36 | DownloadFile.run(
37 | DownloadFileInput(
38 | file_url=default_model_url,
39 | sha256=model_info["SHA256"],
40 | save_path=model_path,
41 | logger=self.logger,
42 | )
43 | )
44 |
45 | self.logger.info(f"Using {model_path}")
46 | model_path = Path(model_path)
47 | self._verify_model(model_path)
48 |
49 | all_arch_config = OmegaConf.load(DEFAULT_CFG_PATH)
50 | file_name = model_path.stem
51 | if file_name not in all_arch_config:
52 | raise ValueError(f"architecture {file_name} is not in arch_config.yaml")
53 |
54 | arch_config = all_arch_config.get(file_name)
55 | self.predictor = BaseModel(arch_config)
56 | self.predictor.load_state_dict(torch.load(model_path, weights_only=True))
57 | self.predictor.eval()
58 |
59 | self.use_gpu = False
60 | if cfg.engine_cfg.use_cuda:
61 | self.device = torch.device(f"cuda:{cfg.engine_cfg.gpu_id}")
62 | self.predictor.to(self.device)
63 | self.use_gpu = True
64 |
65 | def __call__(self, img: np.ndarray):
66 | with torch.no_grad():
67 | inp = torch.from_numpy(img)
68 | if self.use_gpu:
69 | inp = inp.to(self.device)
70 |
71 |             # Keep the output-extraction logic aligned with the ONNX path
72 | outputs = self.predictor(inp).cpu().numpy()
73 | return outputs
74 |
75 | def have_key(self, key: str = "character") -> bool:
76 | return False
77 |
78 |
79 | class TorchInferError(Exception):
80 | pass
81 |
--------------------------------------------------------------------------------
/python/rapidocr/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr/models/.gitkeep
--------------------------------------------------------------------------------
/python/rapidocr/networks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr/networks/__init__.py
--------------------------------------------------------------------------------
/python/rapidocr/networks/architectures/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import copy
16 |
17 | __all__ = ["build_model"]
18 |
19 |
20 | def build_model(config, **kwargs):
21 | from .base_model import BaseModel
22 |
23 | config = copy.deepcopy(config)
24 | module_class = BaseModel(config, **kwargs)
25 | return module_class
26 |
--------------------------------------------------------------------------------
/python/rapidocr/networks/architectures/base_model.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 | from ..backbones import build_backbone
4 | from ..heads import build_head
5 | from ..necks import build_neck
6 |
7 |
8 | class BaseModel(nn.Module):
9 | def __init__(self, config, **kwargs):
10 | """
11 | the module for OCR.
12 | args:
13 |             config (dict): the hyper-parameters of the module.
14 | """
15 | super(BaseModel, self).__init__()
16 |
17 | in_channels = config.get("in_channels", 3)
18 | model_type = config["model_type"]
19 |         # build backbone; a backbone is needed for det, rec and cls
20 | if "Backbone" not in config or config["Backbone"] is None:
21 | self.use_backbone = False
22 | else:
23 | self.use_backbone = True
24 | config["Backbone"]["in_channels"] = in_channels
25 | self.backbone = build_backbone(config["Backbone"], model_type)
26 | in_channels = self.backbone.out_channels
27 |
28 | # build neck
29 | # for rec, neck can be cnn,rnn or reshape(None)
30 | # for det, neck can be FPN, BIFPN and so on.
31 | # for cls, neck should be none
32 | if "Neck" not in config or config["Neck"] is None:
33 | self.use_neck = False
34 | else:
35 | self.use_neck = True
36 | config["Neck"]["in_channels"] = in_channels
37 | self.neck = build_neck(config["Neck"])
38 | in_channels = self.neck.out_channels
39 |
40 |         # build head; a head is needed for det, rec and cls
41 | if "Head" not in config or config["Head"] is None:
42 | self.use_head = False
43 | else:
44 | self.use_head = True
45 | config["Head"]["in_channels"] = in_channels
46 | self.head = build_head(config["Head"], **kwargs)
47 |
48 | self.return_all_feats = config.get("return_all_feats", False)
49 |
50 | self._initialize_weights()
51 |
52 | def _initialize_weights(self):
53 | # weight initialization
54 | for m in self.modules():
55 | if isinstance(m, nn.Conv2d):
56 | nn.init.kaiming_normal_(m.weight, mode="fan_out")
57 | if m.bias is not None:
58 | nn.init.zeros_(m.bias)
59 | elif isinstance(m, nn.BatchNorm2d):
60 | nn.init.ones_(m.weight)
61 | nn.init.zeros_(m.bias)
62 | elif isinstance(m, nn.Linear):
63 | nn.init.normal_(m.weight, 0, 0.01)
64 | if m.bias is not None:
65 | nn.init.zeros_(m.bias)
66 | elif isinstance(m, nn.ConvTranspose2d):
67 | nn.init.kaiming_normal_(m.weight, mode="fan_out")
68 | if m.bias is not None:
69 | nn.init.zeros_(m.bias)
70 |
71 | def forward(self, x):
72 | y = dict()
73 | if self.use_backbone:
74 | x = self.backbone(x)
75 | if isinstance(x, dict):
76 | y.update(x)
77 | else:
78 | y["backbone_out"] = x
79 | final_name = "backbone_out"
80 | if self.use_neck:
81 | x = self.neck(x)
82 | if isinstance(x, dict):
83 | y.update(x)
84 | else:
85 | y["neck_out"] = x
86 | final_name = "neck_out"
87 | if self.use_head:
88 | x = self.head(x)
89 | # for multi head, save ctc neck out for udml
90 |             if isinstance(x, dict) and "ctc_neck" in x.keys():
91 | y["neck_out"] = x["ctc_neck"]
92 | y["head_out"] = x
93 | elif isinstance(x, dict):
94 | y.update(x)
95 | else:
96 | y["head_out"] = x
97 | if self.return_all_feats:
98 | if self.training:
99 | return y
100 | elif isinstance(x, dict):
101 | return x
102 | else:
103 | return {final_name: x}
104 | else:
105 | return x
106 |
--------------------------------------------------------------------------------
/python/rapidocr/networks/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | __all__ = ["build_backbone"]
16 |
17 |
18 | def build_backbone(config, model_type):
19 | if model_type == "det":
20 | from .det_mobilenet_v3 import MobileNetV3
21 | from .rec_hgnet import PPHGNet_small
22 | from .rec_lcnetv3 import PPLCNetV3
23 |
24 | support_dict = [
25 | "MobileNetV3",
26 | "ResNet",
27 | "ResNet_vd",
28 | "ResNet_SAST",
29 | "PPLCNetV3",
30 | "PPHGNet_small",
31 | ]
32 | elif model_type == "rec" or model_type == "cls":
33 | from .rec_hgnet import PPHGNet_small
34 | from .rec_lcnetv3 import PPLCNetV3
35 | from .rec_mobilenet_v3 import MobileNetV3
36 | from .rec_svtrnet import SVTRNet
37 | from .rec_mv1_enhance import MobileNetV1Enhance
38 |
39 | support_dict = [
40 | "MobileNetV1Enhance",
41 | "MobileNetV3",
42 | "ResNet",
43 | "ResNetFPN",
44 | "MTB",
45 | "ResNet31",
46 | "SVTRNet",
47 | "ViTSTR",
48 | "DenseNet",
49 | "PPLCNetV3",
50 | "PPHGNet_small",
51 | ]
52 | else:
53 | raise NotImplementedError
54 |
55 | module_name = config.pop("name")
56 | assert module_name in support_dict, Exception(
57 |         "when model type is {}, backbone only supports {}".format(
58 | model_type, support_dict
59 | )
60 | )
61 | module_class = eval(module_name)(**config)
62 | return module_class
63 |
--------------------------------------------------------------------------------
/python/rapidocr/networks/common.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | from torch import nn
4 |
5 |
6 | class Hswish(nn.Module):
7 | def __init__(self, inplace=True):
8 | super(Hswish, self).__init__()
9 | self.inplace = inplace
10 |
11 | def forward(self, x):
12 | return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0
13 |
14 |
15 | # out = max(0, min(1, slope*x+offset))
16 | # paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None)
17 | class Hsigmoid(nn.Module):
18 | def __init__(self, inplace=True):
19 | super(Hsigmoid, self).__init__()
20 | self.inplace = inplace
21 |
22 | def forward(self, x):
23 | # torch: F.relu6(x + 3., inplace=self.inplace) / 6.
24 | # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6.
25 | return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0
26 |
27 |
28 | class GELU(nn.Module):
29 | def __init__(self, inplace=True):
30 | super(GELU, self).__init__()
31 | self.inplace = inplace
32 |
33 | def forward(self, x):
34 | return torch.nn.functional.gelu(x)
35 |
36 |
37 | class Swish(nn.Module):
38 | def __init__(self, inplace=True):
39 | super(Swish, self).__init__()
40 | self.inplace = inplace
41 |
42 | def forward(self, x):
43 | if self.inplace:
44 | x.mul_(torch.sigmoid(x))
45 | return x
46 | else:
47 | return x * torch.sigmoid(x)
48 |
49 |
50 | class Activation(nn.Module):
51 | def __init__(self, act_type, inplace=True):
52 | super(Activation, self).__init__()
53 | act_type = act_type.lower()
54 | if act_type == "relu":
55 | self.act = nn.ReLU(inplace=inplace)
56 | elif act_type == "relu6":
57 | self.act = nn.ReLU6(inplace=inplace)
58 | elif act_type == "sigmoid":
59 | raise NotImplementedError
60 | elif act_type == "hard_sigmoid":
61 | self.act = Hsigmoid(
62 | inplace
63 | ) # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)#
64 | elif act_type == "hard_swish" or act_type == "hswish":
65 | self.act = Hswish(inplace=inplace)
66 | elif act_type == "leakyrelu":
67 | self.act = nn.LeakyReLU(inplace=inplace)
68 | elif act_type == "gelu":
69 | self.act = GELU(inplace=inplace)
70 | elif act_type == "swish":
71 | self.act = Swish(inplace=inplace)
72 | else:
73 | raise NotImplementedError
74 |
75 | def forward(self, inputs):
76 | return self.act(inputs)
77 |
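Activation is a small factory over the classes above; a quick sketch (requires the torch dependency to be installed):

```python
import torch

from rapidocr.networks.common import Activation

act = Activation("hard_swish")      # same branch as the "hswish" alias
x = torch.randn(1, 8, 16, 16)
print(act(x).shape)                 # torch.Size([1, 8, 16, 16])
```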
--------------------------------------------------------------------------------
/python/rapidocr/networks/heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | __all__ = ["build_head"]
16 |
17 |
18 | def build_head(config, **kwargs):
19 | # det head
20 | from .det_db_head import DBHead, PFHeadLocal
21 |
22 | # rec head
23 | from .rec_ctc_head import CTCHead
24 | from .rec_multi_head import MultiHead
25 |
26 | # cls head
27 | from .cls_head import ClsHead
28 |
29 | support_dict = [
30 | "DBHead",
31 | "CTCHead",
32 | "ClsHead",
33 | "MultiHead",
34 | "PFHeadLocal",
35 | ]
36 |
37 | module_name = config.pop("name")
38 | char_num = config.pop("char_num", 6625)
39 | assert module_name in support_dict, Exception(
40 | "head only support {}".format(support_dict)
41 | )
42 | module_class = eval(module_name)(**config, **kwargs)
43 | return module_class
44 |
--------------------------------------------------------------------------------
/python/rapidocr/networks/heads/cls_head.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | from torch import nn
4 |
5 |
6 | class ClsHead(nn.Module):
7 | """
8 |     Text-orientation classification head
9 |     Args:
10 |         params (dict): hyper-parameters for building the classification network
11 | """
12 |
13 | def __init__(self, in_channels, class_dim, **kwargs):
14 | super(ClsHead, self).__init__()
15 | self.pool = nn.AdaptiveAvgPool2d(1)
16 | self.fc = nn.Linear(in_channels, class_dim, bias=True)
17 |
18 | def forward(self, x):
19 | x = self.pool(x)
20 | x = torch.reshape(x, shape=[x.shape[0], x.shape[1]])
21 | x = self.fc(x)
22 | x = F.softmax(x, dim=1)
23 | return x
24 |
--------------------------------------------------------------------------------
/python/rapidocr/networks/heads/rec_ctc_head.py:
--------------------------------------------------------------------------------
1 | import torch.nn.functional as F
2 | from torch import nn
3 |
4 |
5 | class CTCHead(nn.Module):
6 | def __init__(
7 | self,
8 | in_channels,
9 | out_channels=6625,
10 | fc_decay=0.0004,
11 | mid_channels=None,
12 | return_feats=False,
13 | **kwargs
14 | ):
15 | super(CTCHead, self).__init__()
16 | if mid_channels is None:
17 | self.fc = nn.Linear(
18 | in_channels,
19 | out_channels,
20 | bias=True,
21 | )
22 | else:
23 | self.fc1 = nn.Linear(
24 | in_channels,
25 | mid_channels,
26 | bias=True,
27 | )
28 | self.fc2 = nn.Linear(
29 | mid_channels,
30 | out_channels,
31 | bias=True,
32 | )
33 |
34 | self.out_channels = out_channels
35 | self.mid_channels = mid_channels
36 | self.return_feats = return_feats
37 |
38 | def forward(self, x, labels=None):
39 | if self.mid_channels is None:
40 | predicts = self.fc(x)
41 | else:
42 | x = self.fc1(x)
43 | predicts = self.fc2(x)
44 |
45 | if self.return_feats:
46 | result = (x, predicts)
47 | else:
48 | result = predicts
49 |
50 | if not self.training:
51 | predicts = F.softmax(predicts, dim=2)
52 | result = predicts
53 |
54 | return result
55 |
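In eval mode the head returns per-timestep probabilities over the character set, so the output for a (batch, time, features) sequence sums to one along the last axis. A small shape check (torch required):

```python
import torch

from rapidocr.networks.heads.rec_ctc_head import CTCHead

head = CTCHead(in_channels=64, out_channels=100).eval()
seq = torch.randn(2, 40, 64)                 # (batch, time steps, features)
with torch.no_grad():
    probs = head(seq)
print(probs.shape)                           # torch.Size([2, 40, 100])
print(float(probs[0, 0].sum()))              # ~1.0 after the softmax
```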
--------------------------------------------------------------------------------
/python/rapidocr/networks/heads/rec_multi_head.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 | from ..necks.rnn import Im2Seq, SequenceEncoder
4 | from .rec_ctc_head import CTCHead
5 |
6 |
7 | class FCTranspose(nn.Module):
8 | def __init__(self, in_channels, out_channels, only_transpose=False):
9 | super().__init__()
10 | self.only_transpose = only_transpose
11 | if not self.only_transpose:
12 | self.fc = nn.Linear(in_channels, out_channels, bias=False)
13 |
14 | def forward(self, x):
15 | if self.only_transpose:
16 | return x.permute([0, 2, 1])
17 | else:
18 | return self.fc(x.permute([0, 2, 1]))
19 |
20 |
21 | class MultiHead(nn.Module):
22 | def __init__(self, in_channels, out_channels_list, **kwargs):
23 | super().__init__()
24 | self.head_list = kwargs.pop("head_list")
25 |
26 | self.gtc_head = "sar"
27 | assert len(self.head_list) >= 2
28 | for idx, head_name in enumerate(self.head_list):
29 | name = list(head_name)[0]
30 | if name == "SARHead":
31 | pass
32 |
33 | elif name == "NRTRHead":
34 | pass
35 | elif name == "CTCHead":
36 | # ctc neck
37 | self.encoder_reshape = Im2Seq(in_channels)
38 | neck_args = self.head_list[idx][name]["Neck"]
39 | encoder_type = neck_args.pop("name")
40 | self.ctc_encoder = SequenceEncoder(
41 | in_channels=in_channels, encoder_type=encoder_type, **neck_args
42 | )
43 | # ctc head
44 | head_args = self.head_list[idx][name].get("Head", {})
45 | if head_args is None:
46 | head_args = {}
47 |
48 | self.ctc_head = CTCHead(
49 | in_channels=self.ctc_encoder.out_channels,
50 | out_channels=out_channels_list["CTCLabelDecode"],
51 | **head_args,
52 | )
53 | else:
54 | raise NotImplementedError(f"{name} is not supported in MultiHead yet")
55 |
56 | def forward(self, x, data=None):
57 | ctc_encoder = self.ctc_encoder(x)
58 | return self.ctc_head(ctc_encoder)
59 |
--------------------------------------------------------------------------------
/python/rapidocr/networks/necks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | __all__ = ["build_neck"]
16 |
17 |
18 | def build_neck(config):
19 | from .db_fpn import DBFPN, LKPAN, RSEFPN
20 | from .rnn import SequenceEncoder
21 |
22 | support_dict = ["DBFPN", "SequenceEncoder", "RSEFPN", "LKPAN"]
23 |
24 | module_name = config.pop("name")
25 | assert module_name in support_dict, Exception(
26 |         "neck only supports {}".format(support_dict)
27 | )
28 | module_class = eval(module_name)(**config)
29 | return module_class
30 |
--------------------------------------------------------------------------------
/python/rapidocr/networks/necks/intracl.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 |
4 | class IntraCLBlock(nn.Module):
5 | def __init__(self, in_channels=96, reduce_factor=4):
6 | super(IntraCLBlock, self).__init__()
7 | self.channels = in_channels
8 | self.rf = reduce_factor
9 | self.conv1x1_reduce_channel = nn.Conv2d(
10 | self.channels, self.channels // self.rf, kernel_size=1, stride=1, padding=0
11 | )
12 | self.conv1x1_return_channel = nn.Conv2d(
13 | self.channels // self.rf, self.channels, kernel_size=1, stride=1, padding=0
14 | )
15 |
16 | self.v_layer_7x1 = nn.Conv2d(
17 | self.channels // self.rf,
18 | self.channels // self.rf,
19 | kernel_size=(7, 1),
20 | stride=(1, 1),
21 | padding=(3, 0),
22 | )
23 | self.v_layer_5x1 = nn.Conv2d(
24 | self.channels // self.rf,
25 | self.channels // self.rf,
26 | kernel_size=(5, 1),
27 | stride=(1, 1),
28 | padding=(2, 0),
29 | )
30 | self.v_layer_3x1 = nn.Conv2d(
31 | self.channels // self.rf,
32 | self.channels // self.rf,
33 | kernel_size=(3, 1),
34 | stride=(1, 1),
35 | padding=(1, 0),
36 | )
37 |
38 | self.q_layer_1x7 = nn.Conv2d(
39 | self.channels // self.rf,
40 | self.channels // self.rf,
41 | kernel_size=(1, 7),
42 | stride=(1, 1),
43 | padding=(0, 3),
44 | )
45 | self.q_layer_1x5 = nn.Conv2d(
46 | self.channels // self.rf,
47 | self.channels // self.rf,
48 | kernel_size=(1, 5),
49 | stride=(1, 1),
50 | padding=(0, 2),
51 | )
52 | self.q_layer_1x3 = nn.Conv2d(
53 | self.channels // self.rf,
54 | self.channels // self.rf,
55 | kernel_size=(1, 3),
56 | stride=(1, 1),
57 | padding=(0, 1),
58 | )
59 |
60 | # base
61 | self.c_layer_7x7 = nn.Conv2d(
62 | self.channels // self.rf,
63 | self.channels // self.rf,
64 | kernel_size=(7, 7),
65 | stride=(1, 1),
66 | padding=(3, 3),
67 | )
68 | self.c_layer_5x5 = nn.Conv2d(
69 | self.channels // self.rf,
70 | self.channels // self.rf,
71 | kernel_size=(5, 5),
72 | stride=(1, 1),
73 | padding=(2, 2),
74 | )
75 | self.c_layer_3x3 = nn.Conv2d(
76 | self.channels // self.rf,
77 | self.channels // self.rf,
78 | kernel_size=(3, 3),
79 | stride=(1, 1),
80 | padding=(1, 1),
81 | )
82 |
83 | self.bn = nn.BatchNorm2d(self.channels)
84 | self.relu = nn.ReLU()
85 |
86 | def forward(self, x):
87 | x_new = self.conv1x1_reduce_channel(x)
88 |
89 | x_7_c = self.c_layer_7x7(x_new)
90 | x_7_v = self.v_layer_7x1(x_new)
91 | x_7_q = self.q_layer_1x7(x_new)
92 | x_7 = x_7_c + x_7_v + x_7_q
93 |
94 | x_5_c = self.c_layer_5x5(x_7)
95 | x_5_v = self.v_layer_5x1(x_7)
96 | x_5_q = self.q_layer_1x5(x_7)
97 | x_5 = x_5_c + x_5_v + x_5_q
98 |
99 | x_3_c = self.c_layer_3x3(x_5)
100 | x_3_v = self.v_layer_3x1(x_5)
101 | x_3_q = self.q_layer_1x3(x_5)
102 | x_3 = x_3_c + x_3_v + x_3_q
103 |
104 | x_relation = self.conv1x1_return_channel(x_3)
105 |
106 | x_relation = self.bn(x_relation)
107 | x_relation = self.relu(x_relation)
108 |
109 | return x + x_relation
110 |
111 |
112 | def build_intraclblock_list(num_block):
113 | IntraCLBlock_list = nn.ModuleList()
114 | for i in range(num_block):
115 | IntraCLBlock_list.append(IntraCLBlock())
116 |
117 | return IntraCLBlock_list
118 |
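A quick shape check of IntraCLBlock (a sketch, assuming torch and the rapidocr package from this repo are installed): the block reduces channels by reduce_factor, runs the square and asymmetric conv branches, restores the channel count, and adds the result back to the input, so the output shape matches the input.

# Sketch: verify that IntraCLBlock is shape-preserving (torch assumed installed).
import torch

from rapidocr.networks.necks.intracl import IntraCLBlock, build_intraclblock_list

block = IntraCLBlock(in_channels=96, reduce_factor=4)
x = torch.randn(1, 96, 40, 40)  # N, C, H, W
with torch.no_grad():
    y = block(x)
print(y.shape)  # torch.Size([1, 96, 40, 40]) -- the residual keeps the input shape

blocks = build_intraclblock_list(num_block=2)
print(len(blocks))  # 2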
--------------------------------------------------------------------------------
/python/rapidocr/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .download_file import DownloadFile, DownloadFileException, DownloadFileInput
5 | from .load_image import LoadImage, LoadImageError
6 | from .logger import Logger
7 | from .output import RapidOCROutput
8 | from .parse_parameters import ParseParams
9 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side
10 | from .vis_res import VisRes
11 |
--------------------------------------------------------------------------------
/python/rapidocr/utils/download_file.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import logging
5 | import sys
6 | from dataclasses import dataclass
7 | from pathlib import Path
8 | from typing import Optional, Union
9 |
10 | import requests
11 | from tqdm import tqdm
12 |
13 | from .utils import get_file_sha256
14 |
15 |
16 | @dataclass
17 | class DownloadFileInput:
18 | file_url: str
19 | save_path: Union[str, Path]
20 | logger: logging.Logger
21 | sha256: Optional[str] = None
22 |
23 |
24 | class DownloadFile:
25 | BLOCK_SIZE = 1024 # 1 KiB
26 | REQUEST_TIMEOUT = 60
27 |
28 | @classmethod
29 | def run(cls, input_params: DownloadFileInput):
30 | save_path = Path(input_params.save_path)
31 |
32 | logger = input_params.logger
33 | cls._ensure_parent_dir_exists(save_path)
34 | if cls._should_skip_download(save_path, input_params.sha256, logger):
35 | return
36 |
37 | response = cls._make_http_request(input_params.file_url, logger)
38 | cls._save_response_with_progress(response, save_path, logger)
39 |
40 | @staticmethod
41 | def _ensure_parent_dir_exists(path: Path):
42 | path.parent.mkdir(parents=True, exist_ok=True)
43 |
44 | @classmethod
45 | def _should_skip_download(
46 | cls, path: Path, expected_sha256: Optional[str], logger: logging.Logger
47 | ) -> bool:
48 | if not path.exists():
49 | return False
50 |
51 | if expected_sha256 is None:
52 | logger.info("File exists (no checksum verification): %s", path)
53 | return True
54 |
55 | if cls.check_file_sha256(path, expected_sha256):
56 | logger.info("File exists and is valid: %s", path)
57 | return True
58 |
59 | logger.warning("File exists but is invalid, redownloading: %s", path)
60 | return False
61 |
62 | @classmethod
63 | def _make_http_request(cls, url: str, logger: logging.Logger) -> requests.Response:
64 | logger.info("Initiating download: %s", url)
65 | try:
66 | response = requests.get(url, stream=True, timeout=cls.REQUEST_TIMEOUT)
67 | response.raise_for_status() # Raises HTTPError for 4XX/5XX
68 | return response
69 | except requests.RequestException as e:
70 | logger.error("Download failed: %s", url)
71 | raise DownloadFileException(f"Failed to download {url}") from e
72 |
73 | @classmethod
74 | def _save_response_with_progress(
75 | cls, response: requests.Response, save_path: Path, logger: logging.Logger
76 | ) -> None:
77 | total_size = int(response.headers.get("content-length", 0))
78 | logger.info("Download size: %.2fMB", total_size / 1024 / 1024)
79 |
80 | with (
81 | tqdm(
82 | total=total_size,
83 | unit="iB",
84 | unit_scale=True,
85 | disable=not cls.check_is_atty(),
86 | ) as progress_bar,
87 | open(save_path, "wb") as output_file,
88 | ):
89 | for chunk in response.iter_content(chunk_size=cls.BLOCK_SIZE):
90 | progress_bar.update(len(chunk))
91 | output_file.write(chunk)
92 |
93 | logger.info("Successfully saved to: %s", save_path)
94 |
95 | @staticmethod
96 | def check_file_sha256(file_path: Union[str, Path], gt_sha256: str) -> bool:
97 | return get_file_sha256(file_path) == gt_sha256
98 |
99 | @staticmethod
100 | def check_is_atty() -> bool:
101 | try:
102 | is_interactive = sys.stderr.isatty()
103 | except AttributeError:
104 | return False
105 | return is_interactive
106 |
107 |
108 | class DownloadFileException(Exception):
109 | pass
110 |
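A minimal usage sketch of DownloadFile; the URL, save path and checksum below are hypothetical placeholders, not real model files. run() skips the download when the file already exists and (if a sha256 is given) matches the expected digest.

# Sketch only: file_url, save_path and sha256 are placeholders.
import logging

from rapidocr.utils.download_file import (
    DownloadFile,
    DownloadFileException,
    DownloadFileInput,
)

logger = logging.getLogger("rapidocr.download")
params = DownloadFileInput(
    file_url="https://example.com/models/det.onnx",  # placeholder URL
    save_path="models/det.onnx",
    logger=logger,
    sha256=None,  # pass the expected hex digest to enable verification
)

try:
    DownloadFile.run(params)
except DownloadFileException as exc:
    logger.error("download failed: %s", exc)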
--------------------------------------------------------------------------------
/python/rapidocr/utils/logger.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import logging
5 |
6 | import colorlog
7 |
8 |
9 | class Logger:
10 | def __init__(self, log_level=logging.DEBUG, logger_name=None):
11 | self.logger = logging.getLogger(logger_name)
12 | self.logger.setLevel(log_level)
13 | self.logger.propagate = False
14 |
15 | formatter = colorlog.ColoredFormatter(
16 | "%(log_color)s[%(levelname)s] %(asctime)s [RapidOCR] %(filename)s:%(lineno)d: %(message)s",
17 | log_colors={
18 | "DEBUG": "cyan",
19 | "INFO": "green",
20 | "WARNING": "yellow",
21 | "ERROR": "red",
22 | "CRITICAL": "red,bg_white",
23 | },
24 | )
25 |
26 | if not self.logger.handlers:
27 | console_handler = logging.StreamHandler()
28 | console_handler.setFormatter(formatter)
29 |
30 | for handler in self.logger.handlers:
31 | self.logger.removeHandler(handler)
32 |
33 | console_handler.setLevel(log_level)
34 | self.logger.addHandler(console_handler)
35 |
36 | def get_log(self):
37 | return self.logger
38 |
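Typical usage of the wrapper above (a small sketch, assuming colorlog is installed): create a Logger per module and fetch the underlying logging.Logger via get_log().

# Sketch: colored console logging via the Logger wrapper.
import logging

from rapidocr.utils.logger import Logger

log = Logger(log_level=logging.INFO, logger_name=__name__).get_log()
log.info("recognition finished")       # green
log.warning("falling back to CPU")     # yellow
log.debug("not shown: level is INFO")  # filtered out by the INFO threshold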
--------------------------------------------------------------------------------
/python/rapidocr/utils/output.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from dataclasses import dataclass, field
5 | from typing import List, Optional, Tuple, Union
6 |
7 | import numpy as np
8 |
9 | from .logger import Logger
10 | from .utils import save_img
11 | from .vis_res import VisRes
12 |
13 | logger = Logger(logger_name=__name__).get_log()
14 |
15 |
16 | @dataclass
17 | class RapidOCROutput:
18 | img: Optional[np.ndarray] = None
19 | boxes: Optional[np.ndarray] = None
20 | txts: Optional[Tuple[str]] = None
21 | scores: Optional[Tuple[float]] = None
22 | word_results: Tuple[Tuple[str, float, Optional[List[List[int]]]]] = (
23 | ("", 1.0, None),
24 | )
25 | elapse_list: List[Union[float, None]] = field(default_factory=list)
26 | elapse: float = field(init=False)
27 | lang_type: Optional[str] = None
28 |
29 | def __post_init__(self):
30 | self.elapse = sum(v for v in self.elapse_list if isinstance(v, float))
31 |
32 | def __len__(self):
33 | if self.txts is None:
34 | return 0
35 | return len(self.txts)
36 |
37 | def to_json(self):
38 | pass
39 |
40 | def vis(self, save_path: Optional[str] = None, font_path: Optional[str] = None):
41 | if self.img is None or self.boxes is None:
42 | logger.warning("No image or boxes to visualize.")
43 | return
44 |
45 | vis = VisRes()
46 | if all(v is None for v in self.word_results):
47 | vis_img = vis(
48 | self.img,
49 | self.boxes,
50 | self.txts,
51 | self.scores,
52 | font_path=font_path,
53 | lang_type=self.lang_type,
54 | )
55 |
56 | if save_path is not None:
57 | save_img(save_path, vis_img)
58 | logger.info("Visualization saved as %s", save_path)
59 | return vis_img
60 |
61 | # single word vis
62 | words_results = self.word_results
63 | words, words_scores, words_boxes = list(zip(*words_results))
64 | vis_img = vis(
65 | self.img,
66 | words_boxes,
67 | words,
68 | words_scores,
69 | font_path=font_path,
70 | lang_type=self.lang_type,
71 | )
72 |
73 | if save_path is not None:
74 | save_img(save_path, vis_img)
75 | logger.info("Single word visualization saved as %s", save_path)
76 | return vis_img
77 |
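How RapidOCROutput aggregates timing (a sketch with hand-made values): elapse is computed in __post_init__ as the sum of the float entries in elapse_list, None entries for skipped stages are ignored, and len() reports the number of recognized text lines.

# Sketch: construct a RapidOCROutput by hand to show elapse aggregation.
import numpy as np

from rapidocr.utils.output import RapidOCROutput

out = RapidOCROutput(
    img=np.zeros((32, 100, 3), dtype=np.uint8),
    boxes=np.array([[[0, 0], [50, 0], [50, 20], [0, 20]]], dtype=np.float32),
    txts=("hello",),
    scores=(0.98,),
    elapse_list=[0.25, None, 0.5],  # det, cls (skipped), rec
)
print(len(out))    # 1 text line
print(out.elapse)  # 0.75 -- None entries are ignored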
--------------------------------------------------------------------------------
/python/rapidocr/utils/parse_parameters.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from enum import Enum
5 | from pathlib import Path
6 | from typing import Any, Dict, Union
7 |
8 | from omegaconf import DictConfig, OmegaConf
9 |
10 | from .typings import (
11 | EngineType,
12 | LangCls,
13 | LangDet,
14 | LangRec,
15 | ModelType,
16 | OCRVersion,
17 | TaskType,
18 | )
19 |
20 |
21 | class ParseParams(OmegaConf):
22 | def __init__(self):
23 | pass
24 |
25 | @classmethod
26 | def load(cls, file_path: Union[str, Path]):
27 | cfg = OmegaConf.load(file_path)
28 |
29 | cfg.Det = cls._convert_value_to_enum(cfg.Det)
30 | cfg.Cls = cls._convert_value_to_enum(cfg.Cls)
31 | cfg.Rec = cls._convert_value_to_enum(cfg.Rec)
32 | return cfg
33 |
34 | @classmethod
35 | def update_batch(cls, cfg: DictConfig, params: Dict[str, Any]) -> DictConfig:
36 | global_keys = list(OmegaConf.to_container(cfg.Global).keys())
37 | enum_params = [
38 | "engine_type",
39 | "model_type",
40 | "ocr_version",
41 | "lang_type",
42 | "task_type",
43 | ]
44 | for k, v in params.items():
45 | if k.startswith("Global") and k.split(".")[1] not in global_keys:
46 | raise ValueError(f"{k} is not a valid key.")
47 |
48 | if k.split(".")[1] in enum_params and not isinstance(v, Enum):
49 | raise TypeError(f"The value of {k} must be Enum Type.")
50 |
51 | cls.update(cfg, k, v)
52 | return cfg
53 |
54 | @classmethod
55 | def _convert_value_to_enum(cls, cfg: DictConfig):
56 | cfg.engine_type = EngineType(cfg.engine_type)
57 | cfg.model_type = ModelType(cfg.model_type)
58 | cfg.ocr_version = OCRVersion(cfg.ocr_version)
59 | cfg.task_type = TaskType(cfg.task_type)
60 | cfg.lang_type = cls.LangType(cfg.task_type, cfg.lang_type)
61 | return cfg
62 |
63 | @staticmethod
64 | def LangType(task_type: TaskType, lang_type: str):
65 | if task_type == TaskType.DET:
66 | return LangDet(lang_type)
67 |
68 | if task_type == TaskType.CLS:
69 | return LangCls(lang_type)
70 |
71 | if task_type == TaskType.REC:
72 | return LangRec(lang_type)
73 |
74 | raise ValueError(f"task_type {task_type.value} is not in [Det, Cls, Rec]")
75 |
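A sketch of update_batch (assuming omegaconf is installed; the config content below is illustrative): keys under Global must already exist in the config, and values for the engine/model/version/lang/task keys must be passed as the corresponding Enum members, otherwise a TypeError is raised.

# Sketch: batch-updating a config with dotted keys.
from omegaconf import OmegaConf

from rapidocr.utils.parse_parameters import ParseParams
from rapidocr.utils.typings import EngineType

cfg = OmegaConf.create(
    {
        "Global": {"text_score": 0.5, "use_cls": True},
        "Rec": {"engine_type": "onnxruntime"},
    }
)

cfg = ParseParams.update_batch(
    cfg,
    {
        "Global.text_score": 0.8,             # key must already exist under Global
        "Rec.engine_type": EngineType.TORCH,  # enum keys require Enum values
    },
)
print(cfg.Global.text_score)  # 0.8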
--------------------------------------------------------------------------------
/python/rapidocr/utils/process_img.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from typing import Tuple
5 |
6 | import cv2
7 | import numpy as np
8 |
9 |
10 | def reduce_max_side(
11 | img: np.ndarray, max_side_len: int = 2000
12 | ) -> Tuple[np.ndarray, float, float]:
13 | h, w = img.shape[:2]
14 |
15 | ratio = 1.0
16 | if max(h, w) > max_side_len:
17 | if h > w:
18 | ratio = float(max_side_len) / h
19 | else:
20 | ratio = float(max_side_len) / w
21 |
22 | resize_h = int(h * ratio)
23 | resize_w = int(w * ratio)
24 |
25 | resize_h = int(round(resize_h / 32) * 32)
26 | resize_w = int(round(resize_w / 32) * 32)
27 |
28 | try:
29 | if int(resize_w) <= 0 or int(resize_h) <= 0:
30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0")
31 | img = cv2.resize(img, (resize_w, resize_h))
32 | except Exception as exc:
33 | raise ResizeImgError() from exc
34 |
35 | ratio_h = h / resize_h
36 | ratio_w = w / resize_w
37 | return img, ratio_h, ratio_w
38 |
39 |
40 | def increase_min_side(
41 | img: np.ndarray, min_side_len: int = 30
42 | ) -> Tuple[np.ndarray, float, float]:
43 | h, w = img.shape[:2]
44 |
45 | ratio = 1.0
46 | if min(h, w) < min_side_len:
47 | if h < w:
48 | ratio = float(min_side_len) / h
49 | else:
50 | ratio = float(min_side_len) / w
51 |
52 | resize_h = int(h * ratio)
53 | resize_w = int(w * ratio)
54 |
55 | resize_h = int(round(resize_h / 32) * 32)
56 | resize_w = int(round(resize_w / 32) * 32)
57 |
58 | try:
59 | if int(resize_w) <= 0 or int(resize_h) <= 0:
60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0")
61 | img = cv2.resize(img, (resize_w, resize_h))
62 | except Exception as exc:
63 | raise ResizeImgError() from exc
64 |
65 | ratio_h = h / resize_h
66 | ratio_w = w / resize_w
67 | return img, ratio_h, ratio_w
68 |
69 |
70 | def add_round_letterbox(
71 | img: np.ndarray,
72 | padding_tuple: Tuple[int, int, int, int],
73 | ) -> np.ndarray:
74 | padded_img = cv2.copyMakeBorder(
75 | img,
76 | padding_tuple[0],
77 | padding_tuple[1],
78 | padding_tuple[2],
79 | padding_tuple[3],
80 | cv2.BORDER_CONSTANT,
81 | value=(0, 0, 0),
82 | )
83 | return padded_img
84 |
85 |
86 | class ResizeImgError(Exception):
87 | pass
88 |
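A quick sketch of the two resize helpers above (values are illustrative): both snap the target size to a multiple of 32 and return the ratios needed to map boxes back to the original image; an image whose rounded size would be zero raises ResizeImgError.

# Sketch: resizing behaviour of reduce_max_side / increase_min_side.
import numpy as np

from rapidocr.utils.process_img import increase_min_side, reduce_max_side

big = np.zeros((3000, 1200, 3), dtype=np.uint8)
small = np.zeros((20, 300, 3), dtype=np.uint8)

resized, ratio_h, ratio_w = reduce_max_side(big, max_side_len=2000)
print(resized.shape[:2])  # both sides scaled down, rounded to multiples of 32

resized, ratio_h, ratio_w = increase_min_side(small, min_side_len=30)
print(resized.shape[:2])  # short side scaled up to ~30, rounded to a multiple of 32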
--------------------------------------------------------------------------------
/python/rapidocr/utils/typings.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from enum import Enum
5 |
6 |
7 | class LangDet(Enum):
8 | CH = "ch"
9 | EN = "en"
10 | MULTI = "multi"
11 |
12 |
13 | class LangCls(Enum):
14 | CH = "ch"
15 |
16 |
17 | class LangRec(Enum):
18 | CH = "ch"
19 | CH_DOC = "ch_doc"
20 | EN = "en"
21 | ARABIC = "arabic"
22 | CHINESE_CHT = "chinese_cht"
23 | CYRILLIC = "cyrillic"
24 | DEVANAGARI = "devanagari"
25 | JAPAN = "japan"
26 | KOREAN = "korean"
27 | KA = "ka"
28 | LATIN = "latin"
29 | TA = "ta"
30 | TE = "te"
31 |
32 |
33 | class OCRVersion(Enum):
34 | PPOCRV4 = "PP-OCRv4"
35 | PPOCRV5 = "PP-OCRv5"
36 |
37 |
38 | class EngineType(Enum):
39 | ONNXRUNTIME = "onnxruntime"
40 | OPENVINO = "openvino"
41 | PADDLE = "paddle"
42 | TORCH = "torch"
43 |
44 |
45 | class ModelType(Enum):
46 | MOBILE = "mobile"
47 | SERVER = "server"
48 |
49 |
50 | class TaskType(Enum):
51 | DET = "det"
52 | CLS = "cls"
53 | REC = "rec"
54 |
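These enums are constructed from the plain strings stored in the YAML configs; a short sketch of the round-trip:

# Sketch: string <-> Enum round-trip used when parsing config values.
from rapidocr.utils.typings import EngineType, LangRec, OCRVersion, TaskType

assert EngineType("onnxruntime") is EngineType.ONNXRUNTIME
assert OCRVersion("PP-OCRv4") is OCRVersion.PPOCRV4
assert TaskType("rec") is TaskType.REC
assert LangRec.CH.value == "ch"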
--------------------------------------------------------------------------------
/python/rapidocr/utils/utils.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import hashlib
5 | import importlib
6 | from pathlib import Path
7 | from typing import Tuple, Union
8 | from urllib.parse import urlparse
9 |
10 | import cv2
11 | import numpy as np
12 |
13 |
14 | def quads_to_rect_bbox(bbox: np.ndarray) -> Tuple[float, float, float, float]:
15 | if bbox.ndim != 3:
16 | raise ValueError("bbox must be a 3-dimensional array of shape (N, 4, 2)")
17 |
18 | if bbox.shape[1] != 4 or bbox.shape[2] != 2:
19 | raise ValueError("bbox shape must be (N, 4, 2)")
20 |
21 | all_x, all_y = (bbox[:, :, 0].flatten(), bbox[:, :, 1].flatten())
22 | x_min, y_min = np.min(all_x), np.min(all_y)
23 | x_max, y_max = np.max(all_x), np.max(all_y)
24 | return float(x_min), float(y_min), float(x_max), float(y_max)
25 |
26 |
27 | def has_chinese_char(text: str) -> bool:
28 | return any("\u4e00" <= ch <= "\u9fff" for ch in text)
29 |
30 |
31 | def get_file_sha256(file_path: Union[str, Path], chunk_size: int = 65536) -> str:
32 | with open(file_path, "rb") as file:
33 | sha_signature = hashlib.sha256()
34 | while True:
35 | chunk = file.read(chunk_size)
36 | if not chunk:
37 | break
38 | sha_signature.update(chunk)
39 |
40 | return sha_signature.hexdigest()
41 |
42 |
43 | def save_img(save_path: Union[str, Path], img: np.ndarray):
44 | if not Path(save_path).parent.exists():
45 | Path(save_path).parent.mkdir(parents=True, exist_ok=True)
46 |
47 | cv2.imwrite(str(save_path), img)
48 |
49 |
50 | def is_url(url: str) -> bool:
51 | try:
52 | result = urlparse(url)
53 | return all([result.scheme, result.netloc])
54 | except Exception:
55 | return False
56 |
57 |
58 | def import_package(name, package=None):
59 | try:
60 | module = importlib.import_module(name, package=package)
61 | return module
62 | except ModuleNotFoundError:
63 | return None
64 |
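A few of the helpers above, exercised on toy inputs (a sketch): quads_to_rect_bbox collapses a stack of quadrilaterals into one enclosing rectangle, and is_url only accepts strings with both a scheme and a host.

# Sketch: toy inputs for the geometry and URL helpers.
import numpy as np

from rapidocr.utils.utils import has_chinese_char, is_url, quads_to_rect_bbox

quads = np.array(
    [
        [[10, 20], [50, 20], [50, 60], [10, 60]],
        [[30, 10], [80, 10], [80, 40], [30, 40]],
    ],
    dtype=np.float32,
)
print(quads_to_rect_bbox(quads))  # (10.0, 10.0, 80.0, 60.0)

print(is_url("https://example.com/model.onnx"))  # True
print(is_url("models/model.onnx"))               # False: no scheme/netloc
print(has_chinese_char("RapidOCR 文本识别"))       # True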
--------------------------------------------------------------------------------
/python/rapidocr_onnxruntime/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import RapidOCR
5 | from .utils import LoadImageError, VisRes
6 |
--------------------------------------------------------------------------------
/python/rapidocr_onnxruntime/cal_rec_boxes/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import CalRecBoxes
5 |
--------------------------------------------------------------------------------
/python/rapidocr_onnxruntime/ch_ppocr_cls/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_cls import TextClassifier
5 |
--------------------------------------------------------------------------------
/python/rapidocr_onnxruntime/ch_ppocr_cls/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from typing import List, Tuple
15 |
16 | import numpy as np
17 |
18 |
19 | class ClsPostProcess:
20 | def __init__(self, label_list: List[str]):
21 | self.label_list = label_list
22 |
23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]:
24 | pred_idxs = preds.argmax(axis=1)
25 | decode_out = [
26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs)
27 | ]
28 | return decode_out
29 |
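ClsPostProcess simply takes the argmax over the two orientation logits and pairs it with its label; a toy sketch (assuming the rapidocr_onnxruntime package is installed):

# Sketch: decoding a batch of two orientation predictions.
import numpy as np

from rapidocr_onnxruntime.ch_ppocr_cls.utils import ClsPostProcess

post = ClsPostProcess(label_list=["0", "180"])
preds = np.array([[0.9, 0.1], [0.2, 0.8]], dtype=np.float32)
print(post(preds))  # label/score pairs: ('0', 0.9) and ('180', 0.8)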
--------------------------------------------------------------------------------
/python/rapidocr_onnxruntime/ch_ppocr_det/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_detect import TextDetector
5 |
--------------------------------------------------------------------------------
/python/rapidocr_onnxruntime/ch_ppocr_rec/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_recognize import TextRecognizer
5 |
--------------------------------------------------------------------------------
/python/rapidocr_onnxruntime/config.yaml:
--------------------------------------------------------------------------------
1 | Global:
2 | text_score: 0.5
3 | use_det: true
4 | use_cls: true
5 | use_rec: true
6 | print_verbose: false
7 | min_height: 30
8 | width_height_ratio: 8
9 | max_side_len: 2000
10 | min_side_len: 30
11 | return_word_box: false
12 |
13 | intra_op_num_threads: &intra_nums -1
14 | inter_op_num_threads: &inter_nums -1
15 |
16 | Det:
17 | intra_op_num_threads: *intra_nums
18 | inter_op_num_threads: *inter_nums
19 |
20 | use_cuda: false
21 | use_dml: false
22 |
23 | model_path: models/ch_PP-OCRv4_det_infer.onnx
24 |
25 | limit_side_len: 736
26 | limit_type: min
27 | std: [ 0.5, 0.5, 0.5 ]
28 | mean: [ 0.5, 0.5, 0.5 ]
29 |
30 | thresh: 0.3
31 | box_thresh: 0.5
32 | max_candidates: 1000
33 | unclip_ratio: 1.6
34 | use_dilation: true
35 | score_mode: fast
36 |
37 | Cls:
38 | intra_op_num_threads: *intra_nums
39 | inter_op_num_threads: *inter_nums
40 |
41 | use_cuda: false
42 | use_dml: false
43 |
44 | model_path: models/ch_ppocr_mobile_v2.0_cls_infer.onnx
45 |
46 | cls_image_shape: [3, 48, 192]
47 | cls_batch_num: 6
48 | cls_thresh: 0.9
49 | label_list: ['0', '180']
50 |
51 | Rec:
52 | intra_op_num_threads: *intra_nums
53 | inter_op_num_threads: *inter_nums
54 |
55 | use_cuda: false
56 | use_dml: false
57 |
58 | model_path: models/ch_PP-OCRv4_rec_infer.onnx
59 |
60 | rec_img_shape: [3, 48, 320]
61 | rec_batch_num: 6
62 |
--------------------------------------------------------------------------------
/python/rapidocr_onnxruntime/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_onnxruntime/models/.gitkeep
--------------------------------------------------------------------------------
/python/rapidocr_onnxruntime/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from pathlib import Path
5 | from typing import Dict, Union
6 |
7 | import yaml
8 |
9 | from .infer_engine import OrtInferSession
10 | from .load_image import LoadImage, LoadImageError
11 | from .logger import get_logger
12 | from .parse_parameters import UpdateParameters, init_args, update_model_path
13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side
14 | from .vis_res import VisRes
15 |
16 |
17 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]:
18 | with open(yaml_path, "rb") as f:
19 | data = yaml.load(f, Loader=yaml.Loader)
20 | return data
21 |
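read_yaml is what loads the config.yaml shown earlier; a sketch of reading it and tweaking a couple of values before they reach the inference sessions (the relative path is illustrative, and onnxruntime must be installed for the package import):

# Sketch: load and adjust the packaged config (path is illustrative).
from rapidocr_onnxruntime.utils import read_yaml

config = read_yaml("rapidocr_onnxruntime/config.yaml")
print(config["Global"]["text_score"])  # 0.5 by default
config["Global"]["use_cls"] = False    # skip the angle classifier
config["Det"]["box_thresh"] = 0.6      # stricter detection filtering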
--------------------------------------------------------------------------------
/python/rapidocr_onnxruntime/utils/logger.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import logging
5 | from functools import lru_cache
6 |
7 |
8 | @lru_cache(maxsize=32)
9 | def get_logger(name: str) -> logging.Logger:
10 | logger = logging.getLogger(name)
11 | logger.setLevel(logging.DEBUG)
12 |
13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s"
14 | format_str = logging.Formatter(fmt)
15 |
16 | sh = logging.StreamHandler()
17 | sh.setLevel(logging.DEBUG)
18 |
19 | logger.addHandler(sh)
20 | sh.setFormatter(format_str)
21 | return logger
22 |
--------------------------------------------------------------------------------
/python/rapidocr_onnxruntime/utils/process_img.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from typing import Tuple
5 |
6 | import cv2
7 | import numpy as np
8 |
9 |
10 | def reduce_max_side(
11 | img: np.ndarray, max_side_len: int = 2000
12 | ) -> Tuple[np.ndarray, float, float]:
13 | h, w = img.shape[:2]
14 |
15 | ratio = 1.0
16 | if max(h, w) > max_side_len:
17 | if h > w:
18 | ratio = float(max_side_len) / h
19 | else:
20 | ratio = float(max_side_len) / w
21 |
22 | resize_h = int(h * ratio)
23 | resize_w = int(w * ratio)
24 |
25 | resize_h = int(round(resize_h / 32) * 32)
26 | resize_w = int(round(resize_w / 32) * 32)
27 |
28 | try:
29 | if int(resize_w) <= 0 or int(resize_h) <= 0:
30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0")
31 | img = cv2.resize(img, (resize_w, resize_h))
32 | except Exception as exc:
33 | raise ResizeImgError() from exc
34 |
35 | ratio_h = h / resize_h
36 | ratio_w = w / resize_w
37 | return img, ratio_h, ratio_w
38 |
39 |
40 | def increase_min_side(
41 | img: np.ndarray, min_side_len: int = 30
42 | ) -> Tuple[np.ndarray, float, float]:
43 | h, w = img.shape[:2]
44 |
45 | ratio = 1.0
46 | if min(h, w) < min_side_len:
47 | if h < w:
48 | ratio = float(min_side_len) / h
49 | else:
50 | ratio = float(min_side_len) / w
51 |
52 | resize_h = int(h * ratio)
53 | resize_w = int(w * ratio)
54 |
55 | resize_h = int(round(resize_h / 32) * 32)
56 | resize_w = int(round(resize_w / 32) * 32)
57 |
58 | try:
59 | if int(resize_w) <= 0 or int(resize_h) <= 0:
60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0")
61 | img = cv2.resize(img, (resize_w, resize_h))
62 | except Exception as exc:
63 | raise ResizeImgError() from exc
64 |
65 | ratio_h = h / resize_h
66 | ratio_w = w / resize_w
67 | return img, ratio_h, ratio_w
68 |
69 |
70 | def add_round_letterbox(
71 | img: np.ndarray,
72 | padding_tuple: Tuple[int, int, int, int],
73 | ) -> np.ndarray:
74 | padded_img = cv2.copyMakeBorder(
75 | img,
76 | padding_tuple[0],
77 | padding_tuple[1],
78 | padding_tuple[2],
79 | padding_tuple[3],
80 | cv2.BORDER_CONSTANT,
81 | value=(0, 0, 0),
82 | )
83 | return padded_img
84 |
85 |
86 | class ResizeImgError(Exception):
87 | pass
88 |
--------------------------------------------------------------------------------
/python/rapidocr_openvino/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import RapidOCR
5 | from .utils import LoadImageError, VisRes
6 |
--------------------------------------------------------------------------------
/python/rapidocr_openvino/cal_rec_boxes/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import CalRecBoxes
5 |
--------------------------------------------------------------------------------
/python/rapidocr_openvino/ch_ppocr_cls/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_cls import TextClassifier
5 |
--------------------------------------------------------------------------------
/python/rapidocr_openvino/ch_ppocr_cls/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from typing import List, Tuple
15 |
16 | import numpy as np
17 |
18 |
19 | class ClsPostProcess:
20 | def __init__(self, label_list: List[str]):
21 | self.label_list = label_list
22 |
23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]:
24 | pred_idxs = preds.argmax(axis=1)
25 | decode_out = [
26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs)
27 | ]
28 | return decode_out
29 |
--------------------------------------------------------------------------------
/python/rapidocr_openvino/ch_ppocr_det/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_detect import TextDetector
5 |
--------------------------------------------------------------------------------
/python/rapidocr_openvino/ch_ppocr_rec/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_recognize import TextRecognizer
5 |
--------------------------------------------------------------------------------
/python/rapidocr_openvino/config.yaml:
--------------------------------------------------------------------------------
1 | Global:
2 | text_score: 0.5
3 | use_det: true
4 | use_cls: true
5 | use_rec: true
6 | print_verbose: false
7 | min_height: 30
8 | width_height_ratio: 8
9 | max_side_len: 2000
10 | min_side_len: 30
11 | return_word_box: false
12 |
13 | inference_num_threads: &infer_num_threads -1
14 |
15 | Det:
16 | inference_num_threads: *infer_num_threads
17 |
18 | use_cuda: false
19 |
20 | model_path: models/ch_PP-OCRv4_det_infer.onnx
21 |
22 | limit_side_len: 736
23 | limit_type: min
24 | std: [ 0.5, 0.5, 0.5 ]
25 | mean: [ 0.5, 0.5, 0.5 ]
26 |
27 | thresh: 0.3
28 | box_thresh: 0.5
29 | max_candidates: 1000
30 | unclip_ratio: 1.6
31 | use_dilation: true
32 | score_mode: fast
33 |
34 | Cls:
35 | inference_num_threads: *infer_num_threads
36 |
37 | use_cuda: false
38 |
39 | model_path: models/ch_ppocr_mobile_v2.0_cls_infer.onnx
40 |
41 | cls_image_shape: [3, 48, 192]
42 | cls_batch_num: 6
43 | cls_thresh: 0.9
44 | label_list: ['0', '180']
45 |
46 | Rec:
47 | inference_num_threads: *infer_num_threads
48 |
49 | use_cuda: false
50 |
51 | model_path: models/ch_PP-OCRv4_rec_infer.onnx
52 |
53 | rec_img_shape: [3, 48, 320]
54 | rec_batch_num: 6
55 |
--------------------------------------------------------------------------------
/python/rapidocr_openvino/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_openvino/models/.gitkeep
--------------------------------------------------------------------------------
/python/rapidocr_openvino/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from pathlib import Path
5 | from typing import Dict, Union
6 |
7 | import yaml
8 |
9 | from .infer_engine import OpenVINOInferSession
10 | from .load_image import LoadImage, LoadImageError
11 | from .logger import get_logger
12 | from .parse_parameters import UpdateParameters, init_args, update_model_path
13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side
14 | from .vis_res import VisRes
15 |
16 |
17 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]:
18 | with open(yaml_path, "rb") as f:
19 | data = yaml.load(f, Loader=yaml.Loader)
20 | return data
21 |
--------------------------------------------------------------------------------
/python/rapidocr_openvino/utils/infer_engine.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import os
5 | import traceback
6 | from pathlib import Path
7 |
8 | import numpy as np
9 | from openvino.runtime import Core
10 |
11 |
12 | class OpenVINOInferSession:
13 | def __init__(self, config):
14 | core = Core()
15 |
16 | self._verify_model(config["model_path"])
17 | model_onnx = core.read_model(config["model_path"])
18 |
19 | cpu_nums = os.cpu_count()
20 | infer_num_threads = config.get("inference_num_threads", -1)
21 | if infer_num_threads != -1 and 1 <= infer_num_threads <= cpu_nums:
22 | core.set_property("CPU", {"INFERENCE_NUM_THREADS": str(infer_num_threads)})
23 |
24 | compile_model = core.compile_model(model=model_onnx, device_name="CPU")
25 | self.session = compile_model.create_infer_request()
26 |
27 | def __call__(self, input_content: np.ndarray) -> np.ndarray:
28 | try:
29 | self.session.infer(inputs=[input_content])
30 | return self.session.get_output_tensor().data
31 | except Exception as e:
32 | error_info = traceback.format_exc()
33 | raise OpenVINOError(error_info) from e
34 |
35 | @staticmethod
36 | def _verify_model(model_path):
37 | model_path = Path(model_path)
38 | if not model_path.exists():
39 | raise FileNotFoundError(f"{model_path} does not exist.")
40 | if not model_path.is_file():
41 | raise FileExistsError(f"{model_path} is not a file.")
42 |
43 |
44 | class OpenVINOError(Exception):
45 | pass
46 |
--------------------------------------------------------------------------------
/python/rapidocr_openvino/utils/logger.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import logging
5 | from functools import lru_cache
6 |
7 |
8 | @lru_cache(maxsize=32)
9 | def get_logger(name: str):
10 | logger = logging.getLogger(name)
11 | logger.setLevel(logging.DEBUG)
12 |
13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s"
14 | format_str = logging.Formatter(fmt)
15 |
16 | sh = logging.StreamHandler()
17 | sh.setLevel(logging.DEBUG)
18 |
19 | logger.addHandler(sh)
20 | sh.setFormatter(format_str)
21 | return logger
22 |
--------------------------------------------------------------------------------
/python/rapidocr_openvino/utils/process_img.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from typing import Tuple
5 |
6 | import cv2
7 | import numpy as np
8 |
9 |
10 | def reduce_max_side(
11 | img: np.ndarray, max_side_len: int = 2000
12 | ) -> Tuple[np.ndarray, float, float]:
13 | h, w = img.shape[:2]
14 |
15 | ratio = 1.0
16 | if max(h, w) > max_side_len:
17 | if h > w:
18 | ratio = float(max_side_len) / h
19 | else:
20 | ratio = float(max_side_len) / w
21 |
22 | resize_h = int(h * ratio)
23 | resize_w = int(w * ratio)
24 |
25 | resize_h = int(round(resize_h / 32) * 32)
26 | resize_w = int(round(resize_w / 32) * 32)
27 |
28 | try:
29 | if int(resize_w) <= 0 or int(resize_h) <= 0:
30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0")
31 | img = cv2.resize(img, (resize_w, resize_h))
32 | except Exception as exc:
33 | raise ResizeImgError() from exc
34 |
35 | ratio_h = h / resize_h
36 | ratio_w = w / resize_w
37 | return img, ratio_h, ratio_w
38 |
39 |
40 | def increase_min_side(
41 | img: np.ndarray, min_side_len: int = 30
42 | ) -> Tuple[np.ndarray, float, float]:
43 | h, w = img.shape[:2]
44 |
45 | ratio = 1.0
46 | if min(h, w) < min_side_len:
47 | if h < w:
48 | ratio = float(min_side_len) / h
49 | else:
50 | ratio = float(min_side_len) / w
51 |
52 | resize_h = int(h * ratio)
53 | resize_w = int(w * ratio)
54 |
55 | resize_h = int(round(resize_h / 32) * 32)
56 | resize_w = int(round(resize_w / 32) * 32)
57 |
58 | try:
59 | if int(resize_w) <= 0 or int(resize_h) <= 0:
60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0")
61 | img = cv2.resize(img, (resize_w, resize_h))
62 | except Exception as exc:
63 | raise ResizeImgError() from exc
64 |
65 | ratio_h = h / resize_h
66 | ratio_w = w / resize_w
67 | return img, ratio_h, ratio_w
68 |
69 |
70 | def add_round_letterbox(
71 | img: np.ndarray,
72 | padding_tuple: Tuple[int, int, int, int],
73 | ) -> np.ndarray:
74 | padded_img = cv2.copyMakeBorder(
75 | img,
76 | padding_tuple[0],
77 | padding_tuple[1],
78 | padding_tuple[2],
79 | padding_tuple[3],
80 | cv2.BORDER_CONSTANT,
81 | value=(0, 0, 0),
82 | )
83 | return padded_img
84 |
85 |
86 | class ResizeImgError(Exception):
87 | pass
88 |
--------------------------------------------------------------------------------
/python/rapidocr_paddle/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import RapidOCR
5 | from .utils import LoadImageError, VisRes
6 |
--------------------------------------------------------------------------------
/python/rapidocr_paddle/cal_rec_boxes/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import CalRecBoxes
5 |
--------------------------------------------------------------------------------
/python/rapidocr_paddle/ch_ppocr_cls/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_cls import TextClassifier
5 |
--------------------------------------------------------------------------------
/python/rapidocr_paddle/ch_ppocr_cls/text_cls.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import copy
15 | import math
16 | import time
17 | from typing import List
18 |
19 | import cv2
20 | import numpy as np
21 |
22 | from rapidocr_paddle.utils import PaddleInferSession
23 |
24 | from .utils import ClsPostProcess
25 |
26 |
27 | class TextClassifier:
28 | def __init__(self, config):
29 | self.cls_image_shape = config["cls_image_shape"]
30 | self.cls_batch_num = config["cls_batch_num"]
31 | self.cls_thresh = config["cls_thresh"]
32 | self.postprocess_op = ClsPostProcess(config["label_list"])
33 |
34 | self.infer = PaddleInferSession(config)
35 |
36 | def __call__(self, img_list: List[np.ndarray]):
37 | if isinstance(img_list, np.ndarray):
38 | img_list = [img_list]
39 |
40 | img_list = copy.deepcopy(img_list)
41 |
42 | # Calculate the aspect ratio of all text bars
43 | width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
44 |
45 | # Sorting can speed up the cls process
46 | indices = np.argsort(np.array(width_list))
47 |
48 | img_num = len(img_list)
49 | cls_res = [["", 0.0]] * img_num
50 | batch_num = self.cls_batch_num
51 | elapse = 0
52 | for beg_img_no in range(0, img_num, batch_num):
53 | end_img_no = min(img_num, beg_img_no + batch_num)
54 |
55 | norm_img_batch = []
56 | for ino in range(beg_img_no, end_img_no):
57 | norm_img = self.resize_norm_img(img_list[indices[ino]])
58 | norm_img = norm_img[np.newaxis, :]
59 | norm_img_batch.append(norm_img)
60 | norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32)
61 |
62 | starttime = time.time()
63 | prob_out = self.infer(norm_img_batch)[0]
64 | cls_result = self.postprocess_op(prob_out)
65 | elapse += time.time() - starttime
66 |
67 | for rno in range(len(cls_result)):
68 | label, score = cls_result[rno]
69 | cls_res[indices[beg_img_no + rno]] = [label, score]
70 | if "180" in label and score > self.cls_thresh:
71 | img_list[indices[beg_img_no + rno]] = cv2.rotate(
72 | img_list[indices[beg_img_no + rno]], 1
73 | )
74 | return img_list, cls_res, elapse
75 |
76 | def resize_norm_img(self, img):
77 | img_c, img_h, img_w = self.cls_image_shape
78 | h, w = img.shape[:2]
79 | ratio = w / float(h)
80 | if math.ceil(img_h * ratio) > img_w:
81 | resized_w = img_w
82 | else:
83 | resized_w = int(math.ceil(img_h * ratio))
84 |
85 | resized_image = cv2.resize(img, (resized_w, img_h))
86 | resized_image = resized_image.astype("float32")
87 | if img_c == 1:
88 | resized_image = resized_image / 255
89 | resized_image = resized_image[np.newaxis, :]
90 | else:
91 | resized_image = resized_image.transpose((2, 0, 1)) / 255
92 |
93 | resized_image -= 0.5
94 | resized_image /= 0.5
95 | padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32)
96 | padding_im[:, :, :resized_w] = resized_image
97 | return padding_im
98 |
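resize_norm_img scales pixels from [0, 255] to [-1, 1] and right-pads the width up to cls_image_shape; the same normalization written out standalone (a sketch, independent of PaddleInferSession):

# Sketch: the pixel normalization used by resize_norm_img, applied standalone.
import numpy as np

pixel = np.array([0, 127.5, 255], dtype=np.float32)
normalized = (pixel / 255 - 0.5) / 0.5
print(normalized)  # [-1.  0.  1.] -- the CHW image is then zero-padded up to width 192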
--------------------------------------------------------------------------------
/python/rapidocr_paddle/ch_ppocr_cls/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from typing import List, Tuple
15 |
16 | import numpy as np
17 |
18 |
19 | class ClsPostProcess:
20 | def __init__(self, label_list: List[str]):
21 | self.label_list = label_list
22 |
23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]:
24 | pred_idxs = preds.argmax(axis=1)
25 | decode_out = [
26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs)
27 | ]
28 | return decode_out
29 |
--------------------------------------------------------------------------------
/python/rapidocr_paddle/ch_ppocr_det/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_detect import TextDetector
5 |
--------------------------------------------------------------------------------
/python/rapidocr_paddle/ch_ppocr_rec/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_recognize import TextRecognizer
5 |
--------------------------------------------------------------------------------
/python/rapidocr_paddle/config.yaml:
--------------------------------------------------------------------------------
1 | Global:
2 | text_score: 0.5
3 | use_det: true
4 | use_cls: true
5 | use_rec: true
6 | print_verbose: false
7 | min_height: 30
8 | width_height_ratio: 8
9 | max_side_len: 2000
10 | min_side_len: 30
11 | return_word_box: false
12 |
13 | cpu_math_library_num_threads: &infer_num_threads -1
14 |
15 | Det:
16 | use_cuda: false
17 | gpu_id: 0
18 | gpu_mem: 500
19 |
20 | cpu_math_library_num_threads: *infer_num_threads
21 |
22 | model_path: models/ch_PP-OCRv4_det_infer
23 |
24 | limit_side_len: 736
25 | limit_type: min
26 | std: [ 0.5, 0.5, 0.5 ]
27 | mean: [ 0.5, 0.5, 0.5 ]
28 |
29 | thresh: 0.3
30 | box_thresh: 0.5
31 | max_candidates: 1000
32 | unclip_ratio: 1.6
33 | use_dilation: true
34 | score_mode: fast
35 |
36 | Cls:
37 | use_cuda: false
38 | gpu_id: 0
39 | gpu_mem: 500
40 |
41 | cpu_math_library_num_threads: *infer_num_threads
42 |
43 | model_path: models/ch_ppocr_mobile_v2_cls_infer
44 |
45 | cls_image_shape: [3, 48, 192]
46 | cls_batch_num: 6
47 | cls_thresh: 0.9
48 | label_list: ['0', '180']
49 |
50 | Rec:
51 | use_cuda: false
52 | gpu_id: 0
53 | gpu_mem: 500
54 |
55 | cpu_math_library_num_threads: *infer_num_threads
56 |
57 | model_path: models/ch_PP-OCRv4_rec_infer
58 |
59 | rec_img_shape: [3, 48, 320]
60 | rec_batch_num: 6
61 |
--------------------------------------------------------------------------------
/python/rapidocr_paddle/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_paddle/models/.gitkeep
--------------------------------------------------------------------------------
/python/rapidocr_paddle/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from pathlib import Path
5 | from typing import Dict, Union
6 |
7 | import yaml
8 |
9 | from .infer_engine import PaddleInferSession
10 | from .load_image import LoadImage, LoadImageError
11 | from .logger import get_logger
12 | from .parse_parameters import UpdateParameters, init_args, update_model_path
13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side
14 | from .vis_res import VisRes
15 |
16 |
17 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]:
18 | with open(yaml_path, "rb") as f:
19 | data = yaml.load(f, Loader=yaml.Loader)
20 | return data
21 |
--------------------------------------------------------------------------------
/python/rapidocr_paddle/utils/logger.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import logging
5 | from functools import lru_cache
6 |
7 |
8 | @lru_cache(maxsize=32)
9 | def get_logger(name: str):
10 | logger = logging.getLogger(name)
11 | logger.setLevel(logging.DEBUG)
12 |
13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s"
14 | format_str = logging.Formatter(fmt)
15 |
16 | sh = logging.StreamHandler()
17 | sh.setLevel(logging.DEBUG)
18 |
19 | logger.addHandler(sh)
20 | sh.setFormatter(format_str)
21 | return logger
22 |
--------------------------------------------------------------------------------
/python/rapidocr_paddle/utils/process_img.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from typing import Tuple
5 |
6 | import cv2
7 | import numpy as np
8 |
9 |
10 | def reduce_max_side(
11 | img: np.ndarray, max_side_len: int = 2000
12 | ) -> Tuple[np.ndarray, float, float]:
13 | h, w = img.shape[:2]
14 |
15 | ratio = 1.0
16 | if max(h, w) > max_side_len:
17 | if h > w:
18 | ratio = float(max_side_len) / h
19 | else:
20 | ratio = float(max_side_len) / w
21 |
22 | resize_h = int(h * ratio)
23 | resize_w = int(w * ratio)
24 |
25 | resize_h = int(round(resize_h / 32) * 32)
26 | resize_w = int(round(resize_w / 32) * 32)
27 |
28 | try:
29 | if int(resize_w) <= 0 or int(resize_h) <= 0:
30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0")
31 | img = cv2.resize(img, (resize_w, resize_h))
32 | except Exception as exc:
33 | raise ResizeImgError() from exc
34 |
35 | ratio_h = h / resize_h
36 | ratio_w = w / resize_w
37 | return img, ratio_h, ratio_w
38 |
39 |
40 | def increase_min_side(
41 | img: np.ndarray, min_side_len: int = 30
42 | ) -> Tuple[np.ndarray, float, float]:
43 | h, w = img.shape[:2]
44 |
45 | ratio = 1.0
46 | if min(h, w) < min_side_len:
47 | if h < w:
48 | ratio = float(min_side_len) / h
49 | else:
50 | ratio = float(min_side_len) / w
51 |
52 | resize_h = int(h * ratio)
53 | resize_w = int(w * ratio)
54 |
55 | resize_h = int(round(resize_h / 32) * 32)
56 | resize_w = int(round(resize_w / 32) * 32)
57 |
58 | try:
59 | if int(resize_w) <= 0 or int(resize_h) <= 0:
60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0")
61 | img = cv2.resize(img, (resize_w, resize_h))
62 | except Exception as exc:
63 | raise ResizeImgError() from exc
64 |
65 | ratio_h = h / resize_h
66 | ratio_w = w / resize_w
67 | return img, ratio_h, ratio_w
68 |
69 |
70 | def add_round_letterbox(
71 | img: np.ndarray,
72 | padding_tuple: Tuple[int, int, int, int],
73 | ) -> np.ndarray:
74 | padded_img = cv2.copyMakeBorder(
75 | img,
76 | padding_tuple[0],
77 | padding_tuple[1],
78 | padding_tuple[2],
79 | padding_tuple[3],
80 | cv2.BORDER_CONSTANT,
81 | value=(0, 0, 0),
82 | )
83 | return padded_img
84 |
85 |
86 | class ResizeImgError(Exception):
87 | pass
88 |
--------------------------------------------------------------------------------
/python/rapidocr_torch/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import RapidOCR
5 | from .utils import LoadImageError, VisRes
6 |
--------------------------------------------------------------------------------
/python/rapidocr_torch/arch_config.yaml:
--------------------------------------------------------------------------------
1 | ch_ptocr_mobile_v2.0_cls_infer:
2 | model_type: cls
3 | algorithm: CLS
4 | Transform:
5 | Backbone:
6 | name: MobileNetV3
7 | scale: 0.35
8 | model_name: small
9 | Neck:
10 | Head:
11 | name: ClsHead
12 | class_dim: 2
13 |
14 | ch_PP-OCRv4_det_infer:
15 | model_type: det
16 | algorithm: DB
17 | Transform: null
18 | Backbone:
19 | name: PPLCNetV3
20 | scale: 0.75
21 | det: True
22 | Neck:
23 | name: RSEFPN
24 | out_channels: 96
25 | shortcut: True
26 | Head:
27 | name: DBHead
28 | k: 50
29 |
30 |
31 | ch_PP-OCRv4_det_server_infer:
32 | model_type: det
33 | algorithm: DB
34 | Transform: null
35 | Backbone:
36 | name: PPHGNet_small
37 | det: True
38 | Neck:
39 | name: LKPAN
40 | out_channels: 256
41 | intracl: true
42 | Head:
43 | name: PFHeadLocal
44 | k: 50
45 | mode: "large"
46 |
47 |
48 | ch_PP-OCRv4_rec_infer:
49 | model_type: rec
50 | algorithm: SVTR_LCNet
51 | Transform:
52 | Backbone:
53 | name: PPLCNetV3
54 | scale: 0.95
55 | Head:
56 | name: MultiHead
57 | out_channels_list:
58 | CTCLabelDecode: 6625 #'blank' + ...(6623) + ' '
59 | head_list:
60 | - CTCHead:
61 | Neck:
62 | name: svtr
63 | dims: 120
64 | depth: 2
65 | hidden_dims: 120
66 | kernel_size: [ 1, 3 ]
67 | use_guide: True
68 | Head:
69 | fc_decay: 0.00001
70 | - NRTRHead:
71 | nrtr_dim: 384
72 | max_text_length: 25
73 |
74 |
75 | ch_PP-OCRv4_rec_server_infer:
76 | model_type: rec
77 | algorithm: SVTR_HGNet
78 | Transform:
79 | Backbone:
80 | name: PPHGNet_small
81 | Head:
82 | name: MultiHead
83 | out_channels_list:
84 | CTCLabelDecode: 6625 #'blank' + ...(6623) + ' '
85 | head_list:
86 | - CTCHead:
87 | Neck:
88 | name: svtr
89 | dims: 120
90 | depth: 2
91 | hidden_dims: 120
92 | kernel_size: [ 1, 3 ]
93 | use_guide: True
94 | Head:
95 | fc_decay: 0.00001
96 | - NRTRHead:
97 | nrtr_dim: 384
98 | max_text_length: 25
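The MultiHead recognizer head shown at the top of this section consumes the head_list and out_channels_list entries above: for CTCHead it pops the Neck name ("svtr"), passes the remaining keys to SequenceEncoder, and reads the CTC output size from CTCLabelDecode. A sketch of navigating that structure with plain YAML (the file path is illustrative):

# Sketch: inspect the CTCHead neck settings from arch_config.yaml (path illustrative).
import yaml

with open("rapidocr_torch/arch_config.yaml", "rb") as f:
    arch = yaml.safe_load(f)

rec = arch["ch_PP-OCRv4_rec_infer"]
ctc = rec["Head"]["head_list"][0]["CTCHead"]
print(rec["Head"]["out_channels_list"]["CTCLabelDecode"])  # 6625
print(ctc["Neck"]["name"], ctc["Neck"]["dims"])            # svtr 120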
--------------------------------------------------------------------------------
/python/rapidocr_torch/cal_rec_boxes/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import CalRecBoxes
5 |
--------------------------------------------------------------------------------
/python/rapidocr_torch/ch_ppocr_cls/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_cls import TextClassifier
5 |
--------------------------------------------------------------------------------
/python/rapidocr_torch/ch_ppocr_cls/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from typing import List, Tuple
15 |
16 | import numpy as np
17 |
18 |
19 | class ClsPostProcess:
20 | def __init__(self, label_list: List[str]):
21 | self.label_list = label_list
22 |
23 | def __call__(self, preds: np.ndarray) -> List[Tuple[str, float]]:
24 | pred_idxs = preds.argmax(axis=1)
25 | decode_out = [
26 | (self.label_list[idx], preds[i, idx]) for i, idx in enumerate(pred_idxs)
27 | ]
28 | return decode_out
29 |
--------------------------------------------------------------------------------
/python/rapidocr_torch/ch_ppocr_det/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_detect import TextDetector
5 |
--------------------------------------------------------------------------------
/python/rapidocr_torch/ch_ppocr_rec/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .text_recognize import TextRecognizer
5 |
--------------------------------------------------------------------------------
/python/rapidocr_torch/config.yaml:
--------------------------------------------------------------------------------
1 | Global:
2 | text_score: 0.5
3 | use_det: true
4 | use_cls: true
5 | use_rec: true
6 | print_verbose: false
7 | min_height: 30
8 | width_height_ratio: 8
9 | max_side_len: 2000
10 | min_side_len: 30
11 | return_word_box: false
12 |
13 | intra_op_num_threads: &intra_nums -1
14 | inter_op_num_threads: &inter_nums -1
15 |
16 | Det:
17 | intra_op_num_threads: *intra_nums
18 | inter_op_num_threads: *inter_nums
19 |
20 | use_cuda: false
21 | use_dml: false
22 |
23 | model_path: models/ch_PP-OCRv4_det_infer.pth
24 |
25 | limit_side_len: 736
26 | limit_type: min
27 |
28 | thresh: 0.3
29 | box_thresh: 0.5
30 | max_candidates: 1000
31 | unclip_ratio: 1.5
32 | use_dilation: true
33 | score_mode: fast
34 |
35 | Cls:
36 | intra_op_num_threads: *intra_nums
37 | inter_op_num_threads: *inter_nums
38 |
39 | use_cuda: false
40 | use_dml: false
41 |
42 | model_path: models/ch_ptocr_mobile_v2.0_cls_infer.pth
43 |
44 | cls_image_shape: [3, 48, 192]
45 | cls_batch_num: 6
46 | cls_thresh: 0.9
47 | label_list: ['0', '180']
48 |
49 | Rec:
50 | intra_op_num_threads: *intra_nums
51 | inter_op_num_threads: *inter_nums
52 |
53 | use_cuda: false
54 | use_dml: false
55 |
56 | model_path: models/ch_PP-OCRv4_rec_infer.pth
57 |
58 | rec_img_shape: [3, 48, 320]
59 | rec_batch_num: 6
60 |
--------------------------------------------------------------------------------
/python/rapidocr_torch/modeling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_torch/modeling/__init__.py
--------------------------------------------------------------------------------
/python/rapidocr_torch/modeling/architectures/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import copy
16 |
17 | __all__ = ['build_model']
18 |
19 |
20 | def build_model(config, **kwargs):
21 | from .base_model import BaseModel
22 |
23 | config = copy.deepcopy(config)
24 | module_class = BaseModel(config, **kwargs)
25 | return module_class
--------------------------------------------------------------------------------
/python/rapidocr_torch/modeling/architectures/base_model.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from rapidocr_torch.modeling.backbones import build_backbone
3 | from rapidocr_torch.modeling.heads import build_head
4 | from rapidocr_torch.modeling.necks import build_neck
5 |
6 |
7 | class BaseModel(nn.Module):
8 | def __init__(self, config, **kwargs):
9 | """
10 |         The base det/cls/rec model assembled from backbone, neck and head.
11 |         Args:
12 |             config (dict): the hyper-parameters used to build the model.
13 | """
14 | super(BaseModel, self).__init__()
15 |
16 | in_channels = config.get('in_channels', 3)
17 | model_type = config['model_type']
18 |         # build backbone; the backbone is needed for det, rec and cls
19 | if 'Backbone' not in config or config['Backbone'] is None:
20 | self.use_backbone = False
21 | else:
22 | self.use_backbone = True
23 | config["Backbone"]['in_channels'] = in_channels
24 | self.backbone = build_backbone(config["Backbone"], model_type)
25 | in_channels = self.backbone.out_channels
26 |
27 | # build neck
28 | # for rec, neck can be cnn,rnn or reshape(None)
29 | # for det, neck can be FPN, BIFPN and so on.
30 | # for cls, neck should be none
31 | if 'Neck' not in config or config['Neck'] is None:
32 | self.use_neck = False
33 | else:
34 | self.use_neck = True
35 | config['Neck']['in_channels'] = in_channels
36 | self.neck = build_neck(config['Neck'])
37 | in_channels = self.neck.out_channels
38 |
39 |         # build head; the head is needed for det, rec and cls
40 | if 'Head' not in config or config['Head'] is None:
41 | self.use_head = False
42 | else:
43 | self.use_head = True
44 | config["Head"]['in_channels'] = in_channels
45 | self.head = build_head(config["Head"], **kwargs)
46 |
47 | self.return_all_feats = config.get("return_all_feats", False)
48 |
49 | self._initialize_weights()
50 |
51 | def _initialize_weights(self):
52 | # weight initialization
53 | for m in self.modules():
54 | if isinstance(m, nn.Conv2d):
55 | nn.init.kaiming_normal_(m.weight, mode='fan_out')
56 | if m.bias is not None:
57 | nn.init.zeros_(m.bias)
58 | elif isinstance(m, nn.BatchNorm2d):
59 | nn.init.ones_(m.weight)
60 | nn.init.zeros_(m.bias)
61 | elif isinstance(m, nn.Linear):
62 | nn.init.normal_(m.weight, 0, 0.01)
63 | if m.bias is not None:
64 | nn.init.zeros_(m.bias)
65 | elif isinstance(m, nn.ConvTranspose2d):
66 | nn.init.kaiming_normal_(m.weight, mode='fan_out')
67 | if m.bias is not None:
68 | nn.init.zeros_(m.bias)
69 |
70 |
71 | def forward(self, x):
72 | y = dict()
73 | if self.use_backbone:
74 | x = self.backbone(x)
75 | if isinstance(x, dict):
76 | y.update(x)
77 | else:
78 | y["backbone_out"] = x
79 | final_name = "backbone_out"
80 | if self.use_neck:
81 | x = self.neck(x)
82 | if isinstance(x, dict):
83 | y.update(x)
84 | else:
85 | y["neck_out"] = x
86 | final_name = "neck_out"
87 | if self.use_head:
88 | x = self.head(x)
89 | # for multi head, save ctc neck out for udml
90 |             if isinstance(x, dict) and 'ctc_neck' in x.keys():
91 | y['neck_out'] = x['ctc_neck']
92 | y['head_out'] = x
93 | elif isinstance(x, dict):
94 | y.update(x)
95 | else:
96 | y["head_out"] = x
97 | if self.return_all_feats:
98 | if self.training:
99 | return y
100 | elif isinstance(x, dict):
101 | return x
102 | else:
103 | return {final_name: x}
104 | else:
105 | return x
--------------------------------------------------------------------------------
/python/rapidocr_torch/modeling/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | __all__ = ['build_backbone']
16 |
17 |
18 | def build_backbone(config, model_type):
19 | if model_type == 'det':
20 | from .det_mobilenet_v3 import MobileNetV3
21 | from .rec_lcnetv3 import PPLCNetV3
22 | from .rec_hgnet import PPHGNet_small
23 | support_dict = ['MobileNetV3', 'ResNet', 'ResNet_vd', 'ResNet_SAST', 'PPLCNetV3', 'PPHGNet_small']
24 | elif model_type == 'rec' or model_type == 'cls':
25 | from .rec_mobilenet_v3 import MobileNetV3
26 | from .rec_svtrnet import SVTRNet
27 | from .rec_lcnetv3 import PPLCNetV3
28 | from .rec_hgnet import PPHGNet_small
29 | support_dict = ['MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB',
30 | 'ResNet31', 'SVTRNet', 'ViTSTR', 'DenseNet', 'PPLCNetV3', 'PPHGNet_small']
31 | else:
32 | raise NotImplementedError
33 |
34 | module_name = config.pop('name')
35 | assert module_name in support_dict, Exception(
36 |         'when model type is {}, backbone only supports {}'.format(model_type,
37 | support_dict))
38 | module_class = eval(module_name)(**config)
39 | return module_class
--------------------------------------------------------------------------------
/python/rapidocr_torch/modeling/common.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class Hswish(nn.Module):
7 | def __init__(self, inplace=True):
8 | super(Hswish, self).__init__()
9 | self.inplace = inplace
10 |
11 | def forward(self, x):
12 | return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0
13 |
14 |
15 | # out = max(0, min(1, slope*x+offset))
16 | # paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None)
17 | class Hsigmoid(nn.Module):
18 | def __init__(self, inplace=True):
19 | super(Hsigmoid, self).__init__()
20 | self.inplace = inplace
21 |
22 | def forward(self, x):
23 | # torch: F.relu6(x + 3., inplace=self.inplace) / 6.
24 | # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6.
25 | return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0
26 |
27 |
28 | class GELU(nn.Module):
29 | def __init__(self, inplace=True):
30 | super(GELU, self).__init__()
31 | self.inplace = inplace
32 |
33 | def forward(self, x):
34 | return torch.nn.functional.gelu(x)
35 |
36 |
37 | class Swish(nn.Module):
38 | def __init__(self, inplace=True):
39 | super(Swish, self).__init__()
40 | self.inplace = inplace
41 |
42 | def forward(self, x):
43 | if self.inplace:
44 | x.mul_(torch.sigmoid(x))
45 | return x
46 | else:
47 | return x * torch.sigmoid(x)
48 |
49 |
50 | class Activation(nn.Module):
51 | def __init__(self, act_type, inplace=True):
52 | super(Activation, self).__init__()
53 | act_type = act_type.lower()
54 | if act_type == "relu":
55 | self.act = nn.ReLU(inplace=inplace)
56 | elif act_type == "relu6":
57 | self.act = nn.ReLU6(inplace=inplace)
58 | elif act_type == "sigmoid":
59 | raise NotImplementedError
60 | elif act_type == "hard_sigmoid":
61 | self.act = Hsigmoid(
62 | inplace
63 | ) # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)#
64 | elif act_type == "hard_swish" or act_type == "hswish":
65 | self.act = Hswish(inplace=inplace)
66 | elif act_type == "leakyrelu":
67 | self.act = nn.LeakyReLU(inplace=inplace)
68 | elif act_type == "gelu":
69 | self.act = GELU(inplace=inplace)
70 | elif act_type == "swish":
71 | self.act = Swish(inplace=inplace)
72 | else:
73 | raise NotImplementedError
74 |
75 | def forward(self, inputs):
76 | return self.act(inputs)
77 |
--------------------------------------------------------------------------------
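A short sketch exercising the Activation factory defined above (tensor sizes are illustrative):

import torch
from rapidocr_torch.modeling.common import Activation

act = Activation("hswish")          # dispatches to Hswish
x = torch.randn(2, 8, 4, 4)
assert act(x).shape == x.shape      # element-wise, so the shape is preserved
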
/python/rapidocr_torch/modeling/heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | __all__ = ['build_head']
16 |
17 |
18 | def build_head(config, **kwargs):
19 | # det head
20 | from .det_db_head import DBHead, PFHeadLocal
21 | # rec head
22 | from .rec_ctc_head import CTCHead
23 | from .rec_multi_head import MultiHead
24 |
25 | # cls head
26 | from .cls_head import ClsHead
27 | support_dict = [
28 | 'DBHead', 'CTCHead', 'ClsHead', 'MultiHead', 'PFHeadLocal',
29 | ]
30 |
31 | module_name = config.pop('name')
32 | char_num = config.pop('char_num', 6625)
33 |     assert module_name in support_dict, Exception('head only supports {}'.format(
34 | support_dict))
35 | module_class = eval(module_name)(**config, **kwargs)
36 | return module_class
--------------------------------------------------------------------------------
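A minimal sketch of the head factory: 'name' is popped from the config and dispatched to the matching class (channel sizes here are illustrative):

from rapidocr_torch.modeling.heads import build_head

head = build_head({"name": "CTCHead", "in_channels": 64, "out_channels": 6625})
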
/python/rapidocr_torch/modeling/heads/cls_head.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | class ClsHead(nn.Module):
7 | """
8 |     Text direction classification head.
9 |     Args:
10 |         params (dict): hyper-parameters used to build the classification head
11 | """
12 |
13 | def __init__(self, in_channels, class_dim, **kwargs):
14 | super(ClsHead, self).__init__()
15 | self.pool = nn.AdaptiveAvgPool2d(1)
16 | self.fc = nn.Linear(
17 | in_channels,
18 | class_dim,
19 | bias=True)
20 |
21 | def forward(self, x):
22 | x = self.pool(x)
23 | x = torch.reshape(x, shape=[x.shape[0], x.shape[1]])
24 | x = self.fc(x)
25 | x = F.softmax(x, dim=1)
26 | return x
--------------------------------------------------------------------------------
/python/rapidocr_torch/modeling/heads/rec_ctc_head.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | class CTCHead(nn.Module):
7 | def __init__(self,
8 | in_channels,
9 | out_channels=6625,
10 | fc_decay=0.0004,
11 | mid_channels=None,
12 | return_feats=False,
13 | **kwargs):
14 | super(CTCHead, self).__init__()
15 | if mid_channels is None:
16 | self.fc = nn.Linear(
17 | in_channels,
18 | out_channels,
19 | bias=True,)
20 | else:
21 | self.fc1 = nn.Linear(
22 | in_channels,
23 | mid_channels,
24 | bias=True,
25 | )
26 | self.fc2 = nn.Linear(
27 | mid_channels,
28 | out_channels,
29 | bias=True,
30 | )
31 |
32 | self.out_channels = out_channels
33 | self.mid_channels = mid_channels
34 | self.return_feats = return_feats
35 |
36 |
37 | def forward(self, x, labels=None):
38 | if self.mid_channels is None:
39 | predicts = self.fc(x)
40 | else:
41 | x = self.fc1(x)
42 | predicts = self.fc2(x)
43 |
44 | if self.return_feats:
45 | result = (x, predicts)
46 | else:
47 | result = predicts
48 |
49 | if not self.training:
50 | predicts = F.softmax(predicts, dim=2)
51 | result = predicts
52 |
53 | return result
--------------------------------------------------------------------------------
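A small sketch of the eval-time behaviour of CTCHead: outside training it returns per-step probabilities over the character set (sizes are illustrative):

import torch
from rapidocr_torch.modeling.heads.rec_ctc_head import CTCHead

head = CTCHead(in_channels=64, out_channels=6625)
head.eval()
seq = torch.randn(1, 25, 64)                 # (batch, time steps, features)
probs = head(seq)                            # softmax over dim=2 -> (1, 25, 6625)
assert torch.allclose(probs.sum(dim=2), torch.ones(1, 25))
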
/python/rapidocr_torch/modeling/heads/rec_multi_head.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from rapidocr_torch.modeling.necks.rnn import Im2Seq, SequenceEncoder
5 | from .rec_ctc_head import CTCHead
6 |
7 | class FCTranspose(nn.Module):
8 | def __init__(self, in_channels, out_channels, only_transpose=False):
9 | super().__init__()
10 | self.only_transpose = only_transpose
11 | if not self.only_transpose:
12 | self.fc = nn.Linear(in_channels, out_channels, bias=False)
13 |
14 | def forward(self, x):
15 | if self.only_transpose:
16 | return x.permute([0, 2, 1])
17 | else:
18 | return self.fc(x.permute([0, 2, 1]))
19 |
20 |
21 | class MultiHead(nn.Module):
22 | def __init__(self, in_channels, out_channels_list, **kwargs):
23 | super().__init__()
24 | self.head_list = kwargs.pop('head_list')
25 |
26 | self.gtc_head = 'sar'
27 | assert len(self.head_list) >= 2
28 | for idx, head_name in enumerate(self.head_list):
29 | name = list(head_name)[0]
30 | if name == 'SARHead':
31 | pass
32 |
33 | elif name == 'NRTRHead':
34 | pass
35 | elif name == 'CTCHead':
36 | # ctc neck
37 | self.encoder_reshape = Im2Seq(in_channels)
38 | neck_args = self.head_list[idx][name]['Neck']
39 | encoder_type = neck_args.pop('name')
40 | self.ctc_encoder = SequenceEncoder(in_channels=in_channels, \
41 | encoder_type=encoder_type, **neck_args)
42 | # ctc head
43 | head_args = self.head_list[idx][name].get('Head', {})
44 | if head_args is None:
45 | head_args = {}
46 | self.ctc_head = eval(name)(in_channels=self.ctc_encoder.out_channels, \
47 | out_channels=out_channels_list['CTCLabelDecode'], **head_args)
48 | else:
49 | raise NotImplementedError(
50 | '{} is not supported in MultiHead yet'.format(name))
51 |
52 | def forward(self, x, data=None):
53 | ctc_encoder = self.ctc_encoder(x)
54 | return self.ctc_head(ctc_encoder)
55 |
56 |
--------------------------------------------------------------------------------
/python/rapidocr_torch/modeling/necks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | __all__ = ['build_neck']
16 |
17 |
18 | def build_neck(config):
19 | from .db_fpn import DBFPN, RSEFPN, LKPAN
20 | from .rnn import SequenceEncoder
21 | support_dict = ['DBFPN', 'SequenceEncoder', 'RSEFPN', 'LKPAN']
22 |
23 | module_name = config.pop('name')
24 |     assert module_name in support_dict, Exception('neck only supports {}'.format(
25 | support_dict))
26 | module_class = eval(module_name)(**config)
27 | return module_class
28 |
--------------------------------------------------------------------------------
/python/rapidocr_torch/modeling/necks/intracl.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 |
4 | class IntraCLBlock(nn.Module):
5 | def __init__(self, in_channels=96, reduce_factor=4):
6 | super(IntraCLBlock, self).__init__()
7 | self.channels = in_channels
8 | self.rf = reduce_factor
9 | self.conv1x1_reduce_channel = nn.Conv2d(
10 | self.channels, self.channels // self.rf, kernel_size=1, stride=1, padding=0
11 | )
12 | self.conv1x1_return_channel = nn.Conv2d(
13 | self.channels // self.rf, self.channels, kernel_size=1, stride=1, padding=0
14 | )
15 |
16 | self.v_layer_7x1 = nn.Conv2d(
17 | self.channels // self.rf,
18 | self.channels // self.rf,
19 | kernel_size=(7, 1),
20 | stride=(1, 1),
21 | padding=(3, 0),
22 | )
23 | self.v_layer_5x1 = nn.Conv2d(
24 | self.channels // self.rf,
25 | self.channels // self.rf,
26 | kernel_size=(5, 1),
27 | stride=(1, 1),
28 | padding=(2, 0),
29 | )
30 | self.v_layer_3x1 = nn.Conv2d(
31 | self.channels // self.rf,
32 | self.channels // self.rf,
33 | kernel_size=(3, 1),
34 | stride=(1, 1),
35 | padding=(1, 0),
36 | )
37 |
38 | self.q_layer_1x7 = nn.Conv2d(
39 | self.channels // self.rf,
40 | self.channels // self.rf,
41 | kernel_size=(1, 7),
42 | stride=(1, 1),
43 | padding=(0, 3),
44 | )
45 | self.q_layer_1x5 = nn.Conv2d(
46 | self.channels // self.rf,
47 | self.channels // self.rf,
48 | kernel_size=(1, 5),
49 | stride=(1, 1),
50 | padding=(0, 2),
51 | )
52 | self.q_layer_1x3 = nn.Conv2d(
53 | self.channels // self.rf,
54 | self.channels // self.rf,
55 | kernel_size=(1, 3),
56 | stride=(1, 1),
57 | padding=(0, 1),
58 | )
59 |
60 | # base
61 | self.c_layer_7x7 = nn.Conv2d(
62 | self.channels // self.rf,
63 | self.channels // self.rf,
64 | kernel_size=(7, 7),
65 | stride=(1, 1),
66 | padding=(3, 3),
67 | )
68 | self.c_layer_5x5 = nn.Conv2d(
69 | self.channels // self.rf,
70 | self.channels // self.rf,
71 | kernel_size=(5, 5),
72 | stride=(1, 1),
73 | padding=(2, 2),
74 | )
75 | self.c_layer_3x3 = nn.Conv2d(
76 | self.channels // self.rf,
77 | self.channels // self.rf,
78 | kernel_size=(3, 3),
79 | stride=(1, 1),
80 | padding=(1, 1),
81 | )
82 |
83 | self.bn = nn.BatchNorm2d(self.channels)
84 | self.relu = nn.ReLU()
85 |
86 | def forward(self, x):
87 | x_new = self.conv1x1_reduce_channel(x)
88 |
89 | x_7_c = self.c_layer_7x7(x_new)
90 | x_7_v = self.v_layer_7x1(x_new)
91 | x_7_q = self.q_layer_1x7(x_new)
92 | x_7 = x_7_c + x_7_v + x_7_q
93 |
94 | x_5_c = self.c_layer_5x5(x_7)
95 | x_5_v = self.v_layer_5x1(x_7)
96 | x_5_q = self.q_layer_1x5(x_7)
97 | x_5 = x_5_c + x_5_v + x_5_q
98 |
99 | x_3_c = self.c_layer_3x3(x_5)
100 | x_3_v = self.v_layer_3x1(x_5)
101 | x_3_q = self.q_layer_1x3(x_5)
102 | x_3 = x_3_c + x_3_v + x_3_q
103 |
104 | x_relation = self.conv1x1_return_channel(x_3)
105 |
106 | x_relation = self.bn(x_relation)
107 | x_relation = self.relu(x_relation)
108 |
109 | return x + x_relation
110 |
111 |
112 | def build_intraclblock_list(num_block):
113 | IntraCLBlock_list = nn.ModuleList()
114 | for i in range(num_block):
115 | IntraCLBlock_list.append(IntraCLBlock())
116 |
117 | return IntraCLBlock_list
118 |
--------------------------------------------------------------------------------
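IntraCLBlock is a shape-preserving residual block, so its output matches the input exactly (sizes below are illustrative):

import torch
from rapidocr_torch.modeling.necks.intracl import IntraCLBlock

block = IntraCLBlock(in_channels=96, reduce_factor=4)
x = torch.randn(1, 96, 40, 40)
assert block(x).shape == x.shape
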
/python/rapidocr_torch/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/rapidocr_torch/models/.gitkeep
--------------------------------------------------------------------------------
/python/rapidocr_torch/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from pathlib import Path
5 | from typing import Dict, Union
6 |
7 | import yaml
8 |
9 | from .infer_engine import TorchInferSession
10 | from .load_image import LoadImage, LoadImageError
11 | from .logger import get_logger
12 | from .parse_parameters import UpdateParameters, init_args, update_model_path
13 | from .process_img import add_round_letterbox, increase_min_side, reduce_max_side
14 | from .vis_res import VisRes
15 |
16 |
17 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]:
18 | with open(yaml_path, "rb") as f:
19 | data = yaml.load(f, Loader=yaml.Loader)
20 | return data
21 |
--------------------------------------------------------------------------------
/python/rapidocr_torch/utils/infer_engine.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from pathlib import Path
5 | from typing import Dict, Optional, Union
6 |
7 | import numpy as np
8 | import torch
9 | import yaml
10 |
11 | root_dir = Path(__file__).resolve().parent.parent
12 | DEFAULT_CFG_PATH = root_dir / "arch_config.yaml"
13 |
14 |
15 | def read_yaml(yaml_path: Union[str, Path]) -> Dict[str, Dict]:
16 | with open(yaml_path, "rb") as f:
17 | data = yaml.load(f, Loader=yaml.Loader)
18 | return data
19 |
20 |
21 | from rapidocr_torch.modeling.architectures.base_model import BaseModel
22 |
23 | from .logger import get_logger
24 |
25 |
26 | class TorchInferSession:
27 | def __init__(self, config, mode: Optional[str] = None) -> None:
28 | all_arch_config = read_yaml(DEFAULT_CFG_PATH)
29 |
30 | self.logger = get_logger("TorchInferSession")
31 | self.mode = mode
32 | model_path = Path(config["model_path"])
33 | self._verify_model(model_path)
34 | file_name = model_path.stem
35 | if file_name not in all_arch_config:
36 |             raise ValueError(f"architecture {file_name} is not in arch_config.yaml")
37 | arch_config = all_arch_config[file_name]
38 | self.predictor = BaseModel(arch_config)
39 | self.predictor.load_state_dict(torch.load(model_path, weights_only=True))
40 | self.predictor.eval()
41 | self.use_gpu = False
42 | if config["use_cuda"]:
43 | self.predictor.cuda()
44 | self.use_gpu = True
45 |
46 | def __call__(self, img: np.ndarray):
47 | with torch.no_grad():
48 | inp = torch.from_numpy(img)
49 | if self.use_gpu:
50 | inp = inp.cuda()
51 |             # align the value-fetching logic with the ONNX backend
52 | outputs = self.predictor(inp).unsqueeze(0)
53 | return outputs.cpu().numpy()
54 |
55 | @staticmethod
56 | def _verify_model(model_path):
57 | model_path = Path(model_path)
58 | if not model_path.exists():
59 |             raise FileNotFoundError(f"{model_path} does not exist.")
60 | if not model_path.is_file():
61 | raise FileExistsError(f"{model_path} is not a file.")
62 |
63 |
64 | class TorchInferError(Exception):
65 | pass
66 |
--------------------------------------------------------------------------------
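A minimal usage sketch for TorchInferSession, assuming the .pth file exists locally and its stem matches an entry in arch_config.yaml (the model path and input size are illustrative):

import numpy as np
from rapidocr_torch.utils.infer_engine import TorchInferSession

cfg = {"model_path": "models/ch_PP-OCRv4_det_infer.pth", "use_cuda": False}
session = TorchInferSession(cfg)
dummy = np.random.rand(1, 3, 736, 736).astype(np.float32)
out = session(dummy)                         # numpy array, computed on the CPU
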
/python/rapidocr_torch/utils/logger.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import logging
5 | from functools import lru_cache
6 |
7 |
8 | @lru_cache(maxsize=32)
9 | def get_logger(name: str) -> logging.Logger:
10 | logger = logging.getLogger(name)
11 | logger.setLevel(logging.DEBUG)
12 |
13 | fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s"
14 | format_str = logging.Formatter(fmt)
15 |
16 | sh = logging.StreamHandler()
17 | sh.setLevel(logging.DEBUG)
18 |
19 | logger.addHandler(sh)
20 | sh.setFormatter(format_str)
21 | return logger
22 |
--------------------------------------------------------------------------------
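Because get_logger is wrapped in lru_cache, repeated calls with the same name return the same configured logger instead of attaching duplicate handlers:

from rapidocr_torch.utils.logger import get_logger

logger = get_logger("demo")
assert logger is get_logger("demo")
logger.info("handler and formatter are already attached")
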
/python/rapidocr_torch/utils/process_img.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from typing import Tuple
5 |
6 | import cv2
7 | import numpy as np
8 |
9 |
10 | def reduce_max_side(
11 | img: np.ndarray, max_side_len: int = 2000
12 | ) -> Tuple[np.ndarray, float, float]:
13 | h, w = img.shape[:2]
14 |
15 | ratio = 1.0
16 | if max(h, w) > max_side_len:
17 | if h > w:
18 | ratio = float(max_side_len) / h
19 | else:
20 | ratio = float(max_side_len) / w
21 |
22 | resize_h = int(h * ratio)
23 | resize_w = int(w * ratio)
24 |
25 | resize_h = int(round(resize_h / 32) * 32)
26 | resize_w = int(round(resize_w / 32) * 32)
27 |
28 | try:
29 | if int(resize_w) <= 0 or int(resize_h) <= 0:
30 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0")
31 | img = cv2.resize(img, (resize_w, resize_h))
32 | except Exception as exc:
33 | raise ResizeImgError() from exc
34 |
35 | ratio_h = h / resize_h
36 | ratio_w = w / resize_w
37 | return img, ratio_h, ratio_w
38 |
39 |
40 | def increase_min_side(
41 | img: np.ndarray, min_side_len: int = 30
42 | ) -> Tuple[np.ndarray, float, float]:
43 | h, w = img.shape[:2]
44 |
45 | ratio = 1.0
46 | if min(h, w) < min_side_len:
47 | if h < w:
48 | ratio = float(min_side_len) / h
49 | else:
50 | ratio = float(min_side_len) / w
51 |
52 | resize_h = int(h * ratio)
53 | resize_w = int(w * ratio)
54 |
55 | resize_h = int(round(resize_h / 32) * 32)
56 | resize_w = int(round(resize_w / 32) * 32)
57 |
58 | try:
59 | if int(resize_w) <= 0 or int(resize_h) <= 0:
60 | raise ResizeImgError("resize_w or resize_h is less than or equal to 0")
61 | img = cv2.resize(img, (resize_w, resize_h))
62 | except Exception as exc:
63 | raise ResizeImgError() from exc
64 |
65 | ratio_h = h / resize_h
66 | ratio_w = w / resize_w
67 | return img, ratio_h, ratio_w
68 |
69 |
70 | def add_round_letterbox(
71 | img: np.ndarray,
72 | padding_tuple: Tuple[int, int, int, int],
73 | ) -> np.ndarray:
74 | padded_img = cv2.copyMakeBorder(
75 | img,
76 | padding_tuple[0],
77 | padding_tuple[1],
78 | padding_tuple[2],
79 | padding_tuple[3],
80 | cv2.BORDER_CONSTANT,
81 | value=(0, 0, 0),
82 | )
83 | return padded_img
84 |
85 |
86 | class ResizeImgError(Exception):
87 | pass
88 |
--------------------------------------------------------------------------------
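A small sketch of the two resize helpers above; both return the resized image together with the height/width ratios needed to map detected boxes back onto the original image (sizes are illustrative):

import numpy as np
from rapidocr_torch.utils.process_img import increase_min_side, reduce_max_side

small = np.zeros((20, 100, 3), dtype=np.uint8)
resized, ratio_h, ratio_w = increase_min_side(small, min_side_len=30)

big = np.zeros((3000, 1000, 3), dtype=np.uint8)
resized, ratio_h, ratio_w = reduce_max_side(big, max_side_len=2000)
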
/python/requirements.txt:
--------------------------------------------------------------------------------
1 | pyclipper>=1.2.0
2 | opencv_python>=4.5.1.48
3 | numpy>=1.19.5,<3.0.0
4 | six>=1.15.0
5 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug
6 | PyYAML
7 | Pillow
8 | tqdm
9 | omegaconf
10 | requests
11 | colorlog
--------------------------------------------------------------------------------
/python/requirements_ort.txt:
--------------------------------------------------------------------------------
1 | pyclipper>=1.2.0
2 | opencv_python>=4.5.1.48
3 | numpy>=1.19.5,<3.0.0
4 | six>=1.15.0
5 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug
6 | PyYAML
7 | Pillow
8 | onnxruntime>=1.7.0
9 | tqdm
--------------------------------------------------------------------------------
/python/requirements_paddle.txt:
--------------------------------------------------------------------------------
1 | pyclipper>=1.2.0
2 | opencv_python>=4.5.1.48
3 | numpy>=1.19.5,<3.0.0
4 | six>=1.15.0
5 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug
6 | PyYAML
7 | Pillow
8 | tqdm
--------------------------------------------------------------------------------
/python/requirements_torch.txt:
--------------------------------------------------------------------------------
1 | pyclipper>=1.2.0
2 | opencv_python>=4.5.1.48
3 | numpy>=1.19.5,<3.0.0
4 | six>=1.15.0
5 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug
6 | PyYAML
7 | Pillow
8 | tqdm
9 | torch
10 | torchvision
--------------------------------------------------------------------------------
/python/requirements_vino.txt:
--------------------------------------------------------------------------------
1 | pyclipper>=1.2.0
2 | openvino>=2022.2.0,<=2024.0.0
3 | opencv_python>=4.5.1.48
4 | numpy>=1.19.5,<3.0.0
5 | six>=1.15.0
6 | Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug
7 | PyYAML
8 | Pillow
9 | tqdm
--------------------------------------------------------------------------------
/python/setup.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import sys
5 | from pathlib import Path
6 | from typing import List, Union
7 |
8 | import setuptools
9 | from get_pypi_latest_version import GetPyPiLatestVersion
10 |
11 |
12 | def read_txt(txt_path: Union[Path, str]) -> List[str]:
13 | with open(txt_path, "r", encoding="utf-8") as f:
14 | data = [v.rstrip("\n") for v in f]
15 | return data
16 |
17 |
18 | def get_readme():
19 | root_dir = Path(__file__).resolve().parent.parent
20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr.md")
21 | print(readme_path)
22 | with open(readme_path, "r", encoding="utf-8") as f:
23 | readme = f.read()
24 | return readme
25 |
26 |
27 | MODULE_NAME = "rapidocr"
28 |
29 | obtainer = GetPyPiLatestVersion()
30 | try:
31 | latest_version = obtainer(MODULE_NAME)
32 | except Exception:
33 | latest_version = "0.0.0"
34 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True)
35 |
36 | if len(sys.argv) > 2:
37 | match_str = " ".join(sys.argv[2:])
38 | matched_versions = obtainer.extract_version(match_str)
39 | if matched_versions:
40 | VERSION_NUM = matched_versions
41 | sys.argv = sys.argv[:2]
42 |
43 | project_urls = {
44 | "Documentation": "https://rapidai.github.io/RapidOCRDocs",
45 | "Changelog": "https://github.com/RapidAI/RapidOCR/releases",
46 | }
47 |
48 | setuptools.setup(
49 | name=MODULE_NAME,
50 | version=VERSION_NUM,
51 | platforms="Any",
52 | description="Awesome OCR Library",
53 | long_description=get_readme(),
54 | long_description_content_type="text/markdown",
55 | author="SWHL",
56 | author_email="liekkaskono@163.com",
57 | url="https://github.com/RapidAI/RapidOCR",
58 | project_urls=project_urls,
59 | license="Apache-2.0",
60 | include_package_data=True,
61 | install_requires=read_txt("requirements.txt"),
62 | package_dir={"": MODULE_NAME},
63 | packages=setuptools.find_namespace_packages(where=MODULE_NAME),
64 | package_data={"": ["*.onnx", "*.yaml", "*.txt"]},
65 | keywords=[
66 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr"
67 | ],
68 | classifiers=[
69 | "Programming Language :: Python :: 3.6",
70 | "Programming Language :: Python :: 3.7",
71 | "Programming Language :: Python :: 3.8",
72 | "Programming Language :: Python :: 3.9",
73 | "Programming Language :: Python :: 3.10",
74 | "Programming Language :: Python :: 3.11",
75 | "Programming Language :: Python :: 3.12",
76 | ],
77 | python_requires=">=3.6,<4",
78 | entry_points={
79 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"],
80 | },
81 | )
82 |
--------------------------------------------------------------------------------
/python/setup_onnxruntime.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import sys
5 | from pathlib import Path
6 | from typing import List, Union
7 |
8 | import setuptools
9 | from get_pypi_latest_version import GetPyPiLatestVersion
10 |
11 |
12 | def read_txt(txt_path: Union[Path, str]) -> List[str]:
13 | with open(txt_path, "r", encoding="utf-8") as f:
14 | data = [v.rstrip("\n") for v in f]
15 | return data
16 |
17 |
18 | def get_readme():
19 | root_dir = Path(__file__).resolve().parent.parent
20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_ort.md")
21 | print(readme_path)
22 | with open(readme_path, "r", encoding="utf-8") as f:
23 | readme = f.read()
24 | return readme
25 |
26 |
27 | MODULE_NAME = "rapidocr_onnxruntime"
28 |
29 | obtainer = GetPyPiLatestVersion()
30 | latest_version = obtainer(MODULE_NAME)
31 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True)
32 |
33 | if len(sys.argv) > 2:
34 | match_str = " ".join(sys.argv[2:])
35 | matched_versions = obtainer.extract_version(match_str)
36 | if matched_versions:
37 | VERSION_NUM = matched_versions
38 | sys.argv = sys.argv[:2]
39 |
40 | project_urls = {
41 | "Documentation": "https://rapidai.github.io/RapidOCRDocs",
42 | "Changelog": "https://github.com/RapidAI/RapidOCR/releases",
43 | }
44 |
45 | setuptools.setup(
46 | name=MODULE_NAME,
47 | version=VERSION_NUM,
48 | platforms="Any",
49 | description="A cross platform OCR Library based on OnnxRuntime.",
50 | long_description=get_readme(),
51 | long_description_content_type="text/markdown",
52 | author="SWHL",
53 | author_email="liekkaskono@163.com",
54 | url="https://github.com/RapidAI/RapidOCR",
55 | project_urls=project_urls,
56 | license="Apache-2.0",
57 | include_package_data=True,
58 | install_requires=read_txt("requirements_ort.txt"),
59 | package_dir={"": MODULE_NAME},
60 | packages=setuptools.find_namespace_packages(where=MODULE_NAME),
61 | package_data={"": ["*.onnx", "*.yaml"]},
62 | keywords=[
63 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr"
64 | ],
65 | classifiers=[
66 | "Programming Language :: Python :: 3.6",
67 | "Programming Language :: Python :: 3.7",
68 | "Programming Language :: Python :: 3.8",
69 | "Programming Language :: Python :: 3.9",
70 | "Programming Language :: Python :: 3.10",
71 | "Programming Language :: Python :: 3.11",
72 | "Programming Language :: Python :: 3.12",
73 | ],
74 | python_requires=">=3.6,<3.13",
75 | entry_points={
76 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"],
77 | },
78 | )
79 |
--------------------------------------------------------------------------------
/python/setup_openvino.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import sys
5 | from pathlib import Path
6 | from typing import List, Union
7 |
8 | import setuptools
9 | from get_pypi_latest_version import GetPyPiLatestVersion
10 |
11 |
12 | def read_txt(txt_path: Union[Path, str]) -> List[str]:
13 | with open(txt_path, "r", encoding="utf-8") as f:
14 | data = [v.rstrip("\n") for v in f]
15 | return data
16 |
17 |
18 | def get_readme():
19 | root_dir = Path(__file__).resolve().parent.parent
20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_vino.md")
21 | print(readme_path)
22 | with open(readme_path, "r", encoding="utf-8") as f:
23 | readme = f.read()
24 | return readme
25 |
26 |
27 | MODULE_NAME = "rapidocr_openvino"
28 |
29 | obtainer = GetPyPiLatestVersion()
30 | latest_version = obtainer(MODULE_NAME)
31 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True)
32 |
33 | if len(sys.argv) > 2:
34 | match_str = " ".join(sys.argv[2:])
35 | matched_versions = obtainer.extract_version(match_str)
36 | if matched_versions:
37 | VERSION_NUM = matched_versions
38 | sys.argv = sys.argv[:2]
39 |
40 | setuptools.setup(
41 | name=MODULE_NAME,
42 | version=VERSION_NUM,
43 | platforms="Any",
44 | description="A cross platform OCR Library based on OpenVINO.",
45 | long_description=get_readme(),
46 | long_description_content_type="text/markdown",
47 | author="SWHL",
48 | author_email="liekkaskono@163.com",
49 | url="https://github.com/RapidAI/RapidOCR",
50 | license="Apache-2.0",
51 | include_package_data=True,
52 | install_requires=read_txt("requirements_vino.txt"),
53 | package_dir={"": MODULE_NAME},
54 | packages=setuptools.find_namespace_packages(where=MODULE_NAME),
55 | package_data={"": ["*.onnx", "*.yaml", "*.txt"]},
56 | keywords=[
57 | "ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr"
58 | ],
59 | classifiers=[
60 | "Programming Language :: Python :: 3.6",
61 | "Programming Language :: Python :: 3.7",
62 | "Programming Language :: Python :: 3.8",
63 | "Programming Language :: Python :: 3.9",
64 | "Programming Language :: Python :: 3.10",
65 | "Programming Language :: Python :: 3.11",
66 | "Programming Language :: Python :: 3.12",
67 | ],
68 | python_requires=">=3.6,<3.13",
69 | entry_points={
70 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"],
71 | },
72 | )
73 |
--------------------------------------------------------------------------------
/python/setup_paddle.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import sys
5 | from pathlib import Path
6 | from typing import List, Union
7 |
8 | import setuptools
9 | from get_pypi_latest_version import GetPyPiLatestVersion
10 |
11 |
12 | def read_txt(txt_path: Union[Path, str]) -> List[str]:
13 | with open(txt_path, "r", encoding="utf-8") as f:
14 | data = [v.rstrip("\n") for v in f]
15 | return data
16 |
17 |
18 | def get_readme():
19 | root_dir = Path(__file__).resolve().parent.parent
20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_paddle.md")
21 | print(readme_path)
22 | with open(readme_path, "r", encoding="utf-8") as f:
23 | readme = f.read()
24 | return readme
25 |
26 |
27 | MODULE_NAME = "rapidocr_paddle"
28 |
29 | obtainer = GetPyPiLatestVersion()
30 | try:
31 | latest_version = obtainer(MODULE_NAME)
32 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True)
33 | except Exception:
34 | VERSION_NUM = "0.0.1"
35 |
36 | if len(sys.argv) > 2:
37 | match_str = " ".join(sys.argv[2:])
38 | matched_versions = obtainer.extract_version(match_str)
39 | if matched_versions:
40 | VERSION_NUM = matched_versions
41 | sys.argv = sys.argv[:2]
42 |
43 | setuptools.setup(
44 | name=MODULE_NAME,
45 | version=VERSION_NUM,
46 | platforms="Any",
47 | description="A cross platform OCR Library based on PaddlePaddle.",
48 | long_description=get_readme(),
49 | long_description_content_type="text/markdown",
50 | author="SWHL",
51 | author_email="liekkaskono@163.com",
52 | url="https://github.com/RapidAI/RapidOCR",
53 | license="Apache-2.0",
54 | include_package_data=True,
55 | install_requires=read_txt("requirements_paddle.txt"),
56 | package_dir={"": MODULE_NAME},
57 | packages=setuptools.find_namespace_packages(where=MODULE_NAME),
58 | package_data={
59 | "": ["*.txt", "*.yaml", "*.pdiparams", "*.pdiparams.info", "*.pdmodel"]
60 | },
61 | keywords=[
62 | "ocr,text_detection,text_recognition,dbnet,paddlepaddle,paddleocr,rapidocr"
63 | ],
64 | classifiers=[
65 | "Programming Language :: Python :: 3.6",
66 | "Programming Language :: Python :: 3.7",
67 | "Programming Language :: Python :: 3.8",
68 | "Programming Language :: Python :: 3.9",
69 | "Programming Language :: Python :: 3.10",
70 | "Programming Language :: Python :: 3.11",
71 | "Programming Language :: Python :: 3.12",
72 | ],
73 | python_requires=">=3.6,<3.13",
74 | entry_points={
75 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"],
76 | },
77 | extras_require={
78 | "cpu": ["paddlepaddle"],
79 | "gpu": ["paddlepaddle-gpu"],
80 | },
81 | )
82 |
--------------------------------------------------------------------------------
/python/setup_torch.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import sys
5 | from pathlib import Path
6 | from typing import List, Union
7 |
8 | import setuptools
9 | from get_pypi_latest_version import GetPyPiLatestVersion
10 |
11 |
12 | def read_txt(txt_path: Union[Path, str]) -> List[str]:
13 | with open(txt_path, "r", encoding="utf-8") as f:
14 | data = [v.rstrip("\n") for v in f]
15 | return data
16 |
17 |
18 | def get_readme():
19 | root_dir = Path(__file__).resolve().parent.parent
20 | readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_ort.md")
21 | print(readme_path)
22 | with open(readme_path, "r", encoding="utf-8") as f:
23 | readme = f.read()
24 | return readme
25 |
26 |
27 | MODULE_NAME = "rapidocr_torch"
28 |
29 | obtainer = GetPyPiLatestVersion()
30 | try:
31 | latest_version = obtainer(MODULE_NAME)
32 | except Exception:
33 | latest_version = "0.0.0"
34 | VERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True)
35 |
36 | if len(sys.argv) > 2:
37 | match_str = " ".join(sys.argv[2:])
38 | matched_versions = obtainer.extract_version(match_str)
39 | if matched_versions:
40 | VERSION_NUM = matched_versions
41 | sys.argv = sys.argv[:2]
42 |
43 | project_urls = {
44 | "Documentation": "https://rapidai.github.io/RapidOCRDocs",
45 | "Changelog": "https://github.com/RapidAI/RapidOCR/releases",
46 | }
47 |
48 | setuptools.setup(
49 | name=MODULE_NAME,
50 | version=VERSION_NUM,
51 | platforms="Any",
52 | description="A cross platform OCR Library based on pytorch.",
53 | long_description=get_readme(),
54 | long_description_content_type="text/markdown",
55 | author="SWHL",
56 | author_email="liekkaskono@163.com",
57 | url="https://github.com/RapidAI/RapidOCR",
58 | project_urls=project_urls,
59 | license="Apache-2.0",
60 | include_package_data=True,
61 | install_requires=read_txt("requirements_torch.txt"),
62 | package_dir={"": MODULE_NAME},
63 | packages=setuptools.find_namespace_packages(where=MODULE_NAME),
64 | package_data={"": ["*.pth", "*.yaml", "*.txt"]},
65 | keywords=[
66 | "ocr,text_detection,text_recognition,db,onnxruntime,pytorch,paddleocr,openvino,rapidocr"
67 | ],
68 | classifiers=[
69 | "Programming Language :: Python :: 3.10",
70 | "Programming Language :: Python :: 3.11",
71 | "Programming Language :: Python :: 3.12",
72 | ],
73 | python_requires=">=3.10,<3.13",
74 | entry_points={
75 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"],
76 | },
77 | )
78 |
--------------------------------------------------------------------------------
/python/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 |
--------------------------------------------------------------------------------
/python/tests/base_module.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import importlib
5 | import sys
6 | from dataclasses import dataclass
7 | from pathlib import Path
8 | from typing import Optional, Union
9 |
10 | import requests
11 | import yaml
12 | from tqdm import tqdm
13 |
14 |
15 | class BaseModule:
16 | def __init__(self, package_name: str = "rapidocr_onnxruntime"):
17 | self.package_name = package_name
18 | self.root_dir = Path(__file__).resolve().parent.parent
19 | self.package_dir = self.root_dir / self.package_name
20 | self.tests_dir = self.root_dir / "tests"
21 |
22 | sys.path.append(str(self.root_dir))
23 | sys.path.append(str(self.package_dir))
24 |
25 | def init_module(self, module_name: str, class_name: Optional[str] = None):
26 | if class_name is None:
27 | module_part = importlib.import_module(f"{self.package_name}")
28 | return module_part
29 | module_part = importlib.import_module(f"{self.package_name}.{module_name}")
30 | return getattr(module_part, class_name)
31 |
32 | @staticmethod
33 | def read_yaml(yaml_path: str):
34 | with open(yaml_path, "rb") as f:
35 | data = yaml.load(f, Loader=yaml.Loader)
36 | return data
37 |
38 |
39 | def download_file(url: str, save_path: Union[str, Path]):
40 | response = requests.get(url, stream=True, timeout=60)
41 | status_code = response.status_code
42 |
43 | if status_code != 200:
44 | raise DownloadModelError("Something went wrong while downloading models")
45 |
46 | total_size_in_bytes = int(response.headers.get("content-length", 1))
47 | block_size = 1024 # 1 Kibibyte
48 | with tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) as pb:
49 | with open(save_path, "wb") as file:
50 | for data in response.iter_content(block_size):
51 | pb.update(len(data))
52 | file.write(data)
53 |
54 |
55 | class DownloadModelError(Exception):
56 | pass
57 |
58 |
59 | @dataclass
60 | class Platform:
61 | mac: str = "Darwin"
62 | windows: str = "Windows"
63 | linux: str = "Linux"
64 |
--------------------------------------------------------------------------------
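A usage sketch for the download_file helper above (the URL is a placeholder, not a real model link):

from pathlib import Path

save_path = Path("models/ch_PP-OCRv4_det_infer.onnx")
save_path.parent.mkdir(parents=True, exist_ok=True)
download_file("https://example.com/ch_PP-OCRv4_det_infer.onnx", save_path)
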
/python/tests/test_files/black_font_color_transparent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/black_font_color_transparent.png
--------------------------------------------------------------------------------
/python/tests/test_files/ch_doc_server.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/ch_doc_server.png
--------------------------------------------------------------------------------
/python/tests/test_files/ch_en_num.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/ch_en_num.jpg
--------------------------------------------------------------------------------
/python/tests/test_files/devanagari.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/devanagari.jpg
--------------------------------------------------------------------------------
/python/tests/test_files/empty_black.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/empty_black.jpg
--------------------------------------------------------------------------------
/python/tests/test_files/en.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/en.jpg
--------------------------------------------------------------------------------
/python/tests/test_files/issue_170.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/issue_170.png
--------------------------------------------------------------------------------
/python/tests/test_files/japan.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/japan.jpg
--------------------------------------------------------------------------------
/python/tests/test_files/korean.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/korean.jpg
--------------------------------------------------------------------------------
/python/tests/test_files/short.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/short.png
--------------------------------------------------------------------------------
/python/tests/test_files/test_letterbox_like.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/test_letterbox_like.jpg
--------------------------------------------------------------------------------
/python/tests/test_files/test_without_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/test_without_det.jpg
--------------------------------------------------------------------------------
/python/tests/test_files/text_cls.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_cls.jpg
--------------------------------------------------------------------------------
/python/tests/test_files/text_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_det.jpg
--------------------------------------------------------------------------------
/python/tests/test_files/text_rec.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_rec.jpg
--------------------------------------------------------------------------------
/python/tests/test_files/text_vertical_words.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/text_vertical_words.png
--------------------------------------------------------------------------------
/python/tests/test_files/two_dim_image.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/two_dim_image.npy
--------------------------------------------------------------------------------
/python/tests/test_files/white_font_color_transparent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidOCR/123a129c613ca99c3b007f0591a3587cc01a4c32/python/tests/test_files/white_font_color_transparent.png
--------------------------------------------------------------------------------