├── .github └── workflows │ ├── builder.yml │ ├── linux.yml │ ├── macos.yml │ └── windows.yml ├── .gitignore ├── BUILD.md ├── CMakeLists.txt ├── LICENSE ├── OcrCRTLinkage.cmake ├── README.md ├── benchmark └── benchmark.cpp ├── build-default.bat ├── build-default.sh ├── build.bat ├── build.sh ├── generate-vs-project.bat ├── images ├── 1.jpg ├── clear.cmd └── clear.sh ├── include ├── AngleNet.h ├── CrnnNet.h ├── DbNet.h ├── OcrLite.h ├── OcrLiteCApi.h ├── OcrLiteImpl.h ├── OcrResultUtils.h ├── OcrStruct.h ├── OcrUtils.h ├── clipper.hpp ├── getopt.h ├── main.h └── version.h ├── models └── ppocr_keys_v1.txt ├── onnxruntime-DirectML ├── OnnxRuntimeWrapper.cmake └── windows-x64 │ └── OnnxRuntimeConfig.cmake ├── onnxruntime-gpu ├── OnnxRuntimeWrapper.cmake ├── README.md ├── linux │ └── OnnxRuntimeConfig.cmake └── windows-x64 │ └── OnnxRuntimeConfig.cmake ├── onnxruntime-static └── OnnxRuntimeWrapper.cmake ├── opencv-static └── OpenCVWrapperConfig.cmake ├── others ├── README-bin.txt ├── README-clib.txt └── README-jni.txt ├── run-benchmark.bat ├── run-benchmark.sh ├── run-test.bat ├── run-test.sh ├── src ├── AngleNet.cpp ├── CrnnNet.cpp ├── DbNet.cpp ├── OcrLite.cpp ├── OcrLiteCApi.cpp ├── OcrLiteImpl.cpp ├── OcrLiteJni.cpp ├── OcrResultUtils.cpp ├── OcrUtils.cpp ├── clipper.cpp ├── getopt.cpp └── main.cpp ├── valgrind-massif.sh └── valgrind-memcheck.sh /.github/workflows/builder.yml: -------------------------------------------------------------------------------- 1 | name: Builder 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | ubuntu1804: 8 | runs-on: ubuntu-18.04 9 | 10 | env: 11 | ONNX_VERSION: 1.14.0 12 | ONNX_PKG_NAME: onnxruntime-1.14.0-ubuntu1804-static 13 | CV_VERSION: 4.6.0 14 | CV_PKG_NAME: opencv-4.6.0-ubuntu1804 15 | 16 | steps: 17 | # 检出代码 18 | - uses: actions/checkout@v3 19 | 20 | # 下载onnxruntime-static 21 | - name: download onnxruntime-static 22 | run: | 23 | cd onnxruntime-static 24 | wget 
https://github.com/RapidAI/OnnxruntimeBuilder/releases/download/${{ env.ONNX_VERSION }}/${{ env.ONNX_PKG_NAME }}.7z -O ${{ env.ONNX_PKG_NAME }}.7z 25 | 7z x ${{ env.ONNX_PKG_NAME }}.7z -aoa 26 | 27 | # 下载opencv-static 28 | - name: download opencv-static 29 | run: | 30 | cd opencv-static 31 | wget https://github.com/RapidAI/OpenCVBuilder/releases/download/${{ env.CV_VERSION }}/${{ env.CV_PKG_NAME }}.7z -O ${{ env.CV_PKG_NAME }}.7z 32 | 7z x ${{ env.CV_PKG_NAME }}.7z -aoa 33 | 34 | # 编译 35 | - name: build 36 | run: | 37 | chmod a+x build-default.sh &&./build-default.sh 38 | 39 | # 使用7z压缩 40 | - name: 7zip 41 | run: | 42 | mkdir linux-bin 43 | cp run-benchmark.sh linux-bin/run-benchmark.sh 44 | cp run-test.sh linux-bin/run-test.sh 45 | cp -r images linux-bin/images 46 | mv Linux-BIN-CPU/install/bin linux-bin/Linux-BIN-CPU 47 | 7z a linux-bin.7z linux-bin 48 | mkdir linux-jni 49 | mv Linux-JNI-CPU/install linux-jni/Linux-JNI-CPU 50 | 7z a linux-jni.7z linux-jni 51 | mkdir linux-clib 52 | mv Linux-CLIB-CPU/install linux-clib/Linux-CLIB-CPU 53 | 7z a linux-clib.7z linux-clib 54 | 55 | # 上传artifact 56 | - name: upload 57 | uses: actions/upload-artifact@v3 58 | with: 59 | name: linux-bin 60 | path: linux-bin.7z 61 | 62 | - name: upload 63 | uses: actions/upload-artifact@v3 64 | with: 65 | name: linux-jni 66 | path: linux-jni.7z 67 | 68 | - name: upload 69 | uses: actions/upload-artifact@v3 70 | with: 71 | name: linux-clib 72 | path: linux-clib.7z 73 | 74 | macos1015: 75 | runs-on: macos-latest 76 | 77 | env: 78 | ONNX_VERSION: 1.14.0 79 | ONNX_PKG_NAME: onnxruntime-1.14.0-macos1015-static 80 | CV_VERSION: 4.6.0 81 | CV_PKG_NAME: opencv-4.6.0-macos1015 82 | 83 | steps: 84 | # 检出代码 85 | - uses: actions/checkout@v3 86 | 87 | # 安装openmp 88 | - name: install openmp 89 | run: | 90 | brew install libomp 91 | 92 | # 下载onnxruntime-static 93 | - name: download onnxruntime-static 94 | run: | 95 | cd onnxruntime-static 96 | wget 
https://github.com/RapidAI/OnnxruntimeBuilder/releases/download/${{ env.ONNX_VERSION }}/${{ env.ONNX_PKG_NAME }}.7z -O ${{ env.ONNX_PKG_NAME }}.7z 97 | 7z x ${{ env.ONNX_PKG_NAME }}.7z -aoa 98 | 99 | # 下载opencv-static 100 | - name: download opencv-static 101 | run: | 102 | cd opencv-static 103 | wget https://github.com/RapidAI/OpenCVBuilder/releases/download/${{ env.CV_VERSION }}/${{ env.CV_PKG_NAME }}.7z -O ${{ env.CV_PKG_NAME }}.7z 104 | 7z x ${{ env.CV_PKG_NAME }}.7z -aoa 105 | 106 | # 编译 107 | - name: build 108 | run: | 109 | chmod a+x build-default.sh &&./build-default.sh 110 | 111 | # 使用7z压缩 112 | - name: 7zip 113 | run: | 114 | mkdir macos-bin 115 | cp run-benchmark.sh macos-bin/run-benchmark.sh 116 | cp run-test.sh macos-bin/run-test.sh 117 | cp -r images macos-bin/images 118 | mv Darwin-BIN-CPU/install/bin macos-bin/Darwin-BIN-CPU 119 | 7z a macos-bin.7z macos-bin 120 | mkdir macos-jni 121 | mv Darwin-JNI-CPU/install macos-jni/Darwin-JNI-CPU 122 | 7z a macos-jni.7z macos-jni 123 | mkdir macos-clib 124 | mv Darwin-CLIB-CPU/install macos-clib/Darwin-CLIB-CPU 125 | 7z a macos-clib.7z macos-clib 126 | 127 | # 上传artifact 128 | - name: upload 129 | uses: actions/upload-artifact@v3 130 | with: 131 | name: macos-bin 132 | path: macos-bin.7z 133 | 134 | - name: upload 135 | uses: actions/upload-artifact@v3 136 | with: 137 | name: macos-jni 138 | path: macos-jni.7z 139 | 140 | - name: upload 141 | uses: actions/upload-artifact@v3 142 | with: 143 | name: macos-clib 144 | path: macos-clib.7z 145 | 146 | windows-vs2019: 147 | runs-on: windows-2019 148 | 149 | env: 150 | ONNX_VERSION: 1.14.0 151 | ONNX_PKG_NAME: onnxruntime-1.14.0-vs2019-static-mt 152 | CV_VERSION: 4.6.0 153 | CV_PKG_NAME: opencv-4.6.0-windows-vs2019-mt 154 | 155 | steps: 156 | # 检出代码 157 | - uses: actions/checkout@v3 158 | 159 | # 下载onnxruntime-static 160 | - name: download onnxruntime-static 161 | run: | 162 | cd onnxruntime-static 163 | Invoke-WebRequest -Uri 
https://github.com/RapidAI/OnnxruntimeBuilder/releases/download/${{ env.ONNX_VERSION }}/${{ env.ONNX_PKG_NAME }}.7z -OutFile ${{ env.ONNX_PKG_NAME }}.7z 164 | 7z x ${{ env.ONNX_PKG_NAME }}.7z -aoa 165 | 166 | # 下载opencv-static 167 | - name: download opencv-static 168 | run: | 169 | cd opencv-static 170 | Invoke-WebRequest -Uri https://github.com/RapidAI/OpenCVBuilder/releases/download/${{ env.CV_VERSION }}/${{ env.CV_PKG_NAME }}.7z -OutFile ${{ env.CV_PKG_NAME }}.7z 171 | 7z x ${{ env.CV_PKG_NAME }}.7z -aoa 172 | 173 | # 编译 174 | - name: build 175 | run: | 176 | ./build-default.bat 177 | 178 | # 使用7z压缩 179 | - name: 7zip 180 | run: | 181 | mkdir windows-bin 182 | cp run-benchmark.bat windows-bin/run-benchmark.bat 183 | cp run-test.bat windows-bin/run-test.bat 184 | cp -r images windows-bin/images 185 | mv win-BIN-CPU-x64/install/bin windows-bin/win-BIN-CPU-x64 186 | mv win-BIN-CPU-Win32/install/bin windows-bin/win-BIN-CPU-Win32 187 | 7z a windows-bin.7z windows-bin 188 | mkdir windows-jni 189 | mv win-JNI-CPU-x64/install windows-jni/win-JNI-CPU-x64 190 | mv win-JNI-CPU-Win32/install windows-jni/win-JNI-CPU-Win32 191 | 7z a windows-jni.7z windows-jni 192 | mkdir windows-clib 193 | mv win-CLIB-CPU-x64/install windows-clib/win-CLIB-CPU-x64 194 | mv win-CLIB-CPU-Win32/install windows-clib/win-CLIB-CPU-Win32 195 | 7z a windows-clib.7z windows-clib 196 | 197 | # 上传artifact 198 | - name: upload 199 | uses: actions/upload-artifact@v3 200 | with: 201 | name: windows-bin 202 | path: windows-bin.7z 203 | 204 | - name: upload 205 | uses: actions/upload-artifact@v3 206 | with: 207 | name: windows-jni 208 | path: windows-jni.7z 209 | 210 | - name: upload 211 | uses: actions/upload-artifact@v3 212 | with: 213 | name: windows-clib 214 | path: windows-clib.7z 215 | 216 | release: 217 | needs: [ ubuntu1804, macos1015, windows-vs2019 ] 218 | 219 | runs-on: ubuntu-latest 220 | 221 | steps: 222 | # 检出代码 223 | - uses: actions/checkout@v3 224 | 225 | # 获取所有的git log和tag 226 | - name: 
Unshallow 227 | run: git fetch --prune --unshallow 228 | 229 | # 获取git log 从 previousTag 到 lastTag 230 | - name: Get git log 231 | id: git-log 232 | run: | 233 | previousTag=$(git describe --abbrev=0 --tags `git rev-list --tags --skip=1 --max-count=1`) 234 | lastTag=$(git describe --abbrev=0 --tags) 235 | echo "previousTag:$previousTag ~ lastTag:$lastTag" 236 | log=$(git log $previousTag..$lastTag --pretty=format:'- %cd %an: %s\n' --date=format:'%Y-%m-%d %H:%M:%S') 237 | echo "$log" 238 | echo "log_state="$log"" >> $GITHUB_ENV 239 | 240 | # 创建Changelog文件 triggered by git tag push 241 | - name: Generate Changelog 242 | if: startsWith(github.ref, 'refs/tags/') 243 | run: | 244 | echo -e '${{ env.log_state }}' > Release.txt 245 | 246 | # Cat Changelog triggered by git tag push 247 | - name: Cat Changelog 248 | if: startsWith(github.ref, 'refs/tags/') 249 | run: | 250 | cat Release.txt 251 | 252 | # 下载artifact 253 | - name: download 254 | uses: actions/download-artifact@v3 255 | with: 256 | path: artifacts 257 | 258 | # 查看artifact 259 | - name: list artifact 260 | run: | 261 | tree artifacts 262 | 263 | # 创建release 264 | - name: create-release 265 | id: create-release 266 | uses: actions/create-release@v1 267 | env: 268 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 269 | with: 270 | tag_name: ${{ github.ref }} 271 | release_name: ${{ github.ref }} 272 | body_path: Release.txt 273 | draft: false 274 | prerelease: false 275 | 276 | - name: upload-linux-bin 277 | uses: actions/upload-release-asset@v1 278 | env: 279 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 280 | PKG_NAME: linux-bin 281 | with: 282 | upload_url: ${{ steps.create-release.outputs.upload_url }} 283 | asset_path: artifacts/${{ env.PKG_NAME }}/${{ env.PKG_NAME }}.7z 284 | asset_name: ${{ env.PKG_NAME }}.7z 285 | asset_content_type: application/x-7z-compressed 286 | 287 | - name: upload-linux-jni 288 | uses: actions/upload-release-asset@v1 289 | env: 290 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 291 | 
PKG_NAME: linux-jni 292 | with: 293 | upload_url: ${{ steps.create-release.outputs.upload_url }} 294 | asset_path: artifacts/${{ env.PKG_NAME }}/${{ env.PKG_NAME }}.7z 295 | asset_name: ${{ env.PKG_NAME }}.7z 296 | asset_content_type: application/x-7z-compressed 297 | 298 | - name: upload-linux-clib 299 | uses: actions/upload-release-asset@v1 300 | env: 301 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 302 | PKG_NAME: linux-clib 303 | with: 304 | upload_url: ${{ steps.create-release.outputs.upload_url }} 305 | asset_path: artifacts/${{ env.PKG_NAME }}/${{ env.PKG_NAME }}.7z 306 | asset_name: ${{ env.PKG_NAME }}.7z 307 | asset_content_type: application/x-7z-compressed 308 | 309 | - name: upload-macos-bin 310 | uses: actions/upload-release-asset@v1 311 | env: 312 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 313 | PKG_NAME: macos-bin 314 | with: 315 | upload_url: ${{ steps.create-release.outputs.upload_url }} 316 | asset_path: artifacts/${{ env.PKG_NAME }}/${{ env.PKG_NAME }}.7z 317 | asset_name: ${{ env.PKG_NAME }}.7z 318 | asset_content_type: application/x-7z-compressed 319 | 320 | - name: upload-macos-jni 321 | uses: actions/upload-release-asset@v1 322 | env: 323 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 324 | PKG_NAME: macos-jni 325 | with: 326 | upload_url: ${{ steps.create-release.outputs.upload_url }} 327 | asset_path: artifacts/${{ env.PKG_NAME }}/${{ env.PKG_NAME }}.7z 328 | asset_name: ${{ env.PKG_NAME }}.7z 329 | asset_content_type: application/x-7z-compressed 330 | 331 | - name: upload-macos-clib 332 | uses: actions/upload-release-asset@v1 333 | env: 334 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 335 | PKG_NAME: macos-clib 336 | with: 337 | upload_url: ${{ steps.create-release.outputs.upload_url }} 338 | asset_path: artifacts/${{ env.PKG_NAME }}/${{ env.PKG_NAME }}.7z 339 | asset_name: ${{ env.PKG_NAME }}.7z 340 | asset_content_type: application/x-7z-compressed 341 | 342 | - name: upload-windows-bin 343 | uses: actions/upload-release-asset@v1 344 | env: 
345 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 346 | PKG_NAME: windows-bin 347 | with: 348 | upload_url: ${{ steps.create-release.outputs.upload_url }} 349 | asset_path: artifacts/${{ env.PKG_NAME }}/${{ env.PKG_NAME }}.7z 350 | asset_name: ${{ env.PKG_NAME }}.7z 351 | asset_content_type: application/x-7z-compressed 352 | 353 | - name: upload-windows-jni 354 | uses: actions/upload-release-asset@v1 355 | env: 356 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 357 | PKG_NAME: windows-jni 358 | with: 359 | upload_url: ${{ steps.create-release.outputs.upload_url }} 360 | asset_path: artifacts/${{ env.PKG_NAME }}/${{ env.PKG_NAME }}.7z 361 | asset_name: ${{ env.PKG_NAME }}.7z 362 | asset_content_type: application/x-7z-compressed 363 | 364 | - name: upload-windows-clib 365 | uses: actions/upload-release-asset@v1 366 | env: 367 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 368 | PKG_NAME: windows-clib 369 | with: 370 | upload_url: ${{ steps.create-release.outputs.upload_url }} 371 | asset_path: artifacts/${{ env.PKG_NAME }}/${{ env.PKG_NAME }}.7z 372 | asset_name: ${{ env.PKG_NAME }}.7z 373 | asset_content_type: application/x-7z-compressed 374 | 375 | # 获取指定时区的时间 376 | # https://github.com/marketplace/actions/get-time-action 377 | - name: Get Time 378 | id: time 379 | uses: JantHsueh/get-time-action@master 380 | with: 381 | timeZone: 8 382 | 383 | # 向钉钉发送消息 384 | # https://github.com/marketplace/actions/web-request-action 385 | - name: dingtalk 386 | uses: satak/webrequest-action@master 387 | with: 388 | url: ${{ secrets.DINGTALK_WEBHOOK }} 389 | method: POST 390 | payload: '{"msgtype": "text", "text": {"content": "版本更新: ${{ github.repository }}-版本号: ${{ github.ref }} \n 编译时间: ${{ steps.time.outputs.time }} \n 距上个正式版的更新记录: \n${{ env.log_state }}"}}' 391 | headers: '{"Content-Type": "application/json"}' -------------------------------------------------------------------------------- /.github/workflows/linux.yml: 
-------------------------------------------------------------------------------- 1 | name: linux 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - '*' 8 | 9 | jobs: 10 | linux: 11 | runs-on: ubuntu-22.04 12 | 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | arch_type: 17 | [ 18 | aarch64-linux-musl, 19 | mips64el-linux-musl, 20 | mipsel-linux-musl, 21 | mipsel-linux-musln32, 22 | mipsel-linux-musln32sf, 23 | mipsel-linux-muslsf, 24 | riscv32-linux-musl, 25 | riscv64-linux-musl, 26 | sh2-linux-musl, 27 | x86_64-linux-musl, 28 | x86_64-linux-muslx32, 29 | ] 30 | 31 | name: ${{ matrix.arch_type }} 32 | 33 | env: 34 | TOOLCHAIN_VERSION: 20231224 35 | ONNX_VERSION: 1.15.1 36 | ONNX_PKG_NAME: onnxruntime-1.15.1-${{ matrix.arch_type }}-static 37 | CV_VERSION: 4.8.1 38 | CV_PKG_NAME: opencv-4.8.1-${{ matrix.arch_type }} 39 | BIN_PKG_NAME: linux-bin-${{ matrix.arch_type }} 40 | JNI_PKG_NAME: linux-jni-${{ matrix.arch_type }} 41 | CLIB_PKG_NAME: linux-clib-${{ matrix.arch_type }} 42 | 43 | steps: 44 | # 检出代码 45 | - uses: actions/checkout@v3 46 | 47 | # 部署musl 48 | - name: deploy musl 49 | run: | 50 | wget https://github.com/benjaminwan/musl-cross-builder/releases/download/${{ env.TOOLCHAIN_VERSION }}/${{ matrix.arch_type }}.7z -O ${{ matrix.arch_type }}.7z 51 | 7z x ${{ matrix.arch_type }}.7z -aoa 52 | mv ${{ matrix.arch_type }}/ /opt/${{ matrix.arch_type }}/ 53 | 54 | # 下载onnxruntime-static 55 | - name: download onnxruntime-static 56 | run: | 57 | cd onnxruntime-static 58 | wget https://github.com/RapidAI/OnnxruntimeBuilder/releases/download/${{ env.ONNX_VERSION }}/${{ env.ONNX_PKG_NAME }}.7z -O ${{ env.ONNX_PKG_NAME }}.7z 59 | 7z x ${{ env.ONNX_PKG_NAME }}.7z -aoa 60 | 61 | # 下载opencv-static 62 | - name: download opencv-static 63 | run: | 64 | cd opencv-static 65 | wget https://github.com/RapidAI/OpenCVBuilder/releases/download/${{ env.CV_VERSION }}/${{ env.CV_PKG_NAME }}.7z -O ${{ env.CV_PKG_NAME }}.7z 66 | 7z x ${{ env.CV_PKG_NAME }}.7z -aoa 67 | 68 | # 
编译 69 | - name: build 70 | run: | 71 | wget https://github.com/benjaminwan/musl-cross-builder/raw/main/musl-cross.toolchain.cmake -O musl-cross.toolchain.cmake 72 | chmod a+x build-default.sh 73 | ./build-default.sh "${{ matrix.arch_type }}" "/opt/${{ matrix.arch_type }}" 74 | 75 | # install文件夹改名linux,并使用7z压缩 76 | - name: 7zip 77 | run: | 78 | mkdir ${{ env.BIN_PKG_NAME }} 79 | cp run-benchmark.sh ${{ env.BIN_PKG_NAME }}/run-benchmark.sh 80 | cp run-test.sh ${{ env.BIN_PKG_NAME }}/run-test.sh 81 | cp -r images ${{ env.BIN_PKG_NAME }}/images 82 | mv Linux-BIN-CPU/install/bin ${{ env.BIN_PKG_NAME }}/Linux-BIN-CPU 83 | cp others/README-bin.txt ${{ env.BIN_PKG_NAME }}/README.txt 84 | 7z a ${{ env.BIN_PKG_NAME }}.7z ${{ env.BIN_PKG_NAME }} 85 | mkdir ${{ env.JNI_PKG_NAME }} 86 | mv Linux-JNI-CPU/install ${{ env.JNI_PKG_NAME }}/Linux-JNI-CPU 87 | cp others/README-jni.txt ${{ env.JNI_PKG_NAME }}/README.txt 88 | 7z a ${{ env.JNI_PKG_NAME }}.7z ${{ env.JNI_PKG_NAME }} 89 | mkdir ${{ env.CLIB_PKG_NAME }} 90 | mv Linux-CLIB-CPU/install ${{ env.CLIB_PKG_NAME }}/Linux-CLIB-CPU 91 | cp others/README-clib.txt ${{ env.CLIB_PKG_NAME }}/README.txt 92 | 7z a ${{ env.CLIB_PKG_NAME }}.7z ${{ env.CLIB_PKG_NAME }} 93 | 94 | # 上传artifact 95 | - name: upload 96 | uses: actions/upload-artifact@v3 97 | with: 98 | name: ${{ env.BIN_PKG_NAME }} 99 | path: ${{ env.BIN_PKG_NAME }}.7z 100 | 101 | - name: upload 102 | uses: actions/upload-artifact@v3 103 | with: 104 | name: ${{ env.JNI_PKG_NAME }} 105 | path: ${{ env.JNI_PKG_NAME }}.7z 106 | 107 | - name: upload 108 | uses: actions/upload-artifact@v3 109 | with: 110 | name: ${{ env.CLIB_PKG_NAME }} 111 | path: ${{ env.CLIB_PKG_NAME }}.7z 112 | 113 | # 获取所有的git log和tag 114 | - name: Unshallow 115 | run: git fetch --prune --unshallow 116 | 117 | # 获取git log 从 previousTag 到 lastTag 118 | - name: Get git log 119 | id: git-log 120 | run: | 121 | previousTag=$(git describe --abbrev=0 --tags `git rev-list --tags --skip=1 --max-count=1`) 122 | 
lastTag=$(git describe --abbrev=0 --tags) 123 | echo "previousTag:$previousTag ~ lastTag:$lastTag" 124 | log=$(git log $previousTag..$lastTag --pretty=format:'- %cd %an: %s\n' --date=format:'%Y-%m-%d %H:%M:%S') 125 | echo "$log" 126 | echo "log_state="$log"" >> $GITHUB_ENV 127 | 128 | # 创建Changelog文件 triggered by git tag push 129 | - name: Generate Changelog 130 | if: startsWith(github.ref, 'refs/tags/') 131 | run: | 132 | echo -e '${{ env.log_state }}' > release.md 133 | 134 | # 创建release 上传release 135 | # https://github.com/marketplace/actions/create-release 136 | - name: Create release and upload-archive 137 | uses: ncipollo/release-action@v1 138 | with: 139 | prerelease: true 140 | name: RapidOcrOnnx ${{ github.ref }} 141 | bodyFile: release.md 142 | artifacts: linux-*.7z 143 | allowUpdates: true 144 | artifactContentType: application/x-7z-compressed 145 | token: ${{ secrets.GITHUB_TOKEN }} 146 | -------------------------------------------------------------------------------- /.github/workflows/macos.yml: -------------------------------------------------------------------------------- 1 | name: macos 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - '*' 8 | 9 | jobs: 10 | macos: 11 | runs-on: macos-latest 12 | 13 | env: 14 | ONNX_VERSION: 1.15.1 15 | ONNX_PKG_NAME: onnxruntime-1.15.1-macos-static 16 | CV_VERSION: 4.8.1 17 | CV_PKG_NAME: opencv-4.8.1-macos 18 | BIN_PKG_NAME: macos-bin 19 | JNI_PKG_NAME: macos-jni 20 | CLIB_PKG_NAME: macos-clib 21 | 22 | 23 | steps: 24 | # 检出代码 25 | - uses: actions/checkout@v3 26 | 27 | # 下载onnxruntime-static 28 | - name: download onnxruntime-static 29 | run: | 30 | cd onnxruntime-static 31 | wget https://github.com/RapidAI/OnnxruntimeBuilder/releases/download/${{ env.ONNX_VERSION }}/${{ env.ONNX_PKG_NAME }}.7z -O ${{ env.ONNX_PKG_NAME }}.7z 32 | 7z x ${{ env.ONNX_PKG_NAME }}.7z -aoa 33 | 34 | # 下载opencv-static 35 | - name: download opencv-static 36 | run: | 37 | cd opencv-static 38 | wget 
https://github.com/RapidAI/OpenCVBuilder/releases/download/${{ env.CV_VERSION }}/${{ env.CV_PKG_NAME }}.7z -O ${{ env.CV_PKG_NAME }}.7z 39 | 7z x ${{ env.CV_PKG_NAME }}.7z -aoa 40 | 41 | # 编译 42 | - name: build 43 | run: | 44 | chmod a+x build-default.sh &&./build-default.sh 45 | 46 | # 压缩 47 | - name: 7z 48 | run: | 49 | mkdir ${{ env.BIN_PKG_NAME }} 50 | cp run-benchmark.sh ${{ env.BIN_PKG_NAME }}/run-benchmark.sh 51 | cp run-test.sh ${{ env.BIN_PKG_NAME }}/run-test.sh 52 | cp -r images ${{ env.BIN_PKG_NAME }}/images 53 | mv Darwin-BIN-CPU/install/bin ${{ env.BIN_PKG_NAME }}/Darwin-BIN-CPU 54 | cp others/README-bin.txt ${{ env.BIN_PKG_NAME }}/README.txt 55 | 7z a ${{ env.BIN_PKG_NAME }}.7z ${{ env.BIN_PKG_NAME }} 56 | mkdir ${{ env.JNI_PKG_NAME }} 57 | mv Darwin-JNI-CPU/install ${{ env.JNI_PKG_NAME }}/Darwin-JNI-CPU 58 | cp others/README-jni.txt ${{ env.JNI_PKG_NAME }}/README.txt 59 | 7z a ${{ env.JNI_PKG_NAME }}.7z ${{ env.JNI_PKG_NAME }} 60 | mkdir ${{ env.CLIB_PKG_NAME }} 61 | mv Darwin-CLIB-CPU/install ${{ env.CLIB_PKG_NAME }}/Darwin-CLIB-CPU 62 | cp others/README-clib.txt ${{ env.CLIB_PKG_NAME }}/README.txt 63 | 7z a ${{ env.CLIB_PKG_NAME }}.7z ${{ env.CLIB_PKG_NAME }} 64 | 65 | # 上传artifact 66 | - name: upload 67 | uses: actions/upload-artifact@v3 68 | with: 69 | name: ${{ env.BIN_PKG_NAME }} 70 | path: ${{ env.BIN_PKG_NAME }}.7z 71 | 72 | - name: upload 73 | uses: actions/upload-artifact@v3 74 | with: 75 | name: ${{ env.JNI_PKG_NAME }} 76 | path: ${{ env.JNI_PKG_NAME }}.7z 77 | 78 | - name: upload 79 | uses: actions/upload-artifact@v3 80 | with: 81 | name: ${{ env.CLIB_PKG_NAME }} 82 | path: ${{ env.CLIB_PKG_NAME }}.7z 83 | 84 | # 获取所有的git log和tag 85 | - name: Unshallow 86 | run: git fetch --prune --unshallow 87 | 88 | # 获取git log 从 previousTag 到 lastTag 89 | - name: Get git log 90 | id: git-log 91 | run: | 92 | previousTag=$(git describe --abbrev=0 --tags `git rev-list --tags --skip=1 --max-count=1`) 93 | lastTag=$(git describe --abbrev=0 --tags) 94 | 
echo "previousTag:$previousTag ~ lastTag:$lastTag" 95 | log=$(git log $previousTag..$lastTag --pretty=format:'- %cd %an: %s\n' --date=format:'%Y-%m-%d %H:%M:%S') 96 | echo "$log" 97 | echo "log_state="$log"" >> $GITHUB_ENV 98 | 99 | # 创建Changelog文件 triggered by git tag push 100 | - name: Generate Changelog 101 | if: startsWith(github.ref, 'refs/tags/') 102 | run: | 103 | echo -e '${{ env.log_state }}' > release.md 104 | 105 | # 创建release 上传release 106 | # https://github.com/marketplace/actions/create-release 107 | - name: Create release and upload-archive 108 | uses: ncipollo/release-action@v1 109 | with: 110 | prerelease: true 111 | name: RapidOcrOnnx ${{ github.ref }} 112 | bodyFile: release.md 113 | artifacts: macos-*.7z 114 | allowUpdates: true 115 | artifactContentType: application/x-7z-compressed 116 | token: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/windows.yml: -------------------------------------------------------------------------------- 1 | name: windows 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - '*' 8 | 9 | jobs: 10 | windows: 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | list: 15 | - { win_ver: 2019, vs_name: vs2019, vs_ver: v142 } 16 | - { win_ver: 2022, vs_name: vs2022, vs_ver: v143 } 17 | 18 | runs-on: windows-${{ matrix.list.win_ver }} 19 | 20 | name: windows-${{ matrix.list.vs_name }} 21 | 22 | env: 23 | ONNX_VERSION: 1.15.1 24 | ONNX_PKG_NAME: onnxruntime-1.15.1-windows-${{ matrix.list.vs_name }}-static-mt 25 | CV_VERSION: 4.8.1 26 | CV_PKG_NAME: opencv-4.8.1-windows-${{ matrix.list.vs_name }}-mt 27 | BIN_PKG_NAME: windows-bin-${{ matrix.list.vs_name }}-mt 28 | JNI_PKG_NAME: windows-jni-${{ matrix.list.vs_name }}-mt 29 | CLIB_PKG_NAME: windows-clib-${{ matrix.list.vs_name }}-mt 30 | 31 | steps: 32 | # 检出代码 33 | - uses: actions/checkout@v3 34 | 35 | # 下载onnxruntime-static 36 | - name: download onnxruntime-static 37 | run: | 38 | cd 
onnxruntime-static 39 | Invoke-WebRequest -Uri https://github.com/RapidAI/OnnxruntimeBuilder/releases/download/${{ env.ONNX_VERSION }}/${{ env.ONNX_PKG_NAME }}.7z -OutFile ${{ env.ONNX_PKG_NAME }}.7z 40 | 7z x ${{ env.ONNX_PKG_NAME }}.7z -aoa 41 | 42 | # 下载opencv-static 43 | - name: download opencv-static 44 | run: | 45 | cd opencv-static 46 | Invoke-WebRequest -Uri https://github.com/RapidAI/OpenCVBuilder/releases/download/${{ env.CV_VERSION }}/${{ env.CV_PKG_NAME }}.7z -OutFile ${{ env.CV_PKG_NAME }}.7z 47 | 7z x ${{ env.CV_PKG_NAME }}.7z -aoa 48 | 49 | # 编译 50 | - name: build 51 | run: | 52 | ./build-default.bat ${{ matrix.list.vs_ver }} 53 | 54 | # 压缩 55 | - name: 7zip 56 | run: | 57 | mkdir ${{ env.BIN_PKG_NAME }} 58 | cp run-benchmark.bat ${{ env.BIN_PKG_NAME }}/run-benchmark.bat 59 | cp run-test.bat ${{ env.BIN_PKG_NAME }}/run-test.bat 60 | cp -r images ${{ env.BIN_PKG_NAME }}/images 61 | mv win-BIN-CPU-x64/install/bin ${{ env.BIN_PKG_NAME }}/win-BIN-CPU-x64 62 | mv win-BIN-CPU-Win32/install/bin ${{ env.BIN_PKG_NAME }}/win-BIN-CPU-Win32 63 | cp others/README-bin.txt ${{ env.BIN_PKG_NAME }}/README.txt 64 | 7z a ${{ env.BIN_PKG_NAME }}.7z ${{ env.BIN_PKG_NAME }} 65 | mkdir ${{ env.JNI_PKG_NAME }} 66 | mv win-JNI-CPU-x64/install ${{ env.JNI_PKG_NAME }}/win-JNI-CPU-x64 67 | mv win-JNI-CPU-Win32/install ${{ env.JNI_PKG_NAME }}/win-JNI-CPU-Win32 68 | cp others/README-jni.txt ${{ env.JNI_PKG_NAME }}/README.txt 69 | 7z a ${{ env.JNI_PKG_NAME }}.7z ${{ env.JNI_PKG_NAME }} 70 | mkdir ${{ env.CLIB_PKG_NAME }} 71 | mv win-CLIB-CPU-x64/install ${{ env.CLIB_PKG_NAME }}/win-CLIB-CPU-x64 72 | mv win-CLIB-CPU-Win32/install ${{ env.CLIB_PKG_NAME }}/win-CLIB-CPU-Win32 73 | cp others/README-clib.txt ${{ env.CLIB_PKG_NAME }}/README.txt 74 | 7z a ${{ env.CLIB_PKG_NAME }}.7z ${{ env.CLIB_PKG_NAME }} 75 | 76 | # 上传artifact 77 | - name: upload 78 | uses: actions/upload-artifact@v3 79 | with: 80 | name: ${{ env.BIN_PKG_NAME }} 81 | path: ${{ env.BIN_PKG_NAME }}.7z 82 | 83 | - name: 
upload 84 | uses: actions/upload-artifact@v3 85 | with: 86 | name: ${{ env.JNI_PKG_NAME }} 87 | path: ${{ env.JNI_PKG_NAME }}.7z 88 | 89 | - name: upload 90 | uses: actions/upload-artifact@v3 91 | with: 92 | name: ${{ env.CLIB_PKG_NAME }} 93 | path: ${{ env.CLIB_PKG_NAME }}.7z 94 | 95 | release: 96 | needs: [ windows ] 97 | 98 | runs-on: ubuntu-latest 99 | 100 | steps: 101 | # 检出代码 102 | - uses: actions/checkout@v3 103 | 104 | # 获取所有的git log和tag 105 | - name: Unshallow 106 | run: git fetch --prune --unshallow 107 | 108 | # 获取git log 从 previousTag 到 lastTag 109 | - name: Get git log 110 | id: git-log 111 | run: | 112 | previousTag=$(git describe --abbrev=0 --tags `git rev-list --tags --skip=1 --max-count=1`) 113 | lastTag=$(git describe --abbrev=0 --tags) 114 | echo "previousTag:$previousTag ~ lastTag:$lastTag" 115 | log=$(git log $previousTag..$lastTag --pretty=format:'- %cd %an: %s\n' --date=format:'%Y-%m-%d %H:%M:%S') 116 | echo "$log" 117 | echo "log_state="$log"" >> $GITHUB_ENV 118 | 119 | # 创建Changelog文件 triggered by git tag push 120 | - name: Generate Changelog 121 | if: startsWith(github.ref, 'refs/tags/') 122 | run: | 123 | echo -e '${{ env.log_state }}' > release.md 124 | 125 | # 下载artifact 126 | - name: download 127 | uses: actions/download-artifact@v3 128 | with: 129 | path: artifacts 130 | 131 | # 查看artifact 132 | - name: list artifact 133 | run: | 134 | tree artifacts 135 | 136 | # 创建release 上传release 137 | # https://github.com/marketplace/actions/create-release 138 | - name: upload-windows 139 | uses: ncipollo/release-action@v1 140 | with: 141 | prerelease: true 142 | name: RapidOcrOnnx ${{ github.ref }} 143 | bodyFile: release.md 144 | artifacts: artifacts/*/*.7z 145 | allowUpdates: true 146 | artifactContentType: application/x-7z-compressed 147 | token: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 
94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | 133 | # macOS 134 | .DS_Store 135 | 136 | #idea 137 | .idea 138 | cmake-build-debug/ 139 | build/ 140 | build-lib/ 141 | /models/*.onnx 142 | onnxruntime-gpu/* 143 | !onnxruntime-gpu/README.md 144 | !onnxruntime-gpu/OnnxRuntimeWrapper.cmake 145 | !onnxruntime-gpu/windows-x64/OnnxRuntimeConfig.cmake 146 | !onnxruntime-gpu/linux/OnnxRuntimeConfig.cmake 147 | onnxruntime-static/* 148 | !/onnxruntime-static/OnnxRuntimeWrapper.cmake 149 | opencv-static/* 150 | !/opencv-static/OpenCVWrapperConfig.cmake 151 | opencv-shared/ 152 | /images/*-result.* -------------------------------------------------------------------------------- /BUILD.md: -------------------------------------------------------------------------------- 1 | # 编译说明 2 | 3 | ### 依赖的第三方库下载 4 | 5 | 1. 下载opencv,[下载地址](https://github.com/RapidAI/OpenCVBuilder/releases) 6 | 7 | * OpenCV静态库:opencv-(版本号)-平台.7z, 8 | * 把压缩包解压到项目根目录,windows平台需要注意目录层次,解压后目录结构如下 9 | * windows平台分为mt和md版,mt代表静态链接CRT,md代表动态链接CRT 10 | ``` 11 | opencv-static 12 | ├── OpenCVWrapperConfig.cmake 13 | ├── linux 14 | ├── macos 15 | ├── windows-x64 16 | └── windows-x86 17 | ``` 18 | 19 | 2. 
下载onnxruntime,[下载地址](https://github.com/RapidAI/OnnxruntimeBuilder/releases) 20 | 21 | * static为静态库:onnxruntime-(版本号)-平台-static.7z 22 | * shared为动态库:onnxruntime-(版本号)-平台-shared.7z 23 | * 一般情况下使用静态库即可 24 | * 把压缩包解压到项目根目录,windows平台需要注意目录层次,解压后目录结构如下 25 | * windows平台分为mt和md版,mt代表静态链接CRT,md代表动态链接CRT 26 | ``` 27 | onnxruntime-static 28 | ├── OnnxRuntimeWrapper.cmake 29 | ├── linux 30 | ├── macos 31 | ├── windows-x64 32 | └── windows-x86 33 | 34 | ``` 35 | 36 | 37 | ### 编译环境 38 | 39 | 1. Windows 10 x64 40 | 2. macOS 10.15 41 | 3. Linux Ubuntu 1804 x64 42 | 43 | **注意:以下说明仅适用于本机编译。如果需要交叉编译为arm等其它平台(参考android),则需要先交叉编译所有第三方依赖库(onnxruntime、opencv),然后再把依赖库整合替换到本项目里。** 44 | 45 | ### Windows编译说明 46 | 47 | #### 注意:从OnnxRuntime 1.7.0 ,只支持vs2019编译环境 48 | 49 | #### Windows nmake编译 50 | 51 | 1. 安装VS2019,安装时,至少选中'使用C++的桌面开发' 52 | 2. cmake>=3.12请自行下载&配置,[下载地址](https://cmake.org/download/) 53 | 3. 开始菜单打开"x64 Native Tools Command Prompt for VS 2019"或"适用于 VS 2019 的 x64 本机工具",并转到本项目根目录 54 | 4. 运行```build.bat```并按照提示输入选项,最后选择'BIN可执行文件' 55 | 5. 编译完成后运行```run-test.bat```进行测试(注意修改脚本内的目标图片路径) 56 | 6. 编译JNI动态运行库(可选,可用于java调用) 57 | 58 | * 下载jdk-8u221-windows-x64.exe,安装选项默认(确保“源代码”项选中),安装完成后,打开“系统”属性->高级->环境变量 59 | * 新建“系统变量”,变量名```JAVA_HOME``` ,变量值```C:\Program Files\Java\jdk1.8.0_221``` 60 | * 新建“系统变量”,变量名```CLASSPATH``` ,变量值```.;%JAVA_HOME%\lib\dt.jar;%JAVA_HOME%\lib\tools.jar;``` 61 | * 编辑“系统变量”Path,Win7在变量值头部添加```%JAVA_HOME%\bin;``` ,win10直接添加一行```%JAVA_HOME%\bin``` 62 | * 开始菜单打开"x64 Native Tools Command Prompt for VS 2019"或"适用于 VS 2019 的 x64 本机工具",并转到本项目根目录 63 | * 运行```build.bat```并按照提示输入选项,最后选择'JNI动态库' 64 | 65 | #### Windows Visual Studio编译说明 66 | 67 | 1. VS2019,cmake……等安装配置参考上述步骤。 68 | 2. 运行generate-vs-project.bat,输入数字选择要生成的visual studio项目解决方案版本。 69 | 3. 根据你的编译环境,进入build-xxxx-x86或x64文件夹,打开RapidOcrOnnx.sln。 70 | 4. 在顶部工具栏选择Release,在右边的"解决方案"窗口,右键选中"ALL_BUILD"->生成。要选择Debug,则您必须自行编译Debug版的opencv和onnxruntime。 71 | 72 | #### Windows部署说明 73 | 74 | 1. 如果有依赖的库是动态库时,部署的时候记得把dll复制到可执行文件目录。 75 | 2. 
部署时如果提示缺少"VCRUNTIME140_1.dll",下载安装适用于 Visual Studio 2015、2017 和 2019 的 Microsoft Visual C++ 可再发行软件包, 76 | [下载地址](https://support.microsoft.com/zh-cn/help/2977003/the-latest-supported-visual-c-downloads) 77 | 78 | ### macOS编译说明 79 | 80 | 1. macOS Catalina 10.15.x,安装Xcode>=12,并安装Xcode Command Line Tools, 终端运行```xcode-select --install``` 81 | 2. 自行下载安装HomeBrew,cmake >=3.19[下载地址](https://cmake.org/download/) 82 | 3. libomp: ```brew install libomp``` 83 | 4. 终端打开项目根目录,```./build.sh```并按照提示输入选项,最后选择'BIN可执行文件' 84 | 5. 测试:```./run-test.sh```(注意修改脚本内的目标图片路径) 85 | 6. 编译JNI动态运行库(可选,可用于java调用) 86 | 87 | * 下载jdk-8u221-macosx-x64.dmg,安装。 88 | * 编辑用户目录下的隐藏文件```.zshrc``` ,添加```export JAVA_HOME=$(/usr/libexec/java_home)``` 89 | * 运行```build.sh```并按照提示输入选项,最后选择'JNI动态库' 90 | 91 | #### macOS部署说明 92 | 93 | 如果有依赖的库是动态库时,参考下列方法: 94 | 95 | * 把动态库所在路径加入DYLD_LIBRARY_PATH搜索路径 96 | * 把动态库复制或链接到/usr/lib 97 | 98 | ### Linux编译说明 99 | 100 | 1. Ubuntu18.04 LTS 其它发行版(请自行编译依赖库opencv和onnxruntime,或自行适配官方发布的动态库) 101 | 2. ```sudo apt-get install build-essential``` 102 | 3. g++>=5,cmake>=3.17[下载地址](https://cmake.org/download/) 103 | 4. 终端打开项目根目录,```./build.sh```并按照提示输入选项,最后选择'BIN可执行文件' 104 | 5. 测试:```./run-test.sh```(注意修改脚本内的目标图片路径) 105 | 6. 
编译JNI动态运行库(可选,可用于java调用) 106 | 107 | * 下载jdk-8u221并安装配置 108 | * 运行```build.sh```并按照提示输入选项,最后选择'JNI动态库' 109 | * **注意:编译JNI时,g++版本要求>=6** 110 | 111 | #### Linux部署说明 112 | 113 | 有依赖的库是动态库时,参考下列方法: 114 | 115 | * 把动态库所在路径加入LD_LIBRARY_PATH搜索路径 116 | * 把动态库复制或链接到/usr/lib 117 | 118 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if (WIN32) 2 | cmake_minimum_required(VERSION 3.12) 3 | elseif (APPLE) 4 | cmake_minimum_required(VERSION 3.17) 5 | elseif (UNIX) 6 | cmake_minimum_required(VERSION 3.17) 7 | endif () 8 | project(RapidOcrOnnx) 9 | 10 | # Output kind: BIN (executable), JNI (shared library for Java) or CLIB (shared library plus installed headers) 11 | if (NOT DEFINED OCR_OUTPUT) 12 | set(OCR_OUTPUT "BIN") 13 | message(STATUS "No OCR_OUTPUT, defaulting to BIN") 14 | endif () 15 | option(OCR_BENCHMARK "build benchmark" ON) 16 | # The option defaults to ON; configure with -DOCR_BENCHMARK=OFF to skip the benchmark target 17 | #set(OCR_OUTPUT "BIN") 18 | # Inference backend: CPU (default), CUDA or DIRECTML 19 | if (NOT DEFINED OCR_ONNX) 20 | set(OCR_ONNX "CPU") 21 | message(STATUS "No OCR_ONNX, defaulting to CPU") 22 | endif () 23 | #set(OCR_ONNX "CUDA") 24 | 25 | #set(CMAKE_CXX_STANDARD 11) 26 | add_definitions(-DUNICODE -D_UNICODE) 27 | if (CMAKE_BUILD_TYPE STREQUAL "Debug") 28 | add_definitions("-Wall -g -O0") 29 | else () 30 | add_definitions("-Wall") 31 | endif () 32 | 33 | # OnnxRuntime 34 | if (OCR_ONNX STREQUAL "CPU") 35 | include(${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-static/OnnxRuntimeWrapper.cmake) 36 | elseif (OCR_ONNX STREQUAL "CUDA") # CUDA 37 | include(${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-gpu/OnnxRuntimeWrapper.cmake) 38 | elseif (OCR_ONNX STREQUAL "DIRECTML") # DIRECTML 39 | include(${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-DirectML/OnnxRuntimeWrapper.cmake) 40 | endif () 41 | find_package(OnnxRuntime REQUIRED) 42 | if (OnnxRuntime_FOUND) 43 | message(STATUS "OnnxRuntime_LIBS: ${OnnxRuntime_LIBS}") 44 | message(STATUS "OnnxRuntime_INCLUDE_DIRS: ${OnnxRuntime_INCLUDE_DIRS}") 45 | else () 46 | message(FATAL_ERROR "onnxruntime 
Not Found!") 47 | endif (OnnxRuntime_FOUND) 48 | 49 | # OpenCV 50 | set(BUILD_SHARED_LIBS false) 51 | include(${CMAKE_CURRENT_SOURCE_DIR}/opencv-static/OpenCVWrapperConfig.cmake) 52 | find_package(OpenCV REQUIRED) 53 | if (OpenCV_FOUND) 54 | message(STATUS "OpenCV_LIBS: ${OpenCV_LIBS}") 55 | message(STATUS "OpenCV_INCLUDE_DIRS: ${OpenCV_INCLUDE_DIRS}") 56 | else () 57 | message(FATAL_ERROR "opencv Not Found!") 58 | endif (OpenCV_FOUND) 59 | 60 | # project include 61 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 62 | 63 | # source 64 | file(GLOB OCR_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) 65 | set(OCR_COMPILE_CODE ${OCR_SRC}) 66 | 67 | # JNI 68 | if (OCR_OUTPUT STREQUAL "JNI") 69 | find_package(JNI REQUIRED) 70 | if (JNI_FOUND) 71 | message("JNI FOUND") 72 | message(STATUS "JNI_LIBS: ${JNI_LIBS}") # NOTE(review): FindJNI defines JNI_LIBRARIES; JNI_LIBS is empty unless set elsewhere - confirm whether linking it is intended 73 | message(STATUS "JNI_INCLUDE_DIRS: ${JNI_INCLUDE_DIRS}") 74 | include_directories(${JNI_INCLUDE_DIRS}) 75 | else () 76 | message(FATAL_ERROR "JNI Not Found!") 77 | endif () 78 | endif () 79 | 80 | if (OCR_OUTPUT STREQUAL "JNI") # JNI 81 | add_library(RapidOcrOnnx SHARED ${OCR_COMPILE_CODE}) 82 | target_compile_definitions(RapidOcrOnnx PRIVATE __JNI__) 83 | target_link_libraries(RapidOcrOnnx ${OnnxRuntime_LIBS} ${OpenCV_LIBS} ${JNI_LIBS}) 84 | elseif (OCR_OUTPUT STREQUAL "CLIB") # CLIB 85 | add_library(RapidOcrOnnx SHARED ${OCR_COMPILE_CODE}) 86 | target_compile_definitions(RapidOcrOnnx PRIVATE __CLIB__) 87 | target_link_libraries(RapidOcrOnnx ${OnnxRuntime_LIBS} ${OpenCV_LIBS}) 88 | elseif (OCR_OUTPUT STREQUAL "BIN") # BIN 89 | add_executable(RapidOcrOnnx ${OCR_COMPILE_CODE}) 90 | target_compile_definitions(RapidOcrOnnx PRIVATE __EXEC__) 91 | target_link_libraries(RapidOcrOnnx ${OnnxRuntime_LIBS} ${OpenCV_LIBS}) 92 | endif () 93 | 94 | install(TARGETS RapidOcrOnnx EXPORT RapidOcrOnnx) 95 | if (OCR_OUTPUT STREQUAL "CLIB") # CLIB 96 | file(GLOB OCR_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h) 97 | install(FILES ${OCR_INCLUDE} DESTINATION include) 98 | 
endif () 99 | 100 | if (OCR_ONNX STREQUAL "CUDA") 101 | target_compile_definitions(RapidOcrOnnx PRIVATE __CUDA__) 102 | elseif(OCR_ONNX STREQUAL "DIRECTML") 103 | target_compile_definitions(RapidOcrOnnx PRIVATE __DIRECTML__) 104 | endif () 105 | 106 | # benchmark 107 | if (OCR_BENCHMARK AND (OCR_OUTPUT STREQUAL "BIN")) 108 | add_executable(benchmark benchmark/benchmark.cpp 109 | src/AngleNet.cpp 110 | src/clipper.cpp 111 | src/CrnnNet.cpp 112 | src/DbNet.cpp 113 | src/getopt.cpp 114 | src/OcrLiteImpl.cpp 115 | src/OcrLite.cpp 116 | src/OcrUtils.cpp) 117 | target_link_libraries(benchmark ${OnnxRuntime_LIBS} ${OpenCV_LIBS}) 118 | target_compile_definitions(benchmark PRIVATE __EXEC__) 119 | # Mirror the backend define applied to RapidOcrOnnx so the benchmark compiles the same code path 120 | if (OCR_ONNX STREQUAL "CUDA") 121 | target_compile_definitions(benchmark PRIVATE __CUDA__) 122 | elseif (OCR_ONNX STREQUAL "DIRECTML") 123 | target_compile_definitions(benchmark PRIVATE __DIRECTML__) 124 | endif () 125 | install(TARGETS benchmark EXPORT benchmark 126 | ARCHIVE DESTINATION staticlib 127 | LIBRARY DESTINATION sharedlib 128 | RUNTIME DESTINATION bin) 129 | endif () 130 | 131 | # Windows: pull in the CRT linkage overrides when OCR_BUILD_CRT is "True" 132 | if (OCR_BUILD_CRT STREQUAL "True") 133 | include(${CMAKE_CURRENT_SOURCE_DIR}/OcrCRTLinkage.cmake) 134 | endif () 135 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /OcrCRTLinkage.cmake: -------------------------------------------------------------------------------- 1 | # Rewrites the compiler flags from /MD, /MDd to /MT, /MTd and adds /NODEFAULTLIB linker options (Windows only) 2 | if (WIN32) 3 | foreach(flag_var 4 | CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE 5 | CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO 6 | CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE 7 | CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) 8 | # Replacing "/MD" also rewrites "/MDd" to "/MTd", so a single substitution covers both variants 9 | if(${flag_var} MATCHES "/MD") 10 | string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") 11 | endif() 12 | endforeach(flag_var) 13 | 14 | # Debug links ignore libcmt/libcpmt/msvcrt; Release links ignore libcmtd/libcpmtd/msvcrtd 15 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:atlthunk.lib") 16 | set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} /NODEFAULTLIB:libcmt.lib /NODEFAULTLIB:libcpmt.lib /NODEFAULTLIB:msvcrt.lib") 17 | set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /NODEFAULTLIB:libcmtd.lib /NODEFAULTLIB:libcpmtd.lib /NODEFAULTLIB:msvcrtd.lib") 18 | 
message(STATUS "OCR_BUILD_CRT True") 19 | endif () 20 | 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RapidOcrOnnx 2 | 3 | ### 联系方式 4 | 5 | [QQ群](https://rapidai.github.io/RapidOCRDocs/main/communicate/#qq) 6 | 7 | 8 | ### Project下载 9 | 10 | * 整合好源码和依赖库的完整工程项目,可以在Release中下载(github) 11 | * 可到Q群共享内下载,以Project开头的压缩包文件为源码工程,例:Project_RapidOcrOnnx-版本号.7z 12 | * 如果想自己折腾,则请继续阅读本说明 13 | 14 | ### Demo下载(win、mac、linux) 15 | 16 | * 编译好的demo,可以在release中下载,或者Q群共享内下载 17 | * 各平台可执行文件:linux-bin.7z、macos-bin.7z、windows-bin.7z 18 | * 用于java的jni库:linux-jni.7z、macos-jni.7z、windows-jni.7z 19 | * 用于C的动态库:linux-clib.7z、macos-clib.7z、windows-clib.7z 20 | * C动态库调用范例:[RapidOcrOnnxLibTest](https://github.com/RapidAI/RapidOcrOnnxLibTest) 21 | * 注意:linux编译平台为ubuntu18.04,如果你的linux版本无法运行demo,请自行从源码编译依赖库和完整项目。 22 | 23 | ### 介绍 24 | 25 | 请查看项目主仓库:https://github.com/RapidAI/RapidOCR 26 | 27 | 这个项目使用onnxruntime框架进行推理 28 | 29 | 采用onnxruntime框架[https://github.com/microsoft/onnxruntime](https://github.com/microsoft/onnxruntime) 30 | 31 | ### 更新说明 32 | 33 | #### 代码贡献者 34 | * configuration parameters for the number of IntraOp threads by https://github.com/Gmgge 35 | * 支持传入bitmap和image的二进制数据 by https://github.com/MyMonsterCat 36 | * Enhance ONNX Runtime GPU inference performance by https://github.com/Tryanks 37 | * Added DirectML GPU support by https://github.com/airchaoz 38 | 39 | #### 2022-10-15 update v1.1.1 40 | 41 | * opencv 4.6.0 42 | * onnxruntime 1.12.1 43 | * windows支持mt版引用库 44 | * rec模型输入图片高度改为48 45 | 46 | #### 2022-10-16 update v1.1.2 47 | 48 | * 修复:字典添加空格 49 | 50 | #### 2022-10-17 update v1.1.3 51 | 52 | * 修复:scoreToTextLine方法索引越界问题 53 | * Windows控制台编码修改为UTF8 54 | 55 | #### 2022-10-20 update v1.2.0 56 | 57 | * 再次修复空格问题 58 | * 增加GPU(cuda)支持,需要自行下载整合依赖库 59 | * windows下的free()方法更换为_aligned_free() 60 | * 修改默认输入参数 61 | * 修改benchmark输出样式 62 | 63 | #### 2022-10-28 
update v1.2.1 64 | 65 | * 适配onnxruntime 1.13.1 66 | * 修了些warning 67 | 68 | #### 2023-02-13 update v1.2.2 69 | 70 | * 适配onnxruntime 1.14.0 71 | 72 | #### 2024-01-07 update v1.2.3 73 | 74 | * onnxruntime 1.15.1 75 | * opencv 4.8.1 76 | 77 | ### 模型下载 78 | 79 | 整合好的范例工程自带了模型,在models文件夹中 80 | 81 | ``` 82 | RapidOcrOnnx/models 83 | ├── ch_PP-OCRv3_det_infer.onnx 84 | ├── ch_PP-OCRv3_rec_infer.onnx 85 | ├── ch_ppocr_mobile_v2.0_cls_infer.onnx 86 | └── ppocr_keys_v1.txt 87 | ``` 88 | 89 | ### [编译说明](./BUILD.md) 90 | 91 | ### [GPU版附加说明](./onnxruntime-gpu/README.md) 92 | 93 | ### 测试说明 94 | 95 | 1. 根据系统下载对应的程序包linux-bin.7z、macos-bin.7z、windows-bin.7z,并解压. 96 | 2. 把上面的模型下载,解压到第一步解压的文件夹里. 97 | 3. 终端运行run-test.sh或命令行运行run-test.bat,查看识别结果. 98 | 4. 终端运行run-benchmark.sh或命令行运行run-benchmark.bat,查看识别过程平均耗时. 99 | 100 | ### FAQ 101 | 102 | #### windows10下bat执行错误 103 | 104 | - 在win、linux、mac系统下,文本文件使用不同的换行符格式,win是CRLF,linux是LF,mac是CR 105 | - github的"Download ZIP"按钮下载的整个仓库代码,默认格式是UNIX换行符,此时bat脚本在windows平台执行会出错 106 | - 解决方法1:从Release中下载完整工程压缩包 107 | - 解决方法2:使用git clone命令同步仓库代码,git可以自动转换文件格式 108 | - 解决方法3:使用UltraEdit等文本编辑器,把bat文件的换行符转成win格式 109 | 110 | #### windows静态链接msvc 111 | 112 | - 作用:静态链接CRT(mt)可以让编译出来的包,部署时不需要安装c++运行时,但会增大包体积; 113 | - 需要mt版的引用库,参考编译说明,下载mt版的库; 114 | 115 | #### windows提示缺少"VCRUNTIME140_1.dll" 116 | 117 | 下载安装适用于 Visual Studio 2015、2017 和 2019 的 Microsoft Visual C++ 可再发行软件包 118 | [下载地址](https://support.microsoft.com/zh-cn/help/2977003/the-latest-supported-visual-c-downloads) 119 | 120 | #### Windows7执行错误|中文乱码 121 | 122 | 1. cmd窗口左上角-属性 123 | 2. 字体选项卡-选择除了“点阵字体”以外的TrueType字体,例如:Lucida Console、宋体 124 | 3. 
重新执行bat 125 | 126 | ### Windows调试运行 127 | 128 | * 下载范例项目工程自带的引用库是Release版,不能用于调试运行 129 | * debug版的引用库未压缩时容量超过1GB,极限压缩后也超过了100MB,请自行编译或到群共享里寻找 130 | * debug版的引用库必须是md版 131 | * 把debug版的引用库替换到范例工程的对应文件夹 132 | * 双击generate-vs-project.bat,选择2)Debug,生成对应的build-win-vsxxx-xx文件夹 133 | * 进入生成的文件夹,打开RapidOcrOnnx.sln 134 | * 右边解决方案管理器,选中RapidOcrOnnx,右键->设为启动项目,并生成(查看输出log,确保生成成功) 135 | * 如果引用库是dll,需要把对应的dll文件,例onnxruntime.dll复制到build-win-vsxxx-xx文件夹\Debug,跟上一步生成的RapidOcrOnnx.exe放在一起 136 | * 右边解决方案管理器,选中RapidOcrOnnx,右键->属性->调试-> 137 | 命令参数->```--models ../models --det ch_PP-OCRv3_det_infer.onnx --cls ch_ppocr_mobile_v2.0_cls_infer.onnx --rec ch_PP-OCRv3_rec_infer.onnx --keys ppocr_keys_v1.txt --image ../images/1.jpg``` 138 | * 工具栏,点击绿色三角号启动"本地Windows调试器" 139 | * 第一次运行的话,查看左下角,等待加载各dll符号,网络不好的话,要等挺久的 140 | 141 | ### 输入参数说明 142 | 143 | * 请参考main.h中的命令行参数说明。 144 | * 每个参数有一个短参数名和一个长参数名,用短的或长的均可。 145 | 146 | 1. ```-d或--models```:模型所在文件夹路径,可以相对路径也可以绝对路径。 147 | 2. ```-1或--det```:det模型文件名(含扩展名) 148 | 3. ```-2或--cls```:cls模型文件名(含扩展名) 149 | 4. ```-3或--rec```:rec模型文件名(含扩展名) 150 | 5. ```-4或--keys```:keys.txt文件名(含扩展名) 151 | 6. ```-i或--image```:目标图片路径,可以相对路径也可以绝对路径。 152 | 7. ```-t或--numThread```:线程数量。 153 | 8. ```-p或--padding```:图像预处理,在图片外周添加白边,用于提升识别率,文字框没有正确框住所有文字时,增加此值。 154 | 9. ```-s或--maxSideLen``` 155 | :按图片最长边的长度,此值为0代表不缩放,例:1024,如果图片长边大于1024则把图像整体缩小到1024再进行图像分割计算,如果图片长边小于1024则不缩放,如果图片长边小于32,则缩放到32。 156 | 10. ```-b或--boxScoreThresh```:文字框置信度门限,文字框没有正确框住所有文字时,减小此值。 157 | 11. ```-o或--boxThresh```:请自行试验。 158 | 12. ```-u或--unClipRatio```:单个文字框大小倍率,越大时单个文字框越大。此项与图片的大小相关,越大的图片此值应该越大。 159 | 13. ```-a或--doAngle```:启用(1)/禁用(0) 文字方向检测,只有图片倒置的情况下(旋转90~270度的图片),才需要启用文字方向检测。 160 | 14. ```-A或--mostAngle```:启用(1)/禁用(0) 角度投票(整张图片以最大可能文字方向来识别),当禁用文字方向检测时,此项也不起作用。 161 | 15. 
```-h或--help```:打印命令行帮助。 162 | 163 | ### 关于内存泄漏与valgrind 164 | 165 | * 项目根目录的valgrind-memcheck.sh用来检查内存泄漏(需要debug编译)。 166 | * 常见的并行库有tbb,hpx,openmp,gcd,concurrency,pthread 167 | * 并行库的种类可以看:https://docs.opencv.org/4.x/db/d05/tutorial_config_reference.html 168 | * 测试了openmp和pthread,目前已知这类并行库会导致检查报告中出现"possibly lost" 169 | * opencv只做简单的图像预处理,可以完全不使用任何并行库,但需要定制编译 170 | * onnxruntime1.6.0或之前,默认引用openmp,从1.7.0开始默认关闭openmp并使用自带的ThreadPool代码 171 | * 阅读报告可以看出"possibly lost"发生位置均在引用的第三方库(如果使用了并行库的话),如opencv或onnxruntime 172 | * "possibly lost"不一定是内存泄露 173 | * 如果opencv想定制编译不使用任何并行库,可以使用以下参数进行编译 174 | 175 | ``` 176 | -DWITH_TBB=OFF 177 | -DWITH_HPX=OFF 178 | -DWITH_OPENMP=OFF 179 | -DWITH_GCD=OFF 180 | -DWITH_CONCURRENCY=OFF 181 | -DWITH_PTHREADS_PF=OFF 182 | ``` 183 | -------------------------------------------------------------------------------- /benchmark/benchmark.cpp: -------------------------------------------------------------------------------- 1 | #include <cstdio> 2 | #include "main.h" 3 | #include "version.h" 4 | #include "OcrLite.h" 5 | #include "OcrUtils.h" 6 | 7 | #ifdef _WIN32 8 | #include <windows.h> 9 | #endif 10 | // Writes the command-line help (usage, required/optional/other parameters, examples) to `out`; `argv0` is the program name inserted into the usage and example lines. 11 | void printHelp(FILE *out, char *argv0) { 12 | fprintf(out, " ------- Usage -------\n"); 13 | fprintf(out, "%s %s", argv0, usageMsg); 14 | fprintf(out, " ------- Required Parameters -------\n"); 15 | fprintf(out, "%s", requiredMsg); 16 | fprintf(out, " ------- Optional Parameters -------\n"); 17 | fprintf(out, "%s", optionalMsg); 18 | fprintf(out, " ------- Other Parameters -------\n"); 19 | fprintf(out, "%s", otherMsg); 20 | fprintf(out, " ------- Examples -------\n"); 21 | fprintf(out, example1Msg, argv0); 22 | fprintf(out, example2Msg, argv0); 23 | } 24 | 25 | int main(int argc, char **argv) { 26 | if (argc <= 1) { 27 | printHelp(stderr, argv[0]); 28 | return -1; 29 | } 30 | #ifdef _WIN32 31 | SetConsoleOutputCP(CP_UTF8); 32 | #endif 33 | std::string modelsDir, modelDetPath, modelClsPath, modelRecPath, keysPath; 34 | std::string imgPath, imgDir, imgName; 35 | 
int numThread = 4; 36 | int loopCount = 1; 37 | int padding = 50; 38 | int maxSideLen = 1024; 39 | float boxScoreThresh = 0.5f; 40 | float boxThresh = 0.3f; 41 | float unClipRatio = 1.5f; 42 | bool doAngle = true; 43 | int flagDoAngle = 1; 44 | bool mostAngle = true; 45 | int flagMostAngle = 1; 46 | int flagGpu = -1; 47 | 48 | int opt; 49 | int optionIndex = 0; 50 | while ((opt = getopt_long(argc, argv, "d:1:2:3:4:i:t:p:s:b:o:u:a:A:G:v:h:l", long_options, &optionIndex)) != -1) { 51 | //printf("option(-%c)=%s\n", opt, optarg); 52 | switch (opt) { 53 | case 'd': 54 | modelsDir = optarg; 55 | printf("modelsPath=%s\n", modelsDir.c_str()); 56 | break; 57 | case '1': 58 | modelDetPath = modelsDir + "/" + optarg; 59 | printf("model det path=%s\n", modelDetPath.c_str()); 60 | break; 61 | case '2': 62 | modelClsPath = modelsDir + "/" + optarg; 63 | printf("model cls path=%s\n", modelClsPath.c_str()); 64 | break; 65 | case '3': 66 | modelRecPath = modelsDir + "/" + optarg; 67 | printf("model rec path=%s\n", modelRecPath.c_str()); 68 | break; 69 | case '4': 70 | keysPath = modelsDir + "/" + optarg; 71 | printf("keys path=%s\n", keysPath.c_str()); 72 | break; 73 | case 'i': 74 | imgPath.assign(optarg); 75 | imgDir.assign(imgPath.substr(0, imgPath.find_last_of('/') + 1)); 76 | imgName.assign(imgPath.substr(imgPath.find_last_of('/') + 1)); 77 | printf("imgDir=%s, imgName=%s\n", imgDir.c_str(), imgName.c_str()); 78 | break; 79 | case 't': 80 | numThread = (int) strtol(optarg, NULL, 10); 81 | //printf("numThread=%d\n", numThread); 82 | break; 83 | case 'p': 84 | padding = (int) strtol(optarg, NULL, 10); 85 | //printf("padding=%d\n", padding); 86 | break; 87 | case 's': 88 | maxSideLen = (int) strtol(optarg, NULL, 10); 89 | //printf("maxSideLen=%d\n", maxSideLen); 90 | break; 91 | case 'b': 92 | boxScoreThresh = strtof(optarg, NULL); 93 | //printf("boxScoreThresh=%f\n", boxScoreThresh); 94 | break; 95 | case 'o': 96 | boxThresh = strtof(optarg, NULL); 97 | 
//printf("boxThresh=%f\n", boxThresh); 98 | break; 99 | case 'u': 100 | unClipRatio = strtof(optarg, NULL); 101 | //printf("unClipRatio=%f\n", unClipRatio); 102 | break; 103 | case 'a': 104 | flagDoAngle = (int) strtol(optarg, NULL, 10); 105 | if (flagDoAngle == 0) { 106 | doAngle = false; 107 | } else { 108 | doAngle = true; 109 | } 110 | //printf("doAngle=%d\n", doAngle); 111 | break; 112 | case 'A': 113 | flagMostAngle = (int) strtol(optarg, NULL, 10); 114 | if (flagMostAngle == 0) { 115 | mostAngle = false; 116 | } else { 117 | mostAngle = true; 118 | } 119 | //printf("mostAngle=%d\n", mostAngle); 120 | break; 121 | case 'v': 122 | printf("%s\n", VERSION); 123 | return 0; 124 | case 'h': 125 | printHelp(stdout, argv[0]); 126 | return 0; 127 | case 'G': 128 | flagGpu = (int) strtol(optarg, NULL, 10); 129 | break; 130 | case 'l': 131 | loopCount = (int) strtol(optarg, NULL, 10); 132 | //printf("loopCount=%d\n", loopCount); 133 | break; 134 | default: 135 | printf("other option %c :%s\n", opt, optarg); 136 | } 137 | } 138 | bool hasTargetImgFile = isFileExists(imgPath); 139 | if (!hasTargetImgFile) { 140 | fprintf(stderr, "Target image not found: %s\n", imgPath.c_str()); 141 | return -1; 142 | } 143 | bool hasModelDetFile = isFileExists(modelDetPath); 144 | if (!hasModelDetFile) { 145 | fprintf(stderr, "Model det file not found: %s\n", modelDetPath.c_str()); 146 | return -1; 147 | } 148 | bool hasModelClsFile = isFileExists(modelClsPath); 149 | if (!hasModelClsFile) { 150 | fprintf(stderr, "Model cls file not found: %s\n", modelClsPath.c_str()); 151 | return -1; 152 | } 153 | bool hasModelRecFile = isFileExists(modelRecPath); 154 | if (!hasModelRecFile) { 155 | fprintf(stderr, "Model rec file not found: %s\n", modelRecPath.c_str()); 156 | return -1; 157 | } 158 | bool hasKeysFile = isFileExists(keysPath); 159 | if (!hasKeysFile) { 160 | fprintf(stderr, "keys file not found: %s\n", keysPath.c_str()); 161 | return -1; 162 | } 163 | OcrLite ocrLite; 164 | 
ocrLite.setNumThread(numThread); 165 | ocrLite.initLogger( 166 | false,//isOutputConsole 167 | false,//isOutputPartImg 168 | false);//isOutputResultImg 169 | 170 | //ocrLite.enableResultTxt(imgDir.c_str(), imgName.c_str()); 171 | ocrLite.setGpuIndex(flagGpu); 172 | printf("=====Input Params=====\n"); 173 | printf("numThread(%d),padding(%d),maxSideLen(%d),boxScoreThresh(%f),boxThresh(%f),unClipRatio(%f),doAngle(%d),mostAngle(%d),GPU(%d)\n", 174 | numThread, padding, maxSideLen, boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle, 175 | flagGpu); 176 | bool initModelsRet = ocrLite.initModels(modelDetPath, modelClsPath, modelRecPath, keysPath); 177 | if (!initModelsRet) return -1; 178 | printf("=====Warmup 2 cycles=====\n"); 179 | for (int i = 0; i < 2; ++i) { 180 | OcrResult result = ocrLite.detect(imgDir.c_str(), imgName.c_str(), padding, maxSideLen, 181 | boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle); 182 | printf("Warmup time(%f)\n", result.detectTime); 183 | } 184 | printf("=====Start Test Loop=====\n"); 185 | double allDbTime = 0.0f; 186 | double allClsTime = 0.0f; 187 | double allRecTime = 0.0f; 188 | double allFullTime = 0.0f; 189 | for (int i = 0; i < loopCount; ++i) { 190 | printf("=====Cycle:%d Take Time(ms)=====\n", i + 1); 191 | OcrResult ocrResult = ocrLite.detect(imgDir.c_str(), imgName.c_str(), 192 | padding, maxSideLen, 193 | boxScoreThresh, boxThresh, 194 | unClipRatio, doAngle, mostAngle); 195 | double dbTime = ocrResult.dbNetTime; 196 | double clsTime = 0.0f; 197 | double recTime = 0.0f; 198 | for (const auto &item: ocrResult.textBlocks) { 199 | clsTime += item.angleTime; 200 | recTime += item.crnnTime; 201 | } 202 | double fullTime = ocrResult.detectTime; 203 | printf("det=%f cls=%f rec=%f full=%f\n", dbTime, clsTime, recTime, fullTime); 204 | allDbTime += dbTime; 205 | allClsTime += clsTime; 206 | allRecTime += recTime; 207 | allFullTime += fullTime; 208 | } 209 | printf("=====Result:Average Time(ms)=====\n"); 210 | 
printf("det=%f cls=%f rec=%f full=%f\n", allDbTime / loopCount, allClsTime / loopCount, 211 | allRecTime / loopCount, allFullTime / loopCount); 212 | return 0; 213 | } 214 | -------------------------------------------------------------------------------- /build-default.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | chcp 65001 3 | cls 4 | @SETLOCAL 5 | 6 | IF "%1"=="" ( 7 | echo input VS_VER none, use v142 8 | set VS_VER="v142" 9 | )^ 10 | ELSE ( 11 | echo input VS_VER:%1 12 | set VS_VER="%1" 13 | ) 14 | 15 | mkdir win-BIN-CPU-x64 16 | pushd win-BIN-CPU-x64 17 | cmake -T "%VS_VER%,host=x64" -A "x64" ^ 18 | -DCMAKE_INSTALL_PREFIX=install ^ 19 | -DCMAKE_BUILD_TYPE=Release -DOCR_OUTPUT="BIN" ^ 20 | -DOCR_BUILD_CRT="True" -DOCR_ONNX="CPU" .. 21 | cmake --build . --config Release -j %NUMBER_OF_PROCESSORS% 22 | cmake --build . --config Release --target install 23 | popd 24 | 25 | mkdir win-BIN-CPU-Win32 26 | pushd win-BIN-CPU-Win32 27 | cmake -T "%VS_VER%,host=x64" -A "Win32" ^ 28 | -DCMAKE_INSTALL_PREFIX=install ^ 29 | -DCMAKE_BUILD_TYPE=Release -DOCR_OUTPUT="BIN" ^ 30 | -DOCR_BUILD_CRT="True" -DOCR_ONNX="CPU" .. 31 | cmake --build . --config Release -j %NUMBER_OF_PROCESSORS% 32 | cmake --build . --config Release --target install 33 | popd 34 | 35 | mkdir win-JNI-CPU-x64 36 | pushd win-JNI-CPU-x64 37 | cmake -T "%VS_VER%,host=x64" -A "x64" ^ 38 | -DCMAKE_INSTALL_PREFIX=install ^ 39 | -DCMAKE_BUILD_TYPE=Release -DOCR_OUTPUT="JNI" ^ 40 | -DOCR_BUILD_CRT="True" -DOCR_ONNX="CPU" .. 41 | cmake --build . --config Release -j %NUMBER_OF_PROCESSORS% 42 | cmake --build . --config Release --target install 43 | popd 44 | 45 | mkdir win-JNI-CPU-Win32 46 | pushd win-JNI-CPU-Win32 47 | cmake -T "%VS_VER%,host=x64" -A "Win32" ^ 48 | -DCMAKE_INSTALL_PREFIX=install ^ 49 | -DCMAKE_BUILD_TYPE=Release -DOCR_OUTPUT="JNI" ^ 50 | -DOCR_BUILD_CRT="True" -DOCR_ONNX="CPU" .. 51 | cmake --build . 
#!/usr/bin/env bash

# Configure, build and install one CPU flavour inside "${sysOS}-<OUTPUT>-CPU".
#   $1   value passed as -DOCR_OUTPUT (BIN, JNI or CLIB)
#   $2.. extra options forwarded to the cmake configure step
# Reads the globals sysOS and NUM_THREADS set at the bottom of this script.
function buildVariant() {
  local output="$1"
  shift
  local buildDir="${sysOS}-${output}-CPU"
  mkdir -p "$buildDir"
  # Without the guard a failed pushd would run cmake in the wrong directory.
  pushd "$buildDir" || return 1
  cmake "$@" -DCMAKE_INSTALL_PREFIX=install -DCMAKE_BUILD_TYPE=Release -DOCR_OUTPUT="$output" -DOCR_ONNX="CPU" ..
  cmake --build . --config Release -j "$NUM_THREADS"
  cmake --build . --config Release --target install
  popd || return 1
}

# Native build of the three outputs (executable, JNI library, C library).
function cmakeBuild() {
  local output
  for output in BIN JNI CLIB; do
    buildVariant "$output"
  done
}

# Same three outputs, configured with the musl cross toolchain file.
function cmakeCrossBuild() {
  local output
  for output in BIN JNI CLIB; do
    buildVariant "$output" -DCMAKE_TOOLCHAIN_FILE=../musl-cross.toolchain.cmake
  done
}

sysOS=$(uname -s)
NUM_THREADS=1
if [ "$sysOS" == "Darwin" ]; then
  #echo "I'm MacOS"
  NUM_THREADS=$(sysctl -n hw.ncpu)
  # Previously this branch only computed NUM_THREADS and never built anything.
  echo "native build"
  cmakeBuild "Release"
elif [ "$sysOS" == "Linux" ]; then
  #echo "I'm Linux"
  NUM_THREADS=$(grep -c ^processor /proc/cpuinfo)
  # Two arguments select a cross build: toolchain name and toolchain path.
  if [ "$1" ] && [ "$2" ]; then
    echo "TOOLCHAIN_NAME=$1"
    echo "TOOLCHAIN_PATH=$2"
    # Exported for the toolchain file — presumably read there; confirm in musl-cross.toolchain.cmake.
    export TOOLCHAIN_NAME="$1"
    export TOOLCHAIN_PATH="$2"
    echo "cross build"
    cmakeCrossBuild "Release"
  else
    echo "native build"
    cmakeBuild "Release"
  fi
else
  echo "Other OS: $sysOS"
fi
#!/usr/bin/env bash

# Interactive build helper: asks for the build type, the output kind and the
# onnxruntime flavour, then configures, builds and installs the project inside
# "$sysOS-$ONNX_TYPE-$BUILD_OUTPUT".

# Report an invalid menu answer and stop, so the raw answer is never handed to cmake
# or used as part of the build directory name.
function abortOnBadInput() {
  echo -e "输入错误!Input Error!"
  exit 1
}

echo "========请先参考README.md准备好编译环境========"
echo

echo "========编译选项========"
echo "请输入编译选项并回车: 1)Release, 2)Debug"
read -r -p "" BUILD_TYPE
# Quoted case patterns also cope with an empty answer, which broke the old `[ $VAR == 1 ]`.
case "$BUILD_TYPE" in
  1) BUILD_TYPE=Release ;;
  2) BUILD_TYPE=Debug ;;
  *) abortOnBadInput ;;
esac

echo "请选择编译输出类型并回车: 1)BIN可执行文件,2)JNI动态库,3)C动态库"
echo "请注意:如果选择2)JNI动态库时,必须安装配置Oracle JDK"
read -r -p "" BUILD_OUTPUT
case "$BUILD_OUTPUT" in
  1) BUILD_OUTPUT="BIN" ;;
  2) BUILD_OUTPUT="JNI" ;;
  3) BUILD_OUTPUT="CLIB" ;;
  *) abortOnBadInput ;;
esac

echo "onnxruntime: 1)CPU(默认), 2)GPU(cuda)"
echo "注意:范例工程默认集成CPU版,CUDA版仅支持Linux64且需下载"
read -r -p "" ONNX_TYPE
case "$ONNX_TYPE" in
  # The prompt advertises CPU as the default, so an empty answer selects it.
  "" | 1) ONNX_TYPE="CPU" ;;
  2) ONNX_TYPE="CUDA" ;;
  *) abortOnBadInput ;;
esac

sysOS=$(uname -s)
NUM_THREADS=1
if [ "$sysOS" == "Darwin" ]; then
  #echo "I'm MacOS"
  NUM_THREADS=$(sysctl -n hw.ncpu)
elif [ "$sysOS" == "Linux" ]; then
  #echo "I'm Linux"
  NUM_THREADS=$(grep -c ^processor /proc/cpuinfo)
else
  echo "Other OS: $sysOS"
fi

buildDir="$sysOS-$ONNX_TYPE-$BUILD_OUTPUT"
mkdir -p "$buildDir"
# Stop instead of configuring in the wrong directory when the build dir is unusable.
pushd "$buildDir" || exit 1

echo "cmake -DCMAKE_INSTALL_PREFIX=install -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DOCR_OUTPUT=$BUILD_OUTPUT -DOCR_ONNX=$ONNX_TYPE .."
cmake -DCMAKE_INSTALL_PREFIX=install -DCMAKE_BUILD_TYPE="$BUILD_TYPE" -DOCR_OUTPUT="$BUILD_OUTPUT" -DOCR_ONNX="$ONNX_TYPE" ..
cmake --build . --config "$BUILD_TYPE" -j "$NUM_THREADS"
cmake --build . --config "$BUILD_TYPE" --target install
popd
7 | 8 | echo "========编译选项========" 9 | echo "请注意:项目默认使用Release库,除非您自行编译Debug版的Onnxruntime和Opencv,否则请不要选择Debug编译" 10 | echo "请输入编译选项并回车: 1)Release, 2)Debug" 11 | set BUILD_TYPE=Release 12 | set /p flag= 13 | if %flag% == 1 (set BUILD_TYPE=Release)^ 14 | else if %flag% == 2 (set BUILD_TYPE=Debug)^ 15 | else (echo 输入错误!Input Error!) 16 | echo. 17 | 18 | echo "请注意:如果选择2)JNI动态库时,必须安装配置Oracle JDK" 19 | echo "请选择编译输出类型并回车: 1)BIN可执行文件,2)JNI动态库,3)C动态库" 20 | set /p flag= 21 | if %flag% == 1 (set BUILD_OUTPUT="BIN")^ 22 | else if %flag% == 2 (set BUILD_OUTPUT="JNI")^ 23 | else if %flag% == 3 (set BUILD_OUTPUT="CLIB")^ 24 | else (echo 输入错误!Input Error!) 25 | echo. 26 | 27 | echo "引用的库类型: 1)静态CRT(mt), 2)动态CRT(md)" 28 | echo "注意:范例工程默认集成mt版库" 29 | set /p flag= 30 | if %flag% == 1 ( 31 | set MT_ENABLED="True" 32 | )^ 33 | else (set MT_ENABLED="False") 34 | echo. 35 | 36 | echo "onnxruntime: 1)CPU(默认), 2)GPU(cuda), 3)GPU(DirectML)" 37 | echo "注意:范例工程默认集成CPU版,CUDA版仅支持x64且需下载" 38 | set /p flag= 39 | if %flag% == 1 (set ONNX_TYPE="CPU")^ 40 | else if %flag% == 2 (set ONNX_TYPE="CUDA")^ 41 | else if %flag% == 3 (set ONNX_TYPE="DIRECTML")^ 42 | else (echo 输入错误!Input Error!) 43 | echo. 44 | 45 | echo "请输入选项并回车: 0)ALL, 1)vs2019-x86, 2)vs2019-x64:" 46 | set /p flag= 47 | if %flag% == 0 (call :buildALL)^ 48 | else if %flag% == 1 (call :gen2019-x86)^ 49 | else if %flag% == 2 (call :gen2019-x64)^ 50 | else (echo "输入错误!Input Error!") 51 | GOTO:EOF 52 | 53 | :buildALL 54 | call :gen2019-x86 55 | call :gen2019-x64 56 | GOTO:EOF 57 | 58 | :gen2019-x86 59 | mkdir build-win-vs2019-x86 60 | pushd build-win-vs2019-x86 61 | call :cmakeParams "Visual Studio 16 2019" "Win32" 62 | popd 63 | GOTO:EOF 64 | 65 | :gen2019-x64 66 | mkdir build-win-vs2019-x64 67 | pushd build-win-vs2019-x64 68 | call :cmakeParams "Visual Studio 16 2019" "x64" 69 | popd 70 | GOTO:EOF 71 | 72 | :cmakeParams 73 | echo cmake -G "%~1" -A "%~2" -DOCR_OUTPUT=%BUILD_OUTPUT% -DOCR_BUILD_CRT=%MT_ENABLED% -DOCR_ONNX=%ONNX_TYPE% .. 
74 | cmake -G "%~1" -A "%~2" -DOCR_OUTPUT=%BUILD_OUTPUT% -DOCR_BUILD_CRT=%MT_ENABLED% -DOCR_ONNX=%ONNX_TYPE% .. 75 | GOTO:EOF 76 | 77 | @ENDLOCAL 78 | -------------------------------------------------------------------------------- /images/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidOcrOnnx/abd498c13a6dbe5f3b3c0d421d72e01bb3e6ee6d/images/1.jpg -------------------------------------------------------------------------------- /images/clear.cmd: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | echo Delete part img 3 | DEL /Q *-part-*.jpg 4 | 5 | echo Delete angle img 6 | DEL /Q *-angle-*.jpg 7 | 8 | echo Delete debug img 9 | DEL /Q *-debug-*.jpg 10 | 11 | echo Delete result img 12 | DEL /Q *-result.jpg 13 | 14 | echo Delete result txt 15 | DEL /Q *-result.txt 16 | -------------------------------------------------------------------------------- /images/clear.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo Delete part img 4 | rm -f *-part-*.jpg 5 | 6 | echo Delete angle img 7 | rm -f *-angle-*.jpg 8 | 9 | echo Delete debug img 10 | rm -f *-debug-*.jpg 11 | 12 | echo Delete result img 13 | rm -f *-result.jpg 14 | 15 | echo Delete result txt 16 | rm -f *-result.txt 17 | -------------------------------------------------------------------------------- /include/AngleNet.h: -------------------------------------------------------------------------------- 1 | #ifndef __OCR_ANGLENET_H__ 2 | #define __OCR_ANGLENET_H__ 3 | 4 | #include "OcrStruct.h" 5 | #include 6 | #include 7 | 8 | class AngleNet { 9 | public: 10 | 11 | ~AngleNet(); 12 | 13 | void setNumThread(int numOfThread); 14 | 15 | void setGpuIndex(int gpuIndex); 16 | 17 | void initModel(const std::string &pathStr); 18 | 19 | std::vector getAngles(std::vector &partImgs, const char *path, 20 | const char *imgName, bool 
doAngle, bool mostAngle); 21 | 22 | private: 23 | bool isOutputAngleImg = false; 24 | 25 | Ort::Session *session; 26 | Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "AngleNet"); 27 | Ort::SessionOptions sessionOptions = Ort::SessionOptions(); 28 | int numThread = 0; 29 | 30 | std::vector inputNamesPtr; 31 | std::vector outputNamesPtr; 32 | 33 | const float meanValues[3] = {127.5, 127.5, 127.5}; 34 | const float normValues[3] = {1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5}; 35 | const int dstWidth = 192; 36 | const int dstHeight = 48; 37 | 38 | Angle getAngle(cv::Mat &src); 39 | }; 40 | 41 | 42 | #endif //__OCR_ANGLENET_H__ 43 | -------------------------------------------------------------------------------- /include/CrnnNet.h: -------------------------------------------------------------------------------- 1 | #ifndef __OCR_CRNNNET_H__ 2 | #define __OCR_CRNNNET_H__ 3 | 4 | #include "OcrStruct.h" 5 | #include 6 | #include 7 | 8 | class CrnnNet { 9 | public: 10 | 11 | ~CrnnNet(); 12 | 13 | void setNumThread(int numOfThread); 14 | 15 | void setGpuIndex(int gpuIndex); 16 | 17 | void initModel(const std::string &pathStr, const std::string &keysPath); 18 | 19 | std::vector getTextLines(std::vector &partImg, const char *path, const char *imgName); 20 | 21 | private: 22 | bool isOutputDebugImg = false; 23 | Ort::Session *session; 24 | Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "CrnnNet"); 25 | Ort::SessionOptions sessionOptions = Ort::SessionOptions(); 26 | int numThread = 0; 27 | 28 | std::vector inputNamesPtr; 29 | std::vector outputNamesPtr; 30 | 31 | const float meanValues[3] = {127.5, 127.5, 127.5}; 32 | const float normValues[3] = {1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5}; 33 | const int dstHeight = 48; 34 | 35 | std::vector keys; 36 | 37 | TextLine scoreToTextLine(const std::vector &outputData, size_t h, size_t w); 38 | 39 | TextLine getTextLine(const cv::Mat &src); 40 | }; 41 | 42 | 43 | #endif //__OCR_CRNNNET_H__ 44 | 
-------------------------------------------------------------------------------- /include/DbNet.h: -------------------------------------------------------------------------------- 1 | #ifndef __OCR_DBNET_H__ 2 | #define __OCR_DBNET_H__ 3 | 4 | #include "OcrStruct.h" 5 | #include 6 | #include 7 | 8 | class DbNet { 9 | public: 10 | ~DbNet(); 11 | 12 | void setNumThread(int numOfThread); 13 | 14 | void setGpuIndex(int gpuIndex); 15 | 16 | void initModel(const std::string &pathStr); 17 | 18 | std::vector getTextBoxes(cv::Mat &src, ScaleParam &s, float boxScoreThresh, 19 | float boxThresh, float unClipRatio); 20 | 21 | private: 22 | Ort::Session *session; 23 | Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "DbNet"); 24 | Ort::SessionOptions sessionOptions = Ort::SessionOptions(); 25 | int numThread = 0; 26 | 27 | std::vector inputNamesPtr; 28 | std::vector outputNamesPtr; 29 | 30 | const float meanValues[3] = {0.485 * 255, 0.456 * 255, 0.406 * 255}; 31 | const float normValues[3] = {1.0 / 0.229 / 255.0, 1.0 / 0.224 / 255.0, 1.0 / 0.225 / 255.0}; 32 | }; 33 | 34 | 35 | #endif //__OCR_DBNET_H__ 36 | -------------------------------------------------------------------------------- /include/OcrLite.h: -------------------------------------------------------------------------------- 1 | #ifndef __OCR_LITE_H__ 2 | #define __OCR_LITE_H__ 3 | 4 | #include 5 | #include "OcrStruct.h" 6 | 7 | class OcrLiteImpl; 8 | 9 | class OcrLite{ 10 | public: 11 | OcrLite(); 12 | 13 | ~OcrLite(); 14 | 15 | void setNumThread(int numOfThread); 16 | 17 | void initLogger(bool isConsole, bool isPartImg, bool isResultImg); 18 | 19 | void enableResultTxt(const char *path, const char *imgName); 20 | 21 | void setGpuIndex(int gpuIndex); 22 | 23 | bool initModels(const std::string &detPath, const std::string &clsPath, 24 | const std::string &recPath, const std::string &keysPath); 25 | 26 | void Logger(const char *format, ...); 27 | 28 | OcrResult detect(const char *path, const char *imgName, 29 | int 
#ifndef __OCR_LITE_C_API_H__
#define __OCR_LITE_C_API_H__

#include <stdint.h>

/*
 * C interface of the OCR library.
 *
 * The declarations are visible to both C and C++ translation units; only the
 * extern "C" wrapper is conditional. (Previously the whole header was wrapped in
 * #ifdef __cplusplus, so a C compiler saw an empty file.)
 */
#ifdef __cplusplus
extern "C"
{
#endif

#ifdef WIN32
#ifdef __CLIB__
#define _QM_OCR_API __declspec(dllexport)
#else
#define _QM_OCR_API __declspec(dllimport)
#endif
#else
#define _QM_OCR_API
#endif

typedef void *OCR_HANDLE;
typedef char OCR_BOOL;

/* Guarded so that headers which already define these names do not clash. */
#ifndef NULL
#define NULL 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif

/* Detection parameters passed to OcrDetect / OcrDetectInput. */
typedef struct __ocr_param {
    int padding;
    int maxSideLen;
    float boxScoreThresh;
    float boxThresh;
    float unClipRatio;
    int doAngle; // 1 means do
    int mostAngle; // 1 means true
} OCR_PARAM;

/* One point of a text box. */
typedef struct {
    double x;
    double y;
} OCR_POINT;

/* In-memory image handed to OcrDetectInput. The meaning of `type` is defined by the
 * implementation (OcrLiteCApi.cpp) — not visible from this header. */
typedef struct {
    uint8_t *data;
    int type;
    int channels;
    int width;
    int height;
    long dataLength;
} OCR_INPUT;

/* One recognised text block; the *Length fields give the element counts of the
 * corresponding pointer members. */
typedef struct {
    OCR_POINT* boxPoint;
    float boxScore;
    int angleIndex;
    float angleScore;
    double angleTime;
    uint8_t *text;
    float *charScores;
    unsigned long long charScoresLength;
    unsigned long long boxPointLength;
    unsigned long long textLength;
    double crnnTime;
    double blockTime;
} TEXT_BLOCK;

/* Result of OcrDetectInput; release it with OcrFreeResult. */
typedef struct {
    double dbNetTime;
    TEXT_BLOCK *textBlocks;
    unsigned long long textBlocksLength;
    double detectTime;
} OCR_RESULT;

/*
By default, nThreads should be the number of threads
*/
_QM_OCR_API OCR_HANDLE
OcrInit(const char *szDetModel, const char *szClsModel, const char *szRecModel, const char *szKeyPath, int nThreads);

_QM_OCR_API OCR_BOOL
OcrDetect(OCR_HANDLE handle, const char *imgPath, const char *imgName, OCR_PARAM *pParam);

_QM_OCR_API OCR_BOOL
OcrDetectInput(OCR_HANDLE handle, OCR_INPUT *input, OCR_PARAM *pParam, OCR_RESULT *ocrResult);

_QM_OCR_API OCR_BOOL
OcrFreeResult(OCR_RESULT *result);

_QM_OCR_API int OcrGetLen(OCR_HANDLE handle);

_QM_OCR_API OCR_BOOL OcrGetResult(OCR_HANDLE handle, char *szBuf, int nLen);

_QM_OCR_API void OcrDestroy(OCR_HANDLE handle);

#ifdef __cplusplus
}
#endif

#endif //__OCR_LITE_C_API_H__
unClipRatio, bool doAngle, bool mostAngle); 34 | 35 | OcrResult detect(const cv::Mat &mat, 36 | int padding, int maxSideLen, 37 | float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, bool mostAngle); 38 | 39 | OcrResult detectImageBytes(const uint8_t *data, long dataLength, int grey, int padding, int maxSideLen, 40 | float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, bool mostAngle); 41 | OcrResult detectBitmap(uint8_t *bitmapData, int width, int height,int channels, int padding, int maxSideLen, 42 | float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, bool mostAngle); 43 | 44 | private: 45 | friend void OcrLite::Logger(const char *format, ...); 46 | bool isOutputConsole = false; 47 | bool isOutputPartImg = false; 48 | bool isOutputResultTxt = false; 49 | bool isOutputResultImg = false; 50 | FILE *resultTxt; 51 | DbNet dbNet; 52 | AngleNet angleNet; 53 | CrnnNet crnnNet; 54 | char *loggerBuffer; 55 | std::vector getPartImages(cv::Mat &src, std::vector &textBoxes, 56 | const char *path, const char *imgName); 57 | 58 | OcrResult detect(const char *path, const char *imgName, 59 | cv::Mat &src, cv::Rect &originRect, ScaleParam &scale, 60 | float boxScoreThresh = 0.6f, float boxThresh = 0.3f, 61 | float unClipRatio = 2.0f, bool doAngle = true, bool mostAngle = true); 62 | }; 63 | 64 | #endif //__OCR_LITE_IMPL_H__ 65 | -------------------------------------------------------------------------------- /include/OcrResultUtils.h: -------------------------------------------------------------------------------- 1 | #ifdef __JNI__ 2 | #ifndef __OCR_RESULT_UTILS_H__ 3 | #define __OCR_RESULT_UTILS_H__ 4 | #include 5 | #include "OcrStruct.h" 6 | 7 | class OcrResultUtils { 8 | public: 9 | OcrResultUtils(JNIEnv *env, OcrResult &ocrResult); 10 | 11 | ~OcrResultUtils(); 12 | 13 | jobject getJObject(); 14 | 15 | private: 16 | JNIEnv *jniEnv; 17 | jobject jOcrResult; 18 | 19 | jclass newJListClass(); 20 | 21 | jmethodID 
getListConstructor(jclass clazz); 22 | 23 | jobject getTextBlock(TextBlock &textBlock); 24 | 25 | jobject getTextBlocks(std::vector &textBlocks); 26 | 27 | jobject newJPoint(cv::Point &point); 28 | 29 | jobject newJBoxPoint(std::vector &boxPoint); 30 | 31 | jfloatArray newJScoreArray(std::vector &scores); 32 | 33 | }; 34 | #endif //__OCR_RESULT_UTILS_H__ 35 | #endif 36 | -------------------------------------------------------------------------------- /include/OcrStruct.h: -------------------------------------------------------------------------------- 1 | #ifndef __OCR_STRUCT_H__ 2 | #define __OCR_STRUCT_H__ 3 | 4 | #include "opencv2/core.hpp" 5 | #include 6 | 7 | struct ScaleParam { 8 | int srcWidth; 9 | int srcHeight; 10 | int dstWidth; 11 | int dstHeight; 12 | float ratioWidth; 13 | float ratioHeight; 14 | }; 15 | 16 | struct TextBox { 17 | std::vector boxPoint; 18 | float score; 19 | }; 20 | 21 | struct Angle { 22 | int index; 23 | float score; 24 | double time; 25 | }; 26 | 27 | struct TextLine { 28 | std::string text; 29 | std::vector charScores; 30 | double time; 31 | }; 32 | 33 | struct TextBlock { 34 | std::vector boxPoint; 35 | float boxScore; 36 | int angleIndex; 37 | float angleScore; 38 | double angleTime; 39 | std::string text; 40 | std::vector charScores; 41 | double crnnTime; 42 | double blockTime; 43 | }; 44 | 45 | struct OcrResult { 46 | double dbNetTime; 47 | std::vector textBlocks; 48 | cv::Mat boxImg; 49 | double detectTime; 50 | std::string strRes; 51 | }; 52 | 53 | #endif //__OCR_STRUCT_H__ 54 | -------------------------------------------------------------------------------- /include/OcrUtils.h: -------------------------------------------------------------------------------- 1 | #ifndef __OCR_UTILS_H__ 2 | #define __OCR_UTILS_H__ 3 | 4 | #include 5 | #include "OcrStruct.h" 6 | #include 7 | #include 8 | #include 9 | 10 | template 11 | static std::unique_ptr makeUnique(Ts &&... 
/**
 * Arithmetic mean of `input`, accumulated in double precision.
 *
 * @return the mean, or 0.0 for an empty vector (previously this divided by zero).
 */
template<typename T>
static double getMean(const std::vector<T> &input) {
    if (input.empty()) return 0.0;
    const double sum = std::accumulate(input.begin(), input.end(), 0.0);
    return sum / static_cast<double>(input.size());
}

/**
 * Sample standard deviation of `input` (divides by size - 1) around the given mean.
 *
 * @param mean mean of `input`, e.g. from getMean()
 * @return the standard deviation, or 0 when fewer than two elements are present.
 */
template<typename T>
static double getStdev(const std::vector<T> &input, double mean) {
    if (input.size() <= 1) return 0;
    double accum = 0.0;
    for (const T &value: input) {
        const double diff = static_cast<double>(value) - mean;
        accum += diff * diff;
    }
    return std::sqrt(accum / static_cast<double>(input.size() - 1));
}

/**
 * Limit `x` to the closed range [min, max]. The upper bound is tested first, so it
 * wins if the bounds are passed in the wrong order.
 */
template<class T>
inline T clamp(T x, T min, T max) {
    if (x > max)
        return max;
    if (x < min)
        return min;
    return x;
}

/** True when stat() succeeds on `name`, i.e. the path exists and is accessible. */
inline bool isFileExists(const std::string &name) {
    struct stat buffer;
    return (stat(name.c_str(), &buffer) == 0);
}
substractMeanNormalize(cv::Mat &src, const float *meanVals, const float *normVals); 79 | 80 | std::vector getAngleIndexes(std::vector &angles); 81 | 82 | std::vector getInputNames(Ort::Session *session); 83 | 84 | std::vector getOutputNames(Ort::Session *session); 85 | 86 | void saveImg(cv::Mat &img, const char *imgPath); 87 | 88 | std::string getSrcImgFilePath(const char *path, const char *imgName); 89 | 90 | std::string getResultTxtFilePath(const char *path, const char *imgName); 91 | 92 | std::string getResultImgFilePath(const char *path, const char *imgName); 93 | 94 | std::string getDebugImgFilePath(const char *path, const char *imgName, size_t i, const char *tag); 95 | 96 | #endif //__OCR_UTILS_H__ 97 | -------------------------------------------------------------------------------- /include/clipper.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * * 3 | * Author : Angus Johnson * 4 | * Version : 6.4.2 * 5 | * Date : 27 February 2017 * 6 | * Website : http://www.angusj.com * 7 | * Copyright : Angus Johnson 2010-2017 * 8 | * * 9 | * License: * 10 | * Use, modification & distribution is subject to Boost Software License Ver 1. * 11 | * http://www.boost.org/LICENSE_1_0.txt * 12 | * * 13 | * Attributions: * 14 | * The code in this library is an extension of Bala Vatti's clipping algorithm: * 15 | * "A generic solution to polygon clipping" * 16 | * Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. * 17 | * http://portal.acm.org/citation.cfm?id=129906 * 18 | * * 19 | * Computer graphics and geometric modeling: implementation and algorithms * 20 | * By Max K. Agoston * 21 | * Springer; 1 edition (January 4, 2005) * 22 | * http://books.google.com/books?q=vatti+clipping+agoston * 23 | * * 24 | * See also: * 25 | * "Polygon Offsetting by Computing Winding Numbers" * 26 | * Paper no. DETC2005-85513 pp. 
565-575 * 27 | * ASME 2005 International Design Engineering Technical Conferences * 28 | * and Computers and Information in Engineering Conference (IDETC/CIE2005) * 29 | * September 24-28, 2005 , Long Beach, California, USA * 30 | * http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf * 31 | * * 32 | *******************************************************************************/ 33 | 34 | #ifndef clipper_hpp 35 | #define clipper_hpp 36 | 37 | #define CLIPPER_VERSION "6.4.2" 38 | 39 | //use_int32: When enabled 32bit ints are used instead of 64bit ints. This 40 | //improve performance but coordinate values are limited to the range +/- 46340 41 | //#define use_int32 42 | 43 | //use_xyz: adds a Z member to IntPoint. Adds a minor cost to perfomance. 44 | //#define use_xyz 45 | 46 | //use_lines: Enables line clipping. Adds a very minor cost to performance. 47 | #define use_lines 48 | 49 | //use_deprecated: Enables temporary support for the obsolete functions 50 | //#define use_deprecated 51 | 52 | #include 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | #include 60 | #include 61 | 62 | namespace ClipperLib { 63 | 64 | enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor }; 65 | enum PolyType { ptSubject, ptClip }; 66 | //By far the most widely used winding rules for polygon filling are 67 | //EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32) 68 | //Others rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL) 69 | //see http://glprogramming.com/red/chapter11.html 70 | enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative }; 71 | 72 | #ifdef use_int32 73 | typedef int cInt; 74 | static cInt const loRange = 0x7FFF; 75 | static cInt const hiRange = 0x7FFF; 76 | #else 77 | typedef signed long long cInt; 78 | static cInt const loRange = 0x3FFFFFFF; 79 | static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL; 80 | typedef signed long long long64; //used by Int128 class 
81 | typedef unsigned long long ulong64; 82 | 83 | #endif 84 | 85 | struct IntPoint { 86 | cInt X; 87 | cInt Y; 88 | #ifdef use_xyz 89 | cInt Z; 90 | IntPoint(cInt x = 0, cInt y = 0, cInt z = 0): X(x), Y(y), Z(z) {}; 91 | #else 92 | IntPoint(cInt x = 0, cInt y = 0): X(x), Y(y) {}; 93 | #endif 94 | 95 | friend inline bool operator== (const IntPoint& a, const IntPoint& b) 96 | { 97 | return a.X == b.X && a.Y == b.Y; 98 | } 99 | friend inline bool operator!= (const IntPoint& a, const IntPoint& b) 100 | { 101 | return a.X != b.X || a.Y != b.Y; 102 | } 103 | }; 104 | //------------------------------------------------------------------------------ 105 | 106 | typedef std::vector< IntPoint > Path; 107 | typedef std::vector< Path > Paths; 108 | 109 | inline Path& operator <<(Path& poly, const IntPoint& p) {poly.push_back(p); return poly;} 110 | inline Paths& operator <<(Paths& polys, const Path& p) {polys.push_back(p); return polys;} 111 | 112 | std::ostream& operator <<(std::ostream &s, const IntPoint &p); 113 | std::ostream& operator <<(std::ostream &s, const Path &p); 114 | std::ostream& operator <<(std::ostream &s, const Paths &p); 115 | 116 | struct DoublePoint 117 | { 118 | double X; 119 | double Y; 120 | DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {} 121 | DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {} 122 | }; 123 | //------------------------------------------------------------------------------ 124 | 125 | #ifdef use_xyz 126 | typedef void (*ZFillCallback)(IntPoint& e1bot, IntPoint& e1top, IntPoint& e2bot, IntPoint& e2top, IntPoint& pt); 127 | #endif 128 | 129 | enum InitOptions {ioReverseSolution = 1, ioStrictlySimple = 2, ioPreserveCollinear = 4}; 130 | enum JoinType {jtSquare, jtRound, jtMiter}; 131 | enum EndType {etClosedPolygon, etClosedLine, etOpenButt, etOpenSquare, etOpenRound}; 132 | 133 | class PolyNode; 134 | typedef std::vector< PolyNode* > PolyNodes; 135 | 136 | class PolyNode 137 | { 138 | public: 139 | PolyNode(); 140 | 
virtual ~PolyNode(){}; 141 | Path Contour; 142 | PolyNodes Childs; 143 | PolyNode* Parent; 144 | PolyNode* GetNext() const; 145 | bool IsHole() const; 146 | bool IsOpen() const; 147 | int ChildCount() const; 148 | private: 149 | //PolyNode& operator =(PolyNode& other); 150 | unsigned Index; //node index in Parent.Childs 151 | bool m_IsOpen; 152 | JoinType m_jointype; 153 | EndType m_endtype; 154 | PolyNode* GetNextSiblingUp() const; 155 | void AddChild(PolyNode& child); 156 | friend class Clipper; //to access Index 157 | friend class ClipperOffset; 158 | }; 159 | 160 | class PolyTree: public PolyNode 161 | { 162 | public: 163 | ~PolyTree(){ Clear(); }; 164 | PolyNode* GetFirst() const; 165 | void Clear(); 166 | int Total() const; 167 | private: 168 | //PolyTree& operator =(PolyTree& other); 169 | PolyNodes AllNodes; 170 | friend class Clipper; //to access AllNodes 171 | }; 172 | 173 | bool Orientation(const Path &poly); 174 | double Area(const Path &poly); 175 | int PointInPolygon(const IntPoint &pt, const Path &path); 176 | 177 | void SimplifyPolygon(const Path &in_poly, Paths &out_polys, PolyFillType fillType = pftEvenOdd); 178 | void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, PolyFillType fillType = pftEvenOdd); 179 | void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd); 180 | 181 | void CleanPolygon(const Path& in_poly, Path& out_poly, double distance = 1.415); 182 | void CleanPolygon(Path& poly, double distance = 1.415); 183 | void CleanPolygons(const Paths& in_polys, Paths& out_polys, double distance = 1.415); 184 | void CleanPolygons(Paths& polys, double distance = 1.415); 185 | 186 | void MinkowskiSum(const Path& pattern, const Path& path, Paths& solution, bool pathIsClosed); 187 | void MinkowskiSum(const Path& pattern, const Paths& paths, Paths& solution, bool pathIsClosed); 188 | void MinkowskiDiff(const Path& poly1, const Path& poly2, Paths& solution); 189 | 190 | void PolyTreeToPaths(const PolyTree& polytree, Paths& 
paths); 191 | void ClosedPathsFromPolyTree(const PolyTree& polytree, Paths& paths); 192 | void OpenPathsFromPolyTree(PolyTree& polytree, Paths& paths); 193 | 194 | void ReversePath(Path& p); 195 | void ReversePaths(Paths& p); 196 | 197 | struct IntRect { cInt left; cInt top; cInt right; cInt bottom; }; 198 | 199 | //enums that are used internally ... 200 | enum EdgeSide { esLeft = 1, esRight = 2}; 201 | 202 | //forward declarations (for stuff used internally) ... 203 | struct TEdge; 204 | struct IntersectNode; 205 | struct LocalMinimum; 206 | struct OutPt; 207 | struct OutRec; 208 | struct Join; 209 | 210 | typedef std::vector < OutRec* > PolyOutList; 211 | typedef std::vector < TEdge* > EdgeList; 212 | typedef std::vector < Join* > JoinList; 213 | typedef std::vector < IntersectNode* > IntersectList; 214 | 215 | //------------------------------------------------------------------------------ 216 | 217 | //ClipperBase is the ancestor to the Clipper class. It should not be 218 | //instantiated directly. This class simply abstracts the conversion of sets of 219 | //polygon coordinates into edge objects that are stored in a LocalMinima list. 
220 | class ClipperBase 221 | { 222 | public: 223 | ClipperBase(); 224 | virtual ~ClipperBase(); 225 | virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed); 226 | bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed); 227 | virtual void Clear(); 228 | IntRect GetBounds(); 229 | bool PreserveCollinear() {return m_PreserveCollinear;}; 230 | void PreserveCollinear(bool value) {m_PreserveCollinear = value;}; 231 | protected: 232 | void DisposeLocalMinimaList(); 233 | TEdge* AddBoundsToLML(TEdge *e, bool IsClosed); 234 | virtual void Reset(); 235 | TEdge* ProcessBound(TEdge* E, bool IsClockwise); 236 | void InsertScanbeam(const cInt Y); 237 | bool PopScanbeam(cInt &Y); 238 | bool LocalMinimaPending(); 239 | bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin); 240 | OutRec* CreateOutRec(); 241 | void DisposeAllOutRecs(); 242 | void DisposeOutRec(PolyOutList::size_type index); 243 | void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2); 244 | void DeleteFromAEL(TEdge *e); 245 | void UpdateEdgeIntoAEL(TEdge *&e); 246 | 247 | typedef std::vector MinimaList; 248 | MinimaList::iterator m_CurrentLM; 249 | MinimaList m_MinimaList; 250 | 251 | bool m_UseFullRange; 252 | EdgeList m_edges; 253 | bool m_PreserveCollinear; 254 | bool m_HasOpenPaths; 255 | PolyOutList m_PolyOuts; 256 | TEdge *m_ActiveEdges; 257 | 258 | typedef std::priority_queue ScanbeamList; 259 | ScanbeamList m_Scanbeam; 260 | }; 261 | //------------------------------------------------------------------------------ 262 | 263 | class Clipper : public virtual ClipperBase 264 | { 265 | public: 266 | Clipper(int initOptions = 0); 267 | bool Execute(ClipType clipType, 268 | Paths &solution, 269 | PolyFillType fillType = pftEvenOdd); 270 | bool Execute(ClipType clipType, 271 | Paths &solution, 272 | PolyFillType subjFillType, 273 | PolyFillType clipFillType); 274 | bool Execute(ClipType clipType, 275 | PolyTree &polytree, 276 | PolyFillType fillType = pftEvenOdd); 277 | bool Execute(ClipType 
clipType, 278 | PolyTree &polytree, 279 | PolyFillType subjFillType, 280 | PolyFillType clipFillType); 281 | bool ReverseSolution() { return m_ReverseOutput; }; 282 | void ReverseSolution(bool value) {m_ReverseOutput = value;}; 283 | bool StrictlySimple() {return m_StrictSimple;}; 284 | void StrictlySimple(bool value) {m_StrictSimple = value;}; 285 | //set the callback function for z value filling on intersections (otherwise Z is 0) 286 | #ifdef use_xyz 287 | void ZFillFunction(ZFillCallback zFillFunc); 288 | #endif 289 | protected: 290 | virtual bool ExecuteInternal(); 291 | private: 292 | JoinList m_Joins; 293 | JoinList m_GhostJoins; 294 | IntersectList m_IntersectList; 295 | ClipType m_ClipType; 296 | typedef std::list MaximaList; 297 | MaximaList m_Maxima; 298 | TEdge *m_SortedEdges; 299 | bool m_ExecuteLocked; 300 | PolyFillType m_ClipFillType; 301 | PolyFillType m_SubjFillType; 302 | bool m_ReverseOutput; 303 | bool m_UsingPolyTree; 304 | bool m_StrictSimple; 305 | #ifdef use_xyz 306 | ZFillCallback m_ZFill; //custom callback 307 | #endif 308 | void SetWindingCount(TEdge& edge); 309 | bool IsEvenOddFillType(const TEdge& edge) const; 310 | bool IsEvenOddAltFillType(const TEdge& edge) const; 311 | void InsertLocalMinimaIntoAEL(const cInt botY); 312 | void InsertEdgeIntoAEL(TEdge *edge, TEdge* startEdge); 313 | void AddEdgeToSEL(TEdge *edge); 314 | bool PopEdgeFromSEL(TEdge *&edge); 315 | void CopyAELToSEL(); 316 | void DeleteFromSEL(TEdge *e); 317 | void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2); 318 | bool IsContributing(const TEdge& edge) const; 319 | bool IsTopHorz(const cInt XPos); 320 | void DoMaxima(TEdge *e); 321 | void ProcessHorizontals(); 322 | void ProcessHorizontal(TEdge *horzEdge); 323 | void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt); 324 | OutPt* AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt); 325 | OutRec* GetOutRec(int idx); 326 | void AppendPolygon(TEdge *e1, TEdge *e2); 327 | void IntersectEdges(TEdge *e1, 
TEdge *e2, IntPoint &pt); 328 | OutPt* AddOutPt(TEdge *e, const IntPoint &pt); 329 | OutPt* GetLastOutPt(TEdge *e); 330 | bool ProcessIntersections(const cInt topY); 331 | void BuildIntersectList(const cInt topY); 332 | void ProcessIntersectList(); 333 | void ProcessEdgesAtTopOfScanbeam(const cInt topY); 334 | void BuildResult(Paths& polys); 335 | void BuildResult2(PolyTree& polytree); 336 | void SetHoleState(TEdge *e, OutRec *outrec); 337 | void DisposeIntersectNodes(); 338 | bool FixupIntersectionOrder(); 339 | void FixupOutPolygon(OutRec &outrec); 340 | void FixupOutPolyline(OutRec &outrec); 341 | bool IsHole(TEdge *e); 342 | bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl); 343 | void FixHoleLinkage(OutRec &outrec); 344 | void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt); 345 | void ClearJoins(); 346 | void ClearGhostJoins(); 347 | void AddGhostJoin(OutPt *op, const IntPoint offPt); 348 | bool JoinPoints(Join *j, OutRec* outRec1, OutRec* outRec2); 349 | void JoinCommonEdges(); 350 | void DoSimplePolygons(); 351 | void FixupFirstLefts1(OutRec* OldOutRec, OutRec* NewOutRec); 352 | void FixupFirstLefts2(OutRec* InnerOutRec, OutRec* OuterOutRec); 353 | void FixupFirstLefts3(OutRec* OldOutRec, OutRec* NewOutRec); 354 | #ifdef use_xyz 355 | void SetZ(IntPoint& pt, TEdge& e1, TEdge& e2); 356 | #endif 357 | }; 358 | //------------------------------------------------------------------------------ 359 | 360 | class ClipperOffset 361 | { 362 | public: 363 | ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25); 364 | ~ClipperOffset(); 365 | void AddPath(const Path& path, JoinType joinType, EndType endType); 366 | void AddPaths(const Paths& paths, JoinType joinType, EndType endType); 367 | void Execute(Paths& solution, double delta); 368 | void Execute(PolyTree& solution, double delta); 369 | void Clear(); 370 | double MiterLimit; 371 | double ArcTolerance; 372 | private: 373 | Paths m_destPolys; 374 | Path m_srcPoly; 375 | Path 
m_destPoly; 376 | std::vector m_normals; 377 | double m_delta, m_sinA, m_sin, m_cos; 378 | double m_miterLim, m_StepsPerRad; 379 | IntPoint m_lowest; 380 | PolyNode m_polyNodes; 381 | 382 | void FixOrientations(); 383 | void DoOffset(double delta); 384 | void OffsetPoint(int j, int& k, JoinType jointype); 385 | void DoSquare(int j, int k); 386 | void DoMiter(int j, int k, double r); 387 | void DoRound(int j, int k); 388 | }; 389 | //------------------------------------------------------------------------------ 390 | 391 | class clipperException : public std::exception 392 | { 393 | public: 394 | clipperException(const char* description): m_descr(description) {} 395 | virtual ~clipperException() throw() {} 396 | virtual const char* what() const throw() {return m_descr.c_str();} 397 | private: 398 | std::string m_descr; 399 | }; 400 | //------------------------------------------------------------------------------ 401 | 402 | } //ClipperLib namespace 403 | 404 | #endif //clipper_hpp 405 | 406 | 407 | -------------------------------------------------------------------------------- /include/getopt.h: -------------------------------------------------------------------------------- 1 | /* 2 | * getopt - POSIX like getopt for Windows console Application 3 | * 4 | * win-c - Windows Console Library 5 | * Copyright (c) 2015 Koji Takami 6 | * Released under the MIT license 7 | * https://github.com/takamin/win-c/blob/master/LICENSE 8 | */ 9 | #ifndef _GETOPT_H_ 10 | #define _GETOPT_H_ 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif // __cplusplus 15 | 16 | int getopt(int argc, char *const argv[], 17 | const char *optstring); 18 | 19 | extern char *optarg; 20 | extern int optind, opterr, optopt; 21 | 22 | #define no_argument 0 23 | #define required_argument 1 24 | #define optional_argument 2 25 | 26 | struct option { 27 | const char *name; 28 | int has_arg; 29 | int *flag; 30 | int val; 31 | }; 32 | 33 | int getopt_long(int argc, char *const argv[], 34 | const char 
*optstring, 35 | const struct option *longopts, int *longindex); 36 | /**************************************************************************** 37 | int getopt_long_only(int argc, char* const argv[], 38 | const char* optstring, 39 | const struct option* longopts, int* longindex); 40 | ****************************************************************************/ 41 | #ifdef __cplusplus 42 | } 43 | #endif // __cplusplus 44 | #endif // _GETOPT_H_ 45 | -------------------------------------------------------------------------------- /include/main.h: -------------------------------------------------------------------------------- 1 | #ifndef __MAIN_H__ 2 | #define __MAIN_H__ 3 | 4 | #include "getopt.h" 5 | 6 | static const struct option long_options[] = { 7 | {"models", required_argument, NULL, 'd'}, 8 | {"det", required_argument, NULL, '1'}, 9 | {"cls", required_argument, NULL, '2'}, 10 | {"rec", required_argument, NULL, '3'}, 11 | {"keys", required_argument, NULL, '4'}, 12 | {"image", required_argument, NULL, 'i'}, 13 | {"numThread", required_argument, NULL, 't'}, 14 | {"padding", required_argument, NULL, 'p'}, 15 | {"maxSideLen", required_argument, NULL, 's'}, 16 | {"boxScoreThresh", required_argument, NULL, 'b'}, 17 | {"boxThresh", required_argument, NULL, 'o'}, 18 | {"unClipRatio", required_argument, NULL, 'u'}, 19 | {"doAngle", required_argument, NULL, 'a'}, 20 | {"mostAngle", required_argument, NULL, 'A'}, 21 | {"version", no_argument, NULL, 'v'}, 22 | {"help", no_argument, NULL, 'h'}, 23 | {"loopCount", required_argument, NULL, 'l'}, 24 | {"GPU", required_argument, NULL, 'G'}, 25 | {NULL, no_argument, NULL, 0} 26 | }; 27 | // Usage synopsis; the long option names listed here must match long_options above. 28 | const char *usageMsg = "(-d --models) (-1 --det) (-2 --cls) (-3 --rec) (-4 --keys) (-i --image)\n"\ 29 | "[-t --numThread] [-p --padding] [-s --maxSideLen]\n" \ 30 | "[-b --boxScoreThresh] [-o --boxThresh] [-u --unClipRatio]\n" \ 31 | "[-a --doAngle] [-A --mostAngle] [-G --GPU]\n\n"; 32 | 33 | const char *requiredMsg = "-d --models:
models directory.\n" \ 34 | "-1 --det: model file name of det.\n" \ 35 | "-2 --cls: model file name of cls.\n" \ 36 | "-3 --rec: model file name of rec.\n" \ 37 | "-4 --keys: keys file name.\n" \ 38 | "-i --image: path of target image.\n\n"; 39 | // Help text describing the optional arguments and their defaults. 40 | const char *optionalMsg = "-t --numThread: value of numThread(int), default: 4\n" \ 41 | "-p --padding: value of padding(int), default: 50\n" \ 42 | "-s --maxSideLen: Long side of picture for resize(int), default: 1024\n" \ 43 | "-b --boxScoreThresh: value of boxScoreThresh(float), default: 0.5\n" \ 44 | "-o --boxThresh: value of boxThresh(float), default: 0.3\n" \ 45 | "-u --unClipRatio: value of unClipRatio(float), default: 1.6\n" \ 46 | "-a --doAngle: Enable(1)/Disable(0) Angle Net, default: Enable\n" \ 47 | "-A --mostAngle: Enable(1)/Disable(0) Most Possible AngleIndex, default: Enable\n" \ 48 | "-G --GPU: Disable(-1)/GPU0(0)/GPU1(1)/... Use GPU accelerate, default: Disable(-1)\n\n"; 49 | 50 | const char *otherMsg = "-v --version: show version\n" \ 51 | "-h --help: print this help\n\n"; 52 | 53 | const char *example1Msg = "Example1: %s --models models --det det.onnx --cls cls.onnx --rec rec.onnx --keys keys.txt --image 1.jpg --GPU 0\n"; 54 | const char *example2Msg = "Example2: %s -d models -1 det.onnx -2 cls.onnx -3 rec.onnx -4 keys.txt -i 1.jpg -t 4 -p 50 -s 0 -b 0.6 -o 0.3 -u 2.0 -a 1 -A 1 -G 0\n"; 55 | 56 | #endif //__MAIN_H__ 57 | -------------------------------------------------------------------------------- /include/version.h: -------------------------------------------------------------------------------- 1 | #ifndef __OCR_VERSION_H__ 2 | #define __OCR_VERSION_H__ 3 | 4 | #define VERSION "1.2.3" 5 | 6 | #endif //__OCR_VERSION_H__ 7 | -------------------------------------------------------------------------------- /onnxruntime-DirectML/OnnxRuntimeWrapper.cmake: -------------------------------------------------------------------------------- 1 | if (APPLE) 2 | message(FATAL_ERROR "macOS
不支持DirectML") 3 | elseif (WIN32) 4 | if (CMAKE_CL_64) 5 | message("配置WINDOWS OnnxRuntime_DirectML x64 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x64") 6 | set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x64") 7 | else () 8 | message("配置WINDOWS OnnxRuntime_DirectML x86 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x86") 9 | set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x86") 10 | endif () 11 | elseif (UNIX) 12 | message(FATAL_ERROR "Linux 不支持DirectML") 13 | endif () 14 | 15 | -------------------------------------------------------------------------------- /onnxruntime-DirectML/windows-x64/OnnxRuntimeConfig.cmake: -------------------------------------------------------------------------------- 1 | set(OnnxRuntime_INCLUDE_DIRS "${CMAKE_CURRENT_LIST_DIR}/include" "${CMAKE_CURRENT_LIST_DIR}/include/onnxruntime/core/session") 2 | include_directories(${OnnxRuntime_INCLUDE_DIRS}) 3 | link_directories(${CMAKE_CURRENT_LIST_DIR}/lib) 4 | set(OnnxRuntime_LIBS onnxruntime.lib) 5 | -------------------------------------------------------------------------------- /onnxruntime-gpu/OnnxRuntimeWrapper.cmake: -------------------------------------------------------------------------------- 1 | if (APPLE) 2 | message("配置macOS OnnxRuntime 路径: ${CMAKE_CURRENT_LIST_DIR}/macos") 3 | set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/macos") 4 | elseif (WIN32) 5 | if (CMAKE_CL_64) 6 | message("配置WINDOWS OnnxRuntime x64 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x64") 7 | set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x64") 8 | else () 9 | message("配置WINDOWS OnnxRuntime x86 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x86") 10 | set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x86") 11 | endif () 12 | elseif (UNIX) 13 | message("配置Linux OnnxRuntime 路径: ${CMAKE_CURRENT_LIST_DIR}/linux") 14 | set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/linux") 15 | endif () 16 | 17 | -------------------------------------------------------------------------------- /onnxruntime-gpu/README.md: 
-------------------------------------------------------------------------------- 1 | # GPU库整合说明 2 | 3 | ### onnxruntime-gpu(下载) 4 | 5 | 1. [下载地址](https://github.com/microsoft/onnxruntime/releases) 6 | 7 | * 仅支持Linux和Windows,仅支持x64 8 | * Windows包名: onnxruntime-win-x64-gpu-版本号.zip 9 | * Linux包名: onnxruntime-linux-x64-gpu-版本号.tgz 10 | * Windows平台:把压缩包内的lib文件夹解压到windows-x64文件夹里 11 | * Linux平台:把压缩包内的lib文件夹解压到linux文件夹里 12 | * 创建include/onnxruntime/core/session,把压缩包内的所有.h文件解压到session文件夹里 13 | * 目录结构如下 14 | 15 | ``` 16 | onnxruntime-gpu 17 | ├── linux 18 | │ ├── include 19 | │ │ └── onnxruntime 20 | │ │ └── core 21 | │ │ └── session 22 | │ │ ├── cpu_provider_factory.h 23 | │ │ ├── onnxruntime_c_api.h 24 | │ │ ├── onnxruntime_cxx_api.h 25 | │ │ ├── onnxruntime_cxx_inline.h 26 | │ │ ├── onnxruntime_run_options_config_keys.h 27 | │ │ ├── onnxruntime_session_options_config_keys.h 28 | │ │ ├── provider_options.h 29 | │ │ └── tensorrt_provider_factory.h 30 | │ ├── lib 31 | │ │ ├── libonnxruntime_providers_cuda.so 32 | │ │ ├── libonnxruntime_providers_shared.so 33 | │ │ ├── libonnxruntime_providers_tensorrt.so 34 | │ │ ├── libonnxruntime.so -> libonnxruntime.so.1.12.1 35 | │ │ └── libonnxruntime.so.1.12.1 36 | │ └── OnnxRuntimeConfig.cmake 37 | └── windows-x64 38 | ├── include 39 | │ └── onnxruntime 40 | │ └── core 41 | │ └── session 42 | │ ├── cpu_provider_factory.h 43 | │ ├── onnxruntime_c_api.h 44 | │ ├── onnxruntime_cxx_api.h 45 | │ ├── onnxruntime_cxx_inline.h 46 | │ ├── onnxruntime_run_options_config_keys.h 47 | │ ├── onnxruntime_session_options_config_keys.h 48 | │ ├── provider_options.h 49 | │ └── tensorrt_provider_factory.h 50 | ├── lib 51 | │ ├── onnxruntime.dll 52 | │ ├── onnxruntime.lib 53 | │ ├── onnxruntime.pdb 54 | │ ├── onnxruntime_providers_cuda.dll 55 | │ ├── onnxruntime_providers_cuda.lib 56 | │ ├── onnxruntime_providers_cuda.pdb 57 | │ ├── onnxruntime_providers_shared.dll 58 | │ ├── onnxruntime_providers_shared.lib 59 | │ ├── onnxruntime_providers_shared.pdb 
60 | │ ├── onnxruntime_providers_tensorrt.dll 61 | │ ├── onnxruntime_providers_tensorrt.lib 62 | │ └── onnxruntime_providers_tensorrt.pdb 63 | └── OnnxRuntimeConfig.cmake 64 | 65 | ``` 66 | 67 | ### cuDNN安装指南 68 | 69 | * 参考官方安装指南 https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html 70 | * windows系统还得下载 zlibwapidll [64位](http://www.winimage.com/zLibDll/zlib123dllx64.zip) 71 | * 根据onnxruntime官方文档https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html 72 | * onnxruntime v1.12,需要CUDA 11.4和cuDNN 8.2.4 (Linux) 8.2.2.26 (Windows) 73 | 74 | ### cuDNN下载 75 | 76 | * [cuDNN下载地址](https://developer.nvidia.com/rdp/cudnn-archive) 77 | * windows只需要把dll解压,跟编译好的exe放在一起即可 78 | * Linux需要把cudnn解压缩到一个文件夹(例如/opt/cudnn),然后把路径加到LD_LIBRARY_PATH 79 | * 安装的cuda也必须把路径(例如/usr/local/cuda-11.4/lib64)加到LD_LIBRARY_PATH 80 | ```export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cudnn:/usr/local/cuda-11.4/lib64``` 81 | * 或者添加上面的路径到/etc/ld.so.conf,并以root权限运行ldconfig 82 | 83 | ### CUDA下载 84 | 85 | * [CUDA下载地址](https://developer.nvidia.com/downloads) 86 | * windows捷径 https://developer.download.nvidia.com/compute/cuda/11.4.0/local_installers/cuda_11.4.0_471.11_win10.exe 87 | * Linux捷径 https://developer.download.nvidia.com/compute/cuda/11.4.0/local_installers/cuda_11.4.0_470.42.01_linux.run 88 | * 可以自定义安装,只需要选中CUDA下的Development和Runtime,其它可以不需勾选 -------------------------------------------------------------------------------- /onnxruntime-gpu/linux/OnnxRuntimeConfig.cmake: -------------------------------------------------------------------------------- 1 | set(OnnxRuntime_INCLUDE_DIRS "${CMAKE_CURRENT_LIST_DIR}/include") 2 | include_directories(${OnnxRuntime_INCLUDE_DIRS}) 3 | link_directories(${CMAKE_CURRENT_LIST_DIR}/lib) 4 | set(OnnxRuntime_LIBS onnxruntime) 5 | -------------------------------------------------------------------------------- /onnxruntime-gpu/windows-x64/OnnxRuntimeConfig.cmake: -------------------------------------------------------------------------------- 1 
| set(OnnxRuntime_INCLUDE_DIRS "${CMAKE_CURRENT_LIST_DIR}/include") 2 | include_directories(${OnnxRuntime_INCLUDE_DIRS}) 3 | link_directories(${CMAKE_CURRENT_LIST_DIR}/lib) 4 | set(OnnxRuntime_LIBS onnxruntime) 5 | -------------------------------------------------------------------------------- /onnxruntime-static/OnnxRuntimeWrapper.cmake: -------------------------------------------------------------------------------- 1 | if (APPLE) 2 | message("配置macOS OnnxRuntime 路径: ${CMAKE_CURRENT_LIST_DIR}/macos") 3 | set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/macos") 4 | elseif (WIN32) 5 | if (CMAKE_CL_64) 6 | message("配置WINDOWS OnnxRuntime x64 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x64") 7 | set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x64") 8 | else () 9 | message("配置WINDOWS OnnxRuntime x86 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x86") 10 | set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x86") 11 | endif () 12 | elseif (UNIX) 13 | message("配置Linux OnnxRuntime 路径: ${CMAKE_CURRENT_LIST_DIR}/linux") 14 | set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/linux") 15 | endif () 16 | 17 | -------------------------------------------------------------------------------- /opencv-static/OpenCVWrapperConfig.cmake: -------------------------------------------------------------------------------- 1 | if (WIN32) 2 | if (CMAKE_CL_64) 3 | message("配置WINDOWS OpenCV x64 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x64") 4 | set(OpenCV_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x64") 5 | else () 6 | message("配置WINDOWS OpenCV x86 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x86") 7 | set(OpenCV_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x86") 8 | endif () 9 | elseif (APPLE) 10 | message("配置macOS OpenCV 路径: ${CMAKE_CURRENT_LIST_DIR}/macos/lib/cmake/opencv4") 11 | set(OpenCV_DIR "${CMAKE_CURRENT_LIST_DIR}/macos/lib/cmake/opencv4") 12 | elseif (UNIX) 13 | message("配置Linux OpenCV 路径: ${CMAKE_CURRENT_LIST_DIR}/linux/lib/cmake/opencv4") 14 | set(OpenCV_DIR 
"${CMAKE_CURRENT_LIST_DIR}/linux/lib/cmake/opencv4") 15 | endif () 16 | -------------------------------------------------------------------------------- /others/README-bin.txt: -------------------------------------------------------------------------------- 1 | ### 测试说明 2 | 3 | 1. 根据系统下载对应的程序包并解压. 4 | 2. 终端运行run-test.sh或命令行运行run-test.bat,查看识别结果. 5 | 3. 终端运行run-benchmark.sh或命令行运行run-benchmark.bat,查看识别过程平均耗时. 6 | 7 | ### Windows7执行错误|中文乱码 8 | 9 | 1. cmd窗口左上角-属性 10 | 2. 字体选项卡-选择除了“点阵字体”以外的TrueType字体,例如:Lucida Console、宋体 11 | 3. 重新执行bat 12 | 13 | ### 输入参数说明 14 | 15 | * 请参考main.h中的命令行参数说明。 16 | * 每个参数有一个短参数名和一个长参数名,用短的或长的均可。 17 | 18 | 1. ```-d或--models```:模型所在文件夹路径,可以相对路径也可以绝对路径。 19 | 2. ```-1或--det```:det模型文件名(含扩展名) 20 | 3. ```-2或--cls```:cls模型文件名(含扩展名) 21 | 4. ```-3或--rec```:rec模型文件名(含扩展名) 22 | 5. ```-4或--keys```:keys.txt文件名(含扩展名) 23 | 6. ```-i或--image```:目标图片路径,可以相对路径也可以绝对路径。 24 | 7. ```-t或--numThread```:线程数量。 25 | 8. ```-p或--padding```:图像预处理,在图片外周添加白边,用于提升识别率,文字框没有正确框住所有文字时,增加此值。 26 | 9. ```-s或--maxSideLen``` 27 | :按图片最长边的长度,此值为0代表不缩放,例:1024,如果图片长边大于1024则把图像整体缩小到1024再进行图像分割计算,如果图片长边小于1024则不缩放,如果图片长边小于32,则缩放到32。 28 | 10. ```-b或--boxScoreThresh```:文字框置信度门限,文字框没有正确框住所有文字时,减小此值。 29 | 11. ```-o或--boxThresh```:请自行试验。 30 | 12. ```-u或--unClipRatio```:单个文字框大小倍率,越大时单个文字框越大。此项与图片的大小相关,越大的图片此值应该越大。 31 | 13. ```-a或--doAngle```:启用(1)/禁用(0) 文字方向检测,只有图片倒置的情况下(旋转90~270度的图片),才需要启用文字方向检测。 32 | 14. ```-A或--mostAngle```:启用(1)/禁用(0) 角度投票(整张图片以最大可能文字方向来识别),当禁用文字方向检测时,此项也不起作用。 33 | 15. 
```-h或--help```:打印命令行帮助。 34 | 35 | -------------------------------------------------------------------------------- /others/README-clib.txt: -------------------------------------------------------------------------------- 1 | ### 说明 2 | 请参考此仓库 3 | https://github.com/RapidAI/RapidOcrOnnxLibTest 4 | 5 | -------------------------------------------------------------------------------- /others/README-jni.txt: -------------------------------------------------------------------------------- 1 | ### 说明 2 | 请参考此仓库 3 | https://github.com/RapidAI/RapidOcrOnnxJvm -------------------------------------------------------------------------------- /run-benchmark.bat: -------------------------------------------------------------------------------- 1 | chcp 65001 2 | :: Set Param 3 | @ECHO OFF 4 | @SETLOCAL 5 | echo "Setting the Number of Threads=%NUMBER_OF_PROCESSORS% Using an OpenMP Environment Variable" 6 | set OMP_NUM_THREADS=%NUMBER_OF_PROCESSORS% 7 | 8 | :MainExec 9 | echo "请输入测试选项并回车: 1)CPU-x64, 2)CPU-x86, 3)CUDA-x64" 10 | set /p flag= 11 | if %flag% == 1 (call :PrepareCpuX64)^ 12 | else if %flag% == 2 (call :PrepareCpuX86)^ 13 | else if %flag% == 3 (call :PrepareCudaX64)^ 14 | else (echo 输入错误!Input Error!) 
#!/usr/bin/env bash
# Interactive benchmark launcher for macOS / Linux.
#
# Asks which onnxruntime flavour to use and how many loops to run, then starts
# the `benchmark` binary on images/1.jpg with the bundled PP-OCRv3 model names.

# Ask for the onnxruntime flavour and derive ONNX_TYPE, GPU_INDEX and EXE_PATH.
# Exits with status 1 on an unrecognised answer.
function PrepareVar(){
    echo "onnxruntime: 1)CPU(默认), 2)GPU(cuda)"
    read -r -p "" ONNX_TYPE
    # The prompt advertises CPU as the default, so an empty answer selects it.
    case "${ONNX_TYPE:-1}" in
        1)
            ONNX_TYPE="CPU"
            GPU_INDEX=-1
            ;;
        2)
            ONNX_TYPE="CUDA"
            GPU_INDEX=0
            ;;
        *)
            echo -e "输入错误!Input Error!"
            exit 1
            ;;
    esac

    # Historical default location, kept as the fallback.
    EXE_PATH=${sysOS}-${ONNX_TYPE}-BIN
    # The CI packaging (.github/workflows/builder.yml) ships the binaries under
    # <OS>-BIN-<TYPE>, and a local build leaves them in <dir>/install/bin, so
    # probe those layouts first and use the first directory that exists.
    local candidate
    for candidate in \
        "${sysOS}-BIN-${ONNX_TYPE}/install/bin" \
        "${sysOS}-BIN-${ONNX_TYPE}" \
        "${sysOS}-${ONNX_TYPE}-BIN/install/bin" \
        "${sysOS}-${ONNX_TYPE}-BIN"; do
        if [ -d "$candidate" ]; then
            EXE_PATH=$candidate
            break
        fi
    done
}

sysOS=$(uname -s)
NUM_THREADS=1
if [ "$sysOS" == "Darwin" ]; then
    #echo "I'm MacOS"
    NUM_THREADS=$(sysctl -n hw.ncpu)
elif [ "$sysOS" == "Linux" ]; then
    #echo "I'm Linux"
    NUM_THREADS=$(grep -c ^processor /proc/cpuinfo)
else
    echo "Other OS: $sysOS"
fi

echo "Setting the Number of Threads=$NUM_THREADS Using an OpenMP Environment Variable"
# `set NAME=value` only assigns the positional parameter $1 in bash; the
# variable has to be exported to reach the child process.
export OMP_NUM_THREADS=$NUM_THREADS

PrepareVar

echo "请输入循环次数"
read -r -p "" LOOP_COUNT

TARGET_IMG=images/1.jpg
if [ ! -f "$TARGET_IMG" ]; then
    echo "找不到待识别的目标图片:${TARGET_IMG},请打开本文件并编辑TARGET_IMG"
    exit 1
fi

"./${EXE_PATH}/benchmark" --version
"./${EXE_PATH}/benchmark" --models models \
    --det ch_PP-OCRv3_det_infer.onnx \
    --cls ch_ppocr_mobile_v2.0_cls_infer.onnx \
    --rec ch_PP-OCRv3_rec_infer.onnx \
    --keys ppocr_keys_v1.txt \
    --image "$TARGET_IMG" \
    --numThread "$NUM_THREADS" \
    --padding 50 \
    --maxSideLen 1024 \
    --boxScoreThresh 0.5 \
    --boxThresh 0.3 \
    --unClipRatio 1.6 \
    --doAngle 1 \
    --mostAngle 1 \
    --GPU "$GPU_INDEX" \
    --loopCount "$LOOP_COUNT"
#!/usr/bin/env bash
# Interactive smoke-test launcher for macOS / Linux.
#
# Asks which onnxruntime flavour to use, then runs the `RapidOcrOnnx` binary on
# images/1.jpg with the bundled PP-OCRv3 model names.

# Ask for the onnxruntime flavour and derive ONNX_TYPE, GPU_INDEX and EXE_PATH.
# Exits with status 1 on an unrecognised answer.
function PrepareVar(){
    echo "onnxruntime: 1)CPU(默认), 2)GPU(cuda)"
    read -r -p "" ONNX_TYPE
    # The prompt advertises CPU as the default, so an empty answer selects it.
    case "${ONNX_TYPE:-1}" in
        1)
            ONNX_TYPE="CPU"
            GPU_INDEX=-1
            ;;
        2)
            ONNX_TYPE="CUDA"
            GPU_INDEX=0
            ;;
        *)
            echo -e "输入错误!Input Error!"
            exit 1
            ;;
    esac

    # Historical default location, kept as the fallback.
    EXE_PATH=${sysOS}-${ONNX_TYPE}-BIN
    # The CI packaging (.github/workflows/builder.yml) ships the binaries under
    # <OS>-BIN-<TYPE>, and a local build leaves them in <dir>/install/bin, so
    # probe those layouts first and use the first directory that exists.
    local candidate
    for candidate in \
        "${sysOS}-BIN-${ONNX_TYPE}/install/bin" \
        "${sysOS}-BIN-${ONNX_TYPE}" \
        "${sysOS}-${ONNX_TYPE}-BIN/install/bin" \
        "${sysOS}-${ONNX_TYPE}-BIN"; do
        if [ -d "$candidate" ]; then
            EXE_PATH=$candidate
            break
        fi
    done
}

sysOS=$(uname -s)
NUM_THREADS=1
if [ "$sysOS" == "Darwin" ]; then
    #echo "I'm MacOS"
    NUM_THREADS=$(sysctl -n hw.ncpu)
elif [ "$sysOS" == "Linux" ]; then
    #echo "I'm Linux"
    NUM_THREADS=$(grep -c ^processor /proc/cpuinfo)
else
    echo "Other OS: $sysOS"
fi

echo "Setting the Number of Threads=$NUM_THREADS Using an OpenMP Environment Variable"
# `set NAME=value` only assigns the positional parameter $1 in bash; the
# variable has to be exported to reach the child process.
export OMP_NUM_THREADS=$NUM_THREADS

PrepareVar

TARGET_IMG=images/1.jpg
if [ ! -f "$TARGET_IMG" ]; then
    echo "找不到待识别的目标图片:${TARGET_IMG},请打开本文件并编辑TARGET_IMG"
    exit 1
fi

##### run test on MacOS or Linux
"./${EXE_PATH}/RapidOcrOnnx" --version
"./${EXE_PATH}/RapidOcrOnnx" --models models \
    --det ch_PP-OCRv3_det_infer.onnx \
    --cls ch_ppocr_mobile_v2.0_cls_infer.onnx \
    --rec ch_PP-OCRv3_rec_infer.onnx \
    --keys ppocr_keys_v1.txt \
    --image "$TARGET_IMG" \
    --numThread "$NUM_THREADS" \
    --padding 50 \
    --maxSideLen 1024 \
    --boxScoreThresh 0.5 \
    --boxThresh 0.3 \
    --unClipRatio 1.6 \
    --doAngle 1 \
    --mostAngle 1 \
    --GPU "$GPU_INDEX"
try to use GPU%d\n", gpuIndex); 21 | } 22 | else { 23 | printf("cls use CPU\n"); 24 | } 25 | #endif 26 | 27 | #ifdef __DIRECTML__ 28 | if (gpuIndex >= 0) { 29 | OrtSessionOptionsAppendExecutionProvider_DML(sessionOptions, gpuIndex); 30 | printf("cls try to use GPU%d\n", gpuIndex); 31 | } 32 | else { 33 | printf("cls use CPU\n"); 34 | } 35 | #endif 36 | } 37 | 38 | AngleNet::~AngleNet() { 39 | delete session; 40 | inputNamesPtr.clear(); 41 | outputNamesPtr.clear(); 42 | } 43 | 44 | void AngleNet::setNumThread(int numOfThread) { 45 | numThread = numOfThread; 46 | //===session options=== 47 | // Sets the number of threads used to parallelize the execution within nodes 48 | // A value of 0 means ORT will pick a default 49 | //sessionOptions.SetIntraOpNumThreads(numThread); 50 | //set OMP_NUM_THREADS=16 51 | 52 | // Sets the number of threads used to parallelize the execution of the graph (across nodes) 53 | // If sequential execution is enabled this value is ignored 54 | // A value of 0 means ORT will pick a default 55 | sessionOptions.SetInterOpNumThreads(numThread); 56 | sessionOptions.SetIntraOpNumThreads(numThread); 57 | 58 | // Sets graph optimization level 59 | // ORT_DISABLE_ALL -> To disable all optimizations 60 | // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals) 61 | // ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions) 62 | // ORT_ENABLE_ALL -> To Enable All possible opitmizations 63 | sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); 64 | } 65 | 66 | void AngleNet::initModel(const std::string &pathStr) { 67 | #ifdef _WIN32 68 | std::wstring clsPath = strToWstr(pathStr); 69 | session = new Ort::Session(env, clsPath.c_str(), sessionOptions); 70 | #else 71 | session = new Ort::Session(env, pathStr.c_str(), sessionOptions); 72 | #endif 73 | inputNamesPtr = getInputNames(session); 74 | outputNamesPtr = 
getOutputNames(session); 75 | } 76 | 77 | Angle scoreToAngle(const std::vector &outputData) { 78 | int maxIndex = 0; 79 | float maxScore = 0; 80 | for (size_t i = 0; i < outputData.size(); i++) { 81 | if (outputData[i] > maxScore) { 82 | maxScore = outputData[i]; 83 | maxIndex = i; 84 | } 85 | } 86 | return {maxIndex, maxScore}; 87 | } 88 | 89 | Angle AngleNet::getAngle(cv::Mat &src) { 90 | std::vector inputTensorValues = substractMeanNormalize(src, meanValues, normValues); 91 | std::array inputShape{1, src.channels(), src.rows, src.cols}; 92 | auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 93 | Ort::Value inputTensor = Ort::Value::CreateTensor(memoryInfo, inputTensorValues.data(), 94 | inputTensorValues.size(), inputShape.data(), 95 | inputShape.size()); 96 | assert(inputTensor.IsTensor()); 97 | std::vector inputNames = {inputNamesPtr.data()->get()}; 98 | std::vector outputNames = {outputNamesPtr.data()->get()}; 99 | auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor, 100 | inputNames.size(), outputNames.data(), outputNames.size()); 101 | assert(outputTensor.size() == 1 && outputTensor.front().IsTensor()); 102 | std::vector outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape(); 103 | int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, 104 | std::multiplies()); 105 | float *floatArray = outputTensor.front().GetTensorMutableData(); 106 | std::vector outputData(floatArray, floatArray + outputCount); 107 | return scoreToAngle(outputData); 108 | } 109 | 110 | std::vector AngleNet::getAngles(std::vector &partImgs, const char *path, 111 | const char *imgName, bool doAngle, bool mostAngle) { 112 | size_t size = partImgs.size(); 113 | std::vector angles(size); 114 | if (doAngle) { 115 | for (size_t i = 0; i < size; ++i) { 116 | double startAngle = getCurrentTime(); 117 | cv::Mat angleImg; 118 | cv::resize(partImgs[i], angleImg, cv::Size(dstWidth, 
dstHeight)); 119 | Angle angle = getAngle(angleImg); 120 | double endAngle = getCurrentTime(); 121 | angle.time = endAngle - startAngle; 122 | 123 | angles[i] = angle; 124 | 125 | //OutPut AngleImg 126 | if (isOutputAngleImg) { 127 | std::string angleImgFile = getDebugImgFilePath(path, imgName, i, "-angle-"); 128 | saveImg(angleImg, angleImgFile.c_str()); 129 | } 130 | } 131 | } else { 132 | for (size_t i = 0; i < size; ++i) { 133 | angles[i] = Angle{-1, 0.f}; 134 | } 135 | } 136 | //Most Possible AngleIndex 137 | if (doAngle && mostAngle) { 138 | auto angleIndexes = getAngleIndexes(angles); 139 | double sum = std::accumulate(angleIndexes.begin(), angleIndexes.end(), 0.0); 140 | double halfPercent = angles.size() / 2.0f; 141 | int mostAngleIndex; 142 | if (sum < halfPercent) {//all angle set to 0 143 | mostAngleIndex = 0; 144 | } else {//all angle set to 1 145 | mostAngleIndex = 1; 146 | } 147 | //printf("Set All Angle to mostAngleIndex(%d)\n", mostAngleIndex); 148 | for (size_t i = 0; i < angles.size(); ++i) { 149 | Angle angle = angles[i]; 150 | angle.index = mostAngleIndex; 151 | angles.at(i) = angle; 152 | } 153 | } 154 | 155 | return angles; 156 | } -------------------------------------------------------------------------------- /src/CrnnNet.cpp: -------------------------------------------------------------------------------- 1 | #include "CrnnNet.h" 2 | #include "OcrUtils.h" 3 | #include 4 | #include 5 | 6 | #ifdef __DIRECTML__ 7 | #include 8 | #endif 9 | 10 | void CrnnNet::setGpuIndex(int gpuIndex) { 11 | #ifdef __CUDA__ 12 | if (gpuIndex >= 0) { 13 | OrtCUDAProviderOptions cuda_options; 14 | cuda_options.device_id = gpuIndex; 15 | cuda_options.arena_extend_strategy = 0; 16 | cuda_options.gpu_mem_limit = 2ULL * 1024 * 1024 * 1024; 17 | cuda_options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchDefault; 18 | cuda_options.do_copy_in_default_stream = 1; 19 | 20 | sessionOptions.AppendExecutionProvider_CUDA(cuda_options); 21 | printf("rec try to use GPU%d\n", 
gpuIndex); 22 | } 23 | else { 24 | printf("rec use CPU\n"); 25 | } 26 | #endif 27 | 28 | #ifdef __DIRECTML__ 29 | if (gpuIndex >= 0) { 30 | OrtSessionOptionsAppendExecutionProvider_DML(sessionOptions, gpuIndex); 31 | printf("rec try to use GPU%d\n", gpuIndex); 32 | } 33 | else { 34 | printf("rec use CPU\n"); 35 | } 36 | #endif 37 | } 38 | 39 | CrnnNet::~CrnnNet() { 40 | delete session; 41 | inputNamesPtr.clear(); 42 | outputNamesPtr.clear(); 43 | } 44 | 45 | void CrnnNet::setNumThread(int numOfThread) { 46 | numThread = numOfThread; 47 | //===session options=== 48 | // Sets the number of threads used to parallelize the execution within nodes 49 | // A value of 0 means ORT will pick a default 50 | //sessionOptions.SetIntraOpNumThreads(numThread); 51 | //set OMP_NUM_THREADS=16 52 | 53 | // Sets the number of threads used to parallelize the execution of the graph (across nodes) 54 | // If sequential execution is enabled this value is ignored 55 | // A value of 0 means ORT will pick a default 56 | sessionOptions.SetInterOpNumThreads(numThread); 57 | sessionOptions.SetIntraOpNumThreads(numThread); 58 | 59 | // Sets graph optimization level 60 | // ORT_DISABLE_ALL -> To disable all optimizations 61 | // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals) 62 | // ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions) 63 | // ORT_ENABLE_ALL -> To Enable All possible opitmizations 64 | sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); 65 | } 66 | 67 | void CrnnNet::initModel(const std::string &pathStr, const std::string &keysPath) { 68 | #ifdef _WIN32 69 | std::wstring crnnPath = strToWstr(pathStr); 70 | session = new Ort::Session(env, crnnPath.c_str(), sessionOptions); 71 | #else 72 | session = new Ort::Session(env, pathStr.c_str(), sessionOptions); 73 | #endif 74 | inputNamesPtr = getInputNames(session); 75 | outputNamesPtr = 
getOutputNames(session); 76 | 77 | //load keys 78 | std::ifstream in(keysPath.c_str()); 79 | std::string line; 80 | if (in) { 81 | while (getline(in, line)) {// line中不包括每行的换行符 82 | keys.push_back(line); 83 | } 84 | } else { 85 | printf("The keys.txt file was not found\n"); 86 | return; 87 | } 88 | keys.insert(keys.begin(), "#"); 89 | keys.emplace_back(" "); 90 | printf("total keys size(%lu)\n", keys.size()); 91 | } 92 | 93 | template 94 | inline static size_t argmax(ForwardIterator first, ForwardIterator last) { 95 | return std::distance(first, std::max_element(first, last)); 96 | } 97 | 98 | TextLine CrnnNet::scoreToTextLine(const std::vector &outputData, size_t h, size_t w) { 99 | auto keySize = keys.size(); 100 | auto dataSize = outputData.size(); 101 | std::string strRes; 102 | std::vector scores; 103 | size_t lastIndex = 0; 104 | size_t maxIndex; 105 | float maxValue; 106 | 107 | for (size_t i = 0; i < h; i++) { 108 | size_t start = i * w; 109 | size_t stop = (i + 1) * w; 110 | if (stop > dataSize - 1) { 111 | stop = (i + 1) * w - 1; 112 | } 113 | maxIndex = int(argmax(&outputData[start], &outputData[stop])); 114 | maxValue = float(*std::max_element(&outputData[start], &outputData[stop])); 115 | 116 | if (maxIndex > 0 && maxIndex < keySize && (!(i > 0 && maxIndex == lastIndex))) { 117 | scores.emplace_back(maxValue); 118 | strRes.append(keys[maxIndex]); 119 | } 120 | lastIndex = maxIndex; 121 | } 122 | return {strRes, scores}; 123 | } 124 | 125 | TextLine CrnnNet::getTextLine(const cv::Mat &src) { 126 | float scale = (float) dstHeight / (float) src.rows; 127 | int dstWidth = int((float) src.cols * scale); 128 | cv::Mat srcResize; 129 | resize(src, srcResize, cv::Size(dstWidth, dstHeight)); 130 | std::vector inputTensorValues = substractMeanNormalize(srcResize, meanValues, normValues); 131 | std::array inputShape{1, srcResize.channels(), srcResize.rows, srcResize.cols}; 132 | auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 133 | 
Ort::Value inputTensor = Ort::Value::CreateTensor(memoryInfo, inputTensorValues.data(), 134 | inputTensorValues.size(), inputShape.data(), 135 | inputShape.size()); 136 | assert(inputTensor.IsTensor()); 137 | std::vector inputNames = {inputNamesPtr.data()->get()}; 138 | std::vector outputNames = {outputNamesPtr.data()->get()}; 139 | auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor, 140 | inputNames.size(), outputNames.data(), outputNames.size()); 141 | assert(outputTensor.size() == 1 && outputTensor.front().IsTensor()); 142 | std::vector outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape(); 143 | int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, 144 | std::multiplies()); 145 | float *floatArray = outputTensor.front().GetTensorMutableData(); 146 | std::vector outputData(floatArray, floatArray + outputCount); 147 | return scoreToTextLine(outputData, outputShape[1], outputShape[2]); 148 | } 149 | 150 | std::vector CrnnNet::getTextLines(std::vector &partImg, const char *path, const char *imgName) { 151 | int size = partImg.size(); 152 | std::vector textLines(size); 153 | for (int i = 0; i < size; ++i) { 154 | //OutPut DebugImg 155 | if (isOutputDebugImg) { 156 | std::string debugImgFile = getDebugImgFilePath(path, imgName, i, "-debug-"); 157 | saveImg(partImg[i], debugImgFile.c_str()); 158 | } 159 | 160 | //getTextLine 161 | double startCrnnTime = getCurrentTime(); 162 | TextLine textLine = getTextLine(partImg[i]); 163 | double endCrnnTime = getCurrentTime(); 164 | textLine.time = endCrnnTime - startCrnnTime; 165 | textLines[i] = textLine; 166 | } 167 | return textLines; 168 | } -------------------------------------------------------------------------------- /src/DbNet.cpp: -------------------------------------------------------------------------------- 1 | #include "DbNet.h" 2 | #include "OcrUtils.h" 3 | 4 | void DbNet::setGpuIndex(int gpuIndex) { 5 | #ifdef __CUDA__ 6 | if 
(gpuIndex >= 0) { 7 | OrtCUDAProviderOptions cuda_options; 8 | cuda_options.device_id = gpuIndex; 9 | cuda_options.arena_extend_strategy = 0; 10 | cuda_options.gpu_mem_limit = 2ULL * 1024 * 1024 * 1024; 11 | cuda_options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchDefault; 12 | cuda_options.do_copy_in_default_stream = 1; 13 | 14 | sessionOptions.AppendExecutionProvider_CUDA(cuda_options); 15 | printf("det try to use GPU%d\n", gpuIndex); 16 | } 17 | else { 18 | printf("det use CPU\n"); 19 | } 20 | #endif 21 | } 22 | 23 | DbNet::~DbNet() { 24 | delete session; 25 | inputNamesPtr.clear(); 26 | outputNamesPtr.clear(); 27 | } 28 | 29 | void DbNet::setNumThread(int numOfThread) { 30 | numThread = numOfThread; 31 | //===session options=== 32 | // Sets the number of threads used to parallelize the execution within nodes 33 | // A value of 0 means ORT will pick a default 34 | //sessionOptions.SetIntraOpNumThreads(numThread); 35 | //set OMP_NUM_THREADS=16 36 | 37 | // Sets the number of threads used to parallelize the execution of the graph (across nodes) 38 | // If sequential execution is enabled this value is ignored 39 | // A value of 0 means ORT will pick a default 40 | sessionOptions.SetInterOpNumThreads(numThread); 41 | sessionOptions.SetIntraOpNumThreads(numThread); 42 | 43 | // Sets graph optimization level 44 | // ORT_DISABLE_ALL -> To disable all optimizations 45 | // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals) 46 | // ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions) 47 | // ORT_ENABLE_ALL -> To Enable All possible opitmizations 48 | sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); 49 | } 50 | 51 | void DbNet::initModel(const std::string &pathStr) { 52 | #ifdef _WIN32 53 | std::wstring detPath = strToWstr(pathStr); 54 | session = new Ort::Session(env, detPath.c_str(), sessionOptions); 55 | #else 56 | session = new 
Ort::Session(env, pathStr.c_str(), sessionOptions); 57 | #endif 58 | inputNamesPtr = getInputNames(session); 59 | outputNamesPtr = getOutputNames(session); 60 | } 61 | 62 | std::vector findRsBoxes(const cv::Mat &predMat, const cv::Mat &dilateMat, ScaleParam &s, 63 | const float boxScoreThresh, const float unClipRatio) { 64 | const int longSideThresh = 3;//minBox 长边门限 65 | const int maxCandidates = 1000; 66 | 67 | std::vector> contours; 68 | std::vector hierarchy; 69 | 70 | cv::findContours(dilateMat, contours, hierarchy, cv::RETR_LIST, 71 | cv::CHAIN_APPROX_SIMPLE); 72 | 73 | size_t numContours = contours.size() >= maxCandidates ? maxCandidates : contours.size(); 74 | 75 | std::vector rsBoxes; 76 | 77 | for (size_t i = 0; i < numContours; i++) { 78 | if (contours[i].size() <= 2) { 79 | continue; 80 | } 81 | cv::RotatedRect minAreaRect = cv::minAreaRect(contours[i]); 82 | 83 | float longSide; 84 | std::vector minBoxes = getMinBoxes(minAreaRect, longSide); 85 | 86 | if (longSide < longSideThresh) { 87 | continue; 88 | } 89 | 90 | float boxScore = boxScoreFast(minBoxes, predMat); 91 | if (boxScore < boxScoreThresh) 92 | continue; 93 | 94 | //-----unClip----- 95 | cv::RotatedRect clipRect = unClip(minBoxes, unClipRatio); 96 | if (clipRect.size.height < 1.001 && clipRect.size.width < 1.001) { 97 | continue; 98 | } 99 | //-----unClip----- 100 | 101 | std::vector clipMinBoxes = getMinBoxes(clipRect, longSide); 102 | if (longSide < longSideThresh + 2) 103 | continue; 104 | 105 | std::vector intClipMinBoxes; 106 | 107 | for (auto &clipMinBox: clipMinBoxes) { 108 | float x = clipMinBox.x / s.ratioWidth; 109 | float y = clipMinBox.y / s.ratioHeight; 110 | int ptX = (std::min)((std::max)(int(x), 0), s.srcWidth - 1); 111 | int ptY = (std::min)((std::max)(int(y), 0), s.srcHeight - 1); 112 | cv::Point point{ptX, ptY}; 113 | intClipMinBoxes.push_back(point); 114 | } 115 | rsBoxes.push_back(TextBox{intClipMinBoxes, boxScore}); 116 | } 117 | reverse(rsBoxes.begin(), rsBoxes.end()); 
118 | return rsBoxes; 119 | } 120 | 121 | std::vector 122 | DbNet::getTextBoxes(cv::Mat &src, ScaleParam &s, float boxScoreThresh, float boxThresh, float unClipRatio) { 123 | cv::Mat srcResize; 124 | resize(src, srcResize, cv::Size(s.dstWidth, s.dstHeight)); 125 | std::vector inputTensorValues = substractMeanNormalize(srcResize, meanValues, normValues); 126 | std::array inputShape{1, srcResize.channels(), srcResize.rows, srcResize.cols}; 127 | auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 128 | Ort::Value inputTensor = Ort::Value::CreateTensor(memoryInfo, inputTensorValues.data(), 129 | inputTensorValues.size(), inputShape.data(), 130 | inputShape.size()); 131 | assert(inputTensor.IsTensor()); 132 | std::vector inputNames = {inputNamesPtr.data()->get()}; 133 | std::vector outputNames = {outputNamesPtr.data()->get()}; 134 | auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor, 135 | inputNames.size(), outputNames.data(), outputNames.size()); 136 | assert(outputTensor.size() == 1 && outputTensor.front().IsTensor()); 137 | std::vector outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape(); 138 | int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, 139 | std::multiplies()); 140 | float *floatArray = outputTensor.front().GetTensorMutableData(); 141 | std::vector outputData(floatArray, floatArray + outputCount); 142 | 143 | //-----Data preparation----- 144 | int outHeight = (int) outputShape[2]; 145 | int outWidth = (int) outputShape[3]; 146 | size_t area = outHeight * outWidth; 147 | 148 | std::vector predData(area, 0.0); 149 | std::vector cbufData(area, ' '); 150 | 151 | for (int i = 0; i < area; i++) { 152 | predData[i] = float(outputData[i]); 153 | cbufData[i] = (unsigned char) ((outputData[i]) * 255); 154 | } 155 | 156 | cv::Mat predMat(outHeight, outWidth, CV_32F, (float *) predData.data()); 157 | cv::Mat cBufMat(outHeight, outWidth, CV_8UC1, (unsigned 
char *) cbufData.data()); 158 | 159 | //-----boxThresh----- 160 | const double maxValue = 255; 161 | const double threshold = boxThresh * 255; 162 | cv::Mat thresholdMat; 163 | cv::threshold(cBufMat, thresholdMat, threshold, maxValue, cv::THRESH_BINARY); 164 | 165 | //-----dilate----- 166 | cv::Mat dilateMat; 167 | cv::Mat dilateElement = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); 168 | cv::dilate(thresholdMat, dilateMat, dilateElement); 169 | 170 | return findRsBoxes(predMat, dilateMat, s, boxScoreThresh, unClipRatio); 171 | } -------------------------------------------------------------------------------- /src/OcrLite.cpp: -------------------------------------------------------------------------------- 1 | #include "OcrLite.h" 2 | #include "OcrLiteImpl.h" 3 | 4 | OcrLite::OcrLite() { 5 | pImpl = new OcrLiteImpl(); 6 | } 7 | 8 | OcrLite::~OcrLite() { 9 | delete pImpl; 10 | } 11 | 12 | void OcrLite::setNumThread(int numOfThread) { 13 | pImpl->setNumThread(numOfThread); 14 | } 15 | 16 | void OcrLite::initLogger(bool isConsole, bool isPartImg, bool isResultImg) { 17 | pImpl->initLogger(isConsole, isPartImg, isResultImg); 18 | } 19 | 20 | void OcrLite::enableResultTxt(const char *path, const char *imgName) { 21 | pImpl->enableResultTxt(path, imgName); 22 | } 23 | 24 | void OcrLite::setGpuIndex(int gpuIndex) { 25 | pImpl->setGpuIndex(gpuIndex); 26 | } 27 | 28 | bool OcrLite::initModels(const std::string &detPath, const std::string &clsPath, 29 | const std::string &recPath, const std::string &keysPath) { 30 | return pImpl->initModels(detPath, clsPath, recPath, keysPath); 31 | } 32 | 33 | void OcrLite::Logger(const char *format, ...) 
{ 34 | if (!(pImpl->isOutputConsole || pImpl->isOutputResultTxt)) return; 35 | char *buffer = (char *) malloc(8192); 36 | va_list args; 37 | va_start(args, format); 38 | vsprintf(buffer, format, args); 39 | va_end(args); 40 | if (pImpl->isOutputConsole) printf("%s", buffer); 41 | if (pImpl->isOutputResultTxt) fprintf(pImpl->resultTxt, "%s", buffer); 42 | free(buffer); 43 | } 44 | 45 | OcrResult OcrLite::detect(const char *path, const char *imgName, 46 | int padding, int maxSideLen, 47 | float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, bool mostAngle) { 48 | return pImpl->detect(path, imgName, padding, maxSideLen, boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle); 49 | } 50 | 51 | OcrResult OcrLite::detect(const cv::Mat &mat, 52 | int padding, int maxSideLen, 53 | float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, bool mostAngle) { 54 | return pImpl->detect(mat, padding, maxSideLen, boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle); 55 | } 56 | -------------------------------------------------------------------------------- /src/OcrLiteCApi.cpp: -------------------------------------------------------------------------------- 1 | #ifdef __CLIB__ 2 | 3 | #include "OcrLiteCApi.h" 4 | #include "OcrLiteImpl.h" 5 | 6 | extern "C" 7 | { 8 | typedef struct { 9 | OcrLiteImpl OcrObj; 10 | std::string strRes; 11 | } OCR_OBJ; 12 | 13 | _QM_OCR_API OCR_HANDLE 14 | OcrInit(const char *szDetModel, const char *szClsModel, const char *szRecModel, const char *szKeyPath, int nThreads) { 15 | 16 | OCR_OBJ *pOcrObj = new OCR_OBJ; 17 | if (pOcrObj) { 18 | pOcrObj->OcrObj.setNumThread(nThreads); 19 | 20 | pOcrObj->OcrObj.initModels(szDetModel, szClsModel, szRecModel, szKeyPath); 21 | 22 | return pOcrObj; 23 | } else { 24 | return nullptr; 25 | } 26 | 27 | } 28 | 29 | _QM_OCR_API OCR_BOOL 30 | OcrDetect(OCR_HANDLE handle, const char *imgPath, const char *imgName, OCR_PARAM *pParam) { 31 | 32 | OCR_OBJ *pOcrObj = (OCR_OBJ *) 
handle; 33 | if (!pOcrObj) 34 | return FALSE; 35 | 36 | OCR_PARAM Param = *pParam; 37 | if (Param.padding == 0) 38 | Param.padding = 50; 39 | 40 | if (Param.maxSideLen == 0) 41 | Param.maxSideLen = 1024; 42 | 43 | if (Param.boxScoreThresh == 0) 44 | Param.boxScoreThresh = 0.6; 45 | 46 | if (Param.boxThresh == 0) 47 | Param.boxThresh = 0.3f; 48 | 49 | if (Param.unClipRatio == 0) 50 | Param.unClipRatio = 2.0; 51 | 52 | if (Param.doAngle == 0) 53 | Param.doAngle = 1; 54 | 55 | if (Param.mostAngle == 0) 56 | Param.mostAngle = 1; 57 | 58 | OcrResult result = pOcrObj->OcrObj.detect(imgPath, imgName, Param.padding, Param.maxSideLen, 59 | Param.boxScoreThresh, Param.boxThresh, Param.unClipRatio, 60 | Param.doAngle != 0, Param.mostAngle != 0); 61 | if (result.strRes.length() > 0) { 62 | pOcrObj->strRes = result.strRes; 63 | return TRUE; 64 | } else 65 | return FALSE; 66 | } 67 | 68 | _QM_OCR_API OCR_BOOL 69 | OcrDetectInput(OCR_HANDLE handle, OCR_INPUT *input, OCR_PARAM *pParam, OCR_RESULT *ocrResult) { 70 | 71 | OCR_OBJ *pOcrObj = (OCR_OBJ *) handle; 72 | if (!pOcrObj) 73 | return FALSE; 74 | 75 | OCR_PARAM Param = *pParam; 76 | if (Param.padding == 0) 77 | Param.padding = 50; 78 | 79 | if (Param.maxSideLen == 0) 80 | Param.maxSideLen = 1024; 81 | 82 | if (Param.boxScoreThresh == 0) 83 | Param.boxScoreThresh = 0.6; 84 | 85 | if (Param.boxThresh == 0) 86 | Param.boxThresh = 0.3f; 87 | 88 | if (Param.unClipRatio == 0) 89 | Param.unClipRatio = 2.0; 90 | 91 | if (Param.doAngle == 0) 92 | Param.doAngle = 1; 93 | 94 | if (Param.mostAngle == 0) 95 | Param.mostAngle = 1; 96 | OcrResult result; 97 | if(input->dataLength == 0) { 98 | return FALSE; 99 | } 100 | 101 | if(input->type == 0){ 102 | if(input->channels == 0){ 103 | return FALSE; 104 | } 105 | result = pOcrObj->OcrObj.detectBitmap(input->data,input->width,input->height, input->channels, Param.padding, Param.maxSideLen, 106 | Param.boxScoreThresh, Param.boxThresh, Param.unClipRatio, 107 | Param.doAngle != 0, Param.mostAngle 
!= 0); 108 | } 109 | 110 | if(input->type == 1){ 111 | result= pOcrObj->OcrObj.detectImageBytes(input->data,input->dataLength, input->channels >= 3 ? 0 : 1, Param.padding, Param.maxSideLen, 112 | Param.boxScoreThresh, Param.boxThresh, Param.unClipRatio, 113 | Param.doAngle != 0, Param.mostAngle != 0); 114 | } 115 | 116 | if (result.strRes.length() > 0) { 117 | ocrResult->dbNetTime = result.dbNetTime; 118 | ocrResult->detectTime = result.detectTime; 119 | ocrResult->textBlocksLength = result.textBlocks.size(); 120 | // 计算所需内存大小 121 | size_t count = result.textBlocks.size(); 122 | 123 | // 分配足够大的内存块 124 | auto *rawArray = static_cast(calloc(count, sizeof(TEXT_BLOCK))); 125 | for (size_t i = 0; i < count; i++) { 126 | TextBlock textBlock = result.textBlocks[i]; 127 | 128 | rawArray[i].boxScore = textBlock.boxScore; 129 | rawArray[i].angleIndex = textBlock.angleIndex; 130 | rawArray[i].angleScore = textBlock.angleScore; 131 | rawArray[i].angleTime = textBlock.angleTime; 132 | auto* charScore = static_cast(calloc(textBlock.charScores.size(), sizeof(float))); 133 | std::copy(textBlock.charScores.begin(), textBlock.charScores.end(), charScore); 134 | rawArray[i].charScores = charScore; 135 | rawArray[i].charScoresLength = textBlock.charScores.size(); 136 | auto * boxPoint= static_cast(calloc(textBlock.boxPoint.size(), sizeof (OCR_POINT))); 137 | for(size_t boxPointIdx = 0; boxPointIdx < textBlock.boxPoint.size(); boxPointIdx++){ 138 | boxPoint[boxPointIdx].x = textBlock.boxPoint[boxPointIdx].x; 139 | boxPoint[boxPointIdx].y = textBlock.boxPoint[boxPointIdx].y; 140 | } 141 | rawArray[i].boxPoint = boxPoint; 142 | rawArray[i].boxPointLength = textBlock.boxPoint.size(); 143 | auto* text = static_cast(calloc(textBlock.text.size(), sizeof (uint8_t))); 144 | std::copy(textBlock.text.begin(), textBlock.text.end(), text); 145 | rawArray[i].text = text; 146 | rawArray[i].textLength = textBlock.text.size() + 1; 147 | rawArray[i].crnnTime = textBlock.crnnTime; 148 | 
rawArray[i].blockTime = textBlock.blockTime; 149 | } 150 | ocrResult->textBlocks = rawArray; 151 | return TRUE; 152 | } else 153 | return FALSE; 154 | } 155 | 156 | _QM_OCR_API OCR_BOOL 157 | OcrFreeResult(OCR_RESULT *result) { 158 | if(result && result->textBlocksLength && result->textBlocks){ 159 | for(int i = 0; i < result->textBlocksLength; i++){ 160 | free(result->textBlocks[i].charScores); 161 | free(result->textBlocks[i].text); 162 | free(result->textBlocks[i].boxPoint); 163 | } 164 | free(result->textBlocks); 165 | return true; 166 | } 167 | return false; 168 | } 169 | 170 | _QM_OCR_API int OcrGetLen(OCR_HANDLE handle) { 171 | OCR_OBJ *pOcrObj = (OCR_OBJ *) handle; 172 | if (!pOcrObj) 173 | return 0; 174 | return pOcrObj->strRes.size() + 1; 175 | } 176 | 177 | _QM_OCR_API OCR_BOOL OcrGetResult(OCR_HANDLE handle, char *szBuf, int nLen) { 178 | OCR_OBJ *pOcrObj = (OCR_OBJ *) handle; 179 | if (!pOcrObj) 180 | return FALSE; 181 | 182 | if (nLen > pOcrObj->strRes.size()) { 183 | strncpy(szBuf, pOcrObj->strRes.c_str(), pOcrObj->strRes.size()); 184 | szBuf[pOcrObj->strRes.size() - 1] = 0; 185 | } 186 | 187 | return pOcrObj->strRes.size(); 188 | } 189 | 190 | _QM_OCR_API void OcrDestroy(OCR_HANDLE handle) { 191 | OCR_OBJ *pOcrObj = (OCR_OBJ *) handle; 192 | if (pOcrObj) 193 | delete pOcrObj; 194 | } 195 | 196 | }; 197 | #endif 198 | -------------------------------------------------------------------------------- /src/OcrLiteImpl.cpp: -------------------------------------------------------------------------------- 1 | #include "OcrLiteImpl.h" 2 | #include "OcrUtils.h" 3 | #include //windows&linux 4 | 5 | OcrLiteImpl::OcrLiteImpl() { 6 | loggerBuffer = (char *)malloc(8192); 7 | } 8 | 9 | OcrLiteImpl::~OcrLiteImpl() { 10 | if (isOutputResultTxt) { 11 | fclose(resultTxt); 12 | } 13 | free(loggerBuffer); 14 | } 15 | 16 | void OcrLiteImpl::setNumThread(int numOfThread) { 17 | dbNet.setNumThread(numOfThread); 18 | angleNet.setNumThread(numOfThread); 19 | 
crnnNet.setNumThread(numOfThread); 20 | } 21 | 22 | void OcrLiteImpl::initLogger(bool isConsole, bool isPartImg, bool isResultImg) { 23 | isOutputConsole = isConsole; 24 | isOutputPartImg = isPartImg; 25 | isOutputResultImg = isResultImg; 26 | } 27 | 28 | void OcrLiteImpl::enableResultTxt(const char *path, const char *imgName) { 29 | isOutputResultTxt = true; 30 | std::string resultTxtPath = getResultTxtFilePath(path, imgName); 31 | printf("resultTxtPath(%s)\n", resultTxtPath.c_str()); 32 | resultTxt = fopen(resultTxtPath.c_str(), "w"); 33 | } 34 | 35 | void OcrLiteImpl::setGpuIndex(int gpuIndex) { 36 | dbNet.setGpuIndex(gpuIndex); 37 | angleNet.setGpuIndex(gpuIndex); 38 | crnnNet.setGpuIndex(gpuIndex); 39 | } 40 | 41 | bool OcrLiteImpl::initModels(const std::string &detPath, const std::string &clsPath, 42 | const std::string &recPath, const std::string &keysPath) { 43 | Logger("=====Init Models=====\n"); 44 | Logger("--- Init DbNet ---\n"); 45 | dbNet.initModel(detPath); 46 | 47 | Logger("--- Init AngleNet ---\n"); 48 | angleNet.initModel(clsPath); 49 | 50 | Logger("--- Init CrnnNet ---\n"); 51 | crnnNet.initModel(recPath, keysPath); 52 | 53 | Logger("Init Models Success!\n"); 54 | return true; 55 | } 56 | 57 | void OcrLiteImpl::Logger(const char *format, ...) 
{ 58 | if (!(isOutputConsole || isOutputResultTxt)) return; 59 | memset(loggerBuffer, 0, 8192); 60 | va_list args; 61 | va_start(args, format); 62 | vsprintf(loggerBuffer, format, args); 63 | va_end(args); 64 | if (isOutputConsole) printf("%s", loggerBuffer); 65 | if (isOutputResultTxt) fprintf(resultTxt, "%s", loggerBuffer); 66 | } 67 | 68 | cv::Mat makePadding(cv::Mat &src, const int padding) { 69 | if (padding <= 0) return src; 70 | cv::Scalar paddingScalar = {255, 255, 255}; 71 | cv::Mat paddingSrc; 72 | cv::copyMakeBorder(src, paddingSrc, padding, padding, padding, padding, cv::BORDER_ISOLATED, paddingScalar); 73 | return paddingSrc; 74 | } 75 | 76 | OcrResult OcrLiteImpl::detect(const char *path, const char *imgName, 77 | const int padding, const int maxSideLen, 78 | float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, bool mostAngle) { 79 | std::string imgFile = getSrcImgFilePath(path, imgName); 80 | cv::Mat originSrc = imread(imgFile, cv::IMREAD_COLOR);//default : BGR 81 | int originMaxSide = (std::max)(originSrc.cols, originSrc.rows); 82 | int resize; 83 | if (maxSideLen <= 0 || maxSideLen > originMaxSide) { 84 | resize = originMaxSide; 85 | } else { 86 | resize = maxSideLen; 87 | } 88 | resize += 2 * padding; 89 | cv::Rect paddingRect(padding, padding, originSrc.cols, originSrc.rows); 90 | cv::Mat paddingSrc = makePadding(originSrc, padding); 91 | ScaleParam scale = getScaleParam(paddingSrc, resize); 92 | OcrResult result; 93 | result = detect(path, imgName, paddingSrc, paddingRect, scale, 94 | boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle); 95 | return result; 96 | } 97 | 98 | 99 | OcrResult OcrLiteImpl::detectImageBytes(const uint8_t *data, const long dataLength, const int grey, 100 | const int padding, const int maxSideLen, 101 | float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, 102 | bool mostAngle) { 103 | std::vector vecData(data, data + dataLength); 104 | cv::Mat originSrc = 
cv::imdecode(vecData, grey == 1 ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);//default : BGR 105 | OcrResult result; 106 | result = detect(originSrc, padding, maxSideLen, 107 | boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle); 108 | return result; 109 | 110 | } 111 | 112 | OcrResult OcrLiteImpl::detectBitmap(uint8_t *bitmapData, int width, int height, int channels, int padding, 113 | int maxSideLen, float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, 114 | bool mostAngle) { 115 | cv::Mat originSrc(height, width, CV_8UC(channels), bitmapData); 116 | if (channels > 3) { 117 | cv::cvtColor(originSrc, originSrc, cv::COLOR_RGBA2BGR); 118 | } else if (channels == 3) { 119 | cv::cvtColor(originSrc, originSrc, cv::COLOR_RGB2BGR); 120 | } 121 | OcrResult result; 122 | result = detect(originSrc, padding, maxSideLen, 123 | boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle); 124 | return result; 125 | } 126 | 127 | 128 | OcrResult OcrLiteImpl::detect(const cv::Mat &mat, int padding, int maxSideLen, float boxScoreThresh, float boxThresh, 129 | float unClipRatio, bool doAngle, bool mostAngle) { 130 | cv::Mat originSrc = mat; 131 | int originMaxSide = (std::max)(originSrc.cols, originSrc.rows); 132 | int resize; 133 | if (maxSideLen <= 0 || maxSideLen > originMaxSide) { 134 | resize = originMaxSide; 135 | } else { 136 | resize = maxSideLen; 137 | } 138 | resize += 2 * padding; 139 | cv::Rect paddingRect(padding, padding, originSrc.cols, originSrc.rows); 140 | cv::Mat paddingSrc = makePadding(originSrc, padding); 141 | ScaleParam scale = getScaleParam(paddingSrc, resize); 142 | OcrResult result; 143 | result = detect(NULL, NULL, paddingSrc, paddingRect, scale, 144 | boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle); 145 | return result; 146 | } 147 | 148 | std::vector OcrLiteImpl::getPartImages(cv::Mat &src, std::vector &textBoxes, 149 | const char *path, const char *imgName) { 150 | std::vector partImages; 151 | for (size_t i = 0; i 
< textBoxes.size(); ++i) { 152 | cv::Mat partImg = getRotateCropImage(src, textBoxes[i].boxPoint); 153 | partImages.emplace_back(partImg); 154 | //OutPut DebugImg 155 | if (isOutputPartImg) { 156 | std::string debugImgFile = getDebugImgFilePath(path, imgName, i, "-part-"); 157 | saveImg(partImg, debugImgFile.c_str()); 158 | } 159 | } 160 | return partImages; 161 | } 162 | 163 | OcrResult OcrLiteImpl::detect(const char *path, const char *imgName, 164 | cv::Mat &src, cv::Rect &originRect, ScaleParam &scale, 165 | float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, bool mostAngle) { 166 | 167 | cv::Mat textBoxPaddingImg = src.clone(); 168 | int thickness = getThickness(src); 169 | 170 | Logger("=====Start detect=====\n"); 171 | Logger("ScaleParam(sw:%d,sh:%d,dw:%d,dh:%d,%f,%f)\n", scale.srcWidth, scale.srcHeight, 172 | scale.dstWidth, scale.dstHeight, 173 | scale.ratioWidth, scale.ratioHeight); 174 | 175 | Logger("---------- step: dbNet getTextBoxes ----------\n"); 176 | double startTime = getCurrentTime(); 177 | std::vector textBoxes = dbNet.getTextBoxes(src, scale, boxScoreThresh, boxThresh, unClipRatio); 178 | double endDbNetTime = getCurrentTime(); 179 | double dbNetTime = endDbNetTime - startTime; 180 | Logger("dbNetTime(%fms)\n", dbNetTime); 181 | 182 | for (size_t i = 0; i < textBoxes.size(); ++i) { 183 | Logger("TextBox[%d](+padding)[score(%f),[x: %d, y: %d], [x: %d, y: %d], [x: %d, y: %d], [x: %d, y: %d]]\n", i, 184 | textBoxes[i].score, 185 | textBoxes[i].boxPoint[0].x, textBoxes[i].boxPoint[0].y, 186 | textBoxes[i].boxPoint[1].x, textBoxes[i].boxPoint[1].y, 187 | textBoxes[i].boxPoint[2].x, textBoxes[i].boxPoint[2].y, 188 | textBoxes[i].boxPoint[3].x, textBoxes[i].boxPoint[3].y); 189 | } 190 | 191 | Logger("---------- step: drawTextBoxes ----------\n"); 192 | drawTextBoxes(textBoxPaddingImg, textBoxes, thickness); 193 | 194 | //---------- getPartImages ---------- 195 | std::vector partImages = getPartImages(src, textBoxes, path, 
imgName); 196 | 197 | Logger("---------- step: angleNet getAngles ----------\n"); 198 | std::vector angles; 199 | angles = angleNet.getAngles(partImages, path, imgName, doAngle, mostAngle); 200 | 201 | //Log Angles 202 | for (size_t i = 0; i < angles.size(); ++i) { 203 | Logger("angle[%d][index(%d), score(%f), time(%fms)]\n", i, angles[i].index, angles[i].score, angles[i].time); 204 | } 205 | 206 | //Rotate partImgs 207 | for (size_t i = 0; i < partImages.size(); ++i) { 208 | if (angles[i].index == 1) { 209 | partImages.at(i) = matRotateClockWise180(partImages[i]); 210 | } 211 | } 212 | 213 | Logger("---------- step: crnnNet getTextLine ----------\n"); 214 | std::vector textLines = crnnNet.getTextLines(partImages, path, imgName); 215 | //Log TextLines 216 | for (size_t i = 0; i < textLines.size(); ++i) { 217 | Logger("textLine[%d](%s)\n", i, textLines[i].text.c_str()); 218 | std::ostringstream txtScores; 219 | for (size_t s = 0; s < textLines[i].charScores.size(); ++s) { 220 | if (s == 0) { 221 | txtScores << textLines[i].charScores[s]; 222 | } else { 223 | txtScores << " ," << textLines[i].charScores[s]; 224 | } 225 | } 226 | Logger("textScores[%d]{%s}\n", i, std::string(txtScores.str()).c_str()); 227 | Logger("crnnTime[%d](%fms)\n", i, textLines[i].time); 228 | } 229 | 230 | std::vector textBlocks; 231 | for (size_t i = 0; i < textLines.size(); ++i) { 232 | std::vector boxPoint = std::vector(4); 233 | int padding = originRect.x;//padding conversion 234 | boxPoint[0] = cv::Point(textBoxes[i].boxPoint[0].x - padding, textBoxes[i].boxPoint[0].y - padding); 235 | boxPoint[1] = cv::Point(textBoxes[i].boxPoint[1].x - padding, textBoxes[i].boxPoint[1].y - padding); 236 | boxPoint[2] = cv::Point(textBoxes[i].boxPoint[2].x - padding, textBoxes[i].boxPoint[2].y - padding); 237 | boxPoint[3] = cv::Point(textBoxes[i].boxPoint[3].x - padding, textBoxes[i].boxPoint[3].y - padding); 238 | TextBlock textBlock{boxPoint, textBoxes[i].score, angles[i].index, angles[i].score, 239 | 
angles[i].time, textLines[i].text, textLines[i].charScores, textLines[i].time, 240 | angles[i].time + textLines[i].time}; 241 | textBlocks.emplace_back(textBlock); 242 | } 243 | 244 | double endTime = getCurrentTime(); 245 | double fullTime = endTime - startTime; 246 | Logger("=====End detect=====\n"); 247 | Logger("FullDetectTime(%fms)\n", fullTime); 248 | 249 | //cropped to original size 250 | cv::Mat textBoxImg; 251 | 252 | if (originRect.x > 0 && originRect.y > 0) { 253 | textBoxPaddingImg(originRect).copyTo(textBoxImg); 254 | } else { 255 | textBoxImg = textBoxPaddingImg; 256 | } 257 | 258 | //Save result.jpg 259 | if (isOutputResultImg) { 260 | std::string resultImgFile = getResultImgFilePath(path, imgName); 261 | imwrite(resultImgFile, textBoxImg); 262 | } 263 | 264 | std::string strRes; 265 | for (auto &textBlock: textBlocks) { 266 | strRes.append(textBlock.text); 267 | strRes.append("\n"); 268 | } 269 | 270 | return OcrResult{dbNetTime, textBlocks, textBoxImg, fullTime, strRes}; 271 | } 272 | -------------------------------------------------------------------------------- /src/OcrLiteJni.cpp: -------------------------------------------------------------------------------- 1 | #ifdef __JNI__ 2 | 3 | #include "version.h" 4 | #include 5 | #include "OcrLite.h" 6 | #include "OcrResultUtils.h" 7 | #include "OcrUtils.h" 8 | 9 | static OcrLite *ocrLite; 10 | 11 | JNIEXPORT jint JNICALL 12 | JNI_OnLoad(JavaVM *vm, void *reserved) { 13 | ocrLite = new OcrLite(); 14 | return JNI_VERSION_1_4; 15 | } 16 | 17 | JNIEXPORT void JNICALL 18 | JNI_OnUnload(JavaVM *vm, void *reserved) { 19 | //printf("JNI_OnUnload\n"); 20 | delete ocrLite; 21 | } 22 | 23 | #ifdef _WIN32 24 | char *jstringToChar(JNIEnv *env, jstring jstr) { 25 | char *rtn = NULL; 26 | jclass clsstring = env->FindClass("java/lang/String"); 27 | jstring strencode = env->NewStringUTF("gbk"); 28 | jmethodID mid = env->GetMethodID(clsstring, "getBytes", "(Ljava/lang/String;)[B"); 29 | jbyteArray barr = (jbyteArray) 
env->CallObjectMethod(jstr, mid, strencode); 30 | jsize alen = env->GetArrayLength(barr); 31 | jbyte *ba = env->GetByteArrayElements(barr, JNI_FALSE); 32 | if (alen > 0) { 33 | rtn = (char *) malloc(alen + 1); 34 | memcpy(rtn, ba, alen); 35 | rtn[alen] = 0; 36 | } 37 | env->ReleaseByteArrayElements(barr, ba, 0); 38 | return rtn; 39 | } 40 | #else 41 | 42 | char *jstringToChar(JNIEnv *env, jstring input) { 43 | char *str = NULL; 44 | jclass clsstring = env->FindClass("java/lang/String"); 45 | jstring strencode = env->NewStringUTF("utf-8"); 46 | jmethodID mid = env->GetMethodID(clsstring, "getBytes", "(Ljava/lang/String;)[B"); 47 | jbyteArray barr = (jbyteArray) env->CallObjectMethod(input, mid, strencode); 48 | jsize alen = env->GetArrayLength(barr); 49 | jbyte *ba = env->GetByteArrayElements(barr, JNI_FALSE); 50 | if (alen > 0) { 51 | str = (char *) malloc(alen + 1); 52 | memcpy(str, ba, alen); 53 | str[alen] = 0; 54 | } 55 | env->ReleaseByteArrayElements(barr, ba, 0); 56 | return str; 57 | } 58 | 59 | #endif 60 | 61 | extern "C" JNIEXPORT jstring JNICALL 62 | Java_com_benjaminwan_ocrlibrary_OcrEngine_getVersion(JNIEnv *env, jobject thiz) { 63 | jstring ver = env->NewStringUTF(VERSION); 64 | return ver; 65 | } 66 | 67 | extern "C" JNIEXPORT jboolean JNICALL 68 | Java_com_benjaminwan_ocrlibrary_OcrEngine_setNumThread(JNIEnv *env, jobject thiz, jint numThread) { 69 | ocrLite->setNumThread(numThread); 70 | printf("numThread=%d\n", numThread); 71 | return JNI_TRUE; 72 | } 73 | 74 | extern "C" JNIEXPORT void JNICALL 75 | Java_com_benjaminwan_ocrlibrary_OcrEngine_initLogger(JNIEnv *env, jobject thiz, jboolean isConsole, 76 | jboolean isPartImg, jboolean isResultImg) { 77 | ocrLite->initLogger(isConsole,//isOutputConsole 78 | isPartImg,//isOutputPartImg 79 | isResultImg);//isOutputResultImg 80 | } 81 | 82 | extern "C" JNIEXPORT void JNICALL 83 | Java_com_benjaminwan_ocrlibrary_OcrEngine_enableResultText(JNIEnv *env, jobject thiz, jstring input) { 84 | std::string imgPath 
= jstringToChar(env, input); 85 | std::string imgDir = imgPath.substr(0, imgPath.find_last_of('/') + 1); 86 | std::string imgName = imgPath.substr(imgPath.find_last_of('/') + 1); 87 | ocrLite->enableResultTxt(imgDir.c_str(), imgName.c_str()); 88 | } 89 | 90 | extern "C" JNIEXPORT jboolean JNICALL 91 | Java_com_benjaminwan_ocrlibrary_OcrEngine_initModels(JNIEnv *env, jobject thiz, jstring path, 92 | jstring det, jstring cls, jstring rec, jstring keys) { 93 | std::string modelsDir = jstringToChar(env, path); 94 | std::string detName = jstringToChar(env, det); 95 | std::string clsName = jstringToChar(env, cls); 96 | std::string recName = jstringToChar(env, rec); 97 | std::string keysName = jstringToChar(env, keys); 98 | std::string modelDetPath = modelsDir + "/" + detName; 99 | std::string modelClsPath = modelsDir + "/" + clsName; 100 | std::string modelRecPath = modelsDir + "/" + recName; 101 | std::string keysPath = modelsDir + "/" + keysName; 102 | printf("modelsDir=%s\ndet=%s\ncls=%s\nrec=%s\nkeys=%s\n", modelsDir.c_str(), detName.c_str(), clsName.c_str(), 103 | recName.c_str(), keysName.c_str()); 104 | bool hasModelDetFile = isFileExists(modelDetPath); 105 | if (!hasModelDetFile) { 106 | fprintf(stderr, "Model det file not found: %s\n", modelDetPath.c_str()); 107 | return false; 108 | } 109 | bool hasModelClsFile = isFileExists(modelClsPath); 110 | if (!hasModelClsFile) { 111 | fprintf(stderr, "Model cls file not found: %s\n", modelClsPath.c_str()); 112 | return false; 113 | } 114 | bool hasModelRecFile = isFileExists(modelRecPath); 115 | if (!hasModelRecFile) { 116 | fprintf(stderr, "Model rec file not found: %s\n", modelRecPath.c_str()); 117 | return false; 118 | } 119 | bool hasKeysFile = isFileExists(keysPath); 120 | if (!hasKeysFile) { 121 | fprintf(stderr, "keys file not found: %s\n", keysPath.c_str()); 122 | return false; 123 | } 124 | ocrLite->initModels(modelDetPath, modelClsPath, modelRecPath, keysPath); 125 | return true; 126 | } 127 | 128 | extern "C" 
JNIEXPORT jobject JNICALL 129 | Java_com_benjaminwan_ocrlibrary_OcrEngine_detect(JNIEnv *env, jobject thiz, jstring input, jint padding, 130 | jint maxSideLen, 131 | jfloat boxScoreThresh, jfloat boxThresh, jfloat unClipRatio, 132 | jboolean doAngle, jboolean mostAngle 133 | ) { 134 | std::string imgPath = jstringToChar(env, input); 135 | bool hasTargetImgFile = isFileExists(imgPath); 136 | if (!hasTargetImgFile) { 137 | fprintf(stderr, "Target image not found: %s\n", imgPath.c_str()); 138 | OcrResult result{}; 139 | return OcrResultUtils(env, result).getJObject(); 140 | } 141 | std::string imgDir = imgPath.substr(0, imgPath.find_last_of('/') + 1); 142 | std::string imgName = imgPath.substr(imgPath.find_last_of('/') + 1); 143 | printf("imgDir=%s, imgName=%s\n", imgDir.c_str(), imgName.c_str()); 144 | OcrResult result = ocrLite->detect(imgDir.c_str(), imgName.c_str(), padding, maxSideLen, 145 | boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle); 146 | return OcrResultUtils(env, result).getJObject(); 147 | } 148 | 149 | extern "C" JNIEXPORT jobject JNICALL 150 | Java_com_benjaminwan_ocrlibrary_OcrEngine_detectInput(JNIEnv *env, jobject thiz, jobject input, jint padding, 151 | jint maxSideLen, 152 | jfloat boxScoreThresh, jfloat boxThresh, jfloat unClipRatio, 153 | jboolean doAngle, jboolean mostAngle 154 | ) { 155 | jclass ocrInputClass = env->GetObjectClass(input); 156 | jfieldID dataFieldId = env->GetFieldID(ocrInputClass, "data", "[B"); 157 | jbyteArray javaDataArray = (jbyteArray) env->GetObjectField(input, dataFieldId); 158 | jsize dataLength = env->GetArrayLength(javaDataArray); 159 | jbyte *nativeData = env->GetByteArrayElements(javaDataArray, nullptr); 160 | 161 | jfieldID typeFieldId = env->GetFieldID(ocrInputClass, "type", "I"); 162 | jint type = env->GetIntField(input, typeFieldId); 163 | 164 | jfieldID widthFieldId = env->GetFieldID(ocrInputClass, "width", "I"); 165 | jint width = env->GetIntField(input, widthFieldId); 166 | 167 | jfieldID 
heightFieldId = env->GetFieldID(ocrInputClass, "height", "I"); 168 | jint height = env->GetIntField(input, heightFieldId); 169 | 170 | jfieldID channelFieldId = env->GetFieldID(ocrInputClass, "channels", "I"); 171 | jint channels = env->GetIntField(input, channelFieldId); 172 | 173 | if (dataLength <= 0) { 174 | fprintf(stderr, "data cannot be empty"); 175 | OcrResult result{}; 176 | return OcrResultUtils(env, result).getJObject(); 177 | } 178 | OcrResult result; 179 | //bitmap 180 | if(type == 0){ 181 | if(channels == 0){ 182 | env->ThrowNew(env->FindClass("java/lang/RuntimeException"), "When type is bitmap, the channel cannot be empty."); 183 | } 184 | result = ocrLite->detectBitmap(reinterpret_cast(nativeData), width, height, channels, padding, maxSideLen, 185 | boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle); 186 | } 187 | //file bytes 188 | if(type == 1){ 189 | result = ocrLite->detectImageBytes(reinterpret_cast(nativeData), dataLength, channels >= 3 ? 0 :1, padding, maxSideLen, 190 | boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle); 191 | } 192 | env->ReleaseByteArrayElements(javaDataArray, nativeData, JNI_ABORT); 193 | return OcrResultUtils(env, result).getJObject(); 194 | } 195 | #endif 196 | -------------------------------------------------------------------------------- /src/OcrResultUtils.cpp: -------------------------------------------------------------------------------- 1 | #ifdef __JNI__ 2 | #include 3 | #include "OcrResultUtils.h" 4 | 5 | OcrResultUtils::OcrResultUtils(JNIEnv *env, OcrResult &ocrResult) { 6 | jniEnv = env; 7 | 8 | jclass jOcrResultClass = env->FindClass("com/benjaminwan/ocrlibrary/OcrResult"); 9 | 10 | if (jOcrResultClass == NULL) { 11 | printf("OcrResult class is null\n"); 12 | } 13 | 14 | jmethodID jOcrResultConstructor = env->GetMethodID(jOcrResultClass, "", 15 | "(DLjava/util/ArrayList;DLjava/lang/String;)V"); 16 | 17 | jobject textBlocks = getTextBlocks(ocrResult.textBlocks); 18 | jdouble dbNetTime = 
(jdouble) ocrResult.dbNetTime; 19 | jdouble detectTime = (jdouble) ocrResult.detectTime; 20 | jstring jStrRest = jniEnv->NewStringUTF(ocrResult.strRes.c_str()); 21 | 22 | jOcrResult = env->NewObject(jOcrResultClass, jOcrResultConstructor, dbNetTime, 23 | textBlocks, detectTime, jStrRest); 24 | } 25 | 26 | OcrResultUtils::~OcrResultUtils() { 27 | jniEnv = NULL; 28 | } 29 | 30 | jobject OcrResultUtils::getJObject() { 31 | return jOcrResult; 32 | } 33 | 34 | jclass OcrResultUtils::newJListClass() { 35 | jclass clazz = jniEnv->FindClass("java/util/ArrayList"); 36 | if (clazz == NULL) { 37 | printf("ArrayList class is null\n"); 38 | return NULL; 39 | } 40 | return clazz; 41 | } 42 | 43 | jmethodID OcrResultUtils::getListConstructor(jclass clazz) { 44 | jmethodID constructor = jniEnv->GetMethodID(clazz, "", "()V"); 45 | return constructor; 46 | } 47 | 48 | jobject OcrResultUtils::newJPoint(cv::Point &point) { 49 | jclass clazz = jniEnv->FindClass("com/benjaminwan/ocrlibrary/Point"); 50 | if (clazz == NULL) { 51 | printf("Point class is null\n"); 52 | return NULL; 53 | } 54 | jmethodID constructor = jniEnv->GetMethodID(clazz, "", "(II)V"); 55 | jobject obj = jniEnv->NewObject(clazz, constructor, point.x, point.y); 56 | return obj; 57 | } 58 | 59 | jobject OcrResultUtils::newJBoxPoint(std::vector &boxPoint) { 60 | jclass jListClass = newJListClass(); 61 | jmethodID jListConstructor = getListConstructor(jListClass); 62 | jobject jList = jniEnv->NewObject(jListClass, jListConstructor); 63 | jmethodID jListAdd = jniEnv->GetMethodID(jListClass, "add", "(Ljava/lang/Object;)Z"); 64 | 65 | for (auto point : boxPoint) { 66 | jobject jPoint = newJPoint(point); 67 | jniEnv->CallBooleanMethod(jList, jListAdd, jPoint); 68 | } 69 | return jList; 70 | } 71 | 72 | jobject OcrResultUtils::getTextBlock(TextBlock &textBlock) { 73 | jobject jBoxPint = newJBoxPoint(textBlock.boxPoint); 74 | jfloat jBoxScore = (jfloat) textBlock.boxScore; 75 | jfloat jAngleScore = (jfloat) 
textBlock.angleScore; 76 | jdouble jAngleTime = (jdouble) textBlock.angleTime; 77 | jstring jText = jniEnv->NewStringUTF(textBlock.text.c_str()); 78 | jobject jCharScores = newJScoreArray(textBlock.charScores); 79 | jdouble jCrnnTime = (jdouble) textBlock.crnnTime; 80 | jdouble jBlockTime = (jdouble) textBlock.blockTime; 81 | jclass clazz = jniEnv->FindClass("com/benjaminwan/ocrlibrary/TextBlock"); 82 | if (clazz == NULL) { 83 | printf("TextBlock class is null\n"); 84 | return NULL; 85 | } 86 | jmethodID constructor = jniEnv->GetMethodID(clazz, "", 87 | "(Ljava/util/ArrayList;FIFDLjava/lang/String;[FDD)V"); 88 | jobject obj = jniEnv->NewObject(clazz, constructor, jBoxPint, jBoxScore, textBlock.angleIndex, 89 | jAngleScore, jAngleTime, jText, jCharScores, jCrnnTime, 90 | jBlockTime); 91 | return obj; 92 | } 93 | 94 | jobject OcrResultUtils::getTextBlocks(std::vector &textBlocks) { 95 | jclass jListClass = newJListClass(); 96 | jmethodID jListConstructor = getListConstructor(jListClass); 97 | jobject jList = jniEnv->NewObject(jListClass, jListConstructor); 98 | jmethodID jListAdd = jniEnv->GetMethodID(jListClass, "add", "(Ljava/lang/Object;)Z"); 99 | 100 | for (int i = 0; i < textBlocks.size(); ++i) { 101 | auto textBlock = textBlocks[i]; 102 | jobject jTextBlock = getTextBlock(textBlock); 103 | jniEnv->CallBooleanMethod(jList, jListAdd, jTextBlock); 104 | } 105 | return jList; 106 | } 107 | 108 | jfloatArray OcrResultUtils::newJScoreArray(std::vector &scores) { 109 | jfloatArray jScores = jniEnv->NewFloatArray(scores.size()); 110 | jniEnv->SetFloatArrayRegion(jScores, 0, scores.size(), (jfloat *) scores.data()); 111 | return jScores; 112 | } 113 | 114 | #endif -------------------------------------------------------------------------------- /src/OcrUtils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "OcrUtils.h" 5 | #include "clipper.hpp" 6 | 7 | double getCurrentTime() { 8 | 
return (static_cast(cv::getTickCount())) / cv::getTickFrequency() * 1000;//单位毫秒 9 | } 10 | 11 | //onnxruntime init windows 12 | std::wstring strToWstr(std::string str) { 13 | if (str.length() == 0) 14 | return L""; 15 | std::wstring wstr; 16 | wstr.assign(str.begin(), str.end()); 17 | return wstr; 18 | } 19 | 20 | ScaleParam getScaleParam(cv::Mat &src, const float scale) { 21 | int srcWidth = src.cols; 22 | int srcHeight = src.rows; 23 | int dstWidth = int((float) srcWidth * scale); 24 | int dstHeight = int((float) srcHeight * scale); 25 | if (dstWidth % 32 != 0) { 26 | dstWidth = (dstWidth / 32 - 1) * 32; 27 | dstWidth = (std::max)(dstWidth, 32); 28 | } 29 | if (dstHeight % 32 != 0) { 30 | dstHeight = (dstHeight / 32 - 1) * 32; 31 | dstHeight = (std::max)(dstHeight, 32); 32 | } 33 | float scaleWidth = (float) dstWidth / (float) srcWidth; 34 | float scaleHeight = (float) dstHeight / (float) srcHeight; 35 | return {srcWidth, srcHeight, dstWidth, dstHeight, scaleWidth, scaleHeight}; 36 | } 37 | 38 | ScaleParam getScaleParam(cv::Mat &src, const int targetSize) { 39 | int srcWidth, srcHeight, dstWidth, dstHeight; 40 | srcWidth = dstWidth = src.cols; 41 | srcHeight = dstHeight = src.rows; 42 | 43 | float ratio = 1.f; 44 | if (srcWidth > srcHeight) { 45 | ratio = float(targetSize) / float(srcWidth); 46 | } else { 47 | ratio = float(targetSize) / float(srcHeight); 48 | } 49 | dstWidth = int(float(srcWidth) * ratio); 50 | dstHeight = int(float(srcHeight) * ratio); 51 | if (dstWidth % 32 != 0) { 52 | dstWidth = (dstWidth / 32) * 32; 53 | dstWidth = (std::max)(dstWidth, 32); 54 | } 55 | if (dstHeight % 32 != 0) { 56 | dstHeight = (dstHeight / 32) * 32; 57 | dstHeight = (std::max)(dstHeight, 32); 58 | } 59 | float ratioWidth = (float) dstWidth / (float) srcWidth; 60 | float ratioHeight = (float) dstHeight / (float) srcHeight; 61 | return {srcWidth, srcHeight, dstWidth, dstHeight, ratioWidth, ratioHeight}; 62 | } 63 | 64 | std::vector getBox(const cv::RotatedRect &rect) { 65 | 
/**
 * Cut the quadrilateral `box` out of `src` and rectify it into an upright patch.
 *
 * The corners are consumed in the order box[0], box[1], box[2], box[3] and are mapped to
 * the top-left, top-right, bottom-right and bottom-left corners of the result. The result
 * width is the length of edge box[0]-box[1]; its height is the length of edge box[0]-box[3]
 * (both truncated to int). A patch that is at least 1.5 times taller than wide is
 * transposed and flipped vertically, i.e. turned 90 degrees counter-clockwise.
 *
 * @param src  source image; it is only read.
 * @param box  corner points in source-image pixel coordinates; must hold at least 4 points.
 * @return the rectified (and possibly rotated) patch.
 *
 * OpenCV still raises an exception when the bounding rectangle of `box` has no overlap
 * with the image (empty crop), exactly as before.
 */
cv::Mat getRotateCropImage(const cv::Mat &src, std::vector<cv::Point> box) {
    const int xs[4] = {box[0].x, box[1].x, box[2].x, box[3].x};
    const int ys[4] = {box[0].y, box[1].y, box[2].y, box[3].y};
    const int left = *std::min_element(xs, xs + 4);
    const int right = *std::max_element(xs, xs + 4);
    const int top = *std::min_element(ys, ys + 4);
    const int bottom = *std::max_element(ys, ys + 4);

    // Clamp the bounding rectangle to the image: a box that touches or slightly exceeds
    // the border must not make cv::Mat::operator()(Rect) throw.
    const cv::Rect roi = cv::Rect(left, top, right - left, bottom - top) &
                         cv::Rect(0, 0, src.cols, src.rows);

    // Copy only the region of interest; the full-size deep copy of `src` was never needed.
    cv::Mat imgCrop;
    src(roi).copyTo(imgCrop);

    // Corner coordinates relative to the cropped region.
    cv::Point2f ptsSrc[4];
    for (int i = 0; i < 4; ++i) {
        ptsSrc[i] = cv::Point2f(float(box[i].x - roi.x), float(box[i].y - roi.y));
    }

    const double widthDx = box[0].x - box[1].x;
    const double widthDy = box[0].y - box[1].y;
    const double heightDx = box[0].x - box[3].x;
    const double heightDy = box[0].y - box[3].y;
    const int imgCropWidth = int(std::sqrt(widthDx * widthDx + widthDy * widthDy));
    const int imgCropHeight = int(std::sqrt(heightDx * heightDx + heightDy * heightDy));

    const cv::Point2f ptsDst[4] = {
            cv::Point2f(0.f, 0.f),
            cv::Point2f(float(imgCropWidth), 0.f),
            cv::Point2f(float(imgCropWidth), float(imgCropHeight)),
            cv::Point2f(0.f, float(imgCropHeight))};

    const cv::Mat M = cv::getPerspectiveTransform(ptsSrc, ptsDst);

    // The 5th argument of warpPerspective is `flags` (interpolation), the 6th is the border
    // mode. Passing BORDER_REPLICATE as the 5th argument selected linear interpolation (both
    // constants are 1) and silently left the border mode at BORDER_CONSTANT.
    cv::Mat partImg;
    cv::warpPerspective(imgCrop, partImg, M,
                        cv::Size(imgCropWidth, imgCropHeight),
                        cv::INTER_LINEAR, cv::BORDER_REPLICATE);

    if (float(partImg.rows) >= float(partImg.cols) * 1.5) {
        // Tall patch: turn it so that the text line runs horizontally.
        cv::Mat rotated;
        cv::transpose(partImg, rotated);
        cv::flip(rotated, rotated, 0);
        return rotated;
    }
    return partImg;
}
255, 255)); 173 | if (angleWidth < dstWidth) { 174 | cv::Rect rect(0, 0, srcResize.cols, srcResize.rows); 175 | srcResize.copyTo(srcFit(rect)); 176 | } else { 177 | cv::Rect rect(0, 0, dstWidth, dstHeight); 178 | srcResize(rect).copyTo(srcFit); 179 | } 180 | return srcFit; 181 | } 182 | 183 | bool cvPointCompare(const cv::Point &a, const cv::Point &b) { 184 | return a.x < b.x; 185 | } 186 | 187 | std::vector getMinBoxes(const cv::RotatedRect &boxRect, float &maxSideLen) { 188 | maxSideLen = std::max(boxRect.size.width, boxRect.size.height); 189 | std::vector boxPoint = getBox(boxRect); 190 | std::sort(boxPoint.begin(), boxPoint.end(), cvPointCompare); 191 | int index1, index2, index3, index4; 192 | if (boxPoint[1].y > boxPoint[0].y) { 193 | index1 = 0; 194 | index4 = 1; 195 | } else { 196 | index1 = 1; 197 | index4 = 0; 198 | } 199 | if (boxPoint[3].y > boxPoint[2].y) { 200 | index2 = 2; 201 | index3 = 3; 202 | } else { 203 | index2 = 3; 204 | index3 = 2; 205 | } 206 | std::vector minBox(4); 207 | minBox[0] = boxPoint[index1]; 208 | minBox[1] = boxPoint[index2]; 209 | minBox[2] = boxPoint[index3]; 210 | minBox[3] = boxPoint[index4]; 211 | return minBox; 212 | } 213 | 214 | float boxScoreFast(const std::vector &boxes, const cv::Mat &pred) { 215 | int width = pred.cols; 216 | int height = pred.rows; 217 | 218 | float arrayX[4] = {boxes[0].x, boxes[1].x, boxes[2].x, boxes[3].x}; 219 | float arrayY[4] = {boxes[0].y, boxes[1].y, boxes[2].y, boxes[3].y}; 220 | 221 | int minX = clamp(int(std::floor(*(std::min_element(arrayX, arrayX + 4)))), 0, width - 1); 222 | int maxX = clamp(int(std::ceil(*(std::max_element(arrayX, arrayX + 4)))), 0, width - 1); 223 | int minY = clamp(int(std::floor(*(std::min_element(arrayY, arrayY + 4)))), 0, height - 1); 224 | int maxY = clamp(int(std::ceil(*(std::max_element(arrayY, arrayY + 4)))), 0, height - 1); 225 | 226 | cv::Mat mask = cv::Mat::zeros(maxY - minY + 1, maxX - minX + 1, CV_8UC1); 227 | 228 | cv::Point box[4]; 229 | box[0] = 
cv::Point(int(boxes[0].x) - minX, int(boxes[0].y) - minY); 230 | box[1] = cv::Point(int(boxes[1].x) - minX, int(boxes[1].y) - minY); 231 | box[2] = cv::Point(int(boxes[2].x) - minX, int(boxes[2].y) - minY); 232 | box[3] = cv::Point(int(boxes[3].x) - minX, int(boxes[3].y) - minY); 233 | const cv::Point *pts[1] = {box}; 234 | int npts[] = {4}; 235 | cv::fillPoly(mask, pts, npts, 1, cv::Scalar(1)); 236 | 237 | cv::Mat croppedImg; 238 | pred(cv::Rect(minX, minY, maxX - minX + 1, maxY - minY + 1)) 239 | .copyTo(croppedImg); 240 | 241 | auto score = (float) cv::mean(croppedImg, mask)[0]; 242 | return score; 243 | } 244 | 245 | float getContourArea(const std::vector &box, float unClipRatio) { 246 | size_t size = box.size(); 247 | float area = 0.0f; 248 | float dist = 0.0f; 249 | for (size_t i = 0; i < size; i++) { 250 | area += box[i].x * box[(i + 1) % size].y - 251 | box[i].y * box[(i + 1) % size].x; 252 | dist += sqrtf((box[i].x - box[(i + 1) % size].x) * 253 | (box[i].x - box[(i + 1) % size].x) + 254 | (box[i].y - box[(i + 1) % size].y) * 255 | (box[i].y - box[(i + 1) % size].y)); 256 | } 257 | area = fabs(float(area / 2.0)); 258 | 259 | return area * unClipRatio / dist; 260 | } 261 | 262 | cv::RotatedRect unClip(std::vector box, float unClipRatio) { 263 | float distance = getContourArea(box, unClipRatio); 264 | 265 | ClipperLib::ClipperOffset offset; 266 | ClipperLib::Path p; 267 | p << ClipperLib::IntPoint(int(box[0].x), int(box[0].y)) 268 | << ClipperLib::IntPoint(int(box[1].x), int(box[1].y)) 269 | << ClipperLib::IntPoint(int(box[2].x), int(box[2].y)) 270 | << ClipperLib::IntPoint(int(box[3].x), int(box[3].y)); 271 | offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon); 272 | 273 | ClipperLib::Paths soln; 274 | offset.Execute(soln, distance); 275 | std::vector points; 276 | 277 | for (size_t j = 0; j < soln.size(); j++) { 278 | for (size_t i = 0; i < soln[soln.size() - 1].size(); i++) { 279 | points.emplace_back(soln[j][i].X, soln[j][i].Y); 280 | } 
281 | } 282 | cv::RotatedRect res; 283 | if (points.empty()) { 284 | res = cv::RotatedRect(cv::Point2f(0, 0), cv::Size2f(1, 1), 0); 285 | } else { 286 | res = cv::minAreaRect(points); 287 | } 288 | return res; 289 | } 290 | 291 | std::vector substractMeanNormalize(cv::Mat &src, const float *meanVals, const float *normVals) { 292 | auto inputTensorSize = src.cols * src.rows * src.channels(); 293 | std::vector inputTensorValues(inputTensorSize); 294 | size_t numChannels = src.channels(); 295 | size_t imageSize = src.cols * src.rows; 296 | 297 | for (size_t pid = 0; pid < imageSize; pid++) { 298 | for (size_t ch = 0; ch < numChannels; ++ch) { 299 | float data = (float) (src.data[pid * numChannels + ch] * normVals[ch] - meanVals[ch] * normVals[ch]); 300 | inputTensorValues[ch * imageSize + pid] = data; 301 | } 302 | } 303 | return inputTensorValues; 304 | } 305 | 306 | std::vector getAngleIndexes(std::vector &angles) { 307 | std::vector angleIndexes; 308 | angleIndexes.reserve(angles.size()); 309 | for (auto &angle: angles) { 310 | angleIndexes.push_back(angle.index); 311 | } 312 | return angleIndexes; 313 | } 314 | 315 | std::vector getInputNames(Ort::Session *session) { 316 | Ort::AllocatorWithDefaultOptions allocator; 317 | const size_t numInputNodes = session->GetInputCount(); 318 | 319 | std::vector inputNamesPtr; 320 | inputNamesPtr.reserve(numInputNodes); 321 | std::vector input_node_dims; 322 | 323 | // iterate over all input nodes 324 | for (size_t i = 0; i < numInputNodes; i++) { 325 | auto inputName = session->GetInputNameAllocated(i, allocator); 326 | inputNamesPtr.push_back(std::move(inputName)); 327 | /*printf("inputName[%zu] = %s\n", i, inputName.get()); 328 | 329 | // print input node types 330 | auto typeInfo = session->GetInputTypeInfo(i); 331 | auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo(); 332 | 333 | ONNXTensorElementDataType type = tensorInfo.GetElementType(); 334 | printf("inputType[%zu] = %u\n", i, type); 335 | 336 | // print input 
/**
 * Build the path of the source image by plain concatenation.
 *
 * @param path     directory prefix; it is used verbatim, no separator is inserted.
 * @param imgName  image file name.
 * @return path followed by imgName.
 */
std::string getSrcImgFilePath(const char *path, const char *imgName) {
    return std::string(path) + imgName;
}

/**
 * Build the path of the text result file that belongs to an image.
 *
 * @param path     directory prefix; it is used verbatim, no separator is inserted.
 * @param imgName  image file name.
 * @return path followed by imgName followed by "-result.txt".
 */
std::string getResultTxtFilePath(const char *path, const char *imgName) {
    std::string resultPath(path);
    resultPath += imgName;
    resultPath += "-result.txt";
    return resultPath;
}
getResultImgFilePath(const char *path, const char *imgName) { 393 | std::string filePath; 394 | filePath.append(path).append(imgName).append("-result.jpg"); 395 | return filePath; 396 | } 397 | 398 | std::string getDebugImgFilePath(const char *path, const char *imgName, size_t i, const char *tag) { 399 | std::string filePath; 400 | filePath.append(path).append(imgName).append(tag).append(std::to_string(i)).append(".jpg"); 401 | return filePath; 402 | } -------------------------------------------------------------------------------- /src/getopt.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * getopt - POSIX like getopt for Windows console Application 3 | * 4 | * win-c - Windows Console Library 5 | * Copyright (c) 2015 Koji Takami 6 | * Released under the MIT license 7 | * https://github.com/takamin/win-c/blob/master/LICENSE 8 | */ 9 | #ifndef __CLIB__ 10 | 11 | #include 12 | #include 13 | #include "getopt.h" 14 | 15 | char *optarg = 0; 16 | int optind = 1; 17 | int opterr = 1; 18 | int optopt = 0; 19 | 20 | int postpone_count = 0; 21 | int nextchar = 0; 22 | 23 | static void postpone(int argc, char *const argv[], int index) { 24 | char **nc_argv = (char **) argv; 25 | char *p = nc_argv[index]; 26 | int j = index; 27 | for (; j < argc - 1; j++) { 28 | nc_argv[j] = nc_argv[j + 1]; 29 | } 30 | nc_argv[argc - 1] = p; 31 | } 32 | 33 | static int postpone_noopt(int argc, char *const argv[], int index) { 34 | int i = index; 35 | for (; i < argc; i++) { 36 | if (*(argv[i]) == '-') { 37 | postpone(argc, argv, index); 38 | return 1; 39 | } 40 | } 41 | return 0; 42 | } 43 | 44 | static int _getopt_(int argc, char *const argv[], 45 | const char *optstring, 46 | const struct option *longopts, int *longindex) { 47 | while (1) { 48 | int c; 49 | const char *optptr = 0; 50 | if (optind >= argc - postpone_count) { 51 | c = 0; 52 | optarg = 0; 53 | break; 54 | } 55 | c = *(argv[optind] + nextchar); 56 | if (c == '\0') { 57 | nextchar 
= 0; 58 | ++optind; 59 | continue; 60 | } 61 | if (nextchar == 0) { 62 | if (optstring[0] != '+' && optstring[0] != '-') { 63 | while (c != '-') { 64 | /* postpone non-opt parameter */ 65 | if (!postpone_noopt(argc, argv, optind)) { 66 | break; /* all args are non-opt param */ 67 | } 68 | ++postpone_count; 69 | c = *argv[optind]; 70 | } 71 | } 72 | if (c != '-') { 73 | if (optstring[0] == '-') { 74 | optarg = argv[optind]; 75 | nextchar = 0; 76 | ++optind; 77 | return 1; 78 | } 79 | break; 80 | } else { 81 | if (strcmp(argv[optind], "--") == 0) { 82 | optind++; 83 | break; 84 | } 85 | ++nextchar; 86 | if (longopts != 0 && *(argv[optind] + 1) == '-') { 87 | char const *spec_long = argv[optind] + 2; 88 | char const *pos_eq = strchr(spec_long, '='); 89 | int spec_len = (pos_eq == NULL ? strlen(spec_long) : pos_eq - spec_long); 90 | int index_search = 0; 91 | int index_found = -1; 92 | const struct option *optdef = 0; 93 | while (longopts->name != 0) { 94 | if (strncmp(spec_long, longopts->name, spec_len) == 0) { 95 | if (optdef != 0) { 96 | if (opterr) { 97 | fprintf(stderr, "ambiguous option: %s\n", spec_long); 98 | } 99 | return '?'; 100 | } 101 | optdef = longopts; 102 | index_found = index_search; 103 | } 104 | longopts++; 105 | index_search++; 106 | } 107 | if (optdef == 0) { 108 | if (opterr) { 109 | fprintf(stderr, "no such a option: %s\n", spec_long); 110 | } 111 | return '?'; 112 | } 113 | switch (optdef->has_arg) { 114 | case no_argument: 115 | optarg = 0; 116 | if (pos_eq != 0) { 117 | if (opterr) { 118 | fprintf(stderr, "no argument for %s\n", optdef->name); 119 | } 120 | return '?'; 121 | } 122 | break; 123 | case required_argument: 124 | if (pos_eq == NULL) { 125 | ++optind; 126 | optarg = argv[optind]; 127 | } else { 128 | optarg = (char *) pos_eq + 1; 129 | } 130 | break; 131 | } 132 | ++optind; 133 | nextchar = 0; 134 | if (longindex != 0) { 135 | *longindex = index_found; 136 | } 137 | if (optdef->flag != 0) { 138 | *optdef->flag = optdef->val; 139 | 
return 0; 140 | } 141 | return optdef->val; 142 | } 143 | continue; 144 | } 145 | } 146 | optptr = strchr(optstring, c); 147 | if (optptr == NULL) { 148 | optopt = c; 149 | if (opterr) { 150 | fprintf(stderr, 151 | "%s: invalid option -- %c\n", 152 | argv[0], c); 153 | } 154 | ++nextchar; 155 | return '?'; 156 | } 157 | if (*(optptr + 1) != ':') { 158 | nextchar++; 159 | if (*(argv[optind] + nextchar) == '\0') { 160 | ++optind; 161 | nextchar = 0; 162 | } 163 | optarg = 0; 164 | } else { 165 | nextchar++; 166 | if (*(argv[optind] + nextchar) != '\0') { 167 | optarg = argv[optind] + nextchar; 168 | } else { 169 | ++optind; 170 | if (optind < argc - postpone_count) { 171 | optarg = argv[optind]; 172 | } else { 173 | optopt = c; 174 | if (opterr) { 175 | fprintf(stderr, 176 | "%s: option requires an argument -- %c\n", 177 | argv[0], c); 178 | } 179 | if (optstring[0] == ':' || ((optstring[0] == '-' || optstring[0] == '+') && optstring[1] == ':')) { 180 | c = ':'; 181 | } else { 182 | c = '?'; 183 | } 184 | } 185 | } 186 | ++optind; 187 | nextchar = 0; 188 | } 189 | return c; 190 | } 191 | 192 | /* end of option analysis */ 193 | 194 | /* fix the order of non-opt params to original */ 195 | while ((argc - optind - postpone_count) > 0) { 196 | postpone(argc, argv, optind); 197 | ++postpone_count; 198 | } 199 | 200 | nextchar = 0; 201 | postpone_count = 0; 202 | return -1; 203 | } 204 | 205 | int getopt(int argc, char *const argv[], 206 | const char *optstring) { 207 | return _getopt_(argc, argv, optstring, 0, 0); 208 | } 209 | 210 | int getopt_long(int argc, char *const argv[], 211 | const char *optstring, 212 | const struct option *longopts, int *longindex) { 213 | return _getopt_(argc, argv, optstring, longopts, longindex); 214 | } 215 | /******************************************************** 216 | int getopt_long_only(int argc, char* const argv[], 217 | const char* optstring, 218 | const struct option* longopts, int* longindex) 219 | { 220 | return -1; 221 | } 222 
| ********************************************************/ 223 | 224 | #endif // __CLIB__ 225 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #ifndef __JNI__ 2 | #ifndef __CLIB__ 3 | #include 4 | #include "main.h" 5 | #include "version.h" 6 | #include "OcrLite.h" 7 | #include "OcrUtils.h" 8 | #ifdef _WIN32 9 | #include 10 | #endif 11 | 12 | void printHelp(FILE *out, char *argv0) { 13 | fprintf(out, " ------- Usage -------\n"); 14 | fprintf(out, "%s %s", argv0, usageMsg); 15 | fprintf(out, " ------- Required Parameters -------\n"); 16 | fprintf(out, "%s", requiredMsg); 17 | fprintf(out, " ------- Optional Parameters -------\n"); 18 | fprintf(out, "%s", optionalMsg); 19 | fprintf(out, " ------- Other Parameters -------\n"); 20 | fprintf(out, "%s", otherMsg); 21 | fprintf(out, " ------- Examples -------\n"); 22 | fprintf(out, example1Msg, argv0); 23 | fprintf(out, example2Msg, argv0); 24 | } 25 | 26 | int main(int argc, char **argv) { 27 | if (argc <= 1) { 28 | printHelp(stderr, argv[0]); 29 | return -1; 30 | } 31 | #ifdef _WIN32 32 | SetConsoleOutputCP(CP_UTF8); 33 | #endif 34 | std::string modelsDir, modelDetPath, modelClsPath, modelRecPath, keysPath; 35 | std::string imgPath, imgDir, imgName; 36 | int numThread = 4; 37 | int padding = 50; 38 | int maxSideLen = 1024; 39 | float boxScoreThresh = 0.5f; 40 | float boxThresh = 0.3f; 41 | float unClipRatio = 1.6f; 42 | bool doAngle = true; 43 | int flagDoAngle = 1; 44 | bool mostAngle = true; 45 | int flagMostAngle = 1; 46 | int flagGpu = -1; 47 | 48 | int opt; 49 | int optionIndex = 0; 50 | while ((opt = getopt_long(argc, argv, "d:1:2:3:4:i:t:p:s:b:o:u:a:A:G:v:h", long_options, &optionIndex)) != -1) { 51 | //printf("option(-%c)=%s\n", opt, optarg); 52 | switch (opt) { 53 | case 'd': 54 | modelsDir = optarg; 55 | printf("modelsPath=%s\n", modelsDir.c_str()); 56 | break; 57 | case '1': 58 
| modelDetPath = modelsDir + "/" + optarg; 59 | printf("model det path=%s\n", modelDetPath.c_str()); 60 | break; 61 | case '2': 62 | modelClsPath = modelsDir + "/" + optarg; 63 | printf("model cls path=%s\n", modelClsPath.c_str()); 64 | break; 65 | case '3': 66 | modelRecPath = modelsDir + "/" + optarg; 67 | printf("model rec path=%s\n", modelRecPath.c_str()); 68 | break; 69 | case '4': 70 | keysPath = modelsDir + "/" + optarg; 71 | printf("keys path=%s\n", keysPath.c_str()); 72 | break; 73 | case 'i': 74 | imgPath.assign(optarg); 75 | imgDir.assign(imgPath.substr(0, imgPath.find_last_of('/') + 1)); 76 | imgName.assign(imgPath.substr(imgPath.find_last_of('/') + 1)); 77 | printf("imgDir=%s, imgName=%s\n", imgDir.c_str(), imgName.c_str()); 78 | break; 79 | case 't': 80 | numThread = (int) strtol(optarg, NULL, 10); 81 | //printf("numThread=%d\n", numThread); 82 | break; 83 | case 'p': 84 | padding = (int) strtol(optarg, NULL, 10); 85 | //printf("padding=%d\n", padding); 86 | break; 87 | case 's': 88 | maxSideLen = (int) strtol(optarg, NULL, 10); 89 | //printf("maxSideLen=%d\n", maxSideLen); 90 | break; 91 | case 'b': 92 | boxScoreThresh = strtof(optarg, NULL); 93 | //printf("boxScoreThresh=%f\n", boxScoreThresh); 94 | break; 95 | case 'o': 96 | boxThresh = strtof(optarg, NULL); 97 | //printf("boxThresh=%f\n", boxThresh); 98 | break; 99 | case 'u': 100 | unClipRatio = strtof(optarg, NULL); 101 | //printf("unClipRatio=%f\n", unClipRatio); 102 | break; 103 | case 'a': 104 | flagDoAngle = (int) strtol(optarg, NULL, 10); 105 | if (flagDoAngle == 0) { 106 | doAngle = false; 107 | } else { 108 | doAngle = true; 109 | } 110 | //printf("doAngle=%d\n", doAngle); 111 | break; 112 | case 'A': 113 | flagMostAngle = (int) strtol(optarg, NULL, 10); 114 | if (flagMostAngle == 0) { 115 | mostAngle = false; 116 | } else { 117 | mostAngle = true; 118 | } 119 | //printf("mostAngle=%d\n", mostAngle); 120 | break; 121 | case 'v': 122 | printf("%s\n", VERSION); 123 | return 0; 124 | case 
'h': 125 | printHelp(stdout, argv[0]); 126 | return 0; 127 | case 'G': 128 | flagGpu = (int) strtol(optarg, NULL, 10); 129 | break; 130 | default: 131 | printf("other option %c :%s\n", opt, optarg); 132 | } 133 | } 134 | bool hasTargetImgFile = isFileExists(imgPath); 135 | if (!hasTargetImgFile) { 136 | fprintf(stderr, "Target image not found: %s\n", imgPath.c_str()); 137 | return -1; 138 | } 139 | bool hasModelDetFile = isFileExists(modelDetPath); 140 | if (!hasModelDetFile) { 141 | fprintf(stderr, "Model det file not found: %s\n", modelDetPath.c_str()); 142 | return -1; 143 | } 144 | bool hasModelClsFile = isFileExists(modelClsPath); 145 | if (!hasModelClsFile) { 146 | fprintf(stderr, "Model cls file not found: %s\n", modelClsPath.c_str()); 147 | return -1; 148 | } 149 | bool hasModelRecFile = isFileExists(modelRecPath); 150 | if (!hasModelRecFile) { 151 | fprintf(stderr, "Model rec file not found: %s\n", modelRecPath.c_str()); 152 | return -1; 153 | } 154 | bool hasKeysFile = isFileExists(keysPath); 155 | if (!hasKeysFile) { 156 | fprintf(stderr, "keys file not found: %s\n", keysPath.c_str()); 157 | return -1; 158 | } 159 | OcrLite ocrLite; 160 | ocrLite.setNumThread(numThread); 161 | ocrLite.initLogger( 162 | true,//isOutputConsole 163 | false,//isOutputPartImg 164 | true);//isOutputResultImg 165 | 166 | ocrLite.enableResultTxt(imgDir.c_str(), imgName.c_str()); 167 | ocrLite.setGpuIndex(flagGpu); 168 | ocrLite.Logger("=====Input Params=====\n"); 169 | ocrLite.Logger( 170 | "numThread(%d),padding(%d),maxSideLen(%d),boxScoreThresh(%f),boxThresh(%f),unClipRatio(%f),doAngle(%d),mostAngle(%d),GPU(%d)\n", 171 | numThread, padding, maxSideLen, boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle, 172 | flagGpu); 173 | 174 | ocrLite.initModels(modelDetPath, modelClsPath, modelRecPath, keysPath); 175 | 176 | OcrResult result = ocrLite.detect(imgDir.c_str(), imgName.c_str(), padding, maxSideLen, 177 | boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle); 178 
| ocrLite.Logger("%s\n", result.strRes.c_str()); 179 | return 0; 180 | } 181 | 182 | #endif 183 | #endif -------------------------------------------------------------------------------- /valgrind-massif.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## script for 内存占用检查 3 | # ========== macOS ========== 4 | # https://github.com/LouisBrunner/valgrind-macos 5 | # brew tap LouisBrunner/valgrind 6 | # brew install --HEAD LouisBrunner/valgrind/valgrind 7 | # ========== linux ========== 8 | # https://www.valgrind.org/ 9 | # apt install valgrind 10 | 11 | NUM_THREADS=1 12 | 13 | set OMP_NUM_THREADS=$NUM_THREADS 14 | 15 | TARGET_IMG=images/1.jpg 16 | if [ ! -f "$TARGET_IMG" ]; then 17 | echo "找不到待识别的目标图片:${TARGET_IMG},请打开本文件并编辑TARGET_IMG" 18 | exit 19 | fi 20 | 21 | sysOS=`uname -s` 22 | EXE_PATH=${sysOS}-CPU-BIN 23 | 24 | ##### run test on MacOS or Linux 25 | valgrind --tool=massif --pages-as-heap=yes \ 26 | ./${EXE_PATH}/RapidOcrOnnx --models models \ 27 | --det ch_PP-OCRv3_det_infer.onnx \ 28 | --cls ch_ppocr_mobile_v2.0_cls_infer.onnx \ 29 | --rec ch_PP-OCRv3_rec_infer.onnx \ 30 | --keys ppocr_keys_v1.txt \ 31 | --image $TARGET_IMG \ 32 | --numThread $NUM_THREADS \ 33 | --padding 50 \ 34 | --maxSideLen 1024 \ 35 | --boxScoreThresh 0.5 \ 36 | --boxThresh 0.3 \ 37 | --unClipRatio 1.6 \ 38 | --doAngle 1 \ 39 | --mostAngle 1 40 | -------------------------------------------------------------------------------- /valgrind-memcheck.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## script for 内存泄露检查 3 | # ========== macOS ========== 4 | # https://github.com/LouisBrunner/valgrind-macos 5 | # brew tap LouisBrunner/valgrind 6 | # brew install --HEAD LouisBrunner/valgrind/valgrind 7 | # ========== linux ========== 8 | # https://www.valgrind.org/ 9 | # apt install valgrind 10 | 11 | NUM_THREADS=1 12 | 13 | set OMP_NUM_THREADS=$NUM_THREADS 14 | 15 | 
TARGET_IMG=images/1.jpg
if [ ! -f "$TARGET_IMG" ]; then
    echo "找不到待识别的目标图片:${TARGET_IMG},请打开本文件并编辑TARGET_IMG"
    # Report the failure to the caller; a bare `exit` would return echo's status (0).
    exit 1
fi

# `set NAME=value` only assigns a positional parameter. Export the variable so that the
# process started by valgrind really sees the requested OpenMP thread count.
export OMP_NUM_THREADS="$NUM_THREADS"

sysOS=$(uname -s)
# The build workflow names its output directories <OS>-BIN-CPU (e.g. Linux-BIN-CPU).
# EXE_PATH may be preset in the environment to point at a different build directory.
EXE_PATH=${EXE_PATH:-${sysOS}-BIN-CPU}

##### run test on MacOS or Linux
valgrind --tool=memcheck --leak-check=full --leak-resolution=med --track-origins=yes --vgdb=no --log-file=valgrind-memcheck.txt \
    "./${EXE_PATH}/RapidOcrOnnx" --models models \
    --det ch_PP-OCRv3_det_infer.onnx \
    --cls ch_ppocr_mobile_v2.0_cls_infer.onnx \
    --rec ch_PP-OCRv3_rec_infer.onnx \
    --keys ppocr_keys_v1.txt \
    --image "$TARGET_IMG" \
    --numThread "$NUM_THREADS" \
    --padding 50 \
    --maxSideLen 1024 \
    --boxScoreThresh 0.5 \
    --boxThresh 0.3 \
    --unClipRatio 1.6 \
    --doAngle 1 \
    --mostAngle 1 \
    --GPU -1