├── docs ├── 构建指南.md ├── 移植指南.md └── update-v1.3.md ├── api ├── node.js │ ├── .gitignore │ ├── test │ │ ├── .gitignore │ │ ├── package.json │ │ └── app.js │ ├── tsconfig.es5.json │ ├── tsconfig.json │ ├── es5 │ │ ├── worker.d.ts │ │ ├── index.d.ts │ │ └── worker.js │ ├── esnext │ │ ├── worker.d.ts │ │ ├── index.d.ts │ │ ├── index.js │ │ └── worker.js │ ├── package.json │ ├── package-lock.json │ ├── ts │ │ ├── index.ts │ │ └── worker.ts │ └── README.md ├── python │ ├── tbpu │ │ ├── parser_tools │ │ │ ├── __init__.py │ │ │ └── line_preprocessing.py │ │ ├── parser_none.py │ │ ├── parser_single_none.py │ │ ├── parser_multi_line.py │ │ ├── tbpu.py │ │ ├── __init__.py │ │ ├── parser_multi_none.py │ │ ├── parser_multi_para.py │ │ ├── parser_single_para.py │ │ ├── parser_single_code.py │ │ └── parser_single_line.py │ ├── .gitignore │ ├── test.jpg │ ├── demo3.py │ ├── demo2.py │ ├── demo1.py │ └── PPOCR_visualize.py └── PowerShell │ ├── demo.ps1 │ └── PPOCR_api.ps1 ├── cpp ├── third_party │ ├── gflags │ │ ├── doc │ │ │ ├── .nojekyll │ │ │ ├── README.md │ │ │ └── designstyle.css │ │ ├── test │ │ │ ├── flagfile.1 │ │ │ ├── flagfile.3 │ │ │ ├── flagfile.2 │ │ │ ├── gflags_unittest_flagfile │ │ │ ├── config │ │ │ │ ├── CMakeLists.txt │ │ │ │ └── main.cc │ │ │ ├── gflags_declare_flags.cc │ │ │ ├── gflags_strip_flags_test.cmake │ │ │ ├── gflags_declare_test.cc │ │ │ ├── nc │ │ │ │ ├── CMakeLists.txt │ │ │ │ └── gflags_nc.cc │ │ │ ├── gflags_build.py.in │ │ │ └── gflags_strip_flags_test.cc │ │ ├── examples │ │ │ ├── .gitignore │ │ │ ├── args.hpp │ │ │ ├── args.cpp │ │ │ ├── main.cpp │ │ │ └── CMakeLists.txt │ │ ├── AUTHORS.txt │ │ ├── .gitattributes │ │ ├── bazel │ │ │ ├── expanded_template │ │ │ │ ├── BUILD │ │ │ │ ├── expanded_template.bzl │ │ │ │ └── expand_template.cc │ │ │ └── gflags.bzl │ │ ├── cmake │ │ │ ├── README_runtime.txt │ │ │ ├── package.pc.in │ │ │ ├── version.cmake.in │ │ │ ├── cmake_uninstall.cmake.in │ │ │ ├── execute_test.cmake │ │ │ └── package.cmake.in │ │ 
├── WORKSPACE │ │ ├── .travis.yml │ │ ├── .gitignore │ │ ├── BUILD │ │ ├── COPYING.txt │ │ ├── src │ │ │ ├── defines.h.in │ │ │ ├── config.h │ │ │ ├── windows_port.cc │ │ │ ├── gflags_ns.h.in │ │ │ ├── gflags_completions.sh │ │ │ ├── windows_port.h │ │ │ └── gflags_declare.h.in │ │ ├── appveyor.yml │ │ └── INSTALL.md │ ├── .gitignore │ └── third_party_library_version.txt ├── .gitignore ├── .source │ └── .gitignore ├── docs │ ├── imgs │ │ ├── b1.png │ │ ├── b2.png │ │ ├── b3.png │ │ ├── b4.png │ │ ├── b5.png │ │ ├── b6.png │ │ └── b7.png │ └── 移植指南.md ├── .dockerignore ├── tools │ ├── linux_dist_tools │ │ ├── run.sh │ │ ├── install_env.sh │ │ └── README.txt │ ├── linux_run.sh │ ├── linux_build.sh │ └── linux_build_opencv.sh ├── external-cmake │ └── auto-log.cmake ├── include │ ├── base64.h │ ├── preprocess_op.h │ ├── args.h │ ├── paddleocr.h │ ├── structure_layout.h │ ├── paddlestructure.h │ ├── ocr_cls.h │ ├── structure_table.h │ ├── ocr_rec.h │ ├── ocr_det.h │ ├── postprocess_op.h │ ├── task.h │ └── utility.h ├── cmake │ └── opencv-install-utils.cmake ├── Dockerfile ├── README-docker.md └── src │ ├── main.cpp │ └── preprocess_op.cpp └── .gitignore /docs/构建指南.md: -------------------------------------------------------------------------------- 1 | [点这里跳转到构建指南](../cpp) -------------------------------------------------------------------------------- /api/node.js/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules -------------------------------------------------------------------------------- /cpp/third_party/gflags/doc/.nojekyll: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /api/python/tbpu/parser_tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/cpp/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | build/ 3 | .data/ 4 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/flagfile.1: -------------------------------------------------------------------------------- 1 | --version -------------------------------------------------------------------------------- /docs/移植指南.md: -------------------------------------------------------------------------------- 1 | [点这里跳转到移植指南](../cpp/docs/移植指南.md) -------------------------------------------------------------------------------- /cpp/third_party/.gitignore: -------------------------------------------------------------------------------- 1 | # 无视所有第三方项目的.git文件夹,节省空间 2 | .git/ -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/flagfile.3: -------------------------------------------------------------------------------- 1 | --flagfile=flagfile.2 -------------------------------------------------------------------------------- /cpp/third_party/gflags/examples/.gitignore: -------------------------------------------------------------------------------- 1 | # folder 2 | build/ 3 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/flagfile.2: -------------------------------------------------------------------------------- 1 | --foo=bar 2 | --nounused_bool -------------------------------------------------------------------------------- /cpp/.source/.gitignore: -------------------------------------------------------------------------------- 1 | # 无视当前路径下所有文件,除了这个.gitignore 2 | * 3 | !.gitignore -------------------------------------------------------------------------------- /cpp/third_party/gflags/AUTHORS.txt: -------------------------------------------------------------------------------- 1 | google-gflags@googlegroups.com 2 | 3 | 
-------------------------------------------------------------------------------- /api/node.js/test/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | tmp 3 | package-lock.json 4 | test* 5 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/gflags_unittest_flagfile: -------------------------------------------------------------------------------- 1 | --test_flag=1 2 | --test_flag=2 3 | -------------------------------------------------------------------------------- /api/python/.gitignore: -------------------------------------------------------------------------------- 1 | # 无视所有图片 2 | *.png 3 | *.jpg 4 | *.jpeg 5 | 6 | # 除了 test.jpg 7 | !test.jpg -------------------------------------------------------------------------------- /api/python/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiroi-sora/PaddleOCR-json/HEAD/api/python/test.jpg -------------------------------------------------------------------------------- /cpp/docs/imgs/b1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiroi-sora/PaddleOCR-json/HEAD/cpp/docs/imgs/b1.png -------------------------------------------------------------------------------- /cpp/docs/imgs/b2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiroi-sora/PaddleOCR-json/HEAD/cpp/docs/imgs/b2.png -------------------------------------------------------------------------------- /cpp/docs/imgs/b3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiroi-sora/PaddleOCR-json/HEAD/cpp/docs/imgs/b3.png -------------------------------------------------------------------------------- /cpp/docs/imgs/b4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiroi-sora/PaddleOCR-json/HEAD/cpp/docs/imgs/b4.png -------------------------------------------------------------------------------- /cpp/docs/imgs/b5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiroi-sora/PaddleOCR-json/HEAD/cpp/docs/imgs/b5.png -------------------------------------------------------------------------------- /cpp/docs/imgs/b6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiroi-sora/PaddleOCR-json/HEAD/cpp/docs/imgs/b6.png -------------------------------------------------------------------------------- /cpp/docs/imgs/b7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiroi-sora/PaddleOCR-json/HEAD/cpp/docs/imgs/b7.png -------------------------------------------------------------------------------- /cpp/.dockerignore: -------------------------------------------------------------------------------- 1 | *.log 2 | *.jpg 3 | *.jpeg 4 | *.png 5 | *.json 6 | config.py 7 | 8 | build/ 9 | .source/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.code-workspace 2 | 3 | *__pycache__* 4 | 5 | .vscode 6 | 7 | *.zip 8 | *.rar 9 | *.7z 10 | other -------------------------------------------------------------------------------- /cpp/third_party/third_party_library_version.txt: -------------------------------------------------------------------------------- 1 | gflags: https://github.com/Gavin1937/gflags.git 2 | commit: 36a6a1f00c6a9115f7ddad5b34a8c345c8f36f90 3 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/.gitattributes: 
-------------------------------------------------------------------------------- 1 | # treat all files in this repository as text files 2 | # and normalize them to LF line endings when committed 3 | * text 4 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/doc/README.md: -------------------------------------------------------------------------------- 1 | To update the GitHub Pages at http://gflags.github.io/gflags/, use command: 2 | ``` 3 | git subtree push --prefix=doc/ origin gh-pages 4 | ``` 5 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/examples/args.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | DECLARE_string(message); 6 | DECLARE_double(number); 7 | 8 | void print_message(); 9 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/bazel/expanded_template/BUILD: -------------------------------------------------------------------------------- 1 | cc_binary( 2 | name = "expand_template", 3 | srcs = ["expand_template.cc"], 4 | visibility = ["//visibility:public"], 5 | ) 6 | -------------------------------------------------------------------------------- /api/node.js/test/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "express": "^4.18.1", 4 | "mime": "^3.0.0", 5 | "multer": "^1.4.5-lts.1", 6 | "paddleocrjson": "^1.1.0" 7 | }, 8 | "scripts": { 9 | "start": "node app.js" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/cmake/README_runtime.txt: -------------------------------------------------------------------------------- 1 | This package contains runtime libraries only which are required 2 | by applications that use these libraries for the commandline flags 3 | 
processing. If you want to develop such application, download 4 | and install the development package instead. 5 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/examples/args.cpp: -------------------------------------------------------------------------------- 1 | #include "args.hpp" 2 | 3 | #include 4 | 5 | DEFINE_string(message, "", "The message to print"); 6 | DEFINE_double(number, 0.3, "The number"); 7 | 8 | void print_message() 9 | { 10 | std::cout << FLAGS_message << std::endl; 11 | } 12 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/WORKSPACE: -------------------------------------------------------------------------------- 1 | # Copyright 2006 Google Inc. All Rights Reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the COPYING.txt file. 4 | 5 | # Bazel (http://bazel.io/) WORKSPACE file for gflags. 6 | workspace(name="com_github_gflags_gflags") 7 | -------------------------------------------------------------------------------- /api/node.js/tsconfig.es5.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES5", 4 | "module": "CommonJS", 5 | "outDir": "./es5", 6 | "declarationDir": "./es5", 7 | "rootDir": "./ts", 8 | "esModuleInterop": true, 9 | "declaration": true, 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/config/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## gflags package configuration tests 2 | 3 | cmake_minimum_required (VERSION 2.8.12 FATAL_ERROR) 4 | 5 | project (gflags_${TEST_NAME}) 6 | 7 | find_package (gflags REQUIRED) 8 | 9 | add_executable (foo main.cc) 10 | target_link_libraries (foo gflags::gflags) 11 | 
-------------------------------------------------------------------------------- /api/node.js/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ESNext", 4 | "module": "CommonJS", 5 | "outDir": "./esnext", 6 | "declarationDir": "./esnext", 7 | "rootDir": "./ts", 8 | "esModuleInterop": true, 9 | "declaration": true, 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/gflags_declare_flags.cc: -------------------------------------------------------------------------------- 1 | #define GFLAGS_DLL_DECLARE_FLAG 2 | 3 | #include 4 | #include 5 | 6 | DECLARE_string(message); // in gflags_declare_test.cc 7 | 8 | void print_message(); 9 | void print_message() 10 | { 11 | std::cout << FLAGS_message << std::endl; 12 | } 13 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/gflags_strip_flags_test.cmake: -------------------------------------------------------------------------------- 1 | if (NOT BINARY) 2 | message (FATAL_ERROR "BINARY file to check not specified!") 3 | endif () 4 | file (STRINGS "${BINARY}" strings REGEX "This text should be stripped out") 5 | if (strings) 6 | message (FATAL_ERROR "Text not stripped from binary like it should be: ${BINARY}") 7 | endif () 8 | -------------------------------------------------------------------------------- /api/python/tbpu/parser_none.py: -------------------------------------------------------------------------------- 1 | # 排版解析-不做处理 2 | 3 | from .tbpu import Tbpu 4 | 5 | 6 | class ParserNone(Tbpu): 7 | def __init__(self): 8 | self.tbpuName = "排版解析-不做处理" 9 | 10 | def run(self, textBlocks): 11 | for tb in textBlocks: 12 | if "end" not in tb: 13 | tb["end"] = "\n" # 默认结尾间隔符为换行 14 | return textBlocks 15 | -------------------------------------------------------------------------------- 
/cpp/tools/linux_dist_tools/run.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | 3 | # 获取当前脚本路径 4 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) 5 | 6 | # 获取PaddleOCR-json路径 7 | EXE_LOCATION="$SCRIPT_DIR/bin/PaddleOCR-json" 8 | LIB_LOCATION="$SCRIPT_DIR/lib/" 9 | 10 | # 运行PaddleOCR-json 11 | LD_LIBRARY_PATH="$LIB_LOCATION" "$EXE_LOCATION" -models_path="$SCRIPT_DIR/models" "$@" 12 | -------------------------------------------------------------------------------- /cpp/tools/linux_run.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | 3 | # Resolve the directory that contains this script 4 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) 5 | 6 | # Path to the PaddleOCR-json executable produced by the build 7 | EXE_LOCATION="$SCRIPT_DIR/../build/bin/PaddleOCR-json" 8 | 9 | # All PaddleOCR runtime libraries are copied next to the executable; the path is quoted so directories containing spaces are not word-split 10 | LIBS="$(dirname -- "$EXE_LOCATION")" 11 | 12 | # Run PaddleOCR-json, forwarding every command-line argument unchanged 13 | LD_LIBRARY_PATH="$LIBS" "$EXE_LOCATION" \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/examples/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "args.hpp" 4 | 5 | 6 | int main(int argc, char **argv) 7 | { 8 | if (argc <= 1) 9 | { 10 | std::cout << "Try ./example -help" << std::endl; 11 | return 0; 12 | } 13 | GFLAGS_NAMESPACE::SetUsageMessage("Example compilation for demo."); 14 | GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); 15 | print_message(); 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/gflags_declare_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | DEFINE_string(message, "", "The message to print"); 4 | void print_message(); // in gflags_declare_flags.cc 5 | 6 | int 
main(int argc, char **argv) 7 | { 8 | GFLAGS_NAMESPACE::SetUsageMessage("Test compilation and use of gflags_declare.h"); 9 | GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); 10 | print_message(); 11 | return 0; 12 | } 13 | -------------------------------------------------------------------------------- /api/node.js/es5/worker.d.ts: -------------------------------------------------------------------------------- 1 | declare const __default: { 2 | path: string; 3 | args: any[]; 4 | options: { 5 | argv0: any; 6 | stdio: "pipe"; 7 | detached: boolean; 8 | shell: boolean; 9 | windowsVerbatimArguments: any; 10 | windowsHide: boolean; 11 | }; 12 | initTag: string; 13 | socketTag: string; 14 | pipeTag: string; 15 | socketMatch: RegExp; 16 | }; 17 | export { type __default }; 18 | -------------------------------------------------------------------------------- /api/node.js/esnext/worker.d.ts: -------------------------------------------------------------------------------- 1 | declare const __default: { 2 | path: string; 3 | args: any[]; 4 | options: { 5 | argv0: any; 6 | stdio: "pipe"; 7 | detached: boolean; 8 | shell: boolean; 9 | windowsVerbatimArguments: any; 10 | windowsHide: boolean; 11 | }; 12 | initTag: string; 13 | socketTag: string; 14 | pipeTag: string; 15 | socketMatch: RegExp; 16 | }; 17 | export { type __default }; 18 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/cmake/package.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=${prefix} 3 | bindir=${exec_prefix}/@RUNTIME_INSTALL_DIR@ 4 | libdir=${exec_prefix}/@LIBRARY_INSTALL_DIR@ 5 | includedir=${prefix}/@INCLUDE_INSTALL_DIR@ 6 | 7 | Name: @PACKAGE_NAME@ 8 | Version: @PACKAGE_VERSION@ 9 | Description: @PACKAGE_DESCRIPTION@ 10 | URL: @PACKAGE_URL@ 11 | Requires: 12 | Libs: -L${libdir} -lgflags 13 | Libs.private: -lpthread 14 | Cflags: -I${includedir} 15 
| -------------------------------------------------------------------------------- /cpp/external-cmake/auto-log.cmake: -------------------------------------------------------------------------------- 1 | find_package(Git REQUIRED) 2 | include(FetchContent) 3 | 4 | set(FETCHCONTENT_BASE_DIR "${CMAKE_CURRENT_BINARY_DIR}/third-party") 5 | 6 | FetchContent_Declare( 7 | extern_Autolog 8 | PREFIX autolog 9 | # If you don't have access to github, replace it with https://gitee.com/Double_V/AutoLog 10 | GIT_REPOSITORY https://github.com/LDOUBLEV/AutoLog.git 11 | GIT_TAG main 12 | ) 13 | FetchContent_MakeAvailable(extern_Autolog) 14 | 15 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/.travis.yml: -------------------------------------------------------------------------------- 1 | # Ubuntu 14.04 Trusty support, to get newer cmake and compilers. 2 | sudo: required 3 | dist: trusty 4 | 5 | language: cpp 6 | 7 | os: 8 | - linux 9 | - osx 10 | 11 | compiler: 12 | - clang 13 | - gcc 14 | 15 | env: 16 | - CONFIG=Release 17 | - CONFIG=Debug 18 | 19 | script: 20 | - mkdir out && cd out && cmake -D CMAKE_BUILD_TYPE=$CONFIG -D GFLAGS_BUILD_SHARED_LIBS=ON -D GFLAGS_BUILD_STATIC_LIBS=ON -D GFLAGS_BUILD_TESTING=ON .. && cmake --build . 
--config $CONFIG && ctest 21 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/.gitignore: -------------------------------------------------------------------------------- 1 | /xcode/ 2 | /build/ 3 | /builds/ 4 | /build-*/ 5 | /_build/ 6 | .DS_Store 7 | CMakeCache.txt 8 | DartConfiguration.tcl 9 | Makefile 10 | CMakeFiles/ 11 | /Testing/ 12 | /include/gflags/config.h 13 | /include/gflags/gflags_completions.h 14 | /include/gflags/gflags_declare.h 15 | /include/gflags/gflags.h 16 | /lib/ 17 | /test/gflags_unittest_main.cc 18 | /test/gflags_unittest-main.cc 19 | /packages/ 20 | CMakeLists.txt.user 21 | /bazel-bin 22 | /bazel-genfiles 23 | /bazel-gflags 24 | /bazel-out 25 | /bazel-testlogs 26 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/nc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## gflags negative compilation tests 2 | 3 | cmake_minimum_required (VERSION 2.8.12 FATAL_ERROR) 4 | 5 | if (NOT TEST_NAME) 6 | message (FATAL_ERROR "Missing TEST_NAME CMake flag") 7 | endif () 8 | string (TOUPPER ${TEST_NAME} TEST_NAME_UPPER) 9 | 10 | project (gflags_${TEST_NAME}) 11 | 12 | find_package (gflags REQUIRED) 13 | include_directories ("${CMAKE_CURRENT_SOURCE_DIR}/..") 14 | add_definitions (-DTEST_${TEST_NAME_UPPER}) 15 | add_executable (gflags_${TEST_NAME} gflags_nc.cc) 16 | target_link_libraries(gflags_${TEST_NAME} gflags) 17 | -------------------------------------------------------------------------------- /cpp/tools/linux_build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | 3 | # 获取当前脚本路径并去到 "cpp" 文件夹下 4 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) 5 | cd $SCRIPT_DIR/.. 
6 | 7 | # 一些CMake的变量 8 | MODE=Release 9 | PADDLE_LIB="$(pwd)/$(ls -d .source/*paddle_inference*/ | head -n1)" 10 | 11 | echo "MODE: $MODE" 12 | echo "PADDLE_LIB: $PADDLE_LIB" 13 | 14 | # 构建 & 编译项目 15 | mkdir -p build 16 | 17 | # 如果build文件夹不是一个CMake工程,则构建工程文件夹(第一次运行) 18 | if [ ! -d "build/CMakeFiles/" ]; then 19 | cmake -S . -B build/ -DPADDLE_LIB="$PADDLE_LIB" -DCMAKE_BUILD_TYPE="$MODE" 20 | fi 21 | 22 | # 编译 23 | cmake --build build/ --config="$MODE" 24 | -------------------------------------------------------------------------------- /cpp/tools/linux_dist_tools/install_env.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | 3 | # Resolve the directory that contains this script 4 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) 5 | 6 | # Directories holding the PaddleOCR-json executable and its shared libraries 7 | EXE_LOCATION="$SCRIPT_DIR/bin/" 8 | LIB_LOCATION="$SCRIPT_DIR/lib/" 9 | 10 | # Append both directories to ~/.bashrc. PATH and LD_LIBRARY_PATH are escaped so they are written literally and expand at shell start-up instead of freezing the installer's environment; the values are quoted so paths with spaces survive 11 | echo "export PATH=\"\$PATH:$EXE_LOCATION\"" >> ~/.bashrc 12 | echo "export LD_LIBRARY_PATH=\"\$LD_LIBRARY_PATH:$LIB_LOCATION\"" >> ~/.bashrc 13 | 14 | echo "" 15 | echo "You need to restart current shell session, or run following command in order for the install to take effect:" 16 | echo "你需要重启当前shell session,或者运行下面这条命令来完成安装步骤:" 17 | echo "" 18 | echo "source ~/.bashrc" 19 | echo "" 20 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/config/main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | DEFINE_string(message, "Hello World!", "The message to print"); 5 | 6 | static bool ValidateMessage(const char* flagname, const std::string &message) 7 | { 8 | return !message.empty(); 9 | } 10 | DEFINE_validator(message, ValidateMessage); 11 | 12 | int main(int argc, char **argv) 13 | { 14 | gflags::SetUsageMessage("Test CMake configuration of gflags library (gflags-config.cmake)"); 15 | gflags::SetVersionString("0.1"); 
16 | gflags::ParseCommandLineFlags(&argc, &argv, true); 17 | std::cout << FLAGS_message << std::endl; 18 | gflags::ShutDownCommandLineFlags(); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /api/python/tbpu/parser_single_none.py: -------------------------------------------------------------------------------- 1 | # 排版解析-单栏-无换行 2 | 3 | from .parser_single_line import SingleLine 4 | from .parser_tools.paragraph_parse import word_separator # 上下句间隔符 5 | 6 | 7 | class SingleNone(SingleLine): 8 | def __init__(self): 9 | self.tbpuName = "排版解析-单栏-无换行" 10 | 11 | def run(self, textBlocks): 12 | textBlocks = super().run(textBlocks) 13 | # 找到换行符,更改为间隔符 14 | for i in range(len(textBlocks) - 1): 15 | if textBlocks[i]["end"] == "\n": 16 | letter1 = textBlocks[i]["text"][-1] 17 | letter2 = textBlocks[i + 1]["text"][0] 18 | textBlocks[i]["end"] = word_separator(letter1, letter2) 19 | return textBlocks 20 | -------------------------------------------------------------------------------- /api/python/tbpu/parser_multi_line.py: -------------------------------------------------------------------------------- 1 | # 排版解析-多栏-单行 2 | 3 | from .tbpu import Tbpu 4 | from .parser_tools.line_preprocessing import linePreprocessing # 行预处理 5 | from .parser_tools.gap_tree import GapTree # 间隙树排序算法 6 | 7 | 8 | class MultiLine(Tbpu): 9 | def __init__(self): 10 | self.tbpuName = "排版解析-多栏-单行" 11 | 12 | # 构建算法对象,指定包围盒的元素位置 13 | self.gtree = GapTree(lambda tb: tb["normalized_bbox"]) 14 | 15 | def run(self, textBlocks): 16 | textBlocks = linePreprocessing(textBlocks) # 预处理 17 | textBlocks = self.gtree.sort(textBlocks) # 构建间隙树 18 | # 补充行尾间隔符 19 | for tb in textBlocks: 20 | tb["end"] = "\n" 21 | del tb["normalized_bbox"] 22 | return textBlocks 23 | -------------------------------------------------------------------------------- /api/python/tbpu/tbpu.py: -------------------------------------------------------------------------------- 1 | # tbpu : 
text block processing unit 2 | # 文块处理器的基类。 3 | # OCR返回的结果中,一项包含文字、包围盒、置信度的元素,称为一个“文块” - text block 。 4 | # 文块不一定是完整的一句话或一个段落。反之,一般是零散的文字。 5 | # 一个OCR结果常由多个文块组成。 6 | # 文块处理器就是:将传入的多个文块进行处理,比如合并、排序、删除文块。 7 | 8 | 9 | class Tbpu: 10 | def __init__(self): 11 | self.tbpuName = "文块处理单元-未知" 12 | 13 | def run(self, textBlocks): 14 | """输入:textBlocks文块列表。例:\n 15 | [ 16 | {'box': [[29, 19], [172, 19], [172, 44], [29, 44]], 'score': 0.89, 'text': '文本111'}, 17 | {'box': [[29, 60], [161, 60], [161, 86], [29, 86]], 'score': 0.75, 'text': '文本222'}, 18 | ] 19 | 输出:排序后的textBlocks文块列表,每个块增加键: 20 | 'end' 结尾间隔符 21 | """ 22 | return textBlocks 23 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/BUILD: -------------------------------------------------------------------------------- 1 | # Bazel (http://bazel.io/) BUILD file for gflags. 2 | # 3 | # See INSTALL.md for instructions for adding gflags to a Bazel workspace. 4 | 5 | licenses(["notice"]) 6 | 7 | exports_files([ 8 | "src/gflags_completions.sh", 9 | "COPYING.txt", 10 | ]) 11 | 12 | config_setting( 13 | name = "x64_windows", 14 | values = {"cpu": "x64_windows"}, 15 | ) 16 | 17 | config_setting( 18 | name = "android", 19 | values = {"crosstool_top": "//external:android/crosstool"}, 20 | ) 21 | 22 | load(":bazel/gflags.bzl", "gflags_library", "gflags_sources") 23 | 24 | (hdrs, srcs) = gflags_sources(namespace=["google", "gflags"]) 25 | gflags_library(hdrs=hdrs, srcs=srcs, threads=0) 26 | gflags_library(hdrs=hdrs, srcs=srcs, threads=1) 27 | -------------------------------------------------------------------------------- /api/node.js/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "paddleocrjson", 3 | "version": "1.1.1", 4 | "description": "基于 https://github.com/hiroi-sora/PaddleOCR-json 的node.js api", 5 | "homepage": "https://github.com/PunchlY/PaddleOCR-json-node-api/blob/main/api/node.js", 6 | "repository": { 7 | 
"type": "git", 8 | "url": "git://github.com/PunchlY/PaddleOCR-json-node-api.git" 9 | }, 10 | "author": "punchly ", 11 | "bugs": { 12 | "url": "https://github.com/PunchlY/PaddleOCR-json-node-api/issues" 13 | }, 14 | "license": "WTFPL", 15 | "main": "./esnext", 16 | "devDependencies": { 17 | "@types/node": "^18.11.9" 18 | }, 19 | "keywords": [ 20 | "OCR", 21 | "PaddleOCR", 22 | "api", 23 | "json" 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /docs/update-v1.3.md: -------------------------------------------------------------------------------- 1 | # V1.3 更新说明 2 | 3 | #### 新增功能 4 | 5 | - 传入图片方式增加Base64方式,可以将图片编码为Base64字符串,塞入json传给引擎识别。 6 | - 进程交互方式增加套接字服务器方式,引擎进程开启一个TCP端口,调用方可以通过ip+端口调用OCR。 7 | 8 | #### 启动参数改动说明 9 | 10 | - 取消了 `use_system_pause`, `ensure_chcp`, `use_debug` 这三个跟调试相关的参数。默认不会暂停程序。 11 | - `ensure_ascii` 的默认值改为 `true` ,默认开启ascii转义。 12 | - 不再支持以 `-image_dir=路径` 传入图片路径,请使用 `-image_path=路径` 。 13 | - 新增两个跟服务器模式有关的参数:地址 `addr` 和端口 `port` 。 14 | 15 | #### json传入值改动说明 16 | 17 | - 不再支持参数热更新,因为用处有限。所有参数必须在启动时指定。 18 | - 不再支持直接传入路径,路径必须包含在json中,如 `{"image_path":""}` 19 | - 不再支持以 `{"image_dir":""}` 传入图片路径,请使用 `{"image_path":""}` 。 20 | - 新增结束进程的指令: `exit` 。可以通过传入 `{"exit":""}` 或直接传入 `exit` 来结束引擎进程。当然手动kill掉也是可以的。 21 | - 新增Base64传图方式:`{"image_base64":"base64编码字符串"}`。 22 | - 管道模式和套接字模式,输入输出值的格式是完全相同的,仅仅是交互方式不同。编写API时,建议仅重载进程交互的函数来适配这两种模式,其他参数解析等部分的代码可以复用。 -------------------------------------------------------------------------------- /cpp/third_party/gflags/cmake/version.cmake.in: -------------------------------------------------------------------------------- 1 | ## gflags CMake configuration version file 2 | 3 | # ----------------------------------------------------------------------------- 4 | # library version 5 | set (PACKAGE_VERSION "@PACKAGE_VERSION@") 6 | 7 | # ----------------------------------------------------------------------------- 8 | # check compatibility 9 | 10 | # Perform compatibility check here 
def GetParser(key) -> Tbpu:
    """Create a layout-parser object from its registry name.

    `key` is looked up in the module-level `Parser` mapping; the matching
    class is instantiated without arguments and returned. When `key` is not
    registered the function returns None.
    """
    if key not in Parser:
        return None
    parserClass = Parser[key]
    return parserClass()
class MultiNone(Tbpu):
    """Layout parser: multi-column, no line breaks inside the text."""

    def __init__(self):
        self.tbpuName = "排版解析-多栏-无换行"

        def bbox_of(tb):
            # Tells the gap tree where a block's bounding box is stored.
            return tb["normalized_bbox"]

        # Gap-tree sorter used to order the blocks.
        self.gtree = GapTree(bbox_of)

    def run(self, textBlocks):
        """Sort the blocks and give every block an 'end' separator.

        Every block except the last gets the separator chosen by
        `word_separator` from its last character and the first character of
        the following block; the last block gets a newline. The temporary
        'normalized_bbox' key is removed from every block.
        """
        ordered = self.gtree.sort(linePreprocessing(textBlocks))
        lastIndex = len(ordered) - 1
        for index, block in enumerate(ordered):
            if index == lastIndex:
                block["end"] = "\n"
            else:
                tailChar = block["text"][-1]  # last character of this block
                headChar = ordered[index + 1]["text"][0]  # first character of the next one
                block["end"] = word_separator(tailChar, headChar)
            del block["normalized_bbox"]
        return ordered
class MultiPara(Tbpu):
    """Layout parser: multi-column, natural paragraphs."""

    def __init__(self):
        self.tbpuName = "排版解析-多栏-自然段"

        def bbox_of(tb):
            # Tells the gap tree where a block's bounding box is stored.
            return tb["normalized_bbox"]

        def info_of(tb):
            # (bounding box, text) pair handed to the paragraph analyser.
            return (tb["normalized_bbox"], tb["text"])

        def store_end(tb, end):
            # Receives the predicted trailing separator for a block.
            tb["end"] = end

        self.gtree = GapTree(bbox_of)
        self.pp = ParagraphParse(info_of, store_end)

    def run(self, textBlocks):
        """Sort the blocks, then predict the trailing separator of each one.

        After sorting, the gap tree's node sequence is fetched and the
        paragraph analyser is run on each node's group of blocks. The
        temporary 'normalized_bbox' key is removed from every block in
        those groups. The sorted list is returned.
        """
        prepared = linePreprocessing(textBlocks)
        ordered = self.gtree.sort(prepared)
        for group in self.gtree.get_nodes_text_blocks():
            self.pp.run(group)
            for block in group:
                del block["normalized_bbox"]
        return ordered
# Uninstall script: removes every file listed in the install manifest that
# was written to the build directory.

if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
  message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
endif()

if(NOT DEFINED CMAKE_INSTALL_PREFIX)
  set(CMAKE_INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@")
endif()
message(${CMAKE_INSTALL_PREFIX})

# Turn the newline-separated manifest into a CMake list.
file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
string(REGEX REPLACE "\n" ";" files "${files}")
foreach(file ${files})
  message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
  if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
    # execute_process replaces the deprecated exec_program command, which
    # newer CMake releases reject (policy CMP0153). Passing the path as its
    # own argument also avoids hand-built quoting of the command line.
    execute_process(
      COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}"
      OUTPUT_VARIABLE rm_out
      RESULT_VARIABLE rm_retval
    )
    if(NOT "${rm_retval}" STREQUAL 0)
      message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
    endif()
  else()
    message(STATUS "File $ENV{DESTDIR}${file} does not exist.")
  endif()
endforeach()
8 | constructor(path?: string, args?: string[], options?: OCR.Options, debug?: boolean); 9 | postMessage(obj: OCR.Arg): void; 10 | flush(obj: OCR.Arg): Promise; 11 | } 12 | declare namespace OCR { 13 | interface BaseArg { 14 | limit_side_len?: number; 15 | limit_type?: string; 16 | visualize?: boolean; 17 | output?: string; 18 | } 19 | interface Arg_Path extends BaseArg { 20 | image_path?: string | null; 21 | } 22 | interface Arg_Base64 extends BaseArg { 23 | image_base64?: string; 24 | } 25 | export type Arg = Arg_Base64 | Arg_Path; 26 | export interface coutReturnType { 27 | code: number; 28 | message: string; 29 | data: { 30 | box: [[number, number], [number, number], [number, number], [number, number]]; 31 | score: number; 32 | text: string; 33 | }[] | null; 34 | } 35 | export type Options = Omit, keyof typeof import('./worker').__default.options>; 36 | export {}; 37 | } 38 | export = OCR; 39 | -------------------------------------------------------------------------------- /api/node.js/esnext/index.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | import { Worker } from 'worker_threads'; 3 | declare class OCR extends Worker { 4 | pid: number; 5 | addr: string | undefined; 6 | port: number | undefined; 7 | exitCode: number | null; 8 | constructor(path?: string, args?: string[], options?: OCR.Options, debug?: boolean); 9 | postMessage(obj: OCR.Arg): void; 10 | flush(obj: OCR.Arg): Promise; 11 | } 12 | declare namespace OCR { 13 | interface BaseArg { 14 | limit_side_len?: number; 15 | limit_type?: string; 16 | visualize?: boolean; 17 | output?: string; 18 | } 19 | interface Arg_Path extends BaseArg { 20 | image_path?: string | null; 21 | } 22 | interface Arg_Base64 extends BaseArg { 23 | image_base64?: string; 24 | } 25 | export type Arg = Arg_Base64 | Arg_Path; 26 | export interface coutReturnType { 27 | code: number; 28 | message: string; 29 | data: { 30 | box: [[number, number], [number, number], [number, 
number], [number, number]]; 31 | score: number; 32 | text: string; 33 | }[] | null; 34 | } 35 | export type Options = Omit, keyof typeof import('./worker').__default.options>; 36 | export {}; 37 | } 38 | export = OCR; 39 | -------------------------------------------------------------------------------- /cpp/tools/linux_dist_tools/README.txt: -------------------------------------------------------------------------------- 1 | 使用手册 2 | 3 | 感谢下载使用PaddleOCR-json引擎(https://github.com/hiroi-sora/PaddleOCR-json)。 4 | 5 | 你可以选择以下任意一种方式运行本引擎。 6 | 7 | # 无安装直接运行引擎 8 | 9 | 只需运行 “run.sh” 脚本即可。 10 | 11 | # 安装到系统环境再运行 12 | 13 | 先运行 “install_env.sh” 脚本来安装引擎到系统环境。 14 | 15 | 下一步运行 “source ~/.bashrc” 来刷新系统环境。 16 | 17 | 之后就可以直接使用 “PaddleOCR-json” 来运行引擎了。 18 | 19 | # 错误排查 20 | 21 | 如果出现以下错误: 22 | 23 | PaddleOCR-json: error while loading shared libraries: libiomp5.so 24 | 25 | 这代表着你的系统里缺少了一个运行依赖库 “libiomp5.so” ,请自行安装所需的依赖库到系统。 26 | 27 | 28 | 29 | 30 | Usage Manual 31 | 32 | Thanks for downloading & using PaddleOCR-json engine (https://github.com/hiroi-sora/PaddleOCR-json). 33 | 34 | You can choose any of following methods to run the engine. 35 | 36 | # No install, run engine directly 37 | 38 | You can use "run.sh" script to run it. 39 | 40 | # Install to system environment and then run engine 41 | 42 | First run "install_env.sh" script to install engine to your system environment. 43 | 44 | Next run "source ~/.bashrc" to refresh your system environment. 45 | 46 | Finally, you can use "PaddleOCR-json" to run engine. 47 | 48 | # Troubleshooting 49 | 50 | If following error occurs: 51 | 52 | PaddleOCR-json: error while loading shared libraries: libiomp5.so 53 | 54 | This indicates that your system is missing a runtime dependency library "libiomp5.so", please install all the dependency libraries to your system. 
#!/bin/bash -e
#
# Build OpenCV (modules: core, imgcodecs, imgproc) from a source tree and
# install the result into "<working dir>/opencv-release".
#
# This script must be run from the cpp/.source working directory.
# Argument 1: path to the OpenCV source directory (may be relative).

# Keep fail-fast behaviour even when the script is started as
# `bash linux_build_opencv.sh`, in which case the shebang's -e is ignored.
set -e

source_dir=$(pwd) # current working directory

if [ $# -lt 1 ]; then
    echo "Usage: $(basename "$0") /path/to/opencv"
    exit 1
fi
opencv_dir=$(realpath "$1")
cd "$opencv_dir"

build_dir="${opencv_dir}/build"            # CMake build directory
release_dir="${source_dir}/opencv-release" # install prefix

# Always start from a clean build directory and a clean install prefix.
# All expansions are quoted so that paths containing spaces work.
if [ -d "${build_dir}" ]; then
    rm -rf "${build_dir}"
    echo "Delete: ${build_dir}"
fi
if [ -d "${release_dir}" ]; then
    rm -rf "${release_dir}"
    echo "Delete: ${release_dir}"
fi
mkdir "${build_dir}"
echo "Create: ${build_dir}"
cd "${build_dir}"

# BUILD_DOCS was previously misspelled as BUILD_DOCSL, which CMake ignores
# as an unused variable.
cmake ../ \
    -DCMAKE_INSTALL_PREFIX="${release_dir}" \
    -DCMAKE_BUILD_TYPE=Release \
    -DBUILD_LIST=core,imgcodecs,imgproc \
    -DBUILD_SHARED_LIBS=ON \
    -DBUILD_opencv_world=OFF \
    -DOPENCV_FORCE_3RDPARTY_BUILD=ON \
    -DWITH_ZLIB=ON \
    -DWITH_TIFF=ON \
    -DWITH_OPENJPEG=ON \
    -DWITH_JASPER=ON \
    -DWITH_JPEG=ON \
    -DWITH_PNG=ON \
    -DWITH_OPENEXR=ON \
    -DWITH_WEBP=ON \
    -DWITH_IPP=ON \
    -DWITH_LAPACK=ON \
    -DWITH_EIGEN=ON \
    -DBUILD_PERF_TESTS=OFF \
    -DBUILD_TESTS=OFF \
    -DBUILD_DOCS=OFF \
    -DBUILD_JAVA=OFF \
    -DBUILD_opencv_python2=OFF \
    -DBUILD_opencv_python3=OFF

# A bare `make -j` places no limit on parallel jobs and can exhaust memory;
# bound it to the number of available processors.
make -j"$(nproc)"
make install
GetOcrApi(r"Your Path/PaddleOCR-json.exe") 16 | 17 | if ocr.getRunningMode() == "local": 18 | print(f"初始化OCR成功,进程号为{ocr.ret.pid}") 19 | elif ocr.getRunningMode() == "remote": 20 | print(f"连接远程OCR引擎成功,ip:{ocr.ip},port:{ocr.port}") 21 | print(f"\n测试图片路径:{TestImagePath}") 22 | 23 | # OCR识别图片,获取文本块 24 | getObj = ocr.run(TestImagePath) 25 | ocr.exit() # 结束引擎子进程 26 | if not getObj["code"] == 100: 27 | print("识别失败!!") 28 | exit() 29 | textBlocks = getObj["data"] # 提取文本块数据 30 | 31 | # OCR原始结果的可视化Image 32 | img1 = visualize(textBlocks, TestImagePath).get(isOrder=True) 33 | ocr.exit() # 结束引擎子进程 34 | print("========== 原始结果 ==========") 35 | ocr.printResult(getObj) 36 | 37 | # 获取排版解析器对象 38 | parser = GetParser("multi_para") 39 | # 传入OCR结果列表,返回新的文本块列表 40 | textBlocksNew = parser.run(textBlocks) 41 | # 注意,处理后原列表 textBlocks 的结构可能被破坏,不要再使用原列表(或先深拷贝备份)。 42 | print("========== 整理后结果 ==========") 43 | getObj["data"] = textBlocksNew 44 | ocr.printResult(getObj) 45 | 46 | # 可视化 后处理结果的可视化Image 47 | img2 = visualize(textBlocksNew, TestImagePath).get(isOrder=True) 48 | print("显示可视化结果。左边是原始结果,右边是合并自然段后的结果。") 49 | visualize.createContrast(img1, img2).show() # 左右拼接图片并展示 50 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/COPYING.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2006, Google Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | * Neither the name of Google Inc. 
nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/src/defines.h.in: -------------------------------------------------------------------------------- 1 | /* Generated from defines.h.in during build configuration using CMake. */ 2 | 3 | // Note: This header file is only used internally. It is not part of public interface! 4 | // Any cmakedefine is defined using the -D flag instead when Bazel is used. 5 | // For Bazel, this file is thus not used to avoid a private file in $(GENDIR). 6 | 7 | #ifndef GFLAGS_DEFINES_H_ 8 | #define GFLAGS_DEFINES_H_ 9 | 10 | 11 | // Define if you build this library for a MS Windows OS. 12 | #cmakedefine OS_WINDOWS 13 | 14 | // Define if you have the header file. 15 | #cmakedefine HAVE_STDINT_H 16 | 17 | // Define if you have the header file. 18 | #cmakedefine HAVE_SYS_TYPES_H 19 | 20 | // Define if you have the header file. 21 | #cmakedefine HAVE_INTTYPES_H 22 | 23 | // Define if you have the header file. 
# demo1.py : demonstrates basic OCR usage
# 👉 demo2.py : demonstrates the visualization interface
# demo3.py : demonstrates OCR text post-processing (paragraph merging)

from PPOCR_api import GetOcrApi
from PPOCR_visualize import visualize

import os

# Directory of this script; the test image and the saved result live here.
ScriptDir = os.path.dirname(os.path.abspath(__file__))

# Path of the test image
TestImagePath = os.path.join(ScriptDir, "test.jpg")

# Create the recognizer object; pass the path of the PaddleOCR-json engine.
ocr = GetOcrApi(r"Your Path/PaddleOCR-json.exe")

if ocr.getRunningMode() == "local":
    print(f"初始化OCR成功,进程号为{ocr.ret.pid}")
elif ocr.getRunningMode() == "remote":
    print(f"连接远程OCR引擎成功,ip:{ocr.ip},port:{ocr.port}")
print(f"\n测试图片路径:{TestImagePath}")


# Run OCR on the image and fetch the text blocks
getObj = ocr.run(TestImagePath)
ocr.exit()  # stop the engine subprocess
if not getObj["code"] == 100:
    print("识别失败!!")
    exit()
textBlocks = getObj["data"]  # extract the text block data

# Visualization demo

# Example 1: pass text blocks and the image path, show the result
print("显示图片!")
visualize(textBlocks, TestImagePath).show()
# The program blocks until the image viewer window is closed. If it seems
# stuck for a long time, comment out the line above and run again.

# Example 2: show more detailed information
vis = visualize(textBlocks, TestImagePath)
print("获取图片!")
# Bounding boxes disabled: get the original picture as a PIL Image object
visImg1 = vis.get(isBox=False)
# Text and order numbers enabled, source picture disabled (transparent
# background): get a PIL Image object
visImg2 = vis.get(isText=True, isOrder=True, isSource=False)
# Side-by-side comparison: original on the left, text boxes on the right
vis = visualize.createContrast(visImg1, visImg2)
# Show the comparison
vis.show()
# The images can be processed further with the PIL library from here on.

# Save to disk. The path is built with os.path.join instead of a hard-coded
# "\\" so the file lands inside the script directory on every platform; on
# Windows the resulting path is identical to the previous one.
SavePath = os.path.join(ScriptDir, "可视化结果.png")
print(f"保存图片到 {SavePath} ")
vis.save(SavePath, isText=True)

print("程序结束。")
/**
 * Serial task queue built on an async generator.
 *
 * Every `next(executor)` call hands in a Promise executor. The generator
 * awaits `new Promise(executor)` and yields the resolved value as the result
 * of that call. Requests to an async generator are handled one at a time, so
 * the executors run strictly in call order.
 */
async function* Queue(value) {
    while (true)
        value = await new Promise(yield value);
}
// One queue per OCR instance, kept outside the instance itself.
const quqeMap = new WeakMap();
/**
 * OCR engine front end running in a worker thread ('worker.js').
 *
 * Emits 'init' (pid, addr, port) once the worker's first stdout chunk has
 * been parsed. `exitCode` is set when the worker exits.
 *
 * NOTE(review): this file sits next to a TypeScript source (ts/index.ts) and
 * looks like compiler output; mirror the regex fix there.
 */
class OCR extends worker_threads_1.Worker {
    pid;
    addr;
    port;
    exitCode = null;
    constructor(path, args, options, debug) {
        super((0, path_1.resolve)(__dirname, 'worker.js'), {
            workerData: { path, args, options, debug },
            stdout: true,
        });
        const quqe = Queue();
        quqeMap.set(this, quqe);
        // Prime the generator so it is parked at its first `yield`.
        quqe.next();
        // First queued task: wait for the init line on stdout. Requests made
        // through flush() queue up behind it.
        quqe.next((res) => {
            this.stdout.once('data', (data) => {
                // Four values are destructured, so the pattern needs four
                // groups: pid, the optional socket part, ip and port. The
                // previous pattern captured "ip:port" as a single group,
                // which left `port` undefined (NaN after Number()).
                const [, pid, socket, addr, port] = String(data).match(/^pid=(\d+)(, a=(\d+\.\d+\.\d+\.\d+):(\d+))?/);
                this.pid = Number(pid);
                if (socket) {
                    this.addr = addr;
                    this.port = Number(port);
                }
                super.emit('init', this.pid, this.addr, this.port);
                res();
            });
        });
        super.once('exit', (code) => {
            this.exitCode = code;
            // Finish the generator so no further task is started.
            quqeMap.get(this).return(null);
        });
    }
    /** Fire-and-forget variant of flush(): the result is discarded. */
    postMessage(obj) { OCR.prototype.flush.call(this, obj); }
    /**
     * Queue one request for the worker and resolve with the next 'message'
     * it sends back.
     */
    async flush(obj) {
        return (await quqeMap.get(this).next((res) => {
            super.once('message', res);
            super.postMessage(obj);
        })).value;
    }
}
module.exports = OCR;
class SinglePara(SingleLine):
    """Layout parser: single column, natural paragraphs."""

    def __init__(self):
        self.tbpuName = "排版解析-单栏-自然段"

        # Paragraph analyser: it reads (bbox, text) from a wrapped line and
        # reports the predicted separator through set_end.
        get_info = lambda tb: (tb["normalized_bbox"], tb["text"])

        def set_end(tb, end):
            # The separator of a whole line belongs to the last block of it.
            tb["line"][-1]["end"] = end

        self.pp = ParagraphParse(get_info, set_end)

    def run(self, textBlocks):
        """Group blocks into lines, predict each line's trailing separator,
        and return the blocks as a flat list.

        Every line is wrapped in a temporary block carrying the union
        bounding box of the line and a two-character text (first character of
        the first block + last character of the last block). The temporary
        'normalized_bbox' key is removed from every returned block.
        """
        textBlocks = linePreprocessing(textBlocks)  # pre-processing
        lines = self.get_lines(textBlocks)  # blocks grouped per line
        # Wrap every line in a temporary text block
        temp_tbs = []
        for line in lines:
            # Union bounding box of all blocks in the line.
            # NOTE(review): presumably (left, top, right, bottom), inferred
            # from the min/max usage; confirm against line_preprocessing.
            b0, b1, b2, b3 = line[0]["normalized_bbox"]
            for i in range(1, len(line)):
                bb = line[i]["normalized_bbox"]
                b0 = min(b0, bb[0])
                b1 = min(b1, bb[1])
                # Each bound must be compared with its own running value.
                # Previously both were compared with b1, which mixed
                # coordinates and discarded the running maximum.
                b2 = max(b2, bb[2])
                b3 = max(b3, bb[3])
            temp_tbs.append(
                {
                    "normalized_bbox": (b0, b1, b2, b3),
                    "text": line[0]["text"][0] + line[-1]["text"][-1],
                    "line": line,
                }
            )
        # Predict the trailing separators
        self.pp.run(temp_tbs)
        # Unwrap the lines back into a flat block list
        textBlocks = []
        for t in temp_tbs:
            for tb in t["line"]:
                del tb["normalized_bbox"]
                textBlocks.append(tb)
        return textBlocks
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <regex>
#include <string>
#include <vector>

// One "regex;replacement" pair taken from the command line.
struct Substitution {
  std::basic_regex<char> regex;
  std::string replacement;
};

// Simple app that does a regex search-and-replace on a template
// and outputs the result.
//
// To invoke:
//   expand_template
//     --template PATH
//     --output PATH
//     regex0;replacement0
//     regex1;replacement1
//     ...
//
// It is only used as a private implementation detail of a rule, so error
// handling is minimal: only missing paths are rejected, because handing a
// null path to the file streams is undefined behaviour.
int main(int argc, const char** argv) {
  // Parse args.
  const char* template_path = nullptr;
  const char* output_path = nullptr;
  std::vector<Substitution> substitutions;
  for (int i = 1; i < argc; ++i) {
    const char* arg = argv[i];
    // Only consume a value when one is present; a trailing flag without a
    // value falls through and is ignored.
    if (strcmp(arg, "--template") == 0 && i + 1 < argc) {
      template_path = argv[++i];
    } else if (strcmp(arg, "--output") == 0 && i + 1 < argc) {
      output_path = argv[++i];
    } else {
      // Everything before the first ';' is the regex, the rest is the
      // replacement text.
      const char* mid = strchr(arg, ';');
      if (mid != nullptr) {
        substitutions.push_back(Substitution{
            std::basic_regex<char>(arg, mid - arg),
            std::string(mid + 1),
        });
      }
    }
  }

  if (template_path == nullptr || output_path == nullptr) {
    std::cerr << "expand_template: --template PATH and --output PATH are required\n";
    return 1;
  }

  // Read template.
  std::ifstream ifs(template_path);
  std::string str(std::istreambuf_iterator<char>(ifs),
                  (std::istreambuf_iterator<char>()));

  // Apply regexes.
  for (const auto& subst : substitutions) {
    str = std::regex_replace(str, subst.regex, subst.replacement);
  }

  // Output file.
  std::ofstream file(output_path);
  file << str;

  return 0;
}
print(f"\n示例1-图片路径识别结果(格式化输出):") 28 | ocr.printResult(res) 29 | 30 | # 示例2:识别图片字节流 31 | with open(TestImagePath, "rb") as f: # 获取图片字节流 32 | # 实际使用中,可以联网下载或者截图获取字节流,直接送入OCR,无需保存到本地中转。 33 | imageBytes = f.read() 34 | res = ocr.runBytes(imageBytes) 35 | print(f"\n示例2-字节流识别结果:") 36 | ocr.printResult(res) 37 | 38 | # 示例3:识别 PIL Image 对象 39 | try: 40 | from PIL import Image 41 | from io import BytesIO 42 | except Exception: 43 | print("安装Pillow库后方可测试示例3。") 44 | Image = None 45 | if Image: 46 | # 创建一个PIL Image对象 47 | pilImage = Image.open(TestImagePath) 48 | # Image 对象转为 字节流 49 | buffered = BytesIO() 50 | pilImage.save(buffered, format="PNG") 51 | imageBytes = buffered.getvalue() 52 | # 送入OCR 53 | res = ocr.runBytes(imageBytes) 54 | print(f"\n示例3-PIL Image 识别结果:") 55 | ocr.printResult(res) 56 | 57 | # 以下示例默认禁用 58 | # 示例4:识别剪贴板图片 59 | if ocr.isClipboardEnabled(): 60 | res = ocr.runClipboard() 61 | if res["code"] == 212: 62 | print(f"\n示例4-当前剪贴板中没有图片。") 63 | else: 64 | print(f"\n示例4-剪贴板识别结果:") 65 | ocr.printResult(res) 66 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.11) 2 | 3 | # you may need to set following cmake_policies before your **first project() function** 4 | if (POLICY CMP0042) 5 | cmake_policy (SET CMP0042 NEW) 6 | endif () 7 | 8 | if (POLICY CMP0048) 9 | cmake_policy (SET CMP0048 NEW) 10 | endif () 11 | 12 | if (POLICY CMP0063) 13 | cmake_policy (SET CMP0063 NEW) 14 | endif () 15 | 16 | 17 | project(example) 18 | 19 | 20 | # gflags cmake flags 21 | 22 | # hide pathname in help message 23 | # set(GFLAGS_HIDE_PATH_IN_HELP "All") # Hide entire pathname 24 | set(GFLAGS_HIDE_PATH_IN_HELP "Basename") # Hide only the basename of pathname 25 | 26 | # Use shortest possible double precision in help message 27 | set(GFLAGS_USE_SHORTEST_DOUBLE_PRECISION ON) 28 | 29 | # set 
C++ namespace for gflags. (default "gflags") 30 | # you can use GFLAGS_NAMESPACE::function() in your code. 31 | # set(GFLAGS_NAMESPACE "google") 32 | 33 | # add using add_subdirectory 34 | # since gflags library's primary CMakeLists.txt is in our parent directory, 35 | # we need to use some tricks here for it to work 36 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../ ${CMAKE_CURRENT_BINARY_DIR}/gflags) 37 | 38 | 39 | # # add using FetchContent 40 | # include(FetchContent) 41 | # FetchContent_Declare(gflags 42 | # GIT_REPOSITORY https://github.com/Gavin1937/gflags.git 43 | # GIT_TAG mod 44 | # ) 45 | # FetchContent_GetProperties(gflags) 46 | # if(NOT gflags_POPULATED) 47 | # FetchContent_Populate(gflags) 48 | # cmake_policy(SET CMP0069 NEW) 49 | # add_subdirectory(${gflags_SOURCE_DIR} ${gflags_BINARY_DIR}) 50 | # endif() 51 | 52 | message(STATUS "GFLAGS_HIDE_PATH_IN_HELP: ${GFLAGS_HIDE_PATH_IN_HELP}") 53 | message(STATUS "GFLAGS_NAMESPACE: ${GFLAGS_NAMESPACE}") 54 | 55 | 56 | set(SRC main.cpp args.cpp args.hpp) 57 | 58 | add_executable(${PROJECT_NAME} ${SRC}) 59 | 60 | target_link_libraries(${PROJECT_NAME} gflags) 61 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/cmake/execute_test.cmake: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # sanitize string stored in variable for use in regular expression.
3 | macro (sanitize_for_regex STRVAR) 4 | string (REGEX REPLACE "([.+*?^$()])" "\\\\\\1" ${STRVAR} "${${STRVAR}}") 5 | endmacro () 6 | 7 | # ---------------------------------------------------------------------------- 8 | # script arguments 9 | if (NOT COMMAND) 10 | message (FATAL_ERROR "Test command not specified!") 11 | endif () 12 | if (NOT DEFINED EXPECTED_RC) 13 | set (EXPECTED_RC 0) 14 | endif () 15 | if (EXPECTED_OUTPUT) 16 | sanitize_for_regex(EXPECTED_OUTPUT) 17 | endif () 18 | if (UNEXPECTED_OUTPUT) 19 | sanitize_for_regex(UNEXPECTED_OUTPUT) 20 | endif () 21 | 22 | # ---------------------------------------------------------------------------- 23 | # set a few environment variables (useful for --tryfromenv) 24 | set (ENV{FLAGS_undefok} "foo,bar") 25 | set (ENV{FLAGS_weirdo} "") 26 | set (ENV{FLAGS_version} "true") 27 | set (ENV{FLAGS_help} "false") 28 | 29 | # ---------------------------------------------------------------------------- 30 | # execute test command 31 | execute_process( 32 | COMMAND ${COMMAND} 33 | RESULT_VARIABLE RC 34 | OUTPUT_VARIABLE OUTPUT 35 | ERROR_VARIABLE OUTPUT 36 | ) 37 | 38 | if (OUTPUT) 39 | message ("${OUTPUT}") 40 | endif () 41 | 42 | # ---------------------------------------------------------------------------- 43 | # check test result 44 | if (NOT RC EQUAL EXPECTED_RC) 45 | string (REPLACE ";" " " COMMAND "${COMMAND}") 46 | message (FATAL_ERROR "Command:\n\t${COMMAND}\nExit status is ${RC}, expected ${EXPECTED_RC}") 47 | endif () 48 | if (EXPECTED_OUTPUT AND NOT OUTPUT MATCHES "${EXPECTED_OUTPUT}") 49 | message (FATAL_ERROR "Test output does not match expected output: ${EXPECTED_OUTPUT}") 50 | endif () 51 | if (UNEXPECTED_OUTPUT AND OUTPUT MATCHES "${UNEXPECTED_OUTPUT}") 52 | message (FATAL_ERROR "Test output matches unexpected output: ${UNEXPECTED_OUTPUT}") 53 | endif () -------------------------------------------------------------------------------- /cpp/third_party/gflags/src/config.h: 
-------------------------------------------------------------------------------- 1 | // Note: This header file is only used internally. It is not part of public interface! 2 | 3 | #ifndef GFLAGS_CONFIG_H_ 4 | #define GFLAGS_CONFIG_H_ 5 | 6 | 7 | // --------------------------------------------------------------------------- 8 | // System checks 9 | 10 | // CMake build configuration is written to defines.h file, unused by Bazel build 11 | #if !defined(GFLAGS_BAZEL_BUILD) 12 | # include "defines.h" 13 | #endif 14 | 15 | // gcc requires this to get PRId64, etc. 16 | #if defined(HAVE_INTTYPES_H) && !defined(__STDC_FORMAT_MACROS) 17 | # define __STDC_FORMAT_MACROS 1 18 | #endif 19 | 20 | // --------------------------------------------------------------------------- 21 | // Path separator 22 | #ifndef PATH_SEPARATOR 23 | # ifdef OS_WINDOWS 24 | # define PATH_SEPARATOR '\\' 25 | # else 26 | # define PATH_SEPARATOR '/' 27 | # endif 28 | #endif 29 | 30 | // --------------------------------------------------------------------------- 31 | // Windows 32 | 33 | // Always export symbols when compiling a shared library as this file is only 34 | // included by internal modules when building the gflags library itself. 35 | // The gflags_declare.h header file will set it to import these symbols otherwise. 
36 | #ifndef GFLAGS_DLL_DECL 37 | # if GFLAGS_IS_A_DLL && defined(_MSC_VER) 38 | # define GFLAGS_DLL_DECL __declspec(dllexport) 39 | # elif defined(__GNUC__) && __GNUC__ >= 4 40 | # define GFLAGS_DLL_DECL __attribute__((visibility("default"))) 41 | # else 42 | # define GFLAGS_DLL_DECL 43 | # endif 44 | #endif 45 | // Flags defined by the gflags library itself must be exported 46 | #ifndef GFLAGS_DLL_DEFINE_FLAG 47 | # define GFLAGS_DLL_DEFINE_FLAG GFLAGS_DLL_DECL 48 | #endif 49 | 50 | #ifdef OS_WINDOWS 51 | // The unittests import the symbols of the shared gflags library 52 | # if GFLAGS_IS_A_DLL && defined(_MSC_VER) 53 | # define GFLAGS_DLL_DECL_FOR_UNITTESTS __declspec(dllimport) 54 | # endif 55 | # include "windows_port.h" 56 | #endif 57 | 58 | 59 | #endif // GFLAGS_CONFIG_H_ 60 | -------------------------------------------------------------------------------- /cpp/cmake/opencv-install-utils.cmake: -------------------------------------------------------------------------------- 1 | # OpenCV 安装工具函数 2 | 3 | # collect_opencv_lib() 4 | # @参数 5 | # in_opencv_lib_dir [输入][str] 用户输入的 OPENCV_DIR 6 | # in_cmake_shared_lib_suffix [输入][str] CMAKE_SHARED_LIBRARY_SUFFIX 7 | # in_opencv_lib_name [输入][str] 当前链接的OpenCV库名,如 opencv_core、opencv_imgcodecs 8 | # out_opencv_miss_lib [输出][bool] 是否缺失输入的OpenCV库文件(未找到时被设为 TRUE) 9 | # out_opencv_lib_regular_file [输出][str] 找到的OpenCV库普通文件绝对路径 10 | # out_opencv_lib_symlink [输出][list] 找到的OpenCV库普通文件与库符号链接名,用于后续创建符号链接 11 | macro(collect_opencv_lib 12 | in_opencv_lib_dir 13 | in_cmake_shared_lib_suffix 14 | in_opencv_lib_name 15 | out_opencv_miss_lib 16 | out_opencv_lib_regular_file 17 | out_opencv_lib_symlink 18 | ) 19 | # 寻找库文件 20 | file(GLOB_RECURSE __opencv_libs_found "${in_opencv_lib_dir}/**/*${in_opencv_lib_name}*${in_cmake_shared_lib_suffix}*") 21 | 22 | # 检查是否找到 23 | if("${__opencv_libs_found}" STREQUAL "") 24 | message(STATUS "Missing independent shared library: ${in_opencv_lib_name}") 25 | set(${out_opencv_miss_lib} TRUE) 26 | endif() 27 | 28 | #
拆分普通文件与符号链接 29 | foreach(__item ${__opencv_libs_found}) 30 | if(IS_SYMLINK ${__item}) 31 | get_filename_component(__item_name ${__item} NAME) 32 | list(APPEND __symlink_libs ${__item_name}) 33 | else() 34 | set(__regular_libs ${__item}) 35 | endif() 36 | endforeach() 37 | 38 | # 输出 39 | set(${out_opencv_lib_regular_file} ${__regular_libs}) 40 | if(DEFINED __symlink_libs AND NOT "${__symlink_libs}" STREQUAL "") 41 | get_filename_component(__regular_libs_name "${__regular_libs}" NAME) 42 | set(${out_opencv_lib_symlink} ${__regular_libs_name} ${__symlink_libs}) 43 | endif() 44 | 45 | # 清理缓存的变量 46 | unset(__opencv_libs_found) 47 | unset(__item) 48 | unset(__item_name) 49 | unset(__symlink_libs) 50 | unset(__regular_libs) 51 | unset(__regular_libs_name) 52 | endmacro(collect_opencv_lib) 53 | -------------------------------------------------------------------------------- /api/PowerShell/demo.ps1: -------------------------------------------------------------------------------- 1 | Import-Module -Force ".\PPOCR_api.ps1" #修改对应路径 2 | 3 | # 初始化识别器对象,传入 PaddleOCR_json.exe 的路径 4 | $exePath = Convert-Path ".\PaddleOCR-json_v.1.3.0_alpha.2\PaddleOCR-json.exe" #修改对应exe路径 5 | $arg = "" #参数其实在API内没处理 6 | $ocr = [PPOCR]::new($exePath,$arg) 7 | 8 | while (1) { 9 | $prompt_str = "1:图片路径`n2:图片Base64`n3:图片Byte`n" 10 | if ( $ocr.isClipboardEnabled() ) { 11 | $prompt_str += "4:剪贴板`n" 12 | } 13 | $prompt_str += "其他则退出" 14 | $选择 = Read-Host $prompt_str 15 | switch($选择){ 16 | 1{ 17 | # 识别图片 18 | $imgPath = read-host "请输入图片路径" 19 | if ($imgPath) { 20 | $getObj = $ocr.run($imgPath) 21 | Write-Host "图片识别完毕,状态码:$($getObj.'code') 结果:`n$($getObj.'data'|Out-String)`n" 22 | } 23 | } 24 | 2{ 25 | $path = read-host "请输入图片路径,自动转换BASE64" 26 | $imgBase64 = [convert]::ToBase64String([System.IO.FIle]::ReadAllBytes($path)) 27 | if ($imgBase64) { 28 | $getObj = $ocr.runBase64($imgBase64) 29 | Write-Host "图片识别完毕,状态码:$($getObj.'code') 结果:`n$($getObj.'data'|Out-String)`n" 30 | } 31 | } 32 | 3{ 33 | $path = 
read-host "请输入图片路径,自动转换Byte" 34 | $imgByte = [System.IO.FIle]::ReadAllBytes($path) 35 | if ($imgByte) { 36 | $getObj = $ocr.runByte($imgByte) 37 | Write-Host "图片识别完毕,状态码:$($getObj.'code') 结果:`n$($getObj.'data'|Out-String)`n" 38 | } 39 | } 40 | # 以下示例默认禁用 41 | 4{ 42 | if ( $ocr.isClipboardEnabled() ) { 43 | $getObj = $ocr.runClipboard() 44 | Write-Host "图片识别完毕,状态码:$($getObj.'code') 结果:`n$($getObj.'data'|Out-String)`n" 45 | } 46 | else { 47 | $ocr.stop() # 结束子进程。 48 | Write-Host "程序结束。" 49 | Exit 50 | } 51 | } 52 | Default 53 | { 54 | $ocr.stop() # 结束子进程。 55 | Write-Host "程序结束。" 56 | Exit 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /api/node.js/test/app.js: -------------------------------------------------------------------------------- 1 | const OCR = require('paddleocrjson'); 2 | const ocr = new OCR('PaddleOCR_json.exe', [], { 3 | cwd: 'D:\\www\\OCR\\PaddleOCR-json', 4 | }, false); 5 | 6 | ocr.once('init', console.log); 7 | ocr.once('exit', process.exit); 8 | 9 | const fs = require('fs').promises; 10 | 11 | const mime = require('mime'); 12 | const imgMime = new Set(['bmp', 'jpeg', 'png', 'pbm', 'pgm', 'ppm', 'ras', 'tiff', 'exr', 'jp2'].map((v) => mime.getType(v))); 13 | 14 | const app = require('express')(); 15 | const upload = require('multer')({ 16 | dest: 'tmp', 17 | fileFilter(req, file, cb) { 18 | if (imgMime.has(file.mimetype)) 19 | return cb(null, true); 20 | return cb('This format is not accepted'); 21 | }, 22 | limits: { 23 | fileSize: 10 * 1024 * 1024, 24 | files: 1, 25 | }, 26 | }).single('img'); 27 | 28 | app.route('/').get((req, res, next) => 29 | res.send(` 30 |
31 | 32 | 33 |
` 34 | ) 35 | ).post((req, res, next) => 36 | upload(req, res, (err) => { 37 | if (typeof err === 'undefined') 38 | return next(); 39 | return next(err); 40 | }) 41 | ).post(async (req, res, next) => { 42 | const image_path = req.file.path; 43 | ocr.flush({ image_path }) 44 | .then((data) => { 45 | res.data = data; 46 | //console.log(data); 47 | return next(); 48 | }).catch(() => { 49 | return next(500); 50 | }).finally(() => { 51 | fs.unlink(image_path).then(); 52 | }); 53 | return; 54 | }); 55 | app.use((req, res, next) => { 56 | if (res.data === null) 57 | return next(404); 58 | if (typeof res.data === 'undefined') 59 | return next(405); 60 | return res.jsonp(res.data); 61 | }); 62 | app.use((err, req, res, next) => 63 | res.status(404).jsonp({ 64 | code: -1, 65 | message: err, 66 | data: null 67 | }) 68 | ); 69 | const server = app.listen(3000, () => 70 | console.log(server.address()) 71 | ); 72 | -------------------------------------------------------------------------------- /cpp/Dockerfile: -------------------------------------------------------------------------------- 1 | # 编译环境 2 | FROM debian:stable AS build 3 | 4 | # 构建参数 5 | ARG ENABLE_REMOTE_EXIT="ON" \ 6 | ENABLE_JSON_IMAGE_PATH="OFF" 7 | 8 | SHELL ["/bin/bash", "-c"] 9 | 10 | # 检查AVX指令集,如果不支持就退出 11 | RUN if [[ -z $(lscpu | grep avx) ]] ; \ 12 | then echo "Current CPU doesn't support AVX." ; \ 13 | exit -1 ; \ 14 | fi 15 | 16 | # 安装编译环境 17 | RUN \ 18 | apt update -y && \ 19 | apt install -y libopencv-dev && \ 20 | apt install -y wget tar zip unzip git gcc g++ cmake make pkg-config 21 | 22 | WORKDIR /src/ 23 | 24 | # 下载预测库 25 | RUN wget https://paddle-inference-lib.bj.bcebos.com/3.0.0-beta1/cxx_c/Linux/CPU/gcc8.2_avx_mkl/paddle_inference.tgz && \ 26 | tar -xf paddle_inference.tgz 27 | 28 | COPY . .
29 | 30 | ENV PADDLE_LIB="/src/.source/paddle_inference/" 31 | 32 | # 构建工程 + 编译 33 | RUN cd /src/ && \ 34 | mkdir -p .source && cp -r /src/paddle_inference .source && \ 35 | # 检查libopencv-dev是否正常安装 36 | if [[ -z $(pkg-config --silence-errors --libs opencv4) ]] ; \ 37 | then echo "Cannot find libopencv-dev in the system." ; exit -1 ; \ 38 | fi && \ 39 | mkdir -p build && \ 40 | cmake -S . -B build \ 41 | -DPADDLE_LIB=$PADDLE_LIB \ 42 | -DCMAKE_BUILD_TYPE=Release \ 43 | -DENABLE_CLIPBOARD=OFF \ 44 | -DENABLE_REMOTE_EXIT=${ENABLE_REMOTE_EXIT} \ 45 | -DENABLE_JSON_IMAGE_PATH=${ENABLE_JSON_IMAGE_PATH} && \ 46 | cmake --build build --config=Release && \ 47 | cmake --install build --prefix build/install 48 | 49 | 50 | 51 | # 部署环境 52 | FROM debian:stable-slim 53 | 54 | SHELL ["/bin/bash", "-c"] 55 | 56 | # 安装运行环境 57 | RUN apt update -y && \ 58 | apt install -y wget unzip libopencv-core-dev libopencv-imgcodecs-dev libopencv-imgproc-dev libgomp1 59 | 60 | # 安装PaddleOCR-json相关文件到系统 61 | COPY --from=build /src/build/install/ /usr/ 62 | 63 | WORKDIR /app 64 | 65 | # 下载模型库 66 | RUN wget https://github.com/hiroi-sora/PaddleOCR-json/releases/download/v1.4.1-dev/models_v1.4.1.zip && \ 67 | mv models_v1.4.1.zip models.zip && \ 68 | unzip -x models.zip && rm models.zip 69 | 70 | # 暴露端口3746,然后以套接字模式运行PaddleOCR-json 71 | EXPOSE 3746 72 | ENTRYPOINT ["PaddleOCR-json", "-models_path=/app/models", "-addr=any", "-port=3746"] 73 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/cmake/package.cmake.in: -------------------------------------------------------------------------------- 1 | # Per-generator CPack configuration file. See CPACK_PROJECT_CONFIG_FILE documented at 2 | # http://www.cmake.org/cmake/help/v2.8.12/cpack.html#variable:CPACK_PROJECT_CONFIG_FILE 3 | # 4 | # All common CPACK_* variables are set in CMakeLists.txt already. This file only 5 | # overrides some of these to provide package generator specific settings.
6 | 7 | # whether package contains all development files or only runtime files 8 | set (DEVEL @INSTALL_HEADERS@) 9 | 10 | # ------------------------------------------------------------------------------ 11 | # Mac OS X package 12 | if (CPACK_GENERATOR MATCHES "PackageMaker|DragNDrop") 13 | 14 | set (CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}") 15 | if (DEVEL) 16 | set (CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}-devel") 17 | endif () 18 | set (CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}-${CPACK_PACKAGE_VERSION}") 19 | 20 | # ------------------------------------------------------------------------------ 21 | # Debian package 22 | elseif (CPACK_GENERATOR MATCHES "DEB") 23 | 24 | set (CPACK_PACKAGE_FILE_NAME "lib${CPACK_PACKAGE_NAME}") 25 | if (DEVEL) 26 | set (CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}-dev") 27 | else () 28 | set (CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}0") 29 | endif () 30 | set (CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}_${CPACK_PACKAGE_VERSION}-1_${CPACK_PACKAGE_ARCHITECTURE}") 31 | 32 | set (CPACK_DEBIAN_PACKAGE_DEPENDS) 33 | set (CPACK_DEBIAN_PACKAGE_SECTION "devel") 34 | set (CPACK_DEBIAN_PACKAGE_PRIORITY "optional") 35 | set (CPACK_DEBIAN_PACKAGE_HOMEPAGE "${CPACK_RPM_PACKAGE_URL}") 36 | set (CPACK_DEBIAN_PACKAGE_MAINTAINER "${CPACK_PACKAGE_VENDOR}") 37 | set (CPACK_DEBIAN_PACKAGE_ARCHITECTURE "${CPACK_PACKAGE_ARCHITECTURE}") 38 | 39 | # ------------------------------------------------------------------------------ 40 | # RPM package 41 | elseif (CPACK_GENERATOR MATCHES "RPM") 42 | 43 | set (CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}") 44 | if (DEVEL) 45 | set (CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}-devel") 46 | endif () 47 | set (CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}-${CPACK_PACKAGE_VERSION}-1.${CPACK_PACKAGE_ARCHITECTURE}") 48 | 49 | endif () 50 | -------------------------------------------------------------------------------- /cpp/include/preprocess_op.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | 20 | #include "opencv2/core.hpp" 21 | #include "opencv2/imgcodecs.hpp" 22 | #include "opencv2/imgproc.hpp" 23 | 24 | namespace PaddleOCR { 25 | 26 | class Normalize { 27 | public: 28 | virtual void Run(cv::Mat *im, const std::vector &mean, 29 | const std::vector &scale, const bool is_scale = true); 30 | }; 31 | 32 | // RGB -> CHW 33 | class Permute { 34 | public: 35 | virtual void Run(const cv::Mat *im, float *data); 36 | }; 37 | 38 | class PermuteBatch { 39 | public: 40 | virtual void Run(const std::vector imgs, float *data); 41 | }; 42 | 43 | class ResizeImgType0 { 44 | public: 45 | virtual void Run(const cv::Mat &img, cv::Mat &resize_img, 46 | std::string limit_type, int limit_side_len, float &ratio_h, 47 | float &ratio_w, bool use_tensorrt); 48 | }; 49 | 50 | class CrnnResizeImg { 51 | public: 52 | virtual void Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio, 53 | bool use_tensorrt = false, 54 | const std::vector &rec_image_shape = {3, 32, 320}); 55 | }; 56 | 57 | class ClsResizeImg { 58 | public: 59 | virtual void Run(const cv::Mat &img, cv::Mat &resize_img, 60 | bool use_tensorrt = false, 61 | const std::vector &rec_image_shape = {3, 48, 192}); 
62 | }; 63 | 64 | class TableResizeImg { 65 | public: 66 | virtual void Run(const cv::Mat &img, cv::Mat &resize_img, 67 | const int max_len = 488); 68 | }; 69 | 70 | class TablePadImg { 71 | public: 72 | virtual void Run(const cv::Mat &img, cv::Mat &resize_img, 73 | const int max_len = 488); 74 | }; 75 | 76 | class Resize { 77 | public: 78 | virtual void Run(const cv::Mat &img, cv::Mat &resize_img, const int h, 79 | const int w); 80 | }; 81 | 82 | } // namespace PaddleOCR -------------------------------------------------------------------------------- /cpp/include/args.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | // 工作模式 20 | DECLARE_string(image_path); 21 | DECLARE_int32(port); 22 | DECLARE_string(addr); 23 | 24 | // common args 25 | DECLARE_bool(use_gpu); 26 | DECLARE_bool(use_tensorrt); 27 | DECLARE_int32(gpu_id); 28 | DECLARE_int32(gpu_mem); 29 | DECLARE_int32(cpu_threads); 30 | DECLARE_int32(cpu_mem); 31 | DECLARE_bool(enable_mkldnn); 32 | DECLARE_string(precision); 33 | DECLARE_bool(benchmark); 34 | DECLARE_string(output); 35 | DECLARE_string(type); 36 | DECLARE_string(config_path); 37 | DECLARE_string(models_path); 38 | DECLARE_bool(ensure_ascii); 39 | // detection related 40 | DECLARE_string(det_model_dir); 41 | DECLARE_string(limit_type); 42 | DECLARE_int32(limit_side_len); 43 | DECLARE_double(det_db_thresh); 44 | DECLARE_double(det_db_box_thresh); 45 | DECLARE_double(det_db_unclip_ratio); 46 | DECLARE_bool(use_dilation); 47 | DECLARE_string(det_db_score_mode); 48 | DECLARE_bool(visualize); 49 | // classification related 50 | DECLARE_bool(use_angle_cls); 51 | DECLARE_string(cls_model_dir); 52 | DECLARE_double(cls_thresh); 53 | DECLARE_int32(cls_batch_num); 54 | // recognition related 55 | DECLARE_string(rec_model_dir); 56 | DECLARE_int32(rec_batch_num); 57 | DECLARE_string(rec_char_dict_path); 58 | DECLARE_int32(rec_img_h); 59 | DECLARE_int32(rec_img_w); 60 | // layout model related 61 | DECLARE_string(layout_model_dir); 62 | DECLARE_string(layout_dict_path); 63 | DECLARE_double(layout_score_threshold); 64 | DECLARE_double(layout_nms_threshold); 65 | // structure model related 66 | DECLARE_string(table_model_dir); 67 | DECLARE_int32(table_max_len); 68 | DECLARE_int32(table_batch_num); 69 | DECLARE_string(table_char_dict_path); 70 | DECLARE_bool(merge_no_span_structure); 71 | // forward related 72 | DECLARE_bool(det); 73 | DECLARE_bool(rec); 74 | DECLARE_bool(cls); 75 | DECLARE_bool(table); 76 | DECLARE_bool(layout); 77 | 78 | // 读取配置文件 79 | std::string read_config(); 80 | // 检测参数合法性 81 | std::string check_flags(); 
-------------------------------------------------------------------------------- /cpp/include/paddleocr.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | namespace PaddleOCR 22 | { 23 | class PPOCR 24 | { 25 | public: 26 | explicit PPOCR(); 27 | ~PPOCR() = default; // 默认析构函数 28 | 29 | // OCR方法,处理图像列表,返回每个图像的OCR结果向量 30 | std::vector> ocr(std::vector img_list, 31 | bool det = true, 32 | bool rec = true, 33 | bool cls = true); 34 | // OCR方法,处理单个图像,返回OCR结果 35 | std::vector ocr(cv::Mat img, bool det = true, 36 | bool rec = true, bool cls = true); 37 | 38 | void reset_timer(); // 重置计时器 39 | void benchmark_log(int img_num); // 记录基准测试日志,参数为图像数量 40 | 41 | // 智能指针 42 | std::unique_ptr detector_; // 指向 文本检测器实例 43 | std::unique_ptr classifier_; // 指向 方向分类器实例 44 | std::unique_ptr recognizer_; // 指向 文本识别器实例 45 | 46 | protected: 47 | // 时间信息 48 | std::vector time_info_det = {0, 0, 0}; 49 | std::vector time_info_rec = {0, 0, 0}; 50 | std::vector time_info_cls = {0, 0, 0}; 51 | 52 | // 文本检测:输入单张图片,在ocr_results向量中存放单行文本碎图的检测信息 53 | void det(cv::Mat img, 54 | std::vector &ocr_results); 55 | // 方向分类:输入单行碎图向量,在ocr_results向量中存放每个碎图的方向标志 56 | void cls(std::vector img_list, 57 | std::vector &ocr_results); 58 | // 
文本识别:输入单行碎图向量,在ocr_results向量中存放每个碎图的文本 59 | void rec(std::vector img_list, 60 | std::vector &ocr_results); 61 | }; 62 | } // namespace PaddleOCR 63 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/doc/designstyle.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: #ffffff; 3 | color: black; 4 | margin-right: 1in; 5 | margin-left: 1in; 6 | } 7 | 8 | 9 | h1, h2, h3, h4, h5, h6 { 10 | color: #3366ff; 11 | font-family: sans-serif; 12 | } 13 | @media print { 14 | /* Darker version for printing */ 15 | h1, h2, h3, h4, h5, h6 { 16 | color: #000080; 17 | font-family: helvetica, sans-serif; 18 | } 19 | } 20 | 21 | h1 { 22 | text-align: center; 23 | font-size: 18pt; 24 | } 25 | h2 { 26 | margin-left: -0.5in; 27 | } 28 | h3 { 29 | margin-left: -0.25in; 30 | } 31 | h4 { 32 | margin-left: -0.125in; 33 | } 34 | hr { 35 | margin-left: -1in; 36 | } 37 | 38 | /* Definition lists: definition term bold */ 39 | dt { 40 | font-weight: bold; 41 | } 42 | 43 | address { 44 | text-align: right; 45 | } 46 | /* Use the tag for bits of code and for variables and objects. */ 47 | code,pre,samp,var { 48 | color: #006000; 49 | } 50 | /* Use the tag for file and directory paths and names. */ 51 | file { 52 | color: #905050; 53 | font-family: monospace; 54 | } 55 | /* Use the tag for stuff the user should type. */ 56 | kbd { 57 | color: #600000; 58 | } 59 | div.note p { 60 | float: right; 61 | width: 3in; 62 | margin-right: 0%; 63 | padding: 1px; 64 | border: 2px solid #6060a0; 65 | background-color: #fffff0; 66 | } 67 | 68 | UL.nobullets { 69 | list-style-type: none; 70 | list-style-image: none; 71 | margin-left: -1em; 72 | } 73 | 74 | /* 75 | body:after { 76 | content: "Google Confidential"; 77 | } 78 | */ 79 | 80 | /* pretty printing styles. 
See prettify.js */ 81 | .str { color: #080; } 82 | .kwd { color: #008; } 83 | .com { color: #800; } 84 | .typ { color: #606; } 85 | .lit { color: #066; } 86 | .pun { color: #660; } 87 | .pln { color: #000; } 88 | .tag { color: #008; } 89 | .atn { color: #606; } 90 | .atv { color: #080; } 91 | pre.prettyprint { padding: 2px; border: 1px solid #888; } 92 | 93 | .embsrc { background: #eee; } 94 | 95 | @media print { 96 | .str { color: #060; } 97 | .kwd { color: #006; font-weight: bold; } 98 | .com { color: #600; font-style: italic; } 99 | .typ { color: #404; font-weight: bold; } 100 | .lit { color: #044; } 101 | .pun { color: #440; } 102 | .pln { color: #000; } 103 | .tag { color: #006; font-weight: bold; } 104 | .atn { color: #404; } 105 | .atv { color: #060; } 106 | } 107 | 108 | /* Table Column Headers */ 109 | .hdr { 110 | color: #006; 111 | font-weight: bold; 112 | background-color: #dddddd; } 113 | .hdr2 { 114 | color: #006; 115 | background-color: #eeeeee; } -------------------------------------------------------------------------------- /api/python/tbpu/parser_single_code.py: -------------------------------------------------------------------------------- 1 | # 排版解析-单栏-代码段 2 | 3 | from .parser_single_line import SingleLine 4 | from .parser_tools.line_preprocessing import linePreprocessing # 行预处理 5 | 6 | from bisect import bisect_left 7 | 8 | 9 | class SingleCode(SingleLine): 10 | def __init__(self): 11 | self.tbpuName = "排版解析-单栏-代码段" 12 | 13 | def merge_line(self, line): # 合并一行 14 | A = line[0] 15 | ba = A["box"] 16 | ha = ba[3][1] - ba[0][1] # 块A行高 17 | score = A["score"] 18 | for i in range(1, len(line)): 19 | B = line[i] 20 | bb = B["box"] 21 | ha = (ha + bb[3][1] - bb[0][1]) / 2 22 | # 合并文字,补充与间距相同的空格数 23 | space = 0 24 | if bb[0][0] > ba[1][0]: 25 | space = round((bb[0][0] - ba[1][0]) / ha) 26 | A["text"] += " " * space + B["text"] 27 | print(space, bb[0][0], ba[1][0]) 28 | # 合并包围盒 29 | yTop = min(ba[0][1], ba[1][1], bb[0][1], bb[1][1]) 30 | yBottom = 
max(ba[2][1], ba[3][1], bb[2][1], bb[3][1]) 31 | xLeft = min(ba[0][0], ba[3][0], bb[0][0], bb[3][0]) 32 | xRight = max(ba[1][0], ba[2][0], bb[1][0], bb[2][0]) 33 | ba[0][1] = ba[1][1] = yTop # y上 34 | ba[2][1] = ba[3][1] = yBottom # y下 35 | ba[0][0] = ba[3][0] = xLeft # x左 36 | ba[1][0] = ba[2][0] = xRight # x右 37 | # 置信度 38 | score += B["score"] 39 | A["score"] = score / len(line) 40 | del A["normalized_bbox"] 41 | A["end"] = "\n" 42 | return A 43 | 44 | def indent(self, tbs): # 分析所有行,构造缩进 45 | lh = 0 # 平均行高 46 | xMin = float("inf") # 句首的最左、最右x值 47 | xMax = float("-inf") 48 | for tb in tbs: 49 | b = tb["box"] 50 | lh += b[3][1] - b[0][1] 51 | x = b[0][0] 52 | xMin = min(xMin, x) 53 | xMax = max(xMax, x) 54 | lh /= len(tbs) 55 | lh2 = lh / 2 56 | # 构建缩进层级列表 57 | levelList = [] 58 | x = xMin 59 | while x < xMax: 60 | levelList.append(x) 61 | x += lh 62 | # 按照层级,为每行句首加上空格,并调整包围盒 63 | for tb in tbs: 64 | b = tb["box"] 65 | level = bisect_left(levelList, b[0][0] + lh2) - 1 # 二分查找层级点 66 | tb["text"] = " " * level + tb["text"] # 补充空格 67 | b[0][0] = b[3][0] = xMin # 左侧归零 68 | 69 | def run(self, textBlocks): 70 | textBlocks = linePreprocessing(textBlocks) # 预处理 71 | lines = self.get_lines(textBlocks) # 获取每一行 72 | tbs = [self.merge_line(line) for line in lines] # 合并所有行 73 | self.indent(tbs) # 为每行添加句首缩进 74 | return tbs 75 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/gflags_strip_flags_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 
10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | // 30 | // --- 31 | // Author: csilvers@google.com (Craig Silverstein) 32 | // 33 | // A simple program that uses STRIP_FLAG_HELP. We'll have a shell 34 | // script that runs 'strings' over this program and makes sure 35 | // that the help string is not in there. 
36 | 37 | #define STRIP_FLAG_HELP 1 38 | #include 39 | 40 | #include 41 | 42 | using GFLAGS_NAMESPACE::SetUsageMessage; 43 | using GFLAGS_NAMESPACE::ParseCommandLineFlags; 44 | 45 | 46 | DEFINE_bool(test, true, "This text should be stripped out"); 47 | 48 | int main(int argc, char** argv) { 49 | SetUsageMessage("Usage message"); 50 | ParseCommandLineFlags(&argc, &argv, false); 51 | 52 | // Unfortunately, for us, libtool can replace executables with a shell 53 | // script that does some work before calling the 'real' executable 54 | // under a different name. We need the 'real' executable name to run 55 | // 'strings' on it, so we construct this binary to print the real 56 | // name (argv[0]) on stdout when run. 57 | puts(argv[0]); 58 | 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /cpp/include/structure_layout.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Layout-analysis predictor wrapper.
//
// The constructor stores the runtime options, initialises the post-processor
// with the label file and the two thresholds, and then calls LoadModel().
//
// NOTE(review): template arguments (and the targets of the bare `#include`
// lines above this class) appear to have been stripped from this copy of the
// header, e.g. `std::vector ×` in Run(). The code tokens are left exactly as
// found; restore them from the upstream header before compiling.
class StructureLayoutRecognizer {
public:
  // model_dir                    : forwarded unchanged to LoadModel().
  // use_gpu / gpu_id / gpu_mem   : stored in the matching members.
  // cpu_math_library_num_threads : stored in the matching member.
  // use_mkldnn / use_tensorrt    : stored in the matching members.
  // precision                    : stored in precision_ (member default "fp32").
  // label_path, layout_score_threshold, layout_nms_threshold
  //                              : forwarded to post_processor_.init().
  explicit StructureLayoutRecognizer(
      const std::string &model_dir, const bool &use_gpu, const int &gpu_id,
      const int &gpu_mem, const int &cpu_math_library_num_threads,
      const bool &use_mkldnn, const std::string &label_path,
      const bool &use_tensorrt, const std::string &precision,
      const double &layout_score_threshold,
      const double &layout_nms_threshold) {
    this->use_gpu_ = use_gpu;
    this->gpu_id_ = gpu_id;
    this->gpu_mem_ = gpu_mem;
    this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
    this->use_mkldnn_ = use_mkldnn;
    this->use_tensorrt_ = use_tensorrt;
    this->precision_ = precision;

    // The post-processor is configured before the model is loaded.
    this->post_processor_.init(label_path, layout_score_threshold,
                               layout_nms_threshold);
    LoadModel(model_dir);
  }

  // Load Paddle inference model (defined out of line).
  void LoadModel(const std::string &model_dir);

  // Runs layout analysis on `img`. `result` and the last parameter are
  // non-const references, presumably outputs (results and timings) -- TODO
  // confirm against the implementation file.
  void Run(cv::Mat img, std::vector &result,
           std::vector ×);

private:
  // Predictor handle; presumably created by LoadModel() -- not visible here.
  std::shared_ptr predictor_;

  // Runtime options; the defaults below are overwritten by the constructor.
  bool use_gpu_ = false;
  int gpu_id_ = 0;
  int gpu_mem_ = 4000;
  int cpu_math_library_num_threads_ = 4;
  bool use_mkldnn_ = false;

  // Per-channel normalisation constants (three values each).
  std::vector mean_ = {0.485f, 0.456f, 0.406f};
  std::vector scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
  bool is_scale_ = true;

  bool use_tensorrt_ = false;
  std::string precision_ = "fp32";

  // pre-process
  Resize resize_op_;
  Normalize normalize_op_;
  Permute permute_op_;

  // post-process
  PicodetPostProcessor post_processor_;
};
// Document-structure pipeline built on top of PPOCR.
//
// NOTE(review): template arguments appear to have been stripped from this
// copy of the header (e.g. `std::vector structure(...)`,
// `std::vector> rec_boxes`). The code tokens are left exactly as found;
// restore them from the upstream header before compiling.
class PaddleStructure : public PPOCR
{
public:
    explicit PaddleStructure();
    ~PaddleStructure() = default;

    // Entry point. The three flags default to layout=false, table=true,
    // ocr=false; presumably they select which stages run -- TODO confirm in
    // the implementation file.
    std::vector structure(cv::Mat img,
                          bool layout = false,
                          bool table = true,
                          bool ocr = false);

    // Timing helpers; presumably operate on the time_info_* members below.
    void reset_timer();
    void benchmark_log(int img_num);

private:
    // Three-slot accumulators, zero-initialised.
    std::vector time_info_table = {0, 0, 0};
    std::vector time_info_layout = {0, 0, 0};

    // Owned model objects (unique_ptr => exclusive ownership).
    std::unique_ptr table_model_;
    std::unique_ptr layout_model_;

    // Per-stage helpers; results are written through the reference argument.
    void layout(cv::Mat img,
                std::vector &structure_result);

    void table(cv::Mat img, StructurePredictResult &structure_result);

    // Builds a string from the recognised HTML tags, cell boxes and OCR
    // results (implementation not visible here).
    std::string rebuild_table(std::vector rec_html_tags,
                              std::vector> rec_boxes,
                              std::vector &ocr_result);

    // Distance between two boxes (metric defined in the implementation file).
    float dis(std::vector &box1, std::vector &box2);

    // Strict-weak-ordering comparator: orders primarily by element [1]
    // ascending and, when those are equal, by element [0] ascending.
    // Both arguments must therefore hold at least two elements.
    static bool comparison_dis(const std::vector &dis1,
                               const std::vector &dis2)
    {
        if (dis1[1] < dis2[1])
        {
            return true;
        }
        else if (dis1[1] == dis2[1])
        {
            // Tie on the primary key: fall back to element [0].
            return dis1[0] < dis2[0];
        }
        else
        {
            return false;
        }
    }
};
73 | 74 | } // namespace PaddleOCR -------------------------------------------------------------------------------- /cpp/third_party/gflags/test/nc/gflags_nc.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | // --- 31 | // 32 | // A negative compile test for gflags. 
def get_lines(self, textBlocks):
    """Group text blocks into visual lines and set each block's "end" separator.

    Args:
        textBlocks: list of dicts, each with "text" (str) and
            "normalized_bbox" = (left, top, right, bottom). The list is
            modified in place: it is sorted by left edge, and every entry
            that has been assigned to a line is replaced by None.

    Returns:
        A list of lines ordered top-to-bottom by the top edge of each line's
        first block. Each line is a list of block dicts ordered
        left-to-right. Every block receives an "end" key: " " when the
        horizontal gap to the next block exceeds 1.5x the average height of
        the two blocks, the result of word_separator() otherwise, and "\n"
        for the last block of a line.
    """
    # Sort by left edge so each line is collected left-to-right.
    textBlocks.sort(key=lambda tb: tb["normalized_bbox"][0])
    lines = []
    for i1, tb1 in enumerate(textBlocks):
        if not tb1:  # already consumed by an earlier line
            continue
        # tb1 is the left-most remaining block; it seeds a new line.
        l1, top1, r1, bottom1 = tb1["normalized_bbox"]
        h1 = bottom1 - top1
        now_line = [tb1]
        # Look for blocks further right that belong to the same line.
        for i2 in range(i1 + 1, len(textBlocks)):
            tb2 = textBlocks[i2]
            if not tb2:
                continue
            l2, top2, r2, bottom2 = tb2["normalized_bbox"]
            h2 = bottom2 - top2
            # Candidate starts too far left of the line's current right edge.
            if l2 < r1 - h1:
                continue
            # Vertical offset too large.
            if top2 < top1 - h1 * 0.5 or bottom2 > bottom1 + h1 * 0.5:
                continue
            # Heights differ too much.
            if abs(h1 - h2) > min(h1, h2) * 0.5:
                continue
            # Accept the block and extend the line's right edge.
            now_line.append(tb2)
            textBlocks[i2] = None
            r1 = r2
        # Decide the separator between each pair of neighbours in the line.
        for left_tb, right_tb in zip(now_line, now_line[1:]):
            _, left_top, left_right, left_bottom = left_tb["normalized_bbox"]
            right_left, right_top, _, right_bottom = right_tb["normalized_bbox"]
            # Average height of the two blocks. The previous code subtracted
            # the x-coordinate `l2` here instead of the top `t2`, which made
            # the height wrong (usually negative) and forced a space for
            # almost every positive gap.
            avg_h = ((left_bottom - left_top) + (right_bottom - right_top)) * 0.5
            if right_left - left_right > avg_h * 1.5:
                # Gap is too wide: force a space.
                left_tb["end"] = " "
                continue
            letter1 = left_tb["text"][-1]
            letter2 = right_tb["text"][0]
            left_tb["end"] = word_separator(letter1, letter2)
        now_line[-1]["end"] = "\n"
        lines.append(now_line)
        textBlocks[i1] = None
    # Order the lines top-to-bottom.
    lines.sort(key=lambda tbs: tbs[0]["normalized_bbox"][1])
    return lines
17 | workerData: { path, args, options, debug }, 18 | stdout: true, 19 | }); 20 | const quqe = Queue(); 21 | quqeMap.set(this, quqe); 22 | quqe.next(); 23 | quqe.next((res) => { 24 | this.stdout.once('data', (data) => { 25 | const [, pid, socket, addr, port] = String(data).match(/^pid=(\d+)(, a=(\d+\.\d+\.\d+\.\d+:\d+))?/); 26 | this.pid = Number(pid); 27 | if (socket) { 28 | this.addr = addr; 29 | this.port = Number(port); 30 | } 31 | super.emit('init', this.pid, this.addr, this.port); 32 | res(); 33 | }); 34 | }); 35 | super.once('exit', (code) => { 36 | this.exitCode = code; 37 | quqeMap.get(this).return(null); 38 | }); 39 | } 40 | postMessage(obj: OCR.Arg) { OCR.prototype.flush.call(this, obj); } 41 | async flush(obj: OCR.Arg) { 42 | return (await quqeMap.get(this).next((res) => { 43 | super.once('message', res); 44 | super.postMessage(obj); 45 | })).value; 46 | } 47 | } 48 | 49 | namespace OCR { 50 | 51 | interface BaseArg { 52 | limit_side_len?: number; 53 | limit_type?: string; 54 | visualize?: boolean; 55 | output?: string; 56 | /* ... */ 57 | } 58 | 59 | interface Arg_Path extends BaseArg { 60 | image_path?: string | null; 61 | } 62 | interface Arg_Base64 extends BaseArg { 63 | image_base64?: string; 64 | } 65 | 66 | export type Arg = Arg_Base64 | Arg_Path; 67 | 68 | export interface coutReturnType { 69 | code: number; 70 | message: string; 71 | data: { 72 | box: [[number, number], [number, number], [number, number], [number, number]], 73 | score: number, 74 | text: string, 75 | }[] | null; 76 | } 77 | 78 | export type Options = Omit< 79 | import('child_process').SpawnOptionsWithStdioTuple<'pipe', 'pipe', 'pipe'>, 80 | keyof typeof import('./worker').__default.options 81 | >; 82 | } 83 | 84 | export = OCR; 85 | -------------------------------------------------------------------------------- /cpp/include/ocr_cls.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. 
// Classifier predictor wrapper.
//
// The constructor stores the runtime options and then calls LoadModel().
//
// NOTE(review): template arguments (and the targets of the bare `#include`
// lines above this class) appear to have been stripped from this copy of the
// header, e.g. `std::vector ×` in Run(). The code tokens are left exactly as
// found; restore them from the upstream header before compiling.
class Classifier
{
public:
    // model_dir                    : forwarded unchanged to LoadModel().
    // use_gpu / gpu_id / gpu_mem   : stored in the matching members.
    // cpu_math_library_num_threads : stored in the matching member.
    // use_mkldnn / use_tensorrt    : stored in the matching members.
    // cls_thresh                   : stored in the public member cls_thresh.
    // precision                    : stored in precision_ (member default "fp32").
    // cls_batch_num                : stored in cls_batch_num_.
    explicit Classifier(const std::string &model_dir, const bool &use_gpu,
                        const int &gpu_id, const int &gpu_mem,
                        const int &cpu_math_library_num_threads,
                        const bool &use_mkldnn, const double &cls_thresh,
                        const bool &use_tensorrt, const std::string &precision,
                        const int &cls_batch_num)
    {
        this->use_gpu_ = use_gpu;
        this->gpu_id_ = gpu_id;
        this->gpu_mem_ = gpu_mem;
        this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
        this->use_mkldnn_ = use_mkldnn;

        this->cls_thresh = cls_thresh;
        this->use_tensorrt_ = use_tensorrt;
        this->precision_ = precision;
        this->cls_batch_num_ = cls_batch_num;

        LoadModel(model_dir);
    }
    // Public threshold (default 0.9); how it is applied is not visible in this
    // header -- presumably a minimum score, TODO confirm.
    double cls_thresh = 0.9;

    // Load Paddle inference model (defined out of line).
    void LoadModel(const std::string &model_dir);

    // Classifies the images in `img_list`. The reference parameters are
    // presumably outputs (labels, scores, timings) -- TODO confirm against the
    // implementation file.
    void Run(std::vector img_list, std::vector &cls_labels,
             std::vector &cls_scores, std::vector ×);
    std::shared_ptr predictor_; // inference-library (predictor) instance; public

private:
    // Runtime options; the defaults below are overwritten by the constructor.
    bool use_gpu_ = false;
    int gpu_id_ = 0;
    int gpu_mem_ = 4000;
    int cpu_math_library_num_threads_ = 4;
    bool use_mkldnn_ = false;

    // Per-channel normalisation constants (three values each).
    std::vector mean_ = {0.5f, 0.5f, 0.5f};
    std::vector scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
    bool is_scale_ = true;
    bool use_tensorrt_ = false;
    std::string precision_ = "fp32";
    int cls_batch_num_ = 1;
    // pre-process
    ClsResizeImg resize_op_;
    Normalize normalize_op_;
    PermuteBatch permute_op_;

}; // class Classifier
// Table-structure predictor wrapper.
//
// The constructor stores the runtime options, initialises the post-processor
// and then calls LoadModel().
//
// NOTE(review): template arguments (and the targets of the bare `#include`
// lines above this class) appear to have been stripped from this copy of the
// header, e.g. `std::vector> &rec_html_tags`, `std::vector ×`. The code
// tokens are left exactly as found; restore them from the upstream header
// before compiling.
class StructureTableRecognizer {
public:
  // model_dir                    : forwarded unchanged to LoadModel().
  // use_gpu / gpu_id / gpu_mem   : stored in the matching members.
  // cpu_math_library_num_threads : stored in the matching member.
  // use_mkldnn / use_tensorrt    : stored in the matching members.
  // precision                    : stored in precision_ (member default "fp32").
  // table_batch_num              : stored in table_batch_num_.
  // table_max_len                : stored in table_max_len_.
  // label_path, merge_no_span_structure
  //                              : forwarded to post_processor_.init().
  explicit StructureTableRecognizer(
      const std::string &model_dir, const bool &use_gpu, const int &gpu_id,
      const int &gpu_mem, const int &cpu_math_library_num_threads,
      const bool &use_mkldnn, const std::string &label_path,
      const bool &use_tensorrt, const std::string &precision,
      const int &table_batch_num, const int &table_max_len,
      const bool &merge_no_span_structure) {
    this->use_gpu_ = use_gpu;
    this->gpu_id_ = gpu_id;
    this->gpu_mem_ = gpu_mem;
    this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
    this->use_mkldnn_ = use_mkldnn;
    this->use_tensorrt_ = use_tensorrt;
    this->precision_ = precision;
    this->table_batch_num_ = table_batch_num;
    this->table_max_len_ = table_max_len;

    // The post-processor is configured before the model is loaded.
    this->post_processor_.init(label_path, merge_no_span_structure);
    LoadModel(model_dir);
  }

  // Load Paddle inference model (defined out of line).
  void LoadModel(const std::string &model_dir);

  // Runs table recognition on `img_list`. The reference parameters are
  // presumably outputs (HTML tags, scores, cell boxes, timings) -- TODO
  // confirm against the implementation file.
  void Run(std::vector img_list,
           std::vector> &rec_html_tags,
           std::vector &rec_scores,
           std::vector>> &rec_boxes,
           std::vector ×);

private:
  // Predictor handle; presumably created by LoadModel() -- not visible here.
  std::shared_ptr predictor_;

  // Runtime options; the defaults below are overwritten by the constructor.
  bool use_gpu_ = false;
  int gpu_id_ = 0;
  int gpu_mem_ = 4000;
  int cpu_math_library_num_threads_ = 4;
  bool use_mkldnn_ = false;
  int table_max_len_ = 488;

  // Per-channel normalisation constants (three values each).
  std::vector mean_ = {0.485f, 0.456f, 0.406f};
  std::vector scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
  bool is_scale_ = true;

  bool use_tensorrt_ = false;
  std::string precision_ = "fp32";
  int table_batch_num_ = 1;

  // pre-process
  TableResizeImg resize_op_;
  Normalize normalize_op_;
  PermuteBatch permute_op_;
  TablePadImg pad_op_;

  // post-process
  TablePostProcessor post_processor_;

}; // class StructureTableRecognizer
/* Wrapper around the Windows _vsnprintf that always NUL-terminates `str`.
 *
 * Returns -1 when `size` is 0 (there is no room even for the terminator);
 * that is what Windows does, not what C99 specifies. Otherwise the last byte
 * of the buffer is set to '\0' up front and _vsnprintf is given only the
 * first size-1 bytes, so it can never overwrite that terminator. The return
 * value is whatever _vsnprintf returns.
 */
int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
  if (size > 0) {
    const size_t last = size - 1;
    str[last] = '\0';
    return _vsnprintf(str, last, format, ap);
  }
  return -1;
}
# =========================================
# ========== Line preprocessing ===========
# =========================================

from math import atan2, cos, sin, sqrt, pi, radians, degrees
from statistics import median

# Minimum angle (in degrees) used as the threshold for rotation handling.
angle_threshold = 3
angle_threshold_rad = radians(angle_threshold)


def _distance(point1, point2):
    """Return the Euclidean distance between two (x, y) points."""
    return sqrt((point2[0] - point1[0]) ** 2 + (point2[1] - point1[1]) ** 2)


def _calculateAngle(box):
    """Return the rotation angle (radians) of a four-corner box.

    `box` is [top-left, top-right, bottom-right, bottom-left]. The angle is
    measured along the longer of the two edges box[0]->box[1] and
    box[1]->box[2], then folded into roughly [-pi/2, pi/2), with the interval
    shifted by `angle_threshold_rad`.
    """
    width = _distance(box[0], box[1])
    height = _distance(box[1], box[2])
    # Measure along the longer edge.
    if width < height:
        angle_rad = atan2(box[2][1] - box[1][1], box[2][0] - box[1][0])
    else:
        angle_rad = atan2(box[1][1] - box[0][1], box[1][0] - box[0][0])
    # Fold into the (threshold-shifted) half-turn range.
    if angle_rad < -pi / 2 + angle_threshold_rad:
        angle_rad += pi
    elif angle_rad >= pi / 2 + angle_threshold_rad:
        angle_rad -= pi
    return angle_rad


def _estimateRotation(textBlocks):
    """Return the median rotation angle (radians) of all blocks.

    Returns 0.0 for an empty list; `statistics.median` would otherwise raise
    StatisticsError.
    """
    # block["box"] = [top-left, top-right, bottom-right, bottom-left]
    angle_rads = [_calculateAngle(block["box"]) for block in textBlocks]
    if not angle_rads:
        return 0.0
    return median(angle_rads)


def _getBboxes(textBlocks, rotation_rad):
    """Return one axis-aligned (left, top, right, bottom) tuple per block.

    When |rotation_rad| is at most `angle_threshold_rad`, the bbox is taken
    directly from the corner coordinates. Otherwise every corner is first
    rotated by -rotation_rad about the origin, and if that produces a negative
    minimum x or y, all boxes are translated so the minimum x and y become 0.
    """
    # Near-zero rotation: skip the rotation for speed.
    if abs(rotation_rad) <= angle_threshold_rad:
        bboxes = [
            (
                min(x for x, y in tb["box"]),
                min(y for x, y in tb["box"]),
                max(x for x, y in tb["box"]),
                max(y for x, y in tb["box"]),
            )
            for tb in textBlocks
        ]
    else:
        bboxes = []
        min_x, min_y = float("inf"), float("inf")
        cos_angle = cos(-rotation_rad)  # cosine of the inverse rotation
        sin_angle = sin(-rotation_rad)  # sine of the inverse rotation
        for tb in textBlocks:
            box = tb["box"]
            rotated_box = [  # rotate every corner
                (cos_angle * x - sin_angle * y, sin_angle * x + cos_angle * y)
                for x, y in box
            ]
            xs, ys = zip(*rotated_box)
            bbox = (min(xs), min(ys), max(xs), max(ys))
            bboxes.append(bbox)
            min_x, min_y = min(min_x, bbox[0]), min(min_y, bbox[1])
        # Translate so that no coordinate is negative.
        if min_x < 0 or min_y < 0:
            bboxes = [
                (x - min_x, y - min_y, x2 - min_x, y2 - min_y)
                for (x, y, x2, y2) in bboxes
            ]
    return bboxes


def linePreprocessing(textBlocks):
    """Attach a "normalized_bbox" to every block and sort the list by top edge.

    Args:
        textBlocks: list of dicts, each with a "box" key holding four (x, y)
            corners [top-left, top-right, bottom-right, bottom-left].

    Returns:
        The same list object, modified in place: each dict gains
        "normalized_bbox" = (left, top, right, bottom) and the list is sorted
        by that bbox's top coordinate. An empty list is returned unchanged.
    """
    # Estimate the overall rotation of the text.
    rotation_rad = _estimateRotation(textBlocks)
    # Compute the normalized bboxes.
    bboxes = _getBboxes(textBlocks, rotation_rad)
    # Write them back onto the blocks.
    for i, tb in enumerate(textBlocks):
        tb["normalized_bbox"] = bboxes[i]
    # Sort top-to-bottom.
    textBlocks.sort(key=lambda tb: tb["normalized_bbox"][1])
    return textBlocks
| 9 | 开发过程中,可以参考 `task.h` 和 `task_win32.cpp` 。 10 | 11 | 任务流程通过类`Task`来控制,其中绑定平台、需要重写的是以下成员函数: 12 | 13 | ### `imread_u8` 14 | 15 | 重要性:高。必须重写。 16 | 17 | 代替OpenCV的imread,从一个utf-8编码的路径字符串中读取图片,并返回cv::Mat。失败时通过`set_state`设置错误码,然后返回空Mat。注意,如果传入的路径字符串是`clipboard`,那么要执行剪贴板识图。 18 | 19 | **示例:** 20 | 21 | ```cpp 22 | // 代替cv imread,接收utf-8字符串传入,返回Mat。 23 | // flag:与cv::imread(path, flag) 的flag作用相同,指定图片读取的格式,默认值为cv::IMREAD_COLOR 24 | cv::Mat Task::imread_u8(std::string pathU8, int flag) { 25 | if (pathU8 == u8"clipboard") { // 若为剪贴板任务 26 | return imread_clipboard(flag); // 返回剪贴板识别 27 | } 28 | // TODO:尝试从路径pathU8中读取图片 29 | if(成功) { 30 | return 图片Mat; 31 | } 32 | else { 33 | // TODO: 根据错误类型,设置不同的错误码和错误信息,如下↓ 34 | set_state(CODE_ERR_PATH_READ, MSG_ERR_PATH_READ(pathU8)); // 报告状态:无法读取 35 | return cv::Mat(); // 返回空Mat 36 | } 37 | } 38 | ``` 39 | 40 | ### `imread_clipboard` 41 | 42 | 重要性:低。如果暂时不想写,可以直接 `return cv::Mat();` 。 43 | 44 | 尝试从当前剪贴板中读取图片,并返回cv::Mat。失败时通过`set_state`设置错误码,然后返回空Mat。 45 | 46 | **示例:** 47 | 48 | ```cpp 49 | // 从剪贴板读入一张图片,返回Mat。 50 | cv::Mat Task::imread_clipboard(int flag) 51 | { 52 | // TODO: 读取剪贴板 53 | if(成功) { 54 | return 图片Mat 55 | } 56 | else { 57 | // TODO: 根据错误类型,设置不同的错误码和错误信息,如下↓ 58 | set_state(CODE_ERR_CLIP_OPEN, MSG_ERR_CLIP_OPEN); // 报告状态:剪贴板打开失败 59 | return cv::Mat(); // 返回空Mat 60 | } 61 | } 62 | ``` 63 | 64 | ### `socket_mode` 65 | 66 | 重要性:低。如果暂时不想写,可以直接 `return 0;` 。 67 | 68 | 套接字服务器模式工作流程。大致分为这些步骤: 69 | 70 | - 初始化套接字 71 | - 绑定地址和端口号 72 | - 输出实际地址和端口号:如 `Socket init completed. 127.0.0.1:8888` 73 | - 开启任务循环: 74 | - 接受客户端连接 75 | - 接受客户端指令 76 | - 解析指令 77 | - 若类属性`is_exit`被设为true,则退出循环,结束进程 78 | - 调用OCR 79 | - 向客户端回传识别结果 80 | - 关闭本次连接 81 | 82 | 注意,服务器初始化成功后必须cout输出 `Socket init completed. 
IP地址:端口`,以便客户端可以知道并提取端口号。 83 | 84 | ## 平台文件 85 | 86 | 请将以上重写的成员函数,写在一个单独的.cpp中,如 `task_linux.cpp` 。然后为整个文件加上条件编译。 87 | 88 | **示例:** `task_linux.cpp` 89 | 90 | ```cpp 91 | // Linux 平台的任务处理代码 92 | 93 | #ifdef _LINUX 94 | 95 | #include "include/paddleocr.h" 96 | #include "include/args.h" 97 | #include "include/task.h" 98 | 99 | #include <平台相关的库> 100 | 101 | namespace PaddleOCR 102 | { 103 | cv::Mat Task::imread_u8(std::string pathU8, int flag){ 104 | ………… 105 | } 106 | 107 | cv::Mat Task::imread_clipboard(int flag){ 108 | ………… 109 | } 110 | 111 | int Task::socket_mode(){ 112 | ………… 113 | } 114 | } 115 | 116 | #endif 117 | ``` 118 | 119 | 如果需要使用一些辅助函数,请一并写在该文件中。如果需要为类添加绑定平台的类函数(如Windows需要用到`Task::imread_wstr`来处理宽字节码),那么将成员函数定义在 `task.h` 中,并加上条件编译。 120 | 121 | 如果需要新增错误码和错误信息宏定义,请写在 `task.h` 的头部。 122 | 123 | 总之,请参考 `task.h` 和 `task_win32.cpp` 。 124 | 125 | #### Umi-OCR 依赖相关 126 | 127 | Umi-OCR v1 需要调用 `imread_u8` 和 `imread_clipboard`。 128 | [Umi-OCR v2](https://github.com/hiroi-sora/Umi-OCR_v2) 仅需使用 `imread_u8` ,无需调用 `imread_clipboard` 和 `socket_mode` 。 129 | 130 | ## 贡献代码 131 | 132 | 我很乐意并感谢你为这个项目贡献代码!如果可以的话,请尽量遵守以下约定: 133 | 134 | 关于编码: 135 | - 尽量写注释。 136 | - 请尽量考虑到并区分不同的异常情况,用错误码和错误信息宏定义向调用方反馈。 137 | - 代码文件使用utf-8编码。 138 | - 如果需要输出调试信息,并且不希望调用方接收到(以免干扰正常的OCR流程),用cerr而不是cout。 139 | - 尽量复用项目中已有的函数而不是造轮子。 -------------------------------------------------------------------------------- /cpp/include/ocr_rec.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// Text-recognition predictor wrapper.
//
// The constructor stores the runtime options, builds the label list and then
// calls LoadModel().
//
// NOTE(review): template arguments (and the targets of the bare `#include`
// lines above this class) appear to have been stripped from this copy of the
// header, e.g. `std::vector rec_image_shape`, `std::vector ×`. The code
// tokens are left exactly as found; restore them from the upstream header
// before compiling.
class CRNNRecognizer
{
public:
    // model_dir                    : forwarded unchanged to LoadModel().
    // use_gpu / gpu_id / gpu_mem   : stored in the matching members.
    // cpu_math_library_num_threads : stored in the matching member.
    // use_mkldnn / use_tensorrt    : stored in the matching members.
    // label_path                   : read through Utility::ReadDict().
    // precision                    : stored in precision_ (member default "fp32").
    // rec_batch_num                : stored in rec_batch_num_.
    // rec_img_h / rec_img_w        : stored, and used to build
    //                                rec_image_shape_ = {3, rec_img_h, rec_img_w}.
    explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu,
                            const int &gpu_id, const int &gpu_mem,
                            const int &cpu_math_library_num_threads,
                            const bool &use_mkldnn, const std::string &label_path,
                            const bool &use_tensorrt,
                            const std::string &precision,
                            const int &rec_batch_num, const int &rec_img_h,
                            const int &rec_img_w)
    {
        this->use_gpu_ = use_gpu;
        this->gpu_id_ = gpu_id;
        this->gpu_mem_ = gpu_mem;
        this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
        this->use_mkldnn_ = use_mkldnn;
        this->use_tensorrt_ = use_tensorrt;
        this->precision_ = precision;
        this->rec_batch_num_ = rec_batch_num;
        this->rec_img_h_ = rec_img_h;
        this->rec_img_w_ = rec_img_w;
        // Replaces the member default, which is built from the default
        // height and width.
        std::vector rec_image_shape = {3, rec_img_h, rec_img_w};
        this->rec_image_shape_ = rec_image_shape;

        // Label list = "#" + dictionary entries + " ".
        this->label_list_ = Utility::ReadDict(label_path);
        this->label_list_.insert(this->label_list_.begin(),
                                 "#"); // blank char for ctc
        this->label_list_.push_back(" ");

        LoadModel(model_dir);
    }

    // Load Paddle inference model (defined out of line).
    void LoadModel(const std::string &model_dir);

    // Recognises the images in `img_list`. The reference parameters are
    // presumably outputs (texts, scores, timings) -- TODO confirm against the
    // implementation file.
    void Run(std::vector img_list, std::vector &rec_texts,
             std::vector &rec_text_scores, std::vector ×);
    std::shared_ptr predictor_; // inference-library (predictor) instance; public

private:
    // Runtime options; the defaults below are overwritten by the constructor.
    bool use_gpu_ = false;
    int gpu_id_ = 0;
    int gpu_mem_ = 4000;
    int cpu_math_library_num_threads_ = 4;
    bool use_mkldnn_ = false;

    // Filled by the constructor (see above).
    std::vector label_list_;

    // Per-channel normalisation constants (three values each).
    std::vector mean_ = {0.5f, 0.5f, 0.5f};
    std::vector scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
    bool is_scale_ = true;
    bool use_tensorrt_ = false;
    std::string precision_ = "fp32";
    int rec_batch_num_ = 6;
    int rec_img_h_ = 32;
    int rec_img_w_ = 320;
    // Initialised from the two defaults declared just above; declaration
    // order matters for this initialiser.
    std::vector rec_image_shape_ = {3, rec_img_h_, rec_img_w_};
    // pre-process
    CrnnResizeImg resize_op_;
    Normalize normalize_op_;
    PermuteBatch permute_op_;

}; // class CRNNRecognizer

# ------------------------------------------------------------------------------
# Add native rule to build gflags library
#
# hdrs, srcs: passed straight through to cc_library.
# threads:    truthy  -> target "gflags"; "-lpthread" is added to linkopts
#                        except under //:android and //:x64_windows.
#             falsy   -> target "gflags_nothreads", compiled with -DNO_THREADS
#                        and with empty linkopts.
def gflags_library(hdrs = [], srcs = [], threads = 1):
    # Defines shared by every configuration.
    common_defines = [
        "-DGFLAGS_BAZEL_BUILD",
        "-DGFLAGS_INTTYPES_FORMAT_C99",
        "-DGFLAGS_IS_A_DLL=0",
        # macros otherwise defined by CMake configured defines.h file
        "-DHAVE_STDINT_H",
        "-DHAVE_SYS_TYPES_H",
        "-DHAVE_INTTYPES_H",
        "-DHAVE_SYS_STAT_H",
        "-DHAVE_STRTOLL",
        "-DHAVE_STRTOQ",
        "-DHAVE_RWLOCK",
    ]

    # Platform-dependent defines.
    platform_defines = select({
        "//:x64_windows": [
            "-DOS_WINDOWS",
        ],
        "//conditions:default": [
            "-DHAVE_UNISTD_H",
            "-DHAVE_FNMATCH_H",
            "-DHAVE_PTHREAD",
        ],
    })

    target_name = "gflags"
    compile_opts = common_defines + platform_defines
    link_opts = []
    if not threads:
        # Single-threaded variant: different target name, extra define.
        target_name = target_name + "_nothreads"
        compile_opts = compile_opts + ["-DNO_THREADS"]
    else:
        link_opts = link_opts + select({
            "//:android": [],
            "//:x64_windows": [],
            "//conditions:default": ["-lpthread"],
        })

    native.cc_library(
        name = target_name,
        hdrs = hdrs,
        srcs = srcs,
        copts = compile_opts,
        linkopts = link_opts,
        visibility = ["//visibility:public"],
        include_prefix = "gflags",
    )
14 | 15 | #pragma once 16 | 17 | #include "paddle_api.h" 18 | #include "paddle_inference_api.h" 19 | 20 | #include 21 | #include 22 | 23 | namespace PaddleOCR 24 | { 25 | 26 | class DBDetector 27 | { 28 | public: 29 | explicit DBDetector(const std::string &model_dir, const bool &use_gpu, 30 | const int &gpu_id, const int &gpu_mem, 31 | const int &cpu_math_library_num_threads, 32 | const bool &use_mkldnn, const std::string &limit_type, 33 | const int &limit_side_len, const double &det_db_thresh, 34 | const double &det_db_box_thresh, 35 | const double &det_db_unclip_ratio, 36 | const std::string &det_db_score_mode, 37 | const bool &use_dilation, const bool &use_tensorrt, 38 | const std::string &precision) 39 | { 40 | this->use_gpu_ = use_gpu; 41 | this->gpu_id_ = gpu_id; 42 | this->gpu_mem_ = gpu_mem; 43 | this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; 44 | this->use_mkldnn_ = use_mkldnn; 45 | 46 | this->limit_type_ = limit_type; 47 | this->limit_side_len_ = limit_side_len; 48 | 49 | this->det_db_thresh_ = det_db_thresh; 50 | this->det_db_box_thresh_ = det_db_box_thresh; 51 | this->det_db_unclip_ratio_ = det_db_unclip_ratio; 52 | this->det_db_score_mode_ = det_db_score_mode; 53 | this->use_dilation_ = use_dilation; 54 | 55 | this->use_tensorrt_ = use_tensorrt; 56 | this->precision_ = precision; 57 | 58 | LoadModel(model_dir); 59 | } 60 | 61 | // Load Paddle inference model 62 | void LoadModel(const std::string &model_dir); 63 | 64 | // Run predictor 65 | void Run(cv::Mat &img, std::vector>> &boxes, 66 | std::vector ×); 67 | std::shared_ptr predictor_; // 推理库实例 68 | 69 | private: 70 | bool use_gpu_ = false; 71 | int gpu_id_ = 0; 72 | int gpu_mem_ = 4000; 73 | int cpu_math_library_num_threads_ = 4; 74 | bool use_mkldnn_ = false; 75 | 76 | std::string limit_type_ = "max"; 77 | int limit_side_len_ = 960; 78 | 79 | double det_db_thresh_ = 0.3; 80 | double det_db_box_thresh_ = 0.5; 81 | double det_db_unclip_ratio_ = 2.0; 82 | std::string 
det_db_score_mode_ = "slow"; 83 | bool use_dilation_ = false; 84 | 85 | bool visualize_ = true; 86 | bool use_tensorrt_ = false; 87 | std::string precision_ = "fp32"; 88 | 89 | std::vector mean_ = {0.485f, 0.456f, 0.406f}; 90 | std::vector scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f}; 91 | bool is_scale_ = true; 92 | 93 | // pre-process 94 | ResizeImgType0 resize_op_; 95 | Normalize normalize_op_; 96 | Permute permute_op_; 97 | 98 | // post-process 99 | DBPostProcessor post_processor_; 100 | }; 101 | 102 | } // namespace PaddleOCR -------------------------------------------------------------------------------- /api/node.js/esnext/worker.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | Object.defineProperty(exports, "__esModule", { value: true }); 3 | const worker_threads_1 = require("worker_threads"); 4 | const path_1 = require("path"); 5 | const net_1 = require("net"); 6 | const child_process_1 = require("child_process"); 7 | const __default = { 8 | path: 'PaddleOCR-json.exe', 9 | args: [], 10 | options: { 11 | argv0: undefined, 12 | stdio: 'pipe', 13 | detached: false, 14 | shell: false, 15 | windowsVerbatimArguments: undefined, 16 | windowsHide: true, 17 | }, 18 | initTag: 'OCR init completed.', 19 | socketTag: 'OCR socket mode.', 20 | pipeTag: 'OCR anonymous pipe mode.', 21 | socketMatch: /^Socket init completed. (\d+\.\d+\.\d+\.\d+:\d+)/, 22 | }; 23 | function cargs(obj) { 24 | const currentPath = process.cwd(); 25 | obj = Object.assign({}, obj); 26 | if ('image_path' in obj) { 27 | if (obj.image_path === null) 28 | throw new Error('图片路径为空。'); 29 | else if (obj.image_path) 30 | obj.image_path = (0, path_1.resolve)(currentPath, obj.image_path); 31 | } 32 | if (obj.output !== undefined) 33 | obj.output = (0, path_1.resolve)(currentPath, obj.output); 34 | return obj; 35 | } 36 | function cout(data) { 37 | return { 38 | code: data.code, 39 | message: data.code - 100 ? 
data.data : '', 40 | data: data.code - 100 ? null : data.data, 41 | }; 42 | } 43 | const end = 'at' in String ? (str) => str.at(-1) : (str) => str[str.length - 1]; 44 | if (!worker_threads_1.isMainThread) { 45 | const { path = __default.path, args = [], options, debug, } = worker_threads_1.workerData; 46 | let mode = 0; 47 | const proc = (0, child_process_1.spawn)(path, args.concat(__default.args), { 48 | ...options, 49 | ...__default.options, 50 | }); 51 | process.once('exit', proc.kill.bind(proc)); 52 | proc.once('exit', process.exit); 53 | new Promise((res) => proc.stdout.on('data', function stdout(chunk) { 54 | const data = chunk.toString(); 55 | if (!mode) { 56 | data.match(__default.pipeTag) && (mode = 1); 57 | data.match(__default.socketTag) && (mode = 2); 58 | } 59 | if (!data.match(__default.initTag)) 60 | return; 61 | proc.stdout.off('data', stdout); 62 | return res(); 63 | })).then(() => new Promise((res) => { 64 | if (mode === 1) { 65 | process.stdout.write(`pid=${proc.pid}, pipe=true\n`); 66 | return res(); 67 | } 68 | proc.stderr.once('data', () => null); 69 | proc.stdout.once('data', (chunk) => { 70 | const data = chunk.toString(); 71 | const socket = data.match(__default.socketMatch)[1].split(':'); 72 | process.stdout.write(`pid=${proc.pid}, addr=${socket[0]}, port=${socket[1]}\n`); 73 | return res([socket[0], Number(socket[1])]); 74 | }); 75 | })).then((socket) => { 76 | if (debug) { 77 | proc.stdout.pipe(process.stdout); 78 | proc.stderr.pipe(process.stderr); 79 | } 80 | else if (socket) { 81 | proc.stdout.destroy(); 82 | proc.stderr.destroy(); 83 | } 84 | if (socket) { 85 | const client = new net_1.Socket(); 86 | const [addr, port] = socket; 87 | worker_threads_1.parentPort.on('message', (data) => { 88 | client.connect(port, addr, () => { 89 | client.end(`${JSON.stringify(cargs(data))}\n`); 90 | }); 91 | }); 92 | client.on('data', (chunk) => { 93 | worker_threads_1.parentPort.postMessage(cout(JSON.parse(String(chunk)))); 94 | }); 95 | } 96 | else 
{ 97 | worker_threads_1.parentPort.on('message', (data) => { 98 | proc.stdin.write(`${JSON.stringify(cargs(data))}\n`); 99 | }); 100 | const cache = []; 101 | proc.stdout.on('data', (chunk) => { 102 | const str = String(chunk); 103 | cache.push(str); 104 | if (end(str) !== '\n') 105 | return; 106 | worker_threads_1.parentPort.postMessage(cout(JSON.parse(cache.join('')))); 107 | cache.length = 0; 108 | }); 109 | } 110 | }); 111 | } 112 | -------------------------------------------------------------------------------- /api/node.js/README.md: -------------------------------------------------------------------------------- 1 | # PaddleOCR-json-node-api 2 | 基于[hiroi-sora/PaddleOCR-json](https://github.com/hiroi-sora/PaddleOCR-json)的node.js api. 3 | 4 |
5 | Log 6 | 7 | v1.1.0 2023.5.2 8 | 9 | 适配 [hiroi-sora/PaddleOCR-json v1.3.0 Alpha.1](https://github.com/hiroi-sora/PaddleOCR-json/releases/tag/v1.3.0_alpha.1) 10 | 11 | (v1.2 回退 paddleocrjson@1.0.11-a) 12 | 13 | v1.0.8 2023.1.20 14 | 15 | \-\-\- 16 | 17 | v1.0.7 2022.11.8 18 | 19 | \-\-\- 20 | 21 | v1.0.6 2022.11.8 22 | 23 | `OCR.postMessage`与`OCR.flush`区别开. 24 | 25 | 使监听的数据与`OCR.flush`返回的数据一致. 26 | 27 | 优化动态参数中关于路径的字段. 28 | 29 | 使用`npm`包管理. 30 | 31 | 使用`ts`编译. 32 | 33 | v1.0.5 2022.10.12 34 | 35 | \-\-\- 36 | 37 | v1.0.4 2022.10.1 38 | 39 | 适配[hiroi-sora/PaddleOCR-json v1.2.1](https://github.com/hiroi-sora/PaddleOCR-json/releases/tag/v1.2.1). 40 | 41 | 不使用`iconv-lite`包. 42 | 43 | 更改启动参数输入方式. 44 | 45 | v1.0.3 2022.10.1 46 | 47 | 适配[hiroi-sora/PaddleOCR-json v1.2.1](https://github.com/hiroi-sora/PaddleOCR-json/releases/tag/v1.2.1). 48 | 49 | v1.0.2 2022.9.14 50 | 51 | 增加环境选项. 52 | 53 | v1.0.1 2022.9.14 54 | 55 | 修复无法识别 Alpha版 的启动完成标志的bug. 56 | JSON输入更改为ascii转义. 57 | 58 | v1.0.0 2022.9.10 59 | 60 | \-\-\- 61 | 62 |
63 | 64 | ## 快速开始 65 | 66 | ### OCR api 67 | 68 | ``` 69 | npm install paddleocrjson 70 | ``` 71 | 72 | ```js 73 | const OCR = require('paddleocrjson'); 74 | 75 | // const OCR = require('paddleocrjson/es5'); // ES5 76 | 77 | const ocr = new OCR('PaddleOCR-json.exe', [/* '-port=9985', '-addr=loopback' */], { 78 | cwd: './PaddleOCR-json', 79 | }, /* debug */true); 80 | 81 | ocr.flush({ image_path: 'path/to/test/img' }) 82 | .then((data) => console.log(data)) 83 | .then(() => ocr.terminate()); 84 | 85 | // debug = true 86 | ocr.stdout.on('data', (chunk) =>{ 87 | console.log(chunk.toString()); 88 | }); 89 | ocr.stderr.on('data', (data) =>{ 90 | console.log(data.toString()); 91 | }); 92 | ocr.on('exit', (code) =>{ 93 | console.log('exit code: ', code); 94 | }); 95 | 96 | ``` 97 | 98 | ### server服务 99 | 100 | 详见[PunchlY/PaddleOCR-json-node-api/test](https://github.com/PunchlY/PaddleOCR-json-node-api/tree/main/api/node.js/test). 101 | 102 | ## api 103 | 104 | ### OCR 105 | 106 | `OCR`是`worker_threads.Worker`的派生类. 107 | 108 | #### new OCR(path, args, options, debug) 109 | 110 | ```js 111 | const ocr = new OCR('PaddleOCR-json.exe', [], { 112 | cwd: './PaddleOCR-json', 113 | }, false); 114 | ``` 115 | 116 | `args`详见[Node.js child_process.spawn](https://nodejs.org/api/child_process.html#child_processspawncommand-args-options)与[hiroi-sora/PaddleOCR-json#配置参数说明](https://github.com/hiroi-sora/PaddleOCR-json#%E9%85%8D%E7%BD%AE%E5%8F%82%E6%95%B0%E8%AF%B4%E6%98%8E). 117 | 118 | `options`详见[Node.js child_process.spawn](https://nodejs.org/api/child_process.html#child_processspawncommand-args-options). 119 | 120 | #### OCR.flush(obj) 121 | 122 | ```js 123 | ocr.flush({ 124 | image_path: 'path/to/test/img', 125 | // limit_side_len: 960, 126 | // ... 127 | }); 128 | ``` 129 | `ocr.flush`返回的是`Promise`对象.
130 | 131 | `obj`详见[hiroi-sora/PaddleOCR-json/blob/main/docs/详细使用指南.md#配置参数](https://github.com/hiroi-sora/PaddleOCR-json/blob/main/docs/%E8%AF%A6%E7%BB%86%E4%BD%BF%E7%94%A8%E6%8C%87%E5%8D%97.md#%E9%85%8D%E7%BD%AE%E5%8F%82%E6%95%B0) 132 | 133 |
134 | 135 | 剪贴板相关接口已弃用,不建议使用 136 | 137 | 138 | 如果要识别剪贴板,请传入`{ image_path: null }`(`obj.image_path`设置为`null`). 139 | 140 |
141 | 142 | #### 其他 143 | 144 | 你可以用`worker_threads.Worker`的api来监听或操作`OCR`实例. 145 | 146 | 例如: 147 | ```js 148 | ocr.on('init', (pid, addr, port) => { 149 | console.log('OCR init completed.'); 150 | console.log('pid:', pid); 151 | console.log('addr:', addr); 152 | console.log('port:', port); 153 | }); 154 | 155 | ocr.on('message', (data) => { 156 | console.log(data); 157 | // { code: ..., message: ..., data: ... } 158 | }); 159 | ``` 160 | 161 | ## License 162 | 163 | ``` 164 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 165 | Version 2, December 2004 166 | 167 | Copyright (C) 2023 PunchlY 168 | 169 | Everyone is permitted to copy and distribute verbatim or modified 170 | copies of this license document, and changing it is allowed as long 171 | as the name is changed. 172 | 173 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 174 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 175 | 176 | 1. You just DO WHAT THE FUCK YOU WANT TO. 177 | ``` 178 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/src/gflags_ns.h.in: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014, Andreas Schuh 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. 
nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | // ----------------------------------------------------------------------------- 31 | // Imports the gflags library symbols into an alternative/deprecated namespace. 
32 | 33 | #ifndef GFLAGS_GFLAGS_H_ 34 | # error The internal header gflags_@ns@.h may only be included by gflags.h 35 | #endif 36 | 37 | #ifndef GFLAGS_NS_@NS@_H_ 38 | #define GFLAGS_NS_@NS@_H_ 39 | 40 | 41 | namespace @ns@ { 42 | 43 | 44 | using GFLAGS_NAMESPACE::int32; 45 | using GFLAGS_NAMESPACE::uint32; 46 | using GFLAGS_NAMESPACE::int64; 47 | using GFLAGS_NAMESPACE::uint64; 48 | 49 | using GFLAGS_NAMESPACE::RegisterFlagValidator; 50 | using GFLAGS_NAMESPACE::CommandLineFlagInfo; 51 | using GFLAGS_NAMESPACE::GetAllFlags; 52 | using GFLAGS_NAMESPACE::ShowUsageWithFlags; 53 | using GFLAGS_NAMESPACE::ShowUsageWithFlagsRestrict; 54 | using GFLAGS_NAMESPACE::DescribeOneFlag; 55 | using GFLAGS_NAMESPACE::SetArgv; 56 | using GFLAGS_NAMESPACE::GetArgvs; 57 | using GFLAGS_NAMESPACE::GetArgv; 58 | using GFLAGS_NAMESPACE::GetArgv0; 59 | using GFLAGS_NAMESPACE::GetArgvSum; 60 | using GFLAGS_NAMESPACE::ProgramInvocationName; 61 | using GFLAGS_NAMESPACE::ProgramInvocationShortName; 62 | using GFLAGS_NAMESPACE::ProgramUsage; 63 | using GFLAGS_NAMESPACE::VersionString; 64 | using GFLAGS_NAMESPACE::GetCommandLineOption; 65 | using GFLAGS_NAMESPACE::GetCommandLineFlagInfo; 66 | using GFLAGS_NAMESPACE::GetCommandLineFlagInfoOrDie; 67 | using GFLAGS_NAMESPACE::FlagSettingMode; 68 | using GFLAGS_NAMESPACE::SET_FLAGS_VALUE; 69 | using GFLAGS_NAMESPACE::SET_FLAG_IF_DEFAULT; 70 | using GFLAGS_NAMESPACE::SET_FLAGS_DEFAULT; 71 | using GFLAGS_NAMESPACE::SetCommandLineOption; 72 | using GFLAGS_NAMESPACE::SetCommandLineOptionWithMode; 73 | using GFLAGS_NAMESPACE::FlagSaver; 74 | using GFLAGS_NAMESPACE::CommandlineFlagsIntoString; 75 | using GFLAGS_NAMESPACE::ReadFlagsFromString; 76 | using GFLAGS_NAMESPACE::AppendFlagsIntoFile; 77 | using GFLAGS_NAMESPACE::ReadFromFlagsFile; 78 | using GFLAGS_NAMESPACE::BoolFromEnv; 79 | using GFLAGS_NAMESPACE::Int32FromEnv; 80 | using GFLAGS_NAMESPACE::Uint32FromEnv; 81 | using GFLAGS_NAMESPACE::Int64FromEnv; 82 | using GFLAGS_NAMESPACE::Uint64FromEnv; 83 
| using GFLAGS_NAMESPACE::DoubleFromEnv; 84 | using GFLAGS_NAMESPACE::StringFromEnv; 85 | using GFLAGS_NAMESPACE::SetUsageMessage; 86 | using GFLAGS_NAMESPACE::SetVersionString; 87 | using GFLAGS_NAMESPACE::ParseCommandLineNonHelpFlags; 88 | using GFLAGS_NAMESPACE::HandleCommandLineHelpFlags; 89 | using GFLAGS_NAMESPACE::AllowCommandLineReparsing; 90 | using GFLAGS_NAMESPACE::ReparseCommandLineNonHelpFlags; 91 | using GFLAGS_NAMESPACE::ShutDownCommandLineFlags; 92 | using GFLAGS_NAMESPACE::FlagRegisterer; 93 | 94 | #ifndef SWIG 95 | using GFLAGS_NAMESPACE::ParseCommandLineFlags; 96 | #endif 97 | 98 | 99 | } // namespace @ns@ 100 | 101 | 102 | #endif // GFLAGS_NS_@NS@_H_ 103 | -------------------------------------------------------------------------------- /cpp/include/postprocess_op.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include "include/clipper.h" 18 | #include "include/utility.h" 19 | 20 | namespace PaddleOCR { 21 | 22 | class DBPostProcessor { 23 | public: 24 | void GetContourArea(const std::vector> &box, 25 | float unclip_ratio, float &distance); 26 | 27 | cv::RotatedRect UnClip(std::vector> box, 28 | const float &unclip_ratio); 29 | 30 | float **Mat2Vec(cv::Mat mat); 31 | 32 | std::vector> 33 | OrderPointsClockwise(std::vector> pts); 34 | 35 | std::vector> GetMiniBoxes(cv::RotatedRect box, 36 | float &ssid); 37 | 38 | float BoxScoreFast(std::vector> box_array, cv::Mat pred); 39 | float PolygonScoreAcc(std::vector contour, cv::Mat pred); 40 | 41 | std::vector>> 42 | BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, 43 | const float &box_thresh, const float &det_db_unclip_ratio, 44 | const std::string &det_db_score_mode); 45 | 46 | std::vector>> 47 | FilterTagDetRes(std::vector>> boxes, 48 | float ratio_h, float ratio_w, cv::Mat srcimg); 49 | 50 | private: 51 | static bool XsortInt(std::vector a, std::vector b); 52 | 53 | static bool XsortFp32(std::vector a, std::vector b); 54 | 55 | std::vector> Mat2Vector(cv::Mat mat); 56 | 57 | inline int _max(int a, int b) { return a >= b ? a : b; } 58 | 59 | inline int _min(int a, int b) { return a >= b ? 
b : a; } 60 | 61 | template inline T clamp(T x, T min, T max) { 62 | if (x > max) 63 | return max; 64 | if (x < min) 65 | return min; 66 | return x; 67 | } 68 | 69 | inline float clampf(float x, float min, float max) { 70 | if (x > max) 71 | return max; 72 | if (x < min) 73 | return min; 74 | return x; 75 | } 76 | }; 77 | 78 | class TablePostProcessor { 79 | public: 80 | void init(std::string label_path, bool merge_no_span_structure = true); 81 | void Run(std::vector &loc_preds, std::vector &structure_probs, 82 | std::vector &rec_scores, std::vector &loc_preds_shape, 83 | std::vector &structure_probs_shape, 84 | std::vector> &rec_html_tag_batch, 85 | std::vector>> &rec_boxes_batch, 86 | std::vector &width_list, std::vector &height_list); 87 | 88 | private: 89 | std::vector label_list_; 90 | std::string end = "eos"; 91 | std::string beg = "sos"; 92 | }; 93 | 94 | class PicodetPostProcessor { 95 | public: 96 | void init(std::string label_path, const double score_threshold = 0.4, 97 | const double nms_threshold = 0.5, 98 | const std::vector &fpn_stride = {8, 16, 32, 64}); 99 | void Run(std::vector &results, 100 | std::vector> outs, std::vector ori_shape, 101 | std::vector resize_shape, int eg_max); 102 | std::vector fpn_stride_ = {8, 16, 32, 64}; 103 | 104 | private: 105 | StructurePredictResult disPred2Bbox(std::vector bbox_pred, int label, 106 | float score, int x, int y, int stride, 107 | std::vector im_shape, int reg_max); 108 | void nms(std::vector &input_boxes, 109 | float nms_threshold); 110 | 111 | std::vector label_list_; 112 | double score_threshold_ = 0.4; 113 | double nms_threshold_ = 0.5; 114 | int num_class_ = 5; 115 | }; 116 | 117 | } // namespace PaddleOCR 118 | -------------------------------------------------------------------------------- /api/node.js/ts/worker.ts: -------------------------------------------------------------------------------- 1 | import { isMainThread, parentPort, workerData } from 'worker_threads'; 2 | import { resolve as 
path_resolve } from 'path'; 3 | import { Socket } from 'net'; 4 | import { spawn } from 'child_process'; 5 | import type { Arg, coutReturnType, Options } from './index'; 6 | 7 | interface workerData { 8 | path: string; 9 | args?: string[]; 10 | options?: Options; 11 | debug?: boolean; 12 | } 13 | 14 | const __default = { 15 | path: 'PaddleOCR-json.exe', 16 | args: [], 17 | options: { 18 | argv0: undefined, 19 | stdio: 'pipe' as const, 20 | detached: false, 21 | shell: false, 22 | windowsVerbatimArguments: undefined, 23 | windowsHide: true, 24 | }, 25 | initTag: 'OCR init completed.', 26 | socketTag: 'OCR socket mode.', 27 | pipeTag: 'OCR anonymous pipe mode.', 28 | socketMatch: /^Socket init completed. (\d+\.\d+\.\d+\.\d+:\d+)/, 29 | }; 30 | export { type __default }; 31 | 32 | function cargs(obj: Arg) { 33 | 34 | const currentPath = process.cwd(); 35 | 36 | obj = Object.assign({}, obj); 37 | if ('image_path' in obj) { 38 | if (obj.image_path === null) 39 | throw new Error('图片路径为空。'); 40 | else if (obj.image_path) 41 | obj.image_path = path_resolve(currentPath, obj.image_path); 42 | } 43 | if (obj.output !== undefined) 44 | obj.output = path_resolve(currentPath, obj.output); 45 | return obj; 46 | } 47 | function cout(data: { code: number, data: any; }) { 48 | return { 49 | code: data.code, 50 | message: data.code - 100 ? data.data : '', 51 | data: data.code - 100 ? null : data.data, 52 | } as coutReturnType; 53 | } 54 | 55 | const end = 'at' in String ? 
(str: string) => str.at(-1) : (str: string) => str[str.length - 1]; 56 | 57 | if (!isMainThread) { 58 | const { 59 | path = __default.path, 60 | args = [], 61 | options, 62 | debug, 63 | } = workerData as workerData; 64 | let mode = 0; 65 | 66 | const proc = spawn(path, args.concat(__default.args), { 67 | ...options, 68 | ...__default.options, 69 | }); 70 | process.once('exit', proc.kill.bind(proc)); 71 | proc.once('exit', process.exit); 72 | 73 | new Promise((res: (value?: void) => void) => proc.stdout.on('data', function stdout(chunk) { 74 | const data: string = chunk.toString(); 75 | if (!mode) { 76 | data.match(__default.pipeTag) && (mode = 1); 77 | data.match(__default.socketTag) && (mode = 2); 78 | } 79 | if (!data.match(__default.initTag)) return; 80 | proc.stdout.off('data', stdout); 81 | return res(); 82 | })).then(() => new Promise((res: (value?: [string, number]) => void) => { 83 | if (mode === 1) { 84 | process.stdout.write(`pid=${proc.pid}, pipe=true\n`); 85 | return res(); 86 | } 87 | proc.stderr.once('data', () => null); 88 | proc.stdout.once('data', (chunk) => { 89 | const data: string = chunk.toString(); 90 | const socket = data.match(__default.socketMatch)[1].split(':'); 91 | process.stdout.write(`pid=${proc.pid}, addr=${socket[0]}, port=${socket[1]}\n`); 92 | return res([socket[0], Number(socket[1])]); 93 | }); 94 | })).then((socket) => { 95 | if (debug) { 96 | proc.stdout.pipe(process.stdout); 97 | proc.stderr.pipe(process.stderr); 98 | } else if (socket) { 99 | proc.stdout.destroy(); 100 | proc.stderr.destroy(); 101 | } 102 | 103 | if (socket) { 104 | const client = new Socket(); 105 | const [addr, port] = socket; 106 | parentPort.on('message', (data) => { 107 | client.connect(port, addr, () => { 108 | client.end(`${JSON.stringify(cargs(data))}\n`); 109 | }); 110 | }); 111 | client.on('data', (chunk) => { 112 | parentPort.postMessage(cout(JSON.parse(String(chunk)))); 113 | }); 114 | } else { 115 | parentPort.on('message', (data) => { 116 | 
proc.stdin.write(`${JSON.stringify(cargs(data))}\n`); 117 | }); 118 | const cache = []; 119 | proc.stdout.on('data', (chunk) => { 120 | const str = String(chunk); 121 | cache.push(str); 122 | if (end(str) !== '\n') return; 123 | parentPort.postMessage(cout(JSON.parse(cache.join('')))); 124 | cache.length = 0; 125 | }); 126 | } 127 | }); 128 | } 129 | -------------------------------------------------------------------------------- /cpp/README-docker.md: -------------------------------------------------------------------------------- 1 | # PaddleOCR-json V1.4 Docker 部署指南 2 | 3 | 你可以将本项目部署到Docker容器里,然后通过API或套接字来连接使用。 4 | 5 | 部署步骤: 6 | 7 | 1. [安装Docker](https://yeasy.gitbook.io/docker_practice/install) 8 | 2. clone 本仓库,**然后在 `cpp` 文件夹下打开一个终端(或PowerShell)**。 9 | 3. 接着我们使用Docker来构建镜像 10 | 11 | ```sh 12 | docker build -t paddleocr-json . 13 | ``` 14 | 15 | > [!NOTE] 16 | > 镜像构建默认会开启远程关停服务器功能,并且禁用从路径读取图片的功能。从剪贴板读取图片的功能默认禁用,而且它在Linux下也不存在。 17 | 18 | > [!TIP] 19 | > 你可以使用docker `--build-arg` 参数来开/关镜像的一些功能。 20 | > * `ENABLE_REMOTE_EXIT`: 控制是否启用远程关停引擎进程命令,[详情请看这里](README.md#cmake构建参数)。默认开启。 21 | > * `ENABLE_JSON_IMAGE_PATH`: 控制是否启用json命令image_path,[详情请看这里](README.md#cmake构建参数)。默认关闭。 22 | > 23 | > 比如下面这条修改过的命令。它会禁用远程关停服务器与image_path json命令,这样一来这个镜像就变成了一个纯服务器镜像。用户无法轻易的关停服务器或令服务器读取容器内的文件。 24 | > ```sh 25 | > docker build -t paddleocr-json \ 26 | > --build-arg "ENABLE_REMOTE_EXIT=OFF" \ 27 | > --build-arg "ENABLE_JSON_IMAGE_PATH=OFF" \ 28 | > . 
29 | > ``` 30 | > * 最后那个 `.` 必须放在整段命令的末尾,它指定了当前文件夹为docker镜像构建的基础文件夹。 31 | > [了解更多关于 `--build-arg` 的信息](https://yeasy.gitbook.io/docker_practice/image/dockerfile/arg) 32 | 33 | > [!NOTE] 34 | > 可能出现的错误: 35 | > * 如果在镜像构建的过程中出现了类似下面这样的错误。这大概率是网络问题,建议更换docker镜像源或者更换dns。 36 | > 37 | > ``` 38 | > Ign:516 http://deb.debian.org/debian stable/main amd64 va-driver-all amd64 2.17.0-1 39 | > Ign:517 http://deb.debian.org/debian stable/main amd64 vdpau-driver-all amd64 1.5-2 40 | > Ign:518 http://deb.debian.org/debian stable/main amd64 xauth amd64 1:1.1.2-1 41 | > Ign:519 http://deb.debian.org/debian stable/main amd64 xdg-user-dirs amd64 0.18-1 42 | > Ign:520 http://deb.debian.org/debian stable/main amd64 zip amd64 3.0-13 43 | > Err:402 http://deb.debian.org/debian stable/main amd64 libmunge2 amd64 0.5.15-2 44 | > Could not connect to deb.debian.org:80 (146.75.94.132), connection timed out [IP: 146.75.94.132 80] 45 | > Err:403 http://deb.debian.org/debian stable/main amd64 libtbbmalloc2 amd64 2021.8.0-2 46 | > Unable to connect to deb.debian.org:80: [IP: 146.75.94.132 80] 47 | > Err:404 http://deb.debian.org/debian stable/main amd64 libtbbbind-2-5 amd64 2021.8.0-2 48 | > Unable to connect to deb.debian.org:80: [IP: 146.75.94.132 80] 49 | > Err:405 http://deb.debian.org/debian stable/main amd64 libtbb12 amd64 2021.8.0-2 50 | > ``` 51 | 52 | 4. 
接着就可以部署了 53 | 54 | ```sh 55 | docker run -d \ 56 | --name paddleocr-json \ 57 | -p 3746:3746 \ 58 | paddleocr-json 59 | ``` 60 | 61 | * 这里我们使用参数 `-d` 来以后台模式运行容器。 62 | * 使用参数 `--name` 来命名Docker容器。 63 | * 使用参数 `-p` 来暴露容器端口 `3746` 到本地端口 `3746`。容器在运行时会默认将套接字服务器开在容器端口 `3746` 上。 64 | * 最后使用我们刚刚构建的镜像 `paddleocr-json` 来创建容器。 65 | 66 | > [!TIP] 67 | > * 你可以在上面这条docker命令的末尾加上各种PaddleOCR-json参数来修改服务器。更多配置参数请参考[简单试用](../README.md#简单试用)和[常用配置参数说明](../README.md#常用配置参数说明) 68 | > * 并且,PaddleOCR-json已经被安装到了容器系统里,你可以在容器里直接用 `PaddleOCR-json` 来运行它。当然,你需要模型库。 69 | > * 容器自带一套[模型库](https://github.com/hiroi-sora/PaddleOCR-json/releases/tag/models%2Fv1.3),存放在 `/app/models` 路径下。如果你希望使用自己的模型库,你可以[使用Docker挂载一个数据卷到容器里](https://yeasy.gitbook.io/docker_practice/data_management/volume#qi-dong-yi-ge-gua-zai-shu-ju-juan-de-rong-qi),然后使用参数 `-models_path` 来指定新的模型库路径。 70 | 71 | ## 将PaddleOCR-json Docker镜像集成到你的Dockerfile里面 72 | 73 | 你可以在完成PaddleOCR-json的Docker镜像构建之后将其集成在其他的Dockerfile里。 74 | 75 | 假设你需要在一个容器里运行PaddleOCR-json和另一个进程,我们先写一个简单的bash脚本来启动多个进程到后台。 76 | 77 | #### run.sh 78 | 79 | ```sh 80 | #!/bin/bash 81 | 82 | # 启动PaddleOCR-json到后台,监听 127.0.0.1:1234,这样你的进程就可以通过这个端口来与PaddleOCR-json通信 83 | PaddleOCR-json -models_path /app/models -addr loopback -port 1234 & 84 | 85 | # 启动你的进程到后台 86 | ./my_process & 87 | 88 | # 等待任意一个进程退出 89 | wait -n 90 | 91 | # 以第一个退出的进程的退出状态来退出脚本 92 | exit $? 93 | ``` 94 | 95 | 接下来我们就可以写一个新的Dockerfile了 96 | 97 | #### Dockerfile 98 | 99 | ```dockerfile 100 | # 以我们构建的Docker镜像为基础 101 | FROM paddleocr-json:latest 102 | 103 | # 运行其他命令 104 | RUN ... 105 | 106 | # 复制其他文件 107 | COPY .... 108 | 109 | # 复制我们刚刚写的脚本 110 | COPY run.sh run.sh 111 | 112 | # 启动PaddleOCR-json引擎进程 113 | # 这里需要用 ENTRYPOINT 来覆盖掉 paddleocr-json 镜像里的 ENTRYPOINT 114 | # 用 CMD 会出错 115 | ENTRYPOINT ["./run.sh"] 116 | ``` 117 | 118 | 之后直接构建镜像再运行就行了 119 | 120 | ```sh 121 | docker build -t myimage . 
122 | ``` 123 | 124 | ```sh 125 | docker run -d --name mycontainer myimage 126 | ``` 127 | 128 | 更多例子请看: 129 | * [官方文档](https://docs.docker.com/config/containers/multi-service_container/) 130 | * [腾讯云文章](https://cloud.tencent.com/developer/article/1683445) 131 | 132 | ## 通过API调用OCR 133 | 134 | 我们提供了 Python、Java 等语言的API,详见 [README-通过API调用](../README.md#通过API调用) 。您可以通过这些API的 **套接字模式** 来调用Docker中的OCR服务,向API接口传入容器的IP和暴露端口即可。 135 | 136 | ## 其他问题 137 | 138 | ### [关于内存泄漏 / 长期高内存占用](./README.md#关于内存泄漏--长期高内存占用) 139 | 140 | 如果你打算使用文档中提到的方法2,由于docker镜像在构建时就会自动构建编译整个项目,所以您只需要重新clone一遍本仓库并重新构建一遍docker镜像就行了。 141 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installing a binary distribution package 2 | 3 | No official binary distribution packages are provided by the gflags developers. 4 | There may, however, be binary packages available for your operating system. 5 | Please consult also the package repositories of your Linux distribution. 6 | 7 | On Debian/Ubuntu Linux, gflags can be installed using the following command: 8 | 9 | sudo apt-get install libgflags-dev 10 | 11 | On macOS, [Homebrew](https://brew.sh/) includes a formula for gflags: 12 | 13 | brew install gflags 14 | 15 | 16 | # Compiling the source code with Bazel 17 | 18 | To use gflags in a [Bazel](http://bazel.io) project, map it in as an external 19 | dependency by editing your WORKSPACE file: 20 | 21 | git_repository( 22 | name = "com_github_gflags_gflags", 23 | commit = "", 24 | remote = "https://github.com/gflags/gflags.git", 25 | ) 26 | 27 | You can then add `@com_github_gflags_gflags//:gflags` to the `deps` section of a 28 | `cc_binary` or `cc_library` rule, and `#include <gflags/gflags.h>` to include it 29 | in your source code.
30 | 31 | 32 | # Compiling the source code with vcpkg 33 | 34 | You can download and install gflags using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager: 35 | 36 | git clone https://github.com/Microsoft/vcpkg.git 37 | cd vcpkg 38 | ./bootstrap-vcpkg.sh 39 | ./vcpkg integrate install 40 | vcpkg install gflags 41 | 42 | The gflags port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. 43 | 44 | 45 | # Compiling the source code with CMake 46 | 47 | The build system of gflags is since version 2.1 based on [CMake](http://cmake.org). 48 | The common steps to build, test, and install software are therefore: 49 | 50 | 1. Extract source files. 51 | 2. Create build directory and change to it. 52 | 3. Run CMake to configure the build tree. 53 | 4. Build the software using selected build tool. 54 | 5. Test the built software. 55 | 6. Install the built files. 56 | 57 | On Unix-like systems with GNU Make as build tool, these build steps can be 58 | summarized by the following sequence of commands executed in a shell, 59 | where ```$package``` and ```$version``` are shell variables which represent 60 | the name of this package and the obtained version of the software. 61 | 62 | $ tar xzf gflags-$version-source.tar.gz 63 | $ cd gflags-$version 64 | $ mkdir build && cd build 65 | $ ccmake .. 66 | 67 | - Press 'c' to configure the build system and 'e' to ignore warnings. 68 | - Set CMAKE_INSTALL_PREFIX and other CMake variables and options. 69 | - Continue pressing 'c' until the option 'g' is available. 70 | - Then press 'g' to generate the configuration files for GNU Make. 71 | 72 | $ make 73 | $ make test (optional) 74 | $ make install (optional) 75 | 76 | In the following, only gflags-specific CMake settings available to 77 | configure the build and installation are documented. 
Note that most of these 78 | variables are for advanced users and binary package maintainers only. 79 | They usually do not have to be modified. 80 | 81 | 82 | CMake Option | Description 83 | --------------------------- | ------------------------------------------------------- 84 | CMAKE_INSTALL_PREFIX | Installation directory, e.g., "/usr/local" on Unix and "C:\Program Files\gflags" on Windows. 85 | BUILD_SHARED_LIBS | Request build of dynamic link libraries. 86 | BUILD_STATIC_LIBS | Request build of static link libraries. Implied if BUILD_SHARED_LIBS is OFF. 87 | BUILD_PACKAGING | Enable binary package generation using CPack. 88 | BUILD_TESTING | Build tests for execution by CTest. 89 | BUILD_NC_TESTS | Request inclusion of negative compilation tests (requires Python). 90 | BUILD_CONFIG_TESTS | Request inclusion of package configuration tests (requires Python). 91 | BUILD_gflags_LIBS | Request build of multi-threaded gflags libraries (if threading library found). 92 | BUILD_gflags_nothreads_LIBS | Request build of single-threaded gflags libraries. 93 | GFLAGS_NAMESPACE | Name of the C++ namespace to be used by the gflags library. Note that the public source header files are installed in a subdirectory named after this namespace. To maintain backwards compatibility with the Google Commandline Flags, set this variable to "google". The default is "gflags". 94 | GFLAGS_INTTYPES_FORMAT | String identifying format of built-in integer types. 95 | GFLAGS_INCLUDE_DIR | Name of headers installation directory relative to CMAKE_INSTALL_PREFIX. 96 | LIBRARY_INSTALL_DIR | Name of library installation directory relative to CMAKE_INSTALL_PREFIX. 97 | INSTALL_HEADERS | Request installation of public header files. 98 | -------------------------------------------------------------------------------- /cpp/src/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // 版本信息 16 | #define PROJECT_VER "v1.4.1 dev.1" 17 | #define PROJECT_NAME "PaddleOCR-json " PROJECT_VER 18 | 19 | #include "opencv2/core.hpp" 20 | #include "opencv2/imgcodecs.hpp" 21 | #include "opencv2/imgproc.hpp" 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | using namespace PaddleOCR; 31 | 32 | void structure(std::vector &cv_all_img_names) 33 | { 34 | PaddleOCR::PaddleStructure engine; 35 | 36 | if (FLAGS_benchmark) 37 | { 38 | engine.reset_timer(); 39 | } 40 | 41 | for (int i = 0; i < cv_all_img_names.size(); i++) 42 | { 43 | std::cout << "predict img: " << cv_all_img_names[i] << std::endl; 44 | cv::Mat img = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); 45 | if (!img.data) 46 | { 47 | std::cerr << "[ERROR] image read failed! 
image path: " 48 | << cv_all_img_names[i] << std::endl; 49 | continue; 50 | } 51 | 52 | std::vector structure_results = engine.structure( 53 | img, FLAGS_layout, FLAGS_table, FLAGS_det && FLAGS_rec); 54 | 55 | for (int j = 0; j < structure_results.size(); j++) 56 | { 57 | std::cout << j << "\ttype: " << structure_results[j].type 58 | << ", region: ["; 59 | std::cout << structure_results[j].box[0] << "," 60 | << structure_results[j].box[1] << "," 61 | << structure_results[j].box[2] << "," 62 | << structure_results[j].box[3] << "], score: "; 63 | std::cout << structure_results[j].confidence << ", res: "; 64 | 65 | if (structure_results[j].type == "table") 66 | { 67 | std::cout << structure_results[j].html << std::endl; 68 | if (structure_results[j].cell_box.size() > 0 && FLAGS_visualize) 69 | { 70 | std::string file_name = Utility::basename(cv_all_img_names[i]); 71 | 72 | Utility::VisualizeBboxes(img, structure_results[j], 73 | FLAGS_output + "/" + std::to_string(j) + 74 | "_" + file_name); 75 | } 76 | } 77 | else 78 | { 79 | std::cout << "count of ocr result is : " 80 | << structure_results[j].text_res.size() << std::endl; 81 | if (structure_results[j].text_res.size() > 0) 82 | { 83 | std::cout << "********** print ocr result " 84 | << "**********" << std::endl; 85 | Utility::print_result(structure_results[j].text_res); 86 | std::cout << "********** end print ocr result " 87 | << "**********" << std::endl; 88 | } 89 | } 90 | } 91 | } 92 | if (FLAGS_benchmark) 93 | { 94 | engine.benchmark_log(cv_all_img_names.size()); 95 | } 96 | } 97 | 98 | int main(int argc, char **argv) 99 | { 100 | std::cout << PROJECT_NAME << std::endl; // 版本提示 101 | // 设置gflags并读取命令行 102 | google::SetUsageMessage("PaddleOCR-json [FLAG1=ARG1] [FLAG2=ARG2]"); 103 | google::SetVersionString(PROJECT_VER); 104 | google::ParseCommandLineFlags(&argc, &argv, true); 105 | // 读取配置文件 106 | std::string configMsg = read_config(); 107 | if (!configMsg.empty()) 108 | { 109 | std::cerr << configMsg << 
std::endl; 110 | } 111 | // 检查参数合法性 112 | std::string checkMsg = check_flags(); 113 | if (!checkMsg.empty()) 114 | { 115 | std::cerr << "[ERROR] " << checkMsg << std::endl; 116 | return 1; 117 | } 118 | 119 | // 启动任务 120 | Task task = Task(); 121 | if (FLAGS_type == "ocr") 122 | { // OCR图片模式 123 | return task.ocr(); 124 | } 125 | // TODO: 图表识别模式 126 | else if (FLAGS_type == "structure") 127 | { 128 | std::cerr << "[ERROR] structure not support. " << std::endl; 129 | // structure(cv_all_img_names); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /api/node.js/es5/worker.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | var __assign = (this && this.__assign) || function () { 3 | __assign = Object.assign || function(t) { 4 | for (var s, i = 1, n = arguments.length; i < n; i++) { 5 | s = arguments[i]; 6 | for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) 7 | t[p] = s[p]; 8 | } 9 | return t; 10 | }; 11 | return __assign.apply(this, arguments); 12 | }; 13 | Object.defineProperty(exports, "__esModule", { value: true }); 14 | var worker_threads_1 = require("worker_threads"); 15 | var path_1 = require("path"); 16 | var net_1 = require("net"); 17 | var child_process_1 = require("child_process"); 18 | var __default = { 19 | path: 'PaddleOCR-json.exe', 20 | args: [], 21 | options: { 22 | argv0: undefined, 23 | stdio: 'pipe', 24 | detached: false, 25 | shell: false, 26 | windowsVerbatimArguments: undefined, 27 | windowsHide: true, 28 | }, 29 | initTag: 'OCR init completed.', 30 | socketTag: 'OCR socket mode.', 31 | pipeTag: 'OCR anonymous pipe mode.', 32 | socketMatch: /^Socket init completed. 
(\d+\.\d+\.\d+\.\d+:\d+)/, 33 | }; 34 | function cargs(obj) { 35 | var currentPath = process.cwd(); 36 | obj = Object.assign({}, obj); 37 | if ('image_path' in obj) { 38 | if (obj.image_path === null) 39 | throw new Error('图片路径为空。'); 40 | else if (obj.image_path) 41 | obj.image_path = (0, path_1.resolve)(currentPath, obj.image_path); 42 | } 43 | if (obj.output !== undefined) 44 | obj.output = (0, path_1.resolve)(currentPath, obj.output); 45 | return obj; 46 | } 47 | function cout(data) { 48 | return { 49 | code: data.code, 50 | message: data.code - 100 ? data.data : '', 51 | data: data.code - 100 ? null : data.data, 52 | }; 53 | } 54 | var end = 'at' in String ? function (str) { return str.at(-1); } : function (str) { return str[str.length - 1]; }; 55 | if (!worker_threads_1.isMainThread) { 56 | var _a = worker_threads_1.workerData, _b = _a.path, path = _b === void 0 ? __default.path : _b, _c = _a.args, args = _c === void 0 ? [] : _c, options = _a.options, debug_1 = _a.debug; 57 | var mode_1 = 0; 58 | var proc_1 = (0, child_process_1.spawn)(path, args.concat(__default.args), __assign(__assign({}, options), __default.options)); 59 | process.once('exit', proc_1.kill.bind(proc_1)); 60 | proc_1.once('exit', process.exit); 61 | new Promise(function (res) { return proc_1.stdout.on('data', function stdout(chunk) { 62 | var data = chunk.toString(); 63 | if (!mode_1) { 64 | data.match(__default.pipeTag) && (mode_1 = 1); 65 | data.match(__default.socketTag) && (mode_1 = 2); 66 | } 67 | if (!data.match(__default.initTag)) 68 | return; 69 | proc_1.stdout.off('data', stdout); 70 | return res(); 71 | }); }).then(function () { return new Promise(function (res) { 72 | if (mode_1 === 1) { 73 | process.stdout.write("pid=".concat(proc_1.pid, ", pipe=true\n")); 74 | return res(); 75 | } 76 | proc_1.stderr.once('data', function () { return null; }); 77 | proc_1.stdout.once('data', function (chunk) { 78 | var data = chunk.toString(); 79 | var socket = 
data.match(__default.socketMatch)[1].split(':'); 80 | process.stdout.write("pid=".concat(proc_1.pid, ", addr=").concat(socket[0], ", port=").concat(socket[1], "\n")); 81 | return res([socket[0], Number(socket[1])]); 82 | }); 83 | }); }).then(function (socket) { 84 | if (debug_1) { 85 | proc_1.stdout.pipe(process.stdout); 86 | proc_1.stderr.pipe(process.stderr); 87 | } 88 | else if (socket) { 89 | proc_1.stdout.destroy(); 90 | proc_1.stderr.destroy(); 91 | } 92 | if (socket) { 93 | var client_1 = new net_1.Socket(); 94 | var addr_1 = socket[0], port_1 = socket[1]; 95 | worker_threads_1.parentPort.on('message', function (data) { 96 | client_1.connect(port_1, addr_1, function () { 97 | client_1.end("".concat(JSON.stringify(cargs(data)), "\n")); 98 | }); 99 | }); 100 | client_1.on('data', function (chunk) { 101 | worker_threads_1.parentPort.postMessage(cout(JSON.parse(String(chunk)))); 102 | }); 103 | } 104 | else { 105 | worker_threads_1.parentPort.on('message', function (data) { 106 | proc_1.stdin.write("".concat(JSON.stringify(cargs(data)), "\n")); 107 | }); 108 | var cache_1 = []; 109 | proc_1.stdout.on('data', function (chunk) { 110 | var str = String(chunk); 111 | cache_1.push(str); 112 | if (end(str) !== '\n') 113 | return; 114 | worker_threads_1.parentPort.postMessage(cout(JSON.parse(cache_1.join('')))); 115 | cache_1.length = 0; 116 | }); 117 | } 118 | }); 119 | } 120 | -------------------------------------------------------------------------------- /cpp/include/task.h: -------------------------------------------------------------------------------- 1 | // PaddleOCR-json 2 | // https://github.com/hiroi-sora/PaddleOCR-json 3 | 4 | #ifndef TASK_H 5 | #define TASK_H 6 | 7 | #include "include/nlohmann/json.hpp" // json库 8 | 9 | namespace PaddleOCR 10 | { 11 | 12 | // ==================== 标志码 ==================== 13 | #define CODE_INIT 0 // 每回合初始值,回合结束时仍为它代表受管控的区域内未发现错误 14 | // 识别成功 15 | #define CODE_OK 100 // 成功,且识别出文字 16 | #define CODE_OK_NONE 101 // 成功,且未识别到文字 
17 | #define MSG_OK_NONE(p) "No text found in image. Path: \"" + p + "\"" 18 | // Reading an image by path: failures 19 | #define CODE_ERR_PATH_EXIST 200 // image path does not exist 20 | #define MSG_ERR_PATH_EXIST(p) "Image path does not exist. Path: \"" + p + "\"" 21 | #define CODE_ERR_PATH_CONV 201 // image path string cannot be converted to wstring 22 | #define MSG_ERR_PATH_CONV(p) "Image path failed to convert to utf-16 wstring. Path: \"" + p + "\"" 23 | #define CODE_ERR_PATH_READ 202 // image path exists, but the file cannot be opened 24 | #define MSG_ERR_PATH_READ(p) "Image open failed. Path: \"" + p + "\"" 25 | #define CODE_ERR_PATH_DECODE 203 // image opened, but its content cannot be decoded by OpenCV 26 | #define MSG_ERR_PATH_DECODE(p) "Image decode failed. Path: \"" + p + "\"" 27 | 28 | #if defined(_WIN32) && defined(ENABLE_CLIPBOARD) 29 | // Reading an image from the clipboard: failures 30 | #define CODE_ERR_CLIP_OPEN 210 // failed to open the clipboard ( OpenClipboard ) 31 | #define MSG_ERR_CLIP_OPEN "Clipboard open failed." 32 | #define CODE_ERR_CLIP_EMPTY 211 // clipboard is empty ( GetPriorityClipboardFormat NULL ) 33 | #define MSG_ERR_CLIP_EMPTY "Clipboard is empty." 34 | #define CODE_ERR_CLIP_FORMAT 212 // clipboard format is not supported ( GetPriorityClipboardFormat -1 ) 35 | #define MSG_ERR_CLIP_FORMAT "Clipboard format is not valid." 36 | #define CODE_ERR_CLIP_DATA 213 // failed to get the clipboard data handle, usually because another program is holding the clipboard ( GetClipboardData NULL ) 37 | #define MSG_ERR_CLIP_DATA "Getting clipboard data handle failed." 38 | #define CODE_ERR_CLIP_FILES 214 // number of files found on the clipboard is not 1 ( DragQueryFile != 1 ) 39 | #define MSG_ERR_CLIP_FILES(n) "Clipboard number of query files is not valid. Number: " + std::to_string(n) 40 | #define CODE_ERR_CLIP_GETOBJ 215 // failed to retrieve graphics object info from the clipboard ( GetObject NULL ) 41 | #define MSG_ERR_CLIP_GETOBJ "Clipboard get bitmap object failed." 42 | #define CODE_ERR_CLIP_BITMAP 216 // failed to get bitmap data from the clipboard ( GetBitmapBits copied zero bytes ) 43 | #define MSG_ERR_CLIP_BITMAP "Getting clipboard bitmap bits failed." 44 | #define CODE_ERR_CLIP_CHANNEL 217 // unsupported channel count of the clipboard bitmap ( nChannels is not 1, 3 or 4 ) 45 | #define MSG_ERR_CLIP_CHANNEL(n) "Clipboard number of image channels is not valid. 
Number: " + std::to_string(n) 46 | #endif 47 | 48 | // base64读图,失败 49 | #define CODE_ERR_BASE64_DECODE 300 // base64字符串解析为string失败 50 | #define MSG_ERR_BASE64_DECODE "Base64 decode failed." 51 | #define CODE_ERR_BASE64_IM_DECODE 301 // base64字符串解析成功,但读取到的内容无法被opencv解码 52 | #define MSG_ERR_BASE64_IM_DECODE "Base64 data imdecode failed." 53 | // json相关 54 | #define CODE_ERR_JSON_DUMP 400 // json对象 转字符串失败 55 | #define MSG_ERR_JSON_DUMP "Json dump failed." 56 | #define CODE_ERR_JSON_PARSE 401 // json字符串 转对象失败 57 | #define MSG_ERR_JSON_PARSE "Json parse failed." 58 | #define CODE_ERR_JSON_PARSE_KEY 402 // json对象 解析某个键时失败 59 | #define MSG_ERR_JSON_PARSE_KEY(k) "Json parse key [" + k + "] failed." 60 | #define CODE_ERR_NO_TASK 403 // 未发现有效任务 61 | #define MSG_ERR_NO_TASK "No valid tasks." 62 | 63 | // ==================== 任务调用类 ==================== 64 | class Task 65 | { 66 | 67 | public: 68 | int ocr(); // OCR图片 69 | 70 | private: 71 | bool is_exit = false; // 为true时退出任务循环 72 | std::unique_ptr ppocr; // OCR引擎智能指针 73 | int t_code; // 本轮任务状态码 74 | std::string t_msg; // 本轮任务状态消息 75 | 76 | // 任务流程 77 | void init_engine(); // 初始化OCR引擎 78 | void memory_check_cleanup(); // 检查内存占用,达到上限时释放内存 79 | std::string run_ocr(std::string); // 输入用户传入值(字符串),返回结果json字符串 80 | int single_image_mode(); // 单次识别模式 81 | int socket_mode(); // 套接字模式 82 | int anonymous_pipe_mode(); // 匿名管道模式 83 | int get_memory_mb(); // 获取当前内存占用。返回整数,单位MB。失败时返回-1。 84 | 85 | // 输出相关 86 | void set_state(int code = CODE_INIT, std::string msg = ""); // 设置状态 87 | std::string get_state_json(int code = CODE_INIT, std::string msg = ""); // 获取状态json字符串 88 | std::string get_ocr_result_json(const std::vector &); // 传入OCR结果,返回json字符串 89 | 90 | // 输入相关 91 | std::string json_dump(nlohmann::json); // json对象转字符串 92 | cv::Mat imread_json(std::string &); // 输入json字符串,解析json并返回图片Mat 93 | cv::Mat imread_u8(std::string path, int flag = cv::IMREAD_COLOR); // 代替cv imread,输入utf-8字符串,返回Mat。失败时设置错误码,并返回空Mat。 94 | cv::Mat imread_clipboard(int 
flag = cv::IMREAD_COLOR); // 从当前剪贴板中读取图片 95 | cv::Mat imread_base64(std::string &, int flag = cv::IMREAD_COLOR); // 输入base64编码的字符串,返回Mat 96 | #ifdef _WIN32 97 | cv::Mat imread_wstr(std::wstring pathW, int flags = cv::IMREAD_COLOR); // 输入unicode wstring字符串,返回Mat。 98 | #endif 99 | 100 | // 其他 101 | 102 | // ipv4 地址转 uint32_t 103 | int addr_to_uint32(const std::string &addr, uint32_t &addr_out); 104 | }; 105 | 106 | } // namespace PaddleOCR 107 | 108 | #endif // TASK_H -------------------------------------------------------------------------------- /cpp/include/utility.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "opencv2/core.hpp" 30 | #include "opencv2/imgcodecs.hpp" 31 | #include "opencv2/imgproc.hpp" 32 | 33 | namespace PaddleOCR 34 | { 35 | 36 | struct OCRPredictResult 37 | { 38 | std::vector> box; 39 | std::string text; 40 | float score = -1.0; 41 | float cls_score; 42 | int cls_label = -1; 43 | }; 44 | 45 | struct StructurePredictResult 46 | { 47 | std::vector box; 48 | std::vector> cell_box; 49 | std::string type; 50 | std::vector text_res; 51 | std::string html; 52 | float html_score = -1; 53 | float confidence; 54 | }; 55 | 56 | class Utility 57 | { 58 | public: 59 | static std::vector ReadDict(const std::string &path); 60 | 61 | static void VisualizeBboxes(const cv::Mat &srcimg, 62 | const std::vector &ocr_result, 63 | const std::string &save_path); 64 | 65 | static void VisualizeBboxes(const cv::Mat &srcimg, 66 | const StructurePredictResult &structure_result, 67 | const std::string &save_path); 68 | 69 | template 70 | inline static size_t argmax(ForwardIterator first, ForwardIterator last) 71 | { 72 | return std::distance(first, std::max_element(first, last)); 73 | } 74 | 75 | static void GetAllFiles(const char *dir_name, 76 | std::vector &all_inputs); 77 | 78 | static cv::Mat GetRotateCropImage(const cv::Mat &srcimage, 79 | std::vector> box); 80 | 81 | static std::vector argsort(const std::vector &array); 82 | 83 | static inline void ltrim(std::string &str); 84 | static inline void rtrim(std::string &str); 85 | static inline void trim(std::string &str); 86 | static inline std::string ltrim_copy(std::string str); 87 | static inline std::string rtrim_copy(std::string str); 88 | static inline std::string trim_copy(std::string str); 89 | 90 | static inline bool str_starts_with(const std::string& str, const std::string& pattern) 91 | { 92 | // 
https://stackoverflow.com/a/40441240 93 | return (str.rfind(pattern, 0) == 0); 94 | } 95 | 96 | static std::string basename(const std::string &filename); 97 | 98 | static std::string pathjoin(const std::string& parent, const std::string& child); 99 | 100 | static bool PathExists(const std::string &path); 101 | 102 | static void CreateDir(const std::string &path); 103 | 104 | static void print_result(const std::vector &ocr_result); 105 | 106 | static cv::Mat crop_image(cv::Mat &img, const std::vector &area); 107 | static cv::Mat crop_image(cv::Mat &img, const std::vector &area); 108 | 109 | static void sorted_boxes(std::vector &ocr_result); 110 | 111 | static std::vector xyxyxyxy2xyxy(std::vector> &box); 112 | static std::vector xyxyxyxy2xyxy(std::vector &box); 113 | 114 | static float fast_exp(float x); 115 | static std::vector 116 | activation_function_softmax(std::vector &src); 117 | static float iou(std::vector &box1, std::vector &box2); 118 | static float iou(std::vector &box1, std::vector &box2); 119 | 120 | private: 121 | static bool comparison_box(const OCRPredictResult &result1, 122 | const OCRPredictResult &result2) 123 | { 124 | if (result1.box[0][1] < result2.box[0][1]) 125 | { 126 | return true; 127 | } 128 | else if (result1.box[0][1] == result2.box[0][1]) 129 | { 130 | return result1.box[0][0] < result2.box[0][0]; 131 | } 132 | else 133 | { 134 | return false; 135 | } 136 | } 137 | }; 138 | 139 | } // namespace PaddleOCR -------------------------------------------------------------------------------- /cpp/third_party/gflags/src/gflags_completions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2008, Google Inc. 4 | # All rights reserved. 
5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are 8 | # met: 9 | # 10 | # * Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # * Redistributions in binary form must reproduce the above 13 | # copyright notice, this list of conditions and the following disclaimer 14 | # in the documentation and/or other materials provided with the 15 | # distribution. 16 | # * Neither the name of Google Inc. nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | # 32 | # --- 33 | # Author: Dave Nicponski 34 | # 35 | # This script is invoked by bash in response to a matching compspec. When 36 | # this happens, bash calls this script using the command shown in the -C 37 | # block of the complete entry, but also appends 3 arguments. 
They are: 38 | # - The command being used for completion 39 | # - The word being completed 40 | # - The word preceding the completion word. 41 | # 42 | # Here's an example of how you might use this script: 43 | # $ complete -o bashdefault -o default -o nospace -C \ 44 | # '/usr/local/bin/gflags_completions.sh --tab_completion_columns $COLUMNS' \ 45 | # time env binary_name another_binary [...] 46 | 47 | # completion_word_index gets the index of the (N-1)th argument for 48 | # this command line. completion_word gets the actual argument from 49 | # this command line at the (N-1)th position 50 | completion_word_index="$(($# - 1))" 51 | completion_word="${!completion_word_index}" 52 | 53 | # TODO(user): Replace this once gflags_completions.cc has 54 | # a bool parameter indicating unambiguously to hijack the process for 55 | # completion purposes. 56 | if [ -z "$completion_word" ]; then 57 | # Until an empty value for the completion word stops being misunderstood 58 | # by binaries, don't actually execute the binary or the process 59 | # won't be hijacked! 60 | exit 0 61 | fi 62 | 63 | # binary_index gets the index of the command being completed (which bash 64 | # places in the (N-2)nd position. binary gets the actual command from 65 | # this command line at that (N-2)nd position 66 | binary_index="$(($# - 2))" 67 | binary="${!binary_index}" 68 | 69 | # For completions to be universal, we may have setup the compspec to 70 | # trigger on 'harmless pass-through' commands, like 'time' or 'env'. 71 | # If the command being completed is one of those two, we'll need to 72 | # identify the actual command being executed. To do this, we need 73 | # the actual command line that the was pressed on. Bash helpfully 74 | # places this in the $COMP_LINE variable. 
75 | if [ "$binary" == "time" ] || [ "$binary" == "env" ]; then 76 | # we'll assume that the first 'argument' is actually the 77 | # binary 78 | 79 | 80 | # TODO(user): This is not perfect - the 'env' command, for instance, 81 | # is allowed to have options between the 'env' and 'the command to 82 | # be executed'. For example, consider: 83 | # $ env FOO="bar" bin/do_something --help 84 | # In this case, we'll mistake the FOO="bar" portion as the binary. 85 | # Perhaps we should continuing consuming leading words until we 86 | # either run out of words, or find a word that is a valid file 87 | # marked as executable. I can't think of any reason this wouldn't 88 | # work. 89 | 90 | # Break up the 'original command line' (not this script's command line, 91 | # rather the one the was pressed on) and find the second word. 92 | parts=( ${COMP_LINE} ) 93 | binary=${parts[1]} 94 | fi 95 | 96 | # Build the command line to use for completion. Basically it involves 97 | # passing through all the arguments given to this script (except the 3 98 | # that bash added), and appending a '--tab_completion_word "WORD"' to 99 | # the arguments. 100 | params="" 101 | for ((i=1; i<=$(($# - 3)); ++i)); do 102 | params="$params \"${!i}\""; 103 | done 104 | params="$params --tab_completion_word \"$completion_word\"" 105 | 106 | # TODO(user): Perhaps stash the output in a temporary file somewhere 107 | # in /tmp, and only cat it to stdout if the command returned a success 108 | # code, to prevent false positives 109 | 110 | # If we think we have a reasonable command to execute, then execute it 111 | # and hope for the best. 112 | candidate=$(type -p "$binary") 113 | if [ ! 
-z "$candidate" ]; then 114 | eval "$candidate 2>/dev/null $params" 115 | elif [ -f "$binary" ] && [ -x "$binary" ]; then 116 | eval "$binary 2>/dev/null $params" 117 | fi 118 | -------------------------------------------------------------------------------- /api/PowerShell/PPOCR_api.ps1: -------------------------------------------------------------------------------- 1 | # 调用 PaddleOCR-json.exe 的 PowerShell Api 2 | # 项目主页: 3 | # https://github.com/hiroi-sora/PaddleOCR-json 4 | 5 | ########################################################################### 6 | function asc($param){ 7 | ## Returns $param with every character outside the printable ASCII range 33..126 (spaces, control characters, Chinese text, ...) replaced by a JSON \uXXXX escape. The escape is always padded to 4 hex digits, because JSON rejects shorter forms such as \u20. 8 | $rtn = '' 9 | $list = $param -split '' 10 | foreach ($char in $list){ 11 | if($char -ne ''){ 12 | if([int]([char]$char) -gt 32 -and [int]([char]$char) -lt 127){ 13 | $rtn += $char 14 | } 15 | else{ 16 | $rtn += ("\u" + ("{0:x4}" -f [int]([char]$char))) 17 | } 18 | } 19 | } 20 | return $rtn 21 | } 22 | ########################################################################### 23 | class PPOCR { 24 | [System.Object]$process # 子进程对象 25 | [int] $runFlag = 0 # 运行标志。0正在初始化,1正常运行中 26 | [System.Diagnostics.Process] $stdSender # 保存子进程stdout管道 27 | [int] $__ENABLE_CLIPBOARD = 0 # 剪贴板是否启用。0禁用,1启用 28 | [string] $imgJson # 缓存图片识别结果json字符串 29 | [string] $processID # OCR子进程id,用于组成标识符 30 | [string] $eventInit # OCR初始化完成的事件标识符,启动时定义 31 | [string] $eventJson # 一次取得json完成的事件标识符,传入图片时重新定义 32 | 33 | PPOCR( [string]$exePath,[string]$arg ) { 34 | # 初始化识别器。 35 | # :exePath: 识别器`PaddleOCR_json.exe`的路径。 36 | $WorkingDirectory = Split-Path -Path $exePath # 工作目录为父目录 37 | # 初始化进程信息,重定向输入和输出 38 | $psi = New-Object System.Diagnostics.ProcessStartInfo 39 | $psi.FileName = $exePath 40 | $psi.WorkingDirectory = $WorkingDirectory 41 | $psi.Arguments = $arg 42 | $psi.RedirectStandardInput = $true 43 | $psi.RedirectStandardOutput = $true 44 | $psi.CreateNoWindow = $true 45 | $psi.UseShellExecute = $false 46 | # 初始化进程对象 47 | $this.process = New-Object System.Diagnostics.Process 48 | 
$this.process.StartInfo = $psi 49 | # 给进程对象添加OutputDataReceived(得到进程输出内容)事件的订阅。通过-MessageData将$this传入-action作用域。 50 | Register-ObjectEvent -InputObject $this.process -EventName OutputDataReceived -action $this.getStdout -MessageData $this 51 | # 启动进程 52 | $this.process.Start() 53 | $this.process.BeginOutputReadLine() 54 | # 等待初始化完成事件 55 | $this.processID = $this.process.Id # 记录进程号 56 | $nowTime = Get-Date -Format "HHmmssffff" 57 | $this.eventInit = "OCRinit" + $this.processID + $nowTime # 生成事件标识符 58 | # Write-Host "初始化OCR等待中,事件标识符为"($this.eventInit) 59 | Wait-Event -SourceIdentifier $this.eventInit # 阻塞,等待初始化完成事件 60 | Write-Host "初始化OCR成功,进程号为"($this.process.Id) 61 | } 62 | 63 | # 接收stdout数据的action 64 | [ScriptBlock] $getStdout = { 65 | $this_ = $Event.MessageData # 获取所属对象 66 | $this_.stdSender = $Event.Sender # 将输入接口发送到调用对象 67 | $getData = $Event.SourceEventArgs.Data 68 | switch ( $this_.runFlag ) { 69 | # 初始化中,等待取得完成标志 70 | 0 { 71 | if ( $getData.contains("OCR init completed.") ) { 72 | $this_.runFlag = 1 73 | New-Event -SourceIdentifier $this_.eventInit # 发送初始化完成事件 74 | } 75 | elseif ( $getData.contains("OCR clipboard enbaled.") ) { 76 | $this_.__ENABLE_CLIPBOARD = 1 # 检测到剪贴板已启用 77 | } 78 | break 79 | } 80 | # 正常运行中 81 | 1 { 82 | $this_.imgJson = $getData 83 | New-Event -SourceIdentifier $this_.eventJson # 发送取得json事件 84 | break 85 | } 86 | } 87 | } 88 | 89 | [PSCustomObject] isClipboardEnabled() { 90 | return $this.__ENABLE_CLIPBOARD; 91 | } 92 | 93 | [PSCustomObject] runDict( [string]$writeDict ) { 94 | if ($this.stdSender) { 95 | $nowTime = Get-Date -Format "HHmmssffff" 96 | $this.eventJson = "OCRjson" + $this.processID + $nowTime # 更新事件标识符 97 | $this.stdSender.StandardInput.WriteLine($writeDict); # 向管道写入 98 | Wait-Event -SourceIdentifier $this.eventJson # 阻塞,等待取得json 99 | try { 100 | $getdict = $this.imgJson | ConvertFrom-Json 101 | return $getdict 102 | } 103 | catch { 104 | return @{code = 402; data = 
"识别器输出值反序列化JSON失败,疑似传入了不存在或无法识别的图片。异常信息:$($PSItem.ToString())原始内容:$($this.imgJson)" } 105 | } 106 | } 107 | else { 108 | # 输入流不存在,可能为未初始化完毕 109 | return @{ code = 400; data = "子进程输入流不存在" } 110 | } 111 | } 112 | 113 | 114 | # 识别图片 115 | [PSCustomObject] run( [string]$imgPath ) { 116 | # 对一张图片文字识别。 117 | # :imgPath: 图片路径。 118 | $writeDict = asc (@{ image_path = $imgPath } | ConvertTo-Json -Compress) #更新图片路径为json格式,asc函数替换中文 119 | return $this.runDict($writeDict); # 向管道写入图片路径 120 | } 121 | 122 | [PSCustomObject] runClipboard() { 123 | if ( $this.__ENABLE_CLIPBOARD ) { 124 | return $this.run("clipboard"); 125 | } 126 | else { 127 | throw "剪贴板功能不存在或已禁用。" 128 | } 129 | } 130 | 131 | [PSCustomObject] runBase64( [string]$imgBase64 ) { 132 | $writeDict = @{ image_base64 = $imgBase64 } | ConvertTo-Json -Compress 133 | return $this.runDict($writeDict); 134 | } 135 | 136 | [PSCustomObject] runByte( $imgByte ) { 137 | $imgBase64 = [convert]::ToBase64String($imgByte) 138 | return $this.runBase64($imgBase64); 139 | } 140 | 141 | # 结束子进程 142 | [void] stop() { 143 | $this.stdSender.StandardInput.WriteLine('{"exit":""}') 144 | Write-Host "识别器进程结束。" 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /cpp/src/preprocess_op.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include 16 | 17 | namespace PaddleOCR { 18 | 19 | void Permute::Run(const cv::Mat *im, float *data) { 20 | int rh = im->rows; 21 | int rw = im->cols; 22 | int rc = im->channels(); 23 | for (int i = 0; i < rc; ++i) { 24 | cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, data + i * rh * rw), i); 25 | } 26 | } 27 | 28 | void PermuteBatch::Run(const std::vector imgs, float *data) { 29 | for (int j = 0; j < imgs.size(); j++) { 30 | int rh = imgs[j].rows; 31 | int rw = imgs[j].cols; 32 | int rc = imgs[j].channels(); 33 | for (int i = 0; i < rc; ++i) { 34 | cv::extractChannel( 35 | imgs[j], cv::Mat(rh, rw, CV_32FC1, data + (j * rc + i) * rh * rw), i); 36 | } 37 | } 38 | } 39 | 40 | void Normalize::Run(cv::Mat *im, const std::vector &mean, 41 | const std::vector &scale, const bool is_scale) { 42 | double e = 1.0; 43 | if (is_scale) { 44 | e /= 255.0; 45 | } 46 | (*im).convertTo(*im, CV_32FC3, e); 47 | std::vector bgr_channels(3); 48 | cv::split(*im, bgr_channels); 49 | for (auto i = 0; i < bgr_channels.size(); i++) { 50 | bgr_channels[i].convertTo(bgr_channels[i], CV_32FC1, 1.0 * scale[i], 51 | (0.0 - mean[i]) * scale[i]); 52 | } 53 | cv::merge(bgr_channels, *im); 54 | } 55 | 56 | void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, 57 | std::string limit_type, int limit_side_len, 58 | float &ratio_h, float &ratio_w, bool use_tensorrt) { 59 | int w = img.cols; 60 | int h = img.rows; 61 | float ratio = 1.f; 62 | if (limit_type == "min") { 63 | int min_wh = std::min(h, w); 64 | if (min_wh < limit_side_len) { 65 | if (h < w) { 66 | ratio = float(limit_side_len) / float(h); 67 | } else { 68 | ratio = float(limit_side_len) / float(w); 69 | } 70 | } 71 | } else { 72 | int max_wh = std::max(h, w); 73 | if (max_wh > limit_side_len) { 74 | if (h > w) { 75 | ratio = float(limit_side_len) / float(h); 76 | } else { 77 | ratio = 
float(limit_side_len) / float(w); 78 | } 79 | } 80 | } 81 | 82 | int resize_h = int(float(h) * ratio); 83 | int resize_w = int(float(w) * ratio); 84 | 85 | resize_h = std::max(int(round(float(resize_h) / 32) * 32), 32); 86 | resize_w = std::max(int(round(float(resize_w) / 32) * 32), 32); 87 | 88 | cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); 89 | ratio_h = float(resize_h) / float(h); 90 | ratio_w = float(resize_w) / float(w); 91 | } 92 | 93 | void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio, 94 | bool use_tensorrt, 95 | const std::vector &rec_image_shape) { 96 | int imgC, imgH, imgW; 97 | imgC = rec_image_shape[0]; 98 | imgH = rec_image_shape[1]; 99 | imgW = rec_image_shape[2]; 100 | 101 | imgW = int(imgH * wh_ratio); 102 | 103 | float ratio = float(img.cols) / float(img.rows); 104 | int resize_w, resize_h; 105 | 106 | if (ceilf(imgH * ratio) > imgW) 107 | resize_w = imgW; 108 | else 109 | resize_w = int(ceilf(imgH * ratio)); 110 | 111 | cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, 112 | cv::INTER_LINEAR); 113 | cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, 114 | int(imgW - resize_img.cols), cv::BORDER_CONSTANT, 115 | {0, 0, 0}); 116 | } 117 | 118 | void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, 119 | bool use_tensorrt, 120 | const std::vector &rec_image_shape) { 121 | int imgC, imgH, imgW; 122 | imgC = rec_image_shape[0]; 123 | imgH = rec_image_shape[1]; 124 | imgW = rec_image_shape[2]; 125 | 126 | float ratio = float(img.cols) / float(img.rows); 127 | int resize_w, resize_h; 128 | if (ceilf(imgH * ratio) > imgW) 129 | resize_w = imgW; 130 | else 131 | resize_w = int(ceilf(imgH * ratio)); 132 | 133 | cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, 134 | cv::INTER_LINEAR); 135 | } 136 | 137 | void TableResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, 138 | const int max_len) { 139 | int w = img.cols; 140 | int h = img.rows; 141 | 142 | int max_wh = w >= h ? 
w : h; 143 | float ratio = w >= h ? float(max_len) / float(w) : float(max_len) / float(h); 144 | 145 | int resize_h = int(float(h) * ratio); 146 | int resize_w = int(float(w) * ratio); 147 | 148 | cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); 149 | } 150 | 151 | void TablePadImg::Run(const cv::Mat &img, cv::Mat &resize_img, 152 | const int max_len) { 153 | int w = img.cols; 154 | int h = img.rows; 155 | cv::copyMakeBorder(img, resize_img, 0, max_len - h, 0, max_len - w, 156 | cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); 157 | } 158 | 159 | void Resize::Run(const cv::Mat &img, cv::Mat &resize_img, const int h, 160 | const int w) { 161 | cv::resize(img, resize_img, cv::Size(w, h)); 162 | } 163 | 164 | } // namespace PaddleOCR 165 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/src/windows_port.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2009, Google Inc. 2 | * All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are 6 | * met: 7 | * 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above 11 | * copyright notice, this list of conditions and the following disclaimer 12 | * in the documentation and/or other materials provided with the 13 | * distribution. 14 | * * Neither the name of Google Inc. nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 
17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | * 30 | * --- 31 | * Author: Craig Silverstein 32 | * 33 | * These are some portability typedefs and defines to make it a bit 34 | * easier to compile this code under VC++. 35 | * 36 | * Several of these are taken from glib: 37 | * http://developer.gnome.org/doc/API/glib/glib-windows-compatability-functions.html 38 | */ 39 | 40 | #ifndef GFLAGS_WINDOWS_PORT_H_ 41 | #define GFLAGS_WINDOWS_PORT_H_ 42 | 43 | #include "config.h" 44 | 45 | // This must be defined before the windows.h is included. 46 | // It's needed for mutex.h, to give access to the TryLock method. 
47 | # if !defined(_WIN32_WINNT) && !(defined( __MINGW32__) || defined(__MINGW64__)) 48 | # define _WIN32_WINNT 0x0400 49 | # endif 50 | // We always want minimal includes 51 | #ifndef WIN32_LEAN_AND_MEAN 52 | # define WIN32_LEAN_AND_MEAN 53 | #endif 54 | #include 55 | #include /* for mkdir */ 56 | #include /* for _putenv, getenv */ 57 | #include /* need this to override stdio's snprintf, also defines _unlink used by unit tests */ 58 | #include /* util.h uses va_copy */ 59 | #include /* for _stricmp and _strdup */ 60 | 61 | /* We can't just use _vsnprintf and _snprintf as drop-in-replacements, 62 | * because they don't always NUL-terminate. :-( We also can't use the 63 | * name vsnprintf, since windows defines that (but not snprintf (!)). 64 | */ 65 | #if !defined(__MINGW32__) && !defined(__MINGW64__) /* mingw already defines */ 66 | #if !(defined(_MSC_VER) && _MSC_VER >= 1900) /* msvc 2015 already defines */ 67 | extern GFLAGS_DLL_DECL int snprintf(char *str, size_t size, 68 | const char *format, ...); 69 | extern int GFLAGS_DLL_DECL safe_vsnprintf(char *str, size_t size, 70 | const char *format, va_list ap); 71 | #define vsnprintf(str, size, format, ap) safe_vsnprintf(str, size, format, ap) 72 | #define va_copy(dst, src) (dst) = (src) 73 | #endif 74 | #endif /* #if !defined(__MINGW32__) && !defined(__MINGW64__) */ 75 | 76 | #ifdef _MSC_VER 77 | # pragma warning(push) 78 | # pragma warning(disable: 4996) // ignore getenv security warning 79 | #endif 80 | #if !defined(_POSIX_C_SOURCE) || _POSIX_C_SOURCE < 200112L 81 | inline void setenv(const char* name, const char* value, int) { 82 | // In windows, it's impossible to set a variable to the empty string. 83 | // We handle this by setting it to "0" and the NUL-ing out the \0. 84 | // That is, we putenv("FOO=0") and then find out where in memory the 85 | // putenv wrote "FOO=0", and change it in-place to "FOO=\0". 86 | // c.f. 
http://svn.apache.org/viewvc/stdcxx/trunk/tests/src/environ.cpp?r1=611451&r2=637508&pathrev=637508 87 | static const char* const kFakeZero = "0"; 88 | if (*value == '\0') 89 | value = kFakeZero; 90 | // Apparently the semantics of putenv() is that the input 91 | // must live forever, so we leak memory here. :-( 92 | const size_t nameval_len = strlen(name) + 1 + strlen(value) + 1; 93 | char* nameval = reinterpret_cast(malloc(nameval_len)); 94 | snprintf(nameval, nameval_len, "%s=%s", name, value); 95 | _putenv(nameval); 96 | if (value == kFakeZero) { 97 | nameval[nameval_len - 2] = '\0'; // works when putenv() makes no copy 98 | if (*getenv(name) != '\0') 99 | *getenv(name) = '\0'; // works when putenv() copies nameval 100 | } 101 | } 102 | #endif 103 | #ifdef _MSC_VER 104 | # pragma warning(pop) 105 | #endif 106 | 107 | #define strcasecmp _stricmp 108 | 109 | #if defined(_MSC_VER) && _MSC_VER >= 1400 110 | #define strdup _strdup 111 | #define unlink _unlink 112 | #endif 113 | 114 | #if defined(_MSC_VER) && _MSC_VER >= 1800 115 | #include 116 | #else 117 | #define PRId32 "d" 118 | #define PRIu32 "u" 119 | #define PRId64 "I64d" 120 | #define PRIu64 "I64u" 121 | #endif 122 | 123 | #if !defined(__MINGW32__) && !defined(__MINGW64__) 124 | #define strtoq _strtoi64 125 | #define strtouq _strtoui64 126 | #define strtoll _strtoi64 127 | #define strtoull _strtoui64 128 | #define atoll _atoi64 129 | #endif 130 | 131 | #ifndef PATH_MAX 132 | #define PATH_MAX 1024 133 | #endif 134 | 135 | #endif /* GFLAGS_WINDOWS_PORT_H_ */ 136 | -------------------------------------------------------------------------------- /api/python/PPOCR_visualize.py: -------------------------------------------------------------------------------- 1 | # 将 PaddleOCR-json 结果可视化表现 2 | # 项目主页: 3 | # https://github.com/hiroi-sora/PaddleOCR-json 4 | from PIL import Image, ImageDraw, ImageFont 5 | import math 6 | 7 | 8 | class visualize: 9 | """可视化""" 10 | 11 | # ================================ 静态方法 
================================ 12 | 13 | @staticmethod 14 | def createBox(textBlocks, size, fill="#00500040", outline="#11ff22", width=6): 15 | """创建包围盒图层,返回PIL Image对象。\n 16 | :textBlocks: 文本块列表。\n 17 | :size: 图片尺寸。\n 18 | 以下为可选字段:(颜色为十六进制6位RGB或8位RGBA字符串,如 #112233ff)\n 19 | :fill: 包围盒填充颜色。\n 20 | :outline: 包围盒轮廓颜色。\n 21 | :width: 包围盒轮廓粗细,像素。 22 | """ 23 | img = Image.new("RGBA", size, 0) 24 | draw = ImageDraw.Draw(img) 25 | for tb in textBlocks: 26 | box = [ 27 | tuple(tb["box"][0]), 28 | tuple(tb["box"][1]), 29 | tuple(tb["box"][2]), 30 | tuple(tb["box"][3]), 31 | ] 32 | draw.polygon(box, fill=fill, outline=outline, width=width) 33 | return img 34 | 35 | @staticmethod 36 | def createText( 37 | textBlocks, 38 | size, 39 | ttfPath="C:\Windows\Fonts\msyh.ttc", 40 | ttfScale=0.9, 41 | fill="#ff0000", 42 | ): 43 | """创建文字图层,返回PIL Image对象。\n 44 | :textBlocks: 文本块列表。\n 45 | :size: 图片尺寸。\n 46 | 以下为可选字段:\n 47 | :ttfPath: 字体文件路径。默认为微软雅黑,若不存在此字体会报错。\n 48 | :ttfScale: 字体大小整体缩放系数,应在1附近。\n 49 | :fill: 文字颜色,十六进制6位RGB或8位RGBA字符串,如 #112233ff。\n 50 | """ 51 | img = Image.new("RGBA", size, 0) 52 | draw = ImageDraw.Draw(img) 53 | ttfDict = {} # 缓存不同大小的字体对象 54 | for tb in textBlocks: 55 | text = tb["text"] 56 | xy = tuple(tb["box"][0]) # 左上角坐标 57 | xy1 = tb["box"][3] # 左下角坐标# 行高 58 | hight = round( 59 | math.sqrt(((xy[0] - xy1[0]) ** 2) + ((xy[1] - xy1[1]) ** 2)) * ttfScale 60 | ) 61 | if hight not in ttfDict: 62 | ttfDict[hight] = ImageFont.truetype(ttfPath, hight) # 创建新大小的字体 63 | draw.text(xy, text, font=ttfDict[hight], fill=fill) 64 | return img 65 | 66 | @staticmethod 67 | def createOrder( 68 | textBlocks, 69 | size, 70 | ttfPath="C:\Windows\Fonts\msyh.ttc", 71 | ttfSize=50, 72 | fill="#2233ff", 73 | bg="#ffffffe0", 74 | ): 75 | """创建序号图层,返回PIL Image对象。\n 76 | :textBlocks: 文本块列表。\n 77 | :size: 图片尺寸。\n 78 | 以下为可选字段:\n 79 | :ttfPath: 字体文件路径。默认为微软雅黑,若不存在此字体会报错。\n 80 | :ttfSize: 字体大小。\n 81 | :fill: 文字颜色,十六进制6位RGB或8位RGBA字符串,如 #112233ff。\n 82 | """ 83 | img = Image.new("RGBA", size, 
0) 84 | draw = ImageDraw.Draw(img) 85 | ttf = ImageFont.truetype(ttfPath, ttfSize) # 字体 86 | for index, tb in enumerate(textBlocks): 87 | text = f"{index+1}" 88 | xy = tuple(tb["box"][0]) # 左上角坐标 89 | x_, y_, w, h = ttf.getbbox(text) # 获取宽高。只需要w和h 90 | w *= 1.1 91 | h *= 1.1 92 | draw.rectangle((xy, (xy[0] + w, xy[1] + h)), fill=bg, width=0) # 背景矩形 93 | draw.text(xy, text, font=ttf, fill=fill) # 文字 94 | return img 95 | 96 | @staticmethod 97 | def createContrast(img1, img2): 98 | """左右拼合两个图片,创建对比图层,返回PIL Image对象。""" 99 | size = (img1.size[0] + img2.size[0], max(img1.size[1], img2.size[1])) 100 | img = Image.new("RGBA", size, 0) 101 | img.paste(img1, (0, 0)) 102 | img.paste(img2, (img1.size[0], 0)) 103 | return img 104 | 105 | @staticmethod 106 | def composite(img1, img2): 107 | """传入两个PIL Image对象(RGBA格式),以img1为底,将img2叠加在其上 108 | 返回生成的图片""" 109 | return Image.alpha_composite(img1, img2) 110 | 111 | # ================================ 快捷接口 ================================ 112 | 113 | def __init__(self, textBlocks, imagePath): 114 | """创建可视化对象。\n 115 | :textBlocks: 文本块列表,即OCR返回的data部分\n 116 | :imagePath: 对应的图片路径。 117 | """ 118 | self.imgSource = Image.open(imagePath).convert("RGBA") # 原始图片图层 119 | self.size = self.imgSource.size 120 | self.imgBox = self.createBox(textBlocks, self.size) # 包围盒图层 121 | self.imgText = self.createText(textBlocks, self.size) # 文字图层 122 | self.imgOrder = self.createOrder(textBlocks, self.size) # 序号图层 123 | 124 | def get(self, isBox=True, isText=False, isOrder=False, isSource=True): 125 | """返回合成可视化结果的PIL Image图像。\n 126 | :isBox: T时返回包围盒图层。\n 127 | :isText: T时返回文字图层。\n 128 | :isOrder: T时返回序号图层。\n 129 | :isSource: T时返回原图。F时返回透明背景的纯可视化结果。\n 130 | """ 131 | img = Image.new("RGBA", self.size, 0) 132 | flags = (isSource, isBox, isText, isOrder) 133 | for index, im in enumerate( 134 | [self.imgSource, self.imgBox, self.imgText, self.imgOrder] 135 | ): 136 | if im and flags[index]: 137 | img = visualize.composite(img, im) 138 | return img 139 | 140 | 
def show(self, isBox=True, isText=False, isOrder=False, isSource=True): 141 | """显示可视化结果图像。\n 142 | :isBox: T时返回包围盒图层。\n 143 | :isText: T时返回文字图层。\n 144 | :isOrder: T时返回序号图层。\n 145 | :isSource: T时返回原图。F时返回透明背景的纯可视化结果。\n 146 | """ 147 | img = self.get(isBox, isText, isOrder, isSource) 148 | img.show() 149 | 150 | def save(self, path="", isBox=True, isText=False, isOrder=False, isSource=True): 151 | """保存可视化结果图像。\n 152 | :path: 保存路径。\n 153 | :isBox: T时返回包围盒图层。\n 154 | :isText: T时返回文字图层。\n 155 | :isOrder: T时返回序号图层。\n 156 | :isSource: T时返回原图。F时返回透明背景的纯可视化结果。\n 157 | """ 158 | img = self.get(isBox, isText, isOrder, isSource) 159 | img.save(path) 160 | -------------------------------------------------------------------------------- /cpp/third_party/gflags/src/gflags_declare.h.in: -------------------------------------------------------------------------------- 1 | // Copyright (c) 1999, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | // --- 31 | // 32 | // Revamped and reorganized by Craig Silverstein 33 | // 34 | // This is the file that should be included by any file which declares 35 | // command line flag. 36 | 37 | #ifndef GFLAGS_DECLARE_H_ 38 | #define GFLAGS_DECLARE_H_ 39 | 40 | 41 | // --------------------------------------------------------------------------- 42 | // Namespace of gflags library symbols. 43 | #define GFLAGS_NAMESPACE @GFLAGS_NAMESPACE@ 44 | 45 | // --------------------------------------------------------------------------- 46 | // Windows DLL import/export. 47 | 48 | // Whether gflags library is a DLL. 49 | // 50 | // Set to 1 by default when the shared gflags library was built on Windows. 51 | // Must be overwritten when this header file is used with the optionally also 52 | // built static library instead; set by CMake's INTERFACE_COMPILE_DEFINITIONS. 53 | #ifndef GFLAGS_IS_A_DLL 54 | # define GFLAGS_IS_A_DLL @GFLAGS_IS_A_DLL@ 55 | #endif 56 | 57 | // We always want to import the symbols of the gflags library. 58 | #ifndef GFLAGS_DLL_DECL 59 | # if GFLAGS_IS_A_DLL && defined(_MSC_VER) 60 | # define GFLAGS_DLL_DECL __declspec(dllimport) 61 | # elif defined(__GNUC__) && __GNUC__ >= 4 62 | # define GFLAGS_DLL_DECL __attribute__((visibility("default"))) 63 | # else 64 | # define GFLAGS_DLL_DECL 65 | # endif 66 | #endif 67 | 68 | // We always want to import variables declared in user code. 
69 | #ifndef GFLAGS_DLL_DECLARE_FLAG 70 | # if GFLAGS_IS_A_DLL && defined(_MSC_VER) 71 | # define GFLAGS_DLL_DECLARE_FLAG __declspec(dllimport) 72 | # elif defined(__GNUC__) && __GNUC__ >= 4 73 | # define GFLAGS_DLL_DECLARE_FLAG __attribute__((visibility("default"))) 74 | # else 75 | # define GFLAGS_DLL_DECLARE_FLAG 76 | # endif 77 | #endif 78 | 79 | // --------------------------------------------------------------------------- 80 | // Flag types 81 | #include 82 | #if @HAVE_STDINT_H@ 83 | # include // the normal place uint32_t is defined 84 | #elif @HAVE_SYS_TYPES_H@ 85 | # include // the normal place u_int32_t is defined 86 | #elif @HAVE_INTTYPES_H@ 87 | # include // a third place for uint32_t or u_int32_t 88 | #endif 89 | 90 | namespace GFLAGS_NAMESPACE { 91 | 92 | #if @GFLAGS_INTTYPES_FORMAT_C99@ // C99 93 | typedef int32_t int32; 94 | typedef uint32_t uint32; 95 | typedef int64_t int64; 96 | typedef uint64_t uint64; 97 | #elif @GFLAGS_INTTYPES_FORMAT_BSD@ // BSD 98 | typedef int32_t int32; 99 | typedef u_int32_t uint32; 100 | typedef int64_t int64; 101 | typedef u_int64_t uint64; 102 | #elif @GFLAGS_INTTYPES_FORMAT_VC7@ // Windows 103 | typedef __int32 int32; 104 | typedef unsigned __int32 uint32; 105 | typedef __int64 int64; 106 | typedef unsigned __int64 uint64; 107 | #else 108 | # error Do not know how to define a 32-bit integer quantity on your system 109 | #endif 110 | 111 | } // namespace GFLAGS_NAMESPACE 112 | 113 | 114 | namespace fLS { 115 | 116 | // The meaning of "string" might be different between now and when the 117 | // macros below get invoked (e.g., if someone is experimenting with 118 | // other string implementations that get defined after this file is 119 | // included). Save the current meaning now and use it in the macros. 
120 | typedef std::string clstring; 121 | 122 | } // namespace fLS 123 | 124 | 125 | #define DECLARE_VARIABLE(type, shorttype, name) \ 126 | /* We always want to import declared variables, dll or no */ \ 127 | namespace fL##shorttype { extern GFLAGS_DLL_DECLARE_FLAG type FLAGS_##name; } \ 128 | using fL##shorttype::FLAGS_##name 129 | 130 | #define DECLARE_bool(name) \ 131 | DECLARE_VARIABLE(bool, B, name) 132 | 133 | #define DECLARE_int32(name) \ 134 | DECLARE_VARIABLE(::GFLAGS_NAMESPACE::int32, I, name) 135 | 136 | #define DECLARE_uint32(name) \ 137 | DECLARE_VARIABLE(::GFLAGS_NAMESPACE::uint32, U, name) 138 | 139 | #define DECLARE_int64(name) \ 140 | DECLARE_VARIABLE(::GFLAGS_NAMESPACE::int64, I64, name) 141 | 142 | #define DECLARE_uint64(name) \ 143 | DECLARE_VARIABLE(::GFLAGS_NAMESPACE::uint64, U64, name) 144 | 145 | #define DECLARE_double(name) \ 146 | DECLARE_VARIABLE(double, D, name) 147 | 148 | #define DECLARE_string(name) \ 149 | /* We always want to import declared variables, dll or no */ \ 150 | namespace fLS { \ 151 | extern GFLAGS_DLL_DECLARE_FLAG ::fLS::clstring& FLAGS_##name; \ 152 | } \ 153 | using fLS::FLAGS_##name 154 | 155 | 156 | #endif // GFLAGS_DECLARE_H_ 157 | --------------------------------------------------------------------------------