├── .gitignore
├── CMakeLists.txt
├── README.md
├── cmake
│   └── Modules
│       ├── Eigen.cmake
│       ├── Eigen_VERSION.cmake
│       └── FindTensorFlow.cmake
├── ctc_scene_text_recognizer.cpp
├── ctc_scene_text_recognizer.h
├── detector.h
├── faster_rcnn_text_detector.cpp
├── faster_rcnn_text_detector.h
├── images
│   ├── 1.jpg
│   ├── 2.jpg
│   ├── 3.jpg
│   ├── 4.jpg
│   ├── 5.jpg
│   └── pipeline.jpg
├── main.cpp
├── recognizer.h
├── scene_text_reader.cpp
├── scene_text_reader.h
├── test_images
│   ├── img_108.jpg
│   └── word_2.png
├── text_box.cpp
├── text_box.h
├── utils.cpp
└── utils.h
/.gitignore:
--------------------------------------------------------------------------------
1 | # Prerequisites
2 | *.d
3 |
4 | # Compiled Object files
5 | *.slo
6 | *.lo
7 | *.o
8 | *.obj
9 |
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 |
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 |
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 |
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 |
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 |
34 | # Build output, downloaded dependencies, model files and generated binaries
35 | build/
36 | external/
37 | model/
38 | bin/
39 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Builds the DetectText executable and the helper libraries of DeepSceneTextReader.
2 | # CMAKE_CXX_STANDARD is ignored by CMake < 3.1, and CMake 4 rejects a minimum below 3.5.
3 | cmake_minimum_required(VERSION 3.5)
4 | project(SceneTextDetection)
5 | set(CMAKE_CXX_STANDARD 11)
6 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
7 |
8 | # Executables are written to <source dir>/bin.
9 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)
10 |
11 | # Default to a Release build when no build type was requested.
12 | if(NOT CMAKE_BUILD_TYPE)
13 |   set(CMAKE_BUILD_TYPE Release)
14 | endif()
15 |
16 | # Append the warning flags so that flags supplied by the user
17 | # (CXXFLAGS or -DCMAKE_CXX_FLAGS=...) are kept instead of being overwritten.
18 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
19 | set(CMAKE_CXX_FLAGS_DEBUG "-g")
20 | set(CMAKE_CXX_FLAGS_RELEASE "-O3")
21 |
22 | set(SOURCE_FILES main.cpp faster_rcnn_text_detector.cpp ctc_scene_text_recognizer.cpp utils.cpp scene_text_reader.cpp text_box.cpp)
23 | set(EXECUTABLE DetectText)
24 | #set(EXECUTABLE EndtoEndReading)
25 | #set(EXECUTABLE RecognizeText)
26 |
27 | # Make cmake/Modules visible to find_package() and include().
28 | list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules")
29 |
30 | find_package(TensorFlow REQUIRED)
31 | find_package(Protobuf REQUIRED)
32 | find_package(OpenCV REQUIRED highgui imgproc)
33 |
34 | # NOTE: the assignment below replaces the library list computed by FindTensorFlow
35 | # with fixed install locations; adjust the paths to match the local TensorFlow build.
36 | #set(TensorFlow_LIBRARIES "/usr/local/lib/libtensorflow_cc.so" "/usr/local/lib/libtensorflow_framework.so")
37 | #set(TensorFlow_LIBRARIES "/usr/local/lib/libtensorflow_cc.so" "/usr/local/lib/libtensorflow.so")
38 | set(TensorFlow_LIBRARIES "/usr/local/lib/libtensorflow.so" "/usr/local/lib/libtensorflow_cc.so" "/usr/local/lib/libtensorflow_framework.so")
39 |
40 | # Quote the expansions so list entries are printed separated instead of glued together.
41 | message(STATUS "protobuf include dirs: ${PROTOBUF_INCLUDE_DIRS}")
42 | message(STATUS "protobuf libraries: ${PROTOBUF_LIBRARIES}")
43 | message(STATUS "tensorflow include dirs: ${TensorFlow_INCLUDE_DIRS}")
44 | message(STATUS "tensorflow libraries: ${TensorFlow_LIBRARIES}")
45 |
46 | # set variables for external dependencies
47 | set(EXTERNAL_DIR "${PROJECT_SOURCE_DIR}/external" CACHE PATH "Location where external dependencies will be installed")
48 | set(DOWNLOAD_LOCATION "${EXTERNAL_DIR}/src" CACHE PATH "Location where external projects will be downloaded")
49 |
50 | # Optional: point NSYNC_DIR at the nsync headers of the local TensorFlow installation, e.g.
51 | #set(NSYNC_DIR "/usr/local/lib/python2.7/dist-packages/tensorflow/include/external/nsync/public")
52 |
53 | mark_as_advanced(EXTERNAL_DIR DOWNLOAD_LOCATION)
54 |
55 | # Defines the external project target `Eigen` and adds its include directories.
56 | include(Eigen)
57 |
58 | set(PROJECT_INCLUDE_DIRS ${TensorFlow_INCLUDE_DIRS} ${EXTERNAL_DIR}/include ${PROTOBUF_INCLUDE_DIRS} ${NSYNC_DIR} ${OpenCV_INCLUDE_DIRS})
59 | set(PROJECT_LIBRARIES ${TensorFlow_LIBRARIES} ${PROTOBUF_LIBRARIES} ${OpenCV_LIBS})
60 | set(PROJECT_DEPENDENCIES Eigen)
61 |
62 | include_directories(${PROJECT_INCLUDE_DIRS})
63 | add_library(SceneTextDetector faster_rcnn_text_detector.cpp utils.cpp)
64 | add_library(CTCSceneTextRecognizer ctc_scene_text_recognizer.cpp utils.cpp)
65 | add_library(SceneTextReader scene_text_reader.cpp faster_rcnn_text_detector.cpp ctc_scene_text_recognizer.cpp utils.cpp text_box.cpp)
66 | add_executable(${EXECUTABLE} ${SOURCE_FILES})
67 | target_link_libraries(${EXECUTABLE} PRIVATE ${PROJECT_LIBRARIES})
68 |
69 | # Every target compiles sources that include TensorFlow headers, which in turn need the
70 | # Eigen headers fetched at build time, so each target must wait for the external project
71 | # (previously only the executable did, which could break parallel builds).
72 | foreach(project_target SceneTextDetector CTCSceneTextRecognizer SceneTextReader ${EXECUTABLE})
73 |   add_dependencies(${project_target} ${PROJECT_DEPENDENCIES})
74 | endforeach()
75 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeepSceneTextReader
2 | This is a C++ project that deploys a deep scene text reading pipeline. It reads text from natural scene images.
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | # Prerequisites
13 |
14 | The project is written in C++ using the TensorFlow framework. It has been tested with TensorFlow 1.4; newer versions should also work but have not been tested.
15 | Please install:
16 |
17 | * Tensorflow
18 |
19 | * nsync project: https://github.com/google/nsync.git This is needed for building tensorflow.
20 |
21 | * opencv3.3
22 |
23 | * protobuf
24 |
25 | * eigen
26 |
27 | Please check this project to see how to build a project that uses TensorFlow with CMake:
28 | https://github.com/cjweeks/tensorflow-cmake
29 | It greatly helped in building this project.
30 | Be careful when building the TensorFlow library, because this project also uses OpenCV. There still seems to be a problem when TensorFlow and OpenCV are used together:
31 | it can make OpenCV unable to read images.
32 | Check out this issue: https://github.com/tensorflow/tensorflow/issues/14267
33 | The answer by allenlavoie solved my problem, so I quote it here:
34 |
35 | "In the meantime, as long as you're not using any custom ops you can build libtensorflow_cc.so with bazel build --config=monolithic, which will condense everything together into one shared object (no libtensorflow_framework dependence) and seal off non-TensorFlow symbols. That shared object will have protocol buffer symbols."
36 |
37 | # Status
38 | Currently two pretrained models are provided: one for scene text detection and one for scene text recognition.
39 | More models will be provided.
40 | Note that the current models are not very robust. You can easily swap in your own trained models.
41 | The models will be continuously updated.
42 |
43 | # build process
44 |
45 | mkdir -p build && cd build
46 |
47 | cmake ..
48 |
49 | make
50 |
51 | It will create an executable named **DetectText** in the bin folder.
52 |
53 | # Usage:
54 | The executable can be run in three modes: (1) Detect (2) Recognize (3) Detect and Recognize
55 |
56 | ## Detect
57 | Download the pretrained detector model and put it in model/
58 |
59 | ./DetectText --detector_graph='model/Detector_model.pb' \
60 | --image_filename='test_images/test_img1.jpg' --mode='detect' --output_filename='results/output_image.jpg'
61 |
62 | ## Recognize
63 | Download the pretrained recognizer model and put it in model/
64 | Download the dictionary file and put it in model
65 |
66 |
67 | ./DetectText --recognizer_graph='model/Recognizer_model.pb' \
68 | --image_filename='test_images/recognize_image1.jpg' --mode='recognize' \
69 | --im_height=32 --im_width=128
70 |
71 | ## Detect and Recognize
72 | Download the pretrained detector and recognizer model and put it in model/ as described previously.
73 |
74 | ./DetectText --recognizer_graph='model/Recognizer_model.pb' --detector_graph='model/Detector_model.pb' \
75 | --image_filename='test_images/img_108.jpg' --mode='detect_and_read' --output_filename='results/output_image.jpg'
76 |
77 | # Model Description
78 | ### *Detector*
79 | 1. Faster RCNN Detector Model
80 | The detector is trained with a modified TensorFlow [object detection API](https://github.com/tensorflow/models/tree/master/research/object_detection).
81 | I modified it by changing the proposal scheme to regress the 4 corner points of the oriented bounding box rather than a regular rectangular bounding box.
82 | Check out this [repo](https://github.com/dafanghe/Tensorflow_SceneText_Oriented_Box_Predictor) for the training code.
83 | Pretrained model: FasterRCNN_detector_model.pb
84 |
85 | 2. R2CNN will be updated. See [R2CNN](https://arxiv.org/abs/1706.09579) for details.
86 | The code is also based on a modified TensorFlow [object detection API](https://github.com/tensorflow/models/tree/master/research/object_detection).
87 | The training code will be released soon.
88 |
89 |
90 | ### *Recognizer*
91 | 1. CTC scene text recognizer.
92 | The recognizer model follows the famous scene text recognition [CRNN model](https://arxiv.org/abs/1507.05717)
93 |
94 | 2. Spatial Attention OCR will be updated soon. It is based on [GoogleOCR](https://github.com/tensorflow/models/tree/master/research/attention_ocr)
95 |
96 | ### *Detect and Recognize*
97 | The whole scene text reading pipeline detects the text, rotates each detected region to be horizontal, and reads it with the recognizer.
98 | The pipeline is here:
99 |
100 |
101 |
102 |
103 |
104 | ### *Pretrained Models*
105 | You can play with the code with provided pretrained models. \
106 | They are not fully optimized yet, but could be used for being familiar with the code. \
107 | Check them out here: [models](https://drive.google.com/drive/folders/1Ao0ZrSVf0YjU6pnzGY0C3QJ2Qz0ljRIU?usp=sharing)
108 |
109 | You will find two detection models called: (1) **FasterRCNN_detector_model.pb** (2) **R2CNN_detector_model.pb** \
110 | Two recognition models with their charset: (1) **Recognizer_model.pb + charset_full.txt** and (2)**Recognizer_model_case_insen.pb + charset_case_insen.txt**. \
111 | "Full charset" means English letters + digits, and "case insen" means case-insensitive English letters + digits.
112 | Let me know if you have any problems using them.
113 |
114 |
115 | # Reference and Related Projects
116 | - [Faster RCNN](https://arxiv.org/abs/1506.01497) Faster RCNN paper.
117 | - [Tensorflow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection).
118 | - [An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition](https://arxiv.org/abs/1507.05717), reference paper for CRNN model.
119 | - [tensorflow-cmake](https://github.com/cjweeks/tensorflow-cmake), Tutorial of Building Project with tensorflow using cmake.
120 | - [R2CNN](https://arxiv.org/abs/1706.09579) Reference paper for R2CNN.
121 |
122 | # Contact:
123 |
124 | * Dafang He. The Penn State University. hdfcraig@gmail.com http://personal.psu.edu/duh188/
125 |
--------------------------------------------------------------------------------
/cmake/Modules/Eigen.cmake:
--------------------------------------------------------------------------------
1 | # Downloads Eigen at build time as the external project target `Eigen` and adds the
2 | # directories it installs into to the include path.
3 | # Expects EXTERNAL_DIR and DOWNLOAD_LOCATION to be set by the including CMakeLists.txt.
4 | include(ExternalProject)
5 | include(Eigen_VERSION)  # provides Eigen_URL, Eigen_HASH and Eigen_DIR
6 |
7 | set(Eigen_INSTALL ${EXTERNAL_DIR}/include/eigen/${Eigen_DIR})
8 |
9 | # All paths are derived from EXTERNAL_DIR (a cache variable the user may override) so that
10 | # the include path always matches the install prefix; with the default EXTERNAL_DIR these
11 | # are the same locations as the previously hard-coded ${PROJECT_SOURCE_DIR}/external paths.
12 | set(Eigen_INCLUDE_DIRS
13 |   ${EXTERNAL_DIR}/include/eigen
14 |   ${Eigen_INSTALL})
15 |
16 | ExternalProject_Add(Eigen
17 |   PREFIX ${EXTERNAL_DIR}/src/eigen
18 |   URL ${Eigen_URL}
19 |   URL_HASH ${Eigen_HASH}
20 |   DOWNLOAD_DIR ${DOWNLOAD_LOCATION}
21 |
22 |   CMAKE_ARGS
23 |     -DCMAKE_BUILD_TYPE:STRING=Release
24 |     -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
25 |     -DCMAKE_INSTALL_PREFIX:STRING=${Eigen_INSTALL}
26 |     -DINCLUDE_INSTALL_DIR:STRING=${Eigen_INSTALL})
27 |
28 | include_directories(${Eigen_INCLUDE_DIRS})
29 |
--------------------------------------------------------------------------------
/cmake/Modules/Eigen_VERSION.cmake:
--------------------------------------------------------------------------------
1 | #set(Eigen_URL https://bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz,http://mirror.bazel.build/bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz)
2 | set(Eigen_URL http://mirror.bazel.build/bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz)  # archive passed as URL to ExternalProject_Add in Eigen.cmake
3 | set(Eigen_ARCHIVE_HASH 429aa5254200)  # same id that appears in the archive file name; not referenced by Eigen.cmake
4 | set(Eigen_HASH SHA256=61d8b6fc4279dd1dda986fb1677d15e3d641c07a3ea5abe255790b1f0c0c14e9)  # passed as URL_HASH to verify the download
5 | set(Eigen_DIR eigen3)  # sub-directory appended to the install path in Eigen.cmake
6 | set(Eigen_INSTALL_DIR /usr/local)  # NOTE(review): not referenced by Eigen.cmake; possibly unused - confirm before removing
7 |
--------------------------------------------------------------------------------
/cmake/Modules/FindTensorFlow.cmake:
--------------------------------------------------------------------------------
1 | # Find module for TensorFlow: locates the tensorflow_cc library and the TensorFlow include directory.
2 | # On success it sets TensorFlow_LIBRARIES and TensorFlow_INCLUDE_DIRS for the caller.
3 | include(FindPackageHandleStandardArgs)
4 | unset(TENSORFLOW_FOUND)
5 |
6 | find_path(TensorFlow_INCLUDE_DIR
7 | NAMES
8 | tensorflow/core
9 | tensorflow/cc
10 | third_party
11 | HINTS
12 | /usr/local/include/google/tensorflow
13 | /usr/include/google/tensorflow)
14 |
15 | find_library(TensorFlow_LIBRARY
16 | NAMES
17 | tensorflow_cc
18 | HINTS
19 | /usr/lib
20 | /usr/local/lib)
21 |
22 | # NOTE(review): "tensorflow_all" looks like an alternative library name that was considered; confirm before removing.
23 | # Sets the package's FOUND variable from the two results below and prints the standard found / not-found message.
24 | find_package_handle_standard_args(TensorFlow DEFAULT_MSG TensorFlow_INCLUDE_DIR TensorFlow_LIBRARY)
25 |
26 | # Export the plural result variables that CMakeLists.txt reads.
27 | if(TENSORFLOW_FOUND)
28 | set(TensorFlow_LIBRARIES ${TensorFlow_LIBRARY})
29 | set(TensorFlow_INCLUDE_DIRS ${TensorFlow_INCLUDE_DIR})
30 | endif()
31 |
32 | # Hide the two internal cache entries from the CMake GUI.
33 | mark_as_advanced(TensorFlow_INCLUDE_DIR TensorFlow_LIBRARY)
34 |
--------------------------------------------------------------------------------
/ctc_scene_text_recognizer.cpp:
--------------------------------------------------------------------------------
1 | #include "ctc_scene_text_recognizer.h"
2 |
3 |
4 | CTCSceneTextRecognizer::CTCSceneTextRecognizer(){  // Default constructor: sets the constant parameters only; no graph or dictionary is loaded here.
5 | init_constant_vars();  // uses the default height/width (32, 128) declared in the header
6 | }
7 |
8 |
9 | CTCSceneTextRecognizer::CTCSceneTextRecognizer(std::string frozen_graph_filename, std::string dictionary_filename, int _im_height, int _im_width):Recognizer(frozen_graph_filename, dictionary_filename){  // the Recognizer base constructor loads the dictionary and the graph
10 | init_constant_vars(_im_height, _im_width);  // then record the layer names and the input geometry for this model
11 | }
12 |
13 |
14 | bool CTCSceneTextRecognizer::init(const std::string frozen_graph_filename, const std::string dictionary_filename){  // Loads the frozen graph, then the dictionary.
15 | this->init_graph(frozen_graph_filename);
16 | this->init_dictionary(dictionary_filename);
17 | return true;  // NOTE(review): always true; the results of the two calls above are ignored
18 | }
19 |
20 |
21 | void CTCSceneTextRecognizer::init_constant_vars(int _im_height, int _im_width){
22 |   // Input geometry, the width stretch factor and the fixed sequence length fed to the graph.
23 |   this->image_height = _im_height;
24 |   this->image_width = _im_width;
25 |   this->width_scale_ratio = 1.2;
26 |   this->seq_len = 29;
27 |   // Names of the tensors that are fed to and fetched from the graph.
28 |   this->input_layers = str_util::Split(std::string("input_images:0,input_seq_lens:0"), ',');
29 |   this->output_layers = str_util::Split(std::string("CTCBeamSearchDecoder:0,CTCBeamSearchDecoder:1,CTCBeamSearchDecoder:2"), ',');
30 | }
31 |
32 |
33 | void CTCSceneTextRecognizer::preprocess_image(cv::Mat& input_image, cv::Mat& output_image){  // Prepares one word image for the recognizer and writes it to output_image.
34 |   const int new_width = int(this->width_scale_ratio * input_image.cols);  // stretch the width by width_scale_ratio
35 |   cv::resize(input_image, input_image, cv::Size(new_width, input_image.rows));  // NOTE: resizes the caller's image in place
36 |   cv::Mat resized_image;
37 |   float ratio = 0;  // handed to resize_image_fix_height (defined elsewhere, presumably an out-parameter); not read afterwards
38 |   resize_image_fix_height(input_image, resized_image, ratio, this->image_height);
39 |   pad_image_width(resized_image, output_image, this->image_width);
40 | }
41 |
42 |
43 | string CTCSceneTextRecognizer::run_graph(const cv::Mat& image){
44 | int height = image.rows;
45 | int width = image.cols;
46 | Tensor input_img_tensor(DT_FLOAT, TensorShape({1, height, width, 3}));
47 |
48 | unsigned char *input_data = (unsigned char*)(image.data);
49 | auto input_tensor_mapped = input_img_tensor.tensor();
50 | //(TODO) is there any other ways to copy the data into tensor?
51 | for (int y = 0;y < height; ++y) {
52 | for (int x = 0;x < width; ++x) {
53 | unsigned char b = input_data[image.step * y + x * image.channels()];
54 | unsigned char g = input_data[image.step * y + x * image.channels() + 1];
55 | unsigned char r = input_data[image.step * y + x * image.channels() + 2];
56 | input_tensor_mapped(0, y, x, 0) = float(r);
57 | input_tensor_mapped(0, y, x, 1) = float(g);
58 | input_tensor_mapped(0, y, x, 2) = float(b);
59 | }
60 | }
61 | //create the seq len tensor and assign fixed value
62 | Tensor input_seq_len_tensor(DT_INT32, TensorShape({1}));
63 | auto input_seq_len_mapped = input_seq_len_tensor.tensor();
64 | input_seq_len_mapped(0) = this->seq_len;
65 |
66 | //create the input to run
67 | std::vector > inputs = {
68 | {this->input_layers[0], input_img_tensor},
69 | {this->input_layers[1], input_seq_len_tensor},
70 | };
71 |
72 | std::vector outputs;
73 | Status run_status = this->session->Run(inputs,
74 | this->output_layers, {}, &outputs);
75 | if (!run_status.ok()) {
76 | LOG(ERROR) << "Running model failed: " << run_status;
77 | return "";
78 | }
79 | LOG(INFO) <<"number of output:"<();
82 | auto values = outputs[1].flat_outer_dims();
83 |
84 | const Eigen::Tensor::Dimensions& indices_dim = indices.dimensions();
85 | const Eigen::Tensor::Dimensions& values_dim = values.dimensions();
86 |
87 | LOG(INFO) << outputs[0].DebugString();
88 | LOG(INFO) << outputs[1].DebugString();
89 | std::vector encoded_text;
90 | for(int i=0; i CTCSceneTextRecognizer::preprocess_images(std::vector& input_images){
101 | std::vector processed_images(input_images.size());
102 | for(int i=0; ipreprocess_image(input_images[i], preprocessed_image);
105 | processed_images[i] = preprocessed_image;
106 | }
107 | return processed_images;
108 | }
109 |
110 |
111 | std::vector CTCSceneTextRecognizer::run_graph(const std::vector images){
112 | //the images must be preprocessd and has the same height and width!!
113 | std::vector res;
114 | int num_word = images.size();
115 | if(num_word == 0) return res;
116 |
117 | int height = this->image_height;
118 | int width = this->image_width;
119 | Tensor input_img_tensor(DT_FLOAT, TensorShape({num_word, height, width, 3}));
120 | auto input_tensor_mapped = input_img_tensor.tensor();
121 | //create the seq len tensor and assign fixed value for ctc
122 | Tensor input_seq_len_tensor(DT_INT32, TensorShape({num_word}));
123 | auto input_seq_len_mapped = input_seq_len_tensor.tensor();
124 |
125 | for(int i=0; iseq_len;
143 | }
144 | //create the input to run
145 | std::vector > inputs = {
146 | {this->input_layers[0], input_img_tensor},
147 | {this->input_layers[1], input_seq_len_tensor},
148 | };
149 |
150 | //std::cout<<"run recognition graph"< outputs;
152 | Status run_status = this->session->Run(inputs,
153 | this->output_layers, {}, &outputs);
154 | if (!run_status.ok()) {
155 | LOG(ERROR) << "Running model failed: " << run_status;
156 | return res;
157 | }
158 | LOG(INFO) <<"number of output:"<();
165 | auto values = outputs[1].tensor();
166 |
167 | //const Eigen::Tensor::Dimensions& indices_dim = indices.dimensions();
168 | //const Eigen::Tensor::Dimensions& values_dim = values.dimensions();
169 |
170 | std::vector > encoded_texts(num_word);
171 | for(int i=0; i
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | #include "tensorflow/core/framework/tensor_shape.pb.h"
11 | #include "tensorflow/core/framework/tensor.h"
12 | #include "tensorflow/core/graph/graph.h"
13 | #include "tensorflow/core/lib/strings/str_util.h"
14 | #include "tensorflow/core/platform/logging.h"
15 | #include "tensorflow/core/platform/platform.h"
16 | #include "tensorflow/core/platform/types.h"
17 |
18 | //opencv
19 | #include
20 | #include "opencv2/opencv.hpp"
21 |
22 | #include "utils.h"
23 |
24 | #include "recognizer.h"
25 |
26 | using namespace tensorflow;
27 |
28 |
29 | class CTCSceneTextRecognizer: public Recognizer{  // Recognizer whose fetched graph outputs are the CTCBeamSearchDecoder tensors
30 | public:
31 | CTCSceneTextRecognizer();  // sets the constant parameters only; call init() to load a graph and dictionary
32 |
33 | CTCSceneTextRecognizer(const std::string frozen_graph_filename, const std::string dictionary_filename,
34 | int _im_height=32, int _im_width=128);  // loads graph and dictionary through the Recognizer base constructor
35 |
36 | bool init(const std::string frozen_graph_filename, const std::string);  // second argument is the dictionary filename
37 | void preprocess_image(cv::Mat& input_image, cv::Mat& output_image);  // prepares one image via resize_image_fix_height / pad_image_width; also resizes input_image in place
38 | std::vector preprocess_images(std::vector& input_images);  // applies preprocess_image to every element
39 | std::string run_graph(const cv::Mat& image);  // single-image inference; returns "" when the session run fails
40 | std::vector run_graph(const std::vector input_images);  // batch inference; images must already be preprocessed to the same size
41 |
42 | private:
43 | void init_constant_vars(int _im_height=32, int _im_width=128);  // fills the layer names, seq_len and the image geometry
44 | float width_scale_ratio;  // horizontal stretch applied in preprocess_image (set to 1.2)
45 | int seq_len;  // value written into the input_seq_lens tensor (set to 29)
46 | int image_width;  // recognizer input width
47 | int image_height;  // recognizer input height
48 | };
49 |
50 | #endif
51 |
--------------------------------------------------------------------------------
/detector.h:
--------------------------------------------------------------------------------
1 | #ifndef Detector_H
2 | #define Detector_H
3 |
4 | #include
5 | #include
6 |
7 | //opencv
8 | #include
9 | #include "opencv2/opencv.hpp"
10 |
11 | //tensorflow
12 | #include "tensorflow/core/platform/logging.h"
13 | #include "tensorflow/core/public/session.h"
14 | #include "tensorflow/core/framework/graph.pb.h"
15 | #include "tensorflow/core/graph/graph.h"
16 | #include "tensorflow/core/public/session.h"
17 | #include "tensorflow/core/lib/strings/str_util.h"
18 |
19 | #include "text_box.h"
20 |
21 |
22 | class Detector{  // Base class for text detectors that run a frozen TensorFlow graph.
23 | public:
24 |   Detector(){};
25 |   Detector(const std::string frozen_graph_filename){
26 |     init_graph(frozen_graph_filename);  // failures are logged by init_graph; the result is not checked here
27 |   }
28 |   // Reads the GraphDef from frozen_graph_filename and creates a session for it; returns true on success, false (after logging) on failure.
29 |   bool init_graph(const std::string& frozen_graph_filename){
30 |     if (!ReadBinaryProto(tensorflow::Env::Default(), frozen_graph_filename, &graph_def).ok()) {
31 |       LOG(ERROR) << "error when reading proto" << frozen_graph_filename;
32 |       return false;  // was `return -1`, which converts to true in a bool function
33 |     }
34 |     tensorflow::SessionOptions sess_opt;
35 |     sess_opt.config.mutable_gpu_options()->set_allow_growth(true);
36 |     (&session)->reset(tensorflow::NewSession(sess_opt));
37 |     if (!session->Create(graph_def).ok()) {
38 |       LOG(ERROR) << "error create graph";
39 |       return false;
40 |     }
41 |     return true;  // the success path previously fell off the end without returning a value
42 |   }
43 |   virtual int run_graph(const cv::Mat& image, std::vector<TextBox>& results) = 0;  // implemented by the concrete detectors
44 |   tensorflow::GraphDef graph_def;
45 |   std::string input_layer; //for detector, we assume there is only one input
46 |   std::unique_ptr<tensorflow::Session> session;
47 |   std::vector<std::string> output_layers;
48 | };
49 |
50 | #endif
51 |
--------------------------------------------------------------------------------
/faster_rcnn_text_detector.cpp:
--------------------------------------------------------------------------------
1 | #include "faster_rcnn_text_detector.h"
2 |
3 |
4 | FasterRCNNTextDetector::FasterRCNNTextDetector(const std::string frozen_graph_filename): Detector(frozen_graph_filename) {
5 | this->init_constants();
6 | }
7 |
8 |
9 | bool FasterRCNNTextDetector::init_constants(){  // Sets the tensor names and the score threshold; always returns true.
10 |   input_layer = "image_tensor:0";
11 |   output_layers = str_util::Split("detection_boxes:0,detection_scores:0,detection_classes:0,detection_oriented_boxes:0,num_detections:0", ',');
12 |   score_thresh = 0.6;
13 |   return true;  // declared bool; previously control reached the end without returning a value
14 | }
15 |
16 | int FasterRCNNTextDetector::run_graph(const cv::Mat& image, std::vector& results){
17 | cv::Mat resized_image;
18 | float ratio_h=0, ratio_w=0;
19 | resize_image_max_len(image, resized_image, ratio_h, ratio_w);
20 | auto input_tensor = cv_mat_to_tensor(resized_image);
21 |
22 | std::vector outputs;
23 | Status run_status = this->session->Run({{this->input_layer, input_tensor}},
24 | this->output_layers, {}, &outputs);
25 | if (!run_status.ok()) {
26 | LOG(ERROR) << "Running model failed: " << run_status;
27 | return -1;
28 | }
29 | LOG(INFO) <<"number of output:"<();
32 | auto detection_scores = outputs[1].tensor();
33 | auto detection_classes = outputs[2].tensor();
34 | auto detection_oriented_boxes = outputs[3].tensor();
35 |
36 | int num_box = detection_boxes.dimension(1);
37 | for(int i=0;i this->score_thresh){
39 | std::vector points;
40 | for(int j=0; j<4; j++){
41 | cv::Point p;
42 | p.x = int(detection_oriented_boxes(0, i, j, 1) * image.cols);
43 | p.y = int(detection_oriented_boxes(0, i, j, 0) * image.rows);
44 | points.push_back(p);
45 | }
46 | TextBox tb(points, "");
47 | results.push_back(tb);
48 | }
49 | }
50 | }
51 |
52 |
--------------------------------------------------------------------------------
/faster_rcnn_text_detector.h:
--------------------------------------------------------------------------------
1 | #ifndef FasterRCNN_Text_Detector_H
2 | #define FasterRCNN_Text_Detector_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | //tensorflow
10 | #include "tensorflow/core/framework/graph.pb.h"
11 | #include "tensorflow/core/framework/tensor_shape.pb.h"
12 | #include "tensorflow/core/framework/tensor.h"
13 | #include "tensorflow/core/graph/graph.h"
14 | #include "tensorflow/core/lib/strings/str_util.h"
15 | #include "tensorflow/core/platform/logging.h"
16 | #include "tensorflow/core/platform/platform.h"
17 | #include "tensorflow/core/platform/types.h"
18 | #include "tensorflow/core/public/session.h"
19 |
20 | //opencv
21 | #include
22 | #include "opencv2/opencv.hpp"
23 |
24 | #include "detector.h"
25 | #include "text_box.h"
26 | #include "utils.h"
27 |
28 | using namespace tensorflow;
29 |
30 |
31 | class FasterRCNNTextDetector: public Detector{  // Detector implementation for the Faster R-CNN text detection graph
32 | public:
33 | FasterRCNNTextDetector(){};  // NOTE(review): does not call init_constants(), so score_thresh and the layer names stay unset
34 |
35 | FasterRCNNTextDetector(const std::string frozen_graph_filename);  // loads the graph through Detector, then calls init_constants()
36 |
37 | bool init_constants();  // sets the input/output tensor names and the score threshold (0.6)
38 | int run_graph(const cv::Mat& image, std::vector& results);  // appends a TextBox to results per accepted detection; returns -1 when the session run fails
39 |
40 | private:
41 | float score_thresh;  // threshold that detection scores are compared against in run_graph
42 | };
43 |
44 | #endif
45 |
--------------------------------------------------------------------------------
/images/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dafanghe/DeepSceneTextReader/ade507dd988a18ce3fbb12f2f518bac6b6b683fa/images/1.jpg
--------------------------------------------------------------------------------
/images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dafanghe/DeepSceneTextReader/ade507dd988a18ce3fbb12f2f518bac6b6b683fa/images/2.jpg
--------------------------------------------------------------------------------
/images/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dafanghe/DeepSceneTextReader/ade507dd988a18ce3fbb12f2f518bac6b6b683fa/images/3.jpg
--------------------------------------------------------------------------------
/images/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dafanghe/DeepSceneTextReader/ade507dd988a18ce3fbb12f2f518bac6b6b683fa/images/4.jpg
--------------------------------------------------------------------------------
/images/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dafanghe/DeepSceneTextReader/ade507dd988a18ce3fbb12f2f518bac6b6b683fa/images/5.jpg
--------------------------------------------------------------------------------
/images/pipeline.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dafanghe/DeepSceneTextReader/ade507dd988a18ce3fbb12f2f518bac6b6b683fa/images/pipeline.jpg
--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | #include "tensorflow/core/platform/init_main.h"
8 | #include "tensorflow/core/util/command_line_flags.h"
9 | #include "faster_rcnn_text_detector.h"
10 | #include "ctc_scene_text_recognizer.h"
11 | #include "scene_text_reader.h"
12 | #include "utils.h"
13 |
14 | int detect_text(string& detector_graph_filename, string& image_filename, string& output_filename)
15 | {
16 | LOG(INFO)<<"start text detection:";
17 |
18 | FasterRCNNTextDetector detector(detector_graph_filename);
19 |
20 | cv::Mat image = cv::imread(image_filename);
21 | if(!image.data) // Check for invalid input
22 | {
23 | LOG(ERROR) << "Could not open or find the image " << image_filename;
24 | return -1;
25 | }
26 | std::vector colors={cv::Scalar(0,0,255), cv::Scalar(0,255,0),
27 | cv::Scalar(255,0,0), cv::Scalar(255,255,0), cv::Scalar(0,255,255), cv::Scalar(255,0,255)};
28 | std::vector res;
29 | detector.run_graph(image, res);
30 | for(int i=0; i points = res[i].get_points();
32 | for(int j=0; j<4; j++){
33 | cv::line(image, points[j], points[(j+1)%4], colors[j%4], 3);
34 | }
35 | }
36 |
37 | //write image
38 | cv::imwrite(output_filename, image);
39 | return 0;
40 | }
41 |
42 |
43 | int recognize_text(string& recognizer_graph_filename, string& dictionary_filename,
44 | string& image_filename, int im_height=32, int im_width=128)
45 | {
46 | LOG(INFO) <<"start text recognition: "< res;
76 | reader.read_text(image, res);
77 | for(int i=0; i flag_list = {
96 | Flag("detector_graph", &detector_graph, "detector graph file name"),
97 | Flag("recognizer_graph", &recognizer_graph, "recognizer graph file name"),
98 | Flag("im_height", &im_height, "image height for recognition model"),
99 | Flag("im_width", &im_width, "image width for recognition model"),
100 | Flag("dictionary_filename", &dictionary_filename, "dictionary filename for decode the recognition results"),
101 | Flag("image_filename", &image_filename, "the filename to be tested."),
102 | Flag("output_filename", &output_filename, "the output filename"),
103 | Flag("mode", &mode, "the mode, must be within the three categories: detect, recognize, detect_and_read"),
104 | };
105 |
106 | string usage = Flags::Usage(argv[0], flag_list);
107 | const bool parse_result = Flags::Parse(&argc, argv, flag_list);
108 |
109 | if (!parse_result) {
110 | LOG(ERROR) << usage;
111 | return -1;
112 | }
113 |
114 | ::tensorflow::port::InitMain(argv[0], &argc, &argv);
115 | if (argc > 1) {
116 | LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
117 | return -1;
118 | }
119 |
120 | if(mode == "detect"){
121 | detect_text(detector_graph, image_filename, output_filename);
122 | }else if(mode == "recognize"){
123 | recognize_text(recognizer_graph, dictionary_filename, image_filename, im_height, im_width);
124 | }else if(mode == "detect_and_read"){
125 | end_to_end_reading(detector_graph, recognizer_graph,
126 | dictionary_filename, image_filename, output_filename);
127 | }else{
128 | LOG(ERROR) << "mode should be within: detect, recognize, detect_and_read";
129 | }
130 | }
131 |
--------------------------------------------------------------------------------
/recognizer.h:
--------------------------------------------------------------------------------
1 | #ifndef Recognizer_H
2 | #define Recognizer_H
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | //opencv
9 | #include
10 | #include "opencv2/opencv.hpp"
11 |
12 | //tensorflow
13 | #include "tensorflow/core/platform/logging.h"
14 | #include "tensorflow/core/public/session.h"
15 | #include "tensorflow/core/framework/graph.pb.h"
16 | #include "tensorflow/core/graph/graph.h"
17 | #include "tensorflow/core/public/session.h"
18 | #include "tensorflow/core/lib/strings/str_util.h"
19 |
20 |
21 | class Recognizer{
22 | //A base class should implemented the following functions:
23 | //Preprocess_image: preprocess a single image represented as an opencv mat
24 | //Preprocess images: preprocess a vector of opencv mat images
25 | public:
26 | Recognizer(){};
27 | Recognizer(const std::string& recognizer_graph_filename, const std::string& dictionary_filename){
28 | init_dictionary(dictionary_filename);
29 | init_graph(recognizer_graph_filename);
30 | };
31 | bool init_graph(const std::string& frozen_graph_filename){  // Reads the frozen GraphDef and creates a session for it; true on success, false (after logging) on failure.
32 |   if (!ReadBinaryProto(tensorflow::Env::Default(), frozen_graph_filename, &graph_def).ok()) {
33 |     LOG(ERROR) << "Read proto";
34 |     return false;  // was `return -1`, which converts to true in a bool function
35 |   }
36 |   tensorflow::SessionOptions sess_opt;
37 |   sess_opt.config.mutable_gpu_options()->set_allow_growth(true);
38 |   (&session)->reset(tensorflow::NewSession(sess_opt));
39 |   if (!session->Create(graph_def).ok()) {
40 |     LOG(ERROR) << "Create graph";
41 |     return false;
42 |   }
43 |   return true;  // the success path previously fell off the end without returning a value
44 | }
45 | bool init_dictionary(const std::string& filename){
46 | std::ifstream inf(filename, std::ios::in);
47 | if(!inf.is_open())
48 | { LOG(ERROR)<<"Error dictionary opening file "< splits;
53 | while(!inf.eof()){
54 | inf>>line;
55 | splits = tensorflow::str_util::Split(line, ',');
56 | this->mapping[std::stoi(splits[0])] = splits[1][0];
57 | }
58 | inf.close();
59 | return 1;
60 | }
61 | virtual void preprocess_image(cv::Mat& input_image, cv::Mat& output_image) = 0;
62 | virtual std::vector preprocess_images(std::vector& input_images) = 0;
63 | virtual std::string run_graph(const cv::Mat& image) = 0;
64 | virtual std::vector run_graph(const std::vector input_images) = 0;
65 | std::string decode_single_text(std::vector& vec){
66 | std::string res;
67 | for(int i=0; imapping[vec[i]]);
69 | }
70 | return res;
71 | }
72 | std::unordered_map mapping;
73 | tensorflow::GraphDef graph_def;
74 | std::unique_ptr session;
75 | std::vector input_layers;
76 | std::vector output_layers;
77 | };
78 |
79 | #endif
80 |
--------------------------------------------------------------------------------
/scene_text_reader.cpp:
--------------------------------------------------------------------------------
1 | #include "scene_text_reader.h"
2 |
3 | namespace scene_text_reader{
4 |
5 | SceneTextReader::SceneTextReader(){
6 |
7 | }
8 |
9 | SceneTextReader::SceneTextReader(const std::string& detector_graph_filename, const string& recognizer_graph_filename,
10 | const std::string& detector_model, const std::string& dictionary_filename, const std::string& recognizer_model)
11 | {
12 | if(detector_model == "FasterRCNN"){
13 | detector = new FasterRCNNTextDetector(detector_graph_filename);
14 | }else{
15 | LOG(ERROR) <& boxes, std::vector& word_regions){
25 | int num_word = boxes.size();
26 | if(num_word == 0) return;
27 |
28 | for(int i=0; i new_points;
33 | get_cropped_extend_image(image, boxes[i], cropped, new_points);
34 |
35 | cv::Mat rotated;
36 | std::vector rotated_points;
37 | rotate_image_and_points(cropped, new_points, angle, rotated, rotated_points);
38 | word_regions.push_back(rotated);
39 | }
40 | }
41 |
42 | void SceneTextReader::read_text(cv::Mat& image, std::vector& res){
43 | detector->run_graph(image, res);
44 | std::cout<<"found "< word_regions;
46 | extract_word_regions(image, res, word_regions);
47 | //preprocess all the images;
48 | std::vector preprocessed_images = recognizer->preprocess_images(word_regions);
49 | std::cout< output_texts = recognizer->run_graph(preprocessed_images);
51 | for(int i=0; i
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | #include "tensorflow/core/framework/graph.pb.h"
12 | #include "tensorflow/core/framework/tensor_shape.pb.h"
13 | #include "tensorflow/core/framework/tensor.h"
14 | #include "tensorflow/core/graph/graph.h"
15 | #include "tensorflow/core/lib/strings/str_util.h"
16 | #include "tensorflow/core/platform/init_main.h"
17 | #include "tensorflow/core/platform/logging.h"
18 | #include "tensorflow/core/platform/platform.h"
19 | #include "tensorflow/core/platform/types.h"
20 | #include "tensorflow/core/public/session.h"
21 |
22 | //opencv
23 | #include
24 | #include "opencv2/opencv.hpp"
25 |
26 | #include "utils.h"
27 | //recognizer
28 | #include "ctc_scene_text_recognizer.h"
29 | #include "recognizer.h"
30 | //detector
31 | #include "faster_rcnn_text_detector.h"
32 | #include "detector.h"
33 |
34 | #include "text_box.h"
35 |
36 | using namespace tensorflow;
37 |
38 | namespace scene_text_reader{
39 |
40 | class SceneTextReader{
41 | public:
42 | SceneTextReader();
43 |
44 | SceneTextReader(const std::string&, const std::string&, const std::string&,
45 | const std::string& detector_model=std::string("FasterRCNN"),
46 | const std::string& recognizer_model=std::string("CTC"));
47 |
48 | void read_text(cv::Mat&, std::vector& res);
49 |
50 | void extract_word_regions(cv::Mat& image,
51 | std::vector& boxes, std::vector& word_regions);
52 |
53 | private:
54 | Detector *detector;
55 | Recognizer *recognizer;
56 | };
57 |
58 | }
59 | #endif
60 |
--------------------------------------------------------------------------------
/test_images/img_108.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dafanghe/DeepSceneTextReader/ade507dd988a18ce3fbb12f2f518bac6b6b683fa/test_images/img_108.jpg
--------------------------------------------------------------------------------
/test_images/word_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dafanghe/DeepSceneTextReader/ade507dd988a18ce3fbb12f2f518bac6b6b683fa/test_images/word_2.png
--------------------------------------------------------------------------------
/text_box.cpp:
--------------------------------------------------------------------------------
1 | #include "text_box.h"
2 |
3 | TextBox::TextBox(std::vector& points, std::string text){
4 | this->points = points;
5 | this->text = text;
6 | }
7 |
8 | void TextBox::get_rectangle_box(cv::Point& p1, cv::Point& p2){
9 | int minx = 100000, miny = 100000, maxx = 0, maxy = 0;
10 | for(auto const& value: this->points){
11 | minx = std::min(minx, value.x);
12 | miny = std::min(miny, value.y);
13 | maxx = std::max(maxx, value.x);
14 | maxy = std::max(maxy, value.y);
15 | }
16 | p1.x = minx;
17 | p1.y = miny;
18 | p2.x = maxx;
19 | p2.y = maxy;
20 | }
21 |
22 | std::ostream &operator<<(std::ostream &os, TextBox &m) {
23 | std::vector points = m.get_points();
24 | os<<"oriented box: ";
25 | for(int i = 0; i < points.size(); i++){
26 | os<
5 | #include
6 | #include
7 | #include
8 |
9 | //opencv
10 | #include
11 | #include "opencv2/opencv.hpp"
12 |
13 | class TextBox{
14 | public:
15 | TextBox(std::vector& points, std::string text);
16 | void get_rectangle_box(cv::Point& p1, cv::Point& p2);
17 | std::vector& get_points() {return points;}
18 | std::string& get_text() {return text;}
19 | float& get_score() {return score;}
20 | void set_text(std::string s) {text=s;}
21 | private:
22 | std::vector points;
23 | std::string text;
24 | float score;
25 | };
26 |
27 | std::ostream &operator<<(std::ostream &os, TextBox &m);
28 |
29 | #endif
30 |
--------------------------------------------------------------------------------
/utils.cpp:
--------------------------------------------------------------------------------
1 | #include "utils.h"
2 |
3 | #define PI 3.14159265358979323846
4 |
5 | void resize_image_max_len(const cv::Mat& image, cv::Mat& resized_image, float& ratio_h, float& ratio_w, int max_side_len){
6 | int height = image.rows;
7 | int width = image.cols;
8 | float ratio = 1;
9 | if(std::max(height, width) > max_side_len)
10 | ratio = height > width ? float(max_side_len)/height: float(max_side_len)/width;
11 | int resize_h = int(height * ratio);
12 | int resize_w = int(width * ratio);
13 | resize_h = resize_h%32 == 0? resize_h : (resize_h/32 - 1) * 32;
14 | resize_w = resize_w%32 == 0? resize_w : (resize_w/32 - 1) * 32;
15 | cv::resize(image, resized_image, cv::Size(resize_w, resize_h));
16 |
17 | ratio_h = float(resize_h)/height;
18 | ratio_w = float(resize_w)/width;
19 | }
20 |
21 | void resize_image_fix_height(const cv::Mat& image, cv::Mat& resized_image, float& ratio, int fixed_height){
22 | int height = image.rows;
23 | int width = image.cols;
24 | ratio = float(fixed_height)/height;
25 | int resize_h = fixed_height;
26 | int resize_w = int(width * ratio);
27 | cv::resize(image, resized_image, cv::Size(resize_w, resize_h));
28 | }
29 |
30 | void pad_image_width(const cv::Mat& image, cv::Mat& padded_image, int target_width){
31 | int height = image.rows;
32 | int width = image.cols;
33 | int borderType = cv::BORDER_CONSTANT;
34 | if(width > target_width)
35 | cv::resize(image, padded_image, cv::Size(target_width, height));
36 | else if(width < target_width){
37 | int pad_len = target_width - width;
38 | copyMakeBorder(image, padded_image, 0, 0, 0, pad_len, borderType, cv::Scalar(0,0,0));
39 | }else
40 | padded_image = image.clone();
41 | }
42 |
43 | tensorflow::Tensor cv_mat_to_tensor(const cv::Mat& image){
44 | int height = image.rows;
45 | int width = image.cols;
46 | int depth = 3;
47 | tensorflow::Tensor res_tensor(tensorflow::DT_UINT8, tensorflow::TensorShape({1, height, width, 3}));
48 |
49 | //we assume that the image is unsigned char dtype
50 | const unsigned char *source_data = (unsigned char*)(image.data);
51 |
52 | auto tensor_mapped = res_tensor.tensor();
53 | for (int y = 0; y < height; ++y) {
54 | for (int x = 0; x < width; ++x) {
55 | auto b = source_data[image.step * y + x * image.channels()];
56 | auto g = source_data[image.step * y + x * image.channels()+1];
57 | auto r = source_data[image.step * y + x * image.channels()+2];
58 | tensor_mapped(0, y, x, 0) = r;
59 | tensor_mapped(0, y, x, 1) = g;
60 | tensor_mapped(0, y, x, 2) = b;
61 | }
62 | }
63 | return res_tensor;
64 | }
65 |
66 | cv::Mat tensor_to_cv_mat(const tensorflow::Tensor tensor){
67 | auto tensor_data = tensor.flat();
68 | //assume it is a 4d tensor
69 | auto tensor_shape = tensor.shape();
70 | int height = tensor_shape.dim_size(1);
71 | int width = tensor_shape.dim_size(2);
72 | std::cout<<" height "< points = text_box.get_points();
91 | float offset_y = points[1].y - points[0].y;
92 | float offset_x = points[1].x - points[0].x;
93 | return atan2(offset_y, offset_x);
94 | }
95 |
96 | void get_cropped_extend_image(cv::Mat& image, TextBox& box, cv::Mat& cropped, std::vector& new_points){
97 | cv::Point p1, p2;
98 | box.get_rectangle_box(p1, p2);
99 | int height = p2.y - p1.y;
100 | int width = p1.y - p1.x;
101 |
102 | int extend_len = std::max(height, width);
103 | int minx = std::max(0, p1.x - extend_len);
104 | int miny = std::max(0, p1.y - extend_len);
105 | int maxx = std::min(image.cols, p2.x + extend_len);
106 | int maxy = std::min(image.rows, p2.y + extend_len);
107 |
108 | std::vector points = box.get_points();
109 | new_points.resize(points.size());
110 | for(int i=0; i& points,
126 | float angle, cv::Mat& rotated_image, std::vector& rotated_points){
127 | int height = cropped.rows, width = cropped.cols;
128 | cv::Point center(width/2, height/2);
129 | int min_side = std::min(height, width);
130 | auto M = cv::getRotationMatrix2D(center, angle * 180./PI, 1.0);
131 | cv::warpAffine(cropped, rotated_image, M, cv::Size(cropped.cols*2, cropped.rows*2));
132 |
133 | //rotate the images
134 | rotated_points.resize(points.size());
135 | for(int i=0; i& points){
169 | std::vector colors={cv::Scalar(0,0,255), cv::Scalar(0,255,0),
170 | cv::Scalar(255,0,0), cv::Scalar(255,255,0), cv::Scalar(0,255,255), cv::Scalar(255,0,255)};
171 | for(int j=0; j<4; j++){
172 | cv::line(image, points[j], points[(j+1)%4], colors[j%4], 3);
173 | }
174 | }
175 |
176 | void draw_text_box(cv::Mat& image, TextBox& text_box){
177 | //draw the polygon
178 | std::vector colors={cv::Scalar(0,0,255), cv::Scalar(0,255,0),
179 | cv::Scalar(255,0,0), cv::Scalar(255,255,0), cv::Scalar(0,255,255), cv::Scalar(255,0,255)};
180 | draw_polygon(image, text_box.get_points());
181 | //draw text above the left up corner
182 | cv::Point p1, p2;
183 | text_box.get_rectangle_box(p1, p2);
184 | cv::Point draw_loc(std::max(0, p1.x - 10), std::max(0, p1.y - 10));
185 | cv::putText(image, text_box.get_text(), draw_loc, cv::FONT_HERSHEY_PLAIN, 1.3, cv::Scalar(0,255,255));
186 | }
187 |
--------------------------------------------------------------------------------
/utils.h:
--------------------------------------------------------------------------------
1 | #ifndef Scene_Text_Utils_H
2 | #define Scene_Text_Utils_H
3 |
4 | #include "tensorflow/core/framework/tensor_shape.pb.h"
5 | #include "tensorflow/core/framework/tensor.h"
6 | #include
7 |
8 | //opencv
9 | #include
10 | #include "opencv2/opencv.hpp"
11 | #include "text_box.h"
12 | #include
13 |
14 | void resize_image_max_len(const cv::Mat& image, cv::Mat& resized_image, float& ratio_h, float& ratio_w, int max_side_len=800);
15 |
16 | void resize_image_fix_height(const cv::Mat& image, cv::Mat& resized_image, float& ratio, int fixed_height=32);
17 |
18 | void pad_image_width(const cv::Mat& image, cv::Mat& padded_image, int target_width=128);
19 |
20 | tensorflow::Tensor cv_mat_to_tensor(const cv::Mat& image);
21 |
22 | cv::Mat tensor_to_cv_mat(const tensorflow::Tensor tensor);
23 |
24 | float get_angle(TextBox& text_box);
25 |
26 | void get_cropped_extend_image(cv::Mat& image, TextBox& box, cv::Mat& cropped, std::vector& new_points);
27 |
28 | void rotate_image_and_points(cv::Mat& cropped, std::vector& points, float angle, cv::Mat& rotated_image, std::vector& rotated_points);
29 |
30 | cv::Point rotate_point(cv::Point& point, float angle, cv::Point& center);
31 |
32 | void draw_polygon(cv::Mat& image, std::vector& points);
33 |
34 | void draw_text_box(cv::Mat& image, TextBox& text_box);
35 | #endif
36 |
--------------------------------------------------------------------------------