├── paddleOCR ├── images │ └── test.bmp ├── text_angle_cls.cpp ├── weights │ ├── en_det_model.onnx │ ├── en_rec_model.onnx │ └── ch_ppocr_mobile_v2.0_cls_train.onnx ├── en_dict.txt ├── text_angle_cls.h ├── text_rec.h ├── text_det.h ├── main.cpp ├── text_rec.cpp └── text_det.cpp └── README.md /paddleOCR/images/test.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DingHsun/PaddleOCR-cpp/HEAD/paddleOCR/images/test.bmp -------------------------------------------------------------------------------- /paddleOCR/text_angle_cls.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DingHsun/PaddleOCR-cpp/HEAD/paddleOCR/text_angle_cls.cpp -------------------------------------------------------------------------------- /paddleOCR/weights/en_det_model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DingHsun/PaddleOCR-cpp/HEAD/paddleOCR/weights/en_det_model.onnx -------------------------------------------------------------------------------- /paddleOCR/weights/en_rec_model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DingHsun/PaddleOCR-cpp/HEAD/paddleOCR/weights/en_rec_model.onnx -------------------------------------------------------------------------------- /paddleOCR/weights/ch_ppocr_mobile_v2.0_cls_train.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DingHsun/PaddleOCR-cpp/HEAD/paddleOCR/weights/ch_ppocr_mobile_v2.0_cls_train.onnx -------------------------------------------------------------------------------- /paddleOCR/en_dict.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | : 12 | ; 13 | < 14 | = 15 | > 16 | ? 17 | @ 18 | A 19 | B 20 | C 21 | D 22 | E 23 | F 24 | G 25 | H 26 | I 27 | J 28 | K 29 | L 30 | M 31 | N 32 | O 33 | P 34 | Q 35 | R 36 | S 37 | T 38 | U 39 | V 40 | W 41 | X 42 | Y 43 | Z 44 | [ 45 | \ 46 | ] 47 | ^ 48 | _ 49 | ` 50 | a 51 | b 52 | c 53 | d 54 | e 55 | f 56 | g 57 | h 58 | i 59 | j 60 | k 61 | l 62 | m 63 | n 64 | o 65 | p 66 | q 67 | r 68 | s 69 | t 70 | u 71 | v 72 | w 73 | x 74 | y 75 | z 76 | { 77 | | 78 | } 79 | ~ 80 | ! 81 | " 82 | # 83 | $ 84 | % 85 | & 86 | ' 87 | ( 88 | ) 89 | * 90 | + 91 | , 92 | - 93 | . 94 | / 95 | 96 | -------------------------------------------------------------------------------- /paddleOCR/text_angle_cls.h: -------------------------------------------------------------------------------- 1 | #define _CRT_SECURE_NO_WARNINGS 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | //#include 8 | #include 9 | 10 | using namespace cv; 11 | using namespace std; 12 | using namespace Ort; 13 | 14 | class TextClassifier 15 | { 16 | public: 17 | TextClassifier(); 18 | int predict(Mat cv_image); 19 | private: 20 | const int label_list[2] = { 0, 180 }; 21 | 22 | Mat preprocess(Mat srcimg); 23 | void normalize_(Mat img); 24 | const int inpWidth = 192; 25 | const int inpHeight = 48; 26 | int num_out; 27 | vector input_image_; 28 | 29 | Env env = Env(ORT_LOGGING_LEVEL_ERROR, "Angle classify"); 30 | Ort::Session* ort_session = nullptr; 31 | SessionOptions sessionOptions = SessionOptions(); 32 | vector input_names; 33 | vector output_names; 34 | vector> input_node_dims; // >=1 outputs 35 | vector> output_node_dims; // >=1 outputs 36 | }; -------------------------------------------------------------------------------- /paddleOCR/text_rec.h: -------------------------------------------------------------------------------- 1 | #define _CRT_SECURE_NO_WARNINGS 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | //#include 8 | #include 9 | 10 | using namespace cv; 11 | using namespace std; 12 | using namespace Ort; 13 | 14 | class TextRecognizer 15 | { 16 | public: 17 | TextRecognizer(); 18 | string predict_text(Mat cv_image); 19 | 20 | private: 21 | Mat preprocess(Mat srcimg); 22 | void normalize_(Mat img); 23 | const int inpWidth = 560; 24 | const int inpHeight = 48; 25 | 26 | vector input_image_; 27 | vector alphabet; 28 | int names_len; 29 | vector preb_label; 30 | 31 | Env env = Env(ORT_LOGGING_LEVEL_ERROR, "CRNN"); 32 | Ort::Session* ort_session = nullptr; 33 | SessionOptions sessionOptions = SessionOptions(); 34 | vector input_names; 35 | vector output_names; 36 | vector> input_node_dims; // >=1 outputs 37 | vector> output_node_dims; // >=1 outputs 38 | }; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PaddleOCR C++ 使用 opencv 和 onnxruntime 運行 2 | PaddleOCR on C++ using onnxruntime and opencv. 3 | 本專案使用C++實作開發，onnxruntime實現PaddleOCR，可運行x64和x86版本。 4 | 5 | 提供兩種OCR功能 6 | 1. 全圖識別(文字位置)與辨識(文字內容) 7 | 2. 選擇ROI範圍進行辨識 8 | ## C++ Packages 9 | * Microsoft.ML.OnnxRuntime.1.12.1 -> https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime/1.12.1 10 | * Microsoft.ML.OnnxRuntime.Gpu.1.12.1 -> https://feed.nuget.org/packages/Microsoft.ML.OnnxRuntime.Gpu/1.12.1 11 | * opencv 4.5.0 12 | * OpencvX86 4.4.0 13 | 14 | 放置路徑README.md同目錄下 15 | 16 | ## 模型選擇 17 | PP-OCR系列模型列表 18 | 19 | https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.7/doc/doc_ch/models_list.md 20 | 21 | 下載後至 https://github.com/PaddlePaddle/Paddle2ONNX 執行model轉onnx並放置到下列路徑(放置路徑 ./weights/) 22 | 23 | 修改程式 24 | * text_det.cpp - string model_path = your_model.onnx 25 | * text_rec.cpp - string model_path = your_model.onnx 26 | * text_angle_cls.cpp - string model_path = your_model.onnx 27 | 28 | ## 文字辨識文本下載 29 | xxx.txt放置main.cpp同目錄下。 30 | 範例：中文識別模型ch_PP-OCRv4_rec 31 | ![tempsnip](https://github.com/DingHsun/PaddleOCR-cpp/assets/110473288/e2c09df1-882f-4458-9bff-f5cadcd01682) 32 | ![tempsnip1](https://github.com/DingHsun/PaddleOCR-cpp/assets/110473288/a81544da-11fe-4887-8e9c-9cb063826a1a) 33 | -------------------------------------------------------------------------------- /paddleOCR/text_det.h: -------------------------------------------------------------------------------- 1 | #define _CRT_SECURE_NO_WARNINGS 2 | #include 3 | #include 4 | #include 5 | //#include 6 | #include 7 | 8 | using namespace cv; 9 | using namespace std; 10 | using namespace Ort; 11 | 12 | class TextDetector 13 | { 14 | public: 15 | TextDetector(); 16 | vector< vector > detect(Mat& srcimg); 17 | void draw_pred(Mat& srcimg, vector< vector > results); 18 | Mat get_rotate_crop_image(const Mat& frame, vector vertices); 19 | private: 20 | float binaryThreshold; 21 | float polygonThreshold; 22 | float unclipRatio; 23 | int maxCandidates; 24 | const int longSideThresh = 3; 25 | const int short_size = 736; 26 | const float meanValues[3] = { 0.485, 0.456, 0.406 }; 27 | const float normValues[3] = { 0.229, 0.224, 0.225 }; 28 | float contourScore(const Mat& binary, const vector& contour); 29 | void unclip(const vector& inPoly, vector& outPoly); 30 | vector< vector > order_points_clockwise(vector< vector > results); 31 | Mat preprocess(Mat srcimg); 32 | vector input_image_; 33 | void normalize_(Mat img); 34 | 35 | Session* net; 36 | Env env = Env(ORT_LOGGING_LEVEL_ERROR, "DBNet"); 37 | SessionOptions sessionOptions = SessionOptions(); 38 | vector input_names; 39 | vector output_names; 40 | }; 41 | -------------------------------------------------------------------------------- /paddleOCR/main.cpp: -------------------------------------------------------------------------------- 1 | #define _CRT_SECURE_NO_WARNINGS 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | //#include 8 | #include 9 | #include "text_det.h" 10 | #include "text_angle_cls.h" 11 | #include "text_rec.h" 12 | 13 | using namespace cv; 14 | using namespace std; 15 | using namespace Ort; 16 | 17 | 18 | int main() 19 | { 20 | TextDetector detect_model; 21 | TextClassifier angle_model; 22 | TextRecognizer rec_model; 23 | 24 | string imgpath = "images/test.bmp"; 25 | Mat srcimg = imread(imgpath); 26 | 27 | // test hole image 28 | vector< vector > results = detect_model.detect(srcimg); 29 | for (size_t i = 0; i < results.size(); i++) 30 | { 31 | Mat textimg = detect_model.get_rotate_crop_image(srcimg, results[i]); 32 | if (angle_model.predict(textimg) == 1) 33 | { 34 | cv::rotate(textimg, textimg, 1); 35 | } 36 | string text = rec_model.predict_text(textimg); 37 | cout << text << endl; 38 | } 39 | detect_model.draw_pred(srcimg, results); 40 | imshow("PaddleOCR", srcimg); 41 | waitKey(0); 42 | destroyAllWindows(); 43 | 44 | 45 | // select ROI and test 46 | srcimg = imread(imgpath); 47 | cv::Rect img = cv::selectROI(srcimg); 48 | destroyAllWindows(); 49 | cv::Mat RegionROI = srcimg(cv::Range(img.y, img.y + img.height), cv::Range(img.x, img.x + img.width)); 50 | cv::Mat MaskImg = cv::Mat::zeros(srcimg.size(), srcimg.type()); 51 | Mat imageROI = MaskImg(Rect(img.x, img.y, RegionROI.cols, RegionROI.rows)); 52 | addWeighted(imageROI, 1, RegionROI, 1, 0, imageROI); 53 | vector< vector > resultss = detect_model.detect(MaskImg); 54 | for (size_t i = 0; i < resultss.size(); i++) 55 | { 56 | Mat textimg = detect_model.get_rotate_crop_image(MaskImg, resultss[i]); 57 | string text = rec_model.predict_text(textimg); 58 | cout << text << endl; 59 | } 60 | detect_model.draw_pred(srcimg, resultss); 61 | imshow("PaddleOCR", srcimg); 62 | waitKey(0); 63 | destroyAllWindows(); 64 | } -------------------------------------------------------------------------------- /paddleOCR/text_rec.cpp: -------------------------------------------------------------------------------- 1 | #include"text_rec.h" 2 | 3 | TextRecognizer::TextRecognizer() 4 | { 5 | string model_path = "weights/en_rec_model.onnx"; 6 | std::wstring widestr = std::wstring(model_path.begin(), model_path.end()); 7 | //OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); 8 | sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC); 9 | ort_session = new Session(env, widestr.c_str(), sessionOptions); 10 | size_t numInputNodes = ort_session->GetInputCount(); 11 | size_t numOutputNodes = ort_session->GetOutputCount(); 12 | AllocatorWithDefaultOptions allocator; 13 | for (int i = 0; i < numInputNodes; i++) 14 | { 15 | input_names.push_back(ort_session->GetInputName(i, allocator)); 16 | Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i); 17 | auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo(); 18 | auto input_dims = input_tensor_info.GetShape(); 19 | input_node_dims.push_back(input_dims); 20 | } 21 | for (int i = 0; i < numOutputNodes; i++) 22 | { 23 | output_names.push_back(ort_session->GetOutputName(i, allocator)); 24 | Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i); 25 | auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo(); 26 | auto output_dims = output_tensor_info.GetShape(); 27 | output_node_dims.push_back(output_dims); 28 | } 29 | 30 | ifstream ifs("en_dict.txt"); 31 | string line; 32 | while (getline(ifs, line)) 33 | { 34 | this->alphabet.push_back(line); 35 | } 36 | this->alphabet.push_back(" "); 37 | names_len = this->alphabet.size(); 38 | } 39 | 40 | Mat TextRecognizer::preprocess(Mat srcimg) 41 | { 42 | Mat dstimg; 43 | int h = srcimg.rows; 44 | int w = srcimg.cols; 45 | const float ratio = w / float(h); 46 | int resized_w = int(ceil((float)this->inpHeight * ratio)); 47 | if (ceil(this->inpHeight * ratio) > this->inpWidth) 48 | { 49 | resized_w = this->inpWidth; 50 | } 51 | 52 | resize(srcimg, dstimg, Size(resized_w, this->inpHeight), INTER_LINEAR); 53 | return dstimg; 54 | } 55 | 56 | void TextRecognizer::normalize_(Mat img) 57 | { 58 | // img.convertTo(img, CV_32F); 59 | int row = img.rows; 60 | int col = img.cols; 61 | this->input_image_.resize(this->inpHeight * this->inpWidth * img.channels()); 62 | for (int c = 0; c < 3; c++) 63 | { 64 | for (int i = 0; i < row; i++) 65 | { 66 | for (int j = 0; j < inpWidth; j++) 67 | { 68 | if (j < col) 69 | { 70 | float pix = img.ptr(i)[j * 3 + c]; 71 | this->input_image_[c * row * inpWidth + i * inpWidth + j] = (pix / 255.0 - 0.5) / 0.5; 72 | } 73 | else 74 | { 75 | this->input_image_[c * row * inpWidth + i * inpWidth + j] = 0; 76 | } 77 | } 78 | } 79 | } 80 | } 81 | 82 | string TextRecognizer::predict_text(Mat cv_image) 83 | { 84 | Mat dstimg = this->preprocess(cv_image); 85 | this->normalize_(dstimg); 86 | 87 | array input_shape_{ 1, 3, this->inpHeight, this->inpWidth }; 88 | 89 | auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 90 | Value input_tensor_ = Value::CreateTensor(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size()); 91 | 92 | // 开始推理 93 | vector ort_outputs = ort_session->Run(RunOptions{ nullptr }, &input_names[0], &input_tensor_, 1, output_names.data(), output_names.size()); // 开始推理 94 | 95 | const float* pdata = ort_outputs[0].GetTensorMutableData(); 96 | 97 | int i = 0, j = 0; 98 | int h = ort_outputs.at(0).GetTensorTypeAndShapeInfo().GetShape().at(2); 99 | int w = ort_outputs.at(0).GetTensorTypeAndShapeInfo().GetShape().at(1); 100 | 101 | preb_label.resize(w); 102 | for (i = 0; i < w; i++) 103 | { 104 | int one_label_idx = 0; 105 | float max_data = -10000; 106 | for (j = 0; j < h; j++) 107 | { 108 | float data_ = pdata[i * h + j]; 109 | if (data_ > max_data) 110 | { 111 | max_data = data_; 112 | one_label_idx = j; 113 | } 114 | } 115 | preb_label[i] = one_label_idx; 116 | } 117 | 118 | vector no_repeat_blank_label; 119 | for (size_t elementIndex = 0; elementIndex < w; ++elementIndex) 120 | { 121 | if (preb_label[elementIndex] != 0 && !(elementIndex > 0 && preb_label[elementIndex - 1] == preb_label[elementIndex])) 122 | { 123 | no_repeat_blank_label.push_back(preb_label[elementIndex] - 1); 124 | } 125 | } 126 | 127 | int len_s = no_repeat_blank_label.size(); 128 | string plate_text; 129 | for (i = 0; i < len_s; i++) 130 | { 131 | plate_text += alphabet[no_repeat_blank_label[i]]; 132 | } 133 | 134 | return plate_text; 135 | } 136 | -------------------------------------------------------------------------------- /paddleOCR/text_det.cpp: -------------------------------------------------------------------------------- 1 | #include"text_det.h" 2 | 3 | TextDetector::TextDetector() 4 | { 5 | this->binaryThreshold = 0.3; 6 | this->polygonThreshold = 0.5; 7 | this->unclipRatio = 1.6; 8 | this->maxCandidates = 1000; 9 | 10 | string model_path = "weights/en_det_model.onnx"; 11 | std::wstring widestr = std::wstring(model_path.begin(), model_path.end()); 12 | //OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); ////gpu 13 | sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC); 14 | net = new Session(env, widestr.c_str(), sessionOptions); 15 | size_t numInputNodes = net->GetInputCount(); 16 | size_t numOutputNodes = net->GetOutputCount(); 17 | AllocatorWithDefaultOptions allocator; 18 | for (int i = 0; i < numInputNodes; i++) 19 | { 20 | input_names.push_back(net->GetInputName(i, allocator)); 21 | } 22 | for (int i = 0; i < numOutputNodes; i++) 23 | { 24 | output_names.push_back(net->GetOutputName(i, allocator)); 25 | } 26 | } 27 | 28 | Mat TextDetector::preprocess(Mat srcimg) 29 | { 30 | Mat dstimg; 31 | cvtColor(srcimg, dstimg, COLOR_BGR2RGB); 32 | int h = srcimg.rows; 33 | int w = srcimg.cols; 34 | float scale_h = 1; 35 | float scale_w = 1; 36 | if (h < w) 37 | { 38 | scale_h = (float)this->short_size / (float)h; 39 | float tar_w = (float)w * scale_h; 40 | tar_w = tar_w - (int)tar_w % 32; 41 | tar_w = max((float)32, tar_w); 42 | scale_w = tar_w / (float)w; 43 | } 44 | else 45 | { 46 | scale_w = (float)this->short_size / (float)w; 47 | float tar_h = (float)h * scale_w; 48 | tar_h = tar_h - (int)tar_h % 32; 49 | tar_h = max((float)32, tar_h); 50 | scale_h = tar_h / (float)h; 51 | } 52 | resize(dstimg, dstimg, Size(int(scale_w * dstimg.cols), int(scale_h * dstimg.rows)), INTER_LINEAR); 53 | return dstimg; 54 | } 55 | 56 | void TextDetector::normalize_(Mat img) 57 | { 58 | // img.convertTo(img, CV_32F); 59 | int row = img.rows; 60 | int col = img.cols; 61 | this->input_image_.resize(row * col * img.channels()); 62 | for (int c = 0; c < 3; c++) 63 | { 64 | for (int i = 0; i < row; i++) 65 | { 66 | for (int j = 0; j < col; j++) 67 | { 68 | float pix = img.ptr(i)[j * 3 + c]; 69 | this->input_image_[c * row * col + i * col + j] = (pix / 255.0 - this->meanValues[c]) / this->normValues[c]; 70 | } 71 | } 72 | } 73 | } 74 | 75 | vector< vector > TextDetector::detect(Mat& srcimg) 76 | { 77 | int h = srcimg.rows; 78 | int w = srcimg.cols; 79 | Mat dstimg = this->preprocess(srcimg); 80 | this->normalize_(dstimg); 81 | array input_shape_{ 1, 3, dstimg.rows, dstimg.cols }; 82 | 83 | auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 84 | Value input_tensor_ = Value::CreateTensor(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size()); 85 | 86 | vector ort_outputs = net->Run(RunOptions{ nullptr }, &input_names[0], &input_tensor_, 1, output_names.data(), output_names.size()); 87 | const float* floatArray = ort_outputs[0].GetTensorMutableData(); 88 | int outputCount = 1; 89 | for (int i = 0; i < ort_outputs.at(0).GetTensorTypeAndShapeInfo().GetShape().size(); i++) 90 | { 91 | int dim = ort_outputs.at(0).GetTensorTypeAndShapeInfo().GetShape().at(i); 92 | outputCount *= dim; 93 | } 94 | 95 | Mat binary(dstimg.rows, dstimg.cols, CV_32FC1); 96 | memcpy(binary.data, floatArray, outputCount * sizeof(float)); 97 | 98 | // Threshold 99 | Mat bitmap; 100 | threshold(binary, bitmap, binaryThreshold, 255, THRESH_BINARY); 101 | // Scale ratio 102 | float scaleHeight = (float)(h) / (float)(binary.size[0]); 103 | float scaleWidth = (float)(w) / (float)(binary.size[1]); 104 | // Find contours 105 | vector< vector > contours; 106 | bitmap.convertTo(bitmap, CV_8UC1); 107 | findContours(bitmap, contours, RETR_LIST, CHAIN_APPROX_SIMPLE); 108 | 109 | // Candidate number limitation 110 | size_t numCandidate = min(contours.size(), (size_t)(maxCandidates > 0 ? maxCandidates : INT_MAX)); 111 | vector confidences; 112 | vector< vector > results; 113 | for (size_t i = 0; i < numCandidate; i++) 114 | { 115 | vector& contour = contours[i]; 116 | 117 | // Calculate text contour score 118 | if (contourScore(binary, contour) < polygonThreshold) 119 | continue; 120 | 121 | // Rescale 122 | vector contourScaled; contourScaled.reserve(contour.size()); 123 | for (size_t j = 0; j < contour.size(); j++) 124 | { 125 | contourScaled.push_back(Point(int(contour[j].x * scaleWidth), 126 | int(contour[j].y * scaleHeight))); 127 | } 128 | 129 | // Unclip 130 | RotatedRect box = minAreaRect(contourScaled); 131 | float longSide = std::max(box.size.width, box.size.height); 132 | if (longSide < longSideThresh) 133 | { 134 | continue; 135 | } 136 | 137 | // minArea() rect is not normalized, it may return rectangles with angle=-90 or height < width 138 | const float angle_threshold = 60; // do not expect vertical text, TODO detection algo property 139 | bool swap_size = false; 140 | if (box.size.width < box.size.height) // horizontal-wide text area is expected 141 | swap_size = true; 142 | else if (fabs(box.angle) >= angle_threshold) // don't work with vertical rectangles 143 | swap_size = true; 144 | if (swap_size) 145 | { 146 | swap(box.size.width, box.size.height); 147 | if (box.angle < 0) 148 | box.angle += 90; 149 | else if (box.angle > 0) 150 | box.angle -= 90; 151 | } 152 | 153 | Point2f vertex[4]; 154 | box.points(vertex); // order: bl, tl, tr, br 155 | vector approx; 156 | for (int j = 0; j < 4; j++) 157 | approx.emplace_back(vertex[j]); 158 | vector polygon; 159 | unclip(approx, polygon); 160 | 161 | box = minAreaRect(polygon); 162 | longSide = std::max(box.size.width, box.size.height); 163 | if (longSide < longSideThresh + 2) 164 | { 165 | continue; 166 | } 167 | 168 | results.push_back(polygon); 169 | } 170 | confidences = vector(contours.size(), 1.0f); 171 | return results; 172 | /*vector< vector > order_points = this->order_points_clockwise(results); 173 | return order_points;*/ 174 | } 175 | 176 | vector< vector > TextDetector::order_points_clockwise(vector< vector > results) 177 | { 178 | vector< vector > order_points(results); 179 | for (int i = 0; i < results.size(); i++) 180 | { 181 | float max_sum_pts = -10000; 182 | float min_sum_pts = 10000; 183 | float max_diff_pts = -10000; 184 | float min_diff_pts = 10000; 185 | 186 | int max_sum_pts_id = 0; 187 | int min_sum_pts_id = 0; 188 | int max_diff_pts_id = 0; 189 | int min_diff_pts_id = 0; 190 | for (int j = 0; j < 4; j++) 191 | { 192 | const float sum_pt = results[i][j].x + results[i][j].y; 193 | if (sum_pt > max_sum_pts) 194 | { 195 | max_sum_pts = sum_pt; 196 | max_sum_pts_id = j; 197 | } 198 | if (sum_pt < min_sum_pts) 199 | { 200 | min_sum_pts = sum_pt; 201 | min_sum_pts_id = j; 202 | } 203 | 204 | const float diff_pt = results[i][j].y - results[i][j].x; 205 | if (diff_pt > max_diff_pts) 206 | { 207 | max_diff_pts = diff_pt; 208 | max_diff_pts_id = j; 209 | } 210 | if (diff_pt < min_diff_pts) 211 | { 212 | min_diff_pts = diff_pt; 213 | min_diff_pts_id = j; 214 | } 215 | } 216 | order_points[i][0].x = results[i][min_sum_pts_id].x; 217 | order_points[i][0].y = results[i][min_sum_pts_id].y; 218 | order_points[i][2].x = results[i][max_sum_pts_id].x; 219 | order_points[i][2].y = results[i][max_sum_pts_id].y; 220 | 221 | order_points[i][1].x = results[i][min_diff_pts_id].x; 222 | order_points[i][1].y = results[i][min_diff_pts_id].y; 223 | order_points[i][3].x = results[i][max_diff_pts_id].x; 224 | order_points[i][3].y = results[i][max_diff_pts_id].y; 225 | } 226 | return order_points; 227 | } 228 | 229 | void TextDetector::draw_pred(Mat& srcimg, vector< vector > results) 230 | { 231 | for (int i = 0; i < results.size(); i++) 232 | { 233 | for (int j = 0; j < 4; j++) 234 | { 235 | circle(srcimg, Point((int)results[i][j].x, (int)results[i][j].y), 2, Scalar(0, 0, 255), -1); 236 | if (j < 3) 237 | { 238 | line(srcimg, Point((int)results[i][j].x, (int)results[i][j].y), Point((int)results[i][j + 1].x, (int)results[i][j + 1].y), Scalar(0, 255, 0)); 239 | } 240 | else 241 | { 242 | line(srcimg, Point((int)results[i][j].x, (int)results[i][j].y), Point((int)results[i][0].x, (int)results[i][0].y), Scalar(0, 255, 0)); 243 | } 244 | } 245 | } 246 | } 247 | 248 | float TextDetector::contourScore(const Mat& binary, const vector& contour) 249 | { 250 | Rect rect = boundingRect(contour); 251 | int xmin = max(rect.x, 0); 252 | int xmax = min(rect.x + rect.width, binary.cols - 1); 253 | int ymin = max(rect.y, 0); 254 | int ymax = min(rect.y + rect.height, binary.rows - 1); 255 | 256 | Mat binROI = binary(Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)); 257 | 258 | Mat mask = Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8U); 259 | vector roiContour; 260 | for (size_t i = 0; i < contour.size(); i++) { 261 | Point pt = Point(contour[i].x - xmin, contour[i].y - ymin); 262 | roiContour.push_back(pt); 263 | } 264 | vector> roiContours = { roiContour }; 265 | fillPoly(mask, roiContours, Scalar(1)); 266 | float score = mean(binROI, mask).val[0]; 267 | return score; 268 | } 269 | 270 | void TextDetector::unclip(const vector& inPoly, vector& outPoly) 271 | { 272 | float area = contourArea(inPoly); 273 | float length = arcLength(inPoly, true); 274 | float distance = area * unclipRatio / length; 275 | 276 | size_t numPoints = inPoly.size(); 277 | vector> newLines; 278 | for (size_t i = 0; i < numPoints; i++) 279 | { 280 | vector newLine; 281 | Point pt1 = inPoly[i]; 282 | Point pt2 = inPoly[(i - 1) % numPoints]; 283 | Point vec = pt1 - pt2; 284 | float unclipDis = (float)(distance / norm(vec)); 285 | Point2f rotateVec = Point2f(vec.y * unclipDis, -vec.x * unclipDis); 286 | newLine.push_back(Point2f(pt1.x + rotateVec.x, pt1.y + rotateVec.y)); 287 | newLine.push_back(Point2f(pt2.x + rotateVec.x, pt2.y + rotateVec.y)); 288 | newLines.push_back(newLine); 289 | } 290 | 291 | size_t numLines = newLines.size(); 292 | for (size_t i = 0; i < numLines; i++) 293 | { 294 | Point2f a = newLines[i][0]; 295 | Point2f b = newLines[i][1]; 296 | Point2f c = newLines[(i + 1) % numLines][0]; 297 | Point2f d = newLines[(i + 1) % numLines][1]; 298 | Point2f pt; 299 | Point2f v1 = b - a; 300 | Point2f v2 = d - c; 301 | float cosAngle = (v1.x * v2.x + v1.y * v2.y) / (norm(v1) * norm(v2)); 302 | 303 | if (fabs(cosAngle) > 0.7) 304 | { 305 | pt.x = (b.x + c.x) * 0.5; 306 | pt.y = (b.y + c.y) * 0.5; 307 | } 308 | else 309 | { 310 | float denom = a.x * (float)(d.y - c.y) + b.x * (float)(c.y - d.y) + 311 | d.x * (float)(b.y - a.y) + c.x * (float)(a.y - b.y); 312 | float num = a.x * (float)(d.y - c.y) + c.x * (float)(a.y - d.y) + d.x * (float)(c.y - a.y); 313 | float s = num / denom; 314 | 315 | pt.x = a.x + s * (b.x - a.x); 316 | pt.y = a.y + s * (b.y - a.y); 317 | } 318 | outPoly.push_back(pt); 319 | } 320 | } 321 | 322 | Mat TextDetector::get_rotate_crop_image(const Mat& frame, vector vertices) 323 | { 324 | Rect rect = boundingRect(Mat(vertices)); 325 | if (rect.x < 0) rect.x = 0; 326 | if (rect.x + rect.width > frame.cols) rect.x = frame.cols - rect.width; 327 | if (rect.y < 0) rect.y = 0; 328 | if (rect.y + rect.height > frame.rows) rect.y = frame.rows - rect.height; 329 | Mat crop_img = frame(rect); 330 | 331 | const Size outputSize = Size(rect.width, rect.height); 332 | 333 | vector targetVertices{ Point2f(0, outputSize.height),Point2f(0, 0), Point2f(outputSize.width, 0), Point2f(outputSize.width, outputSize.height) }; 334 | //vector targetVertices{ Point2f(0, 0), Point2f(outputSize.width, 0), Point2f(outputSize.width, outputSize.height), Point2f(0, outputSize.height) }; 335 | 336 | for (int i = 0; i < 4; i++) 337 | { 338 | vertices[i].x -= rect.x; 339 | vertices[i].y -= rect.y; 340 | } 341 | 342 | Mat rotationMatrix = getPerspectiveTransform(vertices, targetVertices); 343 | Mat result; 344 | warpPerspective(crop_img, result, rotationMatrix, outputSize, cv::BORDER_REPLICATE); 345 | return result; 346 | } --------------------------------------------------------------------------------