├── README.md ├── cpp ├── CMakeLists.txt ├── images │ ├── chip.jpg │ ├── chip2.jpg │ ├── doc.png │ ├── doc2.jpg │ ├── doc3.jpg │ ├── doc4.jpg │ ├── doc5.jpg │ ├── real2.jpeg │ ├── real3.jpg │ ├── real4.jpg │ └── real5.jpg ├── inference.cpp ├── inference.h ├── main.cpp ├── predictor.cpp ├── predictor.h ├── utils.cpp └── utils.h └── python ├── images ├── chip.jpg ├── chip2.jpg ├── doc.png ├── doc2.jpg ├── doc3.jpg ├── doc4.jpg ├── doc5.jpg ├── real2.jpeg ├── real3.jpg ├── real4.jpg └── real5.jpg ├── inference.py ├── main.py ├── predictor.py └── utils.py /README.md: -------------------------------------------------------------------------------- 1 | 起初我是在modelscope里看到的一个表格检测项目 2 | https://modelscope.cn/models/jockerK/TableExtractor 3 | 它是百度网盘AI大赛-表格检测的第2名方案,看起来很牛逼的,于是我就编写了 4 | 使用opencv-dnn推理引擎的c++和python的程序。 5 | 有兴趣的开发者还可以继续添加下游模块,例如表格文字识别,可以采用百度的PP-Structure 6 | 7 | 8 | onnx文件在百度云盘 9 | 链接: https://pan.baidu.com/s/1k61nXlFzkVg6C3HE_1zY2A 提取码: xpwq 10 | 11 | -------------------------------------------------------------------------------- /cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(test) 2 | set(CMAKE_CXX_STANDARD 17) 3 | 4 | add_executable(${PROJECT_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp 5 | ${CMAKE_CURRENT_SOURCE_DIR}/inference.cpp 6 | ${CMAKE_CURRENT_SOURCE_DIR}/predictor.cpp 7 | ${CMAKE_CURRENT_SOURCE_DIR}/utils.cpp) 8 | 9 | target_include_directories(${PROJECT_NAME} 10 | PUBLIC "/home/wangbo/libs/opencv-4.8.0/temp_install_dir/include/opencv4") 11 | 12 | target_link_libraries(${PROJECT_NAME} 13 | "/home/wangbo/libs/opencv-4.8.0/temp_install_dir/lib/libopencv_imgcodecs.so.4.8.0" 14 | "/home/wangbo/libs/opencv-4.8.0/temp_install_dir/lib/libopencv_highgui.so.4.8.0" 15 | "/home/wangbo/libs/opencv-4.8.0/temp_install_dir/lib/libopencv_imgproc.so.4.8.0" 16 | "/home/wangbo/libs/opencv-4.8.0/temp_install_dir/lib/libopencv_core.so.4.8.0" 17 | 
"/home/wangbo/libs/opencv-4.8.0/temp_install_dir/lib/libopencv_dnn.so.4.8.0") -------------------------------------------------------------------------------- /cpp/images/chip.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/chip.jpg -------------------------------------------------------------------------------- /cpp/images/chip2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/chip2.jpg -------------------------------------------------------------------------------- /cpp/images/doc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/doc.png -------------------------------------------------------------------------------- /cpp/images/doc2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/doc2.jpg -------------------------------------------------------------------------------- /cpp/images/doc3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/doc3.jpg -------------------------------------------------------------------------------- /cpp/images/doc4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/doc4.jpg -------------------------------------------------------------------------------- /cpp/images/doc5.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/doc5.jpg -------------------------------------------------------------------------------- /cpp/images/real2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/real2.jpeg -------------------------------------------------------------------------------- /cpp/images/real3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/real3.jpg -------------------------------------------------------------------------------- /cpp/images/real4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/real4.jpg -------------------------------------------------------------------------------- /cpp/images/real5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/real5.jpg -------------------------------------------------------------------------------- /cpp/inference.cpp: -------------------------------------------------------------------------------- 1 | #include "inference.h" 2 | 3 | 4 | using namespace std; 5 | using namespace cv; 6 | 7 | 8 | TableDetector::TableDetector(const string obj_model_path, const string edge_model_path, const string cls_model_path) 9 | { 10 | this->obj_detector = std::make_shared(obj_model_path); 11 | this->segnet = std::make_shared(edge_model_path); 12 | this->pplcnet = std::make_shared(cls_model_path); 13 | } 14 | 15 | 
vector TableDetector::detect(const Mat& srcimg, const float det_accuracy) 16 | { 17 | Mat img; 18 | cvtColor(srcimg, img, COLOR_BGR2RGB); 19 | const int h = img.rows; 20 | const int w = img.cols; 21 | vector result; 22 | 23 | vector obj_det_res = this->obj_detector->infer(img, det_accuracy); 24 | 25 | for(int i=0;iget_box_points(obj_det_res[i], lt, rt, rb, lb); 30 | 31 | Bbox edge_ = this->pad_box_points(h, w, obj_det_res[i].xmax, obj_det_res[i].xmin, obj_det_res[i].ymax, obj_det_res[i].ymin, 10); 32 | Rect roi = Rect(edge_.xmin, edge_.ymin, edge_.xmax-edge_.xmin, edge_.ymax-edge_.ymin); 33 | Mat crop_img; 34 | img(roi).copyTo(crop_img); 35 | std::tuple seg_res = this->segnet->infer(crop_img); 36 | Mat edge_box = get<0>(seg_res); //// 4x2的矩阵 37 | if(edge_box.empty()) 38 | { 39 | continue; 40 | } 41 | 42 | lt = get<1>(seg_res); 43 | lb = get<2>(seg_res); 44 | rt = get<3>(seg_res); 45 | rb = get<4>(seg_res); 46 | this->adjust_edge_points_axis(edge_box, lb, lt, rb, rt, edge_.xmin, edge_.ymin); 47 | 48 | Bbox cls_ = this->pad_box_points(h, w, obj_det_res[i].xmax, obj_det_res[i].xmin, obj_det_res[i].ymax, obj_det_res[i].ymin, 5); 49 | roi = Rect(cls_.xmin, cls_.ymin, cls_.xmax-cls_.xmin, cls_.ymax-cls_.ymin); 50 | Mat cls_img; 51 | img(roi).copyTo(cls_img); 52 | 53 | this->add_pre_info_for_cls(cls_img, edge_box, cls_.xmin, cls_.ymin); 54 | const int pred_label = this->pplcnet->infer(cls_img); 55 | 56 | Bbox_Points box_points; 57 | this->get_real_rotated_points(lb, lt, pred_label, rb, rt, box_points.lb, box_points.lt, box_points.rb, box_points.rt); 58 | box_points.box = obj_det_res[i]; 59 | result.emplace_back(box_points); 60 | } 61 | return result; 62 | } 63 | 64 | void TableDetector::get_box_points(const Bbox& box, Point& lt, Point& rt, Point& rb, Point& lb) 65 | { 66 | lt = Point(box.xmin, box.ymin); 67 | rt = Point(box.xmax, box.ymin); 68 | rb = Point(box.xmax, box.ymax); 69 | lb = Point(box.xmin, box.ymax); 70 | } 71 | 72 | Bbox TableDetector::pad_box_points(const 
int h, const int w, const int xmax, const int xmin, const int ymax, const int ymin, const int pad) 73 | { 74 | Bbox edge; 75 | edge.xmin = max(xmin-pad, 0); 76 | edge.ymin = max(ymin-pad, 0); 77 | edge.xmax = min(xmax+pad, w); 78 | edge.ymax = min(ymax+pad, h); 79 | edge.score=1.f; ////忽律,没用的 80 | return edge; 81 | } 82 | 83 | void TableDetector::adjust_edge_points_axis(Mat& edge_box, Point& lb, Point& lt, Point& rb, Point& rt, const int xmin_edge, const int ymin_edge) 84 | { 85 | edge_box.col(0) += xmin_edge; 86 | edge_box.col(1) += ymin_edge; 87 | lt.x += xmin_edge; 88 | lt.y += ymin_edge; 89 | lb.x += xmin_edge; 90 | lb.y += ymin_edge; 91 | rt.x += xmin_edge; 92 | rt.y += ymin_edge; 93 | rb.x += xmin_edge; 94 | rb.y += ymin_edge; 95 | } 96 | 97 | void TableDetector::add_pre_info_for_cls(cv::Mat& cls_img, const cv::Mat& edge_box, const int xmin_cls, const int ymin_cls) 98 | { 99 | vector cls_box(edge_box.rows); 100 | for(int i=0;i(i)[0] - xmin_cls, edge_box.ptr(i)[1] - ymin_cls); 103 | } 104 | cv::polylines(cls_img, cls_box, true, Scalar(255, 0, 255), 5); 105 | } 106 | 107 | void TableDetector::get_real_rotated_points(const Point& lb, const Point& lt, const int pred_label, const Point& rb, const Point& rt, Point& lb1, Point& lt1, Point& rb1, Point& rt1) 108 | { 109 | if(pred_label == 0) 110 | { 111 | lt1 = lt; 112 | rt1 = rt; 113 | rb1 = rb; 114 | lb1 = lb; 115 | } 116 | else if(pred_label == 1) 117 | { 118 | lt1 = rt; 119 | rt1 = rb; 120 | rb1 = lb; 121 | lb1 = lt; 122 | } 123 | else if(pred_label == 2) 124 | { 125 | lt1 = rb; 126 | rt1 = lb; 127 | rb1 = lt; 128 | lb1 = rt; 129 | } 130 | else if(pred_label == 3) 131 | { 132 | lt1 = lb; 133 | rt1 = lt; 134 | rb1 = rt; 135 | lb1 = rb; 136 | } 137 | else 138 | { 139 | lt1 = lt; 140 | rt1 = rt; 141 | rb1 = rb; 142 | lb1 = lb; 143 | } 144 | } -------------------------------------------------------------------------------- /cpp/inference.h: 
-------------------------------------------------------------------------------- 1 | #ifndef INFERENCE_H 2 | #define INFERENCE_H 3 | #include "predictor.h" 4 | 5 | typedef struct 6 | { 7 | Bbox box; 8 | cv::Point lb; 9 | cv::Point lt; 10 | cv::Point rb; 11 | cv::Point rt; 12 | } Bbox_Points; 13 | 14 | class TableDetector 15 | { 16 | public: 17 | TableDetector(const std::string obj_model_path, const std::string edge_model_path, const std::string cls_model_path); 18 | std::vector detect(const cv::Mat& srcimg, const float det_accuracy=0.7); 19 | private: 20 | std::shared_ptr obj_detector{nullptr}; 21 | std::shared_ptr segnet{nullptr}; 22 | std::shared_ptr pplcnet{nullptr}; 23 | 24 | void get_box_points(const Bbox& box, cv::Point& lt, cv::Point& rt, cv::Point& rb, cv::Point& lb); 25 | Bbox pad_box_points(const int h, const int w, const int xmax, const int xmin, const int ymax, const int ymin, const int pad); 26 | void adjust_edge_points_axis(cv::Mat& edge_box, cv::Point& lb, cv::Point& lt, cv::Point& rb, cv::Point& rt, const int xmin_edge, const int ymin_edge); 27 | void add_pre_info_for_cls(cv::Mat& cls_img, const cv::Mat& edge_box, const int xmin_cls, const int ymin_cls); 28 | void get_real_rotated_points(const cv::Point& lb, const cv::Point& lt, const int pred_label, const cv::Point& rb, const cv::Point& rt, cv::Point& lb1, cv::Point& lt1, cv::Point& rb1, cv::Point& rt1); 29 | }; 30 | 31 | #endif -------------------------------------------------------------------------------- /cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include"inference.h" 2 | 3 | 4 | using namespace std; 5 | using namespace cv; 6 | 7 | 8 | int main() 9 | { 10 | const string imgpath = "/home/wangbo/project/my_table_det/images/doc5.jpg"; 11 | const string obj_model_path = "/home/wangbo/project/my_table_det/weights/yolo_obj_det.onnx"; 12 | const string edge_model_path = "/home/wangbo/project/my_table_det/weights/yolo_edge_det.onnx"; 13 | const 
string cls_model_path = "/home/wangbo/project/my_table_det/weights/paddle_cls.onnx"; 14 | 15 | TableDetector table_det(obj_model_path, edge_model_path, cls_model_path); 16 | Mat srcimg = imread(imgpath); 17 | std::vector result = table_det.detect(srcimg); 18 | 19 | ////输出可视化 20 | Mat draw_img = srcimg.clone(); 21 | for(int i=0;imodel = readNet(model_path); 12 | this->outlayer_names = this->model.getUnconnectedOutLayersNames(); 13 | } 14 | 15 | vector YoloDet::infer(const Mat& srcimg, const float score) 16 | { 17 | const int ori_h = srcimg.rows; 18 | const int ori_w = srcimg.cols; 19 | ////img_preprocess//// 20 | Mat img; 21 | int new_w, new_h, left, top; 22 | img = ResizePad(srcimg, this->resize_shape[0], new_w, new_h, left, top); 23 | img.convertTo(img, CV_32FC3, 1.0/255.0); 24 | Mat blob = blobFromImage(img); 25 | 26 | this->model.setInput(blob); 27 | std::vector outs; 28 | this->model.forward(outs, this->outlayer_names); 29 | 30 | ////img_postprocess//// 31 | const float x_factor = (float)ori_w / new_w; 32 | const float y_factor = (float)ori_h / new_h; 33 | vector boxes; 34 | vector scores; 35 | const int rows = outs[0].size[2]; 36 | for(int i=0;i(0, 4)[i]; 39 | if(max_score >= score) 40 | { 41 | float x = outs[0].ptr(0, 0)[i]; 42 | float y = outs[0].ptr(0, 1)[i]; 43 | float w = outs[0].ptr(0, 2)[i]; 44 | float h = outs[0].ptr(0, 3)[i]; 45 | int xmin = max(int((x - w / 2 - left) * x_factor), 0); 46 | int ymin = max(int((y - h / 2 - top) * y_factor), 0); 47 | boxes.emplace_back(Rect(xmin, ymin, int(w * x_factor), int(h * y_factor))); 48 | scores.emplace_back(max_score); 49 | } 50 | } 51 | 52 | vector indices; 53 | NMSBoxes(boxes, scores, score, 0.4, indices); 54 | const int num_keep = indices.size(); 55 | vector bboxes(num_keep); 56 | for(int i=0;imodel = readNet(model_path); 67 | this->outlayer_names = this->model.getUnconnectedOutLayersNames(); 68 | } 69 | 70 | std::tuple YoloSeg::infer(const Mat& srcimg) 71 | { 72 | const int destHeight = srcimg.rows; 73 | 
const int destWidth = srcimg.cols; 74 | ////img_preprocess//// 75 | Mat img; 76 | int resize_h, resize_w, left, top; 77 | img = ResizePad(srcimg, this->resize_shape[0], resize_w, resize_h, left, top); 78 | // img.convertTo(img, CV_32FC3, 1.0/255.0); ///也可以 79 | Mat blob = blobFromImage(img, 1.0/255.0); 80 | 81 | this->model.setInput(blob); 82 | std::vector predict_maps; 83 | this->model.forward(predict_maps, this->outlayer_names); 84 | 85 | Mat pred = this->img_postprocess(predict_maps); 86 | if(pred.empty()) 87 | { 88 | return std::make_tuple(Mat(), Point(), Point(), Point(), Point()); 89 | } 90 | Mat mask = pred > 0.8; 91 | mask.convertTo(mask, CV_8UC1); 92 | 93 | Mat box = get_max_adjacent_bbox(mask); 94 | if(!box.empty()) 95 | { 96 | this->adjust_coordinates(box, left, top, resize_w, resize_h, destWidth, destHeight); 97 | Point lt, lb, rt, rb; 98 | this->sort_and_clip_coordinates(box, lt, lb, rt, rb); 99 | return std::make_tuple(box, lt, lb, rt, rb); 100 | } 101 | else 102 | { 103 | return std::make_tuple(Mat(), Point(), Point(), Point(), Point()); 104 | } 105 | } 106 | 107 | void YoloSeg::adjust_coordinates(Mat& box, const int left, const int top, const int resize_w, const int resize_h, const int destWidth, const int destHeight) 108 | { 109 | for(int i=0;i(i)[0] - left) / resize_w * destWidth; 112 | float y = (box.ptr(i)[1] - top) / resize_h * destHeight; 113 | box.ptr(i)[0] = (int)std::min(std::max(x, 0.0f), (float)destWidth-1); 114 | box.ptr(i)[1] = (int)std::min(std::max(y, 0.0f), (float)destHeight-1); 115 | } 116 | } 117 | 118 | void YoloSeg::sort_and_clip_coordinates(const Mat& box, Point& lt, Point& lb, Point& rt, Point& rb) 119 | { 120 | vector x = box.col(0).reshape(1); 121 | vector l_idx = argsort_ascend(x); 122 | int l_box[2][2] = {{(int)box.ptr(l_idx[0])[0], (int)box.ptr(l_idx[0])[1]}, {(int)box.ptr(l_idx[1])[0], (int)box.ptr(l_idx[1])[1]}}; 123 | int r_box[2][2] = {{(int)box.ptr(l_idx[2])[0], (int)box.ptr(l_idx[2])[1]}, {(int)box.ptr(l_idx[3])[0], 
(int)box.ptr(l_idx[3])[1]}}; 124 | 125 | int l_idx_1[2] = {0, 1}; 126 | if(l_box[0][1] > l_box[1][1]) 127 | { 128 | l_idx_1[0] = 1; 129 | l_idx_1[1] = 0; 130 | } 131 | lt = Point(std::max(l_box[l_idx_1[0]][0], 0), std::max(l_box[l_idx_1[0]][1], 0)); 132 | lb = Point(std::max(l_box[l_idx_1[1]][0], 0), std::max(l_box[l_idx_1[1]][1], 0)); 133 | 134 | int r_idx_1[2] = {0, 1}; 135 | if(r_box[0][1] > r_box[1][1]) 136 | { 137 | r_idx_1[0] = 1; 138 | r_idx_1[1] = 0; 139 | } 140 | rt = Point(std::max(r_box[r_idx_1[0]][0], 0), std::max(r_box[r_idx_1[0]][1], 0)); 141 | rb = Point(std::max(r_box[r_idx_1[1]][0], 0), std::max(r_box[r_idx_1[1]][1], 0)); 142 | } 143 | 144 | Mat YoloSeg::img_postprocess(const vector& predict_maps) 145 | { 146 | Mat mask_output = predict_maps[1]; 147 | const int len = predict_maps[0].size[1]; 148 | const int num_proposals = predict_maps[0].size[2]; 149 | Mat predictions = predict_maps[0].reshape(0, len).t(); 150 | Mat scores = predictions.col(4); 151 | double max_class_socre;; 152 | Point classIdPoint; 153 | minMaxLoc(scores, 0, &max_class_socre, 0, &classIdPoint); 154 | int highest_score_index = classIdPoint.y; 155 | 156 | Mat highest_score_prediction = predictions.row(highest_score_index); 157 | float x = highest_score_prediction.ptr(0)[0]; 158 | float y = highest_score_prediction.ptr(0)[1]; 159 | float w = highest_score_prediction.ptr(0)[2]; 160 | float h = highest_score_prediction.ptr(0)[3]; 161 | float highest_score = highest_score_prediction.ptr(0)[4]; 162 | if(highest_score < 0.7) 163 | { 164 | return Mat(); 165 | } 166 | Mat mask_predictions = highest_score_prediction.colRange(5, len); 167 | const int num_mask = mask_output.size[1]; 168 | const int mask_height = mask_output.size[2]; 169 | const int mask_width = mask_output.size[3]; 170 | const std::vector newshape = {num_mask, mask_height*mask_width}; 171 | Mat mask_output_reshaped = mask_output.reshape(0, newshape); ////不考虑batchsize 172 | Mat masks = mask_predictions * mask_output_reshaped; 
173 | cv::exp(-masks, masks); 174 | masks = 1.f / (1 + masks); 175 | Mat mask = masks.reshape(0, mask_height); ////不考虑batchsize 176 | 177 | const int small_w = 200; 178 | const int small_h = 200; 179 | int small_x_min = max(0, int((x - w / 2) * small_w / 800.0)); 180 | int small_x_max = min(small_w, int((x + w / 2) * small_w / 800.0)); 181 | int small_y_min = max(0, int((y - h / 2) * small_h / 800.0)); 182 | int small_y_max = min(small_h, int((y + h / 2) * small_h / 800.0)); 183 | 184 | Mat filtered_mask = Mat::zeros(small_h, small_w, CV_32FC1); 185 | Rect crop_rect(small_x_min, small_y_min, small_x_max-small_x_min, small_y_max-small_y_min); 186 | mask(crop_rect).copyTo(filtered_mask(crop_rect)); 187 | Mat resized_mask; 188 | resize(filtered_mask, resized_mask, Size(800, 800), 0, 0, INTER_CUBIC); 189 | return resized_mask; 190 | } 191 | 192 | 193 | PPLCNet::PPLCNet(const string model_path) 194 | { 195 | this->model = readNet(model_path); 196 | this->outlayer_names = this->model.getUnconnectedOutLayersNames(); 197 | } 198 | 199 | int PPLCNet::infer(const Mat& srcimg) 200 | { 201 | ////img_preprocess//// 202 | Mat img; 203 | int new_w, new_h, left, top; 204 | img = ResizePad(srcimg, this->resize_shape[0], new_w, new_h, left, top); 205 | img.convertTo(img, CV_32FC3, 1.0/255.0); 206 | Mat blob = blobFromImage(img); 207 | 208 | this->model.setInput(blob); 209 | std::vector outs; 210 | this->model.forward(outs, this->outlayer_names); 211 | 212 | ////img_postprocess//// 213 | const int cols = outs[0].size[1]; 214 | float* pdata = (float*)outs[0].data; 215 | int maxPosition = std::max_element(pdata, pdata+cols) - pdata; 216 | return maxPosition; 217 | } -------------------------------------------------------------------------------- /cpp/predictor.h: -------------------------------------------------------------------------------- 1 | #ifndef PREDICTOR_H 2 | #define PREDICTOR_H 3 | #include "utils.h" 4 | #include 5 | 6 | 7 | class YoloDet 8 | { 9 | public: 10 | 
YoloDet(const std::string model_path); 11 | std::vector infer(const cv::Mat& srcimg, const float score=0.4f); 12 | private: 13 | const int resize_shape[2] = {928, 928}; 14 | std::vector outlayer_names; 15 | cv::dnn::Net model; 16 | }; 17 | 18 | class YoloSeg 19 | { 20 | public: 21 | YoloSeg(const std::string model_path); 22 | std::tuple infer(const cv::Mat& srcimg); 23 | private: 24 | const int resize_shape[2] = {800, 800}; 25 | cv::Mat img_postprocess(const std::vector& predict_maps); 26 | void adjust_coordinates(cv::Mat& box, const int left, const int top, const int resize_w, const int resize_h, const int destWidth, const int destHeight); 27 | void sort_and_clip_coordinates(const cv::Mat& box, cv::Point& lt, cv::Point& lb, cv::Point& rt, cv::Point& rb); 28 | std::vector outlayer_names; 29 | cv::dnn::Net model; 30 | }; 31 | 32 | class PPLCNet 33 | { 34 | public: 35 | PPLCNet(const std::string model_path); 36 | int infer(const cv::Mat& srcimg); 37 | private: 38 | const int resize_shape[2] = {624, 624}; 39 | std::vector outlayer_names; 40 | cv::dnn::Net model; 41 | }; 42 | 43 | #endif -------------------------------------------------------------------------------- /cpp/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | using namespace std; 8 | using namespace cv; 9 | 10 | Mat sortMat(const Mat &stats, int colId) 11 | { 12 | //根据指定列以行为单位排序 13 | 14 | Mat sorted_index; 15 | cv::sortIdx(stats, sorted_index, cv::SORT_EVERY_COLUMN + cv::SORT_ASCENDING); 16 | // 降序是DESCENDING 升序是ASCENDING 17 | 18 | sorted_index = sorted_index.col(colId); 19 | Mat sorted_stats = stats.clone(); 20 | int row_num = sorted_index.rows; 21 | for(int i = 0; i < row_num; i++){ 22 | int _idx = sorted_index.at(i, 0); 23 | sorted_stats.row(i) = stats.row(_idx) + 0;//必须加0否则会出很难debug的错误 24 | } 25 | return sorted_stats; 26 | } 27 | 28 | 29 | std::tuple, float> get_mini_boxes(const vector& 
contour) 30 | { 31 | RotatedRect bounding_box = cv::minAreaRect(contour); 32 | cv::Mat rect; 33 | cv::boxPoints(bounding_box, rect); 34 | Mat points = sortMat(rect, 0); 35 | 36 | int index_1 = 0; 37 | int index_2 = 1; 38 | int index_3 = 2; 39 | int index_4 = 3; 40 | if(points.ptr(1)[1] > points.ptr(0)[1]) 41 | { 42 | index_1 = 0; 43 | index_4 = 1; 44 | } 45 | else 46 | { 47 | index_1 = 1; 48 | index_4 = 0; 49 | } 50 | if(points.ptr(3)[1] > points.ptr(2)[1]) 51 | { 52 | index_2 = 2; 53 | index_3 = 3; 54 | } 55 | else 56 | { 57 | index_2 = 3; 58 | index_3 = 2; 59 | } 60 | 61 | vector box = {Point2f(points.ptr(index_1)[0], points.ptr(index_1)[1]), 62 | Point2f(points.ptr(index_2)[0], points.ptr(index_2)[1]), 63 | Point2f(points.ptr(index_3)[0], points.ptr(index_3)[1]), 64 | Point2f(points.ptr(index_4)[0], points.ptr(index_4)[1])}; 65 | std::tuple, float> result = std::make_tuple(box, std::min(bounding_box.size.width, bounding_box.size.height)); 66 | return result; 67 | } 68 | 69 | Mat get_inv(const Mat& concat) { 70 | double a = concat.at(0, 0); 71 | double b = concat.at(0, 1); 72 | double c = concat.at(1, 0); 73 | double d = concat.at(1, 1); 74 | double det_concat = a * d - b * c; 75 | Mat inv_result = (Mat_(2, 2) << d / det_concat, -b / det_concat, -c / det_concat, a / det_concat); 76 | return inv_result; 77 | } 78 | 79 | vector> nchoosek(int startnum, int endnum, int step = 1, int n = 1) { 80 | vector> c; 81 | vector range; 82 | for (int i = startnum; i <= endnum; i += step) { 83 | range.push_back(i); 84 | } 85 | vector combination; 86 | function combine = [&](int offset, int k) { 87 | if (k == 0) { 88 | c.push_back(combination); 89 | return; 90 | } 91 | for (int i = offset; i <= range.size() - k; ++i) { 92 | combination.push_back(range[i]); 93 | combine(i + 1, k - 1); 94 | combination.pop_back(); 95 | } 96 | }; 97 | combine(0, n); 98 | return c; 99 | } 100 | 101 | vector minboundquad(const vector& hull) 102 | { 103 | int len_hull = hull.size(); 104 | vector 
xy(hull.begin(), hull.end()); 105 | vector idx(len_hull); 106 | iota(idx.begin(), idx.end(), 0); 107 | vector idx_roll(len_hull); 108 | rotate_copy(idx.begin(), idx.begin() + 1, idx.end(), idx_roll.begin()); 109 | vector> edges(len_hull, vector(2)); 110 | for (int i = 0; i < len_hull; ++i) { 111 | edges[i][0] = idx[i]; 112 | edges[i][1] = idx_roll[i]; 113 | } 114 | vector> edgeangles1; 115 | for (int i = 0; i < len_hull; ++i) { 116 | double y = xy[edges[i][1]].y - xy[edges[i][0]].y; 117 | double x = xy[edges[i][1]].x - xy[edges[i][0]].x; 118 | double angle = atan2(y, x); 119 | if (angle < 0) { 120 | angle += 2 * M_PI; 121 | } 122 | edgeangles1.emplace_back(angle, i); 123 | } 124 | sort(edgeangles1.begin(), edgeangles1.end()); 125 | vector> edges1; 126 | vector edgeangle1; 127 | for (const auto& item : edgeangles1) { 128 | edges1.push_back(edges[item.second]); 129 | edgeangle1.push_back(item.first); 130 | } 131 | vector edgeangles(edgeangle1.begin(), edgeangle1.end()); 132 | edges = edges1; 133 | double eps = 2.2204e-16; 134 | double angletol = eps * 100; 135 | vector k(edgeangles.size() - 1); 136 | adjacent_difference(edgeangles.begin(), edgeangles.end(), k.begin(), [&](double a, double b) { return (b - a) < angletol; }); 137 | vector idx_to_delete; 138 | for (int i = 0; i < k.size(); ++i) { 139 | if (k[i]) { 140 | idx_to_delete.push_back(i); 141 | } 142 | } 143 | for (int i = idx_to_delete.size() - 1; i >= 0; --i) { 144 | edges.erase(edges.begin() + idx_to_delete[i]); 145 | edgeangles.erase(edgeangles.begin() + idx_to_delete[i]); 146 | } 147 | int nedges = edges.size(); 148 | vector> edgelist = nchoosek(0, nedges - 1, 1, 4); 149 | vector k_idx; 150 | for (int i = 0; i < edgelist.size(); ++i) { 151 | if (edgeangles[edgelist[i][3]] - edgeangles[edgelist[i][0]] <= M_PI) { 152 | k_idx.push_back(i); 153 | } 154 | } 155 | for (int i = k_idx.size() - 1; i >= 0; --i) { 156 | edgelist.erase(edgelist.begin() + k_idx[i]); 157 | } 158 | int nquads = edgelist.size(); 159 | 
double quadareas = numeric_limits::infinity(); 160 | vector cnt(4); 161 | for (int i = 0; i < nquads; ++i) { 162 | vector edgeind = edgelist[i]; 163 | edgeind.push_back(edgelist[i][0]); 164 | vector> edgesi; 165 | vector edgeang; 166 | for (int idx : edgeind) { 167 | edgesi.push_back(edges[idx]); 168 | edgeang.push_back(edgeangles[idx]); 169 | } 170 | bool is_continue = false; 171 | for (int j = 0; j < edgeang.size() - 1; ++j) { 172 | if (edgeang[j + 1] - edgeang[j] > M_PI) { 173 | is_continue = true; 174 | break; 175 | } 176 | } 177 | if (is_continue) { 178 | continue; 179 | } 180 | vector qxi(4), qyi(4); 181 | for (int j = 0; j < 4; ++j) { 182 | int jplus1 = j + 1; 183 | vector shared; 184 | set_intersection(edgesi[j].begin(), edgesi[j].end(), edgesi[jplus1].begin(), edgesi[jplus1].end(), back_inserter(shared)); 185 | if (!shared.empty()) { 186 | qxi[j] = xy[shared[0]].x; 187 | qyi[j] = xy[shared[0]].y; 188 | } else { 189 | Point2f A = xy[edgesi[j][0]]; 190 | Point2f B = xy[edgesi[j][1]]; 191 | Point2f C = xy[edgesi[jplus1][0]]; 192 | Point2f D = xy[edgesi[jplus1][1]]; 193 | Mat concat = (Mat_(2, 2) << A.x - B.x, D.x - C.x, A.y - B.y, D.y - C.y); 194 | Mat div = (Mat_(2, 1) << A.x - C.x, A.y - C.y); 195 | Mat inv_result = get_inv(concat); 196 | double a = inv_result.at(0, 0); 197 | double b = inv_result.at(0, 1); 198 | double c = inv_result.at(1, 0); 199 | double d = inv_result.at(1, 1); 200 | double e = div.at(0, 0); 201 | double f = div.at(1, 0); 202 | vector ts1 = {a * e + b * f, c * e + d * f}; 203 | Point2f Q = A + (B - A) * ts1[0]; 204 | qxi[j] = Q.x; 205 | qyi[j] = Q.y; 206 | } 207 | } 208 | vector contour; 209 | for (int j = 0; j < 4; ++j) { 210 | contour.emplace_back(qxi[j], qyi[j]); 211 | } 212 | double A_i = contourArea(contour); 213 | if (A_i < quadareas) { 214 | quadareas = A_i; 215 | cnt = contour; 216 | } 217 | } 218 | return cnt; 219 | } 220 | 221 | 222 | Mat ResizePad(const Mat& img, const int target_size, int& new_w, int& new_h, int& left, int& 
top) 223 | { 224 | const int h = img.rows; 225 | const int w = img.cols; 226 | const int m = max(h, w); 227 | const float ratio = (float)target_size / (float)m; 228 | new_w = int(ratio * w); 229 | new_h = int(ratio * h); 230 | Mat dstimg; 231 | resize(img, dstimg, Size(new_w, new_h), 0, 0, INTER_LINEAR); 232 | top = (target_size - new_h) / 2; 233 | int bottom = (target_size - new_h) - top; 234 | left = (target_size - new_w) / 2; 235 | int right = (target_size - new_w) - left; 236 | copyMakeBorder(dstimg, dstimg, top, bottom, left, right, BORDER_CONSTANT, Scalar(114, 114, 114)); 237 | return dstimg; 238 | } 239 | 240 | Mat get_max_adjacent_bbox(const Mat& mask) 241 | { 242 | vector> contours; 243 | cv::findContours(mask, contours, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0)); 244 | float max_size; 245 | vector cnt_save; 246 | for(int i=0;i, float> result = get_mini_boxes(contours[i]); 249 | //vector points = std::get<0>(result); ////没有用 250 | float sside = std::get<1>(result); 251 | if(sside > max_size) 252 | { 253 | max_size = sside; 254 | cnt_save = contours[i]; 255 | } 256 | } 257 | if(cnt_save.size() > 0) 258 | { 259 | float epsilon = 0.01 * cv::arcLength(cnt_save, true); 260 | vector box; 261 | cv::approxPolyDP(cnt_save, box, epsilon, true); 262 | vector hull; 263 | cv::convexHull(box, hull); 264 | std::tuple, float> result = get_mini_boxes(cnt_save); 265 | vector points = std::get<0>(result); 266 | const int len_hull = hull.size(); 267 | 268 | if(len_hull==4) 269 | { 270 | Mat tar_box = Mat(hull.size(), 2, CV_32FC1); 271 | for(int i=0;i(i)[0] = hull[i].x; 274 | tar_box.ptr(i)[1] = hull[i].y; 275 | } 276 | return tar_box; ////也可以返回vector这种格式的 277 | } 278 | else if(len_hull > 4) 279 | { 280 | vector target_box = minboundquad(hull); 281 | Mat tar_box = Mat(target_box.size(), 2, CV_32FC1); 282 | for(int i=0;i(i)[0] = target_box[i].x; 285 | tar_box.ptr(i)[1] = target_box[i].y; 286 | } 287 | return tar_box; 288 | } 289 | else 290 | { 291 | Mat tar_box = 
Mat(points.size(), 2, CV_32FC1); 292 | for(int i=0;i(i)[0] = points[i].x; 295 | tar_box.ptr(i)[1] = points[i].y; 296 | } 297 | return tar_box; 298 | } 299 | } 300 | else 301 | { 302 | return cv::Mat(); 303 | } 304 | } 305 | 306 | 307 | void visuallize(cv::Mat& img, const Bbox& box, const Point& lt, const Point& rt, const Point& rb, const Point& lb) 308 | { 309 | vector draw_box = {lt, rt, rb, lb}; 310 | circle(img, lt, 50, Scalar(255, 0, 0), 10); 311 | rectangle(img, Point(box.xmin, box.ymin), Point(box.xmax, box.ymax), Scalar(255, 0, 0), 10); 312 | cv::polylines(img, draw_box, true, Scalar(255, 0, 255), 6); 313 | } 314 | 315 | Mat extract_table_img(const Mat&img, const Point& lt, const Point& rt, const Point& rb, const Point& lb) 316 | { 317 | Point2f src_points[4] = {lt, rt, lb, rb}; 318 | const float width_a = sqrt(pow(rb.x - lb.x, 2) + pow(rb.y - lb.y, 2)); 319 | const float width_b = sqrt(pow(rt.x - lt.x, 2) + pow(rt.y - lt.y, 2)); 320 | const float max_width = max(width_a, width_b); 321 | 322 | const float height_a = sqrt(pow(rt.x - rb.x, 2) + pow(rt.y - rb.y, 2)); 323 | const float height_b = sqrt(pow(lt.x - lb.x, 2) + pow(lt.y - lb.y, 2)); 324 | const float max_height = max(height_a, height_b); 325 | 326 | Point2f dst_points[4] = {Point2f(0, 0), Point2f(max_width - 1, 0), Point2f(0, max_height - 1), Point(max_width - 1, max_height - 1)}; 327 | Mat M; 328 | M = cv::getPerspectiveTransform(src_points, dst_points); 329 | Mat warped ; 330 | cv::warpPerspective(img, warped, M, Size(max_width, max_height)); 331 | return warped; 332 | } -------------------------------------------------------------------------------- /cpp/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H 2 | #define UTIL_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | typedef struct 9 | { 10 | int xmin; 11 | int ymin; 12 | int xmax; 13 | int ymax; 14 | float score; 15 | } Bbox; 16 | 17 | cv::Mat ResizePad(const cv::Mat& img, 
const int target_size, int& new_w, int& new_h, int& left, int& top); 18 | cv::Mat get_max_adjacent_bbox(const cv::Mat& mask); 19 | void visuallize(cv::Mat& img, const Bbox& box, const cv::Point& lt, const cv::Point& rt, const cv::Point& rb, const cv::Point& lb); 20 | cv::Mat extract_table_img(const cv::Mat&img, const cv::Point& lt, const cv::Point& rt, const cv::Point& rb, const cv::Point& lb); 21 | 22 | template std::vector argsort_descend(const std::vector& array) 23 | { 24 | const int array_len(array.size()); 25 | std::vector array_index(array_len, 0); 26 | std::iota(array_index.begin(), array_index.end(), 0); 27 | 28 | std::sort(array_index.begin(), array_index.end(), 29 | [&array](int pos1, int pos2) {return (array[pos1] > array[pos2]); }); 30 | 31 | return array_index; 32 | } 33 | 34 | template std::vector argsort_ascend(const std::vector& array) 35 | { 36 | const int array_len(array.size()); 37 | std::vector array_index(array_len, 0); 38 | std::iota(array_index.begin(), array_index.end(), 0); 39 | 40 | std::sort(array_index.begin(), array_index.end(), 41 | [&array](int pos1, int pos2) {return (array[pos1] < array[pos2]); }); 42 | 43 | return array_index; 44 | } 45 | 46 | 47 | #endif -------------------------------------------------------------------------------- /python/images/chip.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/chip.jpg -------------------------------------------------------------------------------- /python/images/chip2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/chip2.jpg -------------------------------------------------------------------------------- /python/images/doc.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/doc.png -------------------------------------------------------------------------------- /python/images/doc2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/doc2.jpg -------------------------------------------------------------------------------- /python/images/doc3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/doc3.jpg -------------------------------------------------------------------------------- /python/images/doc4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/doc4.jpg -------------------------------------------------------------------------------- /python/images/doc5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/doc5.jpg -------------------------------------------------------------------------------- /python/images/real2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/real2.jpeg -------------------------------------------------------------------------------- /python/images/real3.jpg: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/real3.jpg
# --------------------------------------------------------------------------------
# /python/images/real4.jpg:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/real4.jpg
# --------------------------------------------------------------------------------
# /python/images/real5.jpg:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/real5.jpg
# --------------------------------------------------------------------------------
# /python/inference.py:
# --------------------------------------------------------------------------------
import cv2
import numpy as np
from predictor import YoloSeg, YoloDet, PPLCNet


class TableDetector:
    """Table detection pipeline: object detection, edge segmentation and
    orientation classification, run in that order for every detected box."""

    def __init__(self, obj_model_path, edge_model_path, cls_model_path):
        self.obj_detector = YoloDet(obj_model_path)
        self.segnet = YoloSeg(edge_model_path)
        self.pplcnet = PPLCNet(cls_model_path)

    def detect(self, img, det_accuracy=0.7):
        """
        Detect tables in a BGR image.

        Args:
            img: BGR image (H, W, 3) as returned by cv2.imread.
            det_accuracy: score threshold passed to the object detector.

        Returns:
            list of dicts with keys "box" ([xmin, ymin, xmax, ymax]) and
            "lb", "lt", "rt", "rb" ([x, y] each), all as Python ints.
            Detections for which the edge model yields no box are skipped.
        """
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_mask = img.copy()
        h, w = img.shape[:-1]
        result = []

        obj_det_res = self.obj_detector.infer(img, score=det_accuracy)

        for _score, box in obj_det_res:
            xmin, ymin, xmax, ymax = box

            # Edge segmentation runs on the detection box padded by 10 px.
            xmin_edge, ymin_edge, xmax_edge, ymax_edge = self.pad_box_points(h, w, xmax, xmin, ymax, ymin, 10)
            crop_img = img_mask[ymin_edge:ymax_edge, xmin_edge:xmax_edge, :]
            edge_box, lt, lb, rt, rb = self.segnet.infer(crop_img)
            if edge_box is None:
                continue
            # Move crop-relative coordinates back into full-image coordinates.
            lb, lt, rb, rt = self.adjust_edge_points_axis(edge_box, lb, lt, rb, rt, xmin_edge, ymin_edge)

            # Orientation classification runs on the box padded by 5 px.
            xmin_cls, ymin_cls, xmax_cls, ymax_cls = self.pad_box_points(
                h, w, xmax, xmin, ymax, ymin, 5
            )
            cls_img = img_mask[ymin_cls:ymax_cls, xmin_cls:xmax_cls, :]
            # Add prior information: cls_img is a view, so the outline is
            # drawn into img_mask itself.
            self.add_pre_info_for_cls(cls_img, edge_box, xmin_cls, ymin_cls)
            pred_label = self.pplcnet.infer(cls_img)

            lb1, lt1, rb1, rt1 = self.get_real_rotated_points(lb, lt, pred_label, rb, rt)
            result.append(
                {
                    "box": [int(xmin), int(ymin), int(xmax), int(ymax)],
                    "lb": [int(lb1[0]), int(lb1[1])],
                    "lt": [int(lt1[0]), int(lt1[1])],
                    "rt": [int(rt1[0]), int(rt1[1])],
                    "rb": [int(rb1[0]), int(rb1[1])],
                }
            )

        return result

    def init_default_output(self, h, w):
        """Return the default detection list (one full-image box with score
        1.0) and the default orientation label 0."""
        img_box = np.array([0, 0, w, h])
        obj_det_res = [[1.0, img_box]]
        pred_label = 0
        return obj_det_res, pred_label

    def add_pre_info_for_cls(self, cls_img, edge_box, xmin_cls, ymin_cls):
        """
        Draw the detected table outline onto the classifier crop, as prior
        information for the orientation classifier.

        Args:
            cls_img: crop to draw on; modified in place.
            edge_box: (N, 2) corner coordinates in full-image coordinates.
            xmin_cls: x offset of the crop inside the full image.
            ymin_cls: y offset of the crop inside the full image.

        Returns:
            None. edge_box itself is left untouched (a copy is shifted).
        """
        cls_box = edge_box.copy()
        cls_box[:, 0] = cls_box[:, 0] - xmin_cls
        cls_box[:, 1] = cls_box[:, 1] - ymin_cls
        # Draw the outline to help the orientation label prediction.
        cv2.polylines(
            cls_img,
            [np.array(cls_box).astype(np.int32).reshape((-1, 1, 2))],
            True,
            color=(255, 0, 255),
            thickness=5,
        )

    def adjust_edge_points_axis(self, edge_box, lb, lt, rb, rt, xmin_edge, ymin_edge):
        """Shift crop-relative coordinates by the crop origin.

        edge_box is shifted in place; the four corner points are returned as
        new arrays in the order (lb, lt, rb, rt).
        """
        edge_box[:, 0] += xmin_edge
        edge_box[:, 1] += ymin_edge
        offset = [xmin_edge, ymin_edge]
        lt, lb, rt, rb = lt + offset, lb + offset, rt + offset, rb + offset
        return lb, lt, rb, rt

    def get_box_points(self, img_box):
        """Return the corners (lb, lt, rb, rt) of an [x1, y1, x2, y2] box."""
        x1, y1, x2, y2 = img_box
        lt = np.array([x1, y1])  # left-top
        rt = np.array([x2, y1])  # right-top
        rb = np.array([x2, y2])  # right-bottom
        lb = np.array([x1, y2])  # left-bottom
        return lb, lt, rb, rt

    def get_real_rotated_points(self, lb, lt, pred_label, rb, rt):
        """Relabel the corners according to the predicted orientation.

        Labels 1, 2 and 3 rotate the clockwise sequence (lt, rt, rb, lb) by
        that many positions; every other label leaves the corners unchanged.

        Returns:
            (lb1, lt1, rb1, rt1)
        """
        corners = [lt, rt, rb, lb]
        shift = pred_label if pred_label in (1, 2, 3) else 0
        lt1, rt1, rb1, lb1 = corners[shift:] + corners[:shift]
        return lb1, lt1, rb1, rt1

    def pad_box_points(self, h, w, xmax, xmin, ymax, ymin, pad):
        """Grow a box by `pad` pixels on every side, clamped to the image.

        Returns:
            (xmin_edge, ymin_edge, xmax_edge, ymax_edge)
        """
        ymin_edge = max(ymin - pad, 0)
        xmin_edge = max(xmin - pad, 0)
        ymax_edge = min(ymax + pad, h)
        xmax_edge = min(xmax + pad, w)
        return xmin_edge, ymin_edge, xmax_edge, ymax_edge

# --------------------------------------------------------------------------------
# /python/main.py:
# --------------------------------------------------------------------------------
import os
import cv2
from inference import TableDetector
from utils import visuallize, extract_table_img


if __name__=='__main__':
    img_path = "images/chip2.jpg"
    table_det = TableDetector("weights/yolo_obj_det.onnx", "weights/yolo_edge_det.onnx", "weights/paddle_cls.onnx")

    srcimg = cv2.imread(img_path)
    # cv2.imread signals an unreadable file by returning None, not by raising.
    if srcimg is None:
        raise FileNotFoundError(f"cannot read image: {img_path}")
    result = table_det.detect(srcimg.copy())

    # Write the visualisations.
    file_name_with_ext = os.path.basename(img_path)
    file_name, file_ext = os.path.splitext(file_name_with_ext)
    out_dir = "outputs"
    os.makedirs(out_dir, exist_ok=True)
    draw_img = srcimg.copy()
    for i, res in enumerate(result):
        box = res["box"]
        lt, rt, rb, lb = res["lt"], res["rt"], res["rb"], res["lb"]
        # Detection box plus a marker on the oriented left-top corner.
        draw_img = visuallize(draw_img, box, lt, rt, rb, lb)
        # Perspective transform to extract the table image.
        wrapped_img = extract_table_img(srcimg, lt, rt, rb, lb)
        cv2.imwrite(f"{out_dir}/{file_name}-extract-{i}.jpg", wrapped_img)
    cv2.imwrite(f"{out_dir}/{file_name}-visualize.jpg", draw_img)
# --------------------------------------------------------------------------------
# /python/predictor.py:
# --------------------------------------------------------------------------------
import cv2
import numpy as np
from utils import (
    custom_NMSBoxes,
    ResizePad,
    sigmoid,
    get_max_adjacent_bbox,
)


class YoloDet:
    """Object detector loaded through cv2.dnn; input is padded to 928 x 928."""

    def __init__(self, model_path):
        self.model = cv2.dnn.readNet(model_path)
        self.resize_shape = [928, 928]
        self.outlayer_names = self.model.getUnconnectedOutLayersNames()

    def infer(self, img, **kwargs):
        """Run detection.

        Keyword Args:
            score: minimum score for a candidate to be kept (default 0.4).

        Returns:
            list of [score, np.array([xmin, ymin, xmax, ymax])] after NMS.
        """
        score = kwargs.get("score", 0.4)
        ori_h, ori_w = img.shape[:-1]
        img, new_w, new_h, left, top = self.img_preprocess(img, self.resize_shape)
        self.model.setInput(img)
        pre = self.model.forward(self.outlayer_names)
        result = self.img_postprocess(pre, ori_w / new_w, ori_h / new_h, left, top, score)
        return result

    def img_preprocess(self, img, resize_shape=(928, 928)):
        """Resize + pad to a square of side resize_shape[0], scale to [0, 1]
        and lay out as a float32 1 x C x H x W blob.

        Returns:
            (blob, new_w, new_h, left, top)
        """
        im, new_w, new_h, left, top = ResizePad(img, resize_shape[0])
        im = im / 255.0
        im = im.transpose((2, 0, 1)).copy()
        im = im[None, :].astype("float32")
        return im, new_w, new_h, left, top

    def img_postprocess(self, predict_maps, x_factor, y_factor, left, top, score):
        """Turn the raw network output into boxes in original-image pixels.

        Column 4 of each candidate is used as its score; columns 0-3 are the
        centre x/y and width/height in network-input pixels.
        """
        result = []
        # Transpose and squeeze the output into one candidate per row.
        outputs = np.transpose(np.squeeze(predict_maps[0]))
        rows = outputs.shape[0]
        boxes = []
        scores = []
        for i in range(rows):
            max_score = outputs[i][4]
            if max_score >= score:
                x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]
                # Undo the padding offset, then the resize scale.
                xmin = max(int((x - w / 2 - left) * x_factor), 0)
                ymin = max(int((y - h / 2 - top) * y_factor), 0)
                # NOTE(review): xmax/ymax are derived from the clamped
                # xmin/ymin, so a box clamped at 0 keeps its full width and
                # height instead of being cut at the true far edge.
                xmax = xmin + int(w * x_factor)
                ymax = ymin + int(h * y_factor)
                boxes.append([xmin, ymin, xmax, ymax])
                scores.append(max_score)
        # Non-maximum suppression to drop overlapping boxes.
        indices = custom_NMSBoxes(boxes, scores)
        for i in indices:
            result.append([scores[i], np.array(boxes[i])])
        return result


class YoloSeg:
    """Edge segmentation model loaded through cv2.dnn; input is padded to
    800 x 800."""

    def __init__(self, model_path):
        self.model = cv2.dnn.readNet(model_path)
        self.resize_shape = [800, 800]
        self.outlayer_names = self.model.getUnconnectedOutLayersNames()

    def infer(self, img, **kwargs):
        """Find the table quadrilateral inside a crop.

        Returns:
            (box, lt, lb, rt, rb) in crop coordinates, or five Nones when the
            best candidate scores below 0.7 or no contour is found.
        """
        destHeight, destWidth = img.shape[:-1]
        img, resize_h, resize_w, left, top = self.img_preprocess(img, self.resize_shape)
        self.model.setInput(img)
        predict_maps = self.model.forward(self.outlayer_names)
        pred = self.img_postprocess(predict_maps)
        if pred is None:
            return None, None, None, None, None
        segmentation = pred > 0.8
        mask = np.array(segmentation).astype(np.uint8)
        # Best edge box, shape (4, 2).
        box = get_max_adjacent_bbox(mask)
        # todo: the crop offset still has to be applied by the caller
        if box is None:
            return None, None, None, None, None
        # Rescale from network-input pixels to the size of the input crop
        # (adjust_coordinates works in place, so `box` is rescaled too).
        adjusted_box = self.adjust_coordinates(box, left, top, resize_w, resize_h, destWidth, destHeight)
        # Sort the corners and clip negative values.
        lt, lb, rt, rb = self.sort_and_clip_coordinates(adjusted_box)
        return box, lt, lb, rt, rb

    def img_postprocess(self, predict_maps):
        """Build the mask of the highest-scoring candidate at network-input
        resolution.

        Returns:
            float32 mask of side self.resize_shape[0], or None when the best
            score is below 0.7.
        """
        box_output = predict_maps[0]
        mask_output = predict_maps[1]
        predictions = np.squeeze(box_output).T
        scores = predictions[:, 4]
        # Keep only the highest-scoring prediction.
        highest_score_index = scores.argmax()
        highest_score_prediction = predictions[highest_score_index]
        x, y, w, h = highest_score_prediction[:4]
        highest_score = highest_score_prediction[4]
        if highest_score < 0.7:
            return None
        mask_predictions = highest_score_prediction[5:]
        mask_predictions = np.expand_dims(mask_predictions, axis=0)
        mask_output = np.squeeze(mask_output)
        # Combine the mask coefficients with the prototype masks.
        num_mask, mask_height, mask_width = mask_output.shape  # CHW
        masks = sigmoid(mask_predictions @ mask_output.reshape((num_mask, -1)))
        masks = masks.reshape((-1, mask_height, mask_width))
        mask = masks[0]

        # Box bounds at prototype-mask resolution. The sizes come from the
        # model output and the configured input size rather than constants.
        net_size = self.resize_shape[0]
        small_w = mask_width
        small_h = mask_height
        small_x_min = max(0, int((x - w / 2) * small_w / net_size))
        small_x_max = min(small_w, int((x + w / 2) * small_w / net_size))
        small_y_min = max(0, int((y - h / 2) * small_h / net_size))
        small_y_max = min(small_h, int((y + h / 2) * small_h / net_size))

        # Zero everything outside the predicted box.
        filtered_mask = np.zeros((small_h, small_w), dtype=np.float32)
        filtered_mask[small_y_min:small_y_max, small_x_min:small_x_max] = mask[small_y_min:small_y_max, small_x_min:small_x_max]

        # Upscale to network-input resolution.
        resized_mask = cv2.resize(filtered_mask, (net_size, net_size), interpolation=cv2.INTER_CUBIC)
        return resized_mask

    def adjust_coordinates(
        self, box, left, top, resize_w, resize_h, destWidth, destHeight
    ):
        """
        Map box coordinates from the padded network input back to the crop,
        clamped to the crop bounds. `box` is modified in place and returned.

        Args:
            box (numpy.ndarray): corner coordinates, shape (4, 2)
            left (int): left padding
            top (int): top padding
            resize_w (int): width of the resized (unpadded) image
            resize_h (int): height of the resized (unpadded) image
            destWidth (int): crop width
            destHeight (int): crop height

        Returns:
            numpy.ndarray: the same array with adjusted coordinates
        """
        # x coordinates
        box[:, 0] = np.clip(
            (np.round(box[:, 0] - left) / resize_w * destWidth), 0, destWidth
        )

        # y coordinates
        box[:, 1] = np.clip(
            (np.round(box[:, 1] - top) / resize_h * destHeight), 0, destHeight
        )
        return box

    def sort_and_clip_coordinates(self, box):
        """
        Order the four corners and clip negative values to 0.

        The two points with the smallest x form the left side, the other two
        the right side; each side is then ordered by y.

        Args:
            box (numpy.ndarray): corner coordinates, shape (4, 2)

        Returns:
            tuple: (lt, lb, rt, rb)
        """
        # Sort by x.
        x = box[:, 0]
        l_idx = x.argsort()
        l_box = np.array([box[l_idx[0]], box[l_idx[1]]])
        r_box = np.array([box[l_idx[2]], box[l_idx[3]]])

        # Left side ordered by y.
        l_idx_1 = np.array(l_box[:, 1]).argsort()
        lt = l_box[l_idx_1[0]]
        lb = l_box[l_idx_1[1]]

        # Right side ordered by y.
        r_idx_1 = np.array(r_box[:, 1]).argsort()
        rt = r_box[r_idx_1[0]]
        rb = r_box[r_idx_1[1]]

        # Clip negative values.
        lt[lt < 0] = 0
        lb[lb < 0] = 0
        rt[rt < 0] = 0
        rb[rb < 0] = 0

        return lt, lb, rt, rb

    def img_preprocess(self, img, resize_shape=(800, 800)):
        """Same preprocessing as YoloDet.img_preprocess, but note the return
        order: (blob, new_h, new_w, left, top)."""
        im, new_w, new_h, left, top = ResizePad(img, resize_shape[0])
        im = im / 255.0
        im = im.transpose((2, 0, 1)).copy()
        im = im[None, :].astype("float32")
        return im, new_h, new_w, left, top


class PPLCNet:
    """Orientation classifier loaded through cv2.dnn; input is padded to
    624 x 624."""

    def __init__(self, model_path):
        self.model = cv2.dnn.readNet(model_path)
        self.resize_shape = [624, 624]
        self.outlayer_names = self.model.getUnconnectedOutLayersNames()

    def infer(self, img, **kwargs):
        """Return the index of the highest-scoring class."""
        img = self.img_preprocess(img, self.resize_shape)
        self.model.setInput(img)
        label = self.model.forward(self.outlayer_names)[0]
        # Ascending argsort along the last axis; the final entry of the last
        # row is therefore the index of the largest score.
        pred_label = np.argsort(label)[-1].flatten()[-1]
        return pred_label

    def img_preprocess(self, img, resize_shape=(624, 624)):
        """Resize + pad to a square of side resize_shape[0] and return a
        float32 1 x C x H x W blob scaled to [0, 1]."""
        im, new_w, new_h, left, top = ResizePad(img, resize_shape[0])
        im = np.array(im).transpose((2, 0, 1)) / 255.0
        return im[None, :].astype("float32")

# --------------------------------------------------------------------------------
# /python/utils.py:
# --------------------------------------------------------------------------------
import math
import itertools
import cv2
import numpy as np


def generate_scale(im, resize_shape, keep_ratio):
    """
    Args:
        im (np.ndarray): image
        resize_shape: (height, width) target size
        keep_ratio (bool): keep the aspect ratio when True
    Returns:
        im_scale_y: the resize ratio of Y
        im_scale_x: the resize ratio of X
    """
    target_size = (resize_shape[0], resize_shape[1])
    # target_size = (800, 1333)
    origin_shape = im.shape[:2]

    if keep_ratio:
        im_size_min = np.min(origin_shape)
        im_size_max = np.max(origin_shape)
        target_size_min = np.min(target_size)
        target_size_max = np.max(target_size)
        im_scale = float(target_size_min) / float(im_size_min)
        # Fall back to the long side when scaling by the short side overshoots.
        if np.round(im_scale * im_size_max) > target_size_max:
            im_scale = float(target_size_max) / float(im_size_max)
        im_scale_x = im_scale
        im_scale_y = im_scale
    else:
        resize_h, resize_w = target_size
        im_scale_y = resize_h / float(origin_shape[0])
        im_scale_x = resize_w / float(origin_shape[1])
    return im_scale_y, im_scale_x


def resize(im, im_info, resize_shape, keep_ratio, interp=2):
    """Resize `im` by the factors from generate_scale and record the new
    shape and scale factors in `im_info` ("im_shape", "scale_factor")."""
    im_scale_y, im_scale_x = generate_scale(im, resize_shape, keep_ratio)
    im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=interp)
    im_info["im_shape"] = np.array(im.shape[:2]).astype("float32")
    im_info["scale_factor"] = np.array([im_scale_y, im_scale_x]).astype("float32")

    return im, im_info


def pad(im, im_info, resize_shape):
    """Place `im` in the top-left corner of a float32 canvas of size
    resize_shape filled with 114; returns (canvas, im_info)."""
    im_h, im_w = im.shape[:2]
    fill_value = [114.0, 114.0, 114.0]
    h, w = resize_shape[0], resize_shape[1]
    if h == im_h and w == im_w:
        im = im.astype(np.float32)
        return im, im_info

    canvas = np.ones((h, w, 3), dtype=np.float32)
    canvas *= np.array(fill_value, dtype=np.float32)
    canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
    im = canvas
    return im, im_info


def ResizePad(img, target_size):
    """Resize so the longer side equals target_size, then pad with 114 to a
    centred target_size x target_size square.

    Returns:
        (padded image, new_w, new_h, left, top) where new_w/new_h are the
        size before padding and left/top the padding offsets.
    """
    h, w = img.shape[:2]
    m = max(h, w)
    ratio = target_size / m
    new_w, new_h = int(ratio * w), int(ratio * h)
    # The third positional parameter of cv2.resize is `dst`, so the
    # interpolation flag has to be passed by keyword.
    img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    top = (target_size - new_h) // 2
    bottom = (target_size - new_h) - top
    left = (target_size - new_w) // 2
    right = (target_size - new_w) - left
    img1 = cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
    )
    return img1, new_w, new_h, left, top


def get_mini_boxes(contour):
    """Minimum-area rectangle of a contour.

    Returns:
        (box, short_side): the four corners reordered from the x-sorted
        cv2.boxPoints output, and the shorter side length of the rectangle.
    """
    bounding_box = cv2.minAreaRect(contour)
    points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

    # points[0:2] are the two left-most corners, points[2:4] the right-most.
    if points[1][1] > points[0][1]:
        index_1, index_4 = 0, 1
    else:
        index_1, index_4 = 1, 0
    if points[3][1] > points[2][1]:
        index_2, index_3 = 2, 3
    else:
        index_2, index_3 = 3, 2

    box = [points[index_1], points[index_2], points[index_3], points[index_4]]
    return box, min(bounding_box[1])


def minboundquad(hull):
    """Search the 4-edge combinations of a convex hull for the enclosing
    quadrilateral with the smallest area.

    Returns:
        int32 array of shape (4, 1, 2), or a zero array of that shape when no
        candidate quadrilateral is evaluated.
    """
    len_hull = len(hull)
    xy = np.array(hull).reshape([-1, 2])
    # Edge i joins vertex i to vertex i + 1 (cyclically).
    idx = np.arange(0, len_hull)
    idx_roll = np.roll(idx, -1, axis=0)
    edges = np.array([idx, idx_roll]).reshape([2, -1])
    edges = np.transpose(edges, [1, 0])
    # Direction angle of every edge, mapped into [0, 2*pi).
    edgeangles1 = []
    for i in range(len_hull):
        y = xy[edges[i, 1], 1] - xy[edges[i, 0], 1]
        x = xy[edges[i, 1], 0] - xy[edges[i, 0], 0]
        angle = math.atan2(y, x)
        if angle < 0:
            angle = angle + 2 * math.pi
        edgeangles1.append([angle, i])
    # Order the edges by angle.
    edgeangles1_idx = sorted(list(edgeangles1), key=lambda x: x[0])
    edges1 = []
    edgeangle1 = []
    for item in edgeangles1_idx:
        idx = item[1]
        edges1.append(edges[idx, :])
        edgeangle1.append(item[0])
    edgeangles = np.array(edgeangle1)
    edges = np.array(edges1)
    eps = 2.2204e-16
    angletol = eps * 100

    # Drop edges whose angle is within tolerance of the following edge.
    k = np.diff(edgeangles) < angletol
    idx = np.where(k == 1)
    edges = np.delete(edges, idx, 0)
    edgeangles = np.delete(edgeangles, idx, 0)
    nedges = edges.shape[0]
    # All combinations of four edges; discard those spanning at most pi.
    edgelist = np.array(nchoosek(0, nedges - 1, 1, 4))
    k = edgeangles[edgelist[:, 3]] - edgeangles[edgelist[:, 0]] <= math.pi
    k_idx = np.where(k == 1)
    edgelist = np.delete(edgelist, k_idx, 0)

    nquads = edgelist.shape[0]
    quadareas = math.inf
    qxi = np.zeros([5])
    qyi = np.zeros([5])
    cnt = np.zeros([4, 1, 2])
    edgelist = list(edgelist)
    edges = list(edges)
    xy = list(xy)

    for i in range(nquads):
        # Close the cycle: the first edge is appended again after the fourth.
        edgeind = list(edgelist[i])
        edgeind.append(edgelist[i][0])
        edgesi = []
        edgeang = []
        for idx in edgeind:
            edgesi.append(edges[idx])
            edgeang.append(edgeangles[idx])
        # Skip combinations where two consecutive edges turn by more than pi.
        is_continue = False
        for idx in range(len(edgeang) - 1):
            diff = edgeang[idx + 1] - edgeang[idx]
            if diff > math.pi:
                is_continue = True
        if is_continue:
            continue
        for j in range(4):
            jplus1 = j + 1
            shared = np.intersect1d(edgesi[j], edgesi[jplus1])
            if shared.size != 0:
                # Adjacent hull edges: the corner is their shared vertex.
                qxi[j] = xy[shared[0]][0]
                qyi[j] = xy[shared[0]][1]
            else:
                # Otherwise intersect the two supporting lines AB and CD.
                # NOTE(review): get_inv divides by the determinant, which is
                # zero when the two edges are parallel.
                A = xy[edgesi[j][0]]
                B = xy[edgesi[j][1]]
                C = xy[edgesi[jplus1][0]]
                D = xy[edgesi[jplus1][1]]
                concat = np.hstack(((A - B).reshape([2, -1]), (D - C).reshape([2, -1])))
                div = (A - C).reshape([2, -1])
                inv_result = get_inv(concat)
                a = inv_result[0, 0]
                b = inv_result[0, 1]
                c = inv_result[1, 0]
                d = inv_result[1, 1]
                e = div[0, 0]
                f = div[1, 0]
                ts1 = [a * e + b * f, c * e + d * f]
                Q = A + (B - A) * ts1[0]
                qxi[j] = Q[0]
                qyi[j] = Q[1]

        contour = np.array([qxi[:4], qyi[:4]]).astype(np.int32)
        contour = np.transpose(contour, [1, 0])
        contour = contour[:, np.newaxis, :]
        A_i = cv2.contourArea(contour)
        # break

        # Keep the smallest quadrilateral seen so far.
        if A_i < quadareas:
            quadareas = A_i
            cnt = contour
    return cnt


def nchoosek(startnum, endnum, step=1, n=1):
    """All n-element combinations of range(startnum, endnum + 1, step), each
    as a list."""
    return [list(combo) for combo in itertools.combinations(range(startnum, endnum + 1, step), n)]


def get_inv(concat):
    """Inverse of a 2 x 2 matrix by the closed-form adjugate formula."""
    a = concat[0][0]
    b = concat[0][1]
    c = concat[1][0]
    d = concat[1][1]
    det_concat = a * d - b * c
    inv_result = np.array(
        [[d / det_concat, -b / det_concat], [-c / det_concat, a / det_concat]]
    )
    return inv_result


def get_max_adjacent_bbox(mask):
    """Return a quadrilateral, shape (-1, 2), for the contour of `mask` whose
    minimum-area rectangle has the largest short side, or None when there is
    no such contour."""
    contours, _ = cv2.findContours((mask * 255).astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    max_size = 0
    cnt_save = None
    # Find the contour with the largest minimum-area rectangle.
    for cont in contours:
        points, sside = get_mini_boxes(cont)
        if sside > max_size:
            max_size = sside
            cnt_save = cont
    if cnt_save is None:
        return None

    epsilon = 0.01 * cv2.arcLength(cnt_save, True)
    box = cv2.approxPolyDP(cnt_save, epsilon, True)
    hull = cv2.convexHull(box)
    points, sside = get_mini_boxes(cnt_save)
    len_hull = len(hull)

    if len_hull == 4:
        target_box = np.array(hull)
    elif len_hull > 4:
        # shapely's polygon.convex_hull could also be used for this step
        target_box = minboundquad(hull)
    else:
        target_box = np.array(points)

    return np.array(target_box).reshape([-1, 2])


def sigmoid(x):
    """Element-wise logistic function."""
    return 1 / (1 + np.exp(-x))


def calculate_iou(box, other_boxes):
    """
    Intersection over union between one box and a set of boxes.

    Args:
        box: single box in corner format [x1, y1, x2, y2].
        other_boxes: sequence or array of boxes, each [x1, y1, x2, y2].

    Returns:
        numpy array with the IoU of `box` against every entry of other_boxes.
    """
    other_boxes = np.asarray(other_boxes)
    # Top-left corner of the intersection.
    x1 = np.maximum(box[0], other_boxes[:, 0])
    y1 = np.maximum(box[1], other_boxes[:, 1])
    # Bottom-right corner of the intersection.
    x2 = np.minimum(box[2], other_boxes[:, 2])
    y2 = np.minimum(box[3], other_boxes[:, 3])
    # Areas; an empty intersection contributes 0.
    intersection_area = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    other_boxes_area = (other_boxes[:, 2] - other_boxes[:, 0]) * (
        other_boxes[:, 3] - other_boxes[:, 1]
    )
    iou = intersection_area / (box_area + other_boxes_area - intersection_area)
    return iou


def custom_NMSBoxes(boxes, scores, iou_threshold=0.4):
    """Greedy non-maximum suppression.

    Args:
        boxes: list of [x1, y1, x2, y2].
        scores: one score per box.
        iou_threshold: boxes whose IoU with a kept box exceeds this are dropped.

    Returns:
        list of indices into `boxes`, highest score first.
    """
    # Nothing to do without boxes.
    if len(boxes) == 0:
        return []
    scores = np.array(scores)
    boxes = np.array(boxes)
    # Process boxes from highest to lowest score.
    sorted_indices = np.argsort(scores)[::-1]
    indices = []
    while len(sorted_indices) > 0:
        # Keep the best remaining box.
        current_index = sorted_indices[0]
        indices.append(current_index)
        if len(sorted_indices) == 1:
            break
        current_box = boxes[current_index]
        other_boxes = boxes[sorted_indices[1:]]
        iou = calculate_iou(current_box, other_boxes)
        # Continue only with the boxes that do not overlap the kept one.
        non_overlapping_indices = np.where(iou <= iou_threshold)[0]
        sorted_indices = sorted_indices[non_overlapping_indices + 1]
    return indices


def visuallize(img, box, lt, rt, rb, lb):
    """Draw the detection on `img` in place and return it: a circle on the
    left-top corner, the axis-aligned box, and the closed corner polygon."""
    xmin, ymin, xmax, ymax = box
    draw_box = np.array([lt, rt, rb, lb]).reshape([-1, 2])
    cv2.circle(img, (int(lt[0]), int(lt[1])), 50, (255, 0, 0), 10)
    cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 10)
    cv2.polylines(
        img,
        [np.array(draw_box).astype(np.int32).reshape((-1, 1, 2))],
        True,
        color=(255, 0, 255),
        thickness=6,
    )
    return img


def extract_table_img(img, lt, rt, rb, lb):
    """
    Perspective-warp the region bounded by four corners onto an upright
    rectangle.

    Args:
        img (numpy.ndarray): input image
        lt: left-top corner [x, y]
        rt: right-top corner [x, y]
        rb: right-bottom corner [x, y]
        lb: left-bottom corner [x, y]

    Returns:
        numpy.ndarray: the warped image of size (max_width, max_height)
    """
    # Source corners; the destination corners below use the same order.
    src_points = np.float32([lt, rt, lb, rb])

    # Output size: the longer of each pair of opposite edges, in whole pixels.
    width_a = np.sqrt(((rb[0] - lb[0]) ** 2) + ((rb[1] - lb[1]) ** 2))
    width_b = np.sqrt(((rt[0] - lt[0]) ** 2) + ((rt[1] - lt[1]) ** 2))
    max_width = max(int(width_a), int(width_b))

    height_a = np.sqrt(((rt[0] - rb[0]) ** 2) + ((rt[1] - rb[1]) ** 2))
    height_b = np.sqrt(((lt[0] - lb[0]) ** 2) + ((lt[1] - lb[1]) ** 2))
    max_height = max(int(height_a), int(height_b))

    dst_points = np.float32(
        [
            [0, 0],
            [max_width - 1, 0],
            [0, max_height - 1],
            [max_width - 1, max_height - 1],
        ]
    )

    # Perspective transform matrix.
    M = cv2.getPerspectiveTransform(src_points, dst_points)

    # Apply the transform.
    warped = cv2.warpPerspective(img, M, (max_width, max_height))

    return warped

# --------------------------------------------------------------------------------