├── README.md
├── cpp
    ├── CMakeLists.txt
    ├── images
    │   ├── chip.jpg
    │   ├── chip2.jpg
    │   ├── doc.png
    │   ├── doc2.jpg
    │   ├── doc3.jpg
    │   ├── doc4.jpg
    │   ├── doc5.jpg
    │   ├── real2.jpeg
    │   ├── real3.jpg
    │   ├── real4.jpg
    │   └── real5.jpg
    ├── inference.cpp
    ├── inference.h
    ├── main.cpp
    ├── predictor.cpp
    ├── predictor.h
    ├── utils.cpp
    └── utils.h
└── python
    ├── images
        ├── chip.jpg
        ├── chip2.jpg
        ├── doc.png
        ├── doc2.jpg
        ├── doc3.jpg
        ├── doc4.jpg
        ├── doc5.jpg
        ├── real2.jpeg
        ├── real3.jpg
        ├── real4.jpg
        └── real5.jpg
    ├── inference.py
    ├── main.py
    ├── predictor.py
    └── utils.py


/README.md:
--------------------------------------------------------------------------------
 1 | 起初我是在modelscope里看到的一个表格检测项目
 2 | https://modelscope.cn/models/jockerK/TableExtractor
 3 | 它是百度网盘AI大赛-表格检测的第2名方案，效果看起来很出色，于是我就编写了
 4 | 使用opencv-dnn推理引擎的C++和Python推理程序。
 5 | 有兴趣的开发者还可以继续添加下游模块，例如表格文字识别，可以采用百度的PP-Structure
 6 | 
 7 | 
 8 | onnx文件在百度云盘
 9 | 链接: https://pan.baidu.com/s/1k61nXlFzkVg6C3HE_1zY2A 提取码: xpwq
10 | 
11 | 


--------------------------------------------------------------------------------
/cpp/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.16)
 2 | project(test LANGUAGES CXX)
 3 | 
 4 | # Build as C++17 and do not silently fall back to an older standard.
 5 | set(CMAKE_CXX_STANDARD 17)
 6 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 7 | 
 8 | # Locate OpenCV through its CMake package instead of hard-coded .so paths.
 9 | # For a custom install pass -DOpenCV_DIR=<dir containing OpenCVConfig.cmake>;
10 | # the HINTS prefix keeps the original developer install working unchanged.
11 | find_package(OpenCV REQUIRED
12 |         COMPONENTS core imgproc imgcodecs highgui dnn
13 |         HINTS "/home/wangbo/libs/opencv-4.8.0/temp_install_dir")
14 | 
15 | add_executable(${PROJECT_NAME}  ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
16 |                                 ${CMAKE_CURRENT_SOURCE_DIR}/inference.cpp
17 |                                 ${CMAKE_CURRENT_SOURCE_DIR}/predictor.cpp
18 |                                 ${CMAKE_CURRENT_SOURCE_DIR}/utils.cpp)
19 | 
20 | target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
21 | 
22 | # OpenCV_LIBS holds the libraries/targets for the components requested above.
23 | target_link_libraries(${PROJECT_NAME} PRIVATE ${OpenCV_LIBS})


--------------------------------------------------------------------------------
/cpp/images/chip.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/chip.jpg


--------------------------------------------------------------------------------
/cpp/images/chip2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/chip2.jpg


--------------------------------------------------------------------------------
/cpp/images/doc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/doc.png


--------------------------------------------------------------------------------
/cpp/images/doc2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/doc2.jpg


--------------------------------------------------------------------------------
/cpp/images/doc3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/doc3.jpg


--------------------------------------------------------------------------------
/cpp/images/doc4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/doc4.jpg


--------------------------------------------------------------------------------
/cpp/images/doc5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/doc5.jpg


--------------------------------------------------------------------------------
/cpp/images/real2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/real2.jpeg


--------------------------------------------------------------------------------
/cpp/images/real3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/real3.jpg


--------------------------------------------------------------------------------
/cpp/images/real4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/real4.jpg


--------------------------------------------------------------------------------
/cpp/images/real5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/cpp/images/real5.jpg


--------------------------------------------------------------------------------
/cpp/inference.cpp:
--------------------------------------------------------------------------------
  1 | #include "inference.h"
  2 | 
  3 | 
  4 | using namespace std;
  5 | using namespace cv;
  6 | 
  7 | // Creates the three predictors (YoloDet, YoloSeg, PPLCNet) from their model paths.
  8 | TableDetector::TableDetector(const string obj_model_path, const string edge_model_path, const string cls_model_path)
  9 |     : obj_detector(std::make_shared<YoloDet>(obj_model_path)),
 10 |       segnet(std::make_shared<YoloSeg>(edge_model_path)),
 11 |       pplcnet(std::make_shared<PPLCNet>(cls_model_path))
 12 | {
 13 | }
 14 | // Runs the full pipeline on a BGR image: detect tables, segment each table's
 15 | // outline, classify the crop, and re-label the four corners from the predicted
 16 | // class. Detections whose segmentation yields no box are skipped.
 17 | // det_accuracy is forwarded to the object detector.
 18 | vector<Bbox_Points> TableDetector::detect(const Mat& srcimg, const float det_accuracy)
 19 | {
 20 |     Mat rgb;
 21 |     cvtColor(srcimg, rgb, COLOR_BGR2RGB);
 22 |     const int img_h = rgb.rows;
 23 |     const int img_w = rgb.cols;
 24 |     vector<Bbox_Points> tables;
 25 | 
 26 |     // Stage 1: table boxes on the RGB image.
 27 |     const vector<Bbox> detections = this->obj_detector->infer(rgb, det_accuracy);
 28 |     for(const Bbox& det : detections)
 29 |     {
 30 |         // Corners start as the detector box; the segmentation result overwrites them below.
 31 |         Point lb, lt, rb, rt;
 32 |         this->get_box_points(det, lt, rt, rb, lb);
 33 | 
 34 |         // Stage 2: segment the table outline inside the detection padded by 10 px.
 35 |         const Bbox edge_ = this->pad_box_points(img_h, img_w, det.xmax, det.xmin, det.ymax, det.ymin, 10);
 36 |         Mat crop_img;
 37 |         rgb(Rect(edge_.xmin, edge_.ymin, edge_.xmax-edge_.xmin, edge_.ymax-edge_.ymin)).copyTo(crop_img);
 38 |         Mat edge_box;   // 4x2 matrix of corner coordinates
 39 |         std::tie(edge_box, lt, lb, rt, rb) = this->segnet->infer(crop_img);
 40 |         if(edge_box.empty())
 41 |         {
 42 |             continue;
 43 |         }
 44 | 
 45 |         // Segmentation coordinates are crop-relative; shift them into full-image coordinates.
 46 |         this->adjust_edge_points_axis(edge_box, lb, lt, rb, rt, edge_.xmin, edge_.ymin);
 47 | 
 48 |         // Stage 3: classify the detection padded by 5 px with the outline drawn on it.
 49 |         const Bbox cls_ = this->pad_box_points(img_h, img_w, det.xmax, det.xmin, det.ymax, det.ymin, 5);
 50 |         Mat cls_img;
 51 |         rgb(Rect(cls_.xmin, cls_.ymin, cls_.xmax-cls_.xmin, cls_.ymax-cls_.ymin)).copyTo(cls_img);
 52 |         this->add_pre_info_for_cls(cls_img, edge_box, cls_.xmin, cls_.ymin);
 53 |         const int pred_label = this->pplcnet->infer(cls_img);
 54 | 
 55 |         // Permute the corner labels according to the predicted class; keep the detector box.
 56 |         Bbox_Points oriented;
 57 |         this->get_real_rotated_points(lb, lt, pred_label, rb, rt, oriented.lb, oriented.lt, oriented.rb, oriented.rt);
 58 |         oriented.box = det;
 59 |         tables.emplace_back(oriented);
 60 |     }
 61 |     return tables;
 62 | }
 63 | // Writes the four corners of the axis-aligned box into lt, rt, rb and lb.
 64 | void TableDetector::get_box_points(const Bbox& box, Point& lt, Point& rt, Point& rb, Point& lb)
 65 | {
 66 |     lt.x = lb.x = box.xmin;   // left side
 67 |     rt.x = rb.x = box.xmax;   // right side
 68 |     lt.y = rt.y = box.ymin;   // top side
 69 |     lb.y = rb.y = box.ymax;   // bottom side
 70 | }
 71 | // Grows the box by `pad` pixels on every side, clamped to x in [0, w] and y in [0, h].
 72 | Bbox TableDetector::pad_box_points(const int h, const int w, const int xmax, const int xmin, const int ymax, const int ymin, const int pad)
 73 | {
 74 |     Bbox padded;
 75 |     padded.xmin = (xmin - pad > 0) ? xmin - pad : 0;
 76 |     padded.ymin = (ymin - pad > 0) ? ymin - pad : 0;
 77 |     padded.xmax = (xmax + pad < w) ? xmax + pad : w;
 78 |     padded.ymax = (ymax + pad < h) ? ymax + pad : h;
 79 |     padded.score = 1.f;  // placeholder; ignored
 80 |     return padded;
 81 | }
 82 | // Translates crop-relative results by (xmin_edge, ymin_edge): columns 0 and 1
 83 | // of edge_box, and the x/y of all four corner points.
 84 | void TableDetector::adjust_edge_points_axis(Mat& edge_box, Point& lb, Point& lt, Point& rb, Point& rt, const int xmin_edge, const int ymin_edge)
 85 | {
 86 |     edge_box.col(0) += xmin_edge;
 87 |     edge_box.col(1) += ymin_edge;
 88 | 
 89 |     // The same offset applies to every corner point.
 90 |     const Point shift(xmin_edge, ymin_edge);
 91 |     lt += shift;
 92 |     lb += shift;
 93 |     rt += shift;
 94 |     rb += shift;
 95 | }
 96 | // Draws the closed polygon given by the rows of edge_box onto cls_img, after
 97 | // shifting it by (-xmin_cls, -ymin_cls) into the crop's coordinate frame.
 98 | void TableDetector::add_pre_info_for_cls(cv::Mat& cls_img, const cv::Mat& edge_box, const int xmin_cls, const int ymin_cls)
 99 | {
100 |     vector<Point> cls_box;
101 |     cls_box.reserve(edge_box.rows);
102 |     for(int r = 0; r < edge_box.rows; ++r)
103 |         cls_box.push_back(Point(edge_box.ptr<float>(r)[0] - xmin_cls, edge_box.ptr<float>(r)[1] - ymin_cls));
104 |     cv::polylines(cls_img, cls_box, true, Scalar(255, 0, 255), 5);   // closed outline, thickness 5
105 | }
106 | 
107 | // Re-labels the four corners according to pred_label (x1 <- y means "output x1 takes input y"):
108 | //   1: lt1 <- rt, rt1 <- rb, rb1 <- lb, lb1 <- lt
109 | //   2: lt1 <- rb, rt1 <- lb, rb1 <- lt, lb1 <- rt
110 | //   3: lt1 <- lb, rt1 <- lt, rb1 <- rt, lb1 <- rb
111 | //   0 and any other value: corners are copied unchanged
112 | void TableDetector::get_real_rotated_points(const Point& lb, const Point& lt, const int pred_label, const Point& rb, const Point& rt, Point& lb1, Point& lt1, Point& rb1, Point& rt1)
113 | {
114 |     switch(pred_label)
115 |     {
116 |     case 1:
117 |         lt1 = rt; rt1 = rb; rb1 = lb; lb1 = lt;
118 |         break;
119 |     case 2:
120 |         lt1 = rb; rt1 = lb; rb1 = lt; lb1 = rt;
121 |         break;
122 |     case 3:
123 |         lt1 = lb; rt1 = lt; rb1 = rt; lb1 = rb;
124 |         break;
125 |     default:
126 |         lt1 = lt; rt1 = rt; rb1 = rb; lb1 = lb;
127 |         break;
128 |     }
129 | }


--------------------------------------------------------------------------------
/cpp/inference.h:
--------------------------------------------------------------------------------
 1 | #ifndef INFERENCE_H
 2 | #define INFERENCE_H
 3 | #include "predictor.h"
 4 | // Result for one table: the detector box plus the four corner points filled in by TableDetector::detect.
 5 | typedef struct
 6 | {
 7 |     Bbox box;   // detector box this result was derived from
 8 |     cv::Point lb;   // left-bottom corner
 9 |     cv::Point lt;   // left-top corner
10 |     cv::Point rb;   // right-bottom corner
11 |     cv::Point rt;   // right-top corner
12 | } Bbox_Points;
13 | // Three-stage table pipeline: object detection, edge segmentation, then a classifier whose label re-orders the corners.
14 | class TableDetector
15 | {
16 | public:
17 |     TableDetector(const std::string obj_model_path, const std::string edge_model_path, const std::string cls_model_path);   // creates the three predictors from their model paths
18 |     std::vector<Bbox_Points> detect(const cv::Mat& srcimg, const float det_accuracy=0.7);   // srcimg is converted BGR->RGB internally; det_accuracy is forwarded to the detector
19 | private:
20 |     std::shared_ptr<YoloDet> obj_detector{nullptr};   // stage 1: table boxes
21 |     std::shared_ptr<YoloSeg> segnet{nullptr};   // stage 2: table outline corners
22 |     std::shared_ptr<PPLCNet> pplcnet{nullptr};   // stage 3: class label
23 |     // Helpers used by detect():
24 |     void get_box_points(const Bbox& box, cv::Point& lt, cv::Point& rt, cv::Point& rb, cv::Point& lb);   // box -> four corners
25 |     Bbox pad_box_points(const int h, const int w, const int xmax, const int xmin, const int ymax, const int ymin, const int pad);   // grow a box by pad, clamped to the image
26 |     void adjust_edge_points_axis(cv::Mat& edge_box, cv::Point& lb, cv::Point& lt, cv::Point& rb, cv::Point& rt, const int xmin_edge, const int ymin_edge);   // add the crop origin to crop-relative coordinates
27 |     void add_pre_info_for_cls(cv::Mat& cls_img, const cv::Mat& edge_box, const int xmin_cls, const int ymin_cls);   // draw the edge polygon onto the classifier crop
28 |     void get_real_rotated_points(const cv::Point& lb, const cv::Point& lt, const int pred_label, const cv::Point& rb, const cv::Point& rt, cv::Point& lb1, cv::Point& lt1, cv::Point& rb1, cv::Point& rt1);   // permute the corner labels by pred_label
29 | };
30 | 
31 | #endif


--------------------------------------------------------------------------------
/cpp/main.cpp:
--------------------------------------------------------------------------------
 1 | #include"inference.h"
 2 | #include <iostream>
 3 | 
 4 | using namespace std;
 5 | using namespace cv;
 6 | 
 7 | 
 8 | // Demo entry point: runs the table pipeline on one image, then writes visualize.jpg
 9 | // and one extract-<i>.jpg per detected table into the working directory.
10 | // Usage: <exe> [image] [obj_det.onnx] [edge_det.onnx] [cls.onnx]
11 | // Any omitted argument falls back to the original hard-coded path.
12 | int main(int argc, char** argv)
13 | {
14 |     const string imgpath = argc > 1 ? argv[1] : "/home/wangbo/project/my_table_det/images/doc5.jpg";
15 |     const string obj_model_path = argc > 2 ? argv[2] : "/home/wangbo/project/my_table_det/weights/yolo_obj_det.onnx";
16 |     const string edge_model_path = argc > 3 ? argv[3] : "/home/wangbo/project/my_table_det/weights/yolo_edge_det.onnx";
17 |     const string cls_model_path = argc > 4 ? argv[4] : "/home/wangbo/project/my_table_det/weights/paddle_cls.onnx";
18 | 
19 |     TableDetector table_det(obj_model_path, edge_model_path, cls_model_path);
20 |     Mat srcimg = imread(imgpath);
21 |     if(srcimg.empty())
22 |     {
23 |         // imread returns an empty Mat when the file is missing or unreadable;
24 |         // detect() would otherwise throw inside cvtColor.
25 |         std::cerr << "Failed to read image: " << imgpath << std::endl;
26 |         return 1;
27 |     }
28 |     std::vector<Bbox_Points> result = table_det.detect(srcimg);
29 | 
30 |     // Visualise the results.
31 |     Mat draw_img = srcimg.clone();
32 |     for(size_t i=0;i<result.size();i++)
33 |     {
34 |         Bbox box = result[i].box;
35 |         Point lt = result[i].lt;
36 |         Point rt = result[i].rt;
37 |         Point rb = result[i].rb;
38 |         Point lb = result[i].lb;
39 |         visuallize(draw_img, box, lt, rt, rb, lb);
40 |         Mat wrapped_img = extract_table_img(srcimg, lt, rt, rb, lb);
41 |         string savepath = "extract-"+to_string(i)+".jpg";
42 |         imwrite(savepath, wrapped_img);
43 |     }
44 |     imwrite("visualize.jpg", draw_img);
45 |     return 0;
46 | }


--------------------------------------------------------------------------------
/cpp/predictor.cpp:
--------------------------------------------------------------------------------
  1 | #include "predictor.h"
  2 | 
  3 | 
  4 | using namespace std;
  5 | using namespace cv;
  6 | using namespace dnn;
  7 | 
  8 | // Reads the detection network and records its unconnected output layer names for forward().
  9 | YoloDet::YoloDet(const string model_path)
 10 | {
 11 |     model = readNet(model_path);
 12 |     outlayer_names = model.getUnconnectedOutLayersNames();
 13 | }
 14 | // Runs the detector on `srcimg` and returns the boxes kept by NMS, in
 15 | // source-image pixel coordinates, each with its score (>= `score`).
 16 | vector<Bbox> YoloDet::infer(const Mat& srcimg, const float score)
 17 | {
 18 |     const int ori_h = srcimg.rows;
 19 |     const int ori_w = srcimg.cols;
 20 | 
 21 |     // Pre-process: ResizePad (defined elsewhere; presumably reports the resized size
 22 |     // and the left/top offsets), scale by 1/255 into float32, pack as a blob.
 23 |     int new_w, new_h, left, top;
 24 |     Mat input = ResizePad(srcimg, this->resize_shape[0], new_w, new_h, left, top);
 25 |     input.convertTo(input, CV_32FC3, 1.0/255.0);
 26 |     this->model.setInput(blobFromImage(input));
 27 |     std::vector<Mat> outs;
 28 |     this->model.forward(outs, this->outlayer_names);
 29 | 
 30 |     // Post-process: outs[0] is indexed as [0][k][i] with k = 0..4 -> cx, cy, w, h, score.
 31 |     const float x_factor = (float)ori_w / new_w;
 32 |     const float y_factor = (float)ori_h / new_h;
 33 |     vector<Rect> boxes;
 34 |     vector<float> scores;
 35 |     for(int i = 0, n = outs[0].size[2]; i < n; ++i)
 36 |     {
 37 |         const float conf = outs[0].ptr<float>(0, 4)[i];
 38 |         if(!(conf >= score))
 39 |             continue;
 40 |         const float cx = outs[0].ptr<float>(0, 0)[i];
 41 |         const float cy = outs[0].ptr<float>(0, 1)[i];
 42 |         const float bw = outs[0].ptr<float>(0, 2)[i];
 43 |         const float bh = outs[0].ptr<float>(0, 3)[i];
 44 |         // Remove the left/top offset, rescale to the source image, clamp the origin at 0.
 45 |         const int xmin = max(int((cx - bw / 2 - left) * x_factor), 0);
 46 |         const int ymin = max(int((cy - bh / 2 - top) * y_factor), 0);
 47 |         boxes.emplace_back(xmin, ymin, int(bw * x_factor), int(bh * y_factor));
 48 |         scores.emplace_back(conf);
 49 |     }
 50 | 
 51 |     // NMS (threshold 0.4), then convert to corner form clamped to the last pixel.
 52 |     vector<int> indices;
 53 |     NMSBoxes(boxes, scores, score, 0.4, indices);
 54 |     vector<Bbox> bboxes;
 55 |     bboxes.reserve(indices.size());
 56 |     for(const int ind : indices)
 57 |     {
 58 |         const Rect& r = boxes[ind];
 59 |         bboxes.push_back({r.x, r.y, min(r.x + r.width, ori_w-1), min(r.y + r.height, ori_h-1), scores[ind]});
 60 |     }
 61 |     return bboxes;
 62 | }
 63 | // Reads the segmentation network and records its unconnected output layer names for forward().
 64 | YoloSeg::YoloSeg(const string model_path)
 65 | {
 66 |     model = readNet(model_path);
 67 |     outlayer_names = model.getUnconnectedOutLayersNames();
 68 | }
 69 | // Segments the table outline in `srcimg`. Returns (box, lt, lb, rt, rb): the corner
 70 | // matrix and the labelled corners in srcimg pixels, or an empty Mat with default
 71 | // points when no mask passes the score cut or no box is extracted from it.
 72 | std::tuple<Mat, Point, Point, Point, Point> YoloSeg::infer(const Mat& srcimg)
 73 | {
 74 |     const int destHeight = srcimg.rows;
 75 |     const int destWidth = srcimg.cols;
 76 | 
 77 |     // Pre-process: ResizePad to the network size; blobFromImage applies the 1/255 scale.
 78 |     int resize_h, resize_w, left, top;
 79 |     const Mat input = ResizePad(srcimg, this->resize_shape[0], resize_w, resize_h, left, top);
 80 |     this->model.setInput(blobFromImage(input, 1.0/255.0));
 81 |     std::vector<Mat> predict_maps;
 82 |     this->model.forward(predict_maps, this->outlayer_names);
 83 | 
 84 |     // Post-process to a probability mask; empty means no usable prediction.
 85 |     const Mat pred = this->img_postprocess(predict_maps);
 86 |     if(pred.empty())
 87 |     {
 88 |         return std::make_tuple(Mat(), Point(), Point(), Point(), Point());
 89 |     }
 90 | 
 91 |     // Binarise at 0.8 and let get_max_adjacent_bbox (defined elsewhere) extract the box.
 92 |     Mat mask = pred > 0.8;
 93 |     mask.convertTo(mask, CV_8UC1);
 94 |     Mat box = get_max_adjacent_bbox(mask);
 95 |     if(box.empty())
 96 |     {
 97 |         return std::make_tuple(Mat(), Point(), Point(), Point(), Point());
 98 |     }
 99 | 
100 |     // Map the corners back to srcimg coordinates, then label them.
101 |     this->adjust_coordinates(box, left, top, resize_w, resize_h, destWidth, destHeight);
102 |     Point lt, lb, rt, rb;
103 |     this->sort_and_clip_coordinates(box, lt, lb, rt, rb);
104 |     return std::make_tuple(box, lt, lb, rt, rb);
105 | }
106 | // Rescales each row (x, y) of `box` in place: subtract left/top, scale by dest/resize, truncate, clamp to [0, dest-1].
107 | void YoloSeg::adjust_coordinates(Mat& box, const int left, const int top, const int resize_w, const int resize_h, const int destWidth, const int destHeight)
108 | {
109 |     for(int r = 0; r < box.rows; ++r)
110 |     {
111 |         const float x = (box.at<float>(r, 0) - left) / resize_w * destWidth;
112 |         const float y = (box.at<float>(r, 1) - top) / resize_h * destHeight;
113 |         box.at<float>(r, 0) = (int)std::min(std::max(x, 0.0f), (float)destWidth-1);
114 |         box.at<float>(r, 1) = (int)std::min(std::max(y, 0.0f), (float)destHeight-1);
115 |     }
116 | }
117 | // Labels the four rows of `box` as lt/lb/rt/rb. Rows are ordered by x through argsort_ascend
118 | // (defined elsewhere): the first two form the left pair, the last two the right pair; within
119 | // a pair the smaller y is the top. Coordinates are truncated to int and clamped to >= 0.
120 | void YoloSeg::sort_and_clip_coordinates(const Mat& box, Point& lt, Point& lb, Point& rt, Point& rb)
121 | {
122 |     vector<float> xs = box.col(0).reshape(1);
123 |     const vector<int> order = argsort_ascend(xs);
124 | 
125 |     // Row order[k] of box as an integer point (truncated, not yet clamped).
126 |     const auto raw = [&](int k) {
127 |         const float* p = box.ptr<float>(order[k]);
128 |         return Point((int)p[0], (int)p[1]);
129 |     };
130 |     const auto clip = [](const Point& p) {
131 |         return Point(std::max(p.x, 0), std::max(p.y, 0));
132 |     };
133 |     const Point l0 = raw(0), l1 = raw(1);
134 |     const bool left_swapped = l0.y > l1.y;
135 |     lt = clip(left_swapped ? l1 : l0);
136 |     lb = clip(left_swapped ? l0 : l1);
137 | 
138 |     const Point r0 = raw(2), r1 = raw(3);
139 |     const bool right_swapped = r0.y > r1.y;
140 |     rt = clip(right_swapped ? r1 : r0);
141 |     rb = clip(right_swapped ? r0 : r1);
142 | }
143 | // Builds the 800x800 float mask for the highest-scoring proposal. predict_maps[0] holds the
144 | // proposals ([.][len][num]; per proposal cx, cy, w, h, score, mask coefficients), predict_maps[1]
145 | // the mask prototypes. Returns an empty Mat if the best score is < 0.7 or the box misses the grid.
146 | Mat YoloSeg::img_postprocess(const vector<Mat>& predict_maps)
147 | {
148 |     Mat mask_output = predict_maps[1];
149 |     const int len = predict_maps[0].size[1];
150 |     Mat predictions = predict_maps[0].reshape(0, len).t();   // one proposal per row
151 |     double max_class_score;
152 |     Point classIdPoint;
153 |     minMaxLoc(predictions.col(4), 0, &max_class_score, 0, &classIdPoint);
154 |     Mat highest_score_prediction = predictions.row(classIdPoint.y);
155 |     const float* best = highest_score_prediction.ptr<float>(0);
156 |     const float x = best[0], y = best[1], w = best[2], h = best[3];
157 |     if(best[4] < 0.7)
158 |     {
159 |         return Mat();
160 |     }
161 |     // Mask = sigmoid(coefficients x prototypes), reshaped to mask_height rows (batch size ignored).
162 |     Mat mask_predictions = highest_score_prediction.colRange(5, len);
163 |     const int num_mask = mask_output.size[1];
164 |     const int mask_height = mask_output.size[2];
165 |     const int mask_width = mask_output.size[3];
166 |     const std::vector<int> newshape = {num_mask, mask_height*mask_width};
167 |     Mat masks = mask_predictions * mask_output.reshape(0, newshape);
168 |     cv::exp(-masks, masks);
169 |     masks = 1.f / (1 + masks);
170 |     Mat mask = masks.reshape(0, mask_height);
171 | 
172 |     // Project the proposal box from the 800-px network frame onto the 200-px mask grid.
173 |     const int small_w = 200, small_h = 200;
174 |     const int small_x_min = max(0, int((x - w / 2) * small_w / 800.0));
175 |     const int small_x_max = min(small_w, int((x + w / 2) * small_w / 800.0));
176 |     const int small_y_min = max(0, int((y - h / 2) * small_h / 800.0));
177 |     const int small_y_max = min(small_h, int((y + h / 2) * small_h / 800.0));
178 |     if(small_x_max < small_x_min || small_y_max < small_y_min)
179 |     {
180 |         return Mat();   // box lies outside the grid; a negative-size ROI would make Mat::operator() throw
181 |     }
182 | 
183 |     // Keep mask values only inside the box, then upsample back to 800x800.
184 |     Mat filtered_mask = Mat::zeros(small_h, small_w, CV_32FC1);
185 |     const Rect crop_rect(small_x_min, small_y_min, small_x_max-small_x_min, small_y_max-small_y_min);
186 |     mask(crop_rect).copyTo(filtered_mask(crop_rect));
187 |     Mat resized_mask;
188 |     resize(filtered_mask, resized_mask, Size(800, 800), 0, 0, INTER_CUBIC);
189 |     return resized_mask;
190 | }
191 | 
192 | // Reads the classification network and records its unconnected output layer names for forward().
193 | PPLCNet::PPLCNet(const string model_path)
194 | {
195 |     model = readNet(model_path);
196 |     outlayer_names = model.getUnconnectedOutLayersNames();
197 | }
198 | // Classifies `srcimg`: returns the index of the largest value among the first
199 | // size[1] floats of the first network output.
200 | int PPLCNet::infer(const Mat& srcimg)
201 | {
202 |     // Pre-process: ResizePad to the network size, scale by 1/255 into float32, pack as a blob.
203 |     int new_w, new_h, left, top;   // outputs of ResizePad; not used afterwards
204 |     Mat input = ResizePad(srcimg, this->resize_shape[0], new_w, new_h, left, top);
205 |     input.convertTo(input, CV_32FC3, 1.0/255.0);
206 |     this->model.setInput(blobFromImage(input));
207 | 
208 |     std::vector<Mat> outs;
209 |     this->model.forward(outs, this->outlayer_names);
210 | 
211 |     // Post-process: arg-max over the output values.
212 |     float* first = (float*)outs[0].data;
213 |     float* last = first + outs[0].size[1];
214 |     return (int)(std::max_element(first, last) - first);
215 | }


--------------------------------------------------------------------------------
/cpp/predictor.h:
--------------------------------------------------------------------------------
 1 | #ifndef PREDICTOR_H
 2 | #define PREDICTOR_H
 3 | #include "utils.h"
 4 | #include <opencv2/dnn.hpp>
 5 | 
 6 | // Table object detector backed by a cv::dnn network.
 7 | class YoloDet
 8 | {
 9 | public:
10 | 	YoloDet(const std::string model_path);   // reads the network from model_path
11 | 	std::vector<Bbox> infer(const cv::Mat& srcimg, const float score=0.4f);   // boxes scoring >= score, after NMS
12 | private:
13 | 	const int resize_shape[2] = {928, 928};   // network input size; infer() reads element 0 only
14 | 	std::vector<std::string> outlayer_names;   // output layer names handed to forward()
15 |     cv::dnn::Net model;   // loaded network
16 | };
17 | // Table outline segmenter backed by a cv::dnn network.
18 | class YoloSeg
19 | {
20 | public:
21 | 	YoloSeg(const std::string model_path);   // reads the network from model_path
22 | 	std::tuple<cv::Mat, cv::Point, cv::Point, cv::Point, cv::Point> infer(const cv::Mat& srcimg);   // returns (box, lt, lb, rt, rb); box is empty when nothing is found
23 | private:
24 | 	const int resize_shape[2] = {800, 800};   // network input size; infer() reads element 0 only
25 | 	cv::Mat img_postprocess(const std::vector<cv::Mat>& predict_maps);   // network outputs -> mask for the best proposal
26 | 	void adjust_coordinates(cv::Mat& box, const int left, const int top, const int resize_w, const int resize_h, const int destWidth, const int destHeight);   // network-frame rows -> destination pixels, in place
27 | 	void sort_and_clip_coordinates(const cv::Mat& box, cv::Point& lt, cv::Point& lb, cv::Point& rt, cv::Point& rb);   // label the four rows as corners
28 | 	std::vector<std::string> outlayer_names;   // output layer names handed to forward()
29 |     cv::dnn::Net model;   // loaded network
30 | };
31 | // Image classifier backed by a cv::dnn network.
32 | class PPLCNet
33 | {
34 | public:
35 | 	PPLCNet(const std::string model_path);   // reads the network from model_path
36 | 	int infer(const cv::Mat& srcimg);   // index of the largest output value
37 | private:
38 | 	const int resize_shape[2] = {624, 624};   // network input size; infer() reads element 0 only
39 | 	std::vector<std::string> outlayer_names;   // output layer names handed to forward()
40 | 	cv::dnn::Net model;   // loaded network
41 | };
42 | 
43 | #endif


--------------------------------------------------------------------------------
/cpp/utils.cpp:
--------------------------------------------------------------------------------
  1 | #include "utils.h"
  2 | #include <vector>
  3 | #include <cmath>
  4 | #include <algorithm>
  5 | 
  6 | 
  7 | using namespace std;
  8 | using namespace cv;
  9 | // Returns a copy of `stats` with its rows reordered so that column `colId` is ascending.
 10 | Mat sortMat(const Mat &stats, int colId)
 11 | {
 12 |     // Per-column ascending argsort (SORT_DESCENDING would reverse it); only the
 13 |     // ordering of the key column is used to permute whole rows.
 14 |     Mat sorted_index;
 15 |     cv::sortIdx(stats, sorted_index, cv::SORT_EVERY_COLUMN + cv::SORT_ASCENDING);
 16 |     const Mat key_order = sorted_index.col(colId);
 17 | 
 18 |     Mat sorted_stats = stats.clone();
 19 |     for(int i = 0; i < key_order.rows; i++)
 20 |     {
 21 |         // copyTo writes into the existing row storage. A plain `row(i) = row(j)` would only
 22 |         // rebind the temporary header, which is what the former `+ 0` trick worked around.
 23 |         stats.row(key_order.at<int>(i, 0)).copyTo(sorted_stats.row(i));
 24 |     }
 25 |     return sorted_stats;
 26 | }
 27 | 
 28 | // Fits the minimum-area rotated rectangle around `contour`.
 29 | // Returns (corners, short_side): corners are ordered left-top, right-top,
 30 | // right-bottom, left-bottom, where "left" means the two points with the smaller x
 31 | // and "top" the smaller y within each pair; short_side is min(width, height)
 32 | // of the rotated rectangle.
 33 | std::tuple<vector<Point2f>, float> get_mini_boxes(const vector<Point>& contour)
 34 | {
 35 |     const RotatedRect bounding_box = cv::minAreaRect(contour);
 36 |     cv::Mat rect;
 37 |     cv::boxPoints(bounding_box, rect);
 38 |     const Mat points = sortMat(rect, 0);   // rows ordered by ascending x
 39 | 
 40 |     // Left pair = rows 0/1: index_1 is the top one, index_4 the bottom one.
 41 |     int index_1 = 0, index_4 = 1;
 42 |     if(!(points.ptr<float>(1)[1] > points.ptr<float>(0)[1]))
 43 |     {
 44 |         std::swap(index_1, index_4);
 45 |     }
 46 | 
 47 |     // Right pair = rows 2/3: index_2 is the top one, index_3 the bottom one.
 48 |     int index_2 = 2, index_3 = 3;
 49 |     if(!(points.ptr<float>(3)[1] > points.ptr<float>(2)[1]))
 50 |     {
 51 |         std::swap(index_2, index_3);
 52 |     }
 53 | 
 54 |     // Copies one row of the sorted corner matrix into a Point2f.
 55 |     const auto corner = [&points](int row) {
 56 |         const float* p = points.ptr<float>(row);
 57 |         return Point2f(p[0], p[1]);
 58 |     };
 59 |     const vector<Point2f> box = {
 60 |         corner(index_1),
 61 |         corner(index_2),
 62 |         corner(index_3),
 63 |         corner(index_4),
 64 |     };
 65 |     const float short_side = std::min(bounding_box.size.width, bounding_box.size.height);
 66 |     return std::make_tuple(box, short_side);
 67 | }
 68 | // Closed-form inverse of the 2x2 CV_64F matrix `concat`.
 69 | // The determinant is not checked: a zero determinant yields inf/NaN entries.
 70 | Mat get_inv(const Mat& concat) {
 71 |     const double m00 = concat.at<double>(0, 0), m01 = concat.at<double>(0, 1);
 72 |     const double m10 = concat.at<double>(1, 0), m11 = concat.at<double>(1, 1);
 73 |     const double det = m00 * m11 - m01 * m10;
 74 |     // inverse = adj(M) / det, with adj(M) = [[m11, -m01], [-m10, m00]]
 75 |     Mat inv_result = (Mat_<double>(2, 2) << m11 / det, -m01 / det, -m10 / det, m00 / det);
 76 |     return inv_result;
 77 | }
 78 | // Enumerates every n-element combination of the values startnum, startnum+step, ... (<= endnum),
 79 | // in ascending position order. Returns an empty list when n is negative or exceeds the value count.
 80 | vector<vector<int>> nchoosek(int startnum, int endnum, int step = 1, int n = 1) {
 81 |     vector<vector<int>> c;
 82 |     if (step <= 0 && startnum <= endnum) return c;   // a non-positive step would never pass endnum
 83 |     vector<int> range;
 84 |     for (int i = startnum; i <= endnum; i += step) range.push_back(i);
 85 |     // Signed count: `range.size() - k` wraps around as size_t when k > size and reads past the end.
 86 |     const int total = static_cast<int>(range.size());
 87 |     if (n < 0 || n > total) return c;
 88 |     vector<int> combination;
 89 |     function<void(int, int)> combine = [&](int offset, int k) {
 90 |         if (k == 0) { c.push_back(combination); return; }
 91 |         for (int i = offset; i <= total - k; ++i) {
 92 |             combination.push_back(range[i]);
 93 |             combine(i + 1, k - 1);
 94 |             combination.pop_back();
 95 |         }
 96 |     };
 97 |     combine(0, n);
 98 |     return c;
 99 | }
100 | 
101 | vector<Point> minboundquad(const vector<Point>& hull) 
102 | {
103 |     int len_hull = hull.size();
104 |     vector<Point2f> xy(hull.begin(), hull.end());
105 |     vector<int> idx(len_hull);
106 |     iota(idx.begin(), idx.end(), 0);
107 |     vector<int> idx_roll(len_hull);
108 |     rotate_copy(idx.begin(), idx.begin() + 1, idx.end(), idx_roll.begin());
109 |     vector<vector<int>> edges(len_hull, vector<int>(2));
110 |     for (int i = 0; i < len_hull; ++i) {
111 |         edges[i][0] = idx[i];
112 |         edges[i][1] = idx_roll[i];
113 |     }
114 |     vector<pair<double, int>> edgeangles1;
115 |     for (int i = 0; i < len_hull; ++i) {
116 |         double y = xy[edges[i][1]].y - xy[edges[i][0]].y;
117 |         double x = xy[edges[i][1]].x - xy[edges[i][0]].x;
118 |         double angle = atan2(y, x);
119 |         if (angle < 0) {
120 |             angle += 2 * M_PI;
121 |         }
122 |         edgeangles1.emplace_back(angle, i);
123 |     }
124 |     sort(edgeangles1.begin(), edgeangles1.end());
125 |     vector<vector<int>> edges1;
126 |     vector<double> edgeangle1;
127 |     for (const auto& item : edgeangles1) {
128 |         edges1.push_back(edges[item.second]);
129 |         edgeangle1.push_back(item.first);
130 |     }
131 |     vector<double> edgeangles(edgeangle1.begin(), edgeangle1.end());
132 |     edges = edges1;
133 |     double eps = 2.2204e-16;
134 |     double angletol = eps * 100;
135 |     vector<bool> k(edgeangles.size() - 1);
136 |     adjacent_difference(edgeangles.begin(), edgeangles.end(), k.begin(), [&](double a, double b) { return (b - a) < angletol; });
137 |     vector<int> idx_to_delete;
138 |     for (int i = 0; i < k.size(); ++i) {
139 |         if (k[i]) {
140 |             idx_to_delete.push_back(i);
141 |         }
142 |     }
143 |     for (int i = idx_to_delete.size() - 1; i >= 0; --i) {
144 |         edges.erase(edges.begin() + idx_to_delete[i]);
145 |         edgeangles.erase(edgeangles.begin() + idx_to_delete[i]);
146 |     }
147 |     int nedges = edges.size();
148 |     vector<vector<int>> edgelist = nchoosek(0, nedges - 1, 1, 4);
149 |     vector<int> k_idx;
150 |     for (int i = 0; i < edgelist.size(); ++i) {
151 |         if (edgeangles[edgelist[i][3]] - edgeangles[edgelist[i][0]] <= M_PI) {
152 |             k_idx.push_back(i);
153 |         }
154 |     }
155 |     for (int i = k_idx.size() - 1; i >= 0; --i) {
156 |         edgelist.erase(edgelist.begin() + k_idx[i]);
157 |     }
158 |     int nquads = edgelist.size();
159 |     double quadareas = numeric_limits<double>::infinity();
160 |     vector<Point> cnt(4);
161 |     for (int i = 0; i < nquads; ++i) {
162 |         vector<int> edgeind = edgelist[i];
163 |         edgeind.push_back(edgelist[i][0]);
164 |         vector<vector<int>> edgesi;
165 |         vector<double> edgeang;
166 |         for (int idx : edgeind) {
167 |             edgesi.push_back(edges[idx]);
168 |             edgeang.push_back(edgeangles[idx]);
169 |         }
170 |         bool is_continue = false;
171 |         for (int j = 0; j < edgeang.size() - 1; ++j) {
172 |             if (edgeang[j + 1] - edgeang[j] > M_PI) {
173 |                 is_continue = true;
174 |                 break;
175 |             }
176 |         }
177 |         if (is_continue) {
178 |             continue;
179 |         }
180 |         vector<double> qxi(4), qyi(4);
181 |         for (int j = 0; j < 4; ++j) {
182 |             int jplus1 = j + 1;
183 |             vector<int> shared;
184 |             set_intersection(edgesi[j].begin(), edgesi[j].end(), edgesi[jplus1].begin(), edgesi[jplus1].end(), back_inserter(shared));
185 |             if (!shared.empty()) {
186 |                 qxi[j] = xy[shared[0]].x;
187 |                 qyi[j] = xy[shared[0]].y;
188 |             } else {
189 |                 Point2f A = xy[edgesi[j][0]];
190 |                 Point2f B = xy[edgesi[j][1]];
191 |                 Point2f C = xy[edgesi[jplus1][0]];
192 |                 Point2f D = xy[edgesi[jplus1][1]];
193 |                 Mat concat = (Mat_<double>(2, 2) << A.x - B.x, D.x - C.x, A.y - B.y, D.y - C.y);
194 |                 Mat div = (Mat_<double>(2, 1) << A.x - C.x, A.y - C.y);
195 |                 Mat inv_result = get_inv(concat);
196 |                 double a = inv_result.at<double>(0, 0);
197 |                 double b = inv_result.at<double>(0, 1);
198 |                 double c = inv_result.at<double>(1, 0);
199 |                 double d = inv_result.at<double>(1, 1);
200 |                 double e = div.at<double>(0, 0);
201 |                 double f = div.at<double>(1, 0);
202 |                 vector<double> ts1 = {a * e + b * f, c * e + d * f};
203 |                 Point2f Q = A + (B - A) * ts1[0];
204 |                 qxi[j] = Q.x;
205 |                 qyi[j] = Q.y;
206 |             }
207 |         }
208 |         vector<Point> contour;
209 |         for (int j = 0; j < 4; ++j) {
210 |             contour.emplace_back(qxi[j], qyi[j]);
211 |         }
212 |         double A_i = contourArea(contour);
213 |         if (A_i < quadareas) {
214 |             quadareas = A_i;
215 |             cnt = contour;
216 |         }
217 |     }
218 |     return cnt;
219 | }
220 | 
221 | 
222 | Mat ResizePad(const Mat& img, const int target_size, int& new_w, int& new_h, int& left, int& top)
223 | {
224 |     const int h = img.rows;
225 |     const int w = img.cols;
226 |     const int m = max(h, w);
227 |     const float ratio = (float)target_size / (float)m;
228 |     new_w = int(ratio * w);
229 |     new_h = int(ratio * h);
230 |     Mat dstimg;
231 |     resize(img, dstimg, Size(new_w, new_h), 0, 0, INTER_LINEAR);
232 |     top = (target_size - new_h) / 2;
233 |     int bottom = (target_size - new_h) - top;
234 |     left = (target_size - new_w) / 2;
235 |     int right = (target_size - new_w) - left;
236 |     copyMakeBorder(dstimg, dstimg, top, bottom, left, right, BORDER_CONSTANT, Scalar(114, 114, 114));
237 |     return dstimg;
238 | }
239 | 
240 | Mat get_max_adjacent_bbox(const Mat& mask)
241 | {
242 |     vector<vector<Point>> contours;
243 |     cv::findContours(mask, contours, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0));
244 |     float max_size;
245 |     vector<Point> cnt_save;
246 |     for(int i=0;i<contours.size();i++)
247 |     {
248 |         std::tuple<vector<Point2f>, float> result = get_mini_boxes(contours[i]);
249 |         //vector<Point2f> points = std::get<0>(result); ////没有用
250 |         float sside = std::get<1>(result);
251 |         if(sside > max_size)
252 |         {
253 |             max_size = sside;
254 |             cnt_save = contours[i];
255 |         }
256 |     }
257 |     if(cnt_save.size() > 0)
258 |     {
259 |         float epsilon = 0.01 * cv::arcLength(cnt_save, true);
260 |         vector<Point> box;
261 |         cv::approxPolyDP(cnt_save, box, epsilon, true);
262 |         vector<Point> hull;
263 |         cv::convexHull(box, hull);
264 |         std::tuple<vector<Point2f>, float> result = get_mini_boxes(cnt_save);
265 |         vector<Point2f> points = std::get<0>(result);
266 |         const int len_hull = hull.size();
267 | 
268 |         if(len_hull==4)
269 |         {
270 |             Mat tar_box = Mat(hull.size(), 2, CV_32FC1);
271 |             for(int i=0;i<hull.size();i++)
272 |             {
273 |                 tar_box.ptr<float>(i)[0] = hull[i].x;
274 |                 tar_box.ptr<float>(i)[1] = hull[i].y;
275 |             }
276 |             return tar_box;   ////也可以返回vector<Point>这种格式的
277 |         }
278 |         else if(len_hull > 4)
279 |         {
280 |             vector<Point> target_box = minboundquad(hull);
281 |             Mat tar_box = Mat(target_box.size(), 2, CV_32FC1);
282 |             for(int i=0;i<target_box.size();i++)
283 |             {
284 |                 tar_box.ptr<float>(i)[0] = target_box[i].x;
285 |                 tar_box.ptr<float>(i)[1] = target_box[i].y;
286 |             }
287 |             return tar_box;
288 |         }
289 |         else
290 |         {
291 |             Mat tar_box = Mat(points.size(), 2, CV_32FC1);
292 |             for(int i=0;i<points.size();i++)
293 |             {
294 |                 tar_box.ptr<float>(i)[0] = points[i].x;
295 |                 tar_box.ptr<float>(i)[1] = points[i].y;
296 |             }
297 |             return tar_box;
298 |         }
299 |     }
300 |     else
301 |     {
302 |         return cv::Mat();
303 |     }
304 | }
305 | 
306 | 
307 | void visuallize(cv::Mat& img, const Bbox& box, const Point& lt, const Point& rt, const Point& rb, const Point& lb)
308 | {
309 |     vector<Point> draw_box = {lt, rt, rb, lb};
310 |     circle(img, lt, 50, Scalar(255, 0, 0), 10);
311 |     rectangle(img, Point(box.xmin, box.ymin), Point(box.xmax, box.ymax), Scalar(255, 0, 0), 10);
312 |     cv::polylines(img, draw_box, true, Scalar(255, 0, 255), 6);
313 | }
314 | 
315 | Mat extract_table_img(const Mat&img, const Point& lt, const Point& rt, const Point& rb, const Point& lb)
316 | {
317 |     Point2f src_points[4] = {lt, rt, lb, rb};
318 |     const float width_a = sqrt(pow(rb.x - lb.x, 2) + pow(rb.y - lb.y, 2));
319 |     const float width_b = sqrt(pow(rt.x - lt.x, 2) + pow(rt.y - lt.y, 2));
320 |     const float max_width = max(width_a, width_b);
321 | 
322 |     const float height_a = sqrt(pow(rt.x - rb.x, 2) + pow(rt.y - rb.y, 2));
323 |     const float height_b = sqrt(pow(lt.x - lb.x, 2) + pow(lt.y - lb.y, 2));
324 |     const float max_height = max(height_a, height_b);
325 | 
326 |     Point2f dst_points[4] = {Point2f(0, 0), Point2f(max_width - 1, 0), Point2f(0, max_height - 1), Point(max_width - 1, max_height - 1)};
327 |     Mat M;
328 |     M = cv::getPerspectiveTransform(src_points, dst_points);
329 |     Mat warped ;
330 |     cv::warpPerspective(img, warped, M, Size(max_width, max_height));
331 |     return warped;
332 | }


--------------------------------------------------------------------------------
/cpp/utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef UTIL_H
 2 | #define UTIL_H
 3 | #include <iostream>
 4 | #include <numeric>
 5 | #include <opencv2/imgproc.hpp>
 6 | #include<opencv2/highgui.hpp>
 7 | 
 8 | typedef struct
 9 | {
10 |     int xmin;
11 |     int ymin;
12 |     int xmax;
13 |     int ymax;
14 |     float score;
15 | } Bbox;
16 | 
17 | cv::Mat ResizePad(const cv::Mat& img, const int target_size, int& new_w, int& new_h, int& left, int& top);
18 | cv::Mat get_max_adjacent_bbox(const cv::Mat& mask);
19 | void visuallize(cv::Mat& img, const Bbox& box, const cv::Point& lt, const cv::Point& rt, const cv::Point& rb, const cv::Point& lb);
20 | cv::Mat extract_table_img(const cv::Mat&img, const cv::Point& lt, const cv::Point& rt, const cv::Point& rb, const cv::Point& lb);
21 | 
// Returns the indices of `array` ordered so that the referenced values go
// from largest to smallest. The relative order of equal values is unspecified
// (std::sort is not stable).
template<typename T> std::vector<int> argsort_descend(const std::vector<T>& array)
{
    std::vector<int> order(static_cast<int>(array.size()));
    std::iota(order.begin(), order.end(), 0);

    const auto ranks_before = [&array](int lhs, int rhs) { return array[lhs] > array[rhs]; };
    std::sort(order.begin(), order.end(), ranks_before);
    return order;
}
33 | 
// Returns the indices of `array` ordered so that the referenced values go
// from smallest to largest. The relative order of equal values is unspecified
// (std::sort is not stable).
template<typename T> std::vector<int> argsort_ascend(const std::vector<T>& array)
{
    std::vector<int> order(static_cast<int>(array.size()));
    std::iota(order.begin(), order.end(), 0);

    const auto ranks_before = [&array](int lhs, int rhs) { return array[lhs] < array[rhs]; };
    std::sort(order.begin(), order.end(), ranks_before);
    return order;
}
45 | 
46 | 
47 | #endif


--------------------------------------------------------------------------------
/python/images/chip.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/chip.jpg


--------------------------------------------------------------------------------
/python/images/chip2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/chip2.jpg


--------------------------------------------------------------------------------
/python/images/doc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/doc.png


--------------------------------------------------------------------------------
/python/images/doc2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/doc2.jpg


--------------------------------------------------------------------------------
/python/images/doc3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/doc3.jpg


--------------------------------------------------------------------------------
/python/images/doc4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/doc4.jpg


--------------------------------------------------------------------------------
/python/images/doc5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/doc5.jpg


--------------------------------------------------------------------------------
/python/images/real2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/real2.jpeg


--------------------------------------------------------------------------------
/python/images/real3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/real3.jpg


--------------------------------------------------------------------------------
/python/images/real4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/real4.jpg


--------------------------------------------------------------------------------
/python/images/real5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/TableDetection/804b25c6b7a4a6d909cba53243d65d6b6e387f87/python/images/real5.jpg


--------------------------------------------------------------------------------
/python/inference.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | from predictor import YoloSeg, YoloDet, PPLCNet
  4 | 
  5 | 
  6 | class TableDetector:
  7 |     def __init__(self, obj_model_path, edge_model_path, cls_model_path):
  8 |         self.obj_detector = YoloDet(obj_model_path)
  9 |         self.segnet = YoloSeg(edge_model_path)
 10 |         self.pplcnet = PPLCNet(cls_model_path)
 11 | 
 12 |     def detect(self, img, det_accuracy=0.7):
 13 |         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
 14 |         img_mask = img.copy()
 15 |         h, w = img.shape[:-1]
 16 |         obj_det_res, pred_label = self.init_default_output(h, w)
 17 |         result = []
 18 |         
 19 |         obj_det_res = self.obj_detector.infer(img, score=det_accuracy)
 20 | 
 21 |         for i in range(len(obj_det_res)):
 22 |             det_res = obj_det_res[i]
 23 |             score, box = det_res
 24 |             xmin, ymin, xmax, ymax = box
 25 |             edge_box = box.reshape([-1, 2])
 26 |             lb, lt, rb, rt = self.get_box_points(box)
 27 |             
 28 |             xmin_edge, ymin_edge, xmax_edge, ymax_edge = self.pad_box_points(h, w, xmax, xmin, ymax, ymin, 10)
 29 |             crop_img = img_mask[ymin_edge:ymax_edge, xmin_edge:xmax_edge, :]
 30 |             edge_box, lt, lb, rt, rb = self.segnet.infer(crop_img)
 31 |             if edge_box is None:
 32 |                 continue
 33 |             lb, lt, rb, rt = self.adjust_edge_points_axis(edge_box, lb, lt, rb, rt, xmin_edge, ymin_edge)
 34 |             
 35 |             xmin_cls, ymin_cls, xmax_cls, ymax_cls = self.pad_box_points(
 36 |                 h, w, xmax, xmin, ymax, ymin, 5
 37 |             )
 38 |             cls_img = img_mask[ymin_cls:ymax_cls, xmin_cls:xmax_cls, :]
 39 |             # 增加先验信息
 40 |             self.add_pre_info_for_cls(cls_img, edge_box, xmin_cls, ymin_cls)
 41 |             pred_label = self.pplcnet.infer(cls_img)
 42 | 
 43 |             lb1, lt1, rb1, rt1 = self.get_real_rotated_points(lb, lt, pred_label, rb, rt)
 44 |             result.append(
 45 |                 {
 46 |                     "box": [int(xmin), int(ymin), int(xmax), int(ymax)],
 47 |                     "lb": [int(lb1[0]), int(lb1[1])],
 48 |                     "lt": [int(lt1[0]), int(lt1[1])],
 49 |                     "rt": [int(rt1[0]), int(rt1[1])],
 50 |                     "rb": [int(rb1[0]), int(rb1[1])],
 51 |                 }
 52 |             )
 53 |         
 54 |         return result
 55 | 
 56 |     def init_default_output(self, h, w):
 57 |         img_box = np.array([0, 0, w, h])
 58 |         # 初始化默认值
 59 |         obj_det_res, edge_box, pred_label = (
 60 |             [[1.0, img_box]],
 61 |             img_box.reshape([-1, 2]),
 62 |             0,
 63 |         )
 64 |         return obj_det_res, pred_label
 65 | 
 66 |     def add_pre_info_for_cls(self, cls_img, edge_box, xmin_cls, ymin_cls):
 67 |         """
 68 |         Args:
 69 |             cls_img:
 70 |             edge_box:
 71 |             xmin_cls:
 72 |             ymin_cls:
 73 | 
 74 |         Returns: 带边缘划线的图片，给方向分类提供先验信息
 75 | 
 76 |         """
 77 |         cls_box = edge_box.copy()
 78 |         cls_box[:, 0] = cls_box[:, 0] - xmin_cls
 79 |         cls_box[:, 1] = cls_box[:, 1] - ymin_cls
 80 |         # 画框增加先验信息，辅助方向label识别
 81 |         cv2.polylines(
 82 |             cls_img,
 83 |             [np.array(cls_box).astype(np.int32).reshape((-1, 1, 2))],
 84 |             True,
 85 |             color=(255, 0, 255),
 86 |             thickness=5,
 87 |         )
 88 | 
 89 |     def adjust_edge_points_axis(self, edge_box, lb, lt, rb, rt, xmin_edge, ymin_edge):
 90 |         edge_box[:, 0] += xmin_edge
 91 |         edge_box[:, 1] += ymin_edge
 92 |         lt, lb, rt, rb = (
 93 |             lt + [xmin_edge, ymin_edge],
 94 |             lb + [xmin_edge, ymin_edge],
 95 |             rt + [xmin_edge, ymin_edge],
 96 |             rb + [xmin_edge, ymin_edge],
 97 |         )
 98 |         return lb, lt, rb, rt
 99 | 
100 |     def get_box_points(self, img_box):
101 |         x1, y1, x2, y2 = img_box
102 |         lt = np.array([x1, y1])  # 左上角
103 |         rt = np.array([x2, y1])  # 右上角
104 |         rb = np.array([x2, y2])  # 右下角
105 |         lb = np.array([x1, y2])  # 左下角
106 |         return lb, lt, rb, rt
107 | 
108 |     def get_real_rotated_points(self, lb, lt, pred_label, rb, rt):
109 |         if pred_label == 0:
110 |             lt1 = lt
111 |             rt1 = rt
112 |             rb1 = rb
113 |             lb1 = lb
114 |         elif pred_label == 1:
115 |             lt1 = rt
116 |             rt1 = rb
117 |             rb1 = lb
118 |             lb1 = lt
119 |         elif pred_label == 2:
120 |             lt1 = rb
121 |             rt1 = lb
122 |             rb1 = lt
123 |             lb1 = rt
124 |         elif pred_label == 3:
125 |             lt1 = lb
126 |             rt1 = lt
127 |             rb1 = rt
128 |             lb1 = rb
129 |         else:
130 |             lt1 = lt
131 |             rt1 = rt
132 |             rb1 = rb
133 |             lb1 = lb
134 |         return lb1, lt1, rb1, rt1
135 | 
136 |     def pad_box_points(self, h, w, xmax, xmin, ymax, ymin, pad):
137 |         ymin_edge = max(ymin - pad, 0)
138 |         xmin_edge = max(xmin - pad, 0)
139 |         ymax_edge = min(ymax + pad, h)
140 |         xmax_edge = min(xmax + pad, w)
141 |         return xmin_edge, ymin_edge, xmax_edge, ymax_edge
142 | 


--------------------------------------------------------------------------------
/python/main.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import cv2
 3 | from inference import TableDetector
 4 | from utils import visuallize, extract_table_img
 5 | 
 6 | 
 7 | if __name__=='__main__':
 8 |     img_path = "images/chip2.jpg"
 9 |     table_det = TableDetector("weights/yolo_obj_det.onnx", "weights/yolo_edge_det.onnx", "weights/paddle_cls.onnx")
10 | 
11 |     srcimg = cv2.imread(img_path)
12 |     result = table_det.detect(srcimg.copy())
13 |     
14 |     # 输出可视化
15 |     file_name_with_ext = os.path.basename(img_path)
16 |     file_name, file_ext = os.path.splitext(file_name_with_ext)
17 |     out_dir = "outputs"
18 |     if not os.path.exists(out_dir):
19 |         os.makedirs(out_dir)
20 |     draw_img = srcimg.copy()
21 |     for i, res in enumerate(result):
22 |         box = res["box"]
23 |         lt, rt, rb, lb = res["lt"], res["rt"], res["rb"], res["lb"]
24 |         # 带识别框和左上角方向位置
25 |         draw_img = visuallize(draw_img, box, lt, rt, rb, lb)
26 |         # 透视变换提取表格图片
27 |         wrapped_img = extract_table_img(srcimg, lt, rt, rb, lb)
28 |         cv2.imwrite(f"{out_dir}/{file_name}-extract-{i}.jpg", wrapped_img)
29 |     cv2.imwrite(f"{out_dir}/{file_name}-visualize.jpg", draw_img)


--------------------------------------------------------------------------------
/python/predictor.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | from utils import (
  4 |     custom_NMSBoxes,
  5 |     ResizePad,
  6 |     sigmoid,
  7 |     get_max_adjacent_bbox,
  8 | )
  9 | 
 10 | 
 11 | class YoloDet:
 12 |     def __init__(self, model_path):
 13 |         self.model = cv2.dnn.readNet(model_path)
 14 |         self.resize_shape = [928, 928]
 15 |         self.outlayer_names = self.model.getUnconnectedOutLayersNames()
 16 | 
 17 |     def infer(self, img, **kwargs):
 18 |         score = kwargs.get("score", 0.4)
 19 |         ori_h, ori_w = img.shape[:-1]
 20 |         img, new_w, new_h, left, top = self.img_preprocess(img, self.resize_shape)
 21 |         self.model.setInput(img)
 22 |         pre = self.model.forward(self.outlayer_names)
 23 |         result = self.img_postprocess(pre, ori_w / new_w, ori_h / new_h, left, top, score)
 24 |         return result
 25 | 
 26 |     def img_preprocess(self, img, resize_shape=[928, 928]):
 27 |         im, new_w, new_h, left, top = ResizePad(img, resize_shape[0])
 28 |         im = im / 255.0
 29 |         im = im.transpose((2, 0, 1)).copy()
 30 |         im = im[None, :].astype("float32")
 31 |         return im, new_w, new_h, left, top
 32 | 
 33 |     def img_postprocess(self, predict_maps, x_factor, y_factor, left, top, score):
 34 |         result = []
 35 |         # 转置和压缩输出以匹配预期的形状
 36 |         outputs = np.transpose(np.squeeze(predict_maps[0]))
 37 |         # 获取输出数组的行数
 38 |         rows = outputs.shape[0]
 39 |         # 用于存储检测的边界框、得分和类别ID的列表
 40 |         boxes = []
 41 |         scores = []
 42 |         # 遍历输出数组的每一行
 43 |         for i in range(rows):
 44 |             # 找到类别得分中的最大得分
 45 |             max_score = outputs[i][4]
 46 |             # 如果最大得分高于置信度阈值
 47 |             if max_score >= score:
 48 |                 # 从当前行提取边界框坐标
 49 |                 x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]
 50 |                 # 计算边界框的缩放坐标
 51 |                 xmin = max(int((x - w / 2 - left) * x_factor), 0)
 52 |                 ymin = max(int((y - h / 2 - top) * y_factor), 0)
 53 |                 xmax = xmin + int(w * x_factor)
 54 |                 ymax = ymin + int(h * y_factor)
 55 |                 # 将类别ID、得分和框坐标添加到各自的列表中
 56 |                 boxes.append([xmin, ymin, xmax, ymax])
 57 |                 scores.append(max_score)
 58 |                 # 应用非最大抑制过滤重叠的边界框
 59 |         indices = custom_NMSBoxes(boxes, scores)
 60 |         for i in indices:
 61 |             result.append([scores[i], np.array(boxes[i])])
 62 |         return result
 63 | 
 64 | 
 65 | class YoloSeg:
 66 |     def __init__(self, model_path):
 67 |         self.model = cv2.dnn.readNet(model_path)
 68 |         self.resize_shape = [800, 800]
 69 |         self.outlayer_names = self.model.getUnconnectedOutLayersNames()
 70 | 
 71 |     def infer(self, img, **kwargs):
 72 |         destHeight, destWidth = img.shape[:-1]
 73 |         img, resize_h, resize_w, left, top = self.img_preprocess(img, self.resize_shape)
 74 |         self.model.setInput(img)
 75 |         predict_maps = self.model.forward(self.outlayer_names)
 76 |         pred = self.img_postprocess(predict_maps)
 77 |         if pred is None:
 78 |             return None, None, None, None, None
 79 |         segmentation = pred > 0.8
 80 |         mask = np.array(segmentation).astype(np.uint8)
 81 |         # 找到最佳边缘box shape(4, 2)
 82 |         box = get_max_adjacent_bbox(mask)
 83 |         # todo 注意还有crop的偏移
 84 |         if box is not None:
 85 |             # 根据缩放调整坐标适配输入的img大小
 86 |             adjusted_box = self.adjust_coordinates(box, left, top, resize_w, resize_h, destWidth, destHeight)   ###
 87 |             # 排序并裁剪负值
 88 |             lt, lb, rt, rb = self.sort_and_clip_coordinates(adjusted_box)
 89 |             return box, lt, lb, rt, rb
 90 |         else:
 91 |             return None, None, None, None, None
 92 | 
 93 |     def img_postprocess(self, predict_maps):
 94 |         box_output = predict_maps[0]
 95 |         mask_output = predict_maps[1]
 96 |         predictions = np.squeeze(box_output).T
 97 |         # Filter out object confidence scores below threshold
 98 |         scores = predictions[:, 4]
 99 |         # 获取得分最高的索引
100 |         highest_score_index = scores.argmax()
101 |         # 获取得分最高的预测结果
102 |         highest_score_prediction = predictions[highest_score_index]
103 |         x, y, w, h = highest_score_prediction[:4]
104 |         highest_score = highest_score_prediction[4]
105 |         if highest_score < 0.7:
106 |             return None
107 |         mask_predictions = highest_score_prediction[5:]
108 |         mask_predictions = np.expand_dims(mask_predictions, axis=0)
109 |         mask_output = np.squeeze(mask_output)
110 |         # Calculate the mask maps for each box
111 |         num_mask, mask_height, mask_width = mask_output.shape  # CHW
112 |         masks = sigmoid(mask_predictions @ mask_output.reshape((num_mask, -1)))
113 |         masks = masks.reshape((-1, mask_height, mask_width))
114 |         # 提取第一个通道
115 |         mask = masks[0]
116 | 
117 |         # 计算缩小后的区域边界
118 |         small_w = 200
119 |         small_h = 200
120 |         small_x_min = max(0, int((x - w / 2) * small_w / 800))
121 |         small_x_max = min(small_w, int((x + w / 2) * small_w / 800))
122 |         small_y_min = max(0, int((y - h / 2) * small_h / 800))
123 |         small_y_max = min(small_h, int((y + h / 2) * small_h / 800))
124 | 
125 |         # 创建一个全零的掩码
126 |         filtered_mask = np.zeros((small_h, small_w), dtype=np.float32)
127 | 
128 |         # 将区域内的值复制到过滤后的掩码中
129 |         filtered_mask[small_y_min:small_y_max, small_x_min:small_x_max] = mask[small_y_min:small_y_max, small_x_min:small_x_max]
130 | 
131 |         # 使用 OpenCV 进行放大，保持边缘清晰
132 |         resized_mask = cv2.resize(filtered_mask, (800, 800), interpolation=cv2.INTER_CUBIC)
133 |         return resized_mask
134 | 
135 |     def adjust_coordinates(
136 |         self, box, left, top, resize_w, resize_h, destWidth, destHeight
137 |     ):
138 |         """
139 |         调整边界框坐标，确保它们在合理范围内。
140 | 
141 |         参数:
142 |         box (numpy.ndarray): 原始边界框坐标 (shape: (4, 2))
143 |         left (int): 左侧偏移量
144 |         top (int): 顶部偏移量
145 |         resize_w (int): 缩放宽度
146 |         resize_h (int): 缩放高度
147 |         destWidth (int): 目标宽度
148 |         destHeight (int): 目标高度
149 | 
150 |         返回:
151 |         numpy.ndarray: 调整后的边界框坐标
152 |         """
153 |         # 调整横坐标
154 |         box[:, 0] = np.clip(
155 |             (np.round(box[:, 0] - left) / resize_w * destWidth), 0, destWidth
156 |         )
157 | 
158 |         # 调整纵坐标
159 |         box[:, 1] = np.clip(
160 |             (np.round(box[:, 1] - top) / resize_h * destHeight), 0, destHeight
161 |         )
162 |         return box
163 | 
164 |     def sort_and_clip_coordinates(self, box):
165 |         """
166 |         对边界框坐标进行排序并裁剪负值。
167 | 
168 |         参数:
169 |         box (numpy.ndarray): 边界框坐标 (shape: (4, 2))
170 | 
171 |         返回:
172 |         tuple: 左上角、左下角、右上角、右下角坐标
173 |         """
174 |         # 按横坐标排序
175 |         x = box[:, 0]
176 |         l_idx = x.argsort()
177 |         l_box = np.array([box[l_idx[0]], box[l_idx[1]]])
178 |         r_box = np.array([box[l_idx[2]], box[l_idx[3]]])
179 | 
180 |         # 左侧坐标按纵坐标排序
181 |         l_idx_1 = np.array(l_box[:, 1]).argsort()
182 |         lt = l_box[l_idx_1[0]]
183 |         lb = l_box[l_idx_1[1]]
184 | 
185 |         # 右侧坐标按纵坐标排序
186 |         r_idx_1 = np.array(r_box[:, 1]).argsort()
187 |         rt = r_box[r_idx_1[0]]
188 |         rb = r_box[r_idx_1[1]]
189 | 
190 |         # 裁剪负值
191 |         lt[lt < 0] = 0
192 |         lb[lb < 0] = 0
193 |         rt[rt < 0] = 0
194 |         rb[rb < 0] = 0
195 | 
196 |         return lt, lb, rt, rb
197 | 
198 |     def img_preprocess(self, img, resize_shape=[800, 800]):
199 |         im, new_w, new_h, left, top = ResizePad(img, resize_shape[0])
200 |         im = im / 255.0
201 |         im = im.transpose((2, 0, 1)).copy()
202 |         im = im[None, :].astype("float32")
203 |         return im, new_h, new_w, left, top
204 | 
205 | 
class PPLCNet:
    """Classifier executed through ``cv2.dnn``; ``infer`` returns the index of
    its highest-scoring output."""

    def __init__(self, model_path):
        net = cv2.dnn.readNet(model_path)
        self.model = net
        self.resize_shape = [624, 624]
        self.outlayer_names = net.getUnconnectedOutLayersNames()

    def infer(self, img, **kwargs):
        """Classify ``img`` and return the top-ranked class index."""
        blob = self.img_preprocess(img, self.resize_shape)
        self.model.setInput(blob)
        raw = self.model.forward(self.outlayer_names)[0]
        # Rank the scores ascending and keep the ranking of the last row.
        ascending = np.argsort(raw[None, :])[0][-1]
        # Flatten to one array and reverse it so the best index comes first.
        descending = ascending.flatten()[::-1]
        return descending[0]

    def img_preprocess(self, img, resize_shape=[624, 624]):
        """Letterbox ``img`` to a ``resize_shape[0]`` square and return a
        float32 blob of shape ``(1, C, H, W)`` scaled to ``[0, 1]``."""
        padded, _new_w, _new_h, _left, _top = ResizePad(img, resize_shape[0])
        chw = np.array(padded).transpose((2, 0, 1)) / 255.0
        batched = chw[None, :]
        return batched.astype("float32")
228 | 


--------------------------------------------------------------------------------
/python/utils.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import itertools
  3 | import cv2
  4 | import numpy as np
  5 | 
  6 | 
  7 | def generate_scale(im, resize_shape, keep_ratio):
  8 |     """
  9 |     Args:
 10 |         im (np.ndarray): image (np.ndarray)
 11 |     Returns:
 12 |         im_scale_x: the resize ratio of X
 13 |         im_scale_y: the resize ratio of Y
 14 |     """
 15 |     target_size = (resize_shape[0], resize_shape[1])
 16 |     # target_size = (800, 1333)
 17 |     origin_shape = im.shape[:2]
 18 | 
 19 |     if keep_ratio:
 20 |         im_size_min = np.min(origin_shape)
 21 |         im_size_max = np.max(origin_shape)
 22 |         target_size_min = np.min(target_size)
 23 |         target_size_max = np.max(target_size)
 24 |         im_scale = float(target_size_min) / float(im_size_min)
 25 |         if np.round(im_scale * im_size_max) > target_size_max:
 26 |             im_scale = float(target_size_max) / float(im_size_max)
 27 |         im_scale_x = im_scale
 28 |         im_scale_y = im_scale
 29 |     else:
 30 |         resize_h, resize_w = target_size
 31 |         im_scale_y = resize_h / float(origin_shape[0])
 32 |         im_scale_x = resize_w / float(origin_shape[1])
 33 |     return im_scale_y, im_scale_x
 34 | 
 35 | 
 36 | def resize(im, im_info, resize_shape, keep_ratio, interp=2):
 37 |     im_scale_y, im_scale_x = generate_scale(im, resize_shape, keep_ratio)
 38 |     im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=interp)
 39 |     im_info["im_shape"] = np.array(im.shape[:2]).astype("float32")
 40 |     im_info["scale_factor"] = np.array([im_scale_y, im_scale_x]).astype("float32")
 41 | 
 42 |     return im, im_info
 43 | 
 44 | 
 45 | def pad(im, im_info, resize_shape):
 46 |     im_h, im_w = im.shape[:2]
 47 |     fill_value = [114.0, 114.0, 114.0]
 48 |     h, w = resize_shape[0], resize_shape[1]
 49 |     if h == im_h and w == im_w:
 50 |         im = im.astype(np.float32)
 51 |         return im, im_info
 52 | 
 53 |     canvas = np.ones((h, w, 3), dtype=np.float32)
 54 |     canvas *= np.array(fill_value, dtype=np.float32)
 55 |     canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
 56 |     im = canvas
 57 |     return im, im_info
 58 | 
 59 | 
 60 | def ResizePad(img, target_size):
 61 |     h, w = img.shape[:2]
 62 |     m = max(h, w)
 63 |     ratio = target_size / m
 64 |     new_w, new_h = int(ratio * w), int(ratio * h)
 65 |     img = cv2.resize(img, (new_w, new_h), cv2.INTER_LINEAR)
 66 |     top = (target_size - new_h) // 2
 67 |     bottom = (target_size - new_h) - top
 68 |     left = (target_size - new_w) // 2
 69 |     right = (target_size - new_w) - left
 70 |     img1 = cv2.copyMakeBorder(
 71 |         img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
 72 |     )
 73 |     return img1, new_w, new_h, left, top
 74 | 
 75 | 
 76 | def get_mini_boxes(contour):
 77 |     bounding_box = cv2.minAreaRect(contour)
 78 |     points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
 79 | 
 80 |     index_1, index_2, index_3, index_4 = 0, 1, 2, 3
 81 |     if points[1][1] > points[0][1]:
 82 |         index_1 = 0
 83 |         index_4 = 1
 84 |     else:
 85 |         index_1 = 1
 86 |         index_4 = 0
 87 |     if points[3][1] > points[2][1]:
 88 |         index_2 = 2
 89 |         index_3 = 3
 90 |     else:
 91 |         index_2 = 3
 92 |         index_3 = 2
 93 | 
 94 |     box = [points[index_1], points[index_2], points[index_3], points[index_4]]
 95 |     return box, min(bounding_box[1])
 96 | 
 97 | 
def minboundquad(hull):
    """Search for the smallest-area quadrilateral formed by four hull edge lines.

    Every combination of four hull edges is considered; consecutive edges are
    joined either at a shared vertex or at the intersection of their
    supporting lines, and the candidate with the smallest ``cv2.contourArea``
    is kept. NOTE(review): judging by the name the result is meant to bound
    the hull; the code itself only minimises the area.

    Args:
        hull: Hull points; reshaped to an (N, 2) array of x/y coordinates.

    Returns:
        Array of shape (4, 1, 2) holding the chosen corners as int32. If no
        candidate combination survives the filters, the initial all-zero
        float array of the same shape is returned.
    """
    len_hull = len(hull)
    xy = np.array(hull).reshape([-1, 2])
    # Build the edge table: row i holds the vertex indices (i, i + 1), with
    # the last vertex wrapping around to vertex 0.
    idx = np.arange(0, len_hull)
    idx_roll = np.roll(idx, -1, axis=0)
    edges = np.array([idx, idx_roll]).reshape([2, -1])
    edges = np.transpose(edges, [1, 0])
    # Direction angle of every edge, normalised into [0, 2*pi).
    edgeangles1 = []
    for i in range(len_hull):
        y = xy[edges[i, 1], 1] - xy[edges[i, 0], 1]
        x = xy[edges[i, 1], 0] - xy[edges[i, 0], 0]
        angle = math.atan2(y, x)
        if angle < 0:
            angle = angle + 2 * math.pi
        edgeangles1.append([angle, i])
    # Reorder the edges by ascending angle.
    edgeangles1_idx = sorted(list(edgeangles1), key=lambda x: x[0])
    edges1 = []
    edgeangle1 = []
    for item in edgeangles1_idx:
        idx = item[1]
        edges1.append(edges[idx, :])
        edgeangle1.append(item[0])
    edgeangles = np.array(edgeangle1)
    edges = np.array(edges1)
    # Tolerance: 100x a value close to double-precision machine epsilon.
    eps = 2.2204e-16
    angletol = eps * 100

    # Drop each edge whose angle is within the tolerance of the next edge's
    # angle, so (near-)parallel duplicates do not both take part.
    k = np.diff(edgeangles) < angletol
    idx = np.where(k == 1)
    edges = np.delete(edges, idx, 0)
    edgeangles = np.delete(edgeangles, idx, 0)
    nedges = edges.shape[0]
    # All ascending 4-combinations of the remaining edge indices.
    edgelist = np.array(nchoosek(0, nedges - 1, 1, 4))
    # Discard combinations whose first-to-last angle span is at most pi.
    # NOTE(review): presumably such edge sets cannot close into a
    # quadrilateral - confirm against the reference algorithm.
    k = edgeangles[edgelist[:, 3]] - edgeangles[edgelist[:, 0]] <= math.pi
    k_idx = np.where(k == 1)
    edgelist = np.delete(edgelist, k_idx, 0)

    nquads = edgelist.shape[0]
    quadareas = math.inf
    # Scratch buffers for corner coordinates; only the first four slots are
    # written and read below.
    qxi = np.zeros([5])
    qyi = np.zeros([5])
    cnt = np.zeros([4, 1, 2])
    edgelist = list(edgelist)
    edges = list(edges)
    xy = list(xy)

    for i in range(nquads):
        # Close the cycle by repeating the first edge after the fourth.
        edgeind = list(edgelist[i])
        edgeind.append(edgelist[i][0])
        edgesi = []
        edgeang = []
        for idx in edgeind:
            edgesi.append(edges[idx])
            edgeang.append(edgeangles[idx])
        # Skip the combination if any consecutive pair of edges turns by
        # more than pi.
        is_continue = False
        for idx in range(len(edgeang) - 1):
            diff = edgeang[idx + 1] - edgeang[idx]
            if diff > math.pi:
                is_continue = True
        if is_continue:
            continue
        for j in range(4):
            jplus1 = j + 1
            shared = np.intersect1d(edgesi[j], edgesi[jplus1])
            if shared.size != 0:
                # Adjacent hull edges: the corner is their common vertex.
                qxi[j] = xy[shared[0]][0]
                qyi[j] = xy[shared[0]][1]
            else:
                # Otherwise intersect the supporting lines A->B and C->D by
                # solving [A - B, D - C] @ [t, s]^T = A - C, then evaluating
                # the first line at parameter t.
                A = xy[edgesi[j][0]]
                B = xy[edgesi[j][1]]
                C = xy[edgesi[jplus1][0]]
                D = xy[edgesi[jplus1][1]]
                concat = np.hstack(((A - B).reshape([2, -1]), (D - C).reshape([2, -1])))
                div = (A - C).reshape([2, -1])
                inv_result = get_inv(concat)
                a = inv_result[0, 0]
                b = inv_result[0, 1]
                c = inv_result[1, 0]
                d = inv_result[1, 1]
                e = div[0, 0]
                f = div[1, 0]
                ts1 = [a * e + b * f, c * e + d * f]
                Q = A + (B - A) * ts1[0]
                qxi[j] = Q[0]
                qyi[j] = Q[1]

        # Pack the four corners into an OpenCV-style (4, 1, 2) int32 contour;
        # the cast truncates fractional coordinates.
        contour = np.array([qxi[:4], qyi[:4]]).astype(np.int32)
        contour = np.transpose(contour, [1, 0])
        contour = contour[:, np.newaxis, :]
        A_i = cv2.contourArea(contour)
        # Keep the smallest-area candidate seen so far.

        if A_i < quadareas:
            quadareas = A_i
            cnt = contour
    return cnt
194 | 
195 | 
def nchoosek(startnum, endnum, step=1, n=1):
    """List every ``n``-element combination of an inclusive integer range.

    Args:
        startnum: First value of the range.
        endnum: Last value of the range (inclusive).
        step: Stride between consecutive values.
        n: Number of elements per combination.

    Returns:
        List of combinations, each a list, in the order produced by
        ``itertools.combinations``.
    """
    candidates = range(startnum, endnum + 1, step)
    return [list(combo) for combo in itertools.combinations(candidates, n)]
201 | 
202 | 
def get_inv(concat):
    """Invert a 2x2 matrix with the closed-form adjugate formula.

    Args:
        concat: Indexable 2x2 matrix ``[[a, b], [c, d]]``.

    Returns:
        ``np.ndarray`` of shape (2, 2) equal to
        ``[[d, -b], [-c, a]] / (a * d - b * c)``. No check is made for a zero
        determinant.
    """
    top_left, top_right = concat[0][0], concat[0][1]
    bottom_left, bottom_right = concat[1][0], concat[1][1]
    determinant = top_left * bottom_right - top_right * bottom_left

    first_row = [bottom_right / determinant, -top_right / determinant]
    second_row = [-bottom_left / determinant, top_left / determinant]
    return np.array([first_row, second_row])
213 | 
214 | 
def get_max_adjacent_bbox(mask):
    """Extract a quadrilateral for the dominant region of a mask.

    The contour whose minimum-area rectangle has the longest short side is
    selected, simplified with ``cv2.approxPolyDP`` and wrapped in its convex
    hull. A four-point hull is used directly, a larger hull is reduced with
    ``minboundquad``, and a smaller one falls back to the minimum-area
    rectangle of the selected contour.

    Args:
        mask: Array that is multiplied by 255 and cast to uint8 before
            contour extraction.

    Returns:
        Array of corner points reshaped to (-1, 2), or ``None`` when no
        contour with a positive short side is found.
    """
    binary = (mask * 255).astype(np.uint8)
    contours, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Pick the contour whose minimum-area rectangle has the largest short side.
    best_contour = None
    best_side = 0
    for candidate in contours:
        _, short_side = get_mini_boxes(candidate)
        if short_side > best_side:
            best_side = short_side
            best_contour = candidate

    if best_contour is None:
        return None

    tolerance = 0.01 * cv2.arcLength(best_contour, True)
    simplified = cv2.approxPolyDP(best_contour, tolerance, True)
    hull = cv2.convexHull(simplified)
    hull_size = len(hull)

    if hull_size == 4:
        target_box = np.array(hull)
    elif hull_size > 4:
        # Original author's note: shapely's polygon.convex_hull can also do this.
        target_box = minboundquad(hull)
    else:
        rect_points, _ = get_mini_boxes(best_contour)
        target_box = np.array(rect_points)

    return np.array(target_box).reshape([-1, 2])
242 | 
243 | 
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: Scalar or array accepted by ``np.exp``.

    Returns:
        The logistic function of ``x``, with the same shape as the input.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)
246 | 
247 | 
def calculate_iou(box, other_boxes):
    """Compute the intersection-over-union between one box and a set of boxes.

    The arithmetic treats every box as corner coordinates
    ``[x1, y1, x2, y2]``: widths and heights are derived as ``x2 - x1`` and
    ``y2 - y1``.
    NOTE(review): the previous docstring described the format as
    ``[x1, y1, width, height]``, which does not match the computation -
    confirm that callers pass corner coordinates.

    Args:
        box: Single box ``[x1, y1, x2, y2]``.
        other_boxes: Sequence or array of boxes in the same format, one box
            per row.

    Returns:
        1-D array with the IoU between ``box`` and each row of
        ``other_boxes``. Entries are NaN where the union area is zero.
    """
    # Convert once so plain Python lists are accepted by every step below
    # (the area computation previously indexed the raw argument directly).
    other_boxes = np.asarray(other_boxes)

    # Top-left corner of the intersection.
    x1 = np.maximum(box[0], other_boxes[:, 0])
    y1 = np.maximum(box[1], other_boxes[:, 1])
    # Bottom-right corner of the intersection.
    x2 = np.minimum(box[2], other_boxes[:, 2])
    y2 = np.minimum(box[3], other_boxes[:, 3])
    # Clamp at zero so disjoint boxes contribute no overlap.
    intersection_area = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)

    box_area = (box[2] - box[0]) * (box[3] - box[1])
    other_boxes_area = (other_boxes[:, 2] - other_boxes[:, 0]) * (
        other_boxes[:, 3] - other_boxes[:, 1]
    )

    iou = intersection_area / (box_area + other_boxes_area - intersection_area)
    return iou
277 | 
278 | 
def custom_NMSBoxes(boxes, scores, iou_threshold=0.4):
    """Greedy non-maximum suppression over scored boxes.

    Boxes are visited in descending score order. Each visited box is kept,
    and every remaining box whose IoU with it (as computed by
    ``calculate_iou``) exceeds ``iou_threshold`` is discarded.

    Args:
        boxes: Sequence of boxes in the format expected by ``calculate_iou``.
        scores: Sequence of scores, one per box.
        iou_threshold: Boxes with IoU above this value are suppressed.

    Returns:
        List of indices into ``boxes`` for the kept boxes, highest score
        first; an empty list when ``boxes`` is empty.
    """
    if len(boxes) == 0:
        return []

    scores = np.array(scores)
    boxes = np.array(boxes)

    # Indices of the boxes still in play, best score first.
    order = np.argsort(scores)[::-1]
    keep = []
    while len(order) > 0:
        best = order[0]
        keep.append(best)
        # Nothing left to compare against.
        if len(order) == 1:
            break
        rest = order[1:]
        overlaps = calculate_iou(boxes[best], boxes[rest])
        # Only boxes that do not overlap the kept one too much survive.
        order = rest[overlaps <= iou_threshold]
    return keep
315 | 
316 | 
def visuallize(img, box, lt, rt, rb, lb):
    """Draw a detection's bounding box, corner polygon and first-corner marker.

    The drawing is done directly on ``img``.

    Args:
        img: Image to draw on.
        box: ``(xmin, ymin, xmax, ymax)`` of the axis-aligned rectangle.
        lt, rt, rb, lb: Corner points; connected in this order to form the
            closed polygon. ``lt`` is additionally marked with a circle.

    Returns:
        The same image object after drawing.
    """
    xmin, ymin, xmax, ymax = box
    corners = np.array([lt, rt, rb, lb]).reshape([-1, 2])
    polygon = np.array(corners).astype(np.int32).reshape((-1, 1, 2))

    marker_center = (int(lt[0]), int(lt[1]))
    rect_start = (int(xmin), int(ymin))
    rect_end = (int(xmax), int(ymax))

    cv2.circle(img, marker_center, 50, (255, 0, 0), 10)
    cv2.rectangle(img, rect_start, rect_end, (255, 0, 0), 10)
    cv2.polylines(img, [polygon], True, color=(255, 0, 255), thickness=6)
    return img
330 | 
331 | 
def extract_table_img(img, lt, rt, rb, lb):
    """Rectify the quadrilateral given by four corners with a perspective warp.

    Args:
        img (numpy.ndarray): Input image.
        lt (numpy.ndarray): Top-left corner.
        rt (numpy.ndarray): Top-right corner.
        rb (numpy.ndarray): Bottom-right corner.
        lb (numpy.ndarray): Bottom-left corner.

    Returns:
        numpy.ndarray: The warped region. Its width is the longer of the
        top/bottom edge lengths and its height the longer of the left/right
        edge lengths, each truncated to an integer.
    """

    def _edge_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Source corners, ordered to match the destination corners below.
    src_points = np.float32([lt, rt, lb, rb])

    bottom_len = _edge_length(rb, lb)
    top_len = _edge_length(rt, lt)
    max_width = max(int(bottom_len), int(top_len))

    right_len = _edge_length(rt, rb)
    left_len = _edge_length(lt, lb)
    max_height = max(int(right_len), int(left_len))

    last_col = max_width - 1
    last_row = max_height - 1
    dst_points = np.float32(
        [
            [0, 0],
            [last_col, 0],
            [0, last_row],
            [last_col, last_row],
        ]
    )

    # Map the source quadrilateral onto the upright output rectangle.
    M = cv2.getPerspectiveTransform(src_points, dst_points)
    return cv2.warpPerspective(img, M, (max_width, max_height))
374 | 


--------------------------------------------------------------------------------