├── README.md ├── cpp ├── main.cpp ├── predictor.cpp ├── predictor.h ├── utils.cpp └── utils.h ├── python ├── main.py └── utils.py └── testimgs ├── demo.jpg ├── demo1.jpg ├── demo2.jpg ├── demo3.jpg ├── demo4.jpg ├── demo5.jpg └── demo6.jpg /README.md: -------------------------------------------------------------------------------- 1 | The original model is in the ModelScope community: https://modelscope.cn/models/iic/cv_resnet18_card_correction 2 | 3 | Nothing to do at home after work, so I stayed late at the office, exported the model to ONNX and wrote this deployment program. 4 | 5 | ONNX file, link: https://pan.baidu.com/s/19AcZ-Rg6vC9-vJTPx3e0wQ extraction code: 2k8m 6 | -------------------------------------------------------------------------------- /cpp/main.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "predictor.h" 4 | 5 | using namespace std; 6 | using namespace cv; 7 | 8 | int main() 9 | { 10 | string imgpath = "./testimgs/demo3.jpg"; ////make sure the image path is correct 11 | string modelpath = "./cv_resnet18_card_correction.onnx"; 12 | 13 | cv_resnet18_card_correction mynet(modelpath); 14 | Mat srcimg = imread(imgpath); 15 | myDict out = mynet.infer(srcimg); 16 | 17 | draw_show_img(srcimg.clone(), out, "show.jpg"); 18 | vector<Mat> sub_imgs = std::get<vector<Mat>>(out["OUTPUT_IMGS"]); 19 | sub_imgs.insert(sub_imgs.begin(), srcimg); 20 | merge_images_horizontal(sub_imgs, "pp4_rotate_show.jpg"); 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /cpp/predictor.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "predictor.h" 3 | 4 | 5 | using namespace std; 6 | using namespace cv; 7 | using namespace dnn; 8 | 9 | 10 | cv_resnet18_card_correction::cv_resnet18_card_correction(const string model_path) 11 | { 12 | this->model = readNet(model_path); 13 | this->outlayer_names = this->model.getUnconnectedOutLayersNames(); 14 | } 15 | 16 | Mat cv_resnet18_card_correction::preprocess(const Mat& srcimg) 17 | { 18 | Mat img; 19 | int new_w, new_h, left, top; 20 | img = ResizePad(srcimg, this->resize_shape[0], new_w, new_h, left, top); 21 | 22 | vector<Mat> bgrChannels(3); 23 | split(img, bgrChannels); 24 | for (int c = 0; c < 3; c++) 25 | { 26 | bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1.0 / (255.0* std_[c]), (0.0 - mean_[c]) / std_[c]); 27 | } 28 | Mat m_normalized_mat; 29 | merge(bgrChannels, m_normalized_mat); 30 | 31 | Mat blob = blobFromImage(m_normalized_mat); 32 | return blob; 33 | } 34 | 35 | myDict cv_resnet18_card_correction::infer(const Mat& srcimg) 36 | { 37 | const int ori_h = srcimg.rows; 38 | const int ori_w = srcimg.cols; 39 | this->c[0] = (float)ori_w / 2.f; 40 | this->c[1] = (float)ori_h / 2.f; 41 | this->s = std::max(ori_h, ori_w) * 1.f; 42 | Mat blob = this->preprocess(srcimg); 43 | 44 | this->model.setInput(blob); 45 | std::vector<Mat> pre_out; 46 | this->model.forward(pre_out, this->outlayer_names); 47 | 48 | myDict out = this->postprocess(pre_out, srcimg); 49 | return out; 50 | } 51 | 52 | 53 | static Mat sigmoid(Mat x) 54 | { 55 | Mat y; 56 | cv::exp(-x, y); 57 | y = 1.f / (1 + y); 58 | return y; 59 | } 60 | 61 | Mat cv_resnet18_card_correction::crop_image(const Mat& img, const vector<Point2f>& position) 62 | { 63 | float img_width = distance((position[0].x + position[3].x)*0.5, (position[0].y + position[3].y)*0.5, (position[1].x + position[2].x)*0.5, (position[1].y + position[2].y)*0.5); 64 | float img_height = distance((position[0].x + position[1].x)*0.5, (position[0].y + position[1].y)*0.5, (position[2].x + position[3].x)*0.5, (position[2].y + position[3].y)*0.5); 65 | 66 | vector<Point2f>
corners_trans = { Point2f(0,0), Point2f(img_width, 0), Point2f(img_width, img_height), Point2f(0, img_height) }; 67 | 68 | Mat transform = cv::getPerspectiveTransform(position, corners_trans); 69 | Mat dst; 70 | cv::warpPerspective(img, dst, transform, Size(int(img_width), int(img_height))); 71 | return dst; 72 | } 73 | 74 | myDict cv_resnet18_card_correction::postprocess(const std::vector<Mat>& output, const cv::Mat& image) 75 | { 76 | Mat reg = output[3]; ////shape: (1, 2, 192, 192) 77 | Mat wh = output[2]; ////shape: (1, 8, 192, 192) 78 | Mat hm = sigmoid(output[4]); ////shape: (1, 1, 192, 192) 79 | Mat angle_cls = sigmoid(output[0]); ////shape: (1, 4, 192, 192) 80 | Mat ftype_cls = sigmoid(output[1]); ////shape: (1, 2, 192, 192) 81 | 82 | std::tuple<Mat, vector<int>> outs = bbox_decode(hm, wh, reg, this->K); 83 | Mat bbox = get<0>(outs); 84 | vector<int> inds = get<1>(outs); 85 | angle_cls = decode_by_ind(angle_cls, inds, this->K); 86 | ftype_cls = decode_by_ind(ftype_cls, inds, this->K); 87 | 88 | for (int i = 0; i < bbox.size[1]; i++) 89 | { 90 | bbox.ptr<float>(0, i)[9] = angle_cls.ptr<float>(0)[i]; 91 | bbox.ptr<float>(0, i)[12] = ftype_cls.ptr<float>(0)[i]; 92 | } 93 | 94 | bbox_post_process(bbox, this->c, this->s, this->out_height, this->out_width); 95 | 96 | vector<vector<Point2f>> res; 97 | vector<int> angle; 98 | vector<Mat> sub_imgs; 99 | vector<vector<int>> corner_left_right; 100 | vector<int> ftype; 101 | vector<float> score; 102 | vector<Point2f> center; 103 | for (int i = 0; i < bbox.size[0]; i++) 104 | { 105 | if (bbox.ptr<float>(i)[8] > this->obj_score) 106 | { 107 | const int angle_data = int(bbox.ptr<float>(i)[9]); 108 | angle.emplace_back(angle_data); 109 | vector<Point2f> box8point(4); 110 | int min_x = 10000, min_y = 10000; 111 | int max_x = -10000, max_y = -10000; 112 | for (int j = 0; j < 4; j += 1) 113 | { 114 | const float x = bbox.ptr<float>(i)[2 * j]; 115 | const float y = bbox.ptr<float>(i)[2 * j + 1]; 116 | box8point[j] = { x,y }; 117 | min_x = std::min((int)x, min_x); 118 | min_y = std::min((int)y, min_y); 119 | max_x = std::max((int)x, max_x); 120 | max_y = std::max((int)y, max_y); 121 | } 122 | vector<int> corner_left_right_data = { min_x, min_y, max_x, max_y }; 123 | corner_left_right.emplace_back(corner_left_right_data); 124 | res.emplace_back(box8point); 125 | Mat sub_img = this->crop_image(image, box8point); 126 | if (angle_data == 1) 127 | { 128 | cv::rotate(sub_img, sub_img, 2); 129 | } 130 | if (angle_data == 2) 131 | { 132 | cv::rotate(sub_img, sub_img, 1); 133 | } 134 | if (angle_data == 3) 135 | { 136 | cv::rotate(sub_img, sub_img, 0); 137 | } 138 | sub_imgs.emplace_back(sub_img); 139 | ftype.emplace_back(int(bbox.ptr<float>(i)[12])); 140 | score.emplace_back(bbox.ptr<float>(i)[8]); 141 | center.emplace_back(Point2f(bbox.ptr<float>(i)[10], bbox.ptr<float>(i)[11])); 142 | } 143 | } 144 | myDict result; 145 | result["POLYGONS"] = res; 146 | result["BBOX"] = corner_left_right; 147 | result["SCORES"] = score; 148 | result["OUTPUT_IMGS"] = sub_imgs; 149 | result["LABELS"] = angle; 150 | result["LAYOUT"] = ftype; 151 | result["CENTER"] = center; 152 | return result; 153 | } -------------------------------------------------------------------------------- /cpp/predictor.h: -------------------------------------------------------------------------------- 1 | #ifndef PREDICTOR_H 2 | #define PREDICTOR_H 3 | #include "utils.h" 4 | #include <opencv2/dnn.hpp> 5 | 6 | 7 | class cv_resnet18_card_correction 8 | { 9 | public: 10 | cv_resnet18_card_correction(const std::string model_path); 11 | myDict infer(const cv::Mat& srcimg); 12 | private: 13 | const int resize_shape[2] = { 768, 768 }; 14 | const float mean_[3] = { 0.408, 0.447, 0.470 }; 15 | const float
std_[3] = { 0.289, 0.274, 0.278 }; 16 | const int K = 10; 17 | const float obj_score = 0.5; 18 | float c[2]; 19 | float s; 20 | const int out_height = int(resize_shape[0] / 4); 21 | const int out_width = int(resize_shape[1] / 4); 22 | cv::Mat preprocess(const cv::Mat& srcimg); 23 | myDict postprocess(const std::vector<cv::Mat>& output, const cv::Mat& image); 24 | cv::Mat crop_image(const cv::Mat& img, const std::vector<cv::Point2f>& position); 25 | 26 | std::vector<std::string> outlayer_names; 27 | cv::dnn::Net model; 28 | }; 29 | 30 | inline float distance(float x1, float y1, float x2, float y2) 31 | { 32 | return sqrt(powf(x1 - x2, 2) + powf(y1 - y2, 2)); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /cpp/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | 4 | using namespace std; 5 | using namespace cv; 6 | 7 | 8 | Mat ResizePad(const Mat& img, const int target_size, int& new_w, int& new_h, int& left, int& top) 9 | { 10 | const int h = img.rows; 11 | const int w = img.cols; 12 | const int m = max(h, w); 13 | const float ratio = (float)target_size / (float)m; 14 | new_w = int(ratio * w); 15 | new_h = int(ratio * h); 16 | Mat dstimg; 17 | resize(img, dstimg, Size(new_w, new_h), 0, 0, INTER_LINEAR); 18 | top = (target_size - new_h) / 2; 19 | int bottom = (target_size - new_h) - top; 20 | left = (target_size - new_w) / 2; 21 | int right = (target_size - new_w) - left; 22 | copyMakeBorder(dstimg, dstimg, top, bottom, left, right, BORDER_CONSTANT, Scalar(0, 0, 0)); 23 | return dstimg; 24 | } 25 | 26 | static int max_pooling(const float *input, float *output, const int inputHeight, const int inputWidth, const int outputHeight, const int outputWidth, const int kernel_h, const int kernel_w, const int outChannels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, float* keep_data) 27 | { 28 | int i = 0, r = 0, c = 0; 29 | int hstart = 0, wstart = 0, hend = 0, wend = 0; 30 | int h = 0, w = 0, pool_index = 0, index = 0; 31 | 32 | const float *pin = NULL; 33 | float *py = NULL; 34 | float* pkeep = NULL; 35 | const float flt_max = 100000; 36 | std::fill(output, output + outChannels * outputHeight * outputWidth, -flt_max); 37 | for (i = 0; i < outChannels; i++) 38 | { 39 | py = output + i * outputHeight * outputWidth; 40 | pin = input + i * inputHeight * inputWidth; 41 | pkeep = keep_data + i * outputHeight * outputWidth; 42 | for (r = 0; r < outputHeight; r++) 43 | for (c = 0; c < outputWidth; c++) 44 | { 45 | hstart = r * stride_h - pad_h; 46 | wstart = c * stride_w - pad_w; 47 | hend = std::min(hstart + kernel_h, inputHeight); 48 | wend = std::min(wstart + kernel_w, inputWidth); 49 | hstart = std::max(hstart, 0); 50 | wstart = std::max(wstart, 0); 51 | pool_index = r * outputWidth + c; 52 | for (h = hstart; h < hend; ++h) 53 | for (w = wstart; w < wend; ++w) 54 | { 55 | index = h * inputWidth + w; 56 | if (pin[index] > py[pool_index]) 57 | { 58 | py[pool_index] = pin[index]; 59 | } 60 | } 61 | if (py[pool_index] == pin[pool_index]) 62 | { 63 | pkeep[pool_index] = 1; 64 | } 65 | } 66 | } 67 | return 0; 68 | } 69 | 70 | static void _nms(Mat& heat) 71 | { 72 | const int kernel = 3; 73 | const int pad = (kernel - 1) / 2; 74 | const int stride = 1; 75 | Mat hmax = heat.clone().setTo(0.f); 76 | const int chan = heat.size[1]; 77 | const int inp_h = heat.size[2]; 78 | const int inp_w = heat.size[3]; 79 | Mat keep = heat.clone().setTo(0.f); 80 | max_pooling((float*)heat.data,
(float*)hmax.data, inp_h, inp_w, inp_h, inp_w, kernel, kernel, chan, pad, pad, stride, stride, (float*)keep.data); 81 | 82 | const vector<int> shape = { heat.size[2] , heat.size[3] }; 83 | heat = heat.reshape(0, shape); 84 | keep = keep.reshape(0, shape); 85 | heat = heat.mul(keep); 86 | } 87 | 88 | static void topk_index(const float* vec, const int len, std::vector<float>& topK, std::vector<int>& topKIndex, const int topk) 89 | { 90 | topK.clear(); 91 | topKIndex.clear(); 92 | std::vector<int> vec_index(len); 93 | std::iota(vec_index.begin(), vec_index.end(), 0); 94 | 95 | std::sort(vec_index.begin(), vec_index.end(), [&vec](size_t index_1, size_t index_2) 96 | { return vec[index_1] > vec[index_2]; }); 97 | 98 | int k_num = std::min(len, topk); 99 | topKIndex.resize(k_num); 100 | topK.resize(k_num); 101 | for (int i = 0; i < k_num; ++i) 102 | { 103 | const int ind = vec_index[i]; 104 | topKIndex[i] = ind; 105 | topK[i] = vec[ind]; 106 | } 107 | } 108 | 109 | static void _gather_feat(cv::Mat& feat, const vector<int>& ind) 110 | { 111 | //cout << "feat.type():" << feat.type() << endl; 112 | const int dtype = feat.type(); 113 | const int ndims = feat.size.dims(); 114 | vector<int> newsz(ndims); 115 | for (int i = 0; i < ndims; i++) 116 | { 117 | newsz[i] = feat.size[i]; 118 | } 119 | newsz[1] = ind.size(); 120 | Mat new_feat; 121 | new_feat = Mat(newsz, CV_32FC1); 122 | 123 | for (int i = 0; i < newsz[1]; i++) 124 | { 125 | const int idx = ind[i]; 126 | for (int j = 0; j < newsz[2]; j++) 127 | { 128 | new_feat.ptr<float>(0, i)[j] = feat.ptr<float>(0, idx)[j]; 129 | } 130 | } 131 | new_feat.copyTo(feat); 132 | new_feat.release(); 133 | } 134 | 135 | 136 | static std::tuple<Mat, vector<int>, vector<int>, Mat, Mat> _topk(const Mat& scores, const int K) 137 | { 138 | const int height = scores.size[0]; 139 | const int width = scores.size[1]; 140 | const int len = height * width; 141 | 142 | vector<float> topk_scores; 143 | vector<int> topk_inds; 144 | topk_index((float*)scores.data, len, topk_scores, topk_inds, K); 145 | 146 | int num = topk_inds.size(); 147 | vector<float> topk_ys(num); 148 | vector<float> topk_xs(num); 149 | for (int i = 0; i < num; i++) 150 | { 151 | topk_inds[i] = topk_inds[i] % len; 152 | topk_ys[i] = (float)topk_inds[i] / width; 153 | topk_xs[i] = float(topk_inds[i] % width); 154 | } 155 | 156 | vector<float> topk_score; 157 | vector<int> topk_ind; 158 | topk_index(topk_scores.data(), num, topk_score, topk_ind, K); 159 | num = topk_ind.size(); 160 | vector<int> topk_clses(num); 161 | 162 | for (int i = 0; i < num; i++) 163 | { 164 | topk_clses[i] = int(topk_ind[i] / K); 165 | } 166 | 167 | num = int(topk_inds.size()); 168 | const vector<int> newsz = { 1, num, 1 }; 169 | for (int i = 0; i < num; i++) 170 | { 171 | topk_inds[i] = topk_inds[topk_ind[i]]; ////gather inds by topk_ind, mirroring the python _gather_feat step (topk_ind is effectively the identity order here because topk_scores are already sorted) 172 | } 173 | const vector<int> out_size = { 1, K }; 174 | Mat topk_ys_mat = Mat(newsz, CV_32FC1, topk_ys.data()); 175 | 176 | _gather_feat(topk_ys_mat, topk_ind); 177 | topk_ys_mat = topk_ys_mat.reshape(0, out_size).clone(); 178 | Mat topk_xs_mat = Mat(newsz, CV_32FC1, topk_xs.data()); 179 | _gather_feat(topk_xs_mat, topk_ind); 180 | topk_xs_mat = topk_xs_mat.reshape(0, out_size).clone(); 181 | 182 | Mat topk_score_mat = Mat(out_size, CV_32FC1, topk_score.data()); 183 | 184 | return std::make_tuple(topk_score_mat.clone(), topk_inds, topk_clses, topk_ys_mat.clone(), topk_xs_mat.clone()); 185 | } 186 | 187 | static void _tranpose_and_gather_feat(cv::Mat& feat, const vector<int>& ind) 188 | { 189 | Mat new_feat; 190 | cv::transposeND(feat, { 0, 2, 3, 1 }, new_feat); 191 | const vector<int> newsz = { new_feat.size[0], new_feat.size[1] * new_feat.size[2], new_feat.size[3] }; 192 | new_feat = new_feat.reshape(0, newsz); 193 | _gather_feat(new_feat, ind);
194 | new_feat.copyTo(feat); 195 | new_feat.release(); 196 | } 197 | 198 | std::tuple<Mat, vector<int>> bbox_decode(cv::Mat& heat, cv::Mat& wh, cv::Mat& reg, const int K) 199 | { 200 | _nms(heat); 201 | 202 | std::tuple<Mat, vector<int>, vector<int>, Mat, Mat> outs = _topk(heat, K); 203 | Mat scores = get<0>(outs).clone(); 204 | vector<int> inds = get<1>(outs); 205 | vector<int> clses = get<2>(outs); 206 | Mat ys = get<3>(outs); 207 | Mat xs = get<4>(outs); 208 | 209 | if (!reg.empty()) 210 | { 211 | _tranpose_and_gather_feat(reg, inds); 212 | reg = reg.reshape(0, { K,2 }); 213 | xs = xs.reshape(0, { K, 1 }) + reg.col(0); 214 | ys = ys.reshape(0, { K, 1 }) + reg.col(1); 215 | 216 | reg = reg.reshape(0, {1, K,2 }); 217 | xs = xs.reshape(0, { 1, K, 1 }); 218 | ys = ys.reshape(0, { 1, K, 1 }); 219 | } 220 | else 221 | { 222 | xs = xs.reshape(0, { 1, K, 1 }) + 0.5; 223 | ys = ys.reshape(0, { 1, K, 1 }) + 0.5; 224 | } 225 | _tranpose_and_gather_feat(wh, inds); 226 | wh = wh.reshape(0, { 1, K, 8 }); 227 | scores = scores.reshape(0, { 1, K, 1 }); 228 | 229 | const vector<int> newshape = { 1, K, wh.size[2] + scores.size[2] + 1 + xs.size[2] + ys.size[2] + 1 }; 230 | Mat detections = Mat(newshape, CV_32FC1); 231 | for (int i = 0; i < K; i++) 232 | { 233 | const float x = xs.ptr<float>(0, i)[0]; 234 | const float y = ys.ptr<float>(0, i)[0]; 235 | detections.ptr<float>(0, i)[0] = x - wh.ptr<float>(0, i)[0]; 236 | detections.ptr<float>(0, i)[1] = y - wh.ptr<float>(0, i)[1]; 237 | detections.ptr<float>(0, i)[2] = x - wh.ptr<float>(0, i)[2]; 238 | detections.ptr<float>(0, i)[3] = y - wh.ptr<float>(0, i)[3]; 239 | detections.ptr<float>(0, i)[4] = x - wh.ptr<float>(0, i)[4]; 240 | detections.ptr<float>(0, i)[5] = y - wh.ptr<float>(0, i)[5]; 241 | detections.ptr<float>(0, i)[6] = x - wh.ptr<float>(0, i)[6]; 242 | detections.ptr<float>(0, i)[7] = y - wh.ptr<float>(0, i)[7]; 243 | detections.ptr<float>(0, i)[8] = scores.ptr<float>(0, i)[0]; 244 | detections.ptr<float>(0, i)[9] = (float)clses[i]; 245 | detections.ptr<float>(0, i)[10] = x; 246 | detections.ptr<float>(0, i)[11] = y; 247 | } 248 | 249 | return std::make_tuple(detections.clone(), inds); 250 | } 251 | 252 | Mat decode_by_ind(const cv::Mat& heat, const vector<int>& inds, const int K) 253 | { 254 | Mat score = heat.clone(); 255 | _tranpose_and_gather_feat(score, inds); 256 | score = score.reshape(0, { K, heat.size[1] }); 257 | Mat Type = Mat(1, K, CV_32FC1); 258 | for (int i = 0; i < K; i++) 259 | { 260 | Mat row_ = score.row(i); 261 | double max_score; 262 | Point classIdPoint; 263 | cv::minMaxLoc(row_, 0, &max_score, 0, &classIdPoint); 264 | Type.ptr<float>(0)[i] = (float)max_score; 265 | } 266 | return Type; 267 | } 268 | 269 | static void get_dir(const float* src_point, const float rot_rad, float* src_result) 270 | { 271 | float sn = sinf(rot_rad); 272 | float cs = cosf(rot_rad); 273 | 274 | src_result[0] = src_point[0] * cs - src_point[1] * sn; 275 | src_result[1] = src_point[0] * sn + src_point[1] * cs; 276 | } 277 | 278 | static void get_3rd_point(const Point2f& a, const Point2f& b, Point2f& result) 279 | { 280 | Point2f direct = { a.x - b.x, a.y - b.y }; 281 | result.x = b.x - direct.y; 282 | result.y = b.y + direct.x; 283 | } 284 | 285 | static Mat get_affine_transform(const float* center, const float scale, const int rot, const int* output_size, const int inv) 286 | { 287 | const float shift[] = { 0, 0 }; 288 | const float src_w = scale; 289 | const int dst_w = output_size[0]; 290 | const int dst_h = output_size[1]; 291 | 292 | float rot_rad = PI * rot / 180.f; 293 | float src_point[2] = { 0, src_w * -0.5f }; 294 | float src_dir[2]; 295 | get_dir(src_point, rot_rad, src_dir); 296 | float dst_dir[2] = { 0, dst_w * -0.5f }; 297 | 298 | Point2f src[3]; 299 |
Point2f dst[3]; 300 | src[0] = Point2f(center[0] + scale * shift[0], center[1] + scale * shift[1]); 301 | src[1] = Point2f(center[0] + src_dir[0] + scale * shift[0], center[1] + src_dir[1] + scale * shift[1]); 302 | dst[0] = Point2f(dst_w * 0.5f, dst_h * 0.5f); 303 | dst[1] = Point2f(dst_w * 0.5f + dst_dir[0], dst_h * 0.5f + dst_dir[1]); 304 | 305 | get_3rd_point(src[0], src[1], src[2]); 306 | get_3rd_point(dst[0], dst[1], dst[2]); 307 | Mat trans; 308 | if (inv == 1) 309 | { 310 | trans = cv::getAffineTransform(dst, src); 311 | } 312 | else 313 | { 314 | trans = cv::getAffineTransform(src, dst); 315 | } 316 | return trans; 317 | } 318 | 319 | static void affine_transform(float* pt, const cv::Mat& t) 320 | { 321 | Mat new_pt = (Mat_<double>(3, 1) << pt[0], pt[1], 1.0); 322 | Mat tmp = t * new_pt; 323 | pt[0] = (float)tmp.ptr<double>(0)[0]; 324 | pt[1] = (float)tmp.ptr<double>(1)[0]; 325 | } 326 | 327 | void bbox_post_process(cv::Mat& bbox, const float* c, const float s, const int h, const int w) 328 | { 329 | const int num = bbox.size[1]; 330 | const int len = bbox.size[2]; 331 | vector<int> newshape = { num, len }; 332 | bbox = bbox.reshape(0, newshape); 333 | const int output_size[2] = { w,h }; 334 | Mat trans = get_affine_transform(c, s, 0, output_size, 1); 335 | float* pdata = (float*)bbox.data; 336 | for (int i = 0; i < num; i++) 337 | { 338 | for (int j = 0; j < 4; j++) 339 | { 340 | affine_transform(pdata + 2 * j, trans); ////map the 4 corner points back to the original image 341 | } 342 | affine_transform(pdata + 10, trans); ////map the center point back to the original image 343 | pdata += len; 344 | } 345 | } 346 | 347 | void draw_show_img(Mat img, myDict result, string savepath) 348 | { 349 | vector<vector<Point2f>> polys = std::get<vector<vector<Point2f>>>(result["POLYGONS"]); 350 | vector<Point2f> centers = std::get<vector<Point2f>>(result["CENTER"]); 351 | vector<int> angle_cls = std::get<vector<int>>(result["LABELS"]); 352 | vector<vector<int>> bbox = std::get<vector<vector<int>>>(result["BBOX"]); 353 | Scalar color = Scalar(0, 0, 255); 354 | for (int i = 0; i < polys.size(); i++) 355 | { 356 | vector<vector<Point>> cnts(1); 357 | for (int j = 0; j < 4; j++) 358 | { 359 | cnts[0].emplace_back(Point(int(polys[i][j].x), int(polys[i][j].y))); 360 | } 361 | Point ori_center = Point(int((bbox[i][0] + bbox[i][2]) * 0.5), int((bbox[i][1] + bbox[i][3]) * 0.5)); 362 | drawContours(img, cnts, -1, color, 2); 363 | circle(img, Point(int(centers[i].x), int(centers[i].y)), 5, color, 2); 364 | circle(img, ori_center, 5, color, 2); 365 | putText(img, to_string(angle_cls[i]), ori_center, FONT_HERSHEY_SIMPLEX, 2, color, 2); 366 | } 367 | imwrite(savepath, img); 368 | } 369 | 370 | 371 | void merge_images_horizontal(vector<Mat> images, string output_path) 372 | { 373 | int target_height = images[0].rows; 374 | for (int i = 1; i < images.size(); i++) 375 | { 376 | target_height = std::min(target_height, images[i].rows); 377 | } 378 | const int num_img = images.size(); 379 | vector<Mat> resized_images(num_img); 380 | int total_width = 0; 381 | for (int i = 0; i < num_img; i++) 382 | { 383 | const float aspect_ratio = (float)images[i].cols / (float)images[i].rows; 384 | const int new_width = int(target_height * aspect_ratio); 385 | resize(images[i], resized_images[i], Size(new_width, target_height)); 386 | total_width += new_width; 387 | } 388 | Mat merged_image = Mat::zeros(target_height, total_width, CV_8UC3); 389 | int x_offset = 0; 390 | for (int i = 0; i < num_img; i++) 391 | { 392 | resized_images[i].copyTo(merged_image(Rect(x_offset, 0, resized_images[i].cols, target_height))); 393 | x_offset += resized_images[i].cols; 394 | } 395 | imwrite(output_path, merged_image); 396 | } -------------------------------------------------------------------------------- /cpp/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | #include <opencv2/opencv.hpp> 4 | #include <string> 5 | #include <vector> 6 | #include <map> 7 | #include <tuple> 8 | #include <variant> 9 | #include <numeric> 10 | 11 | #define PI 3.14159265358979323846 12 | 13 | ////std::variant requires C++17 14 | typedef std::map<std::string, std::variant<std::vector<std::vector<cv::Point2f>>, std::vector<int>, std::vector<float>, std::vector<std::vector<int>>, std::vector<cv::Mat>, std::vector<cv::Point2f>>> myDict; 15 | 16 | cv::Mat ResizePad(const cv::Mat& img, const int target_size, int& new_w, int& new_h, int& left, int& top); 17 | 18 | std::tuple<cv::Mat, std::vector<int>> bbox_decode(cv::Mat& heat, cv::Mat& wh, cv::Mat& reg, const int K=100); 19 | cv::Mat decode_by_ind(const cv::Mat& heat, const std::vector<int>& inds, const int K = 100); 20 | void bbox_post_process(cv::Mat& bbox, const float* c, const float s, const int h, const int w); 21 | 22 | void draw_show_img(cv::Mat img, myDict result, std::string savepath); 23 | void merge_images_horizontal(std::vector<cv::Mat> images, std::string output_path); 24 | 25 | #endif -------------------------------------------------------------------------------- /python/main.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import math 4 | from utils import bbox_decode, decode_by_ind, bbox_post_process, nms, draw_show_img, merge_images_horizontal 5 | 6 | 7 | def ResizePad(img, target_size): 8 | h, w = img.shape[:2] 9 | m = max(h, w) 10 | ratio = target_size / m 11 | new_w, new_h = int(ratio * w), int(ratio * h) 12 | img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR) 13 | top = (target_size - new_h) // 2 14 | bottom = (target_size - new_h) - top 15 | left = (target_size - new_w) // 2 16 | right = (target_size - new_w) - left 17 | img1 = cv2.copyMakeBorder( 18 | img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0) 19 | ) 20 | return img1, new_w, new_h, left, top 21 | 22 | def sigmoid(x): 23 | return 1 / (1 +
np.exp(-x)) 24 | 25 | class cv_resnet18_card_correction: 26 | def __init__(self, model_path): 27 | self.model = cv2.dnn.readNet(model_path) 28 | self.resize_shape = [768, 768] 29 | self.outlayer_names = self.model.getUnconnectedOutLayersNames() 30 | self.mean = np.array([0.408, 0.447, 0.470],dtype=np.float32).reshape((1, 1, 3)) 31 | self.std = np.array([0.289, 0.274, 0.278],dtype=np.float32).reshape((1, 1, 3)) 32 | self.K = 10 33 | self.obj_score = 0.5 34 | self.out_height = self.resize_shape[0] // 4 35 | self.out_width = self.resize_shape[1] // 4 36 | 37 | def infer(self, srcimg): 38 | self.image = srcimg.copy() 39 | ori_h, ori_w = srcimg.shape[:-1] 40 | self.c = np.array([ori_w / 2., ori_h / 2.], dtype=np.float32) 41 | self.s = max(ori_h, ori_w) * 1.0 42 | blob, new_w, new_h, left, top = self.preprocess(srcimg, self.resize_shape) 43 | self.model.setInput(blob) 44 | pre_out = self.model.forward(self.outlayer_names) 45 | 46 | out = self.postprocess(pre_out) 47 | return out 48 | 49 | def preprocess(self, img, resize_shape): 50 | im, new_w, new_h, left, top = ResizePad(img, resize_shape[0]) 51 | im = (im.astype(np.float32) / 255.0 - self.mean) / self.std 52 | im = np.expand_dims(im.transpose((2, 0, 1)), axis=0) 53 | return im.astype(np.float32), new_w, new_h, left, top 54 | 55 | def distance(self, x1, y1, x2, y2): 56 | return math.sqrt(pow(x1 - x2, 2) + pow(y1 - y2, 2)) 57 | def crop_image(self, img, position): 58 | x0, y0 = position[0][0], position[0][1] 59 | x1, y1 = position[1][0], position[1][1] 60 | x2, y2 = position[2][0], position[2][1] 61 | x3, y3 = position[3][0], position[3][1] 62 | 63 | img_width = self.distance((x0 + x3) / 2, (y0 + y3) / 2, (x1 + x2) / 2, 64 | (y1 + y2) / 2) 65 | img_height = self.distance((x0 + x1) / 2, (y0 + y1) / 2, (x2 + x3) / 2, 66 | (y2 + y3) / 2) 67 | 68 | corners_trans = np.zeros((4, 2), np.float32) 69 | corners_trans[0] = [0, 0] 70 | corners_trans[1] = [img_width, 0] 71 | corners_trans[2] = [img_width, img_height] 72 | corners_trans[3] = [0, img_height] 73 | 74 | transform = cv2.getPerspectiveTransform(position, corners_trans) 75 | dst = cv2.warpPerspective(img, transform, 76 | (int(img_width), int(img_height))) 77 | return dst 78 | 79 | def postprocess(self, output): 80 | reg = output[3] 81 | wh = output[2] 82 | hm = output[4] 83 | angle_cls = output[0] 84 | ftype_cls = output[1] 85 | 86 | hm = sigmoid(hm) 87 | angle_cls = sigmoid(angle_cls) 88 | ftype_cls = sigmoid(ftype_cls) 89 | 90 | bbox, inds = bbox_decode(hm, wh, reg=reg, K=self.K) 91 | angle_cls = decode_by_ind(angle_cls, inds, K=self.K) 92 | ftype_cls = decode_by_ind(ftype_cls, inds,K=self.K).astype(np.float32) 93 | 94 | for i in range(bbox.shape[1]): 95 | bbox[0][i][9] = angle_cls[0][i] 96 | bbox = np.concatenate((bbox, np.expand_dims(ftype_cls, axis=-1)),axis=-1) 97 | # bbox = nms(bbox, 0.3) 98 | bbox = bbox_post_process(bbox.copy(), [self.c], [self.s], self.out_height, self.out_width) 99 | res = [] 100 | angle = [] 101 | sub_imgs = [] 102 | ftype = [] 103 | score = [] 104 | center = [] 105 | corner_left_right = [] 106 | for idx, box in enumerate(bbox[0]): 107 | if box[8] > self.obj_score: 108 | angle.append(int(box[9])) 109 | res.append(box[0:8]) 110 | box8point = np.array(box[0:8]).reshape(4,2).astype(np.int32) 111 | corner_left_right.append([box8point[:,0].min(),box8point[:,1].min(),box8point[:,0].max(),box8point[:,1].max()]) 112 | sub_img = self.crop_image(self.image,res[-1].copy().reshape(4, 2)) 113 | if angle[-1] == 1: 114 | sub_img = cv2.rotate(sub_img, 2) 115 | if angle[-1] == 2: 116 | 
sub_img = cv2.rotate(sub_img, 1) 117 | if angle[-1] == 3: 118 | sub_img = cv2.rotate(sub_img, 0) 119 | sub_imgs.append(sub_img) 120 | ftype.append(int(box[12])) 121 | score.append(box[8]) 122 | center.append([box[10],box[11]]) 123 | 124 | result = { 125 | "POLYGONS": np.array(res), 126 | "BBOX": np.array(corner_left_right), 127 | "SCORES": np.array(score), 128 | "OUTPUT_IMGS": sub_imgs, 129 | "LABELS": np.array(angle), 130 | "LAYOUT": np.array(ftype), 131 | "CENTER": np.array(center) 132 | } 133 | return result 134 | 135 | if __name__ == "__main__": 136 | imgpath = 'testimgs/demo3.jpg' 137 | model_path = 'cv_resnet18_card_correction.onnx' 138 | mynet = cv_resnet18_card_correction(model_path) 139 | 140 | srcimg = cv2.imread(imgpath) 141 | out = mynet.infer(srcimg) 142 | 143 | draw_show_img(srcimg.copy(), out, "show.jpg") 144 | merge_images_horizontal([srcimg] + out['OUTPUT_IMGS'],"pp4_rotate_show.jpg") 145 | # cv2.imwrite('rotate_img.jpg',out['OUTPUT_IMGS'][0]) -------------------------------------------------------------------------------- /python/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def max_pool2d(input, kernel_size, stride=1, padding=0, return_indices=False): 6 | batch_size, channels, in_height, in_width = input.shape 7 | k_height, k_width = kernel_size 8 | 9 | out_height = int((in_height + 2 * padding - k_height) / stride) + 1 10 | out_width = int((in_width + 2 * padding - k_width) / stride) + 1 11 | out = np.zeros((batch_size, channels, out_height, out_width), dtype=np.float32) 12 | index = np.zeros((batch_size, channels, out_height, out_width), dtype=np.int64) 13 | if padding > 0: 14 | input_ = np.zeros((batch_size, channels, in_height + 2 * padding, in_width + 2 * padding), dtype=np.float32) 15 | input_[:, :, padding:padding + in_height, padding:padding + in_width] = input 16 | input = input_ 17 | 18 | for b in range(batch_size): 19 | for c in range(channels): 20 | for i in range(out_height): 21 | for j in range(out_width): 22 | start_i = i * stride 23 | start_j = j * stride 24 | end_i = start_i + k_height 25 | end_j = start_j + k_width 26 | Xi = input[b, c, start_i: end_i, start_j: end_j] 27 | 28 | max_value = np.max(Xi) 29 | k = np.argmax(Xi) 30 | Ia = k // k_height + start_i - padding 31 | Ib = k % k_width + start_j - padding 32 | Ia = Ia if Ia > 0 else 0 33 | Ib = Ib if Ib > 0 else 0 34 | max_index = Ia * in_width + Ib 35 | out[b, c, i, j] = max_value 36 | index[b, c, i, j] = max_index 37 | 38 | if return_indices: 39 | return out, index 40 | else: 41 | return out 42 | 43 | def _nms(heat, kernel=3): 44 | pad = (kernel - 1) // 2 45 | hmax = max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) 46 | keep = (hmax == heat).astype(np.float32) 47 | return heat * keep, keep 48 | 49 | def numpy_topk(scores, K, axis=-1): 50 | indices = np.argsort(-scores, axis=axis).take(np.arange(K), axis=axis) ### 从大到小排序,取出前K个 51 | sort_scores = np.take(scores, indices) 52 | return sort_scores, indices 53 | 54 | def _gather_feat(feat, ind, mask=None): 55 | # print("_gather_feat input shape:", feat.shape, ind.shape) 56 | dim = feat.shape[2] 57 | ind = np.tile(np.expand_dims(ind, axis=2), (1, 1, dim)) 58 | feat = np.take_along_axis(feat, ind, axis=1) 59 | # print("_gather_feat output shape:", feat.shape, ind.shape) 60 | if mask is not None: 61 | mask = np.tile(np.expand_dims(mask, axis=2), (1, 1, feat.shape[-1])) 62 | feat = feat[mask] 63 | feat = feat.reshape((-1, dim)) 64 | return feat 65 | 66 | 
def _tranpose_and_gather_feat(feat, ind): 67 | feat = np.transpose(feat, (0, 2, 3, 1)) 68 | feat = feat.reshape((feat.shape[0], -1, feat.shape[3])) 69 | feat = _gather_feat(feat, ind) 70 | return feat 71 | 72 | def _topk(scores, K=40): 73 | batch, cat, height, width = scores.shape 74 | 75 | topk_scores, topk_inds = numpy_topk(scores.reshape((batch, cat, -1)), K) 76 | 77 | topk_inds = topk_inds % (height * width) 78 | topk_ys = (topk_inds / width).astype(np.float32) 79 | topk_xs = (topk_inds % width).astype(np.float32) 80 | 81 | topk_score, topk_ind = numpy_topk(topk_scores.reshape((batch, -1)), K) 82 | topk_clses = (topk_ind / K).astype(np.int32) 83 | topk_inds = _gather_feat(topk_inds.reshape((batch, -1, 1)),topk_ind).reshape((batch, K)) 84 | topk_ys = _gather_feat(topk_ys.reshape((batch, -1, 1)), topk_ind).reshape((batch, K)) 85 | topk_xs = _gather_feat(topk_xs.reshape((batch, -1, 1)), topk_ind).reshape((batch, K)) 86 | 87 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 88 | 89 | def bbox_decode(heat, wh, reg=None, K=100): 90 | batch, cat, height, width = heat.shape 91 | 92 | heat, keep = _nms(heat) 93 | 94 | scores, inds, clses, ys, xs = _topk(heat, K=K) 95 | if reg is not None: 96 | reg = _tranpose_and_gather_feat(reg, inds) 97 | reg = reg.reshape((batch, K, 2)) 98 | xs = xs.reshape((batch, K, 1)) + reg[:, :, 0:1] 99 | ys = ys.reshape((batch, K, 1)) + reg[:, :, 1:2] 100 | else: 101 | xs = xs.reshape((batch, K, 1)) + 0.5 102 | ys = ys.reshape((batch, K, 1)) + 0.5 103 | wh = _tranpose_and_gather_feat(wh, inds) 104 | wh = wh.reshape((batch, K, 8)) 105 | clses = clses.reshape((batch, K, 1)).astype(np.float32) 106 | scores = scores.reshape((batch, K, 1)) 107 | bboxes = np.concatenate( 108 | [ 109 | xs - wh[..., 0:1], 110 | ys - wh[..., 1:2], 111 | xs - wh[..., 2:3], 112 | ys - wh[..., 3:4], 113 | xs - wh[..., 4:5], 114 | ys - wh[..., 5:6], 115 | xs - wh[..., 6:7], 116 | ys - wh[..., 7:8], 117 | ], 118 | axis=2, 119 | ) 120 | detections = np.concatenate([bboxes, scores, clses, xs, ys], axis=2) 121 | 122 | return detections, inds 123 | 124 | def decode_by_ind(heat, inds, K=100): 125 | batch, cat, height, width = heat.shape 126 | score = _tranpose_and_gather_feat(heat, inds) 127 | score = score.reshape((batch, K, cat)) 128 | Type = np.max(score, axis=2) 129 | return Type 130 | 131 | def get_dir(src_point, rot_rad): 132 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 133 | 134 | src_result = [0, 0] 135 | src_result[0] = src_point[0] * cs - src_point[1] * sn 136 | src_result[1] = src_point[0] * sn + src_point[1] * cs 137 | 138 | return src_result 139 | 140 | def get_3rd_point(a, b): 141 | direct = a - b 142 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 143 | 144 | def get_affine_transform(center, 145 | scale, 146 | rot, 147 | output_size, 148 | shift=np.array([0, 0], dtype=np.float32), 149 | inv=0): 150 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 151 | scale = np.array([scale, scale], dtype=np.float32) 152 | 153 | scale_tmp = scale 154 | src_w = scale_tmp[0] 155 | dst_w = output_size[0] 156 | dst_h = output_size[1] 157 | 158 | rot_rad = np.pi * rot / 180 159 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 160 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 161 | 162 | src = np.zeros((3, 2), dtype=np.float32) 163 | dst = np.zeros((3, 2), dtype=np.float32) 164 | src[0, :] = center + scale_tmp * shift 165 | src[1, :] = center + src_dir + scale_tmp * shift 166 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 167 | dst[1, :] = np.array([dst_w * 0.5, 
dst_h * 0.5], np.float32) + dst_dir 168 | 169 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 170 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 171 | 172 | if inv: 173 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 174 | else: 175 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 176 | 177 | return trans 178 | 179 | 180 | def affine_transform(pt, t): 181 | new_pt = np.array([pt[0], pt[1], 1.0], dtype=np.float32).T 182 | new_pt = np.dot(t, new_pt) 183 | return new_pt[:2] 184 | 185 | def transform_preds(coords, center, scale, output_size, rot=0): 186 | target_coords = np.zeros(coords.shape) 187 | trans = get_affine_transform(center, scale, rot, output_size, inv=1) 188 | for p in range(coords.shape[0]): 189 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 190 | return target_coords 191 | 192 | def bbox_post_process(bbox, c, s, h, w): 193 | for i in range(bbox.shape[0]): 194 | bbox[i, :, 0:2] = transform_preds(bbox[i, :, 0:2], c[i], s[i], (w, h)) 195 | bbox[i, :, 2:4] = transform_preds(bbox[i, :, 2:4], c[i], s[i], (w, h)) 196 | bbox[i, :, 4:6] = transform_preds(bbox[i, :, 4:6], c[i], s[i], (w, h)) 197 | bbox[i, :, 6:8] = transform_preds(bbox[i, :, 6:8], c[i], s[i], (w, h)) 198 | bbox[i, :, 10:12] = transform_preds(bbox[i, :, 10:12], c[i], s[i], (w, h)) 199 | return bbox 200 | 201 | def nms(dets, thresh): 202 | ''' 203 | len(dets) is the batch size, and at inference time the batch size is usually 1, which means this function returns at the opening if-statement. 204 | Even if several images are fed at once and the batch size is greater than 1, NMS should not be done across them either: box overlap is only meaningful 205 | within a single image, so what would computing the overlap between boxes from different images even mean? 206 | ''' 207 | 208 | if len(dets) < 2: 209 | return dets 210 | index_keep = [] 211 | keep = [] 212 | for i in range(len(dets)): 213 | box = dets[i] 214 | if box[8] < thresh: 215 | break 216 | max_score_index = -1 217 | ctx = (dets[i][0] + dets[i][2] + dets[i][4] + dets[i][6]) / 4 218 | cty = (dets[i][1] + dets[i][3] + dets[i][5] + dets[i][7]) / 4 219 | for j in range(len(dets)): 220 | if i == j or dets[j][8] < thresh: 221 | break 222 | x1, y1 = dets[j][0], dets[j][1] 223 | x2, y2 = dets[j][2], dets[j][3] 224 | x3, y3 = dets[j][4], dets[j][5] 225 | x4, y4 = dets[j][6], dets[j][7] 226 | a = (x2 - x1) * (cty - y1) - (y2 - y1) * (ctx - x1) 227 | b = (x3 - x2) * (cty - y2) - (y3 - y2) * (ctx - x2) 228 | c = (x4 - x3) * (cty - y3) - (y4 - y3) * (ctx - x3) 229 | d = (x1 - x4) * (cty - y4) - (y1 - y4) * (ctx - x4) 230 | if (a > 0 and b > 0 and c > 0 and d > 0) or (a < 0 and b < 0 231 | and c < 0 and d < 0): 232 | if dets[i][8] > dets[j][8] and max_score_index < 0: 233 | max_score_index = i 234 | elif dets[i][8] < dets[j][8]: 235 | max_score_index = -2 236 | break 237 | if max_score_index > -1: 238 | index_keep.append(max_score_index) 239 | elif max_score_index == -1: 240 | index_keep.append(i) 241 | for i in range(0, len(index_keep)): 242 | keep.append(dets[index_keep[i]]) 243 | return np.array(keep) 244 | 245 | 246 | def draw_show_img(img, result, savepath): 247 | polys = result['POLYGONS'] 248 | centers = result['CENTER'] 249 | angle_cls = result['LABELS'] 250 | bbox = result['BBOX'] 251 | color = (0,0,255) 252 | for idx, poly in enumerate(polys): 253 | poly = poly.reshape(4, 2).astype(np.int32) 254 | ori_center = ((bbox[idx][0]+bbox[idx][2])//2,(bbox[idx][1]+bbox[idx][3])//2) 255 | img = cv2.drawContours(img,[poly],-1,color,2) 256 | img = cv2.circle(img,tuple(centers[idx].astype(np.int64).tolist()),5,color,thickness=2) 257 | img = cv2.circle(img,ori_center,5,color,thickness=2) 258 | img =
cv2.putText(img,str(angle_cls[idx]),ori_center,cv2.FONT_HERSHEY_SIMPLEX,2,color,2) 259 | cv2.imwrite(savepath,img) 260 | 261 | def merge_images_horizontal(images, output_path="./show.jpg"): 262 | # determine the target height shared by all images 263 | target_height = min(img.shape[0] for img in images) 264 | 265 | # resize every image so that the heights match 266 | resized_images = [] 267 | for img in images: 268 | aspect_ratio = img.shape[1] / img.shape[0] # aspect ratio of the original image 269 | new_width = int(target_height * aspect_ratio) # width after resizing 270 | resized_img = cv2.resize(img, (new_width, target_height)) # resize the image 271 | resized_images.append(resized_img) 272 | 273 | # total width of the merged image 274 | total_width = sum(img.shape[1] for img in resized_images) 275 | 276 | # create a new blank canvas 277 | merged_image = np.zeros((target_height, total_width, 3), dtype=np.uint8) 278 | 279 | # paste all resized images onto the canvas 280 | x_offset = 0 281 | for img in resized_images: 282 | merged_image[:, x_offset:x_offset + img.shape[1]] = img 283 | x_offset += img.shape[1] 284 | 285 | # save the merged image 286 | cv2.imwrite(output_path, merged_image) -------------------------------------------------------------------------------- /testimgs/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/cv_resnet18_card_correction-opencv-dnn/04d39d8e1f95bfebdcdd5baf0d8c1a1a49ef073b/testimgs/demo.jpg -------------------------------------------------------------------------------- /testimgs/demo1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/cv_resnet18_card_correction-opencv-dnn/04d39d8e1f95bfebdcdd5baf0d8c1a1a49ef073b/testimgs/demo1.jpg -------------------------------------------------------------------------------- /testimgs/demo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/cv_resnet18_card_correction-opencv-dnn/04d39d8e1f95bfebdcdd5baf0d8c1a1a49ef073b/testimgs/demo2.jpg -------------------------------------------------------------------------------- /testimgs/demo3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/cv_resnet18_card_correction-opencv-dnn/04d39d8e1f95bfebdcdd5baf0d8c1a1a49ef073b/testimgs/demo3.jpg -------------------------------------------------------------------------------- /testimgs/demo4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/cv_resnet18_card_correction-opencv-dnn/04d39d8e1f95bfebdcdd5baf0d8c1a1a49ef073b/testimgs/demo4.jpg -------------------------------------------------------------------------------- /testimgs/demo5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/cv_resnet18_card_correction-opencv-dnn/04d39d8e1f95bfebdcdd5baf0d8c1a1a49ef073b/testimgs/demo5.jpg -------------------------------------------------------------------------------- /testimgs/demo6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/cv_resnet18_card_correction-opencv-dnn/04d39d8e1f95bfebdcdd5baf0d8c1a1a49ef073b/testimgs/demo6.jpg --------------------------------------------------------------------------------
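
A note for anyone re-exporting the model: postprocess() in both the C++ and Python code picks the network heads by position (angle_cls = output[0], ftype_cls = output[1], wh = output[2], reg = output[3], hm = output[4]), so the output order of the .onnx file has to match those indices. The following is a minimal verification sketch, not part of the repository; it assumes onnxruntime is installed and that the onnx file from the Baidu link above sits in the working directory, and it simply prints each output head's name and shape so the hard-coded indices can be checked before running the demos.

import numpy as np
import onnxruntime as ort

# hypothetical check script, not part of this repository
sess = ort.InferenceSession("cv_resnet18_card_correction.onnx", providers=["CPUExecutionProvider"])
dummy = np.zeros((1, 3, 768, 768), dtype=np.float32)  # the network expects a 768x768 NCHW float input
input_name = sess.get_inputs()[0].name
outputs = sess.run(None, {input_name: dummy})
for meta, out in zip(sess.get_outputs(), outputs):
    print(meta.name, out.shape)  # expected head shapes: (1,4,192,192), (1,2,192,192), (1,8,192,192), (1,2,192,192), (1,1,192,192)

If the printed order differs, adjust the indices in postprocess() accordingly.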