├── README.md
├── onnxruntime
│   ├── class.names
│   ├── images
│   │   ├── 0bd530d6-76c944fe.jpg
│   │   ├── 0c8b32af-2d9a2bad.jpg
│   │   ├── ID_0a973b163.jpg
│   │   ├── ID_0e5045516.jpg
│   │   ├── berlin_000000_000019_leftImg8bit.png
│   │   ├── berlin_000001_000019_leftImg8bit.png
│   │   ├── berlin_000002_000019_leftImg8bit.png
│   │   ├── bus.jpg
│   │   └── zidane.jpg
│   ├── main.cpp
│   └── main.py
└── opencv
    ├── class.names
    ├── images
    │   ├── 0bd530d6-76c944fe.jpg
    │   ├── 0c8b32af-2d9a2bad.jpg
    │   ├── ID_0a973b163.jpg
    │   ├── ID_0e5045516.jpg
    │   ├── berlin_000000_000019_leftImg8bit.png
    │   ├── berlin_000001_000019_leftImg8bit.png
    │   ├── berlin_000002_000019_leftImg8bit.png
    │   ├── bus.jpg
    │   └── zidane.jpg
    ├── main.cpp
    └── main.py
/README.md:
--------------------------------------------------------------------------------
# multiyolov5-opencv-onnxrun

Multi-task YOLOv5 (object detection + semantic segmentation) deployed with OpenCV, in both a C++ and a Python version.

Multi-task YOLOv5 (object detection + semantic segmentation) deployed with ONNXRuntime, in both a C++ and a Python version.

The .onnx model file is hosted on Baidu Netdisk. Download link: https://pan.baidu.com/s/1-Ix5zkQnkZZtgm5EO7-k5w  Extraction code: wnm7
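To try either version, something like the following should work once the downloaded .onnx file is placed next to the script (the flags and defaults are the ones defined in `main.py`; paths are relative to each subdirectory):

```bash
cd onnxruntime   # or: cd opencv
python main.py --imgpath images/bus.jpg --modelpath pspv5m_citybdd_conewaterbarrier.onnx
```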
--------------------------------------------------------------------------------
/onnxruntime/class.names:
--------------------------------------------------------------------------------
vehicle
person
cycle
cone
waterbarrier
--------------------------------------------------------------------------------
/onnxruntime/images/0bd530d6-76c944fe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/onnxruntime/images/0bd530d6-76c944fe.jpg
--------------------------------------------------------------------------------
/onnxruntime/images/0c8b32af-2d9a2bad.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/onnxruntime/images/0c8b32af-2d9a2bad.jpg
--------------------------------------------------------------------------------
/onnxruntime/images/ID_0a973b163.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/onnxruntime/images/ID_0a973b163.jpg
--------------------------------------------------------------------------------
/onnxruntime/images/ID_0e5045516.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/onnxruntime/images/ID_0e5045516.jpg
--------------------------------------------------------------------------------
/onnxruntime/images/berlin_000000_000019_leftImg8bit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/onnxruntime/images/berlin_000000_000019_leftImg8bit.png
--------------------------------------------------------------------------------
/onnxruntime/images/berlin_000001_000019_leftImg8bit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/onnxruntime/images/berlin_000001_000019_leftImg8bit.png
--------------------------------------------------------------------------------
/onnxruntime/images/berlin_000002_000019_leftImg8bit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/onnxruntime/images/berlin_000002_000019_leftImg8bit.png
--------------------------------------------------------------------------------
/onnxruntime/images/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/onnxruntime/images/bus.jpg
--------------------------------------------------------------------------------
/onnxruntime/images/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/onnxruntime/images/zidane.jpg
--------------------------------------------------------------------------------
/onnxruntime/main.cpp:
--------------------------------------------------------------------------------
#include <fstream>
#include <sstream>
#include <iostream>
#include <algorithm>
#include <cmath>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>
#include <onnxruntime_cxx_api.h>

using namespace std;
using namespace cv;
using namespace Ort;

struct Net_config
{
	float confThreshold; // Confidence threshold
	float nmsThreshold;  // Non-maximum suppression threshold
	float objThreshold;  // Object confidence threshold
};

typedef struct BoxInfo
{
	float x1;
	float y1;
	float x2;
	float y2;
	float score;
	int label;
} BoxInfo;

const char *Cityscapes_Class[] = { "road", "sidewalk", "building", "wall", "fence",
	"pole", "traffic light", "traffic sign", "vegetation",
	"terrain", "sky", "person", "rider", "car", "truck",
	"bus", "train", "motorcycle", "bicycle" };

const int Cityscapes_COLORMAP[19][3] = { {128, 64, 128}, {244, 35, 232}, {70, 70, 70}, {102, 102, 156},
	{190, 153, 153}, {153, 153, 153}, {250, 170, 30}, {220, 220, 0}, {107, 142, 35}, {152, 251, 152},
	{0, 130, 180}, {220, 20, 60}, {255, 0, 0}, {0, 0, 142}, {0, 0, 70}, {0, 60, 100}, {0, 80, 100}, {0, 0, 230}, {119, 11, 32} };

class YOLO
{
public:
	YOLO(Net_config config);
	Mat detect(Mat& frame);
private:
	const float anchors[3][6] = { {10.0, 13.0, 16.0, 30.0, 33.0, 23.0},
								  {30.0, 61.0, 62.0, 45.0, 59.0, 119.0},
								  {116.0, 90.0, 156.0, 198.0, 373.0, 326.0} };
	const float stride[3] = { 8.0, 16.0, 32.0 };
	int inpWidth;
	int inpHeight;
	int area;
	int nout;
	int num_proposal;
	vector<string> class_names;
	int num_class;
	int seg_num_class;

	float confThreshold;
	float nmsThreshold;
	float objThreshold;
	const bool keep_ratio = true;
	vector<float> input_image_;
	void normalize_(Mat img);
	void nms(vector<BoxInfo>& input_boxes);
	Mat resize_image(Mat srcimg, int *newh, int *neww, int *top, int *left);

	Env env = Env(ORT_LOGGING_LEVEL_ERROR, "yolov5");
	Ort::Session *ort_session = nullptr;
	SessionOptions sessionOptions = SessionOptions();
	vector<char*> input_names;
	vector<char*> output_names;
	vector<vector<int64_t>> input_node_dims;  // >=1 inputs
	vector<vector<int64_t>> output_node_dims; // >=1 outputs
};

YOLO::YOLO(Net_config config)
{
	this->confThreshold = config.confThreshold;
	this->nmsThreshold = config.nmsThreshold;
	this->objThreshold = config.objThreshold;

	string classesFile = "class.names";
	string model_path = "pspv5m_citybdd_conewaterbarrier.onnx";
	std::wstring widestr = std::wstring(model_path.begin(), model_path.end());
	//OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);
	sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
	ort_session = new Session(env, widestr.c_str(), sessionOptions);
	size_t numInputNodes = ort_session->GetInputCount();
	size_t numOutputNodes = ort_session->GetOutputCount();
	AllocatorWithDefaultOptions allocator;
	for (int i = 0; i < numInputNodes; i++)
	{
		input_names.push_back(ort_session->GetInputName(i, allocator));
		Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
		auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
		auto input_dims = input_tensor_info.GetShape();
		input_node_dims.push_back(input_dims);
	}
	for (int i = 0; i < numOutputNodes; i++)
	{
		output_names.push_back(ort_session->GetOutputName(i, allocator));
		Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
		auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
		auto output_dims = output_tensor_info.GetShape();
		output_node_dims.push_back(output_dims);
	}
	this->inpHeight = input_node_dims[0][2];
	this->inpWidth = input_node_dims[0][3];
	this->nout = output_node_dims[0][2];
	this->area = this->inpHeight * this->inpWidth;
	this->num_proposal = output_node_dims[0][1];
	this->seg_num_class = output_node_dims[1][1];
	ifstream ifs(classesFile.c_str());
	string line;
	while (getline(ifs, line)) this->class_names.push_back(line);
	this->num_class = class_names.size();
}

Mat YOLO::resize_image(Mat srcimg, int *newh, int *neww, int *top, int *left)
{
	int srch = srcimg.rows, srcw = srcimg.cols;
	*newh = this->inpHeight;
	*neww = this->inpWidth;
	Mat dstimg;
	if (this->keep_ratio && srch != srcw) {
		float hw_scale = (float)srch / srcw;
		if (hw_scale > 1) {
			*newh = this->inpHeight;
			*neww = int(this->inpWidth / hw_scale);
			resize(srcimg, dstimg, Size(*neww, *newh), 0, 0, INTER_AREA);
			*left = int((this->inpWidth - *neww) * 0.5);
			copyMakeBorder(dstimg, dstimg, 0, 0, *left, this->inpWidth - *neww - *left, BORDER_CONSTANT, 114);
		}
		else {
			*newh = (int)(this->inpHeight * hw_scale);
			*neww = this->inpWidth;
			resize(srcimg, dstimg, Size(*neww, *newh), 0, 0, INTER_AREA);
			*top = (int)((this->inpHeight - *newh) * 0.5);
			copyMakeBorder(dstimg, dstimg, *top, this->inpHeight - *newh - *top, 0, 0, BORDER_CONSTANT, 114);
		}
	}
	else {
		resize(srcimg, dstimg, Size(*neww, *newh), 0, 0, INTER_AREA);
	}
	return dstimg;
}

void YOLO::normalize_(Mat img)
{
	// img.convertTo(img, CV_32F);
	int row = img.rows;
	int col = img.cols;
	this->input_image_.resize(row * col * img.channels());
	for (int c = 0; c < 3; c++)
	{
		for (int i = 0; i < row; i++)
		{
			for (int j = 0; j < col; j++)
			{
				float pix = img.ptr<uchar>(i)[j * 3 + 2 - c]; // BGR -> RGB while filling the CHW buffer
				this->input_image_[c * row * col + i * col + j] = pix / 255.0;
			}
		}
	}
}

void YOLO::nms(vector<BoxInfo>& input_boxes)
{
	sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
	vector<float> vArea(input_boxes.size());
	for (int i = 0; i < int(input_boxes.size()); ++i)
	{
		vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
			* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
	}

	vector<bool> isSuppressed(input_boxes.size(), false);
	for (int i = 0; i < int(input_boxes.size()); ++i)
	{
		if (isSuppressed[i]) { continue; }
		for (int j = i + 1; j < int(input_boxes.size()); ++j)
		{
			if (isSuppressed[j]) { continue; }
			float xx1 = (max)(input_boxes[i].x1, input_boxes[j].x1);
			float yy1 = (max)(input_boxes[i].y1, input_boxes[j].y1);
			float xx2 = (min)(input_boxes[i].x2, input_boxes[j].x2);
			float yy2 = (min)(input_boxes[i].y2, input_boxes[j].y2);

			float w = (max)(float(0), xx2 - xx1 + 1);
			float h = (max)(float(0), yy2 - yy1 + 1);
			float inter = w * h;
			float ovr = inter / (vArea[i] + vArea[j] - inter);

			if (ovr >= this->nmsThreshold)
			{
				isSuppressed[j] = true;
			}
		}
	}
	// return post_nms;
	int idx_t = 0;
	input_boxes.erase(remove_if(input_boxes.begin(), input_boxes.end(), [&idx_t, &isSuppressed](const BoxInfo& f) { return isSuppressed[idx_t++]; }), input_boxes.end());
}

Mat YOLO::detect(Mat& frame)
{
	Mat seg_img = frame.clone();
	int newh = 0, neww = 0, padh = 0, padw = 0;
	Mat dstimg = this->resize_image(frame, &newh, &neww, &padh, &padw);
	this->normalize_(dstimg);
	array<int64_t, 4> input_shape_{ 1, 3, this->inpHeight, this->inpWidth };

	auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
	Value input_tensor_ = Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());

	// run inference
	vector<Value> ort_outputs = ort_session->Run(RunOptions{ nullptr }, &input_names[0], &input_tensor_, 1, output_names.data(), output_names.size());
	///// generate proposals
	vector<BoxInfo> generate_boxes;
	float ratioh = (float)frame.rows / newh, ratiow = (float)frame.cols / neww;
	int n = 0, q = 0, i = 0, j = 0, row_ind = 0, k = 0; /// each proposal: xmin,ymin,xmax,ymax,box_score,class_score
	const float* pdata = ort_outputs[0].GetTensorMutableData<float>();
	for (n = 0; n < 3; n++) /// feature map scales
	{
		int num_grid_x = (int)(this->inpWidth / this->stride[n]);
		int num_grid_y = (int)(this->inpHeight / this->stride[n]);
		for (q = 0; q < 3; q++) /// anchors
		{
			const float anchor_w = this->anchors[n][q * 2];
			const float anchor_h = this->anchors[n][q * 2 + 1];
			for (i = 0; i < num_grid_y; i++)
			{
				for (j = 0; j < num_grid_x; j++)
				{
					float box_score = pdata[4];
					if (box_score > this->objThreshold)
					{
						int max_ind = 0;
						float max_class_score = 0;
						for (k = 0; k < num_class; k++)
						{
							if (pdata[k + 5] > max_class_score)
							{
								max_class_score = pdata[k + 5];
								max_ind = k;
							}
						}
						//if (max_class_score > this->confThreshold)
						//{
						float cx = (pdata[0] * 2.f - 0.5f + j) * this->stride[n]; /// cx
						float cy = (pdata[1] * 2.f - 0.5f + i) * this->stride[n]; /// cy
						float w = powf(pdata[2] * 2.f, 2.f) * anchor_w; /// w
						float h = powf(pdata[3] * 2.f, 2.f) * anchor_h; /// h

						float xmin = (cx - padw - 0.5 * w) * ratiow;
						float ymin = (cy - padh - 0.5 * h) * ratioh;
						float xmax = (cx - padw + 0.5 * w) * ratiow;
						float ymax = (cy - padh + 0.5 * h) * ratioh;

						generate_boxes.push_back(BoxInfo{ xmin, ymin, xmax, ymax, max_class_score, max_ind });
						//}
					}
					row_ind++;
					pdata += nout;
				}
			}
		}
	}

	// Perform non maximum suppression to eliminate redundant overlapping boxes with
	// lower confidences
	nms(generate_boxes);
	for (size_t i = 0; i < generate_boxes.size(); ++i)
	{
		int xmin = int(generate_boxes[i].x1);
		int ymin = int(generate_boxes[i].y1);
		rectangle(frame, Point(xmin, ymin), Point(int(generate_boxes[i].x2), int(generate_boxes[i].y2)), Scalar(0, 0, 255), 2);
		string label = format("%.2f", generate_boxes[i].score);
		label = this->class_names[generate_boxes[i].label] + ":" + label;
		putText(frame, label, Point(xmin, ymin - 5), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
	}

	ratioh = (float)newh / frame.rows;
	ratiow = (float)neww / frame.cols;
	const float* pseg = ort_outputs[1].GetTensorMutableData<float>();
	for (i = 0; i < seg_img.rows; i++)
	{
		for (j = 0; j < seg_img.cols; j++)
		{
			const int x = int(j * ratiow) + padw;
			const int y = int(i * ratioh) + padh;
			float max_prob = -1;
			int max_ind = 0;
			for (n = 0; n < seg_num_class; n++)
			{
				float pix_data = pseg[n * area + y * this->inpWidth + x];
				if (pix_data > max_prob)
				{
					max_prob = pix_data;
					max_ind = n;
				}
			}
			seg_img.at<Vec3b>(i, j)[0] = Cityscapes_COLORMAP[max_ind][0];
			seg_img.at<Vec3b>(i, j)[1] = Cityscapes_COLORMAP[max_ind][1];
			seg_img.at<Vec3b>(i, j)[2] = Cityscapes_COLORMAP[max_ind][2];
		}
	}

	Mat combine;
	if (frame.rows < frame.cols)
	{
		vconcat(frame, seg_img, combine);
	}
	else
	{
		hconcat(frame, seg_img, combine);
	}
	return combine;
}

int main()
{
	Net_config yolo_nets = { 0.3, 0.5, 0.3 };
	YOLO yolo_model(yolo_nets);
	string imgpath = "images/berlin_000002_000019_leftImg8bit.png";
	Mat srcimg = imread(imgpath);
	Mat outimg = yolo_model.detect(srcimg);

	static const string kWinName = "Deep learning object detection in ONNXRuntime";
	namedWindow(kWinName, WINDOW_NORMAL);
	imshow(kWinName, outimg);
	waitKey(0);
	destroyAllWindows();
}
--------------------------------------------------------------------------------
/onnxruntime/main.py:
--------------------------------------------------------------------------------
import cv2
import argparse
import numpy as np
import onnxruntime as ort

Cityscapes_Class = ["road", "sidewalk", "building", "wall", "fence",
                    "pole", "traffic light", "traffic sign", "vegetation",
                    "terrain", "sky", "person", "rider", "car", "truck",
                    "bus", "train", "motorcycle", "bicycle"]
Cityscapes_COLORMAP = [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156],
                       [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152],
                       [0, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], [0, 80, 100], [0, 0, 230], [119, 11, 32]]

class yolov5():
    def __init__(self, modelpath, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5):
        with open('class.names', 'rt') as f:
            self.classes = f.read().rstrip('\n').split('\n')
        num_classes = len(self.classes)
        anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        self.nl = len(anchors)
        self.na = len(anchors[0]) // 2
        self.no = num_classes + 5
        self.grid = [np.zeros(1)] * self.nl
        self.stride = np.array([8., 16., 32.])
        self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
        self.inpWidth = 1024
        self.inpHeight = 1024
        so = ort.SessionOptions()
        so.log_severity_level = 3
        self.net = ort.InferenceSession(modelpath, so)
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold

    def resize_image(self, srcimg, keep_ratio=True):
        top, left, newh, neww = 0, 0, self.inpHeight, self.inpWidth
        if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
            hw_scale = srcimg.shape[0] / srcimg.shape[1]
            if hw_scale > 1:
                newh, neww = self.inpHeight, int(self.inpWidth / hw_scale)
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                left = int((self.inpWidth - neww) * 0.5)
                img = cv2.copyMakeBorder(img, 0, 0, left, self.inpWidth - neww - left, cv2.BORDER_CONSTANT,
                                         value=(114, 114, 114))  # add border
            else:
                newh, neww = int(self.inpHeight * hw_scale), self.inpWidth
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                top = int((self.inpHeight - newh) * 0.5)
                img = cv2.copyMakeBorder(img, top, self.inpHeight - newh - top, 0, 0, cv2.BORDER_CONSTANT, value=(114, 114, 114))
        else:
            img = cv2.resize(srcimg, (self.inpWidth, self.inpHeight), interpolation=cv2.INTER_AREA)
        return img, newh, neww, top, left

    def _make_grid(self, nx=20, ny=20):
        xv, yv = np.meshgrid(np.arange(ny), np.arange(nx))
        return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)

    def preprocess(self, img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0
        return img

    def postprocess(self, frame, outs, padsize=None):
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]
        newh, neww, padh, padw = padsize
        ratioh, ratiow = frameHeight / newh, frameWidth / neww
        # Scan through all the bounding boxes output from the network and keep only the
        # ones with high confidence scores. Assign the box's class label as the class with the highest score.

        confidences = []
        boxes = []
        classIds = []
        for detection in outs:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            # if confidence > self.confThreshold and detection[4] > self.objThreshold:
            if detection[4] > self.objThreshold:
                center_x = int((detection[0] - padw) * ratiow)
                center_y = int((detection[1] - padh) * ratioh)
                width = int(detection[2] * ratiow)
                height = int(detection[3] * ratioh)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)

                confidences.append(float(confidence))
                boxes.append([left, top, width, height])
                classIds.append(classId)
        # Perform non maximum suppression to eliminate redundant overlapping boxes with
        # lower confidences.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold).flatten()
        for i in indices:
            box = boxes[i]
            left = box[0]
            top = box[1]
            width = box[2]
            height = box[3]
            frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)
        return frame

    def drawPred(self, frame, classId, conf, left, top, right, bottom):
        # Draw a bounding box.
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4)

        label = '%.2f' % conf
        label = '%s:%s' % (self.classes[classId], label)

        # Display the label at the top of the bounding box
        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        top = max(top, labelSize[1])
        # cv2.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255, 255, 255), cv2.FILLED)
        cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
        return frame

    def detect(self, srcimg):
        img, newh, neww, padh, padw = self.resize_image(srcimg)
        img = self.preprocess(img)
        blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)
        outs = self.net.run(None, {self.net.get_inputs()[0].name: blob})

        # inference output: outs[0] is the detection head, outs[1] the segmentation head
        det_outs = outs[0].squeeze(axis=0)
        row_ind = 0
        for i in range(self.nl):
            h, w = int(self.inpHeight / self.stride[i]), int(self.inpWidth / self.stride[i])
            length = int(self.na * h * w)
            if self.grid[i].shape[2:4] != (h, w):
                self.grid[i] = self._make_grid(w, h)

            det_outs[row_ind:row_ind + length, 0:2] = (det_outs[row_ind:row_ind + length, 0:2] * 2. - 0.5 + np.tile(self.grid[i], (self.na, 1))) * int(self.stride[i])
            det_outs[row_ind:row_ind + length, 2:4] = (det_outs[row_ind:row_ind + length, 2:4] * 2) ** 2 * np.repeat(self.anchor_grid[i], h * w, axis=0)
            row_ind += length
        srcimg = self.postprocess(srcimg, det_outs, padsize=(newh, neww, padh, padw))

        seg_outs = outs[1].squeeze(axis=0)
        mask = seg_outs[:, padh:(self.inpHeight - padh), padw:(self.inpWidth - padw)]
        seg_id = np.argmax(mask, axis=0).astype(np.uint8)
        seg_id = cv2.resize(seg_id, (srcimg.shape[1], srcimg.shape[0]), interpolation=cv2.INTER_NEAREST)
        # srcimg = np.array(Cityscapes_COLORMAP, dtype=np.uint8)[seg_id]
        # return srcimg

        seg_img = np.array(Cityscapes_COLORMAP, dtype=np.uint8)[seg_id]
        if srcimg.shape[0] < srcimg.shape[1]:
            return np.vstack((srcimg, seg_img))
        else:
            return np.hstack((srcimg, seg_img))

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--imgpath", type=str, default='images/berlin_000002_000019_leftImg8bit.png', help="image path")
    parser.add_argument('--modelpath', type=str, default='pspv5m_citybdd_conewaterbarrier.onnx')
    parser.add_argument('--confThreshold', default=0.5, type=float, help='class confidence')
    parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh')
    parser.add_argument('--objThreshold', default=0.5, type=float, help='object confidence')
    args = parser.parse_args()

    yolonet = yolov5(args.modelpath, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold, objThreshold=args.objThreshold)
    srcimg = cv2.imread(args.imgpath)
    srcimg = yolonet.detect(srcimg)

    winName = 'Deep learning object detection in ONNXRuntime'
    cv2.namedWindow(winName, 0)
    cv2.imshow(winName, srcimg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
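Both Python versions decode the raw detection head the same way before NMS: the (already sigmoid-activated) outputs are mapped back to letterbox pixels through the cell grid and the anchor sizes, matching the `(x * 2 - 0.5 + grid) * stride` and `(wh * 2) ** 2 * anchor` lines above. A minimal worked example of that formula, with made-up values:

```python
# Toy decode of a single prediction at the stride-8 level (values invented
# purely for illustration; the real scripts do this vectorized with numpy).
stride = 8.0
anchor_w, anchor_h = 10.0, 13.0      # first anchor of the stride-8 level
gx, gy = 5.0, 3.0                    # grid-cell column / row
tx, ty, tw, th = 0.6, 0.4, 0.7, 0.5  # sigmoid-activated raw outputs

cx = (tx * 2.0 - 0.5 + gx) * stride  # 45.6, box center x in letterbox pixels
cy = (ty * 2.0 - 0.5 + gy) * stride  # 26.4, box center y
w = (tw * 2.0) ** 2 * anchor_w       # 19.6, box width
h = (th * 2.0) ** 2 * anchor_h       # 13.0, box height
print(cx, cy, w, h)
```

The padding offsets (`padw`, `padh`) and the resize ratios are then removed in `postprocess` to land in original-image coordinates.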
--------------------------------------------------------------------------------
/opencv/class.names:
--------------------------------------------------------------------------------
vehicle
person
cycle
cone
waterbarrier
--------------------------------------------------------------------------------
/opencv/images/0bd530d6-76c944fe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/opencv/images/0bd530d6-76c944fe.jpg
--------------------------------------------------------------------------------
/opencv/images/0c8b32af-2d9a2bad.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/opencv/images/0c8b32af-2d9a2bad.jpg
--------------------------------------------------------------------------------
/opencv/images/ID_0a973b163.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/opencv/images/ID_0a973b163.jpg
--------------------------------------------------------------------------------
/opencv/images/ID_0e5045516.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/opencv/images/ID_0e5045516.jpg
--------------------------------------------------------------------------------
/opencv/images/berlin_000000_000019_leftImg8bit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/opencv/images/berlin_000000_000019_leftImg8bit.png
--------------------------------------------------------------------------------
/opencv/images/berlin_000001_000019_leftImg8bit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/opencv/images/berlin_000001_000019_leftImg8bit.png
--------------------------------------------------------------------------------
/opencv/images/berlin_000002_000019_leftImg8bit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/opencv/images/berlin_000002_000019_leftImg8bit.png
--------------------------------------------------------------------------------
/opencv/images/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/opencv/images/bus.jpg
--------------------------------------------------------------------------------
/opencv/images/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/opencv/images/zidane.jpg
--------------------------------------------------------------------------------
/opencv/main.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/multiyolov5-opencv-onnxrun/569f56dd5ef53ee2e7fef0d0cc267cfc88ecc25f/opencv/main.cpp
--------------------------------------------------------------------------------
/opencv/main.py:
--------------------------------------------------------------------------------
import cv2
import argparse
import numpy as np

Cityscapes_Class = ["road", "sidewalk", "building", "wall", "fence",
                    "pole", "traffic light", "traffic sign", "vegetation",
                    "terrain", "sky", "person", "rider", "car", "truck",
                    "bus", "train", "motorcycle", "bicycle"]
Cityscapes_COLORMAP = [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156],
                       [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152],
                       [0, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], [0, 80, 100], [0, 0, 230], [119, 11, 32]]

class yolov5():
    def __init__(self, modelpath, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5):
        with open('class.names', 'rt') as f:
            self.classes = f.read().rstrip('\n').split('\n')
        num_classes = len(self.classes)
        anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        self.nl = len(anchors)
        self.na = len(anchors[0]) // 2
        self.no = num_classes + 5
        self.grid = [np.zeros(1)] * self.nl
        self.stride = np.array([8., 16., 32.])
        self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
        self.inpWidth = 1024
        self.inpHeight = 1024
        self.net = cv2.dnn.readNet(modelpath)
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold

    def resize_image(self, srcimg, keep_ratio=True):
        top, left, newh, neww = 0, 0, self.inpHeight, self.inpWidth
        if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
            hw_scale = srcimg.shape[0] / srcimg.shape[1]
            if hw_scale > 1:
                newh, neww = self.inpHeight, int(self.inpWidth / hw_scale)
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                left = int((self.inpWidth - neww) * 0.5)
                img = cv2.copyMakeBorder(img, 0, 0, left, self.inpWidth - neww - left, cv2.BORDER_CONSTANT,
                                         value=(114, 114, 114))  # add border
            else:
                newh, neww = int(self.inpHeight * hw_scale), self.inpWidth
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                top = int((self.inpHeight - newh) * 0.5)
                img = cv2.copyMakeBorder(img, top, self.inpHeight - newh - top, 0, 0, cv2.BORDER_CONSTANT, value=(114, 114, 114))
        else:
            img = cv2.resize(srcimg, (self.inpWidth, self.inpHeight), interpolation=cv2.INTER_AREA)
        return img, newh, neww, top, left

    def _make_grid(self, nx=20, ny=20):
        xv, yv = np.meshgrid(np.arange(ny), np.arange(nx))
        return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)

    def preprocess(self, img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0
        return img

    def postprocess(self, frame, outs, padsize=None):
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]
        newh, neww, padh, padw = padsize
        ratioh, ratiow = frameHeight / newh, frameWidth / neww
        # Scan through all the bounding boxes output from the network and keep only the
        # ones with high confidence scores. Assign the box's class label as the class with the highest score.

        confidences = []
        boxes = []
        classIds = []
        for detection in outs:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            # if confidence > self.confThreshold and detection[4] > self.objThreshold:
            if detection[4] > self.objThreshold:
                center_x = int((detection[0] - padw) * ratiow)
                center_y = int((detection[1] - padh) * ratioh)
                width = int(detection[2] * ratiow)
                height = int(detection[3] * ratioh)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)

                confidences.append(float(confidence))
                boxes.append([left, top, width, height])
                classIds.append(classId)
        # Perform non maximum suppression to eliminate redundant overlapping boxes with
        # lower confidences.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold).flatten()
        for i in indices:
            box = boxes[i]
            left = box[0]
            top = box[1]
            width = box[2]
            height = box[3]
            frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)
        return frame

    def drawPred(self, frame, classId, conf, left, top, right, bottom):
        # Draw a bounding box.
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4)

        label = '%.2f' % conf
        label = '%s:%s' % (self.classes[classId], label)

        # Display the label at the top of the bounding box
        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        top = max(top, labelSize[1])
        # cv2.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255, 255, 255), cv2.FILLED)
        cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
        return frame

    def detect(self, srcimg):
        img, newh, neww, padh, padw = self.resize_image(srcimg)
        blob = cv2.dnn.blobFromImage(img, scalefactor=1 / 255.0, swapRB=True)
        # Sets the input to the network
        self.net.setInput(blob)

        # Runs the forward pass to get output of the output layers
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())

        # inference output: note that cv2.dnn returns the outputs in the opposite
        # order to the onnxruntime script: here outs[1] is the detection head and
        # outs[0] the segmentation head
        det_outs = outs[1].squeeze(axis=0)
        row_ind = 0
        for i in range(self.nl):
            h, w = int(self.inpHeight / self.stride[i]), int(self.inpWidth / self.stride[i])
            length = int(self.na * h * w)
            if self.grid[i].shape[2:4] != (h, w):
                self.grid[i] = self._make_grid(w, h)

            det_outs[row_ind:row_ind + length, 0:2] = (det_outs[row_ind:row_ind + length, 0:2] * 2. - 0.5 + np.tile(self.grid[i], (self.na, 1))) * int(self.stride[i])
            det_outs[row_ind:row_ind + length, 2:4] = (det_outs[row_ind:row_ind + length, 2:4] * 2) ** 2 * np.repeat(self.anchor_grid[i], h * w, axis=0)
            row_ind += length
        srcimg = self.postprocess(srcimg, det_outs, padsize=(newh, neww, padh, padw))

        seg_outs = outs[0].squeeze(axis=0)
        mask = seg_outs[:, padh:(self.inpHeight - padh), padw:(self.inpWidth - padw)]
        seg_id = np.argmax(mask, axis=0).astype(np.uint8)
        seg_id = cv2.resize(seg_id, (srcimg.shape[1], srcimg.shape[0]), interpolation=cv2.INTER_NEAREST)
        # srcimg = np.array(Cityscapes_COLORMAP, dtype=np.uint8)[seg_id]
        # return srcimg

        seg_img = np.array(Cityscapes_COLORMAP, dtype=np.uint8)[seg_id]
        if srcimg.shape[0] < srcimg.shape[1]:
            return np.vstack((srcimg, seg_img))
        else:
            return np.hstack((srcimg, seg_img))

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--imgpath", type=str, default='images/berlin_000002_000019_leftImg8bit.png', help="image path")
    parser.add_argument('--modelpath', type=str, default='pspv5m_citybdd_conewaterbarrier.onnx')
    parser.add_argument('--confThreshold', default=0.5, type=float, help='class confidence')
    parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh')
    parser.add_argument('--objThreshold', default=0.5, type=float, help='object confidence')
    args = parser.parse_args()

    yolonet = yolov5(args.modelpath, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold, objThreshold=args.objThreshold)
    srcimg = cv2.imread(args.imgpath)
    srcimg = yolonet.detect(srcimg)

    winName = 'Deep learning object detection in OpenCV'
    cv2.namedWindow(winName, 0)
    cv2.imshow(winName, srcimg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
--------------------------------------------------------------------------------
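One thing worth noting when moving between the two backends: the exported model has two outputs, and the two scripts index them in opposite order (`outs[0]` detection / `outs[1]` segmentation under onnxruntime, the reverse under `cv2.dnn`). A quick sketch to check the order for yourself, assuming the .onnx file has been downloaded locally:

```python
import onnxruntime as ort

# Print each graph output's name and shape: the detection head is
# (1, num_proposals, 5 + num_classes), the segmentation head (1, classes, H, W).
sess = ort.InferenceSession('pspv5m_citybdd_conewaterbarrier.onnx')
for out in sess.get_outputs():
    print(out.name, out.shape)
```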