├── CMakeLists.txt ├── README.md ├── bus.jpg ├── coco.names ├── convert-onnx ├── common.py ├── convert_onnx.py ├── yolov5l.py ├── yolov5m.py ├── yolov5s.py └── yolov5x.py ├── dog.jpg ├── main_yolo.cpp ├── main_yolov5.py ├── person.jpg ├── yolo.h └── zidane.jpg /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.17) 2 | project(yolov5-dnn-cpp-python-v2) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | add_executable(yolov5-dnn-cpp-python-v2 main_yolo.cpp) 7 | find_package(OpenCV REQUIRED) 8 | target_link_libraries(yolov5-dnn-cpp-python-v2 ${OpenCV_LIBS}) 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolov5-dnn-cpp-python-v2 2 | This program optimizes the post-processing module of the previous version: the output feature maps of the three scales are reshaped and permuted, 3 | so that their shapes become (3x80x80, 85), (3x40x40, 85) and (3x20x20, 85), and they are then concatenated along the row dimension into a single feature map. 4 | The length along the column dimension stays (num_classes+5), so the maximum class confidence can be found with OpenCV's built-in function minMaxLoc, 5 | which removes one for loop compared with the previous version. 6 | 7 | Because a dimension-transform step was added to the feature maps, the generated onnx file has changed as well; the new onnx files can be downloaded from Baidu Netdisk. 8 | Link: https://pan.baidu.com/s/11uF1QeYyu3otrGbMGhw0ZQ Password: es2w 9 | 10 | 11 | 12 | On February 26, 2022, after seeing the recently released v6.1 of https://github.com/ultralytics/yolov5, 13 | I wrote programs that deploy yolov5-v6.1 object detection with OpenCV and with ONNXRuntime, each in both a C++ and a Python version. 14 | The source code is at: https://github.com/hpc203/yolov5-v6.1-opencv-onnxrun 15 | -------------------------------------------------------------------------------- /bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolov5-dnn-cpp-python-v2/025a52f94a2afc71e43a7b2fe761f10b56914331/bus.jpg -------------------------------------------------------------------------------- /coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /convert-onnx/common.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | import math 5 | import numpy as np 6 | from tqdm import tqdm 7 | import numpy as np 8 | 9 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 10 | 11 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 12 | @staticmethod 13 | def forward(x): 14 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 15 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
# for torchscript, CoreML and ONNX 16 | 17 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 18 | @staticmethod 19 | def forward(x): 20 | return x * torch.sigmoid(x) 21 | 22 | def DWConv(c1, c2, k=1, s=1, act=True): 23 | # Depthwise convolution 24 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 25 | 26 | def autopad(k, p=None): # kernel, padding 27 | # Pad to 'same' 28 | if p is None: 29 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 30 | return p 31 | 32 | class Conv(nn.Module): 33 | # Standard convolution 34 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 35 | super(Conv, self).__init__() 36 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 37 | self.bn = nn.BatchNorm2d(c2) 38 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 39 | 40 | def forward(self, x): 41 | return self.act(self.bn(self.conv(x))) 42 | 43 | def fuseforward(self, x): 44 | return self.act(self.conv(x)) 45 | 46 | class Bottleneck(nn.Module): 47 | # Standard bottleneck 48 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 49 | super(Bottleneck, self).__init__() 50 | c_ = int(c2 * e) # hidden channels 51 | self.cv1 = Conv(c1, c_, 1, 1) 52 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 53 | self.add = shortcut and c1 == c2 54 | 55 | def forward(self, x): 56 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 57 | 58 | class BottleneckCSP(nn.Module): 59 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 60 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 61 | super(BottleneckCSP, self).__init__() 62 | c_ = int(c2 * e) # hidden channels 63 | self.cv1 = Conv(c1, c_, 1, 1) 64 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 65 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 66 | self.cv4 = Conv(c2, c2, 1, 1) 67 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 68 | self.act = nn.LeakyReLU(0.1, inplace=True) 69 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 70 | 71 | def forward(self, x): 72 | y1 = self.cv3(self.m(self.cv1(x))) 73 | y2 = self.cv2(x) 74 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 75 | 76 | # cat_y = torch.cat((y1, y2), dim=1) 77 | # out = self.cv4(self.act(self.bn(cat_y))) 78 | # return out 79 | 80 | class SPP(nn.Module): 81 | # Spatial pyramid pooling layer used in YOLOv3-SPP 82 | def __init__(self, c1, c2, k=(5, 9, 13)): 83 | super(SPP, self).__init__() 84 | c_ = c1 // 2 # hidden channels 85 | self.cv1 = Conv(c1, c_, 1, 1) 86 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 87 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 88 | 89 | def forward(self, x): 90 | x = self.cv1(x) 91 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 92 | 93 | class Focus(nn.Module): 94 | # Focus wh information into c-space 95 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 96 | super(Focus, self).__init__() 97 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 98 | self.contract = Contract(gain=2) 99 | 100 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 101 | # return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], dim=1)) 102 | return 
self.conv(self.contract(x)) 103 | 104 | class Contract(nn.Module): 105 | # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) 106 | def __init__(self, gain=2): 107 | super().__init__() 108 | self.gain = gain 109 | 110 | def forward(self, x): 111 | N, C, H, W = x.size() # assert (H / s == 0) and (W / s == 0), 'Indivisible gain' 112 | s = self.gain 113 | x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2) 114 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) 115 | return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40) 116 | 117 | 118 | class Expand(nn.Module): 119 | # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) 120 | def __init__(self, gain=2): 121 | super().__init__() 122 | self.gain = gain 123 | 124 | def forward(self, x): 125 | N, C, H, W = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' 126 | s = self.gain 127 | x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80) 128 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) 129 | return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160) 130 | 131 | class Upsample(nn.Module): 132 | def __init__(self, size, scale, mode, align_corners=None): 133 | super(Upsample, self).__init__() 134 | self.size = size 135 | self.scale = scale 136 | self.mode = mode 137 | self.align_corners = align_corners 138 | 139 | def forward(self, x): 140 | sh = torch.tensor(x.shape) 141 | return F.interpolate(x, size=(int(sh[2]*self.scale), int(sh[3]*self.scale)), mode=self.mode, align_corners=self.align_corners) 142 | 143 | class Flatten(nn.Module): 144 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions 145 | def forward(self, x): 146 | return x.view(x.size(0), -1) 147 | 148 | class Concat(nn.Module): 149 | # Concatenate a list of tensors along dimension 150 | def __init__(self, dimension=1): 151 | super(Concat, self).__init__() 152 | self.d = dimension 153 | 154 | def forward(self, x): 155 | return torch.cat(x, self.d) 156 | 157 | class ConvPlus(nn.Module): 158 | # Plus-shaped convolution 159 | def __init__(self, c1, c2, k=3, s=1, g=1, bias=True): # ch_in, ch_out, kernel, stride, groups 160 | super(ConvPlus, self).__init__() 161 | self.cv1 = nn.Conv2d(c1, c2, (k, 1), s, (k // 2, 0), groups=g, bias=bias) 162 | self.cv2 = nn.Conv2d(c1, c2, (1, k), s, (0, k // 2), groups=g, bias=bias) 163 | 164 | def forward(self, x): 165 | return self.cv1(x) + self.cv2(x) 166 | 167 | class MixConv2d(nn.Module): 168 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 169 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 170 | super(MixConv2d, self).__init__() 171 | groups = len(k) 172 | if equal_ch: # equal c_ per group 173 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 174 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 175 | else: # equal weight.numel() per group 176 | b = [c2] + [0] * groups 177 | a = np.eye(groups + 1, groups, k=-1) 178 | a -= np.roll(a, 1, axis=1) 179 | a *= np.array(k) ** 2 180 | a[0] = 1 181 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 182 | 183 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 184 | self.bn = nn.BatchNorm2d(c2) 185 | self.act = nn.LeakyReLU(0.1, inplace=True) 186 | 187 | def forward(self, x): 188 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 189 | 190 | class CrossConv(nn.Module): 191 | # Cross Convolution Downsample 192 | def __init__(self, 
c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 193 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 194 | super(CrossConv, self).__init__() 195 | c_ = int(c2 * e) # hidden channels 196 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 197 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 198 | self.add = shortcut and c1 == c2 199 | 200 | def forward(self, x): 201 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 202 | 203 | class C3(nn.Module): 204 | # CSP Bottleneck with 3 convolutions 205 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 206 | super(C3, self).__init__() 207 | c_ = int(c2 * e) # hidden channels 208 | self.cv1 = Conv(c1, c_, 1, 1) 209 | self.cv2 = Conv(c1, c_, 1, 1) 210 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) 211 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 212 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 213 | 214 | def forward(self, x): 215 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) 216 | 217 | def fuse_conv_and_bn(conv, bn): 218 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 219 | with torch.no_grad(): 220 | # init 221 | fusedconv = torch.nn.Conv2d(conv.in_channels, 222 | conv.out_channels, 223 | kernel_size=conv.kernel_size, 224 | stride=conv.stride, 225 | padding=conv.padding, 226 | bias=True) 227 | 228 | # prepare filters 229 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 230 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 231 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 232 | 233 | # prepare spatial bias 234 | if conv.bias is not None: 235 | b_conv = conv.bias 236 | else: 237 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) 238 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 239 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 240 | return fusedconv 241 | 242 | class Yolo_Layers(nn.Module): 243 | def __init__(self, nc=80, anchors=(), ch=(), training=False): # detection layer 244 | super(Yolo_Layers, self).__init__() 245 | self.stride = torch.tensor([ 8., 16., 32.]).to(device) # strides computed during build 246 | self.no = nc + 5 # number of outputs per anchor 247 | self.nl = len(anchors) # number of detection layers 248 | self.na = len(anchors[0]) // 2 # number of anchors 249 | self.grid = [torch.zeros(1)] * self.nl # init grid 250 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 251 | self.ch = ch 252 | self.anchor_grid = torch.tensor(anchors).float().view(self.nl, 1, -1, 1, 1, 2).to(device) 253 | self.anchors = self.anchor_grid.view(self.nl, -1, 2) / self.stride.view(-1, 1, 1) 254 | self.training = training # onnx export 255 | 256 | def forward(self, x): 257 | # x = x.copy() # for profiling 258 | z = [] # inference output 259 | for i in range(self.nl): 260 | x[i] = self.m[i](x[i]) # conv 261 | # np.save('out'+str(i)+'.npy', x[i].data.cpu().numpy()) 262 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 263 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 264 | 265 | if not self.training: # inference 266 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 267 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 268 | # np.save('torch_grid' + str(i) + '.npy', 
self.grid[i].data.cpu().numpy()) 269 | y = x[i].sigmoid() 270 | # np.save('torch_x' + str(i) + 'sigmoid.npy', y.data.cpu().numpy()) 271 | # y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 272 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * int(self.stride[i]) # xy 273 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 274 | z.append(y.view(bs, -1, self.no)) 275 | 276 | return x if self.training else (torch.cat(z, 1), x) 277 | 278 | @staticmethod 279 | def _make_grid(nx=20, ny=20): 280 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 281 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 282 | 283 | def weights_init_normal(m): 284 | classname = m.__class__.__name__ 285 | if classname.find("Conv") != -1: 286 | torch.nn.init.normal_(m.weight.data, 0.0, 0.02) 287 | elif classname.find("BatchNorm2d") != -1: 288 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 289 | torch.nn.init.constant_(m.bias.data, 0.0) 290 | 291 | def to_cpu(tensor): 292 | return tensor.detach().cpu() 293 | 294 | def bbox_iou(box1, box2, x1y1x2y2=True): 295 | """ 296 | Returns the IoU of two bounding boxes 297 | """ 298 | if not x1y1x2y2: 299 | # Transform from center and width to exact coordinates 300 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 301 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 302 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 303 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 304 | else: 305 | # Get the coordinates of bounding boxes 306 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 307 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 308 | 309 | # get the corrdinates of the intersection rectangle 310 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 311 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 312 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 313 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 314 | # Intersection area 315 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( 316 | inter_rect_y2 - inter_rect_y1 + 1, min=0 317 | ) 318 | # Union Area 319 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 320 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 321 | 322 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) 323 | 324 | return iou 325 | 326 | def get_batch_statistics(outputs, targets, iou_threshold): 327 | """ Compute true positives, predicted scores and predicted labels per sample """ 328 | batch_metrics = [] 329 | for sample_i in range(len(outputs)): 330 | 331 | if outputs[sample_i] is None: 332 | continue 333 | 334 | output = outputs[sample_i] 335 | pred_boxes = output[:, :4] 336 | pred_scores = output[:, 4] 337 | pred_labels = output[:, -1] 338 | 339 | true_positives = np.zeros(pred_boxes.shape[0]) 340 | 341 | annotations = targets[targets[:, 0] == sample_i][:, 1:] 342 | target_labels = annotations[:, 0] if len(annotations) else [] 343 | if len(annotations): 344 | detected_boxes = [] 345 | target_boxes = annotations[:, 1:] 346 | 347 | for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): 348 | 349 | # If targets are found break 350 | if len(detected_boxes) == len(annotations): 351 | break 352 | 353 | # Ignore if label is not one of the target labels 354 | if pred_label not in target_labels: 355 | continue 356 | 357 | iou, box_index = 
bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) 358 | if iou >= iou_threshold and box_index not in detected_boxes: 359 | true_positives[pred_i] = 1 360 | detected_boxes += [box_index] 361 | batch_metrics.append([true_positives, pred_scores, pred_labels]) 362 | return batch_metrics 363 | 364 | def compute_ap(recall, precision): 365 | """ Compute the average precision, given the recall and precision curves. 366 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 367 | 368 | # Arguments 369 | recall: The recall curve (list). 370 | precision: The precision curve (list). 371 | # Returns 372 | The average precision as computed in py-faster-rcnn. 373 | """ 374 | # correct AP calculation 375 | # first append sentinel values at the end 376 | mrec = np.concatenate(([0.0], recall, [1.0])) 377 | mpre = np.concatenate(([0.0], precision, [0.0])) 378 | 379 | # compute the precision envelope 380 | for i in range(mpre.size - 1, 0, -1): 381 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 382 | 383 | # to calculate area under PR curve, look for points 384 | # where X axis (recall) changes value 385 | i = np.where(mrec[1:] != mrec[:-1])[0] 386 | 387 | # and sum (\Delta recall) * prec 388 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 389 | return ap 390 | 391 | def ap_per_class(tp, conf, pred_cls, target_cls): 392 | """ Compute the average precision, given the recall and precision curves. 393 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 394 | # Arguments 395 | tp: True positives (list). 396 | conf: Objectness value from 0-1 (list). 397 | pred_cls: Predicted object classes (list). 398 | target_cls: True object classes (list). 399 | # Returns 400 | The average precision as computed in py-faster-rcnn. 401 | """ 402 | 403 | # Sort by objectness 404 | i = np.argsort(-conf) 405 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 406 | 407 | # Find unique classes 408 | unique_classes = np.unique(target_cls) 409 | 410 | # Create Precision-Recall curve and compute AP for each class 411 | ap, p, r = [], [], [] 412 | for c in tqdm(unique_classes, desc="Computing AP"): 413 | i = pred_cls == c 414 | n_gt = (target_cls == c).sum() # Number of ground truth objects 415 | n_p = i.sum() # Number of predicted objects 416 | 417 | if n_p == 0 and n_gt == 0: 418 | continue 419 | elif n_p == 0 or n_gt == 0: 420 | ap.append(0) 421 | r.append(0) 422 | p.append(0) 423 | else: 424 | # Accumulate FPs and TPs 425 | fpc = (1 - tp[i]).cumsum() 426 | tpc = (tp[i]).cumsum() 427 | 428 | # Recall 429 | recall_curve = tpc / (n_gt + 1e-16) 430 | r.append(recall_curve[-1]) 431 | 432 | # Precision 433 | precision_curve = tpc / (tpc + fpc) 434 | p.append(precision_curve[-1]) 435 | 436 | # AP from recall-precision curve 437 | ap.append(compute_ap(recall_curve, precision_curve)) 438 | 439 | # Compute F1 score (harmonic mean of precision and recall) 440 | p, r, ap = np.array(p), np.array(r), np.array(ap) 441 | f1 = 2 * p * r / (p + r + 1e-16) 442 | 443 | return p, r, ap, f1, unique_classes.astype("int32") -------------------------------------------------------------------------------- /convert-onnx/convert_onnx.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import argparse 4 | from yolov5s import My_YOLO as my_yolov5s 5 | from yolov5l import My_YOLO as my_yolov5l 6 | from yolov5m import My_YOLO as my_yolov5m 7 | from yolov5x import My_YOLO as my_yolov5x 8 | import operator 9 | import cv2 10 | from common import 
Conv,Hardswish,SiLU 11 | 12 | class My_YOLOv5s_extract(nn.Module): 13 | def __init__(self, YOLO, num_classes, anchors=()): 14 | super().__init__() 15 | self.backbone = YOLO.backbone_head 16 | self.ch = YOLO.yolo_layers.ch 17 | self.no = num_classes + 5 # number of outputs per anchor 18 | self.na = len(anchors[0]) // 2 # number of anchors 19 | # self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) 20 | self.m0 = nn.Conv2d(self.ch[0], self.no * self.na, 1) 21 | self.m1 = nn.Conv2d(self.ch[1], self.no * self.na, 1) 22 | self.m2 = nn.Conv2d(self.ch[2], self.no * self.na, 1) 23 | def forward(self, x): 24 | out0, out1, out2 = self.backbone(x) 25 | 26 | out0 = self.m0(out0) 27 | out1 = self.m1(out1) 28 | out2 = self.m2(out2) 29 | 30 | h, w = out0.shape[2:] 31 | out0 = out0.view(self.na, self.no, h, w).permute(0, 2, 3, 1).contiguous() ### drop the batch dimension 32 | out0 = out0.view(-1, self.no) 33 | h, w = out1.shape[2:] 34 | out1 = out1.view(self.na, self.no, h, w).permute(0, 2, 3, 1).contiguous() 35 | out1 = out1.view(-1, self.no) 36 | h, w = out2.shape[2:] 37 | out2 = out2.view(self.na, self.no, h, w).permute(0, 2, 3, 1).contiguous() 38 | out2 = out2.view(-1, self.no) 39 | return torch.cat((out0, out1, out2), 0) 40 | 41 | if __name__ == "__main__": 42 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 43 | parser = argparse.ArgumentParser() 44 | parser.add_argument('--net_type', default='yolov5s', choices=['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x']) 45 | parser.add_argument('--num_classes', default=80, type=int) 46 | args = parser.parse_args() 47 | print(args) 48 | 49 | # Set up model 50 | anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]] 51 | if args.net_type == 'yolov5s': 52 | net = my_yolov5s(args.num_classes, anchors=anchors, training=False) 53 | elif args.net_type == 'yolov5l': 54 | net = my_yolov5l(args.num_classes, anchors=anchors, training=False) 55 | elif args.net_type == 'yolov5m': 56 | net = my_yolov5m(args.num_classes, anchors=anchors, training=False) 57 | else: 58 | net = my_yolov5x(args.num_classes, anchors=anchors, training=False) 59 | 60 | net.to(device) 61 | net.eval() 62 | own_state = net.state_dict() 63 | pth = args.net_type+'_param.pth' 64 | utl_param = torch.load(pth, map_location=device) 65 | del utl_param['24.anchors'] 66 | del utl_param['24.anchor_grid'] 67 | 68 | print(len(utl_param), len(own_state)) 69 | for a, b, namea, nameb in zip(utl_param.values(), own_state.values(), utl_param.keys(), own_state.keys()): 70 | if namea.find('anchor') > -1: 71 | print('anchor') 72 | continue 73 | if not operator.eq(a.shape, b.shape): 74 | print(namea, nameb, a.shape, b.shape) 75 | else: 76 | own_state[nameb].copy_(a) 77 | 78 | onnx_model = My_YOLOv5s_extract(net, args.num_classes, anchors=anchors).to(device).eval() 79 | onnx_param = onnx_model.state_dict() 80 | 81 | print(len(utl_param), len(onnx_param)) 82 | for a, b, namea, nameb in zip(utl_param.values(), onnx_param.values(), utl_param.keys(), onnx_param.keys()): 83 | if namea.find('anchor')>-1: 84 | print('anchor') 85 | continue 86 | if not operator.eq(a.shape, b.shape): 87 | print(namea, nameb, a.shape, b.shape) 88 | else: 89 | onnx_param[nameb].copy_(a) 90 | 91 | output_onnx = args.net_type+'.onnx' 92 | inputs = torch.randn(1, 3, 640, 640).to(device) 93 | 94 | # Update model 95 | for k, m in onnx_model.named_modules(): 96 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 97 | if isinstance(m, Conv): # assign export-friendly activations 98 | if 
isinstance(m.act, nn.Hardswish): 99 | m.act = Hardswish() 100 | elif isinstance(m.act, nn.SiLU): 101 | m.act = SiLU() 102 | 103 | torch.onnx.export(onnx_model, inputs, output_onnx, verbose=False, opset_version=12, input_names=['images'], output_names=['out']) 104 | print('convert',output_onnx,'to onnx finish!!!') 105 | 106 | try: 107 | dnnnet = cv2.dnn.readNet(output_onnx) 108 | print('read sucess') 109 | except: 110 | print('read failed') 111 | -------------------------------------------------------------------------------- /convert-onnx/yolov5l.py: -------------------------------------------------------------------------------- 1 | from common import * 2 | 3 | class My_YOLO_backbone_head(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self.seq0_Focus = Focus(3, 64, 3) 7 | self.seq1_Conv = Conv(64, 128, 3, 2) 8 | self.seq2_C3 = C3(128, 128, 3) 9 | self.seq3_Conv = Conv(128, 256, 3, 2) 10 | self.seq4_C3 = C3(256, 256, 9) 11 | self.seq5_Conv = Conv(256, 512, 3, 2) 12 | self.seq6_C3 = C3(512, 512, 9) 13 | self.seq7_Conv = Conv(512, 1024, 3, 2) 14 | self.seq8_SPP = SPP(1024, 1024, [5, 9, 13]) 15 | self.seq9_C3 = C3(1024, 1024, 3, False) 16 | self.seq10_Conv = Conv(1024, 512, 1, 1) 17 | self.seq13_C3 = C3(1024, 512, 3, False) 18 | self.seq14_Conv = Conv(512, 256, 1, 1) 19 | self.seq17_C3 = C3(512, 256, 3, False) 20 | self.seq18_Conv = Conv(256, 256, 3, 2) 21 | self.seq20_C3 = C3(512, 512, 3, False) 22 | self.seq21_Conv = Conv(512, 512, 3, 2) 23 | self.seq23_C3 = C3(1024, 1024, 3, False) 24 | def forward(self, x): 25 | x = self.seq0_Focus(x) 26 | x = self.seq1_Conv(x) 27 | x = self.seq2_C3(x) 28 | x = self.seq3_Conv(x) 29 | xRt0 = self.seq4_C3(x) 30 | x = self.seq5_Conv(xRt0) 31 | xRt1 = self.seq6_C3(x) 32 | x = self.seq7_Conv(xRt1) 33 | x = self.seq8_SPP(x) 34 | x = self.seq9_C3(x) 35 | xRt2 = self.seq10_Conv(x) 36 | route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') 37 | x = torch.cat([route, xRt1], dim=1) 38 | x = self.seq13_C3(x) 39 | xRt3 = self.seq14_Conv(x) 40 | route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') 41 | x = torch.cat([route, xRt0], dim=1) 42 | out0 = self.seq17_C3(x) 43 | x = self.seq18_Conv(out0) 44 | x = torch.cat([x, xRt3], dim=1) 45 | out1 = self.seq20_C3(x) 46 | x = self.seq21_Conv(out1) 47 | x = torch.cat([x, xRt2], dim=1) 48 | out2 = self.seq23_C3(x) 49 | return out0, out1, out2 50 | 51 | class My_YOLO(nn.Module): 52 | def __init__(self, num_classes, anchors=(), training=False): 53 | super().__init__() 54 | self.backbone_head = My_YOLO_backbone_head() 55 | self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=(256,512,1024),training=training) 56 | def forward(self, x): 57 | out0, out1, out2 = self.backbone_head(x) 58 | output = self.yolo_layers([out0, out1, out2]) 59 | return output 60 | -------------------------------------------------------------------------------- /convert-onnx/yolov5m.py: -------------------------------------------------------------------------------- 1 | from common import * 2 | 3 | class My_YOLO_backbone_head(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self.seq0_Focus = Focus(3, 48, 3) 7 | self.seq1_Conv = Conv(48, 96, 3, 2) 8 | self.seq2_C3 = C3(96, 96, 2) 9 | self.seq3_Conv = Conv(96, 192, 3, 2) 10 | self.seq4_C3 = C3(192, 192, 6) 11 | self.seq5_Conv = Conv(192, 384, 3, 2) 12 | self.seq6_C3 = C3(384, 384, 6) 13 | self.seq7_Conv = Conv(384, 768, 3, 2) 14 | self.seq8_SPP = SPP(768, 768, [5, 9, 13]) 15 | 
self.seq9_C3 = C3(768, 768, 2, False) 16 | self.seq10_Conv = Conv(768, 384, 1, 1) 17 | self.seq13_C3 = C3(768, 384, 2, False) 18 | self.seq14_Conv = Conv(384, 192, 1, 1) 19 | self.seq17_C3 = C3(384, 192, 2, False) 20 | self.seq18_Conv = Conv(192, 192, 3, 2) 21 | self.seq20_C3 = C3(384, 384, 2, False) 22 | self.seq21_Conv = Conv(384, 384, 3, 2) 23 | self.seq23_C3 = C3(768, 768, 2, False) 24 | def forward(self, x): 25 | x = self.seq0_Focus(x) 26 | x = self.seq1_Conv(x) 27 | x = self.seq2_C3(x) 28 | x = self.seq3_Conv(x) 29 | xRt0 = self.seq4_C3(x) 30 | x = self.seq5_Conv(xRt0) 31 | xRt1 = self.seq6_C3(x) 32 | x = self.seq7_Conv(xRt1) 33 | x = self.seq8_SPP(x) 34 | x = self.seq9_C3(x) 35 | xRt2 = self.seq10_Conv(x) 36 | route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') 37 | x = torch.cat([route, xRt1], dim=1) 38 | x = self.seq13_C3(x) 39 | xRt3 = self.seq14_Conv(x) 40 | route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') 41 | x = torch.cat([route, xRt0], dim=1) 42 | out0 = self.seq17_C3(x) 43 | x = self.seq18_Conv(out0) 44 | x = torch.cat([x, xRt3], dim=1) 45 | out1 = self.seq20_C3(x) 46 | x = self.seq21_Conv(out1) 47 | x = torch.cat([x, xRt2], dim=1) 48 | out2 = self.seq23_C3(x) 49 | return out0, out1, out2 50 | 51 | class My_YOLO(nn.Module): 52 | def __init__(self, num_classes, anchors=(), training=False): 53 | super().__init__() 54 | self.backbone_head = My_YOLO_backbone_head() 55 | self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=(192,384,768),training=training) 56 | def forward(self, x): 57 | out0, out1, out2 = self.backbone_head(x) 58 | output = self.yolo_layers([out0, out1, out2]) 59 | return output 60 | -------------------------------------------------------------------------------- /convert-onnx/yolov5s.py: -------------------------------------------------------------------------------- 1 | from common import * 2 | 3 | class My_YOLO_backbone_head(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self.seq0_Focus = Focus(3, 32, 3) 7 | self.seq1_Conv = Conv(32, 64, 3, 2) 8 | self.seq2_C3 = C3(64, 64, 1) 9 | self.seq3_Conv = Conv(64, 128, 3, 2) 10 | self.seq4_C3 = C3(128, 128, 3) 11 | self.seq5_Conv = Conv(128, 256, 3, 2) 12 | self.seq6_C3 = C3(256, 256, 3) 13 | self.seq7_Conv = Conv(256, 512, 3, 2) 14 | self.seq8_SPP = SPP(512, 512, [5, 9, 13]) 15 | self.seq9_C3 = C3(512, 512, 1, False) 16 | self.seq10_Conv = Conv(512, 256, 1, 1) 17 | self.seq13_C3 = C3(512, 256, 1, False) 18 | self.seq14_Conv = Conv(256, 128, 1, 1) 19 | self.seq17_C3 = C3(256, 128, 1, False) 20 | self.seq18_Conv = Conv(128, 128, 3, 2) 21 | self.seq20_C3 = C3(256, 256, 1, False) 22 | self.seq21_Conv = Conv(256, 256, 3, 2) 23 | self.seq23_C3 = C3(512, 512, 1, False) 24 | def forward(self, x): 25 | x = self.seq0_Focus(x) 26 | x = self.seq1_Conv(x) 27 | x = self.seq2_C3(x) 28 | x = self.seq3_Conv(x) 29 | xRt0 = self.seq4_C3(x) 30 | x = self.seq5_Conv(xRt0) 31 | xRt1 = self.seq6_C3(x) 32 | x = self.seq7_Conv(xRt1) 33 | x = self.seq8_SPP(x) 34 | x = self.seq9_C3(x) 35 | xRt2 = self.seq10_Conv(x) 36 | route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') 37 | x = torch.cat([route, xRt1], dim=1) 38 | x = self.seq13_C3(x) 39 | xRt3 = self.seq14_Conv(x) 40 | route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') 41 | x = torch.cat([route, xRt0], dim=1) 42 | out0 = self.seq17_C3(x) 43 | x = self.seq18_Conv(out0) 44 | 
x = torch.cat([x, xRt3], dim=1) 45 | out1 = self.seq20_C3(x) 46 | x = self.seq21_Conv(out1) 47 | x = torch.cat([x, xRt2], dim=1) 48 | out2 = self.seq23_C3(x) 49 | return out0, out1, out2 50 | 51 | class My_YOLO(nn.Module): 52 | def __init__(self, num_classes, anchors=(), training=False): 53 | super().__init__() 54 | self.backbone_head = My_YOLO_backbone_head() 55 | self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=(128,256,512),training=training) 56 | def forward(self, x): 57 | out0, out1, out2 = self.backbone_head(x) 58 | output = self.yolo_layers([out0, out1, out2]) 59 | return output 60 | -------------------------------------------------------------------------------- /convert-onnx/yolov5x.py: -------------------------------------------------------------------------------- 1 | from common import * 2 | 3 | class My_YOLO_backbone_head(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self.seq0_Focus = Focus(3, 80, 3) 7 | self.seq1_Conv = Conv(80, 160, 3, 2) 8 | self.seq2_C3 = C3(160, 160, 4) 9 | self.seq3_Conv = Conv(160, 320, 3, 2) 10 | self.seq4_C3 = C3(320, 320, 12) 11 | self.seq5_Conv = Conv(320, 640, 3, 2) 12 | self.seq6_C3 = C3(640, 640, 12) 13 | self.seq7_Conv = Conv(640, 1280, 3, 2) 14 | self.seq8_SPP = SPP(1280, 1280, [5, 9, 13]) 15 | self.seq9_C3 = C3(1280, 1280, 4, False) 16 | self.seq10_Conv = Conv(1280, 640, 1, 1) 17 | self.seq13_C3 = C3(1280, 640, 4, False) 18 | self.seq14_Conv = Conv(640, 320, 1, 1) 19 | self.seq17_C3 = C3(640, 320, 4, False) 20 | self.seq18_Conv = Conv(320, 320, 3, 2) 21 | self.seq20_C3 = C3(640, 640, 4, False) 22 | self.seq21_Conv = Conv(640, 640, 3, 2) 23 | self.seq23_C3 = C3(1280, 1280, 4, False) 24 | def forward(self, x): 25 | x = self.seq0_Focus(x) 26 | x = self.seq1_Conv(x) 27 | x = self.seq2_C3(x) 28 | x = self.seq3_Conv(x) 29 | xRt0 = self.seq4_C3(x) 30 | x = self.seq5_Conv(xRt0) 31 | xRt1 = self.seq6_C3(x) 32 | x = self.seq7_Conv(xRt1) 33 | x = self.seq8_SPP(x) 34 | x = self.seq9_C3(x) 35 | xRt2 = self.seq10_Conv(x) 36 | route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') 37 | x = torch.cat([route, xRt1], dim=1) 38 | x = self.seq13_C3(x) 39 | xRt3 = self.seq14_Conv(x) 40 | route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') 41 | x = torch.cat([route, xRt0], dim=1) 42 | out0 = self.seq17_C3(x) 43 | x = self.seq18_Conv(out0) 44 | x = torch.cat([x, xRt3], dim=1) 45 | out1 = self.seq20_C3(x) 46 | x = self.seq21_Conv(out1) 47 | x = torch.cat([x, xRt2], dim=1) 48 | out2 = self.seq23_C3(x) 49 | return out0, out1, out2 50 | 51 | class My_YOLO(nn.Module): 52 | def __init__(self, num_classes, anchors=(), training=False): 53 | super().__init__() 54 | self.backbone_head = My_YOLO_backbone_head() 55 | self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=(320,640,1280),training=training) 56 | def forward(self, x): 57 | out0, out1, out2 = self.backbone_head(x) 58 | output = self.yolo_layers([out0, out1, out2]) 59 | return output 60 | -------------------------------------------------------------------------------- /dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolov5-dnn-cpp-python-v2/025a52f94a2afc71e43a7b2fe761f10b56914331/dog.jpg -------------------------------------------------------------------------------- /main_yolo.cpp: -------------------------------------------------------------------------------- 1 | #include "yolo.h" 2 | 3 | 
YOLO::YOLO(Net_config config) 4 | { 5 | cout << "Net use " << config.netname << endl; 6 | this->confThreshold = config.confThreshold; 7 | this->nmsThreshold = config.nmsThreshold; 8 | this->objThreshold = config.objThreshold; 9 | strcpy(this->netname, config.netname.c_str()); 10 | 11 | ifstream ifs(this->classesFile.c_str()); 12 | string line; 13 | while (getline(ifs, line)) this->classes.push_back(line); 14 | 15 | string modelFile = this->netname; 16 | modelFile += ".onnx"; 17 | this->net = readNet(modelFile); 18 | } 19 | 20 | void YOLO::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame) // Draw the predicted bounding box 21 | { 22 | //Draw a rectangle displaying the bounding box 23 | rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3); 24 | 25 | //Get the label for the class name and its confidence 26 | string label = format("%.2f", conf); 27 | label = this->classes[classId] + ":" + label; 28 | 29 | //Display the label at the top of the bounding box 30 | int baseLine; 31 | Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); 32 | top = max(top, labelSize.height); 33 | //rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED); 34 | putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0), 2); 35 | } 36 | 37 | void YOLO::sigmoid(Mat* out, int length) 38 | { 39 | float* pdata = (float*)(out->data); 40 | int i = 0; 41 | for (i = 0; i < length; i++) 42 | { 43 | pdata[i] = 1.0 / (1 + expf(-pdata[i])); 44 | } 45 | } 46 | 47 | void YOLO::detect(Mat& frame) 48 | { 49 | Mat blob; 50 | blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false); 51 | this->net.setInput(blob); 52 | vector<Mat> outs; 53 | this->net.forward(outs, this->net.getUnconnectedOutLayersNames()); 54 | 55 | /////generate proposals 56 | vector<int> classIds; 57 | vector<float> confidences; 58 | vector<Rect> boxes; 59 | float ratioh = (float)frame.rows / this->inpHeight, ratiow = (float)frame.cols / this->inpWidth; 60 | int n = 0, q = 0, i = 0, j = 0, nout = this->classes.size() + 5, row_ind = 0; 61 | for (n = 0; n < 3; n++) ///scales 62 | { 63 | int num_grid_x = (int)(this->inpWidth / this->stride[n]); 64 | int num_grid_y = (int)(this->inpHeight / this->stride[n]); 65 | for (q = 0; q < 3; q++) ///anchors 66 | { 67 | const float anchor_w = this->anchors[n][q * 2]; 68 | const float anchor_h = this->anchors[n][q * 2 + 1]; 69 | for (i = 0; i < num_grid_y; i++) 70 | { 71 | for (j = 0; j < num_grid_x; j++) 72 | { 73 | float* pdata = (float*)outs[0].data + row_ind * nout; 74 | float box_score = sigmoid_x(pdata[4]); 75 | if (box_score > this->objThreshold) 76 | { 77 | Mat scores = outs[0].row(row_ind).colRange(5, outs[0].cols); 78 | Point classIdPoint; 79 | double max_class_score; 80 | // Get the value and location of the maximum score 81 | minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint); 82 | max_class_score = sigmoid_x((float)max_class_score); 83 | if (max_class_score > this->confThreshold) 84 | { 85 | float cx = (sigmoid_x(pdata[0]) * 2.f - 0.5f + j) * this->stride[n]; ///cx 86 | float cy = (sigmoid_x(pdata[1]) * 2.f - 0.5f + i) * this->stride[n]; ///cy 87 | float w = powf(sigmoid_x(pdata[2]) * 2.f, 2.f) * anchor_w; ///w 88 | float h = powf(sigmoid_x(pdata[3]) * 2.f, 2.f) * anchor_h; ///h 89 | 90 | int left = (cx - 0.5*w)*ratiow; 91 | int top = (cy - 0.5*h)*ratioh; ///map the coordinates back to the original image 92 | 93 | 
classIds.push_back(classIdPoint.x); 94 | confidences.push_back(max_class_score); 95 | boxes.push_back(Rect(left, top, (int)(w*ratiow), (int)(h*ratioh))); 96 | } 97 | } 98 | row_ind++; 99 | } 100 | } 101 | } 102 | } 103 | 104 | // Perform non maximum suppression to eliminate redundant overlapping boxes with 105 | // lower confidences 106 | vector<int> indices; 107 | NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices); 108 | for (size_t i = 0; i < indices.size(); ++i) 109 | { 110 | int idx = indices[i]; 111 | Rect box = boxes[idx]; 112 | this->drawPred(classIds[idx], confidences[idx], box.x, box.y, 113 | box.x + box.width, box.y + box.height, frame); 114 | } 115 | } 116 | 117 | int main() 118 | { 119 | YOLO yolo_model(yolo_nets[3]); 120 | string imgpath = "bus.jpg"; 121 | Mat srcimg = imread(imgpath); 122 | yolo_model.detect(srcimg); 123 | 124 | static const string kWinName = "Deep learning object detection in OpenCV"; 125 | namedWindow(kWinName, WINDOW_NORMAL); 126 | imshow(kWinName, srcimg); 127 | waitKey(0); 128 | destroyAllWindows(); 129 | } -------------------------------------------------------------------------------- /main_yolov5.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import argparse 3 | import numpy as np 4 | 5 | class yolov5(): 6 | def __init__(self, yolo_type, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5): 7 | with open('coco.names', 'rt') as f: 8 | self.classes = f.read().rstrip('\n').split('\n') ### this model was trained on the COCO dataset for OpenCV deployment; if you deploy a model trained on your own dataset with OpenCV, you need to change self.classes accordingly 9 | self.colors = [np.random.randint(0, 255, size=3).tolist() for _ in range(len(self.classes))] 10 | num_classes = len(self.classes) 11 | anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]] 12 | self.nl = len(anchors) 13 | self.na = len(anchors[0]) // 2 14 | self.no = num_classes + 5 15 | self.grid = [np.zeros(1)] * self.nl 16 | self.stride = np.array([8., 16., 32.]) 17 | self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, -1, 2) 18 | self.inpWidth = 640 19 | self.inpHeight = 640 20 | self.net = cv2.dnn.readNet(yolo_type + '.onnx') 21 | self.confThreshold = confThreshold 22 | self.nmsThreshold = nmsThreshold 23 | self.objThreshold = objThreshold 24 | 25 | def _make_grid(self, nx=20, ny=20): 26 | xv, yv = np.meshgrid(np.arange(ny), np.arange(nx)) 27 | return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32) 28 | 29 | def postprocess(self, frame, outs): 30 | frameHeight = frame.shape[0] 31 | frameWidth = frame.shape[1] 32 | ratioh, ratiow = frameHeight / self.inpHeight, frameWidth / self.inpWidth 33 | # Scan through all the bounding boxes output from the network and keep only the 34 | # ones with high confidence scores. Assign the box's class label as the class with the highest score. 
35 | classIds = [] 36 | confidences = [] 37 | boxes = [] 38 | for detection in outs: 39 | scores = detection[5:] 40 | classId = np.argmax(scores) 41 | confidence = scores[classId] 42 | if confidence > self.confThreshold and detection[4] > self.objThreshold: 43 | center_x = int(detection[0] * ratiow) 44 | center_y = int(detection[1] * ratioh) 45 | width = int(detection[2] * ratiow) 46 | height = int(detection[3] * ratioh) 47 | left = int(center_x - width / 2) 48 | top = int(center_y - height / 2) 49 | classIds.append(classId) 50 | confidences.append(float(confidence)) 51 | boxes.append([left, top, width, height]) 52 | 53 | # Perform non maximum suppression to eliminate redundant overlapping boxes with 54 | # lower confidences. 55 | indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold) 56 | for i in indices: 57 | i = i[0] 58 | box = boxes[i] 59 | left = box[0] 60 | top = box[1] 61 | width = box[2] 62 | height = box[3] 63 | frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height) 64 | return frame 65 | def drawPred(self, frame, classId, conf, left, top, right, bottom): 66 | # Draw a bounding box. 67 | cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4) 68 | 69 | label = '%.2f' % conf 70 | label = '%s:%s' % (self.classes[classId], label) 71 | 72 | # Display the label at the top of the bounding box 73 | labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) 74 | top = max(top, labelSize[1]) 75 | # cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv.FILLED) 76 | cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2) 77 | return frame 78 | def detect(self, srcimg): 79 | blob = cv2.dnn.blobFromImage(srcimg, 1 / 255.0, (self.inpWidth, self.inpHeight), [0, 0, 0], swapRB=True, crop=False) 80 | # Sets the input to the network 81 | self.net.setInput(blob) 82 | 83 | # Runs the forward pass to get output of the output layers 84 | outs = self.net.forward(self.net.getUnconnectedOutLayersNames())[0] 85 | 86 | # inference output 87 | outs = 1 / (1 + np.exp(-outs)) ###sigmoid 88 | row_ind = 0 89 | for i in range(self.nl): 90 | h, w = int(self.inpHeight/self.stride[i]), int(self.inpWidth/self.stride[i]) 91 | length = int(self.na * h * w) 92 | if self.grid[i].shape[2:4] != (h,w): 93 | self.grid[i] = self._make_grid(w, h) 94 | 95 | outs[row_ind:row_ind+length, 0:2] = (outs[row_ind:row_ind+length, 0:2] * 2. 
- 0.5 + np.tile(self.grid[i],(self.na, 1))) * int(self.stride[i]) 96 | outs[row_ind:row_ind+length, 2:4] = (outs[row_ind:row_ind+length, 2:4] * 2) ** 2 * np.repeat(self.anchor_grid[i],h*w, axis=0) 97 | row_ind += length 98 | return outs 99 | 100 | if __name__ == "__main__": 101 | parser = argparse.ArgumentParser() 102 | parser.add_argument("--imgpath", type=str, default='bus.jpg', help="image path") 103 | parser.add_argument('--net_type', default='yolov5s', choices=['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x']) 104 | parser.add_argument('--confThreshold', default=0.5, type=float, help='class confidence') 105 | parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh') 106 | parser.add_argument('--objThreshold', default=0.5, type=float, help='object confidence') 107 | args = parser.parse_args() 108 | 109 | yolonet = yolov5(args.net_type, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold, objThreshold=args.objThreshold) 110 | srcimg = cv2.imread(args.imgpath) 111 | dets = yolonet.detect(srcimg) 112 | srcimg = yolonet.postprocess(srcimg, dets) 113 | 114 | winName = 'Deep learning object detection in OpenCV' 115 | cv2.namedWindow(winName, 0) 116 | cv2.imshow(winName, srcimg) 117 | cv2.waitKey(0) 118 | cv2.destroyAllWindows() 119 | -------------------------------------------------------------------------------- /person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolov5-dnn-cpp-python-v2/025a52f94a2afc71e43a7b2fe761f10b56914331/person.jpg -------------------------------------------------------------------------------- /yolo.h: -------------------------------------------------------------------------------- 1 | #include <fstream> 2 | #include <sstream> 3 | #include <iostream> 4 | #include <opencv2/imgproc.hpp> 5 | #include <opencv2/highgui.hpp> 6 | #include <opencv2/dnn.hpp> 7 | 8 | using namespace cv; 9 | using namespace dnn; 10 | using namespace std; 11 | 12 | struct Net_config 13 | { 14 | float confThreshold; // class Confidence threshold 15 | float nmsThreshold; // Non-maximum suppression threshold 16 | float objThreshold; //Object Confidence threshold 17 | string netname; 18 | }; 19 | 20 | class YOLO 21 | { 22 | public: 23 | YOLO(Net_config config); 24 | void detect(Mat& frame); 25 | private: 26 | const float anchors[3][6] = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, {30.0, 61.0, 62.0, 45.0, 59.0, 119.0},{116.0, 90.0, 156.0, 198.0, 373.0, 326.0}}; 27 | const float stride[3] = { 8.0, 16.0, 32.0 }; 28 | const string classesFile = "coco.names"; 29 | const int inpWidth = 640; 30 | const int inpHeight = 640; 31 | float confThreshold; 32 | float nmsThreshold; 33 | float objThreshold; 34 | 35 | char netname[20]; 36 | vector<string> classes; 37 | Net net; 38 | void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame); 39 | void sigmoid(Mat* out, int length); 40 | }; 41 | 42 | static inline float sigmoid_x(float x) 43 | { 44 | return static_cast<float>(1.f / (1.f + exp(-x))); 45 | } 46 | 47 | Net_config yolo_nets[4] = { 48 | {0.5, 0.5, 0.5, "yolov5s"}, 49 | {0.5, 0.5, 0.5, "yolov5m"}, 50 | {0.5, 0.5, 0.5, "yolov5l"}, 51 | {0.5, 0.5, 0.5, "yolov5x"} 52 | }; 53 | -------------------------------------------------------------------------------- /zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolov5-dnn-cpp-python-v2/025a52f94a2afc71e43a7b2fe761f10b56914331/zidane.jpg --------------------------------------------------------------------------------
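A quick way to check an exported model against the output layout described in the README. This is a minimal sketch, not a file from this repository; it assumes a yolov5s.onnx exported by convert-onnx/convert_onnx.py (or downloaded from the Baidu Netdisk link) and bus.jpg sit in the working directory. For a 640x640 input and 80 classes, OpenCV's DNN module should return the single concatenated output with 3x80x80 + 3x40x40 + 3x20x20 = 25200 rows and num_classes + 5 = 85 columns:

import cv2

# Load the ONNX file exported for OpenCV DNN (assumed to exist in the working directory)
net = cv2.dnn.readNet('yolov5s.onnx')
# Same preprocessing as main_yolov5.py: scale to [0,1], resize to 640x640, BGR -> RGB
blob = cv2.dnn.blobFromImage(cv2.imread('bus.jpg'), 1 / 255.0, (640, 640), [0, 0, 0], swapRB=True, crop=False)
net.setInput(blob)
out = net.forward(net.getUnconnectedOutLayersNames())[0]
print(out.shape)  # expected (25200, 85): rows are boxes from all three scales, columns are (x, y, w, h, obj, 80 class scores)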