├── CMakeLists.txt ├── README.md ├── bus.jpg ├── coco.names ├── convert-onnx ├── common.py ├── convert_onnx.py ├── yolov5l.py ├── yolov5m.py ├── yolov5s.py └── yolov5x.py ├── dog.jpg ├── main_yolo.cpp ├── main_yolov5.py ├── person.jpg ├── yolo.h └── zidane.jpg /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.17) 2 | project(yolov5-dnn-cpp-python-v2) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | add_executable(yolov5-dnn-cpp-python-v2 main_yolo.cpp) 7 | find_package(OpenCV REQUIRED) 8 | target_link_libraries(yolov5-dnn-cpp-python-v2 ${OpenCV_LIBS}) 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolov5-dnn-cpp-python-v2 2 | This program optimizes the post-processing module of the previous version: the output feature maps of the three scales are reshaped and permuted, 3 | so that their shapes become (3x80x80, 85), (3x40x40, 85) and (3x20x20, 85), and they are then concatenated along the row dimension into a single feature map. 4 | The length along the column dimension stays (num_classes+5), so the maximum class confidence can be found with OpenCV's built-in function minMaxLoc, 5 | which removes one for loop compared with the previous version. 6 | 7 | Because a dimension-transform step was added to the feature maps, the generated onnx file has changed as well; the new onnx files can be downloaded from Baidu Netdisk. 8 | Link: https://pan.baidu.com/s/11uF1QeYyu3otrGbMGhw0ZQ Password: es2w 9 | 10 | 11 | 12 | On February 26, 2022, after seeing the recently released v6.1 of https://github.com/ultralytics/yolov5, 13 | I wrote programs that deploy yolov5-v6.1 object detection with OpenCV and with ONNXRuntime, each in both a C++ and a Python version. 14 | The source code is at: https://github.com/hpc203/yolov5-v6.1-opencv-onnxrun 15 | -------------------------------------------------------------------------------- /bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolov5-dnn-cpp-python-v2/025a52f94a2afc71e43a7b2fe761f10b56914331/bus.jpg -------------------------------------------------------------------------------- /coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /convert-onnx/common.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | import math 5 | import numpy as np 6 | from tqdm import tqdm 7 | import numpy as np 8 | 9 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 10 | 11 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 12 | @staticmethod 13 | def forward(x): 14 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 15 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
# for torchscript, CoreML and ONNX 16 | 17 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 18 | @staticmethod 19 | def forward(x): 20 | return x * torch.sigmoid(x) 21 | 22 | def DWConv(c1, c2, k=1, s=1, act=True): 23 | # Depthwise convolution 24 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 25 | 26 | def autopad(k, p=None): # kernel, padding 27 | # Pad to 'same' 28 | if p is None: 29 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 30 | return p 31 | 32 | class Conv(nn.Module): 33 | # Standard convolution 34 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 35 | super(Conv, self).__init__() 36 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 37 | self.bn = nn.BatchNorm2d(c2) 38 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 39 | 40 | def forward(self, x): 41 | return self.act(self.bn(self.conv(x))) 42 | 43 | def fuseforward(self, x): 44 | return self.act(self.conv(x)) 45 | 46 | class Bottleneck(nn.Module): 47 | # Standard bottleneck 48 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 49 | super(Bottleneck, self).__init__() 50 | c_ = int(c2 * e) # hidden channels 51 | self.cv1 = Conv(c1, c_, 1, 1) 52 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 53 | self.add = shortcut and c1 == c2 54 | 55 | def forward(self, x): 56 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 57 | 58 | class BottleneckCSP(nn.Module): 59 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 60 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 61 | super(BottleneckCSP, self).__init__() 62 | c_ = int(c2 * e) # hidden channels 63 | self.cv1 = Conv(c1, c_, 1, 1) 64 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 65 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 66 | self.cv4 = Conv(c2, c2, 1, 1) 67 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 68 | self.act = nn.LeakyReLU(0.1, inplace=True) 69 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 70 | 71 | def forward(self, x): 72 | y1 = self.cv3(self.m(self.cv1(x))) 73 | y2 = self.cv2(x) 74 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 75 | 76 | # cat_y = torch.cat((y1, y2), dim=1) 77 | # out = self.cv4(self.act(self.bn(cat_y))) 78 | # return out 79 | 80 | class SPP(nn.Module): 81 | # Spatial pyramid pooling layer used in YOLOv3-SPP 82 | def __init__(self, c1, c2, k=(5, 9, 13)): 83 | super(SPP, self).__init__() 84 | c_ = c1 // 2 # hidden channels 85 | self.cv1 = Conv(c1, c_, 1, 1) 86 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 87 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 88 | 89 | def forward(self, x): 90 | x = self.cv1(x) 91 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 92 | 93 | class Focus(nn.Module): 94 | # Focus wh information into c-space 95 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 96 | super(Focus, self).__init__() 97 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 98 | self.contract = Contract(gain=2) 99 | 100 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 101 | # return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], dim=1)) 102 | return 
self.conv(self.contract(x)) 103 | 104 | class Contract(nn.Module): 105 | # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) 106 | def __init__(self, gain=2): 107 | super().__init__() 108 | self.gain = gain 109 | 110 | def forward(self, x): 111 | N, C, H, W = x.size() # assert (H / s == 0) and (W / s == 0), 'Indivisible gain' 112 | s = self.gain 113 | x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2) 114 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) 115 | return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40) 116 | 117 | 118 | class Expand(nn.Module): 119 | # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) 120 | def __init__(self, gain=2): 121 | super().__init__() 122 | self.gain = gain 123 | 124 | def forward(self, x): 125 | N, C, H, W = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' 126 | s = self.gain 127 | x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80) 128 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) 129 | return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160) 130 | 131 | class Upsample(nn.Module): 132 | def __init__(self, size, scale, mode, align_corners=None): 133 | super(Upsample, self).__init__() 134 | self.size = size 135 | self.scale = scale 136 | self.mode = mode 137 | self.align_corners = align_corners 138 | 139 | def forward(self, x): 140 | sh = torch.tensor(x.shape) 141 | return F.interpolate(x, size=(int(sh[2]*self.scale), int(sh[3]*self.scale)), mode=self.mode, align_corners=self.align_corners) 142 | 143 | class Flatten(nn.Module): 144 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions 145 | def forward(self, x): 146 | return x.view(x.size(0), -1) 147 | 148 | class Concat(nn.Module): 149 | # Concatenate a list of tensors along dimension 150 | def __init__(self, dimension=1): 151 | super(Concat, self).__init__() 152 | self.d = dimension 153 | 154 | def forward(self, x): 155 | return torch.cat(x, self.d) 156 | 157 | class ConvPlus(nn.Module): 158 | # Plus-shaped convolution 159 | def __init__(self, c1, c2, k=3, s=1, g=1, bias=True): # ch_in, ch_out, kernel, stride, groups 160 | super(ConvPlus, self).__init__() 161 | self.cv1 = nn.Conv2d(c1, c2, (k, 1), s, (k // 2, 0), groups=g, bias=bias) 162 | self.cv2 = nn.Conv2d(c1, c2, (1, k), s, (0, k // 2), groups=g, bias=bias) 163 | 164 | def forward(self, x): 165 | return self.cv1(x) + self.cv2(x) 166 | 167 | class MixConv2d(nn.Module): 168 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 169 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 170 | super(MixConv2d, self).__init__() 171 | groups = len(k) 172 | if equal_ch: # equal c_ per group 173 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 174 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 175 | else: # equal weight.numel() per group 176 | b = [c2] + [0] * groups 177 | a = np.eye(groups + 1, groups, k=-1) 178 | a -= np.roll(a, 1, axis=1) 179 | a *= np.array(k) ** 2 180 | a[0] = 1 181 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 182 | 183 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 184 | self.bn = nn.BatchNorm2d(c2) 185 | self.act = nn.LeakyReLU(0.1, inplace=True) 186 | 187 | def forward(self, x): 188 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 189 | 190 | class CrossConv(nn.Module): 191 | # Cross Convolution Downsample 192 | def __init__(self, 
c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 193 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 194 | super(CrossConv, self).__init__() 195 | c_ = int(c2 * e) # hidden channels 196 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 197 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 198 | self.add = shortcut and c1 == c2 199 | 200 | def forward(self, x): 201 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 202 | 203 | class C3(nn.Module): 204 | # CSP Bottleneck with 3 convolutions 205 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 206 | super(C3, self).__init__() 207 | c_ = int(c2 * e) # hidden channels 208 | self.cv1 = Conv(c1, c_, 1, 1) 209 | self.cv2 = Conv(c1, c_, 1, 1) 210 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) 211 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 212 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 213 | 214 | def forward(self, x): 215 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) 216 | 217 | def fuse_conv_and_bn(conv, bn): 218 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 219 | with torch.no_grad(): 220 | # init 221 | fusedconv = torch.nn.Conv2d(conv.in_channels, 222 | conv.out_channels, 223 | kernel_size=conv.kernel_size, 224 | stride=conv.stride, 225 | padding=conv.padding, 226 | bias=True) 227 | 228 | # prepare filters 229 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 230 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 231 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 232 | 233 | # prepare spatial bias 234 | if conv.bias is not None: 235 | b_conv = conv.bias 236 | else: 237 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) 238 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 239 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 240 | return fusedconv 241 | 242 | class Yolo_Layers(nn.Module): 243 | def __init__(self, nc=80, anchors=(), ch=(), training=False): # detection layer 244 | super(Yolo_Layers, self).__init__() 245 | self.stride = torch.tensor([ 8., 16., 32.]).to(device) # strides computed during build 246 | self.no = nc + 5 # number of outputs per anchor 247 | self.nl = len(anchors) # number of detection layers 248 | self.na = len(anchors[0]) // 2 # number of anchors 249 | self.grid = [torch.zeros(1)] * self.nl # init grid 250 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 251 | self.ch = ch 252 | self.anchor_grid = torch.tensor(anchors).float().view(self.nl, 1, -1, 1, 1, 2).to(device) 253 | self.anchors = self.anchor_grid.view(self.nl, -1, 2) / self.stride.view(-1, 1, 1) 254 | self.training = training # onnx export 255 | 256 | def forward(self, x): 257 | # x = x.copy() # for profiling 258 | z = [] # inference output 259 | for i in range(self.nl): 260 | x[i] = self.m[i](x[i]) # conv 261 | # np.save('out'+str(i)+'.npy', x[i].data.cpu().numpy()) 262 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 263 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 264 | 265 | if not self.training: # inference 266 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 267 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 268 | # np.save('torch_grid' + str(i) + '.npy', 
self.grid[i].data.cpu().numpy()) 269 | y = x[i].sigmoid() 270 | # np.save('torch_x' + str(i) + 'sigmoid.npy', y.data.cpu().numpy()) 271 | # y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 272 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * int(self.stride[i]) # xy 273 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 274 | z.append(y.view(bs, -1, self.no)) 275 | 276 | return x if self.training else (torch.cat(z, 1), x) 277 | 278 | @staticmethod 279 | def _make_grid(nx=20, ny=20): 280 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 281 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 282 | 283 | def weights_init_normal(m): 284 | classname = m.__class__.__name__ 285 | if classname.find("Conv") != -1: 286 | torch.nn.init.normal_(m.weight.data, 0.0, 0.02) 287 | elif classname.find("BatchNorm2d") != -1: 288 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 289 | torch.nn.init.constant_(m.bias.data, 0.0) 290 | 291 | def to_cpu(tensor): 292 | return tensor.detach().cpu() 293 | 294 | def bbox_iou(box1, box2, x1y1x2y2=True): 295 | """ 296 | Returns the IoU of two bounding boxes 297 | """ 298 | if not x1y1x2y2: 299 | # Transform from center and width to exact coordinates 300 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 301 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 302 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 303 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 304 | else: 305 | # Get the coordinates of bounding boxes 306 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 307 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 308 | 309 | # get the corrdinates of the intersection rectangle 310 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 311 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 312 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 313 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 314 | # Intersection area 315 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( 316 | inter_rect_y2 - inter_rect_y1 + 1, min=0 317 | ) 318 | # Union Area 319 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 320 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 321 | 322 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) 323 | 324 | return iou 325 | 326 | def get_batch_statistics(outputs, targets, iou_threshold): 327 | """ Compute true positives, predicted scores and predicted labels per sample """ 328 | batch_metrics = [] 329 | for sample_i in range(len(outputs)): 330 | 331 | if outputs[sample_i] is None: 332 | continue 333 | 334 | output = outputs[sample_i] 335 | pred_boxes = output[:, :4] 336 | pred_scores = output[:, 4] 337 | pred_labels = output[:, -1] 338 | 339 | true_positives = np.zeros(pred_boxes.shape[0]) 340 | 341 | annotations = targets[targets[:, 0] == sample_i][:, 1:] 342 | target_labels = annotations[:, 0] if len(annotations) else [] 343 | if len(annotations): 344 | detected_boxes = [] 345 | target_boxes = annotations[:, 1:] 346 | 347 | for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): 348 | 349 | # If targets are found break 350 | if len(detected_boxes) == len(annotations): 351 | break 352 | 353 | # Ignore if label is not one of the target labels 354 | if pred_label not in target_labels: 355 | continue 356 | 357 | iou, box_index = 
bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) 358 | if iou >= iou_threshold and box_index not in detected_boxes: 359 | true_positives[pred_i] = 1 360 | detected_boxes += [box_index] 361 | batch_metrics.append([true_positives, pred_scores, pred_labels]) 362 | return batch_metrics 363 | 364 | def compute_ap(recall, precision): 365 | """ Compute the average precision, given the recall and precision curves. 366 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 367 | 368 | # Arguments 369 | recall: The recall curve (list). 370 | precision: The precision curve (list). 371 | # Returns 372 | The average precision as computed in py-faster-rcnn. 373 | """ 374 | # correct AP calculation 375 | # first append sentinel values at the end 376 | mrec = np.concatenate(([0.0], recall, [1.0])) 377 | mpre = np.concatenate(([0.0], precision, [0.0])) 378 | 379 | # compute the precision envelope 380 | for i in range(mpre.size - 1, 0, -1): 381 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 382 | 383 | # to calculate area under PR curve, look for points 384 | # where X axis (recall) changes value 385 | i = np.where(mrec[1:] != mrec[:-1])[0] 386 | 387 | # and sum (\Delta recall) * prec 388 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 389 | return ap 390 | 391 | def ap_per_class(tp, conf, pred_cls, target_cls): 392 | """ Compute the average precision, given the recall and precision curves. 393 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 394 | # Arguments 395 | tp: True positives (list). 396 | conf: Objectness value from 0-1 (list). 397 | pred_cls: Predicted object classes (list). 398 | target_cls: True object classes (list). 399 | # Returns 400 | The average precision as computed in py-faster-rcnn. 401 | """ 402 | 403 | # Sort by objectness 404 | i = np.argsort(-conf) 405 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 406 | 407 | # Find unique classes 408 | unique_classes = np.unique(target_cls) 409 | 410 | # Create Precision-Recall curve and compute AP for each class 411 | ap, p, r = [], [], [] 412 | for c in tqdm(unique_classes, desc="Computing AP"): 413 | i = pred_cls == c 414 | n_gt = (target_cls == c).sum() # Number of ground truth objects 415 | n_p = i.sum() # Number of predicted objects 416 | 417 | if n_p == 0 and n_gt == 0: 418 | continue 419 | elif n_p == 0 or n_gt == 0: 420 | ap.append(0) 421 | r.append(0) 422 | p.append(0) 423 | else: 424 | # Accumulate FPs and TPs 425 | fpc = (1 - tp[i]).cumsum() 426 | tpc = (tp[i]).cumsum() 427 | 428 | # Recall 429 | recall_curve = tpc / (n_gt + 1e-16) 430 | r.append(recall_curve[-1]) 431 | 432 | # Precision 433 | precision_curve = tpc / (tpc + fpc) 434 | p.append(precision_curve[-1]) 435 | 436 | # AP from recall-precision curve 437 | ap.append(compute_ap(recall_curve, precision_curve)) 438 | 439 | # Compute F1 score (harmonic mean of precision and recall) 440 | p, r, ap = np.array(p), np.array(r), np.array(ap) 441 | f1 = 2 * p * r / (p + r + 1e-16) 442 | 443 | return p, r, ap, f1, unique_classes.astype("int32") -------------------------------------------------------------------------------- /convert-onnx/convert_onnx.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import argparse 4 | from yolov5s import My_YOLO as my_yolov5s 5 | from yolov5l import My_YOLO as my_yolov5l 6 | from yolov5m import My_YOLO as my_yolov5m 7 | from yolov5x import My_YOLO as my_yolov5x 8 | import operator 9 | import cv2 10 | from common import 
Conv,Hardswish,SiLU 11 | 12 | class My_YOLOv5s_extract(nn.Module): 13 | def __init__(self, YOLO, num_classes, anchors=()): 14 | super().__init__() 15 | self.backbone = YOLO.backbone_head 16 | self.ch = YOLO.yolo_layers.ch 17 | self.no = num_classes + 5 # number of outputs per anchor 18 | self.na = len(anchors[0]) // 2 # number of anchors 19 | # self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) 20 | self.m0 = nn.Conv2d(self.ch[0], self.no * self.na, 1) 21 | self.m1 = nn.Conv2d(self.ch[1], self.no * self.na, 1) 22 | self.m2 = nn.Conv2d(self.ch[2], self.no * self.na, 1) 23 | def forward(self, x): 24 | out0, out1, out2 = self.backbone(x) 25 | 26 | out0 = self.m0(out0) 27 | out1 = self.m1(out1) 28 | out2 = self.m2(out2) 29 | 30 | h, w = out0.shape[2:] 31 | out0 = out0.view(self.na, self.no, h, w).permute(0, 2, 3, 1).contiguous() ### drop the batch dimension 32 | out0 = out0.view(-1, self.no) 33 | h, w = out1.shape[2:] 34 | out1 = out1.view(self.na, self.no, h, w).permute(0, 2, 3, 1).contiguous() 35 | out1 = out1.view(-1, self.no) 36 | h, w = out2.shape[2:] 37 | out2 = out2.view(self.na, self.no, h, w).permute(0, 2, 3, 1).contiguous() 38 | out2 = out2.view(-1, self.no) 39 | return torch.cat((out0, out1, out2), 0) 40 | 41 | if __name__ == "__main__": 42 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 43 | parser = argparse.ArgumentParser() 44 | parser.add_argument('--net_type', default='yolov5s', choices=['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x']) 45 | parser.add_argument('--num_classes', default=80, type=int) 46 | args = parser.parse_args() 47 | print(args) 48 | 49 | # Set up model 50 | anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]] 51 | if args.net_type == 'yolov5s': 52 | net = my_yolov5s(args.num_classes, anchors=anchors, training=False) 53 | elif args.net_type == 'yolov5l': 54 | net = my_yolov5l(args.num_classes, anchors=anchors, training=False) 55 | elif args.net_type == 'yolov5m': 56 | net = my_yolov5m(args.num_classes, anchors=anchors, training=False) 57 | else: 58 | net = my_yolov5x(args.num_classes, anchors=anchors, training=False) 59 | 60 | net.to(device) 61 | net.eval() 62 | own_state = net.state_dict() 63 | pth = args.net_type+'_param.pth' 64 | utl_param = torch.load(pth, map_location=device) 65 | del utl_param['24.anchors'] 66 | del utl_param['24.anchor_grid'] 67 | 68 | print(len(utl_param), len(own_state)) 69 | for a, b, namea, nameb in zip(utl_param.values(), own_state.values(), utl_param.keys(), own_state.keys()): 70 | if namea.find('anchor') > -1: 71 | print('anchor') 72 | continue 73 | if not operator.eq(a.shape, b.shape): 74 | print(namea, nameb, a.shape, b.shape) 75 | else: 76 | own_state[nameb].copy_(a) 77 | 78 | onnx_model = My_YOLOv5s_extract(net, args.num_classes, anchors=anchors).to(device).eval() 79 | onnx_param = onnx_model.state_dict() 80 | 81 | print(len(utl_param), len(onnx_param)) 82 | for a, b, namea, nameb in zip(utl_param.values(), onnx_param.values(), utl_param.keys(), onnx_param.keys()): 83 | if namea.find('anchor')>-1: 84 | print('anchor') 85 | continue 86 | if not operator.eq(a.shape, b.shape): 87 | print(namea, nameb, a.shape, b.shape) 88 | else: 89 | onnx_param[nameb].copy_(a) 90 | 91 | output_onnx = args.net_type+'.onnx' 92 | inputs = torch.randn(1, 3, 640, 640).to(device) 93 | 94 | # Update model 95 | for k, m in onnx_model.named_modules(): 96 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 97 | if isinstance(m, Conv): # assign export-friendly activations 98 | if 
isinstance(m.act, nn.Hardswish): 99 | m.act = Hardswish() 100 | elif isinstance(m.act, nn.SiLU): 101 | m.act = SiLU() 102 | 103 | torch.onnx.export(onnx_model, inputs, output_onnx, verbose=False, opset_version=12, input_names=['images'], output_names=['out']) 104 | print('convert',output_onnx,'to onnx finish!!!') 105 | 106 | try: 107 | dnnnet = cv2.dnn.readNet(output_onnx) 108 | print('read sucess') 109 | except: 110 | print('read failed') 111 | -------------------------------------------------------------------------------- /convert-onnx/yolov5l.py: -------------------------------------------------------------------------------- 1 | from common import * 2 | 3 | class My_YOLO_backbone_head(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self.seq0_Focus = Focus(3, 64, 3) 7 | self.seq1_Conv = Conv(64, 128, 3, 2) 8 | self.seq2_C3 = C3(128, 128, 3) 9 | self.seq3_Conv = Conv(128, 256, 3, 2) 10 | self.seq4_C3 = C3(256, 256, 9) 11 | self.seq5_Conv = Conv(256, 512, 3, 2) 12 | self.seq6_C3 = C3(512, 512, 9) 13 | self.seq7_Conv = Conv(512, 1024, 3, 2) 14 | self.seq8_SPP = SPP(1024, 1024, [5, 9, 13]) 15 | self.seq9_C3 = C3(1024, 1024, 3, False) 16 | self.seq10_Conv = Conv(1024, 512, 1, 1) 17 | self.seq13_C3 = C3(1024, 512, 3, False) 18 | self.seq14_Conv = Conv(512, 256, 1, 1) 19 | self.seq17_C3 = C3(512, 256, 3, False) 20 | self.seq18_Conv = Conv(256, 256, 3, 2) 21 | self.seq20_C3 = C3(512, 512, 3, False) 22 | self.seq21_Conv = Conv(512, 512, 3, 2) 23 | self.seq23_C3 = C3(1024, 1024, 3, False) 24 | def forward(self, x): 25 | x = self.seq0_Focus(x) 26 | x = self.seq1_Conv(x) 27 | x = self.seq2_C3(x) 28 | x = self.seq3_Conv(x) 29 | xRt0 = self.seq4_C3(x) 30 | x = self.seq5_Conv(xRt0) 31 | xRt1 = self.seq6_C3(x) 32 | x = self.seq7_Conv(xRt1) 33 | x = self.seq8_SPP(x) 34 | x = self.seq9_C3(x) 35 | xRt2 = self.seq10_Conv(x) 36 | route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') 37 | x = torch.cat([route, xRt1], dim=1) 38 | x = self.seq13_C3(x) 39 | xRt3 = self.seq14_Conv(x) 40 | route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') 41 | x = torch.cat([route, xRt0], dim=1) 42 | out0 = self.seq17_C3(x) 43 | x = self.seq18_Conv(out0) 44 | x = torch.cat([x, xRt3], dim=1) 45 | out1 = self.seq20_C3(x) 46 | x = self.seq21_Conv(out1) 47 | x = torch.cat([x, xRt2], dim=1) 48 | out2 = self.seq23_C3(x) 49 | return out0, out1, out2 50 | 51 | class My_YOLO(nn.Module): 52 | def __init__(self, num_classes, anchors=(), training=False): 53 | super().__init__() 54 | self.backbone_head = My_YOLO_backbone_head() 55 | self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=(256,512,1024),training=training) 56 | def forward(self, x): 57 | out0, out1, out2 = self.backbone_head(x) 58 | output = self.yolo_layers([out0, out1, out2]) 59 | return output 60 | -------------------------------------------------------------------------------- /convert-onnx/yolov5m.py: -------------------------------------------------------------------------------- 1 | from common import * 2 | 3 | class My_YOLO_backbone_head(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self.seq0_Focus = Focus(3, 48, 3) 7 | self.seq1_Conv = Conv(48, 96, 3, 2) 8 | self.seq2_C3 = C3(96, 96, 2) 9 | self.seq3_Conv = Conv(96, 192, 3, 2) 10 | self.seq4_C3 = C3(192, 192, 6) 11 | self.seq5_Conv = Conv(192, 384, 3, 2) 12 | self.seq6_C3 = C3(384, 384, 6) 13 | self.seq7_Conv = Conv(384, 768, 3, 2) 14 | self.seq8_SPP = SPP(768, 768, [5, 9, 13]) 15 | 
self.seq9_C3 = C3(768, 768, 2, False) 16 | self.seq10_Conv = Conv(768, 384, 1, 1) 17 | self.seq13_C3 = C3(768, 384, 2, False) 18 | self.seq14_Conv = Conv(384, 192, 1, 1) 19 | self.seq17_C3 = C3(384, 192, 2, False) 20 | self.seq18_Conv = Conv(192, 192, 3, 2) 21 | self.seq20_C3 = C3(384, 384, 2, False) 22 | self.seq21_Conv = Conv(384, 384, 3, 2) 23 | self.seq23_C3 = C3(768, 768, 2, False) 24 | def forward(self, x): 25 | x = self.seq0_Focus(x) 26 | x = self.seq1_Conv(x) 27 | x = self.seq2_C3(x) 28 | x = self.seq3_Conv(x) 29 | xRt0 = self.seq4_C3(x) 30 | x = self.seq5_Conv(xRt0) 31 | xRt1 = self.seq6_C3(x) 32 | x = self.seq7_Conv(xRt1) 33 | x = self.seq8_SPP(x) 34 | x = self.seq9_C3(x) 35 | xRt2 = self.seq10_Conv(x) 36 | route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') 37 | x = torch.cat([route, xRt1], dim=1) 38 | x = self.seq13_C3(x) 39 | xRt3 = self.seq14_Conv(x) 40 | route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') 41 | x = torch.cat([route, xRt0], dim=1) 42 | out0 = self.seq17_C3(x) 43 | x = self.seq18_Conv(out0) 44 | x = torch.cat([x, xRt3], dim=1) 45 | out1 = self.seq20_C3(x) 46 | x = self.seq21_Conv(out1) 47 | x = torch.cat([x, xRt2], dim=1) 48 | out2 = self.seq23_C3(x) 49 | return out0, out1, out2 50 | 51 | class My_YOLO(nn.Module): 52 | def __init__(self, num_classes, anchors=(), training=False): 53 | super().__init__() 54 | self.backbone_head = My_YOLO_backbone_head() 55 | self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=(192,384,768),training=training) 56 | def forward(self, x): 57 | out0, out1, out2 = self.backbone_head(x) 58 | output = self.yolo_layers([out0, out1, out2]) 59 | return output 60 | -------------------------------------------------------------------------------- /convert-onnx/yolov5s.py: -------------------------------------------------------------------------------- 1 | from common import * 2 | 3 | class My_YOLO_backbone_head(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self.seq0_Focus = Focus(3, 32, 3) 7 | self.seq1_Conv = Conv(32, 64, 3, 2) 8 | self.seq2_C3 = C3(64, 64, 1) 9 | self.seq3_Conv = Conv(64, 128, 3, 2) 10 | self.seq4_C3 = C3(128, 128, 3) 11 | self.seq5_Conv = Conv(128, 256, 3, 2) 12 | self.seq6_C3 = C3(256, 256, 3) 13 | self.seq7_Conv = Conv(256, 512, 3, 2) 14 | self.seq8_SPP = SPP(512, 512, [5, 9, 13]) 15 | self.seq9_C3 = C3(512, 512, 1, False) 16 | self.seq10_Conv = Conv(512, 256, 1, 1) 17 | self.seq13_C3 = C3(512, 256, 1, False) 18 | self.seq14_Conv = Conv(256, 128, 1, 1) 19 | self.seq17_C3 = C3(256, 128, 1, False) 20 | self.seq18_Conv = Conv(128, 128, 3, 2) 21 | self.seq20_C3 = C3(256, 256, 1, False) 22 | self.seq21_Conv = Conv(256, 256, 3, 2) 23 | self.seq23_C3 = C3(512, 512, 1, False) 24 | def forward(self, x): 25 | x = self.seq0_Focus(x) 26 | x = self.seq1_Conv(x) 27 | x = self.seq2_C3(x) 28 | x = self.seq3_Conv(x) 29 | xRt0 = self.seq4_C3(x) 30 | x = self.seq5_Conv(xRt0) 31 | xRt1 = self.seq6_C3(x) 32 | x = self.seq7_Conv(xRt1) 33 | x = self.seq8_SPP(x) 34 | x = self.seq9_C3(x) 35 | xRt2 = self.seq10_Conv(x) 36 | route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') 37 | x = torch.cat([route, xRt1], dim=1) 38 | x = self.seq13_C3(x) 39 | xRt3 = self.seq14_Conv(x) 40 | route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') 41 | x = torch.cat([route, xRt0], dim=1) 42 | out0 = self.seq17_C3(x) 43 | x = self.seq18_Conv(out0) 44 | 
x = torch.cat([x, xRt3], dim=1) 45 | out1 = self.seq20_C3(x) 46 | x = self.seq21_Conv(out1) 47 | x = torch.cat([x, xRt2], dim=1) 48 | out2 = self.seq23_C3(x) 49 | return out0, out1, out2 50 | 51 | class My_YOLO(nn.Module): 52 | def __init__(self, num_classes, anchors=(), training=False): 53 | super().__init__() 54 | self.backbone_head = My_YOLO_backbone_head() 55 | self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=(128,256,512),training=training) 56 | def forward(self, x): 57 | out0, out1, out2 = self.backbone_head(x) 58 | output = self.yolo_layers([out0, out1, out2]) 59 | return output 60 | -------------------------------------------------------------------------------- /convert-onnx/yolov5x.py: -------------------------------------------------------------------------------- 1 | from common import * 2 | 3 | class My_YOLO_backbone_head(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self.seq0_Focus = Focus(3, 80, 3) 7 | self.seq1_Conv = Conv(80, 160, 3, 2) 8 | self.seq2_C3 = C3(160, 160, 4) 9 | self.seq3_Conv = Conv(160, 320, 3, 2) 10 | self.seq4_C3 = C3(320, 320, 12) 11 | self.seq5_Conv = Conv(320, 640, 3, 2) 12 | self.seq6_C3 = C3(640, 640, 12) 13 | self.seq7_Conv = Conv(640, 1280, 3, 2) 14 | self.seq8_SPP = SPP(1280, 1280, [5, 9, 13]) 15 | self.seq9_C3 = C3(1280, 1280, 4, False) 16 | self.seq10_Conv = Conv(1280, 640, 1, 1) 17 | self.seq13_C3 = C3(1280, 640, 4, False) 18 | self.seq14_Conv = Conv(640, 320, 1, 1) 19 | self.seq17_C3 = C3(640, 320, 4, False) 20 | self.seq18_Conv = Conv(320, 320, 3, 2) 21 | self.seq20_C3 = C3(640, 640, 4, False) 22 | self.seq21_Conv = Conv(640, 640, 3, 2) 23 | self.seq23_C3 = C3(1280, 1280, 4, False) 24 | def forward(self, x): 25 | x = self.seq0_Focus(x) 26 | x = self.seq1_Conv(x) 27 | x = self.seq2_C3(x) 28 | x = self.seq3_Conv(x) 29 | xRt0 = self.seq4_C3(x) 30 | x = self.seq5_Conv(xRt0) 31 | xRt1 = self.seq6_C3(x) 32 | x = self.seq7_Conv(xRt1) 33 | x = self.seq8_SPP(x) 34 | x = self.seq9_C3(x) 35 | xRt2 = self.seq10_Conv(x) 36 | route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode='nearest') 37 | x = torch.cat([route, xRt1], dim=1) 38 | x = self.seq13_C3(x) 39 | xRt3 = self.seq14_Conv(x) 40 | route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode='nearest') 41 | x = torch.cat([route, xRt0], dim=1) 42 | out0 = self.seq17_C3(x) 43 | x = self.seq18_Conv(out0) 44 | x = torch.cat([x, xRt3], dim=1) 45 | out1 = self.seq20_C3(x) 46 | x = self.seq21_Conv(out1) 47 | x = torch.cat([x, xRt2], dim=1) 48 | out2 = self.seq23_C3(x) 49 | return out0, out1, out2 50 | 51 | class My_YOLO(nn.Module): 52 | def __init__(self, num_classes, anchors=(), training=False): 53 | super().__init__() 54 | self.backbone_head = My_YOLO_backbone_head() 55 | self.yolo_layers = Yolo_Layers(nc=num_classes, anchors=anchors, ch=(320,640,1280),training=training) 56 | def forward(self, x): 57 | out0, out1, out2 = self.backbone_head(x) 58 | output = self.yolo_layers([out0, out1, out2]) 59 | return output 60 | -------------------------------------------------------------------------------- /dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolov5-dnn-cpp-python-v2/025a52f94a2afc71e43a7b2fe761f10b56914331/dog.jpg -------------------------------------------------------------------------------- /main_yolo.cpp: -------------------------------------------------------------------------------- 1 | #include "yolo.h" 2 | 3 | 
YOLO::YOLO(Net_config config) 4 | { 5 | cout << "Net use " << config.netname << endl; 6 | this->confThreshold = config.confThreshold; 7 | this->nmsThreshold = config.nmsThreshold; 8 | this->objThreshold = config.objThreshold; 9 | strcpy(this->netname, config.netname.c_str()); 10 | 11 | ifstream ifs(this->classesFile.c_str()); 12 | string line; 13 | while (getline(ifs, line)) this->classes.push_back(line); 14 | 15 | string modelFile = this->netname; 16 | modelFile += ".onnx"; 17 | this->net = readNet(modelFile); 18 | } 19 | 20 | void YOLO::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame) // Draw the predicted bounding box 21 | { 22 | //Draw a rectangle displaying the bounding box 23 | rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3); 24 | 25 | //Get the label for the class name and its confidence 26 | string label = format("%.2f", conf); 27 | label = this->classes[classId] + ":" + label; 28 | 29 | //Display the label at the top of the bounding box 30 | int baseLine; 31 | Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); 32 | top = max(top, labelSize.height); 33 | //rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED); 34 | putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0), 2); 35 | } 36 | 37 | void YOLO::sigmoid(Mat* out, int length) 38 | { 39 | float* pdata = (float*)(out->data); 40 | int i = 0; 41 | for (i = 0; i < length; i++) 42 | { 43 | pdata[i] = 1.0 / (1 + expf(-pdata[i])); 44 | } 45 | } 46 | 47 | void YOLO::detect(Mat& frame) 48 | { 49 | Mat blob; 50 | blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false); 51 | this->net.setInput(blob); 52 | vector<Mat> outs; 53 | this->net.forward(outs, this->net.getUnconnectedOutLayersNames()); 54 | 55 | /////generate proposals 56 | vector<int> classIds; 57 | vector<float> confidences; 58 | vector<Rect> boxes; 59 | float ratioh = (float)frame.rows / this->inpHeight, ratiow = (float)frame.cols / this->inpWidth; 60 | int n = 0, q = 0, i = 0, j = 0, nout = this->classes.size() + 5, row_ind = 0; 61 | for (n = 0; n < 3; n++) ///scales 62 | { 63 | int num_grid_x = (int)(this->inpWidth / this->stride[n]); 64 | int num_grid_y = (int)(this->inpHeight / this->stride[n]); 65 | for (q = 0; q < 3; q++) ///anchors 66 | { 67 | const float anchor_w = this->anchors[n][q * 2]; 68 | const float anchor_h = this->anchors[n][q * 2 + 1]; 69 | for (i = 0; i < num_grid_y; i++) 70 | { 71 | for (j = 0; j < num_grid_x; j++) 72 | { 73 | float* pdata = (float*)outs[0].data + row_ind * nout; 74 | float box_score = sigmoid_x(pdata[4]); 75 | if (box_score > this->objThreshold) 76 | { 77 | Mat scores = outs[0].row(row_ind).colRange(5, outs[0].cols); 78 | Point classIdPoint; 79 | double max_class_score; 80 | // Get the value and location of the maximum score 81 | minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint); 82 | max_class_score = sigmoid_x((float)max_class_score); 83 | if (max_class_score > this->confThreshold) 84 | { 85 | float cx = (sigmoid_x(pdata[0]) * 2.f - 0.5f + j) * this->stride[n]; ///cx 86 | float cy = (sigmoid_x(pdata[1]) * 2.f - 0.5f + i) * this->stride[n]; ///cy 87 | float w = powf(sigmoid_x(pdata[2]) * 2.f, 2.f) * anchor_w; ///w 88 | float h = powf(sigmoid_x(pdata[3]) * 2.f, 2.f) * anchor_h; ///h 89 | 90 | int left = (cx - 0.5*w)*ratiow; 91 | int top = (cy - 0.5*h)*ratioh; ///map the coordinates back to the original image 92 | 93 | 
classIds.push_back(classIdPoint.x); 94 | confidences.push_back(max_class_score); 95 | boxes.push_back(Rect(left, top, (int)(w*ratiow), (int)(h*ratioh))); 96 | } 97 | } 98 | row_ind++; 99 | } 100 | } 101 | } 102 | } 103 | 104 | // Perform non maximum suppression to eliminate redundant overlapping boxes with 105 | // lower confidences 106 | vector<int> indices; 107 | NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices); 108 | for (size_t i = 0; i < indices.size(); ++i) 109 | { 110 | int idx = indices[i]; 111 | Rect box = boxes[idx]; 112 | this->drawPred(classIds[idx], confidences[idx], box.x, box.y, 113 | box.x + box.width, box.y + box.height, frame); 114 | } 115 | } 116 | 117 | int main() 118 | { 119 | YOLO yolo_model(yolo_nets[3]); 120 | string imgpath = "bus.jpg"; 121 | Mat srcimg = imread(imgpath); 122 | yolo_model.detect(srcimg); 123 | 124 | static const string kWinName = "Deep learning object detection in OpenCV"; 125 | namedWindow(kWinName, WINDOW_NORMAL); 126 | imshow(kWinName, srcimg); 127 | waitKey(0); 128 | destroyAllWindows(); 129 | } -------------------------------------------------------------------------------- /main_yolov5.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import argparse 3 | import numpy as np 4 | 5 | class yolov5(): 6 | def __init__(self, yolo_type, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5): 7 | with open('coco.names', 'rt') as f: 8 | self.classes = f.read().rstrip('\n').split('\n') ### this model was trained on the COCO dataset for OpenCV deployment; if you deploy a model trained on your own dataset with OpenCV, you need to change self.classes accordingly 9 | self.colors = [np.random.randint(0, 255, size=3).tolist() for _ in range(len(self.classes))] 10 | num_classes = len(self.classes) 11 | anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]] 12 | self.nl = len(anchors) 13 | self.na = len(anchors[0]) // 2 14 | self.no = num_classes + 5 15 | self.grid = [np.zeros(1)] * self.nl 16 | self.stride = np.array([8., 16., 32.]) 17 | self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, -1, 2) 18 | self.inpWidth = 640 19 | self.inpHeight = 640 20 | self.net = cv2.dnn.readNet(yolo_type + '.onnx') 21 | self.confThreshold = confThreshold 22 | self.nmsThreshold = nmsThreshold 23 | self.objThreshold = objThreshold 24 | 25 | def _make_grid(self, nx=20, ny=20): 26 | xv, yv = np.meshgrid(np.arange(ny), np.arange(nx)) 27 | return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32) 28 | 29 | def postprocess(self, frame, outs): 30 | frameHeight = frame.shape[0] 31 | frameWidth = frame.shape[1] 32 | ratioh, ratiow = frameHeight / self.inpHeight, frameWidth / self.inpWidth 33 | # Scan through all the bounding boxes output from the network and keep only the 34 | # ones with high confidence scores. Assign the box's class label as the class with the highest score. 
35 | classIds = [] 36 | confidences = [] 37 | boxes = [] 38 | for detection in outs: 39 | scores = detection[5:] 40 | classId = np.argmax(scores) 41 | confidence = scores[classId] 42 | if confidence > self.confThreshold and detection[4] > self.objThreshold: 43 | center_x = int(detection[0] * ratiow) 44 | center_y = int(detection[1] * ratioh) 45 | width = int(detection[2] * ratiow) 46 | height = int(detection[3] * ratioh) 47 | left = int(center_x - width / 2) 48 | top = int(center_y - height / 2) 49 | classIds.append(classId) 50 | confidences.append(float(confidence)) 51 | boxes.append([left, top, width, height]) 52 | 53 | # Perform non maximum suppression to eliminate redundant overlapping boxes with 54 | # lower confidences. 55 | indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold) 56 | for i in indices: 57 | i = i[0] 58 | box = boxes[i] 59 | left = box[0] 60 | top = box[1] 61 | width = box[2] 62 | height = box[3] 63 | frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height) 64 | return frame 65 | def drawPred(self, frame, classId, conf, left, top, right, bottom): 66 | # Draw a bounding box. 67 | cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4) 68 | 69 | label = '%.2f' % conf 70 | label = '%s:%s' % (self.classes[classId], label) 71 | 72 | # Display the label at the top of the bounding box 73 | labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) 74 | top = max(top, labelSize[1]) 75 | # cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv.FILLED) 76 | cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2) 77 | return frame 78 | def detect(self, srcimg): 79 | blob = cv2.dnn.blobFromImage(srcimg, 1 / 255.0, (self.inpWidth, self.inpHeight), [0, 0, 0], swapRB=True, crop=False) 80 | # Sets the input to the network 81 | self.net.setInput(blob) 82 | 83 | # Runs the forward pass to get output of the output layers 84 | outs = self.net.forward(self.net.getUnconnectedOutLayersNames())[0] 85 | 86 | # inference output 87 | outs = 1 / (1 + np.exp(-outs)) ###sigmoid 88 | row_ind = 0 89 | for i in range(self.nl): 90 | h, w = int(self.inpHeight/self.stride[i]), int(self.inpWidth/self.stride[i]) 91 | length = int(self.na * h * w) 92 | if self.grid[i].shape[2:4] != (h,w): 93 | self.grid[i] = self._make_grid(w, h) 94 | 95 | outs[row_ind:row_ind+length, 0:2] = (outs[row_ind:row_ind+length, 0:2] * 2. 
- 0.5 + np.tile(self.grid[i],(self.na, 1))) * int(self.stride[i]) 96 | outs[row_ind:row_ind+length, 2:4] = (outs[row_ind:row_ind+length, 2:4] * 2) ** 2 * np.repeat(self.anchor_grid[i],h*w, axis=0) 97 | row_ind += length 98 | return outs 99 | 100 | if __name__ == "__main__": 101 | parser = argparse.ArgumentParser() 102 | parser.add_argument("--imgpath", type=str, default='bus.jpg', help="image path") 103 | parser.add_argument('--net_type', default='yolov5s', choices=['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x']) 104 | parser.add_argument('--confThreshold', default=0.5, type=float, help='class confidence') 105 | parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh') 106 | parser.add_argument('--objThreshold', default=0.5, type=float, help='object confidence') 107 | args = parser.parse_args() 108 | 109 | yolonet = yolov5(args.net_type, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold, objThreshold=args.objThreshold) 110 | srcimg = cv2.imread(args.imgpath) 111 | dets = yolonet.detect(srcimg) 112 | srcimg = yolonet.postprocess(srcimg, dets) 113 | 114 | winName = 'Deep learning object detection in OpenCV' 115 | cv2.namedWindow(winName, 0) 116 | cv2.imshow(winName, srcimg) 117 | cv2.waitKey(0) 118 | cv2.destroyAllWindows() 119 | -------------------------------------------------------------------------------- /person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolov5-dnn-cpp-python-v2/025a52f94a2afc71e43a7b2fe761f10b56914331/person.jpg -------------------------------------------------------------------------------- /yolo.h: -------------------------------------------------------------------------------- 1 | #include <fstream> 2 | #include <sstream> 3 | #include <iostream> 4 | #include <opencv2/imgproc.hpp> 5 | #include <opencv2/highgui.hpp> 6 | #include <opencv2/dnn.hpp> 7 | 8 | using namespace cv; 9 | using namespace dnn; 10 | using namespace std; 11 | 12 | struct Net_config 13 | { 14 | float confThreshold; // class Confidence threshold 15 | float nmsThreshold; // Non-maximum suppression threshold 16 | float objThreshold; //Object Confidence threshold 17 | string netname; 18 | }; 19 | 20 | class YOLO 21 | { 22 | public: 23 | YOLO(Net_config config); 24 | void detect(Mat& frame); 25 | private: 26 | const float anchors[3][6] = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, {30.0, 61.0, 62.0, 45.0, 59.0, 119.0},{116.0, 90.0, 156.0, 198.0, 373.0, 326.0}}; 27 | const float stride[3] = { 8.0, 16.0, 32.0 }; 28 | const string classesFile = "coco.names"; 29 | const int inpWidth = 640; 30 | const int inpHeight = 640; 31 | float confThreshold; 32 | float nmsThreshold; 33 | float objThreshold; 34 | 35 | char netname[20]; 36 | vector<string> classes; 37 | Net net; 38 | void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame); 39 | void sigmoid(Mat* out, int length); 40 | }; 41 | 42 | static inline float sigmoid_x(float x) 43 | { 44 | return static_cast<float>(1.f / (1.f + exp(-x))); 45 | } 46 | 47 | Net_config yolo_nets[4] = { 48 | {0.5, 0.5, 0.5, "yolov5s"}, 49 | {0.5, 0.5, 0.5, "yolov5m"}, 50 | {0.5, 0.5, 0.5, "yolov5l"}, 51 | {0.5, 0.5, 0.5, "yolov5x"} 52 | }; 53 | -------------------------------------------------------------------------------- /zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolov5-dnn-cpp-python-v2/025a52f94a2afc71e43a7b2fe761f10b56914331/zidane.jpg --------------------------------------------------------------------------------
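A quick way to check an exported model against the output layout described in the README. This is a minimal sketch, not a file from this repository; it assumes a yolov5s.onnx exported by convert-onnx/convert_onnx.py (or downloaded from the Baidu Netdisk link) and bus.jpg sit in the working directory. For a 640x640 input and 80 classes, OpenCV's DNN module should return the single concatenated output with 3x80x80 + 3x40x40 + 3x20x20 = 25200 rows and num_classes + 5 = 85 columns:

import cv2

# Load the ONNX file exported for OpenCV DNN (assumed to exist in the working directory)
net = cv2.dnn.readNet('yolov5s.onnx')
# Same preprocessing as main_yolov5.py: scale to [0,1], resize to 640x640, BGR -> RGB
blob = cv2.dnn.blobFromImage(cv2.imread('bus.jpg'), 1 / 255.0, (640, 640), [0, 0, 0], swapRB=True, crop=False)
net.setInput(blob)
out = net.forward(net.getUnconnectedOutLayersNames())[0]
print(out.shape)  # expected (25200, 85): rows are boxes from all three scales, columns are (x, y, w, h, obj, 80 class scores)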