├── ConvNeXt-YoloV5
├── models
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-37.pyc
│ │ ├── common.cpython-37.pyc
│ │ ├── experimental.cpython-37.pyc
│ │ └── yolo.cpython-37.pyc
│ ├── common.py
│ ├── experimental.py
│ ├── hub
│ │ ├── anchors.yaml
│ │ ├── pplc.yaml
│ │ ├── yolov3-spp.yaml
│ │ ├── yolov3-tiny.yaml
│ │ ├── yolov3.yaml
│ │ ├── yolov5-bifpn.yaml
│ │ ├── yolov5-fpn.yaml
│ │ ├── yolov5-p2.yaml
│ │ ├── yolov5-p6.yaml
│ │ ├── yolov5-p7.yaml
│ │ ├── yolov5-panet.yaml
│ │ ├── yolov5_ConvNeXt.yaml
│ │ ├── yolov5_ConvNeXt_Tiny.yaml
│ │ ├── yolov5l6.yaml
│ │ ├── yolov5m6.yaml
│ │ ├── yolov5n6.yaml
│ │ ├── yolov5s-ghost.yaml
│ │ ├── yolov5s-transformer.yaml
│ │ ├── yolov5s6.yaml
│ │ └── yolov5x6.yaml
│ ├── tf.py
│ ├── yolo.py
│ ├── yolov5_ConvNeXt_Tiny.yaml
│ ├── yolov5_ConvNeXt_base.yaml
│ ├── yolov5_ConvNeXt_large.yaml
│ ├── yolov5_ConvNeXt_small.yaml
│ ├── yolov5_ConvNeXt_xlarge_22k.yaml
│ ├── yolov5l.yaml
│ ├── yolov5m.yaml
│ ├── yolov5n.yaml
│ ├── yolov5s.yaml
│ └── yolov5x.yaml
└── train.py
├── PP-LCNet-Yolov5
├── PP_LCNet
│ └── LCNet.py
├── models
│ ├── common.py
│ ├── yolo.py
│ ├── yolov5_LCNet_0.25.yaml
│ ├── yolov5_LCNet_0.35.yaml
│ ├── yolov5_LCNet_0.5.yaml
│ ├── yolov5_LCNet_0.75.yaml
│ ├── yolov5_LCNet_1.5.yaml
│ ├── yolov5_LCNet_1.yaml
│ ├── yolov5_LCNet_2.5.yaml
│ └── yolov5_LCNet_2.yaml
└── test.txt
├── README.md
├── RepLKNet-Yolov5
└── models
│ ├── common.py
│ ├── yolo.py
│ └── yolov5_RepLKNet.yaml
└── pic.png

/ConvNeXt-YoloV5/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/ConvNeXt-YoloV5/models/__init__.py
--------------------------------------------------------------------------------
/ConvNeXt-YoloV5/models/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/ConvNeXt-YoloV5/models/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/ConvNeXt-YoloV5/models/__pycache__/common.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/ConvNeXt-YoloV5/models/__pycache__/common.cpython-37.pyc
--------------------------------------------------------------------------------
/ConvNeXt-YoloV5/models/__pycache__/experimental.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/ConvNeXt-YoloV5/models/__pycache__/experimental.cpython-37.pyc
--------------------------------------------------------------------------------
/ConvNeXt-YoloV5/models/__pycache__/yolo.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/ConvNeXt-YoloV5/models/__pycache__/yolo.cpython-37.pyc
--------------------------------------------------------------------------------
/ConvNeXt-YoloV5/models/common.py:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by 
Ultralytics, GPL-3.0 license 2 | """ 3 | Common modules 4 | """ 5 | 6 | import logging 7 | import math 8 | import warnings 9 | from copy import copy 10 | from pathlib import Path 11 | 12 | import numpy as np 13 | import pandas as pd 14 | import requests 15 | import torch 16 | import torch.nn as nn 17 | from PIL import Image 18 | from torch.cuda import amp 19 | 20 | from utils.datasets import exif_transpose, letterbox 21 | from utils.general import colorstr, increment_path, is_ascii, make_divisible, non_max_suppression, save_one_box, \ 22 | scale_coords, xyxy2xywh 23 | from utils.plots import Annotator, colors 24 | from utils.torch_utils import time_sync 25 | from torch.nn.parameter import Parameter 26 | 27 | import torch.nn.functional as F 28 | from timm.models.layers import trunc_normal_, DropPath 29 | from timm.models.registry import register_model 30 | 31 | LOGGER = logging.getLogger(__name__) 32 | 33 | 34 | def autopad(k, p=None): # kernel, padding 35 | # Pad to 'same' 36 | if p is None: 37 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 38 | return p 39 | 40 | 41 | class Conv(nn.Module): 42 | # Standard convolution 43 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 44 | super().__init__() 45 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 46 | self.bn = nn.BatchNorm2d(c2) 47 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 48 | 49 | def forward(self, x): 50 | return self.act(self.bn(self.conv(x))) 51 | 52 | def forward_fuse(self, x): 53 | return self.act(self.conv(x)) 54 | 55 | 56 | class DWConv(Conv): 57 | # Depth-wise convolution class 58 | def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 59 | super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 60 | 61 | 62 | class TransformerLayer(nn.Module): 63 | # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) 64 | def __init__(self, c, num_heads): 65 | super().__init__() 66 | self.q = nn.Linear(c, c, bias=False) 67 | self.k = nn.Linear(c, c, bias=False) 68 | self.v = nn.Linear(c, c, bias=False) 69 | self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) 70 | self.fc1 = nn.Linear(c, c, bias=False) 71 | self.fc2 = nn.Linear(c, c, bias=False) 72 | 73 | def forward(self, x): 74 | x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x 75 | x = self.fc2(self.fc1(x)) + x 76 | return x 77 | 78 | 79 | class TransformerBlock(nn.Module): 80 | # Vision Transformer https://arxiv.org/abs/2010.11929 81 | def __init__(self, c1, c2, num_heads, num_layers): 82 | super().__init__() 83 | self.conv = None 84 | if c1 != c2: 85 | self.conv = Conv(c1, c2) 86 | self.linear = nn.Linear(c2, c2) # learnable position embedding 87 | self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)]) 88 | self.c2 = c2 89 | 90 | def forward(self, x): 91 | if self.conv is not None: 92 | x = self.conv(x) 93 | b, _, w, h = x.shape 94 | p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3) 95 | return self.tr(p + self.linear(p)).unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h) 96 | 97 | 98 | class Bottleneck(nn.Module): 99 | # Standard bottleneck 100 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 101 | super().__init__() 102 | c_ = int(c2 * e) # hidden channels 103 | self.cv1 = Conv(c1, c_, 1, 1) 104 | self.cv2 = Conv(c_, 
c2, 3, 1, g=g) 105 | self.add = shortcut and c1 == c2 106 | 107 | def forward(self, x): 108 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 109 | 110 | 111 | class BottleneckCSP(nn.Module): 112 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 113 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 114 | super().__init__() 115 | c_ = int(c2 * e) # hidden channels 116 | self.cv1 = Conv(c1, c_, 1, 1) 117 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 118 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 119 | self.cv4 = Conv(2 * c_, c2, 1, 1) 120 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 121 | self.act = nn.LeakyReLU(0.1, inplace=True) 122 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 123 | 124 | def forward(self, x): 125 | y1 = self.cv3(self.m(self.cv1(x))) 126 | y2 = self.cv2(x) 127 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 128 | 129 | # [512, 256, 1, False] 130 | class C3(nn.Module): 131 | # CSP Bottleneck with 3 convolutions 132 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 133 | super().__init__() 134 | c_ = int(c2 * e) # hidden channels 135 | self.cv1 = Conv(c1, c_, 1, 1) 136 | self.cv2 = Conv(c1, c_, 1, 1) 137 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) 138 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 139 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 140 | 141 | def forward(self, x): 142 | # print(self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)).shape) 143 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) 144 | 145 | 146 | class C3TR(C3): 147 | # C3 module with TransformerBlock() 148 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 149 | super().__init__(c1, c2, n, shortcut, g, e) 150 | c_ = int(c2 * e) 151 | self.m = TransformerBlock(c_, c_, 4, n) 152 | 153 | 154 | class C3SPP(C3): 155 | # C3 module with SPP() 156 | def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5): 157 | super().__init__(c1, c2, n, shortcut, g, e) 158 | c_ = int(c2 * e) 159 | self.m = SPP(c_, c_, k) 160 | 161 | 162 | class C3Ghost(C3): 163 | # C3 module with GhostBottleneck() 164 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 165 | super().__init__(c1, c2, n, shortcut, g, e) 166 | c_ = int(c2 * e) # hidden channels 167 | self.m = nn.Sequential(*[GhostBottleneck(c_, c_) for _ in range(n)]) 168 | 169 | 170 | class SPP(nn.Module): 171 | # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729 172 | def __init__(self, c1, c2, k=(5, 9, 13)): 173 | super().__init__() 174 | c_ = c1 // 2 # hidden channels 175 | self.cv1 = Conv(c1, c_, 1, 1) 176 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 177 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 178 | 179 | def forward(self, x): 180 | x = self.cv1(x) 181 | with warnings.catch_warnings(): 182 | warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning 183 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 184 | 185 | 186 | class SPPF(nn.Module): 187 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 188 | def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) 189 | super().__init__() 190 | c_ = c1 // 2 # hidden channels 
191 | self.cv1 = Conv(c1, c_, 1, 1) 192 | self.cv2 = Conv(c_ * 4, c2, 1, 1) 193 | self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) 194 | 195 | def forward(self, x): 196 | x = self.cv1(x) 197 | with warnings.catch_warnings(): 198 | warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning 199 | y1 = self.m(x) 200 | y2 = self.m(y1) 201 | return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1)) 202 | 203 | 204 | class Focus(nn.Module): 205 | # Focus wh information into c-space 206 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 207 | super().__init__() 208 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 209 | # self.contract = Contract(gain=2) 210 | 211 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 212 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 213 | # return self.conv(self.contract(x)) 214 | 215 | 216 | class GhostConv(nn.Module): 217 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 218 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 219 | super().__init__() 220 | c_ = c2 // 2 # hidden channels 221 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 222 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 223 | 224 | def forward(self, x): 225 | y = self.cv1(x) 226 | return torch.cat([y, self.cv2(y)], 1) 227 | 228 | 229 | class GhostBottleneck(nn.Module): 230 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 231 | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride 232 | super().__init__() 233 | c_ = c2 // 2 234 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 235 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 236 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 237 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 238 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 239 | 240 | def forward(self, x): 241 | return self.conv(x) + self.shortcut(x) 242 | 243 | 244 | class Contract(nn.Module): 245 | # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) 246 | def __init__(self, gain=2): 247 | super().__init__() 248 | self.gain = gain 249 | 250 | def forward(self, x): 251 | b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain' 252 | s = self.gain 253 | x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2) 254 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) 255 | return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40) 256 | 257 | 258 | class Expand(nn.Module): 259 | # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) 260 | def __init__(self, gain=2): 261 | super().__init__() 262 | self.gain = gain 263 | 264 | def forward(self, x): 265 | b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' 266 | s = self.gain 267 | x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80) 268 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) 269 | return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160) 270 | 271 | 272 | class Concat(nn.Module): 273 | # Concatenate a list of tensors along dimension 274 | def __init__(self, dimension=1): 275 | super().__init__() 276 | self.d = dimension 277 | 278 | def forward(self, x): 279 | return torch.cat(x, self.d) 280 | 281 | 282 | class AutoShape(nn.Module): 283 | # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. 
Includes preprocessing, inference and NMS 284 | conf = 0.25 # NMS confidence threshold 285 | iou = 0.45 # NMS IoU threshold 286 | classes = None # (optional list) filter by class 287 | max_det = 1000 # maximum number of detections per image 288 | 289 | def __init__(self, model): 290 | super().__init__() 291 | self.model = model.eval() 292 | 293 | def autoshape(self): 294 | LOGGER.info('AutoShape already enabled, skipping... ') # model already converted to model.autoshape() 295 | return self 296 | 297 | @torch.no_grad() 298 | def forward(self, imgs, size=640, augment=False, profile=False): 299 | # Inference from various sources. For height=640, width=1280, RGB images example inputs are: 300 | # file: imgs = 'data/images/zidane.jpg' # str or PosixPath 301 | # URI: = 'https://ultralytics.com/images/zidane.jpg' 302 | # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) 303 | # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3) 304 | # numpy: = np.zeros((640,1280,3)) # HWC 305 | # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) 306 | # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images 307 | 308 | t = [time_sync()] 309 | p = next(self.model.parameters()) # for device and type 310 | if isinstance(imgs, torch.Tensor): # torch 311 | with amp.autocast(enabled=p.device.type != 'cpu'): 312 | return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference 313 | 314 | # Pre-process 315 | n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images 316 | shape0, shape1, files = [], [], [] # image and inference shapes, filenames 317 | for i, im in enumerate(imgs): 318 | f = f'image{i}' # filename 319 | if isinstance(im, (str, Path)): # filename or uri 320 | im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im 321 | im = np.asarray(exif_transpose(im)) 322 | elif isinstance(im, Image.Image): # PIL Image 323 | im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f 324 | files.append(Path(f).with_suffix('.jpg').name) 325 | if im.shape[0] < 5: # image in CHW 326 | im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) 327 | im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3) # enforce 3ch input 328 | s = im.shape[:2] # HWC 329 | shape0.append(s) # image shape 330 | g = (size / max(s)) # gain 331 | shape1.append([y * g for y in s]) 332 | imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update 333 | shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape 334 | x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad 335 | x = np.stack(x, 0) if n > 1 else x[0][None] # stack 336 | x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW 337 | x = torch.from_numpy(x).to(p.device).type_as(p) / 255. 
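        # Typical AutoShape usage, a sketch assuming the model has been wrapped (e.g. via
        # model.autoshape() or torch.hub): sources can be a path, URL, PIL/OpenCV/numpy image,
        # torch tensor, or a list of these:
        #   results = model('data/images/zidane.jpg', size=640)
        #   results.print()                # speed and per-class counts
        #   df = results.pandas().xyxy[0]  # detections as a DataFrame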
# uint8 to fp16/32 338 | t.append(time_sync()) 339 | 340 | with amp.autocast(enabled=p.device.type != 'cpu'): 341 | # Inference 342 | y = self.model(x, augment, profile)[0] # forward 343 | t.append(time_sync()) 344 | 345 | # Post-process 346 | y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det) # NMS 347 | for i in range(n): 348 | scale_coords(shape1, y[i][:, :4], shape0[i]) 349 | 350 | t.append(time_sync()) 351 | return Detections(imgs, y, files, t, self.names, x.shape) 352 | 353 | 354 | class Detections: 355 | # YOLOv5 detections class for inference results 356 | def __init__(self, imgs, pred, files, times=None, names=None, shape=None): 357 | super().__init__() 358 | d = pred[0].device # device 359 | gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations 360 | self.imgs = imgs # list of images as numpy arrays 361 | self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) 362 | self.names = names # class names 363 | self.ascii = is_ascii(names) # names are ascii (use PIL for UTF-8) 364 | self.files = files # image filenames 365 | self.xyxy = pred # xyxy pixels 366 | self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels 367 | self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized 368 | self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized 369 | self.n = len(self.pred) # number of images (batch size) 370 | self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms) 371 | self.s = shape # inference BCHW shape 372 | 373 | def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')): 374 | for i, (im, pred) in enumerate(zip(self.imgs, self.pred)): 375 | str = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' 376 | if pred.shape[0]: 377 | for c in pred[:, -1].unique(): 378 | n = (pred[:, -1] == c).sum() # detections per class 379 | str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string 380 | if show or save or render or crop: 381 | annotator = Annotator(im, pil=not self.ascii) 382 | for *box, conf, cls in reversed(pred): # xyxy, confidence, class 383 | label = f'{self.names[int(cls)]} {conf:.2f}' 384 | if crop: 385 | save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i]) 386 | else: # all others 387 | annotator.box_label(box, label, color=colors(cls)) 388 | im = annotator.im 389 | else: 390 | str += '(no detections)' 391 | 392 | im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np 393 | if pprint: 394 | LOGGER.info(str.rstrip(', ')) 395 | if show: 396 | im.show(self.files[i]) # show 397 | if save: 398 | f = self.files[i] 399 | im.save(save_dir / f) # save 400 | if i == self.n - 1: 401 | LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") 402 | if render: 403 | self.imgs[i] = np.asarray(im) 404 | 405 | def print(self): 406 | self.display(pprint=True) # print results 407 | LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % 408 | self.t) 409 | 410 | def show(self): 411 | self.display(show=True) # show results 412 | 413 | def save(self, save_dir='runs/detect/exp'): 414 | save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir 415 | self.display(save=True, save_dir=save_dir) # save results 416 | 417 | def crop(self, save_dir='runs/detect/exp'): 418 
|         save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
419 |         self.display(crop=True, save_dir=save_dir)  # crop results
420 |         LOGGER.info(f'Saved results to {save_dir}\n')
421 | 
422 |     def render(self):
423 |         self.display(render=True)  # render results
424 |         return self.imgs
425 | 
426 |     def pandas(self):
427 |         # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
428 |         new = copy(self)  # return copy
429 |         ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
430 |         cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
431 |         for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
432 |             a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
433 |             setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
434 |         return new
435 | 
436 |     def tolist(self):
437 |         # return a list of Detections objects, i.e. 'for result in results.tolist():'
438 |         x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], (0., 0., 0., 0.), self.names, self.s) for i in range(self.n)]  # files/times/names/shape in their correct slots; per-image times are not tracked, so they are zeroed
439 |         for d in x:
440 |             for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
441 |                 setattr(d, k, getattr(d, k)[0])  # pop out of list
442 |         return x
443 | 
444 |     def __len__(self):
445 |         return self.n
446 | 
447 | 
448 | class Classify(nn.Module):
449 |     # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
450 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
451 |         super().__init__()
452 |         self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
453 |         self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
454 |         self.flat = nn.Flatten()
455 | 
456 |     def forward(self, x):
457 |         z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
458 |         return self.flat(self.conv(z))  # flatten to x(b,c2)
459 | 
460 | class h_sigmoid(nn.Module):
461 |     def __init__(self, inplace=True):
462 |         super(h_sigmoid, self).__init__()
463 |         self.relu = nn.ReLU6(inplace=inplace)
464 | 
465 |     def forward(self, x):
466 |         return self.relu(x + 3) / 6
467 | 
468 | 
469 | class h_swish(nn.Module):
470 |     def __init__(self, inplace=True):
471 |         super(h_swish, self).__init__()
472 |         self.sigmoid = h_sigmoid(inplace=inplace)
473 | 
474 |     def forward(self, x):
475 |         return x * self.sigmoid(x)
476 | 
477 | 
478 | class CoordinateLayer(nn.Module):
479 |     def __init__(self, inp, oup, reduction=32):
480 |         super(CoordinateLayer, self).__init__()
481 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
482 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
483 | 
484 |         mip = max(8, inp // reduction)
485 | 
486 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
487 |         self.bn1 = nn.BatchNorm2d(mip)
488 |         self.act = h_swish()
489 | 
490 |         self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
491 |         self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
492 | 
493 |     def forward(self, x):
494 |         identity = x
495 | 
496 |         n, c, h, w = x.size()
497 |         x_h = self.pool_h(x)  # [n,c,h,w]-->[n,c,h,1]
498 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)  # [n,c,h,w]-->[n,c,1,w]-->[n,c,w,1]
499 | 
500 |         y = torch.cat([x_h, x_w], dim=2)  # -->[n,c,(h+w),1]
501 |         y = self.conv1(y)  # [n,c,(h+w),1]-->[n,mip,(h+w),1]
502 |         y = self.bn1(y)  # [n,mip,(h+w),1]
503 |         y = self.act(y)  # [n,mip,(h+w),1]
504 | 
505 |         x_h, x_w = torch.split(y, [h, w], dim=2)  # [n,mip,(h+w),1]-->[n,mip,h,1] & [n,mip,w,1]
506 |         x_w = x_w.permute(0, 1, 3, 2)  # [n,mip,w,1]-->[n,mip,1,w]
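        # CoordinateLayer is coordinate attention: the H- and W-pooled profiles encoded jointly
        # above are split back into the per-axis gates a_h/a_w below. It preserves channels when
        # inp == oup, so it can be dropped in after any backbone stage. A usage sketch, assuming
        # models.common is importable:
        #   ca = CoordinateLayer(inp=256, oup=256, reduction=32)
        #   y = ca(torch.randn(2, 256, 40, 40))  # -> torch.Size([2, 256, 40, 40])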
507 | 
508 |         a_h = self.conv_h(x_h).sigmoid()  # [n,mip,h,1]-->[n,oup,h,1]--> attention weights along the height axis
509 |         a_w = self.conv_w(x_w).sigmoid()  # [n,mip,1,w]-->[n,oup,1,w]--> attention weights along the width axis
510 | 
511 |         out = identity * a_w * a_h
512 | 
513 |         return out
514 | 
515 | 
516 | 
517 | NET_CONFIG = {
518 |     "blocks2":
519 |     # k, in_c, out_c, s, use_se
520 |     [[3, 16, 32, 1, False]],
521 |     "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
522 |     "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
523 |     "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
524 |                 [5, 256, 256, 1, False], [5, 256, 256, 1, False],
525 |                 [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
526 |     "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
527 | }
528 | BLOCK_LIST = ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]
529 | 
530 | def make_divisible_LC(v, divisor=8, min_value=None):
531 |     if min_value is None:
532 |         min_value = divisor
533 |     new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
534 |     if new_v < 0.9 * v:
535 |         new_v += divisor
536 |     return new_v
537 | 
538 | 
539 | class HardSwish(nn.Module):
540 |     def __init__(self, inplace=True):
541 |         super(HardSwish, self).__init__()
542 |         self.relu6 = nn.ReLU6(inplace=inplace)
543 | 
544 |     def forward(self, x):
545 |         return x * self.relu6(x+3) / 6
546 | 
547 | 
548 | class HardSigmoid(nn.Module):
549 |     def __init__(self, inplace=True):
550 |         super(HardSigmoid, self).__init__()
551 |         self.relu6 = nn.ReLU6(inplace=inplace)
552 | 
553 |     def forward(self, x):
554 |         return (self.relu6(x+3)) / 6
555 | 
556 | 
557 | class SELayer(nn.Module):
558 |     def __init__(self, channel, reduction=16):
559 |         super(SELayer, self).__init__()
560 |         self.avgpool = nn.AdaptiveAvgPool2d(1)
561 |         self.fc = nn.Sequential(
562 |             nn.Linear(channel, channel // reduction, bias=False),
563 |             nn.ReLU(inplace=True),
564 |             nn.Linear(channel // reduction, channel, bias=False),
565 |             HardSigmoid()
566 |         )
567 | 
568 |     def forward(self, x):
569 |         b, c, h, w = x.size()
570 |         y = self.avgpool(x).view(b, c)
571 |         y = self.fc(y).view(b, c, 1, 1)
572 |         return x * y.expand_as(x)
573 | 
574 | 
575 | class DepthwiseSeparable(nn.Module):
576 |     def __init__(self, inp, oup, dw_size, stride, use_se=False):
577 |         super(DepthwiseSeparable, self).__init__()
578 |         self.use_se = use_se
579 |         self.stride = stride
580 |         self.inp = inp
581 |         self.oup = oup
582 |         self.dw_size = dw_size
583 |         self.dw_sp = nn.Sequential(
584 |             nn.Conv2d(self.inp, self.inp, kernel_size=self.dw_size, stride=self.stride,
585 |                       padding=autopad(self.dw_size, None), groups=self.inp, bias=False),
586 |             nn.BatchNorm2d(self.inp),
587 |             HardSwish(),
588 | 
589 |             nn.Conv2d(self.inp, self.oup, kernel_size=1, stride=1, padding=0, bias=False),
590 |             nn.BatchNorm2d(self.oup),
591 |             HardSwish(),
592 |         )
593 |         self.se = SELayer(self.oup)
594 | 
595 |     def forward(self, x):
596 |         x = self.dw_sp(x)
597 |         if self.use_se:
598 |             x = self.se(x)
599 |         return x
600 | 
601 | class PPLC_Conv(nn.Module):
602 |     def __init__(self, scale):
603 |         super(PPLC_Conv, self).__init__()
604 |         self.scale = scale
605 |         self.conv = nn.Conv2d(3, out_channels=make_divisible_LC(16 * self.scale),
606 |                               kernel_size=3, stride=2, padding=1, bias=False)
607 |     def forward(self, x):
608 |         return self.conv(x)
609 | 
610 | class PPLC_Block(nn.Module):
611 |     def __init__(self, scale, block_num):
612 |         super(PPLC_Block, self).__init__()
613 |         self.scale = scale
614 |         self.block_num = BLOCK_LIST[block_num]
615 |         self.block = nn.Sequential(*[
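            # PPLC_Block stacks the DepthwiseSeparable layers listed under NET_CONFIG[BLOCK_LIST[block_num]].
            # models/hub/pplc.yaml wires it up as e.g. [-1, 1, PPLC_Block, [16, 0.5, 0]], i.e. scale=0.5 and
            # block_num=0 -> "blocks2" (the leading 16 is the declared output width, resolved in yolo.py,
            # which is not shown here). A direct-use sketch, assuming models.common is importable:
            #   blk = PPLC_Block(scale=0.5, block_num=0)
            #   y = blk(torch.randn(1, make_divisible_LC(16 * 0.5), 64, 64))  # -> (1, 16, 64, 64)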
DepthwiseSeparable(inp=make_divisible_LC(in_c * self.scale), 617 | oup=make_divisible_LC(out_c * self.scale), 618 | dw_size=k, stride=s, use_se=use_se) 619 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG[self.block_num]) 620 | ]) 621 | def forward(self, x): 622 | return self.block(x) 623 | 624 | # todo ConvNeXt 625 | class Block(nn.Module): 626 | 627 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6): 628 | super().__init__() 629 | self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv 630 | self.norm = LayerNorm(dim, eps=1e-6) 631 | self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers 632 | self.act = nn.GELU() 633 | self.pwconv2 = nn.Linear(4 * dim, dim) 634 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), 635 | requires_grad=True) if layer_scale_init_value > 0 else None 636 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 637 | 638 | def forward(self, x): 639 | input = x 640 | x = self.dwconv(x) 641 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) 642 | x = self.norm(x) 643 | x = self.pwconv1(x) 644 | x = self.act(x) 645 | x = self.pwconv2(x) 646 | if self.gamma is not None: 647 | x = self.gamma * x 648 | x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) 649 | 650 | x = input + self.drop_path(x) 651 | return x 652 | 653 | 654 | class LayerNorm(nn.Module): 655 | 656 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): 657 | super().__init__() 658 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 659 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 660 | self.eps = eps 661 | self.data_format = data_format 662 | if self.data_format not in ["channels_last", "channels_first"]: 663 | raise NotImplementedError 664 | self.normalized_shape = (normalized_shape,) 665 | 666 | def forward(self, x): 667 | if self.data_format == "channels_last": 668 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 669 | elif self.data_format == "channels_first": 670 | u = x.mean(1, keepdim=True) 671 | s = (x - u).pow(2).mean(1, keepdim=True) 672 | x = (x - u) / torch.sqrt(s + self.eps) 673 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 674 | return x 675 | 676 | class ConvNeXt_Block(nn.Module): # index 0~3 677 | def __init__(self, index, in_chans, depths, dims, drop_path_rate=0., layer_scale_init_value=1e-6): 678 | super().__init__() 679 | 680 | self.index = index 681 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 682 | stem = nn.Sequential( 683 | nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4), 684 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first") 685 | ) 686 | self.downsample_layers.append(stem) 687 | for i in range(3): 688 | downsample_layer = nn.Sequential( 689 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"), 690 | nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2), 691 | ) 692 | self.downsample_layers.append(downsample_layer) 693 | 694 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 695 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 696 | cur = 0 697 | for i in range(4): 698 | stage = nn.Sequential( 699 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j], 700 | layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])] 701 | ) 702 | self.stages.append(stage) 703 | 
cur += depths[i] 704 | self.apply(self._init_weights) 705 | 706 | def _init_weights(self, m): 707 | if isinstance(m, (nn.Conv2d, nn.Linear)): 708 | trunc_normal_(m.weight, std=.02) 709 | nn.init.constant_(m.bias, 0) 710 | 711 | def forward(self, x): 712 | x = self.downsample_layers[self.index](x) 713 | x = self.stages[self.index](x) 714 | return x 715 | 716 | 717 | 718 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/experimental.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Experimental modules 4 | """ 5 | import math 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | 11 | from models.common import Conv 12 | from utils.downloads import attempt_download 13 | 14 | 15 | class CrossConv(nn.Module): 16 | # Cross Convolution Downsample 17 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 18 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 19 | super().__init__() 20 | c_ = int(c2 * e) # hidden channels 21 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 22 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 23 | self.add = shortcut and c1 == c2 24 | 25 | def forward(self, x): 26 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 27 | 28 | 29 | class Sum(nn.Module): 30 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 31 | def __init__(self, n, weight=False): # n: number of inputs 32 | super().__init__() 33 | self.weight = weight # apply weights boolean 34 | self.iter = range(n - 1) # iter object 35 | if weight: 36 | self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights 37 | 38 | def forward(self, x): 39 | y = x[0] # no weight 40 | if self.weight: 41 | w = torch.sigmoid(self.w) * 2 42 | for i in self.iter: 43 | y = y + x[i + 1] * w[i] 44 | else: 45 | for i in self.iter: 46 | y = y + x[i + 1] 47 | return y 48 | 49 | 50 | class MixConv2d(nn.Module): 51 | # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 52 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy 53 | super().__init__() 54 | n = len(k) # number of convolutions 55 | if equal_ch: # equal c_ per group 56 | i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices 57 | c_ = [(i == g).sum() for g in range(n)] # intermediate channels 58 | else: # equal weight.numel() per group 59 | b = [c2] + [0] * n 60 | a = np.eye(n + 1, n, k=-1) 61 | a -= np.roll(a, 1, axis=1) 62 | a *= np.array(k) ** 2 63 | a[0] = 1 64 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 65 | 66 | self.m = nn.ModuleList( 67 | [nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]) 68 | self.bn = nn.BatchNorm2d(c2) 69 | self.act = nn.SiLU() 70 | 71 | def forward(self, x): 72 | return self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 73 | 74 | 75 | class Ensemble(nn.ModuleList): 76 | # Ensemble of models 77 | def __init__(self): 78 | super().__init__() 79 | 80 | def forward(self, x, augment=False, profile=False, visualize=False): 81 | y = [] 82 | for module in self: 83 | y.append(module(x, augment, profile, visualize)[0]) 84 | # y = torch.stack(y).max(0)[0] # max ensemble 85 | # y = torch.stack(y).mean(0) # mean ensemble 86 | y = torch.cat(y, 1) # nms ensemble 87 | return y, None # inference, train output 88 | 89 | 90 | def attempt_load(weights, 
map_location=None, inplace=True, fuse=True): 91 | from models.yolo import Detect, Model 92 | 93 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 94 | model = Ensemble() 95 | for w in weights if isinstance(weights, list) else [weights]: 96 | ckpt = torch.load(attempt_download(w), map_location=map_location) # load 97 | if fuse: 98 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model 99 | else: 100 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().eval()) # without layer fuse 101 | 102 | # Compatibility updates 103 | for m in model.modules(): 104 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]: 105 | m.inplace = inplace # pytorch 1.7.0 compatibility 106 | if type(m) is Detect: 107 | if not isinstance(m.anchor_grid, list): # new Detect Layer compatibility 108 | delattr(m, 'anchor_grid') 109 | setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl) 110 | elif type(m) is Conv: 111 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 112 | 113 | if len(model) == 1: 114 | return model[-1] # return model 115 | else: 116 | print(f'Ensemble created with {weights}\n') 117 | for k in ['names']: 118 | setattr(model, k, getattr(model[-1], k)) 119 | model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride 120 | return model # return ensemble 121 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/anchors.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Default anchors for COCO data 3 | 4 | 5 | # P5 ------------------------------------------------------------------------------------------------------------------- 6 | # P5-640: 7 | anchors_p5_640: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | 13 | # P6 ------------------------------------------------------------------------------------------------------------------- 14 | # P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387 15 | anchors_p6_640: 16 | - [9,11, 21,19, 17,41] # P3/8 17 | - [43,32, 39,70, 86,64] # P4/16 18 | - [65,131, 134,130, 120,265] # P5/32 19 | - [282,180, 247,354, 512,387] # P6/64 20 | 21 | # P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 22 | anchors_p6_1280: 23 | - [19,27, 44,40, 38,94] # P3/8 24 | - [96,68, 86,152, 180,137] # P4/16 25 | - [140,301, 303,264, 238,542] # P5/32 26 | - [436,615, 739,380, 925,792] # P6/64 27 | 28 | # P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187 29 | anchors_p6_1920: 30 | - [28,41, 67,59, 57,141] # P3/8 31 | - [144,103, 129,227, 270,205] # P4/16 32 | - [209,452, 455,396, 358,812] # P5/32 33 | - [653,922, 1109,570, 1387,1187] # P6/64 34 | 35 | 36 | # P7 
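# Note on reading these tables (a sketch): each row holds three (w,h) anchor pairs in input-image
# pixels for one output stride. In anchors_p6_640, for example, the P4/16 anchor (86,64) is a prior
# box of 86x64 px predicted on the stride-16 map, i.e. 86/16 x 64/16 = 5.4 x 4.0 grid cells.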
------------------------------------------------------------------------------------------------------------------- 37 | # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372 38 | anchors_p7_640: 39 | - [11,11, 13,30, 29,20] # P3/8 40 | - [30,46, 61,38, 39,92] # P4/16 41 | - [78,80, 146,66, 79,163] # P5/32 42 | - [149,150, 321,143, 157,303] # P6/64 43 | - [257,402, 359,290, 524,372] # P7/128 44 | 45 | # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818 46 | anchors_p7_1280: 47 | - [19,22, 54,36, 32,77] # P3/8 48 | - [70,83, 138,71, 75,173] # P4/16 49 | - [165,159, 148,334, 375,151] # P5/32 50 | - [334,317, 251,626, 499,474] # P6/64 51 | - [750,326, 534,814, 1079,818] # P7/128 52 | 53 | # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227 54 | anchors_p7_1920: 55 | - [29,34, 81,55, 47,115] # P3/8 56 | - [105,124, 207,107, 113,259] # P4/16 57 | - [247,238, 222,500, 563,227] # P5/32 58 | - [501,476, 376,939, 749,711] # P6/64 59 | - [1126,489, 801,1222, 1618,1227] # P7/128 60 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/pplc.yaml: -------------------------------------------------------------------------------- 1 | nc: 1 # number of classes 2 | depth_multiple: 0.33 # model depth multiple 3 | width_multiple: 0.50 # layer channel multiple 4 | anchors: 5 | - [10,13, 16,30, 33,23] # P3/8 6 | - [30,61, 62,45, 59,119] # P4/16 7 | - [116,90, 156,198, 373,326] # P5/32 8 | 9 | # YOLOv5 backbone 10 | backbone: 11 | [[-1, 1, PPLC_Conv, [8, 0.5]], 12 | [-1, 1, PPLC_Block, [16, 0.5, 0]], 13 | [-1, 1, PPLC_Block, [32, 0.5, 1]], 14 | [-1, 1, PPLC_Block, [64, 0.5, 2]], 15 | [-1, 1, PPLC_Block, [128, 0.5, 3]], 16 | [-1, 1, PPLC_Block, [256, 0.5, 4]], 17 | ] 18 | 19 | 20 | # YOLOv5 head 21 | head: 22 | [[-1, 1, Conv, [512, 1, 1]], 23 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 24 | [[-1, 4], 1, Concat, [1]], # cat backbone P4 25 | [-1, 3, C3, [512, False]], # 13 26 | 27 | [-1, 1, Conv, [256, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 29 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 30 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 31 | 32 | [-1, 1, Conv, [256, 3, 2]], 33 | [[-1, 10], 1, Concat, [1]], # cat head P4 34 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 35 | 36 | [-1, 1, Conv, [512, 3, 2]], 37 | [[-1, 6], 1, Concat, [1]], # cat head P5 38 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 39 | 40 | [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 41 | ] -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 
62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,14, 23,27, 37,58] # P4/16 9 | - [81,82, 135,169, 344,319] # P5/32 10 | 11 | # YOLOv3-tiny backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Conv, [16, 3, 1]], # 0 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 16 | [-1, 1, Conv, [32, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 
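# How one row expands (the usual YOLOv5 parse_model rule, assumed unchanged since yolo.py is not
# shown here): with this file's depth_multiple=1.0 and width_multiple=1.0, a row such as
#   [-1, 8, Bottleneck, [256]]
# takes its input from the previous layer (-1), repeats Bottleneck max(round(8 * 1.0), 1) = 8 times,
# and scales the 256-channel argument to make_divisible(256 * 1.0, 8) = 256.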
| # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, [1, 1]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-bifpn.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 BiFPN head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 
10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 FPN head 28 | head: 29 | [[-1, 3, C3, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, C3, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, C3, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-p2.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # auto-anchor evolves 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [1024]], 21 | [-1, 1, SPPF, [1024, 5]], # 9 22 | ] 23 | 24 | # YOLOv5 v6.0 head 25 | head: 26 | [[-1, 1, Conv, [512, 1, 1]], 27 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 28 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 29 | [-1, 3, C3, [512, False]], # 13 30 | 31 | [-1, 1, Conv, [256, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 34 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 35 | 36 | [-1, 1, Conv, [128, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 2], 1, Concat, [1]], # cat backbone P2 39 | [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall) 40 | 41 | [-1, 1, Conv, [128, 3, 2]], 42 | [[-1, 18], 1, Concat, [1]], # cat head P3 43 | [-1, 3, C3, [256, False]], # 24 (P3/8-small) 44 | 45 | [-1, 1, Conv, [256, 3, 2]], 46 | [[-1, 14], 1, Concat, [1]], # cat head P4 47 | [-1, 3, C3, [512, False]], # 27 (P4/16-medium) 48 | 49 | [-1, 1, Conv, [512, 3, 2]], 50 | [[-1, 10], 1, Concat, [1]], # cat head P5 51 | [-1, 3, C3, [1024, False]], # 30 (P5/32-large) 52 | 53 | [[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5) 54 | ] 55 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-p6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # auto-anchor 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # 
[from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [768]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 22 | [-1, 3, C3, [1024]], 23 | [-1, 1, SPPF, [1024, 5]], # 11 24 | ] 25 | 26 | # YOLOv5 v6.0 head 27 | head: 28 | [[-1, 1, Conv, [768, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 31 | [-1, 3, C3, [768, False]], # 15 32 | 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 36 | [-1, 3, C3, [512, False]], # 19 37 | 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 41 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 42 | 43 | [-1, 1, Conv, [256, 3, 2]], 44 | [[-1, 20], 1, Concat, [1]], # cat head P4 45 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 46 | 47 | [-1, 1, Conv, [512, 3, 2]], 48 | [[-1, 16], 1, Concat, [1]], # cat head P5 49 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 50 | 51 | [-1, 1, Conv, [768, 3, 2]], 52 | [[-1, 12], 1, Concat, [1]], # cat head P6 53 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 54 | 55 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 56 | ] 57 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-p7.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # auto-anchor 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [768]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 22 | [-1, 3, C3, [1024]], 23 | [-1, 1, Conv, [1280, 3, 2]], # 11-P7/128 24 | [-1, 3, C3, [1280]], 25 | [-1, 1, SPPF, [1280, 5]], # 13 26 | ] 27 | 28 | # YOLOv5 head 29 | head: 30 | [[-1, 1, Conv, [1024, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 10], 1, Concat, [1]], # cat backbone P6 33 | [-1, 3, C3, [1024, False]], # 17 34 | 35 | [-1, 1, Conv, [768, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 38 | [-1, 3, C3, [768, False]], # 21 39 | 40 | [-1, 1, Conv, [512, 1, 1]], 41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 42 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 43 | [-1, 3, C3, [512, False]], # 25 44 | 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 47 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 48 | [-1, 3, C3, [256, False]], # 29 (P3/8-small) 49 | 50 | [-1, 1, Conv, [256, 3, 2]], 51 | [[-1, 26], 1, Concat, [1]], # cat head P4 52 | [-1, 3, C3, [512, False]], # 32 (P4/16-medium) 53 | 54 | [-1, 1, Conv, [512, 3, 2]], 55 | [[-1, 22], 1, Concat, [1]], # cat head P5 56 | [-1, 3, C3, 
[768, False]], # 35 (P5/32-large) 57 | 58 | [-1, 1, Conv, [768, 3, 2]], 59 | [[-1, 18], 1, Concat, [1]], # cat head P6 60 | [-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge) 61 | 62 | [-1, 1, Conv, [1024, 3, 2]], 63 | [[-1, 14], 1, Concat, [1]], # cat head P7 64 | [-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge) 65 | 66 | [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7) 67 | ] 68 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5_ConvNeXt.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 1 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [256, 0, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 16 | [-1, 1, ConvNeXt_Block, [512, 1, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 17 | [-1, 1, ConvNeXt_Block, [1024, 2, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 18 | [-1, 1, ConvNeXt_Block, [2048, 3, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | # 1024, 512, 256, 128 -> 768, 384, 192, 96 23 | # size 160[0] -> 80[1] -> 40[2] -> 20[3] 24 | head: 25 | [[-1, 1, Conv, [2048, 1, 1]], 26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 27 | [[-1, 2], 1, Concat, [1]], 28 | [-1, 3, C3, [2048, False]], 29 | 30 | [-1, 1, Conv, [1024, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 1], 1, Concat, [1]], 33 | [-1, 3, C3, [1024, False]], 34 | 35 | [-1, 1, 
Conv, [1024, 3, 2]], 36 | [[-1, 8], 1, Concat, [1]], 37 | [-1, 3, C3, [2048, False]], 38 | 39 | [-1, 1, Conv, [2048, 3, 2]], 40 | [[-1, 4], 1, Concat, [1]], # cat head P5 41 | [-1, 3, C3, [2048, False]], # 23 (P5/32-large) 42 | 43 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 44 | ] 45 | 46 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5_ConvNeXt_Tiny.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 1 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [96, 0, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 16 | [-1, 1, ConvNeXt_Block, [192, 1, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 17 | [-1, 1, ConvNeXt_Block, [384, 2, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 18 | [-1, 1, ConvNeXt_Block, [768, 3, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | # 1024, 512, 256, 128 -> 768, 384, 192, 96 23 | # size 160[0] -> 80[1] -> 40[2] -> 20[3] 24 | head: 25 | [[-1, 1, Conv, [768, 1, 1]], 26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 27 | [[-1, 2], 1, Concat, [1]], 28 | [-1, 3, C3, [768, False]], 29 | 30 | [-1, 1, Conv, [384, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 1], 1, Concat, [1]], 33 | [-1, 3, C3, [384, False]], 34 | 35 | [-1, 1, Conv, [384, 3, 2]], 36 | [[-1, 8], 1, Concat, [1]], 37 | [-1, 3, C3, [768, False]], 38 | 39 | [-1, 1, Conv, [768, 3, 2]], 40 | [[-1, 4], 1, Concat, [1]], # cat head P5 41 | [-1, 3, C3, [768, False]], # 23 (P5/32-large) 42 | 43 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 44 | ] 45 | 46 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5l6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 
'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5m6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5n6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | 
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5s-ghost.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3Ghost, [128]], 18 | [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3Ghost, [256]], 20 | [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3Ghost, [512]], 22 | [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3Ghost, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, GhostConv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3Ghost, [512, False]], # 13 33 | 34 | [-1, 1, GhostConv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, GhostConv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, GhostConv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5s-transformer.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth 
multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5s6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # 
Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5x6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 1 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/tf.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | TensorFlow, Keras and TFLite versions of YOLOv5 4 | Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127 5 | 6 | Usage: 7 | $ python models/tf.py --weights yolov5s.pt 8 | 9 | Export: 10 | $ python path/to/export.py --weights yolov5s.pt --include saved_model pb tflite tfjs 11 | """ 12 | 13 | import argparse 14 | import sys 15 | from copy import deepcopy 16 | from pathlib import Path 17 | 18 | FILE = Path(__file__).resolve() 19 | ROOT = FILE.parents[1] # YOLOv5 root directory 20 | if str(ROOT) not in sys.path: 21 | sys.path.append(str(ROOT)) # add ROOT to PATH 22 | # ROOT = ROOT.relative_to(Path.cwd()) # relative 23 | 24 | import numpy as np 25 | import tensorflow as tf 26 | import torch 27 | import torch.nn as nn 28 | from tensorflow import keras 29 | 30 | from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, Concat, Conv, DWConv, Focus, autopad 31 | from models.experimental import CrossConv, MixConv2d, attempt_load 32 | from models.yolo import Detect 33 | from utils.activations import SiLU 34 | from utils.general import LOGGER, 
make_divisible, print_args 35 | 36 | 37 | class TFBN(keras.layers.Layer): 38 | # TensorFlow BatchNormalization wrapper 39 | def __init__(self, w=None): 40 | super().__init__() 41 | self.bn = keras.layers.BatchNormalization( 42 | beta_initializer=keras.initializers.Constant(w.bias.numpy()), 43 | gamma_initializer=keras.initializers.Constant(w.weight.numpy()), 44 | moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()), 45 | moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()), 46 | epsilon=w.eps) 47 | 48 | def call(self, inputs): 49 | return self.bn(inputs) 50 | 51 | 52 | class TFPad(keras.layers.Layer): 53 | def __init__(self, pad): 54 | super().__init__() 55 | self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]]) 56 | 57 | def call(self, inputs): 58 | return tf.pad(inputs, self.pad, mode='constant', constant_values=0) 59 | 60 | 61 | class TFConv(keras.layers.Layer): 62 | # Standard convolution 63 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): 64 | # ch_in, ch_out, weights, kernel, stride, padding, groups 65 | super().__init__() 66 | assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument" 67 | assert isinstance(k, int), "Convolution with multiple kernels are not allowed." 68 | # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding) 69 | # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch 70 | 71 | conv = keras.layers.Conv2D( 72 | c2, k, s, 'SAME' if s == 1 else 'VALID', use_bias=False if hasattr(w, 'bn') else True, 73 | kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()), 74 | bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy())) 75 | self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv]) 76 | self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity 77 | 78 | # YOLOv5 activations 79 | if isinstance(w.act, nn.LeakyReLU): 80 | self.act = (lambda x: keras.activations.relu(x, alpha=0.1)) if act else tf.identity 81 | elif isinstance(w.act, nn.Hardswish): 82 | self.act = (lambda x: x * tf.nn.relu6(x + 3) * 0.166666667) if act else tf.identity 83 | elif isinstance(w.act, (nn.SiLU, SiLU)): 84 | self.act = (lambda x: keras.activations.swish(x)) if act else tf.identity 85 | else: 86 | raise Exception(f'no matching TensorFlow activation found for {w.act}') 87 | 88 | def call(self, inputs): 89 | return self.act(self.bn(self.conv(inputs))) 90 | 91 | 92 | class TFFocus(keras.layers.Layer): 93 | # Focus wh information into c-space 94 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): 95 | # ch_in, ch_out, kernel, stride, padding, groups 96 | super().__init__() 97 | self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv) 98 | 99 | def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c) 100 | # inputs = inputs / 255 # normalize 0-255 to 0-1 101 | return self.conv(tf.concat([inputs[:, ::2, ::2, :], 102 | inputs[:, 1::2, ::2, :], 103 | inputs[:, ::2, 1::2, :], 104 | inputs[:, 1::2, 1::2, :]], 3)) 105 | 106 | 107 | class TFBottleneck(keras.layers.Layer): 108 | # Standard bottleneck 109 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion 110 | super().__init__() 111 | c_ = int(c2 * e) # hidden channels 112 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 113 | self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2) 114 | self.add = shortcut and c1 == c2 115 
| 116 | def call(self, inputs): 117 | return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs)) 118 | 119 | 120 | class TFConv2d(keras.layers.Layer): 121 | # Substitution for PyTorch nn.Conv2D 122 | def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None): 123 | super().__init__() 124 | assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument" 125 | self.conv = keras.layers.Conv2D( 126 | c2, k, s, 'VALID', use_bias=bias, 127 | kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()), 128 | bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None, ) 129 | 130 | def call(self, inputs): 131 | return self.conv(inputs) 132 | 133 | 134 | class TFBottleneckCSP(keras.layers.Layer): 135 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 136 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): 137 | # ch_in, ch_out, number, shortcut, groups, expansion 138 | super().__init__() 139 | c_ = int(c2 * e) # hidden channels 140 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 141 | self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2) 142 | self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3) 143 | self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4) 144 | self.bn = TFBN(w.bn) 145 | self.act = lambda x: keras.activations.relu(x, alpha=0.1) 146 | self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)]) 147 | 148 | def call(self, inputs): 149 | y1 = self.cv3(self.m(self.cv1(inputs))) 150 | y2 = self.cv2(inputs) 151 | return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3)))) 152 | 153 | 154 | class TFC3(keras.layers.Layer): 155 | # CSP Bottleneck with 3 convolutions 156 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): 157 | # ch_in, ch_out, number, shortcut, groups, expansion 158 | super().__init__() 159 | c_ = int(c2 * e) # hidden channels 160 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 161 | self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2) 162 | self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3) 163 | self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)]) 164 | 165 | def call(self, inputs): 166 | return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3)) 167 | 168 | 169 | class TFSPP(keras.layers.Layer): 170 | # Spatial pyramid pooling layer used in YOLOv3-SPP 171 | def __init__(self, c1, c2, k=(5, 9, 13), w=None): 172 | super().__init__() 173 | c_ = c1 // 2 # hidden channels 174 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 175 | self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2) 176 | self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k] 177 | 178 | def call(self, inputs): 179 | x = self.cv1(inputs) 180 | return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3)) 181 | 182 | 183 | class TFSPPF(keras.layers.Layer): 184 | # Spatial pyramid pooling-Fast layer 185 | def __init__(self, c1, c2, k=5, w=None): 186 | super().__init__() 187 | c_ = c1 // 2 # hidden channels 188 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 189 | self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2) 190 | self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME') 191 | 192 | def call(self, inputs): 193 | x = self.cv1(inputs) 194 | y1 = self.m(x) 195 | y2 = self.m(y1) 196 | return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3)) 197 | 198 | 199 | class TFDetect(keras.layers.Layer): 200 | def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # 
detection layer 201 | super().__init__() 202 | self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32) 203 | self.nc = nc # number of classes 204 | self.no = nc + 5 # number of outputs per anchor 205 | self.nl = len(anchors) # number of detection layers 206 | self.na = len(anchors[0]) // 2 # number of anchors 207 | self.grid = [tf.zeros(1)] * self.nl # init grid 208 | self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32) 209 | self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), 210 | [self.nl, 1, -1, 1, 2]) 211 | self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] 212 | self.training = False # set to False after building model 213 | self.imgsz = imgsz 214 | for i in range(self.nl): 215 | ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i] 216 | self.grid[i] = self._make_grid(nx, ny) 217 | 218 | def call(self, inputs): 219 | z = [] # inference output 220 | x = [] 221 | for i in range(self.nl): 222 | x.append(self.m[i](inputs[i])) 223 | # x(bs,20,20,255) to x(bs,3,20,20,85) 224 | ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i] 225 | x[i] = tf.transpose(tf.reshape(x[i], [-1, ny * nx, self.na, self.no]), [0, 2, 1, 3]) 226 | 227 | if not self.training: # inference 228 | y = tf.sigmoid(x[i]) 229 | xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i] # xy 230 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] 231 | # Normalize xywh to 0-1 to reduce calibration error 232 | xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) 233 | wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) 234 | y = tf.concat([xy, wh, y[..., 4:]], -1) 235 | z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no])) 236 | 237 | return x if self.training else (tf.concat(z, 1), x) 238 | 239 | @staticmethod 240 | def _make_grid(nx=20, ny=20): 241 | # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 242 | # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 243 | xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny)) 244 | return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) 245 | 246 | 247 | class TFUpsample(keras.layers.Layer): 248 | def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' 249 | super().__init__() 250 | assert scale_factor == 2, "scale_factor must be 2" 251 | self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * 2, x.shape[2] * 2), method=mode) 252 | # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode) 253 | # with default arguments: align_corners=False, half_pixel_centers=False 254 | # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x, 255 | # size=(x.shape[1] * 2, x.shape[2] * 2)) 256 | 257 | def call(self, inputs): 258 | return self.upsample(inputs) 259 | 260 | 261 | class TFConcat(keras.layers.Layer): 262 | def __init__(self, dimension=1, w=None): 263 | super().__init__() 264 | assert dimension == 1, "convert only NCHW to NHWC concat" 265 | self.d = 3 266 | 267 | def call(self, inputs): 268 | return tf.concat(inputs, self.d) 269 | 270 | 271 | def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) 272 | LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") 273 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 274 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else 
anchors # number of anchors 275 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 276 | 277 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 278 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 279 | m_str = m 280 | m = eval(m) if isinstance(m, str) else m # eval strings 281 | for j, a in enumerate(args): 282 | try: 283 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 284 | except NameError: 285 | pass 286 | 287 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 288 | if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]: 289 | c1, c2 = ch[f], args[0] 290 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 291 | 292 | args = [c1, c2, *args[1:]] 293 | if m in [BottleneckCSP, C3]: 294 | args.insert(2, n) 295 | n = 1 296 | elif m is nn.BatchNorm2d: 297 | args = [ch[f]] 298 | elif m is Concat: 299 | c2 = sum(ch[-1 if x == -1 else x + 1] for x in f) 300 | elif m is Detect: 301 | args.append([ch[x + 1] for x in f]) 302 | if isinstance(args[1], int): # number of anchors 303 | args[1] = [list(range(args[1] * 2))] * len(f) 304 | args.append(imgsz) 305 | else: 306 | c2 = ch[f] 307 | 308 | tf_m = eval('TF' + m_str.replace('nn.', '')) 309 | m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \ 310 | else tf_m(*args, w=model.model[i]) # module 311 | 312 | torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module 313 | t = str(m)[8:-2].replace('__main__.', '') # module type 314 | np = sum(x.numel() for x in torch_m_.parameters()) # number params 315 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 316 | LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print 317 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 318 | layers.append(m_) 319 | ch.append(c2) 320 | return keras.Sequential(layers), sorted(save) 321 | 322 | 323 | class TFModel: 324 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes 325 | super().__init__() 326 | if isinstance(cfg, dict): 327 | self.yaml = cfg # model dict 328 | else: # is *.yaml 329 | import yaml # for torch hub 330 | self.yaml_file = Path(cfg).name 331 | with open(cfg) as f: 332 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict 333 | 334 | # Define model 335 | if nc and nc != self.yaml['nc']: 336 | LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}") 337 | self.yaml['nc'] = nc # override yaml value 338 | self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz) 339 | 340 | def predict(self, inputs, tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, 341 | conf_thres=0.25): 342 | y = [] # outputs 343 | x = inputs 344 | for i, m in enumerate(self.model.layers): 345 | if m.f != -1: # if not from previous layer 346 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 347 | 348 | x = m(x) # run 349 | y.append(x if m.i in self.savelist else None) # save output 350 | 351 | # Add TensorFlow NMS 352 | if tf_nms: 353 | boxes = self._xywh2xyxy(x[0][..., :4]) 354 | probs = x[0][:, :, 4:5] 355 | classes = x[0][:, :, 5:] 356 | scores = probs * classes 357 | if agnostic_nms: 358 | nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, 
conf_thres) 359 | return nms, x[1] 360 | else: 361 | boxes = tf.expand_dims(boxes, 2) 362 | nms = tf.image.combined_non_max_suppression( 363 | boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False) 364 | return nms, x[1] 365 | 366 | return x[0] # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...] 367 | # x = x[0][0] # [x(1,6300,85), ...] to x(6300,85) 368 | # xywh = x[..., :4] # x(6300,4) boxes 369 | # conf = x[..., 4:5] # x(6300,1) confidences 370 | # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes 371 | # return tf.concat([conf, cls, xywh], 1) 372 | 373 | @staticmethod 374 | def _xywh2xyxy(xywh): 375 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 376 | x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1) 377 | return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1) 378 | 379 | 380 | class AgnosticNMS(keras.layers.Layer): 381 | # TF Agnostic NMS 382 | def call(self, input, topk_all, iou_thres, conf_thres): 383 | # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450 384 | return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres), input, 385 | fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32), 386 | name='agnostic_nms') 387 | 388 | @staticmethod 389 | def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS 390 | boxes, classes, scores = x 391 | class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32) 392 | scores_inp = tf.reduce_max(scores, -1) 393 | selected_inds = tf.image.non_max_suppression( 394 | boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres) 395 | selected_boxes = tf.gather(boxes, selected_inds) 396 | padded_boxes = tf.pad(selected_boxes, 397 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]], 398 | mode="CONSTANT", constant_values=0.0) 399 | selected_scores = tf.gather(scores_inp, selected_inds) 400 | padded_scores = tf.pad(selected_scores, 401 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]], 402 | mode="CONSTANT", constant_values=-1.0) 403 | selected_classes = tf.gather(class_inds, selected_inds) 404 | padded_classes = tf.pad(selected_classes, 405 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]], 406 | mode="CONSTANT", constant_values=-1.0) 407 | valid_detections = tf.shape(selected_inds)[0] 408 | return padded_boxes, padded_scores, padded_classes, valid_detections 409 | 410 | 411 | def representative_dataset_gen(dataset, ncalib=100): 412 | # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays 413 | for n, (path, img, im0s, vid_cap, string) in enumerate(dataset): 414 | input = np.transpose(img, [1, 2, 0]) 415 | input = np.expand_dims(input, axis=0).astype(np.float32) 416 | input /= 255 417 | yield [input] 418 | if n >= ncalib: 419 | break 420 | 421 | 422 | def run(weights=ROOT / 'yolov5s.pt', # weights path 423 | imgsz=(640, 640), # inference size h,w 424 | batch_size=1, # batch size 425 | dynamic=False, # dynamic batch size 426 | ): 427 | # PyTorch model 428 | im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image 429 | model = attempt_load(weights, map_location=torch.device('cpu'), inplace=True, fuse=False) 430 | y = model(im) # inference 431 | model.info() 432 | 433 | # TensorFlow model 434 | im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image 435 | tf_model = 
TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz) 436 | y = tf_model.predict(im) # inference 437 | 438 | # Keras model 439 | im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size) 440 | keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im)) 441 | keras_model.summary() 442 | 443 | LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.') 444 | 445 | 446 | def parse_opt(): 447 | parser = argparse.ArgumentParser() 448 | parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path') 449 | parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') 450 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 451 | parser.add_argument('--dynamic', action='store_true', help='dynamic batch size') 452 | opt = parser.parse_args() 453 | opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand 454 | print_args(FILE.stem, opt) 455 | return opt 456 | 457 | 458 | def main(opt): 459 | run(**vars(opt)) 460 | 461 | 462 | if __name__ == "__main__": 463 | opt = parse_opt() 464 | main(opt) 465 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolo.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | YOLO-specific modules 4 | 5 | Usage: 6 | $ python path/to/models/yolo.py --cfg yolov5s.yaml 7 | """ 8 | 9 | import argparse 10 | import sys 11 | from copy import deepcopy 12 | from pathlib import Path 13 | 14 | import torch 15 | from torchviz import make_dot 16 | FILE = Path(__file__).absolute() 17 | sys.path.append(FILE.parents[1].as_posix()) # add yolov5/ to path 18 | 19 | from models.common import * 20 | from models.experimental import * 21 | from utils.autoanchor import check_anchor_order 22 | from utils.general import make_divisible, check_file, set_logging 23 | from utils.plots import feature_visualization 24 | from utils.torch_utils import time_sync, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 25 | select_device, copy_attr 26 | 27 | try: 28 | import thop # for FLOPs computation 29 | except ImportError: 30 | thop = None 31 | 32 | LOGGER = logging.getLogger(__name__) 33 | 34 | 35 | class Detect(nn.Module): 36 | stride = None # strides computed during build 37 | onnx_dynamic = False # ONNX export parameter 38 | 39 | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer 40 | super().__init__() 41 | self.nc = nc # number of classes 42 | self.no = nc + 5 # number of outputs per anchor 43 | self.nl = len(anchors) # number of detection layers 44 | self.na = len(anchors[0]) // 2 # number of anchors 45 | self.grid = [torch.zeros(1)] * self.nl # init grid 46 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 47 | self.register_buffer('anchors', a) # shape(nl,na,2) 48 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 49 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 50 | self.inplace = inplace # use in-place ops (e.g. 
slice assignment) 51 | 52 | def forward(self, x): 53 | z = [] # inference output 54 | for i in range(self.nl): 55 | x[i] = self.m[i](x[i]) # conv 56 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 57 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 58 | 59 | if not self.training: # inference 60 | if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: 61 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 62 | 63 | y = x[i].sigmoid() 64 | if self.inplace: 65 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 66 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 67 | else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 68 | xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 69 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh 70 | y = torch.cat((xy, wh, y[..., 4:]), -1) 71 | z.append(y.view(bs, -1, self.no)) 72 | 73 | return x if self.training else (torch.cat(z, 1), x) 74 | 75 | @staticmethod 76 | def _make_grid(nx=20, ny=20): 77 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 78 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 79 | 80 | 81 | class Model(nn.Module): 82 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes 83 | super().__init__() 84 | if isinstance(cfg, dict): 85 | self.yaml = cfg # model dict 86 | else: # is *.yaml 87 | import yaml # for torch hub 88 | self.yaml_file = Path(cfg).name 89 | with open(cfg) as f: 90 | self.yaml = yaml.safe_load(f) # model dict 91 | 92 | # Define model 93 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 94 | if nc and nc != self.yaml['nc']: 95 | LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") 96 | self.yaml['nc'] = nc # override yaml value 97 | if anchors: 98 | LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}') 99 | self.yaml['anchors'] = round(anchors) # override yaml value 100 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 101 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 102 | self.inplace = self.yaml.get('inplace', True) 103 | # LOGGER.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 104 | 105 | # Build strides, anchors 106 | m = self.model[-1] # Detect() 107 | if isinstance(m, Detect): 108 | s = 256 # 2x min stride 109 | m.inplace = self.inplace 110 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 111 | m.anchors /= m.stride.view(-1, 1, 1) 112 | check_anchor_order(m) 113 | self.stride = m.stride 114 | self._initialize_biases() # only run once 115 | # LOGGER.info('Strides: %s' % m.stride.tolist()) 116 | 117 | # Init weights, biases 118 | initialize_weights(self) 119 | self.info() 120 | LOGGER.info('') 121 | 122 | def forward(self, x, augment=False, profile=False, visualize=False): 123 | if augment: 124 | return self.forward_augment(x) # augmented inference, None 125 | return self.forward_once(x, profile, visualize) # single-scale inference, train 126 | 127 | def forward_augment(self, x): 128 | img_size = x.shape[-2:] # height, width 129 | s = [1, 0.83, 0.67] # scales 130 | f = [None, 3, None] # flips (2-ud, 3-lr) 131 | y = [] # outputs 132 | for si, fi in zip(s, f): 133 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) 134 | yi = 
self.forward_once(xi)[0] # forward 135 | # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 136 | yi = self._descale_pred(yi, fi, si, img_size) 137 | y.append(yi) 138 | return torch.cat(y, 1), None # augmented inference, train 139 | 140 | def forward_once(self, x, profile=False, visualize=False): 141 | y, dt = [], [] # outputs 142 | for m in self.model: 143 | if m.f != -1: # if not from previous layer 144 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 145 | 146 | if profile: 147 | c = isinstance(m, Detect) # copy input as inplace fix 148 | o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs 149 | t = time_sync() 150 | for _ in range(10): 151 | m(x.copy() if c else x) 152 | dt.append((time_sync() - t) * 100) 153 | if m == self.model[0]: 154 | LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}") 155 | LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') 156 | 157 | x = m(x) # run 158 | y.append(x if m.i in self.save else None) # save output 159 | 160 | if visualize: 161 | feature_visualization(x, m.type, m.i, save_dir=visualize) 162 | 163 | if profile: 164 | LOGGER.info('%.1fms total' % sum(dt)) 165 | return x 166 | 167 | def _descale_pred(self, p, flips, scale, img_size): 168 | # de-scale predictions following augmented inference (inverse operation) 169 | if self.inplace: 170 | p[..., :4] /= scale # de-scale 171 | if flips == 2: 172 | p[..., 1] = img_size[0] - p[..., 1] # de-flip ud 173 | elif flips == 3: 174 | p[..., 0] = img_size[1] - p[..., 0] # de-flip lr 175 | else: 176 | x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale 177 | if flips == 2: 178 | y = img_size[0] - y # de-flip ud 179 | elif flips == 3: 180 | x = img_size[1] - x # de-flip lr 181 | p = torch.cat((x, y, wh, p[..., 4:]), -1) 182 | return p 183 | 184 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 185 | # https://arxiv.org/abs/1708.02002 section 3.3 186 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 187 | m = self.model[-1] # Detect() module 188 | for mi, s in zip(m.m, m.stride): # from 189 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 190 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 191 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 192 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 193 | 194 | def _print_biases(self): 195 | m = self.model[-1] # Detect() module 196 | for mi in m.m: # from 197 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 198 | LOGGER.info( 199 | ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 200 | 201 | # def _print_weights(self): 202 | # for m in self.model.modules(): 203 | # if type(m) is Bottleneck: 204 | # LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 205 | 206 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 207 | LOGGER.info('Fusing layers... 
') 208 | for m in self.model.modules(): 209 | if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): 210 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 211 | delattr(m, 'bn') # remove batchnorm 212 | m.forward = m.forward_fuse # update forward 213 | self.info() 214 | return self 215 | 216 | def autoshape(self): # add AutoShape module 217 | LOGGER.info('Adding AutoShape... ') 218 | m = AutoShape(self) # wrap model 219 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 220 | return m 221 | 222 | def info(self, verbose=False, img_size=640): # print model information 223 | model_info(self, verbose, img_size) 224 | 225 | 226 | def parse_model(d, ch): # model_dict, input_channels(3) 227 | LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 228 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 229 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 230 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 231 | 232 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 233 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 234 | m = eval(m) if isinstance(m, str) else m # eval strings 235 | for j, a in enumerate(args): 236 | try: 237 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 238 | except NameError: 239 | pass 240 | 241 | n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain 242 | if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, 243 | BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, CoordinateLayer]: 244 | c1, c2 = ch[f], args[0] 245 | if c2 != no: # if not output 246 | c2 = make_divisible(c2 * gw, 8) 247 | 248 | args = [c1, c2, *args[1:]] 249 | if m in [BottleneckCSP, C3, C3TR, C3Ghost]: 250 | args.insert(2, n) # number of repeats 251 | n = 1 252 | elif m is nn.BatchNorm2d: 253 | args = [ch[f]] 254 | elif m is Concat: 255 | c2 = sum([ch[x] for x in f]) 256 | elif m is Detect: 257 | args.append([ch[x] for x in f]) 258 | if isinstance(args[1], int): # number of anchors 259 | args[1] = [list(range(args[1] * 2))] * len(f) 260 | elif m is Contract: 261 | c2 = ch[f] * args[0] ** 2 262 | 263 | elif m is Expand: 264 | c2 = ch[f] // args[0] ** 2 265 | elif m is CoordinateLayer: 266 | channel, re = args[0], args[1] 267 | channel = make_divisible(channel * gw, 8) if channel != no else channel 268 | args = [channel, re] 269 | 270 | elif m is PPLC_Conv: 271 | c2 = args[0] 272 | args = args[1:] 273 | elif m is PPLC_Block: 274 | c2 = args[0] 275 | args = args[1:] 276 | elif m is ConvNeXt_Block: 277 | c2 = args[0] 278 | args = args[1:] 279 | else: 280 | c2 = ch[f] 281 | 282 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 283 | t = str(m)[8:-2].replace('__main__.', '') # module type 284 | np = sum([x.numel() for x in m_.parameters()]) # number params 285 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 286 | LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n_, np, t, args)) # print 287 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 288 | layers.append(m_) 289 | if i == 0: 290 | ch = [] 291 | ch.append(c2) 292 | return nn.Sequential(*layers), sorted(save) 293 | 294 | 295 | def getLayers(model): 296 | """ 297 | get each layer's name and its module 298 | 
:param model: 299 | :return: each layer's name and its module 300 | """ 301 | layers = [] 302 | 303 | def unfoldLayer(model): 304 | """ 305 | unfold each layer 306 | :param model: the given model or a single layer 307 | 308 | :return: 309 | """ 310 | 311 | # get all layers of the model 312 | layer_list = list(model.named_children()) 313 | for item in layer_list: 314 | module = item[1] 315 | sublayer = list(module.named_children()) 316 | sublayer_num = len(sublayer) 317 | 318 | # if current layer has no sublayers, it is a leaf module, so record it 319 | if sublayer_num == 0: 320 | layers.append(module) 321 | # if current layer contains sublayers, unfold them 322 | elif isinstance(module, torch.nn.Module): 323 | unfoldLayer(module) 324 | 325 | unfoldLayer(model) 326 | return layers 327 | if __name__ == '__main__': 328 | 329 | model_urls = { 330 | "convnext_tiny_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth", 331 | "convnext_small_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth", 332 | "convnext_base_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth", 333 | "convnext_large_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth", 334 | "convnext_base_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth", 335 | "convnext_large_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth", 336 | "convnext_xlarge_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth", 337 | } 338 | url = model_urls['convnext_tiny_1k'] 339 | checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True) 340 | init_dict = {} 341 | for index in range(4): 342 | for k, v in list(checkpoint['model'].items()): 343 | if k.startswith('norm') or k.startswith('head'): 344 | pass 345 | else: 346 | init_dict['.'.join(['model', str(index), k])] = v 347 | 348 | 349 | import tensorwatch as tw 350 | # 5s Model Summary: 283 layers, 7082421 parameters, 7082421 gradients, 16.4 GFLOPs 351 | # PP1.0 Model Summary: 367 layers, 3857195 parameters, 3857195 gradients, 8.2 GFLOPs 352 | parser = argparse.ArgumentParser() 353 | # parser.add_argument('--cfg', type=str, default='yolov5s_Coordinate.yaml', help='model.yaml') 354 | parser.add_argument('--cfg', type=str, default='yolov5_ConvNeXt.yaml', help='model.yaml') 355 | 356 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 357 | parser.add_argument('--profile', action='store_true', help='profile model speed') 358 | opt = parser.parse_args() 359 | opt.cfg = check_file(opt.cfg) # check file 360 | set_logging() 361 | device = select_device(opt.device) 362 | 363 | # Create model 364 | model = Model(opt.cfg).to(device) 365 | model.train() 366 | 367 | # Update the backbone weights with the pretrained ConvNeXt state dict 368 | model_dict = model.state_dict() 369 | model_dict.update(init_dict) 370 | model.load_state_dict(model_dict) 371 | 372 | # Profile 373 | if opt.profile: 374 | img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 375 | y = model(img, profile=True) 376 | print(y.shape) 377 | # x = torch.randn(1, 8) 378 | 379 | # print(model) 380 | # from thop import profile 381 | 382 | # flops, params = profile(model, (torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device),)) 383 | 384 | # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898) 385 | # from torch.utils.tensorboard import SummaryWriter 386 | # tb_writer = SummaryWriter('.') 387 | # LOGGER.info("Run 'tensorboard --logdir=models' to view tensorboard at http://localhost:6006/") 388 | # tb_writer.add_graph(torch.jit.trace(model, img, strict=False), []) # add model graph 389 | 390 | 391 | 392 | 393 | 394 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5_ConvNeXt_Tiny.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [96, 0, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 16 | [-1, 1, ConvNeXt_Block, [192, 1, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 17 | [-1, 1, ConvNeXt_Block, [384, 2, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 18 | [-1, 1, ConvNeXt_Block, [768, 3, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | head: 23 | [[-1, 1, Conv, [768, 1, 1]], 24 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 25 | [[-1, 2], 1, Concat, [1]], 26 | [-1, 3, C3, [768, False]], 27 | 28 | [-1, 1, Conv, [384, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 1], 1, Concat, [1]], 31 | [-1, 3, C3, [384, False]], 32 | 33 | [-1, 1, Conv, [384, 3, 2]], 34 | [[-1, 8], 1, Concat, [1]], 35 | [-1, 3, C3, [768, False]], 36 | 37 | [-1, 1, Conv, [768, 3, 2]], 38 | [[-1, 4], 1, Concat, [1]], # cat head P5 39 | [-1, 3, C3, [768, False]], # 23 (P5/32-large) 40 | 41 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | 44 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5_ConvNeXt_base.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [96, 0, 3, 
[3, 3, 27, 3], [128, 256, 512, 1024]]], 16 | [-1, 1, ConvNeXt_Block, [192, 1, 3, [3, 3, 27, 3], [128, 256, 512, 1024]]], 17 | [-1, 1, ConvNeXt_Block, [384, 2, 3, [3, 3, 27, 3], [128, 256, 512, 1024]]], 18 | [-1, 1, ConvNeXt_Block, [768, 3, 3, [3, 3, 27, 3], [128, 256, 512, 1024]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | head: 23 | [[-1, 1, Conv, [1024, 1, 1]], 24 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 25 | [[-1, 2], 1, Concat, [1]], 26 | [-1, 3, C3, [1024, False]], 27 | 28 | [-1, 1, Conv, [512, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 1], 1, Concat, [1]], 31 | [-1, 3, C3, [512, False]], 32 | 33 | [-1, 1, Conv, [512, 3, 2]], 34 | [[-1, 8], 1, Concat, [1]], 35 | [-1, 3, C3, [1024, False]], 36 | 37 | [-1, 1, Conv, [1024, 3, 2]], 38 | [[-1, 4], 1, Concat, [1]], # cat head P5 39 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 40 | 41 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | 44 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5_ConvNeXt_large.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [192, 0, 3, [3, 3, 27, 3], [192, 384, 768, 1536]]], 16 | [-1, 1, ConvNeXt_Block, [384, 1, 3, [3, 3, 27, 3], [192, 384, 768, 1536]]], 17 | [-1, 1, ConvNeXt_Block, [768, 2, 3, [3, 3, 27, 3], [192, 384, 768, 1536]]], 18 | [-1, 1, ConvNeXt_Block, [1536, 3, 3, [3, 3, 27, 3], [192, 384, 768, 1536]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | head: 23 | [[-1, 1, Conv, [1536, 1, 1]], 24 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 25 | [[-1, 2], 1, Concat, [1]], 26 | [-1, 3, C3, [1536, False]], 27 | 28 | [-1, 1, Conv, [768, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 1], 1, Concat, [1]], 31 | [-1, 3, C3, [768, False]], 32 | 33 | [-1, 1, Conv, [768, 3, 2]], 34 | [[-1, 8], 1, Concat, [1]], 35 | [-1, 3, C3, [1536, False]], 36 | 37 | [-1, 1, Conv, [1536, 3, 2]], 38 | [[-1, 4], 1, Concat, [1]], # cat head P5 39 | [-1, 3, C3, [1536, False]], # 23 (P5/32-large) 40 | 41 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | 44 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5_ConvNeXt_small.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [96, 0, 3, [3, 3, 27, 3], [96, 192, 384, 768]]], 16 | [-1, 1, ConvNeXt_Block, [192, 1, 3, [3, 3, 27, 3], [96, 192, 384, 768]]], 17 | [-1, 1, ConvNeXt_Block, [384, 2, 3, [3, 3, 27, 3], [96, 192, 384, 768]]], 18 | [-1, 1, ConvNeXt_Block, [768, 3, 3, [3, 3, 27, 3], [96, 192, 384, 768]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | head: 23 | 
[[-1, 1, Conv, [768, 1, 1]], 24 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 25 | [[-1, 2], 1, Concat, [1]], 26 | [-1, 3, C3, [768, False]], 27 | 28 | [-1, 1, Conv, [384, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 1], 1, Concat, [1]], 31 | [-1, 3, C3, [384, False]], 32 | 33 | [-1, 1, Conv, [384, 3, 2]], 34 | [[-1, 8], 1, Concat, [1]], 35 | [-1, 3, C3, [768, False]], 36 | 37 | [-1, 1, Conv, [768, 3, 2]], 38 | [[-1, 4], 1, Concat, [1]], # cat head P5 39 | [-1, 3, C3, [768, False]], # 23 (P5/32-large) 40 | 41 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | 44 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5_ConvNeXt_xlarge_22k.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [96, 0, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 16 | [-1, 1, ConvNeXt_Block, [192, 1, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 17 | [-1, 1, ConvNeXt_Block, [384, 2, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 18 | [-1, 1, ConvNeXt_Block, [768, 3, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | head: 23 | [[-1, 1, Conv, [2048, 1, 1]], 24 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 25 | [[-1, 2], 1, Concat, [1]], 26 | [-1, 3, C3, [2048, False]], 27 | 28 | [-1, 1, Conv, [1024, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 1], 1, Concat, [1]], 31 | [-1, 3, C3, [1024, False]], 32 | 33 | [-1, 1, Conv, [1024, 3, 2]], 34 | [[-1, 8], 1, Concat, [1]], 35 | [-1, 3, C3, [2048, False]], 36 | 37 | [-1, 1, Conv, [2048, 3, 2]], 38 | [[-1, 4], 1, Concat, [1]], # cat head P5 39 | [-1, 3, C3, [2048, False]], # 23 (P5/32-large) 40 | 41 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | 44 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone 
P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5n.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 
1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, 
[512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /PP-LCNet-Yolov5/PP_LCNet/LCNet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import thop 5 | 6 | # try: 7 | # import softpool_cuda 8 | # from SoftPool import soft_pool2d, SoftPool2d 9 | # except ImportError: 10 | # print('Please install SoftPool first: https://github.com/alexandrosstergiou/SoftPool') 11 | # exit(0) 12 | 13 | NET_CONFIG = { 14 | "blocks2": 15 | # k, in_c, out_c, s, use_se 16 | [[3, 16, 32, 1, False]], 17 | "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], 18 | "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], 19 | "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], 20 | [5, 256, 256, 1, False], [5, 256, 256, 1, False], 21 | [5, 256, 256, 1, False], [5, 256, 256, 1, False]], 22 | "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] 23 | } 24 | 25 | 26 | def autopad(k, p=None): 27 | if p is None: 28 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] 29 | return p 30 | 31 | 32 | def make_divisible(v, divisor=8, min_value=None): 33 | if min_value is None: 34 | min_value = divisor 35 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 36 | if new_v < 0.9 * v: 37 | new_v += divisor 38 | return new_v 39 | 40 | 41 | class HardSwish(nn.Module): 42 | def __init__(self, inplace=True): 43 | super(HardSwish, self).__init__() 44 | self.relu6 = nn.ReLU6(inplace=inplace) 45 | 46 | def forward(self, x): 47 | return x * self.relu6(x+3) / 6 48 | 49 | 50 | class HardSigmoid(nn.Module): 51 | def __init__(self, inplace=True): 52 | super(HardSigmoid, self).__init__() 53 | self.relu6 = nn.ReLU6(inplace=inplace) 54 | 55 | def forward(self, x): 56 | return (self.relu6(x+3)) / 6 57 | 58 | 59 | class SELayer(nn.Module): 60 | def __init__(self, channel, reduction=16): 61 | super(SELayer, self).__init__() 62 | self.avgpool = nn.AdaptiveAvgPool2d(1) 63 | self.fc = nn.Sequential( 64 | nn.Linear(channel, channel // reduction, bias=False), 65 | nn.ReLU(inplace=True), 66 | nn.Linear(channel // reduction, channel, bias=False), 67 | HardSigmoid() 68 | ) 69 | 70 | def forward(self, x): 71 | b, c, h, w = x.size() 72 | y = self.avgpool(x).view(b, c) 73 | y = self.fc(y).view(b, c, 1, 1) 74 | return x * y.expand_as(x) 75 | 76 | 77 | class DepthwiseSeparable(nn.Module): 78 | def __init__(self, inp, oup, dw_size, stride, use_se=False): 79 | super(DepthwiseSeparable, self).__init__() 80 | self.use_se = use_se 81 | self.stride = stride 82 | self.inp = inp 83 | self.oup = oup 84 | self.dw_size = dw_size 85 | self.dw_sp = nn.Sequential( 86 | nn.Conv2d(self.inp, self.inp, kernel_size=self.dw_size, stride=self.stride, 87 | padding=autopad(self.dw_size, None), groups=self.inp, bias=False), 88 | nn.BatchNorm2d(self.inp), 89 | HardSwish(), 90 | 91 | nn.Conv2d(self.inp, self.oup, kernel_size=1, stride=1, padding=0, bias=False), 92 | nn.BatchNorm2d(self.oup), 93 | HardSwish(), 94 | ) 95 | self.se = SELayer(self.oup) 96 | 97 | def forward(self, x): 98 | x = self.dw_sp(x) 99 | if self.use_se: 100 | x = self.se(x) 101 | return x 102 | 103 | 104 | class PP_LCNet(nn.Module): 105 | def __init__(self, scale=1.0, class_num=10, class_expand=1280, dropout_prob=0.2): 106 | super(PP_LCNet, 
self).__init__() 107 | self.scale = scale 108 | self.conv1 = nn.Conv2d(3, out_channels=make_divisible(16 * self.scale), 109 | kernel_size=3, stride=2, padding=1, bias=False) 110 | # k, in_c, out_c, s, use_se inp, oup, dw_size, stride, use_se=False 111 | self.blocks2 = nn.Sequential(*[ 112 | DepthwiseSeparable(inp=make_divisible(in_c * self.scale), 113 | oup=make_divisible(out_c * self.scale), 114 | dw_size=k, stride=s, use_se=use_se) 115 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks2"]) 116 | ]) 117 | 118 | self.blocks3 = nn.Sequential(*[ 119 | DepthwiseSeparable(inp=make_divisible(in_c * self.scale), 120 | oup=make_divisible(out_c * self.scale), 121 | dw_size=k, stride=s, use_se=use_se) 122 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks3"]) 123 | ]) 124 | 125 | self.blocks4 = nn.Sequential(*[ 126 | DepthwiseSeparable(inp=make_divisible(in_c * self.scale), 127 | oup=make_divisible(out_c * self.scale), 128 | dw_size=k, stride=s, use_se=use_se) 129 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks4"]) 130 | ]) 131 | # k, in_c, out_c, s, use_se inp, oup, dw_size, stride, use_se=False 132 | self.blocks5 = nn.Sequential(*[ 133 | DepthwiseSeparable(inp=make_divisible(in_c * self.scale), 134 | oup=make_divisible(out_c * self.scale), 135 | dw_size=k, stride=s, use_se=use_se) 136 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks5"]) 137 | ]) 138 | 139 | self.blocks6 = nn.Sequential(*[ 140 | DepthwiseSeparable(inp=make_divisible(in_c * self.scale), 141 | oup=make_divisible(out_c * self.scale), 142 | dw_size=k, stride=s, use_se=use_se) 143 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks6"]) 144 | ]) 145 | 146 | self.GAP = nn.AdaptiveAvgPool2d(1) 147 | 148 | self.last_conv = nn.Conv2d(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale), 149 | out_channels=class_expand, 150 | kernel_size=1, stride=1, padding=0, bias=False) 151 | 152 | self.hardswish = HardSwish() 153 | self.dropout = nn.Dropout(p=dropout_prob) 154 | 155 | self.fc = nn.Linear(class_expand, class_num) 156 | 157 | def forward(self, x): 158 | x = self.conv1(x) 159 | print(x.shape) 160 | x = self.blocks2(x) 161 | print(x.shape) 162 | x = self.blocks3(x) 163 | print(x.shape) 164 | x = self.blocks4(x) 165 | print(x.shape) 166 | x = self.blocks5(x) 167 | print(x.shape) 168 | x = self.blocks6(x) 169 | print(x.shape) 170 | 171 | x = self.GAP(x) 172 | x = self.last_conv(x) 173 | x = self.hardswish(x) 174 | x = self.dropout(x) 175 | x = torch.flatten(x, start_dim=1, end_dim=-1) 176 | x = self.fc(x) 177 | return x 178 | 179 | 180 | def PPLCNET_x0_25(**kwargs): 181 | model = PP_LCNet(scale=0.25, **kwargs) 182 | return model 183 | 184 | 185 | def PPLCNET_x0_35(**kwargs): 186 | model = PP_LCNet(scale=0.35, **kwargs) 187 | return model 188 | 189 | 190 | def PPLCNET_x0_5(**kwargs): 191 | model = PP_LCNet(scale=0.5, **kwargs) 192 | return model 193 | 194 | 195 | def PPLCNET_x0_75(**kwargs): 196 | model = PP_LCNet(scale=0.75, **kwargs) 197 | return model 198 | 199 | 200 | def PPLCNET_x1_0(**kwargs): 201 | model = PP_LCNet(scale=1.0, **kwargs) 202 | return model 203 | 204 | 205 | def PPLCNET_x1_5(**kwargs): 206 | model = PP_LCNet(scale=1.5, **kwargs) 207 | return model 208 | 209 | 210 | def PPLCNET_x2_0(**kwargs): 211 | model = PP_LCNet(scale=2.0, **kwargs) 212 | return model 213 | 214 | def PPLCNET_x2_5(**kwargs): 215 | model = PP_LCNet(scale=2.5, **kwargs) 216 | return model 217 | 218 | 219 | 220 | 221 | if __name__ == '__main__': 222 | # 
input = torch.randn(1, 3, 640, 640) 223 | # model = PPLCNET_x2_5() 224 | # flops, params = thop.profile(model, inputs=(input,)) 225 | # print('flops:', flops / 1000000000) 226 | # print('params:', params / 1000000) 227 | 228 | model = PPLCNET_x1_0() 229 | # model_1 = PW_Conv(3, 16) 230 | input = torch.randn(2, 3, 256, 256) 231 | print(input.shape) 232 | output = model(input) 233 | print(output.shape) # [1, num_class] 234 | 235 | 236 | -------------------------------------------------------------------------------- /PP-LCNet-Yolov5/models/common.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Common modules 4 | """ 5 | 6 | import logging 7 | import math 8 | import warnings 9 | from copy import copy 10 | from pathlib import Path 11 | 12 | import numpy as np 13 | import pandas as pd 14 | import requests 15 | import torch 16 | import torch.nn as nn 17 | from PIL import Image 18 | from torch.cuda import amp 19 | 20 | from utils.datasets import exif_transpose, letterbox 21 | from utils.general import colorstr, increment_path, is_ascii, make_divisible, non_max_suppression, save_one_box, \ 22 | scale_coords, xyxy2xywh 23 | from utils.plots import Annotator, colors 24 | from utils.torch_utils import time_sync 25 | from torch.nn.parameter import Parameter 26 | 27 | LOGGER = logging.getLogger(__name__) 28 | 29 | 30 | def autopad(k, p=None): # kernel, padding 31 | # Pad to 'same' 32 | if p is None: 33 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 34 | return p 35 | 36 | 37 | class Conv(nn.Module): 38 | # Standard convolution 39 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 40 | super().__init__() 41 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 42 | self.bn = nn.BatchNorm2d(c2) 43 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 44 | 45 | def forward(self, x): 46 | return self.act(self.bn(self.conv(x))) 47 | 48 | def forward_fuse(self, x): 49 | return self.act(self.conv(x)) 50 | 51 | 52 | class DWConv(Conv): 53 | # Depth-wise convolution class 54 | def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 55 | super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 56 | 57 | 58 | class TransformerLayer(nn.Module): 59 | # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) 60 | def __init__(self, c, num_heads): 61 | super().__init__() 62 | self.q = nn.Linear(c, c, bias=False) 63 | self.k = nn.Linear(c, c, bias=False) 64 | self.v = nn.Linear(c, c, bias=False) 65 | self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) 66 | self.fc1 = nn.Linear(c, c, bias=False) 67 | self.fc2 = nn.Linear(c, c, bias=False) 68 | 69 | def forward(self, x): 70 | x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x 71 | x = self.fc2(self.fc1(x)) + x 72 | return x 73 | 74 | 75 | class TransformerBlock(nn.Module): 76 | # Vision Transformer https://arxiv.org/abs/2010.11929 77 | def __init__(self, c1, c2, num_heads, num_layers): 78 | super().__init__() 79 | self.conv = None 80 | if c1 != c2: 81 | self.conv = Conv(c1, c2) 82 | self.linear = nn.Linear(c2, c2) # learnable position embedding 83 | self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)]) 84 | self.c2 = c2 85 | 86 | def forward(self, x): 87 | if self.conv is not None: 88 | x = 
self.conv(x) 89 | b, _, w, h = x.shape 90 | p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3) 91 | return self.tr(p + self.linear(p)).unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h) 92 | 93 | 94 | class Bottleneck(nn.Module): 95 | # Standard bottleneck 96 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 97 | super().__init__() 98 | c_ = int(c2 * e) # hidden channels 99 | self.cv1 = Conv(c1, c_, 1, 1) 100 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 101 | self.add = shortcut and c1 == c2 102 | 103 | def forward(self, x): 104 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 105 | 106 | 107 | class BottleneckCSP(nn.Module): 108 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 109 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 110 | super().__init__() 111 | c_ = int(c2 * e) # hidden channels 112 | self.cv1 = Conv(c1, c_, 1, 1) 113 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 114 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 115 | self.cv4 = Conv(2 * c_, c2, 1, 1) 116 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 117 | self.act = nn.LeakyReLU(0.1, inplace=True) 118 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 119 | 120 | def forward(self, x): 121 | y1 = self.cv3(self.m(self.cv1(x))) 122 | y2 = self.cv2(x) 123 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 124 | 125 | 126 | class C3(nn.Module): 127 | # CSP Bottleneck with 3 convolutions 128 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 129 | super().__init__() 130 | c_ = int(c2 * e) # hidden channels 131 | self.cv1 = Conv(c1, c_, 1, 1) 132 | self.cv2 = Conv(c1, c_, 1, 1) 133 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) 134 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 135 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 136 | 137 | def forward(self, x): 138 | # print(self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)).shape) 139 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) 140 | 141 | 142 | class C3TR(C3): 143 | # C3 module with TransformerBlock() 144 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 145 | super().__init__(c1, c2, n, shortcut, g, e) 146 | c_ = int(c2 * e) 147 | self.m = TransformerBlock(c_, c_, 4, n) 148 | 149 | 150 | class C3SPP(C3): 151 | # C3 module with SPP() 152 | def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5): 153 | super().__init__(c1, c2, n, shortcut, g, e) 154 | c_ = int(c2 * e) 155 | self.m = SPP(c_, c_, k) 156 | 157 | 158 | class C3Ghost(C3): 159 | # C3 module with GhostBottleneck() 160 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 161 | super().__init__(c1, c2, n, shortcut, g, e) 162 | c_ = int(c2 * e) # hidden channels 163 | self.m = nn.Sequential(*[GhostBottleneck(c_, c_) for _ in range(n)]) 164 | 165 | 166 | class SPP(nn.Module): 167 | # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729 168 | def __init__(self, c1, c2, k=(5, 9, 13)): 169 | super().__init__() 170 | c_ = c1 // 2 # hidden channels 171 | self.cv1 = Conv(c1, c_, 1, 1) 172 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 173 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 174 | 175 | def 
forward(self, x): 176 | x = self.cv1(x) 177 | with warnings.catch_warnings(): 178 | warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning 179 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 180 | 181 | 182 | class SPPF(nn.Module): 183 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 184 | def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) 185 | super().__init__() 186 | c_ = c1 // 2 # hidden channels 187 | self.cv1 = Conv(c1, c_, 1, 1) 188 | self.cv2 = Conv(c_ * 4, c2, 1, 1) 189 | self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) 190 | 191 | def forward(self, x): 192 | x = self.cv1(x) 193 | with warnings.catch_warnings(): 194 | warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning 195 | y1 = self.m(x) 196 | y2 = self.m(y1) 197 | return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1)) 198 | 199 | 200 | class Focus(nn.Module): 201 | # Focus wh information into c-space 202 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 203 | super().__init__() 204 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 205 | # self.contract = Contract(gain=2) 206 | 207 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 208 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 209 | # return self.conv(self.contract(x)) 210 | 211 | 212 | class GhostConv(nn.Module): 213 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 214 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 215 | super().__init__() 216 | c_ = c2 // 2 # hidden channels 217 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 218 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 219 | 220 | def forward(self, x): 221 | y = self.cv1(x) 222 | return torch.cat([y, self.cv2(y)], 1) 223 | 224 | 225 | class GhostBottleneck(nn.Module): 226 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 227 | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride 228 | super().__init__() 229 | c_ = c2 // 2 230 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 231 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 232 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 233 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 234 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 235 | 236 | def forward(self, x): 237 | return self.conv(x) + self.shortcut(x) 238 | 239 | 240 | class Contract(nn.Module): 241 | # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) 242 | def __init__(self, gain=2): 243 | super().__init__() 244 | self.gain = gain 245 | 246 | def forward(self, x): 247 | b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain' 248 | s = self.gain 249 | x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2) 250 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) 251 | return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40) 252 | 253 | 254 | class Expand(nn.Module): 255 | # Expand channels into width-height, i.e. 
x(1,64,80,80) to x(1,16,160,160) 256 | def __init__(self, gain=2): 257 | super().__init__() 258 | self.gain = gain 259 | 260 | def forward(self, x): 261 | b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' 262 | s = self.gain 263 | x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80) 264 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) 265 | return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160) 266 | 267 | 268 | class Concat(nn.Module): 269 | # Concatenate a list of tensors along dimension 270 | def __init__(self, dimension=1): 271 | super().__init__() 272 | self.d = dimension 273 | 274 | def forward(self, x): 275 | return torch.cat(x, self.d) 276 | 277 | 278 | class AutoShape(nn.Module): 279 | # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS 280 | conf = 0.25 # NMS confidence threshold 281 | iou = 0.45 # NMS IoU threshold 282 | classes = None # (optional list) filter by class 283 | max_det = 1000 # maximum number of detections per image 284 | 285 | def __init__(self, model): 286 | super().__init__() 287 | self.model = model.eval() 288 | 289 | def autoshape(self): 290 | LOGGER.info('AutoShape already enabled, skipping... ') # model already converted to model.autoshape() 291 | return self 292 | 293 | @torch.no_grad() 294 | def forward(self, imgs, size=640, augment=False, profile=False): 295 | # Inference from various sources. For height=640, width=1280, RGB images example inputs are: 296 | # file: imgs = 'data/images/zidane.jpg' # str or PosixPath 297 | # URI: = 'https://ultralytics.com/images/zidane.jpg' 298 | # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) 299 | # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3) 300 | # numpy: = np.zeros((640,1280,3)) # HWC 301 | # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) 302 | # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] 
# list of images 303 | 304 | t = [time_sync()] 305 | p = next(self.model.parameters()) # for device and type 306 | if isinstance(imgs, torch.Tensor): # torch 307 | with amp.autocast(enabled=p.device.type != 'cpu'): 308 | return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference 309 | 310 | # Pre-process 311 | n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images 312 | shape0, shape1, files = [], [], [] # image and inference shapes, filenames 313 | for i, im in enumerate(imgs): 314 | f = f'image{i}' # filename 315 | if isinstance(im, (str, Path)): # filename or uri 316 | im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im 317 | im = np.asarray(exif_transpose(im)) 318 | elif isinstance(im, Image.Image): # PIL Image 319 | im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f 320 | files.append(Path(f).with_suffix('.jpg').name) 321 | if im.shape[0] < 5: # image in CHW 322 | im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) 323 | im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3) # enforce 3ch input 324 | s = im.shape[:2] # HWC 325 | shape0.append(s) # image shape 326 | g = (size / max(s)) # gain 327 | shape1.append([y * g for y in s]) 328 | imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update 329 | shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape 330 | x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad 331 | x = np.stack(x, 0) if n > 1 else x[0][None] # stack 332 | x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW 333 | x = torch.from_numpy(x).to(p.device).type_as(p) / 255. 
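        # Batch assembly recap: shape1 is the per-batch max letterbox shape rounded up to a
        # stride multiple via make_divisible, each image is letterboxed to that common shape,
        # and the stack is flipped from BHWC to BCHW with pixels rescaled from uint8 to [0, 1].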
# uint8 to fp16/32 334 | t.append(time_sync()) 335 | 336 | with amp.autocast(enabled=p.device.type != 'cpu'): 337 | # Inference 338 | y = self.model(x, augment, profile)[0] # forward 339 | t.append(time_sync()) 340 | 341 | # Post-process 342 | y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det) # NMS 343 | for i in range(n): 344 | scale_coords(shape1, y[i][:, :4], shape0[i]) 345 | 346 | t.append(time_sync()) 347 | return Detections(imgs, y, files, t, self.names, x.shape) 348 | 349 | 350 | class Detections: 351 | # YOLOv5 detections class for inference results 352 | def __init__(self, imgs, pred, files, times=None, names=None, shape=None): 353 | super().__init__() 354 | d = pred[0].device # device 355 | gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations 356 | self.imgs = imgs # list of images as numpy arrays 357 | self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) 358 | self.names = names # class names 359 | self.ascii = is_ascii(names) # names are ascii (use PIL for UTF-8) 360 | self.files = files # image filenames 361 | self.xyxy = pred # xyxy pixels 362 | self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels 363 | self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized 364 | self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized 365 | self.n = len(self.pred) # number of images (batch size) 366 | self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms) 367 | self.s = shape # inference BCHW shape 368 | 369 | def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')): 370 | for i, (im, pred) in enumerate(zip(self.imgs, self.pred)): 371 | str = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' 372 | if pred.shape[0]: 373 | for c in pred[:, -1].unique(): 374 | n = (pred[:, -1] == c).sum() # detections per class 375 | str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string 376 | if show or save or render or crop: 377 | annotator = Annotator(im, pil=not self.ascii) 378 | for *box, conf, cls in reversed(pred): # xyxy, confidence, class 379 | label = f'{self.names[int(cls)]} {conf:.2f}' 380 | if crop: 381 | save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i]) 382 | else: # all others 383 | annotator.box_label(box, label, color=colors(cls)) 384 | im = annotator.im 385 | else: 386 | str += '(no detections)' 387 | 388 | im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np 389 | if pprint: 390 | LOGGER.info(str.rstrip(', ')) 391 | if show: 392 | im.show(self.files[i]) # show 393 | if save: 394 | f = self.files[i] 395 | im.save(save_dir / f) # save 396 | if i == self.n - 1: 397 | LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") 398 | if render: 399 | self.imgs[i] = np.asarray(im) 400 | 401 | def print(self): 402 | self.display(pprint=True) # print results 403 | LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % 404 | self.t) 405 | 406 | def show(self): 407 | self.display(show=True) # show results 408 | 409 | def save(self, save_dir='runs/detect/exp'): 410 | save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir 411 | self.display(save=True, save_dir=save_dir) # save results 412 | 413 | def crop(self, save_dir='runs/detect/exp'): 414 
| save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir 415 | self.display(crop=True, save_dir=save_dir) # crop results 416 | LOGGER.info(f'Saved results to {save_dir}\n') 417 | 418 | def render(self): 419 | self.display(render=True) # render results 420 | return self.imgs 421 | 422 | def pandas(self): 423 | # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0]) 424 | new = copy(self) # return copy 425 | ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns 426 | cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns 427 | for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]): 428 | a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update 429 | setattr(new, k, [pd.DataFrame(x, columns=c) for x in a]) 430 | return new 431 | 432 | def tolist(self): 433 | # return a list of Detections objects, i.e. 'for result in results.tolist():' 434 | x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], times=(0, 0, 0, 0), names=self.names, shape=self.s) for i in range(self.n)] # pass files explicitly; dummy times keep __init__'s timing math valid 435 | for d in x: 436 | for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: 437 | setattr(d, k, getattr(d, k)[0]) # pop out of list 438 | return x 439 | 440 | def __len__(self): 441 | return self.n 442 | 443 | 444 | class Classify(nn.Module): 445 | # Classification head, i.e. x(b,c1,20,20) to x(b,c2) 446 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups 447 | super().__init__() 448 | self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 449 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1) 450 | self.flat = nn.Flatten() 451 | 452 | def forward(self, x): 453 | z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list 454 | return self.flat(self.conv(z)) # flatten to x(b,c2) 455 | 456 | class h_sigmoid(nn.Module): 457 | def __init__(self, inplace=True): 458 | super(h_sigmoid, self).__init__() 459 | self.relu = nn.ReLU6(inplace=inplace) 460 | 461 | def forward(self, x): 462 | return self.relu(x + 3) / 6 463 | 464 | 465 | class h_swish(nn.Module): 466 | def __init__(self, inplace=True): 467 | super(h_swish, self).__init__() 468 | self.sigmoid = h_sigmoid(inplace=inplace) 469 | 470 | def forward(self, x): 471 | return x * self.sigmoid(x) 472 | 473 | 474 | class CoordinateLayer(nn.Module): 475 | def __init__(self, inp, oup, reduction=32): 476 | super(CoordinateLayer, self).__init__() 477 | "Adaptive average pooling converted to global average pooling" 478 | # inputsz = np.array([20, 20]) 479 | # outputsz_h = np.array([20, 1]) 480 | # outputsz_w = np.array([1, 20]) 481 | # 482 | # stridesz_h = np.floor(inputsz / outputsz_h).astype(np.int32) 483 | # kernelsz_h = inputsz - (outputsz_h - 1) * stridesz_h # 484 | # # self.pool_h = nn.AvgPool2d(kernel_size=list(kernelsz_h), stride=list(stridesz_h)) 485 | # self.pool_h = nn.AvgPool2d(kernel_size=[1, 20], stride=[1, 20]) 486 | # 487 | # stridesz_w = np.floor(inputsz / outputsz_w).astype(np.int32) 488 | # kernelsz_w = inputsz - (outputsz_w - 1) * stridesz_w 489 | # # self.pool_w = nn.AvgPool2d(kernel_size=list(kernelsz_w), stride=list(stridesz_w)) 490 | # self.pool_w = nn.AvgPool2d(kernel_size=[20, 1], stride=[20, 1]) 491 | 492 | 493 | mip = max(8, inp // reduction) 494 | 495 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 496 | self.bn1 = nn.BatchNorm2d(mip) 497 | self.act = h_swish() 498 | 499 | self.conv_h = nn.Conv2d(mip, oup, 
kernel_size=1, stride=1, padding=0) 500 | self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0) 501 | 502 | def forward(self, x): 503 | identity = x 504 | 505 | n, c, h, w = x.size() 506 | # # print(n, c, h, w) 507 | # x_h = self.pool_h(x) # [n,c,h,w]-->[n,c,h,1] 508 | # x_w = self.pool_w(x).permute(0, 1, 3, 2) # [n,c,h,w]-->[n,c,1,w]-->[n,c,w,1] 509 | # pool_h = nn.AvgPool2d(kernel_size=(1, 20), stride=(1, 20)) 510 | # pool_w = nn.AvgPool2d(kernel_size=(20, 1), stride=(20, 1)) 511 | # x_h = pool_h(x) 512 | # x_w = pool_w(x).permute(0, 1, 3, 2) 513 | 514 | x_h = torch.flatten(x, start_dim=2, end_dim=2).mean(dim=2) 515 | x_h = x_h.unsqueeze(3) 516 | 517 | x_w = torch.flatten(x, start_dim=3, end_dim=3).mean(dim=3) 518 | x_w = x_w.unsqueeze(2).permute(0, 1, 3, 2) 519 | 520 | # if torch.is_tensor(h): 521 | # h = h.item() # fix: convert traced tensor dim to int 522 | # w = w.item() # fix: convert traced tensor dim to int 523 | # pool_h = nn.AdaptiveAvgPool2d((h, 1)) 524 | # pool_w = nn.AdaptiveAvgPool2d((1, w)) 525 | # 526 | # x_h = pool_h(x) 527 | # x_w = pool_w(x).permute(0, 1, 3, 2) 528 | 529 | y = torch.cat([x_h, x_w], dim=2) # -->[n,c,(h+w),1] 530 | y = self.conv1(y) # [n,c,(h+w),1]-->[n,mip,(h+w),1] 531 | y = self.bn1(y) # [n,mip,(h+w),1] 532 | y = self.act(y) # [n,mip,(h+w),1] 533 | 534 | if torch.is_tensor(h): 535 | h = h.item() 536 | w = w.item() 537 | x_h, x_w = torch.split(y, [h, w], dim=2) # [n,mip,(h+w),1]-->[n,mip,h,1] & [n,mip,w,1] 538 | x_w = x_w.permute(0, 1, 3, 2) # [n,mip,w,1]-->[n,mip,1,w] 539 | 540 | a_h = self.conv_h(x_h).sigmoid() # [n,mip,h,1]-->[n,oup,h,1]--> probability distribution along the x axis 541 | a_w = self.conv_w(x_w).sigmoid() # [n,mip,w,1]-->[n,oup,w,1]--> probability distribution along the y axis 542 | 543 | # print(a_w.shape) 544 | # print(a_h.shape) 545 | out = identity * a_w * a_h 546 | 547 | return out 548 | 549 | 550 | class sa_layer(nn.Module): 551 | """Constructs a Channel Spatial Group module. 
552 | Args: 553 | k_size: Adaptive selection of kernel size 554 | """ 555 | 556 | def __init__(self, channel, groups=64): 557 | super(sa_layer, self).__init__() 558 | self.groups = groups 559 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 560 | self.cweight = Parameter(torch.zeros(1, channel // (1 * groups), 1, 1)) 561 | self.cbias = Parameter(torch.ones(1, channel // (1 * groups), 1, 1)) 562 | self.sweight = Parameter(torch.zeros(1, channel // (1 * groups), 1, 1)) 563 | self.sbias = Parameter(torch.ones(1, channel // (1 * groups), 1, 1)) 564 | 565 | self.sigmoid = nn.Sigmoid() 566 | # self.gn = nn.GroupNorm(channel // (1 * groups), channel // (1 * groups)) 567 | self.gn = nn.GroupNorm(8, 8) 568 | 569 | @staticmethod 570 | def channel_shuffle(x, groups): 571 | b, c, h, w = x.shape 572 | 573 | x = x.reshape(b, groups, -1, h, w) 574 | x = x.permute(0, 2, 1, 3, 4) 575 | 576 | # flatten 577 | x = x.reshape(b, -1, h, w) 578 | 579 | return x 580 | 581 | def forward(self, x): # 1, 512, 8, 8 582 | # print(x.shape) 583 | b, c, h, w = x.shape # 1, 512, 8, 8 584 | 585 | x = x.reshape(b * 32, -1, h, w) 586 | # print(x) 587 | # print(x.shape) 588 | x_0, x_1 = x.chunk(2, dim=1) 589 | 590 | # channel attention 591 | xn = self.avg_pool(x_0) 592 | xn = self.cweight * xn + self.cbias 593 | xn = x_0 * self.sigmoid(xn) 594 | 595 | # spatial attention 596 | # print(x_1.shape) 597 | xs = self.gn(x_1) 598 | xs = self.sweight * xs + self.sbias 599 | xs = x_1 * self.sigmoid(xs) 600 | 601 | # concatenate along channel axis 602 | out = torch.cat([xn, xs], dim=1) 603 | out = out.reshape(b, -1, h, w) 604 | 605 | out = self.channel_shuffle(out, 2) 606 | return out 607 | 608 | 609 | #-------------------------------------PP_LCNet------------------------------------------------------ 610 | NET_CONFIG = { 611 | "blocks2": 612 | # k, in_c, out_c, s, use_se 613 | [[3, 16, 32, 1, False]], 614 | "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], 615 | "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], 616 | "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], 617 | [5, 256, 256, 1, False], [5, 256, 256, 1, False], 618 | [5, 256, 256, 1, False], [5, 256, 256, 1, False]], 619 | "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] 620 | } 621 | BLOCK_LIST = ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"] 622 | 623 | def make_divisible_LC(v, divisor=8, min_value=None): 624 | if min_value is None: 625 | min_value = divisor 626 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 627 | if new_v < 0.9 * v: 628 | new_v += divisor 629 | return new_v 630 | 631 | 632 | class HardSwish(nn.Module): 633 | def __init__(self, inplace=True): 634 | super(HardSwish, self).__init__() 635 | self.relu6 = nn.ReLU6(inplace=inplace) 636 | 637 | def forward(self, x): 638 | return x * self.relu6(x+3) / 6 639 | 640 | 641 | class HardSigmoid(nn.Module): 642 | def __init__(self, inplace=True): 643 | super(HardSigmoid, self).__init__() 644 | self.relu6 = nn.ReLU6(inplace=inplace) 645 | 646 | def forward(self, x): 647 | return (self.relu6(x+3)) / 6 648 | 649 | 650 | class SELayer(nn.Module): 651 | def __init__(self, channel, reduction=16): 652 | super(SELayer, self).__init__() 653 | self.avgpool = nn.AdaptiveAvgPool2d(1) 654 | self.fc = nn.Sequential( 655 | nn.Linear(channel, channel // reduction, bias=False), 656 | nn.ReLU(inplace=True), 657 | nn.Linear(channel // reduction, channel, bias=False), 658 | HardSigmoid() 659 | ) 660 | 661 | def forward(self, x): 662 | b, c, h, w = x.size() 663 | y = 
self.avgpool(x).view(b, c) 664 | y = self.fc(y).view(b, c, 1, 1) 665 | return x * y.expand_as(x) 666 | 667 | 668 | class DepthwiseSeparable(nn.Module): 669 | def __init__(self, inp, oup, dw_size, stride, use_se=False): 670 | super(DepthwiseSeparable, self).__init__() 671 | self.use_se = use_se 672 | self.stride = stride 673 | self.inp = inp 674 | self.oup = oup 675 | self.dw_size = dw_size 676 | self.dw_sp = nn.Sequential( 677 | nn.Conv2d(self.inp, self.inp, kernel_size=self.dw_size, stride=self.stride, 678 | padding=autopad(self.dw_size, None), groups=self.inp, bias=False), 679 | nn.BatchNorm2d(self.inp), 680 | HardSwish(), 681 | 682 | nn.Conv2d(self.inp, self.oup, kernel_size=1, stride=1, padding=0, bias=False), 683 | nn.BatchNorm2d(self.oup), 684 | HardSwish(), 685 | ) 686 | self.se = SELayer(self.oup) 687 | 688 | def forward(self, x): 689 | x = self.dw_sp(x) 690 | if self.use_se: 691 | x = self.se(x) 692 | return x 693 | 694 | class PPLC_Conv(nn.Module): 695 | def __init__(self, scale): 696 | super(PPLC_Conv, self).__init__() 697 | self.scale = scale 698 | self.conv = nn.Conv2d(3, out_channels=make_divisible_LC(16 * self.scale), 699 | kernel_size=3, stride=2, padding=1, bias=False) 700 | def forward(self, x): 701 | return self.conv(x) 702 | 703 | class PPLC_Block(nn.Module): 704 | def __init__(self, scale, block_num): 705 | super(PPLC_Block, self).__init__() 706 | self.scale = scale 707 | self.block_num = BLOCK_LIST[block_num] 708 | self.block = nn.Sequential(*[ 709 | DepthwiseSeparable(inp=make_divisible_LC(in_c * self.scale), 710 | oup=make_divisible_LC(out_c * self.scale), 711 | dw_size=k, stride=s, use_se=use_se) 712 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG[self.block_num]) 713 | ]) 714 | def forward(self, x): 715 | return self.block(x) 716 | 717 | 718 | 719 | 720 | 721 | if __name__ == '__main__': 722 | input_tensor = torch.rand(1, 512, 20, 20) 723 | 724 | a = CoordinateLayer(inp=512, oup=4) 725 | 726 | output_tensor = a(input_tensor) 727 | # print(output_tensor) 728 | print(output_tensor.shape) -------------------------------------------------------------------------------- /PP-LCNet-Yolov5/models/yolo.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | YOLO-specific modules 4 | 5 | Usage: 6 | $ python path/to/models/yolo.py --cfg yolov5s.yaml 7 | """ 8 | 9 | import argparse 10 | import sys 11 | from copy import deepcopy 12 | from pathlib import Path 13 | 14 | FILE = Path(__file__).absolute() 15 | sys.path.append(FILE.parents[1].as_posix()) # add yolov5/ to path 16 | 17 | from models.common import * 18 | from models.experimental import * 19 | from utils.autoanchor import check_anchor_order 20 | from utils.general import make_divisible, check_file, set_logging 21 | from utils.plots import feature_visualization 22 | from utils.torch_utils import time_sync, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 23 | select_device, copy_attr 24 | 25 | try: 26 | import thop # for FLOPs computation 27 | except ImportError: 28 | thop = None 29 | 30 | LOGGER = logging.getLogger(__name__) 31 | 32 | 33 | class Detect(nn.Module): 34 | stride = None # strides computed during build 35 | onnx_dynamic = False # ONNX export parameter 36 | 37 | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer 38 | super().__init__() 39 | self.nc = nc # number of classes 40 | self.no = nc + 5 # number of outputs per anchor 41 | self.nl = len(anchors) # number of 
detection layers 42 | self.na = len(anchors[0]) // 2 # number of anchors 43 | self.grid = [torch.zeros(1)] * self.nl # init grid 44 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 45 | self.register_buffer('anchors', a) # shape(nl,na,2) 46 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 47 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 48 | self.inplace = inplace # use in-place ops (e.g. slice assignment) 49 | 50 | def forward(self, x): 51 | z = [] # inference output 52 | for i in range(self.nl): 53 | x[i] = self.m[i](x[i]) # conv 54 | # bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 55 | # x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 56 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 57 | bs = -1 58 | ny = int(ny) 59 | nx = int(nx) 60 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 61 | 62 | if not self.training: # inference 63 | if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: 64 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 65 | 66 | y = x[i].sigmoid() 67 | if self.inplace: 68 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 69 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 70 | else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 71 | xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 72 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh 73 | y = torch.cat((xy, wh, y[..., 4:]), -1) 74 | # z.append(y.view(bs, -1, self.no)) 75 | z.append(y.view(bs, self.na * ny * nx, self.no)) 76 | 77 | return x if self.training else (torch.cat(z, 1), x) 78 | 79 | @staticmethod 80 | def _make_grid(nx=20, ny=20): 81 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 82 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 83 | 84 | 85 | class Model(nn.Module): 86 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes 87 | super().__init__() 88 | if isinstance(cfg, dict): 89 | self.yaml = cfg # model dict 90 | else: # is *.yaml 91 | import yaml # for torch hub 92 | self.yaml_file = Path(cfg).name 93 | with open(cfg) as f: 94 | self.yaml = yaml.safe_load(f) # model dict 95 | 96 | # Define model 97 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 98 | if nc and nc != self.yaml['nc']: 99 | LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") 100 | self.yaml['nc'] = nc # override yaml value 101 | if anchors: 102 | LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}') 103 | self.yaml['anchors'] = round(anchors) # override yaml value 104 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 105 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 106 | self.inplace = self.yaml.get('inplace', True) 107 | # LOGGER.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 108 | 109 | # Build strides, anchors 110 | m = self.model[-1] # Detect() 111 | if isinstance(m, Detect): 112 | s = 256 # 2x min stride 113 | m.inplace = self.inplace 114 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 115 | m.anchors /= m.stride.view(-1, 1, 1) 116 | check_anchor_order(m) 117 | self.stride = m.stride 118 | 
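        # Stride note: the block above runs a dummy 1 x ch x 256 x 256 forward pass, sets each
        # Detect stride to 256 / feature-map height (8/16/32 for a standard P3/P4/P5 head), and
        # divides the pixel-space anchors by the stride so they are expressed in grid units.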
self._initialize_biases() # only run once 119 | # LOGGER.info('Strides: %s' % m.stride.tolist()) 120 | 121 | # Init weights, biases 122 | initialize_weights(self) 123 | self.info() 124 | LOGGER.info('') 125 | 126 | def forward(self, x, augment=False, profile=False, visualize=False): 127 | if augment: 128 | return self.forward_augment(x) # augmented inference, None 129 | return self.forward_once(x, profile, visualize) # single-scale inference, train 130 | 131 | def forward_augment(self, x): 132 | img_size = x.shape[-2:] # height, width 133 | s = [1, 0.83, 0.67] # scales 134 | f = [None, 3, None] # flips (2-ud, 3-lr) 135 | y = [] # outputs 136 | for si, fi in zip(s, f): 137 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) 138 | yi = self.forward_once(xi)[0] # forward 139 | # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 140 | yi = self._descale_pred(yi, fi, si, img_size) 141 | y.append(yi) 142 | return torch.cat(y, 1), None # augmented inference, train 143 | 144 | def forward_once(self, x, profile=False, visualize=False): 145 | y, dt = [], [] # outputs 146 | for m in self.model: 147 | if m.f != -1: # if not from previous layer 148 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 149 | 150 | if profile: 151 | c = isinstance(m, Detect) # copy input as inplace fix 152 | o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs 153 | t = time_sync() 154 | for _ in range(10): 155 | m(x.copy() if c else x) 156 | dt.append((time_sync() - t) * 100) 157 | if m == self.model[0]: 158 | LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}") 159 | LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') 160 | 161 | x = m(x) # run 162 | y.append(x if m.i in self.save else None) # save output 163 | 164 | if visualize: 165 | feature_visualization(x, m.type, m.i, save_dir=visualize) 166 | 167 | if profile: 168 | LOGGER.info('%.1fms total' % sum(dt)) 169 | return x 170 | 171 | def _descale_pred(self, p, flips, scale, img_size): 172 | # de-scale predictions following augmented inference (inverse operation) 173 | if self.inplace: 174 | p[..., :4] /= scale # de-scale 175 | if flips == 2: 176 | p[..., 1] = img_size[0] - p[..., 1] # de-flip ud 177 | elif flips == 3: 178 | p[..., 0] = img_size[1] - p[..., 0] # de-flip lr 179 | else: 180 | x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale 181 | if flips == 2: 182 | y = img_size[0] - y # de-flip ud 183 | elif flips == 3: 184 | x = img_size[1] - x # de-flip lr 185 | p = torch.cat((x, y, wh, p[..., 4:]), -1) 186 | return p 187 | 188 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 189 | # https://arxiv.org/abs/1708.02002 section 3.3 190 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 
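        # Worked example of the prior below, assuming stride s=8 on a 640px image: the grid has
        # (640/8)^2 = 6400 cells, so expecting ~8 objects gives an objectness prior of
        # 8/6400 = 0.00125, i.e. a bias shift of log(0.00125) ~ -6.7; class biases likewise
        # start near log(0.6/nc), a ~0.6/nc per-class prior unless frequencies cf are supplied.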
191 | m = self.model[-1] # Detect() module 192 | for mi, s in zip(m.m, m.stride): # from 193 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 194 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 195 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 196 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 197 | 198 | def _print_biases(self): 199 | m = self.model[-1] # Detect() module 200 | for mi in m.m: # from 201 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 202 | LOGGER.info( 203 | ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 204 | 205 | # def _print_weights(self): 206 | # for m in self.model.modules(): 207 | # if type(m) is Bottleneck: 208 | # LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 209 | 210 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 211 | LOGGER.info('Fusing layers... ') 212 | for m in self.model.modules(): 213 | if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): 214 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 215 | delattr(m, 'bn') # remove batchnorm 216 | m.forward = m.forward_fuse # update forward 217 | self.info() 218 | return self 219 | 220 | def autoshape(self): # add AutoShape module 221 | LOGGER.info('Adding AutoShape... ') 222 | m = AutoShape(self) # wrap model 223 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 224 | return m 225 | 226 | def info(self, verbose=False, img_size=640): # print model information 227 | model_info(self, verbose, img_size) 228 | 229 | 230 | def parse_model(d, ch): # model_dict, input_channels(3) 231 | LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 232 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 233 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 234 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 235 | 236 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 237 | # layers: stores the built module for every layer 238 | # save: records the indices of layers whose 'from' is not -1, so their outputs are kept for later reuse 239 | # c2: stores the current layer's output channel count 240 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args: source layer index, default module depth, module type, module arguments 241 | m = eval(m) if isinstance(m, str) else m # eval strings 242 | for j, a in enumerate(args): 243 | try: 244 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 245 | except: 246 | pass 247 | 248 | n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain 249 | if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, 250 | BottleneckCSP, C3, C3TR, C3SPP, C3Ghost]: 251 | c1, c2 = ch[f], args[0] # c2 is this layer's output channel count 252 | if c2 != no: # if not output 253 | c2 = make_divisible(c2 * gw, 8) 254 | 255 | # update the original args: prepend this layer's input channels and the width-scaled output channels 256 | # [in_channel, out_channel, *args[1:]] 257 | args = [c1, c2, *args[1:]] 258 | 259 | if m in [BottleneckCSP, C3, C3TR, C3Ghost]: 260 | args.insert(2, n) # number of repeats 261 | n = 1 262 | elif m is nn.BatchNorm2d: 263 | args = [ch[f]] 264 | elif m is Concat: 265 | c2 = sum([ch[x] for x in f]) 266 | elif m is Detect: 267 | args.append([ch[x] for x in f]) 268 | if isinstance(args[1], int): # number of anchors 269 | args[1] = [list(range(args[1] * 2))] * len(f) 270 | elif m is Contract: 271 | c2 = ch[f] * args[0] ** 2 272 | elif m is Expand: 273 | 
296 | if __name__ == '__main__':
297 |     parser = argparse.ArgumentParser()
298 |     parser.add_argument('--cfg', type=str, default='yolov5_LCNet.yaml', help='model.yaml')
299 |     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
300 |     parser.add_argument('--profile', action='store_true', help='profile model speed')
301 |     opt = parser.parse_args()
302 |     opt.cfg = check_file(opt.cfg)  # check file
303 |     set_logging()
304 |     device = select_device(opt.device)
305 | 
306 |     # Create model
307 |     model = Model(opt.cfg).to(device)
308 |     model.train()
309 | 
310 |     # Profile
311 |     if opt.profile:
312 |         # img = torch.rand(8 if torch.cuda.is_available() else 2, 3, 640, 640).to(device)
313 |         img = torch.randn(2, 3, 640, 640).to(device)  # dummy batch on the same device as the model
314 |         y = model(img, profile=True)
315 |         print([yi.shape for yi in y])  # in train mode the model returns one tensor per detection scale
316 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_0.25.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [8, 0.25]],
15 |    [-1, 1, PPLC_Block, [8, 0.25, 0]],
16 |    [-1, 1, PPLC_Block, [16, 0.25, 1]],
17 |    [-1, 1, PPLC_Block, [32, 0.25, 2]],
18 |    [-1, 1, PPLC_Block, [64, 0.25, 3]],
19 |    [-1, 1, PPLC_Block, [128, 0.25, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
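# Editor's note (added for clarity; not in the original file): this and the seven
# sibling yolov5_LCNet_*.yaml files that follow differ only in the PP-LCNet width
# scale (the second PPLC_Conv/PPLC_Block argument) and the matching channel
# counts in the first argument; anchors, head layout and Detect indices are
# identical throughout.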
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_0.35.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [8, 0.35]],
15 |    [-1, 1, PPLC_Block, [16, 0.35, 0]],
16 |    [-1, 1, PPLC_Block, [24, 0.35, 1]],
17 |    [-1, 1, PPLC_Block, [48, 0.35, 2]],
18 |    [-1, 1, PPLC_Block, [88, 0.35, 3]],
19 |    [-1, 1, PPLC_Block, [176, 0.35, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_0.5.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [8, 0.5]],
15 |    [-1, 1, PPLC_Block, [16, 0.5, 0]],
16 |    [-1, 1, PPLC_Block, [32, 0.5, 1]],
17 |    [-1, 1, PPLC_Block, [64, 0.5, 2]],
18 |    [-1, 1, PPLC_Block, [128, 0.5, 3]],
19 |    [-1, 1, PPLC_Block, [256, 0.5, 4]],
20 |   ]
21 | 
22 | 
23 | # YOLOv5 head
24 | head:
25 |   [[-1, 1, Conv, [512, 1, 1]],
26 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
28 |    [-1, 3, C3, [512, False]], # 9
29 | 
30 |    [-1, 1, Conv, [256, 1, 1]],
31 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
33 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
34 | 
35 |    [-1, 1, Conv, [256, 3, 2]],
36 |    [[-1, 10], 1, Concat, [1]], # cat head P4
37 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
38 | 
39 |    [-1, 1, Conv, [512, 3, 2]],
40 |    [[-1, 6], 1, Concat, [1]], # cat head P5
41 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
42 | 
43 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
44 |   ]
45 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_0.75.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [16, 0.75]],
15 |    [-1, 1, PPLC_Block, [24, 0.75, 0]],
16 |    [-1, 1, PPLC_Block, [48, 0.75, 1]],
17 |    [-1, 1, PPLC_Block, [96, 0.75, 2]],
18 |    [-1, 1, PPLC_Block, [192, 0.75, 3]],
19 |    [-1, 1, PPLC_Block, [384, 0.75, 4]],
20 |   ]
21 | 
22 | 
23 | # YOLOv5 head
24 | head:
25 |   [[-1, 1, Conv, [512, 1, 1]],
26 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
28 |    [-1, 3, C3, [512, False]], # 9
29 | 
30 |    [-1, 1, Conv, [256, 1, 1]],
31 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
33 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
34 | 
35 |    [-1, 1, Conv, [256, 3, 2]],
36 |    [[-1, 10], 1, Concat, [1]], # cat head P4
37 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
38 | 
39 |    [-1, 1, Conv, [512, 3, 2]],
40 |    [[-1, 6], 1, Concat, [1]], # cat head P5
41 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
42 | 
43 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
44 |   ]
45 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_1.5.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [24, 1.5]],
15 |    [-1, 1, PPLC_Block, [48, 1.5, 0]],
16 |    [-1, 1, PPLC_Block, [96, 1.5, 1]],
17 |    [-1, 1, PPLC_Block, [192, 1.5, 2]],
18 |    [-1, 1, PPLC_Block, [384, 1.5, 3]],
19 |    [-1, 1, PPLC_Block, [768, 1.5, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_1.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [16, 1]],
15 |    [-1, 1, PPLC_Block, [32, 1, 0]],
16 |    [-1, 1, PPLC_Block, [64, 1, 1]],
17 |    [-1, 1, PPLC_Block, [128, 1, 2]],
18 |    [-1, 1, PPLC_Block, [256, 1, 3]],
19 |    [-1, 1, PPLC_Block, [512, 1, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_2.5.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [40, 2.5]],
15 |    [-1, 1, PPLC_Block, [80, 2.5, 0]],
16 |    [-1, 1, PPLC_Block, [160, 2.5, 1]],
17 |    [-1, 1, PPLC_Block, [320, 2.5, 2]],
18 |    [-1, 1, PPLC_Block, [640, 2.5, 3]],
19 |    [-1, 1, PPLC_Block, [1280, 2.5, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_2.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [32, 2]],
15 |    [-1, 1, PPLC_Block, [64, 2, 0]],
16 |    [-1, 1, PPLC_Block, [128, 2, 1]],
17 |    [-1, 1, PPLC_Block, [256, 2, 2]],
18 |    [-1, 1, PPLC_Block, [512, 2, 3]],
19 |    [-1, 1, PPLC_Block, [1024, 2, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/test.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # YoloV5-Flexible-and-Inference
2 | A collection of YOLOv5 modifications and related deployment solutions
3 | 
4 | 
5 | 
6 | Welcome to follow our WeChat official account and join the discussion:
7 | ![Alt text](https://github.com/OutBreak-hui/YoloV5-Flexible-and-Inference/blob//main/pic.png)
8 | 
--------------------------------------------------------------------------------
/RepLKNet-Yolov5/models/yolo.py:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | """
3 | YOLO-specific modules
4 | 
5 | Usage:
6 |     $ python path/to/models/yolo.py --cfg yolov5s.yaml
7 | """
8 | 
9 | import argparse
10 | import sys
11 | from copy import deepcopy
12 | from pathlib import Path
13 | 
14 | FILE = Path(__file__).absolute()
15 | sys.path.append(FILE.parents[1].as_posix())  # add yolov5/ to path
16 | 
17 | from models.common import *
18 | from models.experimental import *
19 | from utils.autoanchor import check_anchor_order
20 | from utils.general import make_divisible, check_file, set_logging
21 | from utils.plots import feature_visualization
22 | from utils.torch_utils import time_sync, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
23 |     select_device, copy_attr
24 | 
25 | try:
26 |     import thop  # for FLOPs computation
27 | except ImportError:
28 |     thop = None
29 | 
30 | LOGGER = logging.getLogger(__name__)
31 | 
32 | 
33 | class Detect(nn.Module):
34 |     stride = None  # strides computed during build
35 |     onnx_dynamic = False  # ONNX export parameter
36 | 
37 |     def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
38 |         super().__init__()
39 |         self.nc = nc  # number of classes
40 |         self.no = nc + 5  # number of outputs per anchor
41 |         self.nl = len(anchors)  # number of detection layers
42 |         self.na = len(anchors[0]) // 2  # number of anchors
43 |         self.grid = [torch.zeros(1)] * self.nl  # init grid
44 |         a = torch.tensor(anchors).float().view(self.nl, -1, 2)
45 |         self.register_buffer('anchors', a)  # shape(nl,na,2)
46 |         self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
47 |         self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
48 |         self.inplace = inplace  # use in-place ops (e.g. slice assignment)
49 | 
50 |     def forward(self, x):
51 |         z = []  # inference output
52 |         for i in range(self.nl):
53 |             x[i] = self.m[i](x[i])  # conv
54 |             # bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
55 |             # x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
56 |             bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
57 |             bs = -1
58 |             ny = int(ny)
59 |             nx = int(nx)
60 |             x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
61 | 
62 |             if not self.training:  # inference
63 |                 if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
64 |                     self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
65 | 
66 |                 y = x[i].sigmoid()
67 |                 if self.inplace:
68 |                     y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
69 |                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
70 |                 else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
71 |                     xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
72 |                     wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2)  # wh
73 |                     y = torch.cat((xy, wh, y[..., 4:]), -1)
74 |                 # z.append(y.view(bs, -1, self.no))
75 |                 z.append(y.view(bs, self.na * ny * nx, self.no))
76 | 
77 |         return x if self.training else (torch.cat(z, 1), x)
78 | 
79 |     @staticmethod
80 |     def _make_grid(nx=20, ny=20):
81 |         yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
82 |         return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
83 | 
84 | 
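# --- Editor's illustration (not part of the original source): the box decoding ---
# --- done in Detect.forward() above, written out for a single prediction.      ---
# With sigmoid outputs y in (0, 1), grid cell (cx, cy), stride s and anchor (aw, ah):
#   bx = (2 * y[0] - 0.5 + cx) * s    # centre can fall up to half a cell outside its cell
#   by = (2 * y[1] - 0.5 + cy) * s
#   bw = (2 * y[2]) ** 2 * aw         # width bounded to (0, 4 * aw)
#   bh = (2 * y[3]) ** 2 * ah
# Example: y = [0.5, 0.5, 0.5, 0.5] at cell (10, 10) with stride 8 and anchor (16, 30)
# decodes to a 16x30 box centred at (84, 84) pixels.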
85 | class Model(nn.Module):
86 |     def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
87 |         super().__init__()
88 |         if isinstance(cfg, dict):
89 |             self.yaml = cfg  # model dict
90 |         else:  # is *.yaml
91 |             import yaml  # for torch hub
92 |             self.yaml_file = Path(cfg).name
93 |             with open(cfg) as f:
94 |                 self.yaml = yaml.safe_load(f)  # model dict
95 | 
96 |         # Define model
97 |         ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
98 |         if nc and nc != self.yaml['nc']:
99 |             LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
100 |             self.yaml['nc'] = nc  # override yaml value
101 |         if anchors:
102 |             LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
103 |             self.yaml['anchors'] = round(anchors)  # override yaml value
104 |         self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
105 |         self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
106 |         self.inplace = self.yaml.get('inplace', True)
107 |         # LOGGER.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
108 | 
109 |         # Build strides, anchors
110 |         m = self.model[-1]  # Detect()
111 |         if isinstance(m, Detect):
112 |             s = 256  # 2x min stride
113 |             m.inplace = self.inplace
114 |             m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
115 |             m.anchors /= m.stride.view(-1, 1, 1)
116 |             check_anchor_order(m)
117 |             self.stride = m.stride
118 |             self._initialize_biases()  # only run once
119 |             # LOGGER.info('Strides: %s' % m.stride.tolist())
120 | 
121 |         # Init weights, biases
122 |         initialize_weights(self)
123 |         self.info()
124 |         LOGGER.info('')
125 | 
126 |     def forward(self, x, augment=False, profile=False, visualize=False):
127 |         if augment:
128 |             return self.forward_augment(x)  # augmented inference, None
129 |         return self.forward_once(x, profile, visualize)  # single-scale inference, train
130 | 
131 |     def forward_augment(self, x):
132 |         img_size = x.shape[-2:]  # height, width
133 |         s = [1, 0.83, 0.67]  # scales
134 |         f = [None, 3, None]  # flips (2-ud, 3-lr)
135 |         y = []  # outputs
136 |         for si, fi in zip(s, f):
137 |             xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
138 |             yi = self.forward_once(xi)[0]  # forward
139 |             # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
140 |             yi = self._descale_pred(yi, fi, si, img_size)
141 |             y.append(yi)
142 |         return torch.cat(y, 1), None  # augmented inference, train
143 | 
144 |     def forward_once(self, x, profile=False, visualize=False):
145 |         y, dt = [], []  # outputs
146 |         for m in self.model:
147 |             if m.f != -1:  # if not from previous layer
148 |                 x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
149 | 
150 |             if profile:
151 |                 c = isinstance(m, Detect)  # copy input as inplace fix
152 |                 o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs
153 |                 t = time_sync()
154 |                 for _ in range(10):
155 |                     m(x.copy() if c else x)
156 |                 dt.append((time_sync() - t) * 100)
157 |                 if m == self.model[0]:
158 |                     LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}")
159 |                 LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
160 | 
161 |             x = m(x)  # run
162 |             y.append(x if m.i in self.save else None)  # save output
163 | 
164 |             if visualize:
165 |                 feature_visualization(x, m.type, m.i, save_dir=visualize)
166 | 
167 |         if profile:
168 |             LOGGER.info('%.1fms total' % sum(dt))
169 |         return x
170 | 
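    # --- Editor's illustration (not part of the original source): the y / self.save ---
    # --- bookkeeping in forward_once() above.                                        ---
    # Only layers whose index i appears in self.save keep their output in y; every
    # other layer appends None. For yolov5_RepLKNet.yaml below, parse_model() yields
    # self.save == [1, 2, 5, 9, 12, 15, 18]: backbone stages 1 and 2 (read by the
    # head's Concat layers) plus the head layers later consumed by Concat/Detect.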
171 |     def _descale_pred(self, p, flips, scale, img_size):
172 |         # de-scale predictions following augmented inference (inverse operation)
173 |         if self.inplace:
174 |             p[..., :4] /= scale  # de-scale
175 |             if flips == 2:
176 |                 p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
177 |             elif flips == 3:
178 |                 p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
179 |         else:
180 |             x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale  # de-scale
181 |             if flips == 2:
182 |                 y = img_size[0] - y  # de-flip ud
183 |             elif flips == 3:
184 |                 x = img_size[1] - x  # de-flip lr
185 |             p = torch.cat((x, y, wh, p[..., 4:]), -1)
186 |         return p
187 | 
188 |     def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
189 |         # https://arxiv.org/abs/1708.02002 section 3.3
190 |         # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
191 |         m = self.model[-1]  # Detect() module
192 |         for mi, s in zip(m.m, m.stride):  # from
193 |             b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
194 |             b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
195 |             b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
196 |             mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
197 | 
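    # --- Editor's illustration (not part of the original source): the objectness ---
    # --- prior written by _initialize_biases() above, evaluated at 640x640.      ---
    #   stride  8 (P3, 80x80 grid): math.log(8 / (640 / 8) ** 2)  = log(8 / 6400) ~ -6.69
    #   stride 16 (P4, 40x40 grid): math.log(8 / (640 / 16) ** 2) = log(8 / 1600) ~ -5.30
    #   stride 32 (P5, 20x20 grid): math.log(8 / (640 / 32) ** 2) = log(8 / 400)  ~ -3.91
    # i.e. each scale starts out expecting roughly 8 objects per 640x640 image, the
    # rare-foreground prior of https://arxiv.org/abs/1708.02002, section 3.3.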
198 |     def _print_biases(self):
199 |         m = self.model[-1]  # Detect() module
200 |         for mi in m.m:  # from
201 |             b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
202 |             LOGGER.info(
203 |                 ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
204 | 
205 |     # def _print_weights(self):
206 |     #     for m in self.model.modules():
207 |     #         if type(m) is Bottleneck:
208 |     #             LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights
209 | 
210 |     def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
211 |         LOGGER.info('Fusing layers... ')
212 |         for m in self.model.modules():
213 |             if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
214 |                 m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
215 |                 delattr(m, 'bn')  # remove batchnorm
216 |                 m.forward = m.forward_fuse  # update forward
217 |         self.info()
218 |         return self
219 | 
220 |     def autoshape(self):  # add AutoShape module
221 |         LOGGER.info('Adding AutoShape... ')
222 |         m = AutoShape(self)  # wrap model
223 |         copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=())  # copy attributes
224 |         return m
225 | 
226 |     def info(self, verbose=False, img_size=640):  # print model information
227 |         model_info(self, verbose, img_size)
228 | 
229 | 
230 | def parse_model(d, ch):  # model_dict, input_channels(3)
231 |     LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
232 |     anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
233 |     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
234 |     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
235 | 
236 |     layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
237 |     # layers: the module built for each config row
238 |     # save: indices of layers whose outputs are referenced by a 'from' field other than -1
239 |     # c2: output channel count of the current layer
240 |     for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args: input layer(s), repeat count, module type, module arguments
241 |         m = eval(m) if isinstance(m, str) else m  # eval strings
242 |         for j, a in enumerate(args):
243 |             try:
244 |                 args[j] = eval(a) if isinstance(a, str) else a  # eval strings
245 |             except NameError:  # leave non-evaluable strings such as 'nearest' as-is
246 |                 pass
247 | 
248 |         n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
249 |         if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
250 |                  BottleneckCSP, C3, C3TR, C3SPP, C3Ghost]:
251 |             c1, c2 = ch[f], args[0]  # c2 is this layer's output channel count
252 |             if c2 != no:  # if not output
253 |                 c2 = make_divisible(c2 * gw, 8)
254 | 
255 |             # rebuild args, prepending the current layer's input channels
256 |             # [in_channel, out_channel, *args[1:]]
257 |             args = [c1, c2, *args[1:]]
258 | 
259 |             if m in [BottleneckCSP, C3, C3TR, C3Ghost]:
260 |                 args.insert(2, n)  # number of repeats
261 |                 n = 1
262 |         elif m is nn.BatchNorm2d:
263 |             args = [ch[f]]
264 |         elif m is Concat:
265 |             c2 = sum([ch[x] for x in f])
266 |         elif m is Detect:
267 |             args.append([ch[x] for x in f])
268 |             if isinstance(args[1], int):  # number of anchors
269 |                 args[1] = [list(range(args[1] * 2))] * len(f)
270 |         elif m is Contract:
271 |             c2 = ch[f] * args[0] ** 2
272 |         elif m is Expand:
273 |             c2 = ch[f] // args[0] ** 2
274 |         elif m in [RepLKNet_Stem, RepLKNet_stage1, RepLKNet_stage2, RepLKNet_stage3, RepLKNet_stage4]:
275 |             c2 = args[0]
276 |             args = args[1:]
277 |         else:
278 |             c2 = ch[f]
279 | 
280 |         m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
281 |         t = str(m)[8:-2].replace('__main__.', '')  # module type
282 |         np = sum([x.numel() for x in m_.parameters()])  # number params
283 |         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
284 |         LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n_, np, t, args))  # print
285 |         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
286 |         layers.append(m_)
287 |         if i == 0:
288 |             ch = []
289 |         ch.append(c2)
290 |     return nn.Sequential(*layers), sorted(save)
291 | 
292 | 
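# --- Editor's illustration (not part of the original source): how parse_model() ---
# --- above assembles the Detect() layer for yolov5_RepLKNet.yaml below.         ---
# The config row [[12, 15, 18], 1, Detect, [nc, anchors]] hits the Detect branch
# with f = [12, 15, 18], so args.append([ch[x] for x in f]) makes the final call
#   Detect(80, anchors, [256, 512, 1024])
# where 256/512/1024 are the C3 output widths at layers 12/15/18 after the 0.50
# width_multiple is applied (512 -> 256, 1024 -> 512, 2048 -> 1024).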
293 | if __name__ == '__main__':
294 |     parser = argparse.ArgumentParser()
295 |     parser.add_argument('--cfg', type=str, default='yolov5_RepLKNet.yaml', help='model.yaml')
296 |     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
297 |     parser.add_argument('--profile', action='store_true', help='profile model speed')
298 |     opt = parser.parse_args()
299 |     opt.cfg = check_file(opt.cfg)  # check file
300 |     set_logging()
301 |     device = select_device(opt.device)
302 | 
303 |     # Create model
304 |     model = Model(opt.cfg).to(device)
305 |     model.train()
306 | 
307 |     # Profile
308 |     if opt.profile:
309 |         # img = torch.rand(8 if torch.cuda.is_available() else 2, 3, 640, 640).to(device)
310 |         img = torch.randn(2, 3, 640, 640).to(device)  # dummy batch on the same device as the model
311 |         y = model(img, profile=True)
312 |         print([yi.shape for yi in y])  # in train mode the model returns one tensor per detection scale
313 | 
--------------------------------------------------------------------------------
/RepLKNet-Yolov5/models/yolov5_RepLKNet.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | 
13 | # YOLOv5 backbone
14 | backbone:
15 |   [[-1, 1, RepLKNet_Stem, [128, 3, [128,256,512,1024]]],
16 |    [-1, 1, RepLKNet_stage1, [256, [128,256,512,1024], [31,29,27,13], [2,2,18,2], 0.3, 5, 1, 4]],
17 |    [-1, 1, RepLKNet_stage2, [512, [128,256,512,1024], [31,29,27,13], [2,2,18,2], 0.3, 5, 1, 4]],
18 |    [-1, 1, RepLKNet_stage3, [1024, [128,256,512,1024], [31,29,27,13], [2,2,18,2], 0.3, 5, 1, 4]],
19 |    [-1, 1, RepLKNet_stage4, [1024, [128,256,512,1024], [31,29,27,13], [2,2,18,2], 0.3, 5, 1, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [1024, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 2], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [1024, False]], # 8
28 | 
29 |    [-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 1], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [512, False]], # 12 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [512, 3, 2]],
35 |    [[-1, 9], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [1024, False]], # 15 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [1024, 3, 2]],
39 |    [[-1, 5], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [2048, False]], # 18 (P5/32-large)
41 | 
42 |    [[12, 15, 18], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/pic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/pic.png
--------------------------------------------------------------------------------
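Editor's note — a minimal smoke test for the variants above (an illustrative sketch only: it assumes you run it from inside one of the model directories so that models/ and utils/ are importable, and that the chosen yaml's custom modules exist in that directory's models/common.py):

import torch
from models.yolo import Model

model = Model('models/yolov5_RepLKNet.yaml')  # or any of the LCNet / ConvNeXt yamls above
model.eval()  # in eval mode Detect() also returns the decoded, concatenated predictions
with torch.no_grad():
    pred, feats = model(torch.zeros(1, 3, 640, 640))
print(pred.shape)  # (1, 25200, 85) for an 80-class model at 640x640
model.fuse()  # fold BatchNorm into the preceding convolutions for faster inference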