├── ConvNeXt-YoloV5
├── models
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-37.pyc
│ │ ├── common.cpython-37.pyc
│ │ ├── experimental.cpython-37.pyc
│ │ └── yolo.cpython-37.pyc
│ ├── common.py
│ ├── experimental.py
│ ├── hub
│ │ ├── anchors.yaml
│ │ ├── pplc.yaml
│ │ ├── yolov3-spp.yaml
│ │ ├── yolov3-tiny.yaml
│ │ ├── yolov3.yaml
│ │ ├── yolov5-bifpn.yaml
│ │ ├── yolov5-fpn.yaml
│ │ ├── yolov5-p2.yaml
│ │ ├── yolov5-p6.yaml
│ │ ├── yolov5-p7.yaml
│ │ ├── yolov5-panet.yaml
│ │ ├── yolov5_ConvNeXt.yaml
│ │ ├── yolov5_ConvNeXt_Tiny.yaml
│ │ ├── yolov5l6.yaml
│ │ ├── yolov5m6.yaml
│ │ ├── yolov5n6.yaml
│ │ ├── yolov5s-ghost.yaml
│ │ ├── yolov5s-transformer.yaml
│ │ ├── yolov5s6.yaml
│ │ └── yolov5x6.yaml
│ ├── tf.py
│ ├── yolo.py
│ ├── yolov5_ConvNeXt_Tiny.yaml
│ ├── yolov5_ConvNeXt_base.yaml
│ ├── yolov5_ConvNeXt_large.yaml
│ ├── yolov5_ConvNeXt_small.yaml
│ ├── yolov5_ConvNeXt_xlarge_22k.yaml
│ ├── yolov5l.yaml
│ ├── yolov5m.yaml
│ ├── yolov5n.yaml
│ ├── yolov5s.yaml
│ └── yolov5x.yaml
└── train.py
├── PP-LCNet-Yolov5
├── PP_LCNet
│ └── LCNet.py
├── models
│ ├── common.py
│ ├── yolo.py
│ ├── yolov5_LCNet_0.25.yaml
│ ├── yolov5_LCNet_0.35.yaml
│ ├── yolov5_LCNet_0.5.yaml
│ ├── yolov5_LCNet_0.75.yaml
│ ├── yolov5_LCNet_1.5.yaml
│ ├── yolov5_LCNet_1.yaml
│ ├── yolov5_LCNet_2.5.yaml
│ └── yolov5_LCNet_2.yaml
└── test.txt
├── README.md
├── RepLKNet-Yolov5
└── models
│ ├── common.py
│ ├── yolo.py
│ └── yolov5_RepLKNet.yaml
└── pic.png

/ConvNeXt-YoloV5/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/ConvNeXt-YoloV5/models/__init__.py
--------------------------------------------------------------------------------
/ConvNeXt-YoloV5/models/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/ConvNeXt-YoloV5/models/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/ConvNeXt-YoloV5/models/__pycache__/common.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/ConvNeXt-YoloV5/models/__pycache__/common.cpython-37.pyc
--------------------------------------------------------------------------------
/ConvNeXt-YoloV5/models/__pycache__/experimental.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/ConvNeXt-YoloV5/models/__pycache__/experimental.cpython-37.pyc
--------------------------------------------------------------------------------
/ConvNeXt-YoloV5/models/__pycache__/yolo.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/ConvNeXt-YoloV5/models/__pycache__/yolo.cpython-37.pyc
--------------------------------------------------------------------------------
/ConvNeXt-YoloV5/models/common.py:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by 
Ultralytics, GPL-3.0 license 2 | """ 3 | Common modules 4 | """ 5 | 6 | import logging 7 | import math 8 | import warnings 9 | from copy import copy 10 | from pathlib import Path 11 | 12 | import numpy as np 13 | import pandas as pd 14 | import requests 15 | import torch 16 | import torch.nn as nn 17 | from PIL import Image 18 | from torch.cuda import amp 19 | 20 | from utils.datasets import exif_transpose, letterbox 21 | from utils.general import colorstr, increment_path, is_ascii, make_divisible, non_max_suppression, save_one_box, \ 22 | scale_coords, xyxy2xywh 23 | from utils.plots import Annotator, colors 24 | from utils.torch_utils import time_sync 25 | from torch.nn.parameter import Parameter 26 | 27 | import torch.nn.functional as F 28 | from timm.models.layers import trunc_normal_, DropPath 29 | from timm.models.registry import register_model 30 | 31 | LOGGER = logging.getLogger(__name__) 32 | 33 | 34 | def autopad(k, p=None): # kernel, padding 35 | # Pad to 'same' 36 | if p is None: 37 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 38 | return p 39 | 40 | 41 | class Conv(nn.Module): 42 | # Standard convolution 43 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 44 | super().__init__() 45 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 46 | self.bn = nn.BatchNorm2d(c2) 47 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 48 | 49 | def forward(self, x): 50 | return self.act(self.bn(self.conv(x))) 51 | 52 | def forward_fuse(self, x): 53 | return self.act(self.conv(x)) 54 | 55 | 56 | class DWConv(Conv): 57 | # Depth-wise convolution class 58 | def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 59 | super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 60 | 61 | 62 | class TransformerLayer(nn.Module): 63 | # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) 64 | def __init__(self, c, num_heads): 65 | super().__init__() 66 | self.q = nn.Linear(c, c, bias=False) 67 | self.k = nn.Linear(c, c, bias=False) 68 | self.v = nn.Linear(c, c, bias=False) 69 | self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) 70 | self.fc1 = nn.Linear(c, c, bias=False) 71 | self.fc2 = nn.Linear(c, c, bias=False) 72 | 73 | def forward(self, x): 74 | x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x 75 | x = self.fc2(self.fc1(x)) + x 76 | return x 77 | 78 | 79 | class TransformerBlock(nn.Module): 80 | # Vision Transformer https://arxiv.org/abs/2010.11929 81 | def __init__(self, c1, c2, num_heads, num_layers): 82 | super().__init__() 83 | self.conv = None 84 | if c1 != c2: 85 | self.conv = Conv(c1, c2) 86 | self.linear = nn.Linear(c2, c2) # learnable position embedding 87 | self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)]) 88 | self.c2 = c2 89 | 90 | def forward(self, x): 91 | if self.conv is not None: 92 | x = self.conv(x) 93 | b, _, w, h = x.shape 94 | p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3) 95 | return self.tr(p + self.linear(p)).unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h) 96 | 97 | 98 | class Bottleneck(nn.Module): 99 | # Standard bottleneck 100 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 101 | super().__init__() 102 | c_ = int(c2 * e) # hidden channels 103 | self.cv1 = Conv(c1, c_, 1, 1) 104 | self.cv2 = Conv(c_, 
c2, 3, 1, g=g) 105 | self.add = shortcut and c1 == c2 106 | 107 | def forward(self, x): 108 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 109 | 110 | 111 | class BottleneckCSP(nn.Module): 112 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 113 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 114 | super().__init__() 115 | c_ = int(c2 * e) # hidden channels 116 | self.cv1 = Conv(c1, c_, 1, 1) 117 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 118 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 119 | self.cv4 = Conv(2 * c_, c2, 1, 1) 120 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 121 | self.act = nn.LeakyReLU(0.1, inplace=True) 122 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 123 | 124 | def forward(self, x): 125 | y1 = self.cv3(self.m(self.cv1(x))) 126 | y2 = self.cv2(x) 127 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 128 | 129 | # [512, 256, 1, False] 130 | class C3(nn.Module): 131 | # CSP Bottleneck with 3 convolutions 132 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 133 | super().__init__() 134 | c_ = int(c2 * e) # hidden channels 135 | self.cv1 = Conv(c1, c_, 1, 1) 136 | self.cv2 = Conv(c1, c_, 1, 1) 137 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) 138 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 139 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 140 | 141 | def forward(self, x): 142 | # print(self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)).shape) 143 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) 144 | 145 | 146 | class C3TR(C3): 147 | # C3 module with TransformerBlock() 148 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 149 | super().__init__(c1, c2, n, shortcut, g, e) 150 | c_ = int(c2 * e) 151 | self.m = TransformerBlock(c_, c_, 4, n) 152 | 153 | 154 | class C3SPP(C3): 155 | # C3 module with SPP() 156 | def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5): 157 | super().__init__(c1, c2, n, shortcut, g, e) 158 | c_ = int(c2 * e) 159 | self.m = SPP(c_, c_, k) 160 | 161 | 162 | class C3Ghost(C3): 163 | # C3 module with GhostBottleneck() 164 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 165 | super().__init__(c1, c2, n, shortcut, g, e) 166 | c_ = int(c2 * e) # hidden channels 167 | self.m = nn.Sequential(*[GhostBottleneck(c_, c_) for _ in range(n)]) 168 | 169 | 170 | class SPP(nn.Module): 171 | # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729 172 | def __init__(self, c1, c2, k=(5, 9, 13)): 173 | super().__init__() 174 | c_ = c1 // 2 # hidden channels 175 | self.cv1 = Conv(c1, c_, 1, 1) 176 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 177 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 178 | 179 | def forward(self, x): 180 | x = self.cv1(x) 181 | with warnings.catch_warnings(): 182 | warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning 183 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 184 | 185 | 186 | class SPPF(nn.Module): 187 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 188 | def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) 189 | super().__init__() 190 | c_ = c1 // 2 # hidden channels 
191 | self.cv1 = Conv(c1, c_, 1, 1) 192 | self.cv2 = Conv(c_ * 4, c2, 1, 1) 193 | self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) 194 | 195 | def forward(self, x): 196 | x = self.cv1(x) 197 | with warnings.catch_warnings(): 198 | warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning 199 | y1 = self.m(x) 200 | y2 = self.m(y1) 201 | return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1)) 202 | 203 | 204 | class Focus(nn.Module): 205 | # Focus wh information into c-space 206 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 207 | super().__init__() 208 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 209 | # self.contract = Contract(gain=2) 210 | 211 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 212 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 213 | # return self.conv(self.contract(x)) 214 | 215 | 216 | class GhostConv(nn.Module): 217 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 218 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 219 | super().__init__() 220 | c_ = c2 // 2 # hidden channels 221 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 222 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 223 | 224 | def forward(self, x): 225 | y = self.cv1(x) 226 | return torch.cat([y, self.cv2(y)], 1) 227 | 228 | 229 | class GhostBottleneck(nn.Module): 230 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 231 | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride 232 | super().__init__() 233 | c_ = c2 // 2 234 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 235 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 236 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 237 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 238 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 239 | 240 | def forward(self, x): 241 | return self.conv(x) + self.shortcut(x) 242 | 243 | 244 | class Contract(nn.Module): 245 | # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) 246 | def __init__(self, gain=2): 247 | super().__init__() 248 | self.gain = gain 249 | 250 | def forward(self, x): 251 | b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain' 252 | s = self.gain 253 | x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2) 254 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) 255 | return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40) 256 | 257 | 258 | class Expand(nn.Module): 259 | # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) 260 | def __init__(self, gain=2): 261 | super().__init__() 262 | self.gain = gain 263 | 264 | def forward(self, x): 265 | b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' 266 | s = self.gain 267 | x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80) 268 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) 269 | return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160) 270 | 271 | 272 | class Concat(nn.Module): 273 | # Concatenate a list of tensors along dimension 274 | def __init__(self, dimension=1): 275 | super().__init__() 276 | self.d = dimension 277 | 278 | def forward(self, x): 279 | return torch.cat(x, self.d) 280 | 281 | 282 | class AutoShape(nn.Module): 283 | # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. 
Includes preprocessing, inference and NMS 284 | conf = 0.25 # NMS confidence threshold 285 | iou = 0.45 # NMS IoU threshold 286 | classes = None # (optional list) filter by class 287 | max_det = 1000 # maximum number of detections per image 288 | 289 | def __init__(self, model): 290 | super().__init__() 291 | self.model = model.eval() 292 | 293 | def autoshape(self): 294 | LOGGER.info('AutoShape already enabled, skipping... ') # model already converted to model.autoshape() 295 | return self 296 | 297 | @torch.no_grad() 298 | def forward(self, imgs, size=640, augment=False, profile=False): 299 | # Inference from various sources. For height=640, width=1280, RGB images example inputs are: 300 | # file: imgs = 'data/images/zidane.jpg' # str or PosixPath 301 | # URI: = 'https://ultralytics.com/images/zidane.jpg' 302 | # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) 303 | # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3) 304 | # numpy: = np.zeros((640,1280,3)) # HWC 305 | # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) 306 | # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images 307 | 308 | t = [time_sync()] 309 | p = next(self.model.parameters()) # for device and type 310 | if isinstance(imgs, torch.Tensor): # torch 311 | with amp.autocast(enabled=p.device.type != 'cpu'): 312 | return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference 313 | 314 | # Pre-process 315 | n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images 316 | shape0, shape1, files = [], [], [] # image and inference shapes, filenames 317 | for i, im in enumerate(imgs): 318 | f = f'image{i}' # filename 319 | if isinstance(im, (str, Path)): # filename or uri 320 | im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im 321 | im = np.asarray(exif_transpose(im)) 322 | elif isinstance(im, Image.Image): # PIL Image 323 | im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f 324 | files.append(Path(f).with_suffix('.jpg').name) 325 | if im.shape[0] < 5: # image in CHW 326 | im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) 327 | im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3) # enforce 3ch input 328 | s = im.shape[:2] # HWC 329 | shape0.append(s) # image shape 330 | g = (size / max(s)) # gain 331 | shape1.append([y * g for y in s]) 332 | imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update 333 | shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape 334 | x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad 335 | x = np.stack(x, 0) if n > 1 else x[0][None] # stack 336 | x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW 337 | x = torch.from_numpy(x).to(p.device).type_as(p) / 255. 
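        # Typical AutoShape usage, a sketch assuming the model has been wrapped (e.g. via
        # model.autoshape() or torch.hub): sources can be a path, URL, PIL/OpenCV/numpy image,
        # torch tensor, or a list of these:
        #   results = model('data/images/zidane.jpg', size=640)
        #   results.print()                # speed and per-class counts
        #   df = results.pandas().xyxy[0]  # detections as a DataFrame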
# uint8 to fp16/32 338 | t.append(time_sync()) 339 | 340 | with amp.autocast(enabled=p.device.type != 'cpu'): 341 | # Inference 342 | y = self.model(x, augment, profile)[0] # forward 343 | t.append(time_sync()) 344 | 345 | # Post-process 346 | y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det) # NMS 347 | for i in range(n): 348 | scale_coords(shape1, y[i][:, :4], shape0[i]) 349 | 350 | t.append(time_sync()) 351 | return Detections(imgs, y, files, t, self.names, x.shape) 352 | 353 | 354 | class Detections: 355 | # YOLOv5 detections class for inference results 356 | def __init__(self, imgs, pred, files, times=None, names=None, shape=None): 357 | super().__init__() 358 | d = pred[0].device # device 359 | gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations 360 | self.imgs = imgs # list of images as numpy arrays 361 | self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) 362 | self.names = names # class names 363 | self.ascii = is_ascii(names) # names are ascii (use PIL for UTF-8) 364 | self.files = files # image filenames 365 | self.xyxy = pred # xyxy pixels 366 | self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels 367 | self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized 368 | self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized 369 | self.n = len(self.pred) # number of images (batch size) 370 | self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms) 371 | self.s = shape # inference BCHW shape 372 | 373 | def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')): 374 | for i, (im, pred) in enumerate(zip(self.imgs, self.pred)): 375 | str = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' 376 | if pred.shape[0]: 377 | for c in pred[:, -1].unique(): 378 | n = (pred[:, -1] == c).sum() # detections per class 379 | str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string 380 | if show or save or render or crop: 381 | annotator = Annotator(im, pil=not self.ascii) 382 | for *box, conf, cls in reversed(pred): # xyxy, confidence, class 383 | label = f'{self.names[int(cls)]} {conf:.2f}' 384 | if crop: 385 | save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i]) 386 | else: # all others 387 | annotator.box_label(box, label, color=colors(cls)) 388 | im = annotator.im 389 | else: 390 | str += '(no detections)' 391 | 392 | im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np 393 | if pprint: 394 | LOGGER.info(str.rstrip(', ')) 395 | if show: 396 | im.show(self.files[i]) # show 397 | if save: 398 | f = self.files[i] 399 | im.save(save_dir / f) # save 400 | if i == self.n - 1: 401 | LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") 402 | if render: 403 | self.imgs[i] = np.asarray(im) 404 | 405 | def print(self): 406 | self.display(pprint=True) # print results 407 | LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % 408 | self.t) 409 | 410 | def show(self): 411 | self.display(show=True) # show results 412 | 413 | def save(self, save_dir='runs/detect/exp'): 414 | save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir 415 | self.display(save=True, save_dir=save_dir) # save results 416 | 417 | def crop(self, save_dir='runs/detect/exp'): 418 
|         save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
419 |         self.display(crop=True, save_dir=save_dir)  # crop results
420 |         LOGGER.info(f'Saved results to {save_dir}\n')
421 | 
422 |     def render(self):
423 |         self.display(render=True)  # render results
424 |         return self.imgs
425 | 
426 |     def pandas(self):
427 |         # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
428 |         new = copy(self)  # return copy
429 |         ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
430 |         cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
431 |         for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
432 |             a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
433 |             setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
434 |         return new
435 | 
436 |     def tolist(self):
437 |         # return a list of Detections objects, i.e. 'for result in results.tolist():'
438 |         x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], (0., 0., 0., 0.), self.names, self.s) for i in range(self.n)]  # files/times/names/shape in their correct slots; per-image times are not tracked, so they are zeroed
439 |         for d in x:
440 |             for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
441 |                 setattr(d, k, getattr(d, k)[0])  # pop out of list
442 |         return x
443 | 
444 |     def __len__(self):
445 |         return self.n
446 | 
447 | 
448 | class Classify(nn.Module):
449 |     # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
450 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
451 |         super().__init__()
452 |         self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
453 |         self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
454 |         self.flat = nn.Flatten()
455 | 
456 |     def forward(self, x):
457 |         z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
458 |         return self.flat(self.conv(z))  # flatten to x(b,c2)
459 | 
460 | class h_sigmoid(nn.Module):
461 |     def __init__(self, inplace=True):
462 |         super(h_sigmoid, self).__init__()
463 |         self.relu = nn.ReLU6(inplace=inplace)
464 | 
465 |     def forward(self, x):
466 |         return self.relu(x + 3) / 6
467 | 
468 | 
469 | class h_swish(nn.Module):
470 |     def __init__(self, inplace=True):
471 |         super(h_swish, self).__init__()
472 |         self.sigmoid = h_sigmoid(inplace=inplace)
473 | 
474 |     def forward(self, x):
475 |         return x * self.sigmoid(x)
476 | 
477 | 
478 | class CoordinateLayer(nn.Module):
479 |     def __init__(self, inp, oup, reduction=32):
480 |         super(CoordinateLayer, self).__init__()
481 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
482 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
483 | 
484 |         mip = max(8, inp // reduction)
485 | 
486 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
487 |         self.bn1 = nn.BatchNorm2d(mip)
488 |         self.act = h_swish()
489 | 
490 |         self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
491 |         self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
492 | 
493 |     def forward(self, x):
494 |         identity = x
495 | 
496 |         n, c, h, w = x.size()
497 |         x_h = self.pool_h(x)  # [n,c,h,w]-->[n,c,h,1]
498 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)  # [n,c,h,w]-->[n,c,1,w]-->[n,c,w,1]
499 | 
500 |         y = torch.cat([x_h, x_w], dim=2)  # -->[n,c,(h+w),1]
501 |         y = self.conv1(y)  # [n,c,(h+w),1]-->[n,mip,(h+w),1]
502 |         y = self.bn1(y)  # [n,mip,(h+w),1]
503 |         y = self.act(y)  # [n,mip,(h+w),1]
504 | 
505 |         x_h, x_w = torch.split(y, [h, w], dim=2)  # [n,mip,(h+w),1]-->[n,mip,h,1] & [n,mip,w,1]
506 |         x_w = x_w.permute(0, 1, 3, 2)  # [n,mip,w,1]-->[n,mip,1,w]
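        # CoordinateLayer is coordinate attention: the H- and W-pooled profiles encoded jointly
        # above are split back into the per-axis gates a_h/a_w below. It preserves channels when
        # inp == oup, so it can be dropped in after any backbone stage. A usage sketch, assuming
        # models.common is importable:
        #   ca = CoordinateLayer(inp=256, oup=256, reduction=32)
        #   y = ca(torch.randn(2, 256, 40, 40))  # -> torch.Size([2, 256, 40, 40])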
507 | 
508 |         a_h = self.conv_h(x_h).sigmoid()  # [n,mip,h,1]-->[n,oup,h,1]--> attention weights along the height axis
509 |         a_w = self.conv_w(x_w).sigmoid()  # [n,mip,1,w]-->[n,oup,1,w]--> attention weights along the width axis
510 | 
511 |         out = identity * a_w * a_h
512 | 
513 |         return out
514 | 
515 | 
516 | 
517 | NET_CONFIG = {
518 |     "blocks2":
519 |     # k, in_c, out_c, s, use_se
520 |     [[3, 16, 32, 1, False]],
521 |     "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
522 |     "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
523 |     "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
524 |                 [5, 256, 256, 1, False], [5, 256, 256, 1, False],
525 |                 [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
526 |     "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
527 | }
528 | BLOCK_LIST = ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]
529 | 
530 | def make_divisible_LC(v, divisor=8, min_value=None):
531 |     if min_value is None:
532 |         min_value = divisor
533 |     new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
534 |     if new_v < 0.9 * v:
535 |         new_v += divisor
536 |     return new_v
537 | 
538 | 
539 | class HardSwish(nn.Module):
540 |     def __init__(self, inplace=True):
541 |         super(HardSwish, self).__init__()
542 |         self.relu6 = nn.ReLU6(inplace=inplace)
543 | 
544 |     def forward(self, x):
545 |         return x * self.relu6(x+3) / 6
546 | 
547 | 
548 | class HardSigmoid(nn.Module):
549 |     def __init__(self, inplace=True):
550 |         super(HardSigmoid, self).__init__()
551 |         self.relu6 = nn.ReLU6(inplace=inplace)
552 | 
553 |     def forward(self, x):
554 |         return (self.relu6(x+3)) / 6
555 | 
556 | 
557 | class SELayer(nn.Module):
558 |     def __init__(self, channel, reduction=16):
559 |         super(SELayer, self).__init__()
560 |         self.avgpool = nn.AdaptiveAvgPool2d(1)
561 |         self.fc = nn.Sequential(
562 |             nn.Linear(channel, channel // reduction, bias=False),
563 |             nn.ReLU(inplace=True),
564 |             nn.Linear(channel // reduction, channel, bias=False),
565 |             HardSigmoid()
566 |         )
567 | 
568 |     def forward(self, x):
569 |         b, c, h, w = x.size()
570 |         y = self.avgpool(x).view(b, c)
571 |         y = self.fc(y).view(b, c, 1, 1)
572 |         return x * y.expand_as(x)
573 | 
574 | 
575 | class DepthwiseSeparable(nn.Module):
576 |     def __init__(self, inp, oup, dw_size, stride, use_se=False):
577 |         super(DepthwiseSeparable, self).__init__()
578 |         self.use_se = use_se
579 |         self.stride = stride
580 |         self.inp = inp
581 |         self.oup = oup
582 |         self.dw_size = dw_size
583 |         self.dw_sp = nn.Sequential(
584 |             nn.Conv2d(self.inp, self.inp, kernel_size=self.dw_size, stride=self.stride,
585 |                       padding=autopad(self.dw_size, None), groups=self.inp, bias=False),
586 |             nn.BatchNorm2d(self.inp),
587 |             HardSwish(),
588 | 
589 |             nn.Conv2d(self.inp, self.oup, kernel_size=1, stride=1, padding=0, bias=False),
590 |             nn.BatchNorm2d(self.oup),
591 |             HardSwish(),
592 |         )
593 |         self.se = SELayer(self.oup)
594 | 
595 |     def forward(self, x):
596 |         x = self.dw_sp(x)
597 |         if self.use_se:
598 |             x = self.se(x)
599 |         return x
600 | 
601 | class PPLC_Conv(nn.Module):
602 |     def __init__(self, scale):
603 |         super(PPLC_Conv, self).__init__()
604 |         self.scale = scale
605 |         self.conv = nn.Conv2d(3, out_channels=make_divisible_LC(16 * self.scale),
606 |                               kernel_size=3, stride=2, padding=1, bias=False)
607 |     def forward(self, x):
608 |         return self.conv(x)
609 | 
610 | class PPLC_Block(nn.Module):
611 |     def __init__(self, scale, block_num):
612 |         super(PPLC_Block, self).__init__()
613 |         self.scale = scale
614 |         self.block_num = BLOCK_LIST[block_num]
615 |         self.block = nn.Sequential(*[
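            # PPLC_Block stacks the DepthwiseSeparable layers listed under NET_CONFIG[BLOCK_LIST[block_num]].
            # models/hub/pplc.yaml wires it up as e.g. [-1, 1, PPLC_Block, [16, 0.5, 0]], i.e. scale=0.5 and
            # block_num=0 -> "blocks2" (the leading 16 is the declared output width, resolved in yolo.py,
            # which is not shown here). A direct-use sketch, assuming models.common is importable:
            #   blk = PPLC_Block(scale=0.5, block_num=0)
            #   y = blk(torch.randn(1, make_divisible_LC(16 * 0.5), 64, 64))  # -> (1, 16, 64, 64)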
DepthwiseSeparable(inp=make_divisible_LC(in_c * self.scale), 617 | oup=make_divisible_LC(out_c * self.scale), 618 | dw_size=k, stride=s, use_se=use_se) 619 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG[self.block_num]) 620 | ]) 621 | def forward(self, x): 622 | return self.block(x) 623 | 624 | # todo ConvNeXt 625 | class Block(nn.Module): 626 | 627 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6): 628 | super().__init__() 629 | self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv 630 | self.norm = LayerNorm(dim, eps=1e-6) 631 | self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers 632 | self.act = nn.GELU() 633 | self.pwconv2 = nn.Linear(4 * dim, dim) 634 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), 635 | requires_grad=True) if layer_scale_init_value > 0 else None 636 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 637 | 638 | def forward(self, x): 639 | input = x 640 | x = self.dwconv(x) 641 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) 642 | x = self.norm(x) 643 | x = self.pwconv1(x) 644 | x = self.act(x) 645 | x = self.pwconv2(x) 646 | if self.gamma is not None: 647 | x = self.gamma * x 648 | x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) 649 | 650 | x = input + self.drop_path(x) 651 | return x 652 | 653 | 654 | class LayerNorm(nn.Module): 655 | 656 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): 657 | super().__init__() 658 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 659 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 660 | self.eps = eps 661 | self.data_format = data_format 662 | if self.data_format not in ["channels_last", "channels_first"]: 663 | raise NotImplementedError 664 | self.normalized_shape = (normalized_shape,) 665 | 666 | def forward(self, x): 667 | if self.data_format == "channels_last": 668 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 669 | elif self.data_format == "channels_first": 670 | u = x.mean(1, keepdim=True) 671 | s = (x - u).pow(2).mean(1, keepdim=True) 672 | x = (x - u) / torch.sqrt(s + self.eps) 673 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 674 | return x 675 | 676 | class ConvNeXt_Block(nn.Module): # index 0~3 677 | def __init__(self, index, in_chans, depths, dims, drop_path_rate=0., layer_scale_init_value=1e-6): 678 | super().__init__() 679 | 680 | self.index = index 681 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 682 | stem = nn.Sequential( 683 | nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4), 684 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first") 685 | ) 686 | self.downsample_layers.append(stem) 687 | for i in range(3): 688 | downsample_layer = nn.Sequential( 689 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"), 690 | nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2), 691 | ) 692 | self.downsample_layers.append(downsample_layer) 693 | 694 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 695 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 696 | cur = 0 697 | for i in range(4): 698 | stage = nn.Sequential( 699 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j], 700 | layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])] 701 | ) 702 | self.stages.append(stage) 703 | 
cur += depths[i] 704 | self.apply(self._init_weights) 705 | 706 | def _init_weights(self, m): 707 | if isinstance(m, (nn.Conv2d, nn.Linear)): 708 | trunc_normal_(m.weight, std=.02) 709 | nn.init.constant_(m.bias, 0) 710 | 711 | def forward(self, x): 712 | x = self.downsample_layers[self.index](x) 713 | x = self.stages[self.index](x) 714 | return x 715 | 716 | 717 | 718 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/experimental.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Experimental modules 4 | """ 5 | import math 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | 11 | from models.common import Conv 12 | from utils.downloads import attempt_download 13 | 14 | 15 | class CrossConv(nn.Module): 16 | # Cross Convolution Downsample 17 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 18 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 19 | super().__init__() 20 | c_ = int(c2 * e) # hidden channels 21 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 22 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 23 | self.add = shortcut and c1 == c2 24 | 25 | def forward(self, x): 26 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 27 | 28 | 29 | class Sum(nn.Module): 30 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 31 | def __init__(self, n, weight=False): # n: number of inputs 32 | super().__init__() 33 | self.weight = weight # apply weights boolean 34 | self.iter = range(n - 1) # iter object 35 | if weight: 36 | self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights 37 | 38 | def forward(self, x): 39 | y = x[0] # no weight 40 | if self.weight: 41 | w = torch.sigmoid(self.w) * 2 42 | for i in self.iter: 43 | y = y + x[i + 1] * w[i] 44 | else: 45 | for i in self.iter: 46 | y = y + x[i + 1] 47 | return y 48 | 49 | 50 | class MixConv2d(nn.Module): 51 | # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 52 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy 53 | super().__init__() 54 | n = len(k) # number of convolutions 55 | if equal_ch: # equal c_ per group 56 | i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices 57 | c_ = [(i == g).sum() for g in range(n)] # intermediate channels 58 | else: # equal weight.numel() per group 59 | b = [c2] + [0] * n 60 | a = np.eye(n + 1, n, k=-1) 61 | a -= np.roll(a, 1, axis=1) 62 | a *= np.array(k) ** 2 63 | a[0] = 1 64 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 65 | 66 | self.m = nn.ModuleList( 67 | [nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]) 68 | self.bn = nn.BatchNorm2d(c2) 69 | self.act = nn.SiLU() 70 | 71 | def forward(self, x): 72 | return self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 73 | 74 | 75 | class Ensemble(nn.ModuleList): 76 | # Ensemble of models 77 | def __init__(self): 78 | super().__init__() 79 | 80 | def forward(self, x, augment=False, profile=False, visualize=False): 81 | y = [] 82 | for module in self: 83 | y.append(module(x, augment, profile, visualize)[0]) 84 | # y = torch.stack(y).max(0)[0] # max ensemble 85 | # y = torch.stack(y).mean(0) # mean ensemble 86 | y = torch.cat(y, 1) # nms ensemble 87 | return y, None # inference, train output 88 | 89 | 90 | def attempt_load(weights, 
map_location=None, inplace=True, fuse=True): 91 | from models.yolo import Detect, Model 92 | 93 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 94 | model = Ensemble() 95 | for w in weights if isinstance(weights, list) else [weights]: 96 | ckpt = torch.load(attempt_download(w), map_location=map_location) # load 97 | if fuse: 98 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model 99 | else: 100 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().eval()) # without layer fuse 101 | 102 | # Compatibility updates 103 | for m in model.modules(): 104 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]: 105 | m.inplace = inplace # pytorch 1.7.0 compatibility 106 | if type(m) is Detect: 107 | if not isinstance(m.anchor_grid, list): # new Detect Layer compatibility 108 | delattr(m, 'anchor_grid') 109 | setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl) 110 | elif type(m) is Conv: 111 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 112 | 113 | if len(model) == 1: 114 | return model[-1] # return model 115 | else: 116 | print(f'Ensemble created with {weights}\n') 117 | for k in ['names']: 118 | setattr(model, k, getattr(model[-1], k)) 119 | model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride 120 | return model # return ensemble 121 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/anchors.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Default anchors for COCO data 3 | 4 | 5 | # P5 ------------------------------------------------------------------------------------------------------------------- 6 | # P5-640: 7 | anchors_p5_640: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | 13 | # P6 ------------------------------------------------------------------------------------------------------------------- 14 | # P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387 15 | anchors_p6_640: 16 | - [9,11, 21,19, 17,41] # P3/8 17 | - [43,32, 39,70, 86,64] # P4/16 18 | - [65,131, 134,130, 120,265] # P5/32 19 | - [282,180, 247,354, 512,387] # P6/64 20 | 21 | # P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 22 | anchors_p6_1280: 23 | - [19,27, 44,40, 38,94] # P3/8 24 | - [96,68, 86,152, 180,137] # P4/16 25 | - [140,301, 303,264, 238,542] # P5/32 26 | - [436,615, 739,380, 925,792] # P6/64 27 | 28 | # P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187 29 | anchors_p6_1920: 30 | - [28,41, 67,59, 57,141] # P3/8 31 | - [144,103, 129,227, 270,205] # P4/16 32 | - [209,452, 455,396, 358,812] # P5/32 33 | - [653,922, 1109,570, 1387,1187] # P6/64 34 | 35 | 36 | # P7 
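# Note on reading these tables (a sketch): each row holds three (w,h) anchor pairs in input-image
# pixels for one output stride. In anchors_p6_640, for example, the P4/16 anchor (86,64) is a prior
# box of 86x64 px predicted on the stride-16 map, i.e. 86/16 x 64/16 = 5.4 x 4.0 grid cells.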
------------------------------------------------------------------------------------------------------------------- 37 | # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372 38 | anchors_p7_640: 39 | - [11,11, 13,30, 29,20] # P3/8 40 | - [30,46, 61,38, 39,92] # P4/16 41 | - [78,80, 146,66, 79,163] # P5/32 42 | - [149,150, 321,143, 157,303] # P6/64 43 | - [257,402, 359,290, 524,372] # P7/128 44 | 45 | # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818 46 | anchors_p7_1280: 47 | - [19,22, 54,36, 32,77] # P3/8 48 | - [70,83, 138,71, 75,173] # P4/16 49 | - [165,159, 148,334, 375,151] # P5/32 50 | - [334,317, 251,626, 499,474] # P6/64 51 | - [750,326, 534,814, 1079,818] # P7/128 52 | 53 | # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227 54 | anchors_p7_1920: 55 | - [29,34, 81,55, 47,115] # P3/8 56 | - [105,124, 207,107, 113,259] # P4/16 57 | - [247,238, 222,500, 563,227] # P5/32 58 | - [501,476, 376,939, 749,711] # P6/64 59 | - [1126,489, 801,1222, 1618,1227] # P7/128 60 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/pplc.yaml: -------------------------------------------------------------------------------- 1 | nc: 1 # number of classes 2 | depth_multiple: 0.33 # model depth multiple 3 | width_multiple: 0.50 # layer channel multiple 4 | anchors: 5 | - [10,13, 16,30, 33,23] # P3/8 6 | - [30,61, 62,45, 59,119] # P4/16 7 | - [116,90, 156,198, 373,326] # P5/32 8 | 9 | # YOLOv5 backbone 10 | backbone: 11 | [[-1, 1, PPLC_Conv, [8, 0.5]], 12 | [-1, 1, PPLC_Block, [16, 0.5, 0]], 13 | [-1, 1, PPLC_Block, [32, 0.5, 1]], 14 | [-1, 1, PPLC_Block, [64, 0.5, 2]], 15 | [-1, 1, PPLC_Block, [128, 0.5, 3]], 16 | [-1, 1, PPLC_Block, [256, 0.5, 4]], 17 | ] 18 | 19 | 20 | # YOLOv5 head 21 | head: 22 | [[-1, 1, Conv, [512, 1, 1]], 23 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 24 | [[-1, 4], 1, Concat, [1]], # cat backbone P4 25 | [-1, 3, C3, [512, False]], # 13 26 | 27 | [-1, 1, Conv, [256, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 29 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 30 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 31 | 32 | [-1, 1, Conv, [256, 3, 2]], 33 | [[-1, 10], 1, Concat, [1]], # cat head P4 34 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 35 | 36 | [-1, 1, Conv, [512, 3, 2]], 37 | [[-1, 6], 1, Concat, [1]], # cat head P5 38 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 39 | 40 | [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 41 | ] -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 
62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,14, 23,27, 37,58] # P4/16 9 | - [81,82, 135,169, 344,319] # P5/32 10 | 11 | # YOLOv3-tiny backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Conv, [16, 3, 1]], # 0 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 16 | [-1, 1, Conv, [32, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 
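# How one row expands (the usual YOLOv5 parse_model rule, assumed unchanged since yolo.py is not
# shown here): with this file's depth_multiple=1.0 and width_multiple=1.0, a row such as
#   [-1, 8, Bottleneck, [256]]
# takes its input from the previous layer (-1), repeats Bottleneck max(round(8 * 1.0), 1) = 8 times,
# and scales the 256-channel argument to make_divisible(256 * 1.0, 8) = 256.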
| # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, [1, 1]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-bifpn.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 BiFPN head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 
10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 FPN head 28 | head: 29 | [[-1, 3, C3, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, C3, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, C3, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-p2.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # auto-anchor evolves 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [1024]], 21 | [-1, 1, SPPF, [1024, 5]], # 9 22 | ] 23 | 24 | # YOLOv5 v6.0 head 25 | head: 26 | [[-1, 1, Conv, [512, 1, 1]], 27 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 28 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 29 | [-1, 3, C3, [512, False]], # 13 30 | 31 | [-1, 1, Conv, [256, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 34 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 35 | 36 | [-1, 1, Conv, [128, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 2], 1, Concat, [1]], # cat backbone P2 39 | [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall) 40 | 41 | [-1, 1, Conv, [128, 3, 2]], 42 | [[-1, 18], 1, Concat, [1]], # cat head P3 43 | [-1, 3, C3, [256, False]], # 24 (P3/8-small) 44 | 45 | [-1, 1, Conv, [256, 3, 2]], 46 | [[-1, 14], 1, Concat, [1]], # cat head P4 47 | [-1, 3, C3, [512, False]], # 27 (P4/16-medium) 48 | 49 | [-1, 1, Conv, [512, 3, 2]], 50 | [[-1, 10], 1, Concat, [1]], # cat head P5 51 | [-1, 3, C3, [1024, False]], # 30 (P5/32-large) 52 | 53 | [[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5) 54 | ] 55 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-p6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # auto-anchor 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # 
[from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [768]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 22 | [-1, 3, C3, [1024]], 23 | [-1, 1, SPPF, [1024, 5]], # 11 24 | ] 25 | 26 | # YOLOv5 v6.0 head 27 | head: 28 | [[-1, 1, Conv, [768, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 31 | [-1, 3, C3, [768, False]], # 15 32 | 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 36 | [-1, 3, C3, [512, False]], # 19 37 | 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 41 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 42 | 43 | [-1, 1, Conv, [256, 3, 2]], 44 | [[-1, 20], 1, Concat, [1]], # cat head P4 45 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 46 | 47 | [-1, 1, Conv, [512, 3, 2]], 48 | [[-1, 16], 1, Concat, [1]], # cat head P5 49 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 50 | 51 | [-1, 1, Conv, [768, 3, 2]], 52 | [[-1, 12], 1, Concat, [1]], # cat head P6 53 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 54 | 55 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 56 | ] 57 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-p7.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # auto-anchor 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [768]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 22 | [-1, 3, C3, [1024]], 23 | [-1, 1, Conv, [1280, 3, 2]], # 11-P7/128 24 | [-1, 3, C3, [1280]], 25 | [-1, 1, SPPF, [1280, 5]], # 13 26 | ] 27 | 28 | # YOLOv5 head 29 | head: 30 | [[-1, 1, Conv, [1024, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 10], 1, Concat, [1]], # cat backbone P6 33 | [-1, 3, C3, [1024, False]], # 17 34 | 35 | [-1, 1, Conv, [768, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 38 | [-1, 3, C3, [768, False]], # 21 39 | 40 | [-1, 1, Conv, [512, 1, 1]], 41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 42 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 43 | [-1, 3, C3, [512, False]], # 25 44 | 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 47 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 48 | [-1, 3, C3, [256, False]], # 29 (P3/8-small) 49 | 50 | [-1, 1, Conv, [256, 3, 2]], 51 | [[-1, 26], 1, Concat, [1]], # cat head P4 52 | [-1, 3, C3, [512, False]], # 32 (P4/16-medium) 53 | 54 | [-1, 1, Conv, [512, 3, 2]], 55 | [[-1, 22], 1, Concat, [1]], # cat head P5 56 | [-1, 3, C3, 
[768, False]], # 35 (P5/32-large) 57 | 58 | [-1, 1, Conv, [768, 3, 2]], 59 | [[-1, 18], 1, Concat, [1]], # cat head P6 60 | [-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge) 61 | 62 | [-1, 1, Conv, [1024, 3, 2]], 63 | [[-1, 14], 1, Concat, [1]], # cat head P7 64 | [-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge) 65 | 66 | [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7) 67 | ] 68 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5_ConvNeXt.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 1 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [256, 0, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 16 | [-1, 1, ConvNeXt_Block, [512, 1, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 17 | [-1, 1, ConvNeXt_Block, [1024, 2, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 18 | [-1, 1, ConvNeXt_Block, [2048, 3, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | # 1024, 512, 256, 128 -> 768, 384, 192, 96 23 | # size 160[0] -> 80[1] -> 40[2] -> 20[3] 24 | head: 25 | [[-1, 1, Conv, [2048, 1, 1]], 26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 27 | [[-1, 2], 1, Concat, [1]], 28 | [-1, 3, C3, [2048, False]], 29 | 30 | [-1, 1, Conv, [1024, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 1], 1, Concat, [1]], 33 | [-1, 3, C3, [1024, False]], 34 | 35 | [-1, 1, 
Conv, [1024, 3, 2]], 36 | [[-1, 8], 1, Concat, [1]], 37 | [-1, 3, C3, [2048, False]], 38 | 39 | [-1, 1, Conv, [2048, 3, 2]], 40 | [[-1, 4], 1, Concat, [1]], # cat head P5 41 | [-1, 3, C3, [2048, False]], # 23 (P5/32-large) 42 | 43 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 44 | ] 45 | 46 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5_ConvNeXt_Tiny.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 1 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [96, 0, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 16 | [-1, 1, ConvNeXt_Block, [192, 1, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 17 | [-1, 1, ConvNeXt_Block, [384, 2, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 18 | [-1, 1, ConvNeXt_Block, [768, 3, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | # 1024, 512, 256, 128 -> 768, 384, 192, 96 23 | # size 160[0] -> 80[1] -> 40[2] -> 20[3] 24 | head: 25 | [[-1, 1, Conv, [768, 1, 1]], 26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 27 | [[-1, 2], 1, Concat, [1]], 28 | [-1, 3, C3, [768, False]], 29 | 30 | [-1, 1, Conv, [384, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 1], 1, Concat, [1]], 33 | [-1, 3, C3, [384, False]], 34 | 35 | [-1, 1, Conv, [384, 3, 2]], 36 | [[-1, 8], 1, Concat, [1]], 37 | [-1, 3, C3, [768, False]], 38 | 39 | [-1, 1, Conv, [768, 3, 2]], 40 | [[-1, 4], 1, Concat, [1]], # cat head P5 41 | [-1, 3, C3, [768, False]], # 23 (P5/32-large) 42 | 43 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 44 | ] 45 | 46 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5l6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 
'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5m6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5n6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | 
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5s-ghost.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3Ghost, [128]], 18 | [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3Ghost, [256]], 20 | [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3Ghost, [512]], 22 | [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3Ghost, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, GhostConv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3Ghost, [512, False]], # 13 33 | 34 | [-1, 1, GhostConv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, GhostConv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, GhostConv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5s-transformer.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth 
multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5s6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # 
Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/hub/yolov5x6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 1 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/tf.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | TensorFlow, Keras and TFLite versions of YOLOv5 4 | Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127 5 | 6 | Usage: 7 | $ python models/tf.py --weights yolov5s.pt 8 | 9 | Export: 10 | $ python path/to/export.py --weights yolov5s.pt --include saved_model pb tflite tfjs 11 | """ 12 | 13 | import argparse 14 | import sys 15 | from copy import deepcopy 16 | from pathlib import Path 17 | 18 | FILE = Path(__file__).resolve() 19 | ROOT = FILE.parents[1] # YOLOv5 root directory 20 | if str(ROOT) not in sys.path: 21 | sys.path.append(str(ROOT)) # add ROOT to PATH 22 | # ROOT = ROOT.relative_to(Path.cwd()) # relative 23 | 24 | import numpy as np 25 | import tensorflow as tf 26 | import torch 27 | import torch.nn as nn 28 | from tensorflow import keras 29 | 30 | from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, Concat, Conv, DWConv, Focus, autopad 31 | from models.experimental import CrossConv, MixConv2d, attempt_load 32 | from models.yolo import Detect 33 | from utils.activations import SiLU 34 | from utils.general import LOGGER, 
make_divisible, print_args 35 | 36 | 37 | class TFBN(keras.layers.Layer): 38 | # TensorFlow BatchNormalization wrapper 39 | def __init__(self, w=None): 40 | super().__init__() 41 | self.bn = keras.layers.BatchNormalization( 42 | beta_initializer=keras.initializers.Constant(w.bias.numpy()), 43 | gamma_initializer=keras.initializers.Constant(w.weight.numpy()), 44 | moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()), 45 | moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()), 46 | epsilon=w.eps) 47 | 48 | def call(self, inputs): 49 | return self.bn(inputs) 50 | 51 | 52 | class TFPad(keras.layers.Layer): 53 | def __init__(self, pad): 54 | super().__init__() 55 | self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]]) 56 | 57 | def call(self, inputs): 58 | return tf.pad(inputs, self.pad, mode='constant', constant_values=0) 59 | 60 | 61 | class TFConv(keras.layers.Layer): 62 | # Standard convolution 63 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): 64 | # ch_in, ch_out, weights, kernel, stride, padding, groups 65 | super().__init__() 66 | assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument" 67 | assert isinstance(k, int), "Convolution with multiple kernels are not allowed." 68 | # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding) 69 | # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch 70 | 71 | conv = keras.layers.Conv2D( 72 | c2, k, s, 'SAME' if s == 1 else 'VALID', use_bias=False if hasattr(w, 'bn') else True, 73 | kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()), 74 | bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy())) 75 | self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv]) 76 | self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity 77 | 78 | # YOLOv5 activations 79 | if isinstance(w.act, nn.LeakyReLU): 80 | self.act = (lambda x: keras.activations.relu(x, alpha=0.1)) if act else tf.identity 81 | elif isinstance(w.act, nn.Hardswish): 82 | self.act = (lambda x: x * tf.nn.relu6(x + 3) * 0.166666667) if act else tf.identity 83 | elif isinstance(w.act, (nn.SiLU, SiLU)): 84 | self.act = (lambda x: keras.activations.swish(x)) if act else tf.identity 85 | else: 86 | raise Exception(f'no matching TensorFlow activation found for {w.act}') 87 | 88 | def call(self, inputs): 89 | return self.act(self.bn(self.conv(inputs))) 90 | 91 | 92 | class TFFocus(keras.layers.Layer): 93 | # Focus wh information into c-space 94 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): 95 | # ch_in, ch_out, kernel, stride, padding, groups 96 | super().__init__() 97 | self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv) 98 | 99 | def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c) 100 | # inputs = inputs / 255 # normalize 0-255 to 0-1 101 | return self.conv(tf.concat([inputs[:, ::2, ::2, :], 102 | inputs[:, 1::2, ::2, :], 103 | inputs[:, ::2, 1::2, :], 104 | inputs[:, 1::2, 1::2, :]], 3)) 105 | 106 | 107 | class TFBottleneck(keras.layers.Layer): 108 | # Standard bottleneck 109 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion 110 | super().__init__() 111 | c_ = int(c2 * e) # hidden channels 112 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 113 | self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2) 114 | self.add = shortcut and c1 == c2 115 
| 116 | def call(self, inputs): 117 | return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs)) 118 | 119 | 120 | class TFConv2d(keras.layers.Layer): 121 | # Substitution for PyTorch nn.Conv2D 122 | def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None): 123 | super().__init__() 124 | assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument" 125 | self.conv = keras.layers.Conv2D( 126 | c2, k, s, 'VALID', use_bias=bias, 127 | kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()), 128 | bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None, ) 129 | 130 | def call(self, inputs): 131 | return self.conv(inputs) 132 | 133 | 134 | class TFBottleneckCSP(keras.layers.Layer): 135 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 136 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): 137 | # ch_in, ch_out, number, shortcut, groups, expansion 138 | super().__init__() 139 | c_ = int(c2 * e) # hidden channels 140 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 141 | self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2) 142 | self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3) 143 | self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4) 144 | self.bn = TFBN(w.bn) 145 | self.act = lambda x: keras.activations.relu(x, alpha=0.1) 146 | self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)]) 147 | 148 | def call(self, inputs): 149 | y1 = self.cv3(self.m(self.cv1(inputs))) 150 | y2 = self.cv2(inputs) 151 | return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3)))) 152 | 153 | 154 | class TFC3(keras.layers.Layer): 155 | # CSP Bottleneck with 3 convolutions 156 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): 157 | # ch_in, ch_out, number, shortcut, groups, expansion 158 | super().__init__() 159 | c_ = int(c2 * e) # hidden channels 160 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 161 | self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2) 162 | self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3) 163 | self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)]) 164 | 165 | def call(self, inputs): 166 | return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3)) 167 | 168 | 169 | class TFSPP(keras.layers.Layer): 170 | # Spatial pyramid pooling layer used in YOLOv3-SPP 171 | def __init__(self, c1, c2, k=(5, 9, 13), w=None): 172 | super().__init__() 173 | c_ = c1 // 2 # hidden channels 174 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 175 | self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2) 176 | self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k] 177 | 178 | def call(self, inputs): 179 | x = self.cv1(inputs) 180 | return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3)) 181 | 182 | 183 | class TFSPPF(keras.layers.Layer): 184 | # Spatial pyramid pooling-Fast layer 185 | def __init__(self, c1, c2, k=5, w=None): 186 | super().__init__() 187 | c_ = c1 // 2 # hidden channels 188 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 189 | self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2) 190 | self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME') 191 | 192 | def call(self, inputs): 193 | x = self.cv1(inputs) 194 | y1 = self.m(x) 195 | y2 = self.m(y1) 196 | return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3)) 197 | 198 | 199 | class TFDetect(keras.layers.Layer): 200 | def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # 
detection layer 201 | super().__init__() 202 | self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32) 203 | self.nc = nc # number of classes 204 | self.no = nc + 5 # number of outputs per anchor 205 | self.nl = len(anchors) # number of detection layers 206 | self.na = len(anchors[0]) // 2 # number of anchors 207 | self.grid = [tf.zeros(1)] * self.nl # init grid 208 | self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32) 209 | self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), 210 | [self.nl, 1, -1, 1, 2]) 211 | self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] 212 | self.training = False # set to False after building model 213 | self.imgsz = imgsz 214 | for i in range(self.nl): 215 | ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i] 216 | self.grid[i] = self._make_grid(nx, ny) 217 | 218 | def call(self, inputs): 219 | z = [] # inference output 220 | x = [] 221 | for i in range(self.nl): 222 | x.append(self.m[i](inputs[i])) 223 | # x(bs,20,20,255) to x(bs,3,20,20,85) 224 | ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i] 225 | x[i] = tf.transpose(tf.reshape(x[i], [-1, ny * nx, self.na, self.no]), [0, 2, 1, 3]) 226 | 227 | if not self.training: # inference 228 | y = tf.sigmoid(x[i]) 229 | xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i] # xy 230 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] 231 | # Normalize xywh to 0-1 to reduce calibration error 232 | xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) 233 | wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) 234 | y = tf.concat([xy, wh, y[..., 4:]], -1) 235 | z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no])) 236 | 237 | return x if self.training else (tf.concat(z, 1), x) 238 | 239 | @staticmethod 240 | def _make_grid(nx=20, ny=20): 241 | # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 242 | # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 243 | xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny)) 244 | return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) 245 | 246 | 247 | class TFUpsample(keras.layers.Layer): 248 | def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' 249 | super().__init__() 250 | assert scale_factor == 2, "scale_factor must be 2" 251 | self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * 2, x.shape[2] * 2), method=mode) 252 | # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode) 253 | # with default arguments: align_corners=False, half_pixel_centers=False 254 | # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x, 255 | # size=(x.shape[1] * 2, x.shape[2] * 2)) 256 | 257 | def call(self, inputs): 258 | return self.upsample(inputs) 259 | 260 | 261 | class TFConcat(keras.layers.Layer): 262 | def __init__(self, dimension=1, w=None): 263 | super().__init__() 264 | assert dimension == 1, "convert only NCHW to NHWC concat" 265 | self.d = 3 266 | 267 | def call(self, inputs): 268 | return tf.concat(inputs, self.d) 269 | 270 | 271 | def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) 272 | LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") 273 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 274 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else 
anchors # number of anchors 275 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 276 | 277 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 278 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 279 | m_str = m 280 | m = eval(m) if isinstance(m, str) else m # eval strings 281 | for j, a in enumerate(args): 282 | try: 283 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 284 | except NameError: 285 | pass 286 | 287 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 288 | if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]: 289 | c1, c2 = ch[f], args[0] 290 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 291 | 292 | args = [c1, c2, *args[1:]] 293 | if m in [BottleneckCSP, C3]: 294 | args.insert(2, n) 295 | n = 1 296 | elif m is nn.BatchNorm2d: 297 | args = [ch[f]] 298 | elif m is Concat: 299 | c2 = sum(ch[-1 if x == -1 else x + 1] for x in f) 300 | elif m is Detect: 301 | args.append([ch[x + 1] for x in f]) 302 | if isinstance(args[1], int): # number of anchors 303 | args[1] = [list(range(args[1] * 2))] * len(f) 304 | args.append(imgsz) 305 | else: 306 | c2 = ch[f] 307 | 308 | tf_m = eval('TF' + m_str.replace('nn.', '')) 309 | m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \ 310 | else tf_m(*args, w=model.model[i]) # module 311 | 312 | torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module 313 | t = str(m)[8:-2].replace('__main__.', '') # module type 314 | np = sum(x.numel() for x in torch_m_.parameters()) # number params 315 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 316 | LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print 317 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 318 | layers.append(m_) 319 | ch.append(c2) 320 | return keras.Sequential(layers), sorted(save) 321 | 322 | 323 | class TFModel: 324 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes 325 | super().__init__() 326 | if isinstance(cfg, dict): 327 | self.yaml = cfg # model dict 328 | else: # is *.yaml 329 | import yaml # for torch hub 330 | self.yaml_file = Path(cfg).name 331 | with open(cfg) as f: 332 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict 333 | 334 | # Define model 335 | if nc and nc != self.yaml['nc']: 336 | LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}") 337 | self.yaml['nc'] = nc # override yaml value 338 | self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz) 339 | 340 | def predict(self, inputs, tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, 341 | conf_thres=0.25): 342 | y = [] # outputs 343 | x = inputs 344 | for i, m in enumerate(self.model.layers): 345 | if m.f != -1: # if not from previous layer 346 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 347 | 348 | x = m(x) # run 349 | y.append(x if m.i in self.savelist else None) # save output 350 | 351 | # Add TensorFlow NMS 352 | if tf_nms: 353 | boxes = self._xywh2xyxy(x[0][..., :4]) 354 | probs = x[0][:, :, 4:5] 355 | classes = x[0][:, :, 5:] 356 | scores = probs * classes 357 | if agnostic_nms: 358 | nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, 
conf_thres) 359 | return nms, x[1] 360 | else: 361 | boxes = tf.expand_dims(boxes, 2) 362 | nms = tf.image.combined_non_max_suppression( 363 | boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False) 364 | return nms, x[1] 365 | 366 | return x[0] # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...] 367 | # x = x[0][0] # [x(1,6300,85), ...] to x(6300,85) 368 | # xywh = x[..., :4] # x(6300,4) boxes 369 | # conf = x[..., 4:5] # x(6300,1) confidences 370 | # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes 371 | # return tf.concat([conf, cls, xywh], 1) 372 | 373 | @staticmethod 374 | def _xywh2xyxy(xywh): 375 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 376 | x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1) 377 | return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1) 378 | 379 | 380 | class AgnosticNMS(keras.layers.Layer): 381 | # TF Agnostic NMS 382 | def call(self, input, topk_all, iou_thres, conf_thres): 383 | # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450 384 | return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres), input, 385 | fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32), 386 | name='agnostic_nms') 387 | 388 | @staticmethod 389 | def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS 390 | boxes, classes, scores = x 391 | class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32) 392 | scores_inp = tf.reduce_max(scores, -1) 393 | selected_inds = tf.image.non_max_suppression( 394 | boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres) 395 | selected_boxes = tf.gather(boxes, selected_inds) 396 | padded_boxes = tf.pad(selected_boxes, 397 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]], 398 | mode="CONSTANT", constant_values=0.0) 399 | selected_scores = tf.gather(scores_inp, selected_inds) 400 | padded_scores = tf.pad(selected_scores, 401 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]], 402 | mode="CONSTANT", constant_values=-1.0) 403 | selected_classes = tf.gather(class_inds, selected_inds) 404 | padded_classes = tf.pad(selected_classes, 405 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]], 406 | mode="CONSTANT", constant_values=-1.0) 407 | valid_detections = tf.shape(selected_inds)[0] 408 | return padded_boxes, padded_scores, padded_classes, valid_detections 409 | 410 | 411 | def representative_dataset_gen(dataset, ncalib=100): 412 | # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays 413 | for n, (path, img, im0s, vid_cap, string) in enumerate(dataset): 414 | input = np.transpose(img, [1, 2, 0]) 415 | input = np.expand_dims(input, axis=0).astype(np.float32) 416 | input /= 255 417 | yield [input] 418 | if n >= ncalib: 419 | break 420 | 421 | 422 | def run(weights=ROOT / 'yolov5s.pt', # weights path 423 | imgsz=(640, 640), # inference size h,w 424 | batch_size=1, # batch size 425 | dynamic=False, # dynamic batch size 426 | ): 427 | # PyTorch model 428 | im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image 429 | model = attempt_load(weights, map_location=torch.device('cpu'), inplace=True, fuse=False) 430 | y = model(im) # inference 431 | model.info() 432 | 433 | # TensorFlow model 434 | im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image 435 | tf_model = 
TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz) 436 | y = tf_model.predict(im) # inference 437 | 438 | # Keras model 439 | im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size) 440 | keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im)) 441 | keras_model.summary() 442 | 443 | LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.') 444 | 445 | 446 | def parse_opt(): 447 | parser = argparse.ArgumentParser() 448 | parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path') 449 | parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') 450 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 451 | parser.add_argument('--dynamic', action='store_true', help='dynamic batch size') 452 | opt = parser.parse_args() 453 | opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand 454 | print_args(FILE.stem, opt) 455 | return opt 456 | 457 | 458 | def main(opt): 459 | run(**vars(opt)) 460 | 461 | 462 | if __name__ == "__main__": 463 | opt = parse_opt() 464 | main(opt) 465 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolo.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | YOLO-specific modules 4 | 5 | Usage: 6 | $ python path/to/models/yolo.py --cfg yolov5s.yaml 7 | """ 8 | 9 | import argparse 10 | import sys 11 | from copy import deepcopy 12 | from pathlib import Path 13 | 14 | import torch 15 | from torchviz import make_dot 16 | FILE = Path(__file__).absolute() 17 | sys.path.append(FILE.parents[1].as_posix()) # add yolov5/ to path 18 | 19 | from models.common import * 20 | from models.experimental import * 21 | from utils.autoanchor import check_anchor_order 22 | from utils.general import make_divisible, check_file, set_logging 23 | from utils.plots import feature_visualization 24 | from utils.torch_utils import time_sync, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 25 | select_device, copy_attr 26 | 27 | try: 28 | import thop # for FLOPs computation 29 | except ImportError: 30 | thop = None 31 | 32 | LOGGER = logging.getLogger(__name__) 33 | 34 | 35 | class Detect(nn.Module): 36 | stride = None # strides computed during build 37 | onnx_dynamic = False # ONNX export parameter 38 | 39 | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer 40 | super().__init__() 41 | self.nc = nc # number of classes 42 | self.no = nc + 5 # number of outputs per anchor 43 | self.nl = len(anchors) # number of detection layers 44 | self.na = len(anchors[0]) // 2 # number of anchors 45 | self.grid = [torch.zeros(1)] * self.nl # init grid 46 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 47 | self.register_buffer('anchors', a) # shape(nl,na,2) 48 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 49 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 50 | self.inplace = inplace # use in-place ops (e.g. 
slice assignment) 51 | 52 | def forward(self, x): 53 | z = [] # inference output 54 | for i in range(self.nl): 55 | x[i] = self.m[i](x[i]) # conv 56 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 57 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 58 | 59 | if not self.training: # inference 60 | if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: 61 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 62 | 63 | y = x[i].sigmoid() 64 | if self.inplace: 65 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 66 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 67 | else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 68 | xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 69 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh 70 | y = torch.cat((xy, wh, y[..., 4:]), -1) 71 | z.append(y.view(bs, -1, self.no)) 72 | 73 | return x if self.training else (torch.cat(z, 1), x) 74 | 75 | @staticmethod 76 | def _make_grid(nx=20, ny=20): 77 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 78 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 79 | 80 | 81 | class Model(nn.Module): 82 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes 83 | super().__init__() 84 | if isinstance(cfg, dict): 85 | self.yaml = cfg # model dict 86 | else: # is *.yaml 87 | import yaml # for torch hub 88 | self.yaml_file = Path(cfg).name 89 | with open(cfg) as f: 90 | self.yaml = yaml.safe_load(f) # model dict 91 | 92 | # Define model 93 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 94 | if nc and nc != self.yaml['nc']: 95 | LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") 96 | self.yaml['nc'] = nc # override yaml value 97 | if anchors: 98 | LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}') 99 | self.yaml['anchors'] = round(anchors) # override yaml value 100 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 101 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 102 | self.inplace = self.yaml.get('inplace', True) 103 | # LOGGER.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 104 | 105 | # Build strides, anchors 106 | m = self.model[-1] # Detect() 107 | if isinstance(m, Detect): 108 | s = 256 # 2x min stride 109 | m.inplace = self.inplace 110 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 111 | m.anchors /= m.stride.view(-1, 1, 1) 112 | check_anchor_order(m) 113 | self.stride = m.stride 114 | self._initialize_biases() # only run once 115 | # LOGGER.info('Strides: %s' % m.stride.tolist()) 116 | 117 | # Init weights, biases 118 | initialize_weights(self) 119 | self.info() 120 | LOGGER.info('') 121 | 122 | def forward(self, x, augment=False, profile=False, visualize=False): 123 | if augment: 124 | return self.forward_augment(x) # augmented inference, None 125 | return self.forward_once(x, profile, visualize) # single-scale inference, train 126 | 127 | def forward_augment(self, x): 128 | img_size = x.shape[-2:] # height, width 129 | s = [1, 0.83, 0.67] # scales 130 | f = [None, 3, None] # flips (2-ud, 3-lr) 131 | y = [] # outputs 132 | for si, fi in zip(s, f): 133 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) 134 | yi = 
self.forward_once(xi)[0] # forward 135 | # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 136 | yi = self._descale_pred(yi, fi, si, img_size) 137 | y.append(yi) 138 | return torch.cat(y, 1), None # augmented inference, train 139 | 140 | def forward_once(self, x, profile=False, visualize=False): 141 | y, dt = [], [] # outputs 142 | for m in self.model: 143 | if m.f != -1: # if not from previous layer 144 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 145 | 146 | if profile: 147 | c = isinstance(m, Detect) # copy input as inplace fix 148 | o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs 149 | t = time_sync() 150 | for _ in range(10): 151 | m(x.copy() if c else x) 152 | dt.append((time_sync() - t) * 100) 153 | if m == self.model[0]: 154 | LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}") 155 | LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') 156 | 157 | x = m(x) # run 158 | y.append(x if m.i in self.save else None) # save output 159 | 160 | if visualize: 161 | feature_visualization(x, m.type, m.i, save_dir=visualize) 162 | 163 | if profile: 164 | LOGGER.info('%.1fms total' % sum(dt)) 165 | return x 166 | 167 | def _descale_pred(self, p, flips, scale, img_size): 168 | # de-scale predictions following augmented inference (inverse operation) 169 | if self.inplace: 170 | p[..., :4] /= scale # de-scale 171 | if flips == 2: 172 | p[..., 1] = img_size[0] - p[..., 1] # de-flip ud 173 | elif flips == 3: 174 | p[..., 0] = img_size[1] - p[..., 0] # de-flip lr 175 | else: 176 | x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale 177 | if flips == 2: 178 | y = img_size[0] - y # de-flip ud 179 | elif flips == 3: 180 | x = img_size[1] - x # de-flip lr 181 | p = torch.cat((x, y, wh, p[..., 4:]), -1) 182 | return p 183 | 184 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 185 | # https://arxiv.org/abs/1708.02002 section 3.3 186 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 187 | m = self.model[-1] # Detect() module 188 | for mi, s in zip(m.m, m.stride): # from 189 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 190 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 191 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 192 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 193 | 194 | def _print_biases(self): 195 | m = self.model[-1] # Detect() module 196 | for mi in m.m: # from 197 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 198 | LOGGER.info( 199 | ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 200 | 201 | # def _print_weights(self): 202 | # for m in self.model.modules(): 203 | # if type(m) is Bottleneck: 204 | # LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 205 | 206 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 207 | LOGGER.info('Fusing layers... 
') 208 | for m in self.model.modules(): 209 | if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): 210 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 211 | delattr(m, 'bn') # remove batchnorm 212 | m.forward = m.forward_fuse # update forward 213 | self.info() 214 | return self 215 | 216 | def autoshape(self): # add AutoShape module 217 | LOGGER.info('Adding AutoShape... ') 218 | m = AutoShape(self) # wrap model 219 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 220 | return m 221 | 222 | def info(self, verbose=False, img_size=640): # print model information 223 | model_info(self, verbose, img_size) 224 | 225 | 226 | def parse_model(d, ch): # model_dict, input_channels(3) 227 | LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 228 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 229 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 230 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 231 | 232 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 233 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 234 | m = eval(m) if isinstance(m, str) else m # eval strings 235 | for j, a in enumerate(args): 236 | try: 237 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 238 | except NameError: 239 | pass 240 | 241 | n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain 242 | if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, 243 | BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, CoordinateLayer]: 244 | c1, c2 = ch[f], args[0] 245 | if c2 != no: # if not output 246 | c2 = make_divisible(c2 * gw, 8) 247 | 248 | args = [c1, c2, *args[1:]] 249 | if m in [BottleneckCSP, C3, C3TR, C3Ghost]: 250 | args.insert(2, n) # number of repeats 251 | n = 1 252 | elif m is nn.BatchNorm2d: 253 | args = [ch[f]] 254 | elif m is Concat: 255 | c2 = sum([ch[x] for x in f]) 256 | elif m is Detect: 257 | args.append([ch[x] for x in f]) 258 | if isinstance(args[1], int): # number of anchors 259 | args[1] = [list(range(args[1] * 2))] * len(f) 260 | elif m is Contract: 261 | c2 = ch[f] * args[0] ** 2 262 | 263 | elif m is Expand: 264 | c2 = ch[f] // args[0] ** 2 265 | elif m is CoordinateLayer: 266 | channel, re = args[0], args[1] 267 | channel = make_divisible(channel * gw, 8) if channel != no else channel 268 | args = [channel, re] 269 | 270 | elif m is PPLC_Conv: 271 | c2 = args[0] 272 | args = args[1:] 273 | elif m is PPLC_Block: 274 | c2 = args[0] 275 | args = args[1:] 276 | elif m is ConvNeXt_Block: 277 | c2 = args[0] 278 | args = args[1:] 279 | else: 280 | c2 = ch[f] 281 | 282 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 283 | t = str(m)[8:-2].replace('__main__.', '') # module type 284 | np = sum([x.numel() for x in m_.parameters()]) # number params 285 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 286 | LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n_, np, t, args)) # print 287 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 288 | layers.append(m_) 289 | if i == 0: 290 | ch = [] 291 | ch.append(c2) 292 | return nn.Sequential(*layers), sorted(save) 293 | 294 | 295 | def getLayers(model): 296 | """ 297 | get each layer's name and its module 298 | 
:param model: 299 | :return: each layer's name and its module 300 | """ 301 | layers = [] 302 | 303 | def unfoldLayer(model): 304 | """ 305 | unfold each layer 306 | :param model: the given model or a single layer 307 | 308 | :return: 309 | """ 310 | 311 | # get all layers of the model 312 | layer_list = list(model.named_children()) 313 | for item in layer_list: 314 | module = item[1] 315 | sublayer = list(module.named_children()) 316 | sublayer_num = len(sublayer) 317 | 318 | # if current layer has no sublayers, it is a leaf module, so record it 319 | if sublayer_num == 0: 320 | layers.append(module) 321 | # if current layer contains sublayers, unfold them 322 | elif isinstance(module, torch.nn.Module): 323 | unfoldLayer(module) 324 | 325 | unfoldLayer(model) 326 | return layers 327 | if __name__ == '__main__': 328 | 329 | model_urls = { 330 | "convnext_tiny_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth", 331 | "convnext_small_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth", 332 | "convnext_base_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth", 333 | "convnext_large_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth", 334 | "convnext_base_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth", 335 | "convnext_large_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth", 336 | "convnext_xlarge_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth", 337 | } 338 | url = model_urls['convnext_tiny_1k'] 339 | checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True) 340 | init_dict = {} 341 | for index in range(4): 342 | for k, v in list(checkpoint['model'].items()): 343 | if k.startswith('norm') or k.startswith('head'): 344 | pass 345 | else: 346 | init_dict['.'.join(['model', str(index), k])] = v 347 | 348 | 349 | import tensorwatch as tw 350 | # 5s Model Summary: 283 layers, 7082421 parameters, 7082421 gradients, 16.4 GFLOPs 351 | # PP1.0 Model Summary: 367 layers, 3857195 parameters, 3857195 gradients, 8.2 GFLOPs 352 | parser = argparse.ArgumentParser() 353 | # parser.add_argument('--cfg', type=str, default='yolov5s_Coordinate.yaml', help='model.yaml') 354 | parser.add_argument('--cfg', type=str, default='yolov5_ConvNeXt.yaml', help='model.yaml') 355 | 356 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 357 | parser.add_argument('--profile', action='store_true', help='profile model speed') 358 | opt = parser.parse_args() 359 | opt.cfg = check_file(opt.cfg) # check file 360 | set_logging() 361 | device = select_device(opt.device) 362 | 363 | # Create model 364 | model = Model(opt.cfg).to(device) 365 | model.train() 366 | 367 | # Update the backbone weights with the pretrained ConvNeXt state dict 368 | model_dict = model.state_dict() 369 | model_dict.update(init_dict) 370 | model.load_state_dict(model_dict) 371 | 372 | # Profile 373 | if opt.profile: 374 | img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 375 | y = model(img, profile=True) 376 | print(y.shape) 377 | # x = torch.randn(1, 8) 378 | 379 | # print(model) 380 | # from thop import profile 381 | 382 | # flops, params = profile(model, (torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device),)) 383 | 384 | # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898) 385 | # from torch.utils.tensorboard import SummaryWriter 386 | # tb_writer = SummaryWriter('.') 387 | # LOGGER.info("Run 'tensorboard --logdir=models' to view tensorboard at http://localhost:6006/") 388 | # tb_writer.add_graph(torch.jit.trace(model, img, strict=False), []) # add model graph 389 | 390 | 391 | 392 | 393 | 394 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5_ConvNeXt_Tiny.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [96, 0, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 16 | [-1, 1, ConvNeXt_Block, [192, 1, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 17 | [-1, 1, ConvNeXt_Block, [384, 2, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 18 | [-1, 1, ConvNeXt_Block, [768, 3, 3, [3, 3, 9, 3], [96, 192, 384, 768]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | head: 23 | [[-1, 1, Conv, [768, 1, 1]], 24 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 25 | [[-1, 2], 1, Concat, [1]], 26 | [-1, 3, C3, [768, False]], 27 | 28 | [-1, 1, Conv, [384, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 1], 1, Concat, [1]], 31 | [-1, 3, C3, [384, False]], 32 | 33 | [-1, 1, Conv, [384, 3, 2]], 34 | [[-1, 8], 1, Concat, [1]], 35 | [-1, 3, C3, [768, False]], 36 | 37 | [-1, 1, Conv, [768, 3, 2]], 38 | [[-1, 4], 1, Concat, [1]], # cat head P5 39 | [-1, 3, C3, [768, False]], # 23 (P5/32-large) 40 | 41 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | 44 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5_ConvNeXt_base.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [96, 0, 3, 
[3, 3, 27, 3], [128, 256, 512, 1024]]], 16 | [-1, 1, ConvNeXt_Block, [192, 1, 3, [3, 3, 27, 3], [128, 256, 512, 1024]]], 17 | [-1, 1, ConvNeXt_Block, [384, 2, 3, [3, 3, 27, 3], [128, 256, 512, 1024]]], 18 | [-1, 1, ConvNeXt_Block, [768, 3, 3, [3, 3, 27, 3], [128, 256, 512, 1024]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | head: 23 | [[-1, 1, Conv, [1024, 1, 1]], 24 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 25 | [[-1, 2], 1, Concat, [1]], 26 | [-1, 3, C3, [1024, False]], 27 | 28 | [-1, 1, Conv, [512, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 1], 1, Concat, [1]], 31 | [-1, 3, C3, [512, False]], 32 | 33 | [-1, 1, Conv, [512, 3, 2]], 34 | [[-1, 8], 1, Concat, [1]], 35 | [-1, 3, C3, [1024, False]], 36 | 37 | [-1, 1, Conv, [1024, 3, 2]], 38 | [[-1, 4], 1, Concat, [1]], # cat head P5 39 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 40 | 41 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | 44 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5_ConvNeXt_large.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [192, 0, 3, [3, 3, 27, 3], [192, 384, 768, 1536]]], 16 | [-1, 1, ConvNeXt_Block, [384, 1, 3, [3, 3, 27, 3], [192, 384, 768, 1536]]], 17 | [-1, 1, ConvNeXt_Block, [768, 2, 3, [3, 3, 27, 3], [192, 384, 768, 1536]]], 18 | [-1, 1, ConvNeXt_Block, [1536, 3, 3, [3, 3, 27, 3], [192, 384, 768, 1536]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | head: 23 | [[-1, 1, Conv, [1536, 1, 1]], 24 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 25 | [[-1, 2], 1, Concat, [1]], 26 | [-1, 3, C3, [1536, False]], 27 | 28 | [-1, 1, Conv, [768, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 1], 1, Concat, [1]], 31 | [-1, 3, C3, [768, False]], 32 | 33 | [-1, 1, Conv, [768, 3, 2]], 34 | [[-1, 8], 1, Concat, [1]], 35 | [-1, 3, C3, [1536, False]], 36 | 37 | [-1, 1, Conv, [1536, 3, 2]], 38 | [[-1, 4], 1, Concat, [1]], # cat head P5 39 | [-1, 3, C3, [1536, False]], # 23 (P5/32-large) 40 | 41 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | 44 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5_ConvNeXt_small.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [96, 0, 3, [3, 3, 27, 3], [96, 192, 384, 768]]], 16 | [-1, 1, ConvNeXt_Block, [192, 1, 3, [3, 3, 27, 3], [96, 192, 384, 768]]], 17 | [-1, 1, ConvNeXt_Block, [384, 2, 3, [3, 3, 27, 3], [96, 192, 384, 768]]], 18 | [-1, 1, ConvNeXt_Block, [768, 3, 3, [3, 3, 27, 3], [96, 192, 384, 768]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | head: 23 | 
[[-1, 1, Conv, [768, 1, 1]], 24 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 25 | [[-1, 2], 1, Concat, [1]], 26 | [-1, 3, C3, [768, False]], 27 | 28 | [-1, 1, Conv, [384, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 1], 1, Concat, [1]], 31 | [-1, 3, C3, [384, False]], 32 | 33 | [-1, 1, Conv, [384, 3, 2]], 34 | [[-1, 8], 1, Concat, [1]], 35 | [-1, 3, C3, [768, False]], 36 | 37 | [-1, 1, Conv, [768, 3, 2]], 38 | [[-1, 4], 1, Concat, [1]], # cat head P5 39 | [-1, 3, C3, [768, False]], # 23 (P5/32-large) 40 | 41 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | 44 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5_ConvNeXt_xlarge_22k.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | # [from, number, module, args] 14 | backbone: 15 | [[-1, 1, ConvNeXt_Block, [96, 0, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 16 | [-1, 1, ConvNeXt_Block, [192, 1, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 17 | [-1, 1, ConvNeXt_Block, [384, 2, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 18 | [-1, 1, ConvNeXt_Block, [768, 3, 3, [3, 3, 27, 3], [256, 512, 1024, 2048]]], 19 | ] 20 | 21 | # YOLOv5 head 22 | head: 23 | [[-1, 1, Conv, [2048, 1, 1]], 24 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 25 | [[-1, 2], 1, Concat, [1]], 26 | [-1, 3, C3, [2048, False]], 27 | 28 | [-1, 1, Conv, [1024, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 1], 1, Concat, [1]], 31 | [-1, 3, C3, [1024, False]], 32 | 33 | [-1, 1, Conv, [1024, 3, 2]], 34 | [[-1, 8], 1, Concat, [1]], 35 | [-1, 3, C3, [2048, False]], 36 | 37 | [-1, 1, Conv, [2048, 3, 2]], 38 | [[-1, 4], 1, Concat, [1]], # cat head P5 39 | [-1, 3, C3, [2048, False]], # 23 (P5/32-large) 40 | 41 | [[11, 14, 17], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | 44 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone 
P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5n.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 
1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /ConvNeXt-YoloV5/models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, 
[512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /PP-LCNet-Yolov5/PP_LCNet/LCNet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import thop 5 | 6 | # try: 7 | # import softpool_cuda 8 | # from SoftPool import soft_pool2d, SoftPool2d 9 | # except ImportError: 10 | # print('Please install SoftPool first: https://github.com/alexandrosstergiou/SoftPool') 11 | # exit(0) 12 | 13 | NET_CONFIG = { 14 | "blocks2": 15 | # k, in_c, out_c, s, use_se 16 | [[3, 16, 32, 1, False]], 17 | "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], 18 | "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], 19 | "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], 20 | [5, 256, 256, 1, False], [5, 256, 256, 1, False], 21 | [5, 256, 256, 1, False], [5, 256, 256, 1, False]], 22 | "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] 23 | } 24 | 25 | 26 | def autopad(k, p=None): 27 | if p is None: 28 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] 29 | return p 30 | 31 | 32 | def make_divisible(v, divisor=8, min_value=None): 33 | if min_value is None: 34 | min_value = divisor 35 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 36 | if new_v < 0.9 * v: 37 | new_v += divisor 38 | return new_v 39 | 40 | 41 | class HardSwish(nn.Module): 42 | def __init__(self, inplace=True): 43 | super(HardSwish, self).__init__() 44 | self.relu6 = nn.ReLU6(inplace=inplace) 45 | 46 | def forward(self, x): 47 | return x * self.relu6(x+3) / 6 48 | 49 | 50 | class HardSigmoid(nn.Module): 51 | def __init__(self, inplace=True): 52 | super(HardSigmoid, self).__init__() 53 | self.relu6 = nn.ReLU6(inplace=inplace) 54 | 55 | def forward(self, x): 56 | return (self.relu6(x+3)) / 6 57 | 58 | 59 | class SELayer(nn.Module): 60 | def __init__(self, channel, reduction=16): 61 | super(SELayer, self).__init__() 62 | self.avgpool = nn.AdaptiveAvgPool2d(1) 63 | self.fc = nn.Sequential( 64 | nn.Linear(channel, channel // reduction, bias=False), 65 | nn.ReLU(inplace=True), 66 | nn.Linear(channel // reduction, channel, bias=False), 67 | HardSigmoid() 68 | ) 69 | 70 | def forward(self, x): 71 | b, c, h, w = x.size() 72 | y = self.avgpool(x).view(b, c) 73 | y = self.fc(y).view(b, c, 1, 1) 74 | return x * y.expand_as(x) 75 | 76 | 77 | class DepthwiseSeparable(nn.Module): 78 | def __init__(self, inp, oup, dw_size, stride, use_se=False): 79 | super(DepthwiseSeparable, self).__init__() 80 | self.use_se = use_se 81 | self.stride = stride 82 | self.inp = inp 83 | self.oup = oup 84 | self.dw_size = dw_size 85 | self.dw_sp = nn.Sequential( 86 | nn.Conv2d(self.inp, self.inp, kernel_size=self.dw_size, stride=self.stride, 87 | padding=autopad(self.dw_size, None), groups=self.inp, bias=False), 88 | nn.BatchNorm2d(self.inp), 89 | HardSwish(), 90 | 91 | nn.Conv2d(self.inp, self.oup, kernel_size=1, stride=1, padding=0, bias=False), 92 | nn.BatchNorm2d(self.oup), 93 | HardSwish(), 94 | ) 95 | self.se = SELayer(self.oup) 96 | 97 | def forward(self, x): 98 | x = self.dw_sp(x) 99 | if self.use_se: 100 | x = self.se(x) 101 | return x 102 | 103 | 104 | class PP_LCNet(nn.Module): 105 | def __init__(self, scale=1.0, class_num=10, class_expand=1280, dropout_prob=0.2): 106 | super(PP_LCNet, 
self).__init__() 107 | self.scale = scale 108 | self.conv1 = nn.Conv2d(3, out_channels=make_divisible(16 * self.scale), 109 | kernel_size=3, stride=2, padding=1, bias=False) 110 | # k, in_c, out_c, s, use_se inp, oup, dw_size, stride, use_se=False 111 | self.blocks2 = nn.Sequential(*[ 112 | DepthwiseSeparable(inp=make_divisible(in_c * self.scale), 113 | oup=make_divisible(out_c * self.scale), 114 | dw_size=k, stride=s, use_se=use_se) 115 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks2"]) 116 | ]) 117 | 118 | self.blocks3 = nn.Sequential(*[ 119 | DepthwiseSeparable(inp=make_divisible(in_c * self.scale), 120 | oup=make_divisible(out_c * self.scale), 121 | dw_size=k, stride=s, use_se=use_se) 122 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks3"]) 123 | ]) 124 | 125 | self.blocks4 = nn.Sequential(*[ 126 | DepthwiseSeparable(inp=make_divisible(in_c * self.scale), 127 | oup=make_divisible(out_c * self.scale), 128 | dw_size=k, stride=s, use_se=use_se) 129 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks4"]) 130 | ]) 131 | # k, in_c, out_c, s, use_se inp, oup, dw_size, stride, use_se=False 132 | self.blocks5 = nn.Sequential(*[ 133 | DepthwiseSeparable(inp=make_divisible(in_c * self.scale), 134 | oup=make_divisible(out_c * self.scale), 135 | dw_size=k, stride=s, use_se=use_se) 136 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks5"]) 137 | ]) 138 | 139 | self.blocks6 = nn.Sequential(*[ 140 | DepthwiseSeparable(inp=make_divisible(in_c * self.scale), 141 | oup=make_divisible(out_c * self.scale), 142 | dw_size=k, stride=s, use_se=use_se) 143 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks6"]) 144 | ]) 145 | 146 | self.GAP = nn.AdaptiveAvgPool2d(1) 147 | 148 | self.last_conv = nn.Conv2d(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale), 149 | out_channels=class_expand, 150 | kernel_size=1, stride=1, padding=0, bias=False) 151 | 152 | self.hardswish = HardSwish() 153 | self.dropout = nn.Dropout(p=dropout_prob) 154 | 155 | self.fc = nn.Linear(class_expand, class_num) 156 | 157 | def forward(self, x): 158 | x = self.conv1(x) 159 | print(x.shape) 160 | x = self.blocks2(x) 161 | print(x.shape) 162 | x = self.blocks3(x) 163 | print(x.shape) 164 | x = self.blocks4(x) 165 | print(x.shape) 166 | x = self.blocks5(x) 167 | print(x.shape) 168 | x = self.blocks6(x) 169 | print(x.shape) 170 | 171 | x = self.GAP(x) 172 | x = self.last_conv(x) 173 | x = self.hardswish(x) 174 | x = self.dropout(x) 175 | x = torch.flatten(x, start_dim=1, end_dim=-1) 176 | x = self.fc(x) 177 | return x 178 | 179 | 180 | def PPLCNET_x0_25(**kwargs): 181 | model = PP_LCNet(scale=0.25, **kwargs) 182 | return model 183 | 184 | 185 | def PPLCNET_x0_35(**kwargs): 186 | model = PP_LCNet(scale=0.35, **kwargs) 187 | return model 188 | 189 | 190 | def PPLCNET_x0_5(**kwargs): 191 | model = PP_LCNet(scale=0.5, **kwargs) 192 | return model 193 | 194 | 195 | def PPLCNET_x0_75(**kwargs): 196 | model = PP_LCNet(scale=0.75, **kwargs) 197 | return model 198 | 199 | 200 | def PPLCNET_x1_0(**kwargs): 201 | model = PP_LCNet(scale=1.0, **kwargs) 202 | return model 203 | 204 | 205 | def PPLCNET_x1_5(**kwargs): 206 | model = PP_LCNet(scale=1.5, **kwargs) 207 | return model 208 | 209 | 210 | def PPLCNET_x2_0(**kwargs): 211 | model = PP_LCNet(scale=2.0, **kwargs) 212 | return model 213 | 214 | def PPLCNET_x2_5(**kwargs): 215 | model = PP_LCNet(scale=2.5, **kwargs) 216 | return model 217 | 218 | 219 | 220 | 221 | if __name__ == '__main__': 222 | # 
input = torch.randn(1, 3, 640, 640) 223 | # model = PPLCNET_x2_5() 224 | # flops, params = thop.profile(model, inputs=(input,)) 225 | # print('flops:', flops / 1000000000) 226 | # print('params:', params / 1000000) 227 | 228 | model = PPLCNET_x1_0() 229 | # model_1 = PW_Conv(3, 16) 230 | input = torch.randn(2, 3, 256, 256) 231 | print(input.shape) 232 | output = model(input) 233 | print(output.shape) # [1, num_class] 234 | 235 | 236 | -------------------------------------------------------------------------------- /PP-LCNet-Yolov5/models/common.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Common modules 4 | """ 5 | 6 | import logging 7 | import math 8 | import warnings 9 | from copy import copy 10 | from pathlib import Path 11 | 12 | import numpy as np 13 | import pandas as pd 14 | import requests 15 | import torch 16 | import torch.nn as nn 17 | from PIL import Image 18 | from torch.cuda import amp 19 | 20 | from utils.datasets import exif_transpose, letterbox 21 | from utils.general import colorstr, increment_path, is_ascii, make_divisible, non_max_suppression, save_one_box, \ 22 | scale_coords, xyxy2xywh 23 | from utils.plots import Annotator, colors 24 | from utils.torch_utils import time_sync 25 | from torch.nn.parameter import Parameter 26 | 27 | LOGGER = logging.getLogger(__name__) 28 | 29 | 30 | def autopad(k, p=None): # kernel, padding 31 | # Pad to 'same' 32 | if p is None: 33 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 34 | return p 35 | 36 | 37 | class Conv(nn.Module): 38 | # Standard convolution 39 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 40 | super().__init__() 41 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 42 | self.bn = nn.BatchNorm2d(c2) 43 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 44 | 45 | def forward(self, x): 46 | return self.act(self.bn(self.conv(x))) 47 | 48 | def forward_fuse(self, x): 49 | return self.act(self.conv(x)) 50 | 51 | 52 | class DWConv(Conv): 53 | # Depth-wise convolution class 54 | def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 55 | super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 56 | 57 | 58 | class TransformerLayer(nn.Module): 59 | # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) 60 | def __init__(self, c, num_heads): 61 | super().__init__() 62 | self.q = nn.Linear(c, c, bias=False) 63 | self.k = nn.Linear(c, c, bias=False) 64 | self.v = nn.Linear(c, c, bias=False) 65 | self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) 66 | self.fc1 = nn.Linear(c, c, bias=False) 67 | self.fc2 = nn.Linear(c, c, bias=False) 68 | 69 | def forward(self, x): 70 | x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x 71 | x = self.fc2(self.fc1(x)) + x 72 | return x 73 | 74 | 75 | class TransformerBlock(nn.Module): 76 | # Vision Transformer https://arxiv.org/abs/2010.11929 77 | def __init__(self, c1, c2, num_heads, num_layers): 78 | super().__init__() 79 | self.conv = None 80 | if c1 != c2: 81 | self.conv = Conv(c1, c2) 82 | self.linear = nn.Linear(c2, c2) # learnable position embedding 83 | self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)]) 84 | self.c2 = c2 85 | 86 | def forward(self, x): 87 | if self.conv is not None: 88 | x = 
self.conv(x) 89 | b, _, w, h = x.shape 90 | p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3) 91 | return self.tr(p + self.linear(p)).unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h) 92 | 93 | 94 | class Bottleneck(nn.Module): 95 | # Standard bottleneck 96 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 97 | super().__init__() 98 | c_ = int(c2 * e) # hidden channels 99 | self.cv1 = Conv(c1, c_, 1, 1) 100 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 101 | self.add = shortcut and c1 == c2 102 | 103 | def forward(self, x): 104 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 105 | 106 | 107 | class BottleneckCSP(nn.Module): 108 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 109 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 110 | super().__init__() 111 | c_ = int(c2 * e) # hidden channels 112 | self.cv1 = Conv(c1, c_, 1, 1) 113 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 114 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 115 | self.cv4 = Conv(2 * c_, c2, 1, 1) 116 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 117 | self.act = nn.LeakyReLU(0.1, inplace=True) 118 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 119 | 120 | def forward(self, x): 121 | y1 = self.cv3(self.m(self.cv1(x))) 122 | y2 = self.cv2(x) 123 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 124 | 125 | 126 | class C3(nn.Module): 127 | # CSP Bottleneck with 3 convolutions 128 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 129 | super().__init__() 130 | c_ = int(c2 * e) # hidden channels 131 | self.cv1 = Conv(c1, c_, 1, 1) 132 | self.cv2 = Conv(c1, c_, 1, 1) 133 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) 134 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 135 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 136 | 137 | def forward(self, x): 138 | # print(self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)).shape) 139 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) 140 | 141 | 142 | class C3TR(C3): 143 | # C3 module with TransformerBlock() 144 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 145 | super().__init__(c1, c2, n, shortcut, g, e) 146 | c_ = int(c2 * e) 147 | self.m = TransformerBlock(c_, c_, 4, n) 148 | 149 | 150 | class C3SPP(C3): 151 | # C3 module with SPP() 152 | def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5): 153 | super().__init__(c1, c2, n, shortcut, g, e) 154 | c_ = int(c2 * e) 155 | self.m = SPP(c_, c_, k) 156 | 157 | 158 | class C3Ghost(C3): 159 | # C3 module with GhostBottleneck() 160 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 161 | super().__init__(c1, c2, n, shortcut, g, e) 162 | c_ = int(c2 * e) # hidden channels 163 | self.m = nn.Sequential(*[GhostBottleneck(c_, c_) for _ in range(n)]) 164 | 165 | 166 | class SPP(nn.Module): 167 | # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729 168 | def __init__(self, c1, c2, k=(5, 9, 13)): 169 | super().__init__() 170 | c_ = c1 // 2 # hidden channels 171 | self.cv1 = Conv(c1, c_, 1, 1) 172 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 173 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 174 | 175 | def 
forward(self, x): 176 | x = self.cv1(x) 177 | with warnings.catch_warnings(): 178 | warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning 179 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 180 | 181 | 182 | class SPPF(nn.Module): 183 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 184 | def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) 185 | super().__init__() 186 | c_ = c1 // 2 # hidden channels 187 | self.cv1 = Conv(c1, c_, 1, 1) 188 | self.cv2 = Conv(c_ * 4, c2, 1, 1) 189 | self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) 190 | 191 | def forward(self, x): 192 | x = self.cv1(x) 193 | with warnings.catch_warnings(): 194 | warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning 195 | y1 = self.m(x) 196 | y2 = self.m(y1) 197 | return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1)) 198 | 199 | 200 | class Focus(nn.Module): 201 | # Focus wh information into c-space 202 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 203 | super().__init__() 204 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 205 | # self.contract = Contract(gain=2) 206 | 207 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 208 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 209 | # return self.conv(self.contract(x)) 210 | 211 | 212 | class GhostConv(nn.Module): 213 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 214 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 215 | super().__init__() 216 | c_ = c2 // 2 # hidden channels 217 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 218 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 219 | 220 | def forward(self, x): 221 | y = self.cv1(x) 222 | return torch.cat([y, self.cv2(y)], 1) 223 | 224 | 225 | class GhostBottleneck(nn.Module): 226 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 227 | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride 228 | super().__init__() 229 | c_ = c2 // 2 230 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 231 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 232 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 233 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 234 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 235 | 236 | def forward(self, x): 237 | return self.conv(x) + self.shortcut(x) 238 | 239 | 240 | class Contract(nn.Module): 241 | # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) 242 | def __init__(self, gain=2): 243 | super().__init__() 244 | self.gain = gain 245 | 246 | def forward(self, x): 247 | b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain' 248 | s = self.gain 249 | x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2) 250 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) 251 | return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40) 252 | 253 | 254 | class Expand(nn.Module): 255 | # Expand channels into width-height, i.e. 
x(1,64,80,80) to x(1,16,160,160) 256 | def __init__(self, gain=2): 257 | super().__init__() 258 | self.gain = gain 259 | 260 | def forward(self, x): 261 | b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' 262 | s = self.gain 263 | x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80) 264 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) 265 | return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160) 266 | 267 | 268 | class Concat(nn.Module): 269 | # Concatenate a list of tensors along dimension 270 | def __init__(self, dimension=1): 271 | super().__init__() 272 | self.d = dimension 273 | 274 | def forward(self, x): 275 | return torch.cat(x, self.d) 276 | 277 | 278 | class AutoShape(nn.Module): 279 | # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS 280 | conf = 0.25 # NMS confidence threshold 281 | iou = 0.45 # NMS IoU threshold 282 | classes = None # (optional list) filter by class 283 | max_det = 1000 # maximum number of detections per image 284 | 285 | def __init__(self, model): 286 | super().__init__() 287 | self.model = model.eval() 288 | 289 | def autoshape(self): 290 | LOGGER.info('AutoShape already enabled, skipping... ') # model already converted to model.autoshape() 291 | return self 292 | 293 | @torch.no_grad() 294 | def forward(self, imgs, size=640, augment=False, profile=False): 295 | # Inference from various sources. For height=640, width=1280, RGB images example inputs are: 296 | # file: imgs = 'data/images/zidane.jpg' # str or PosixPath 297 | # URI: = 'https://ultralytics.com/images/zidane.jpg' 298 | # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) 299 | # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3) 300 | # numpy: = np.zeros((640,1280,3)) # HWC 301 | # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) 302 | # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] 
# list of images 303 | 304 | t = [time_sync()] 305 | p = next(self.model.parameters()) # for device and type 306 | if isinstance(imgs, torch.Tensor): # torch 307 | with amp.autocast(enabled=p.device.type != 'cpu'): 308 | return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference 309 | 310 | # Pre-process 311 | n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images 312 | shape0, shape1, files = [], [], [] # image and inference shapes, filenames 313 | for i, im in enumerate(imgs): 314 | f = f'image{i}' # filename 315 | if isinstance(im, (str, Path)): # filename or uri 316 | im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im 317 | im = np.asarray(exif_transpose(im)) 318 | elif isinstance(im, Image.Image): # PIL Image 319 | im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f 320 | files.append(Path(f).with_suffix('.jpg').name) 321 | if im.shape[0] < 5: # image in CHW 322 | im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) 323 | im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3) # enforce 3ch input 324 | s = im.shape[:2] # HWC 325 | shape0.append(s) # image shape 326 | g = (size / max(s)) # gain 327 | shape1.append([y * g for y in s]) 328 | imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update 329 | shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape 330 | x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad 331 | x = np.stack(x, 0) if n > 1 else x[0][None] # stack 332 | x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW 333 | x = torch.from_numpy(x).to(p.device).type_as(p) / 255. 
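        # Batch assembly recap: shape1 is the per-batch max letterbox shape rounded up to a
        # stride multiple via make_divisible, each image is letterboxed to that common shape,
        # and the stack is flipped from BHWC to BCHW with pixels rescaled from uint8 to [0, 1].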
# uint8 to fp16/32 334 | t.append(time_sync()) 335 | 336 | with amp.autocast(enabled=p.device.type != 'cpu'): 337 | # Inference 338 | y = self.model(x, augment, profile)[0] # forward 339 | t.append(time_sync()) 340 | 341 | # Post-process 342 | y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det) # NMS 343 | for i in range(n): 344 | scale_coords(shape1, y[i][:, :4], shape0[i]) 345 | 346 | t.append(time_sync()) 347 | return Detections(imgs, y, files, t, self.names, x.shape) 348 | 349 | 350 | class Detections: 351 | # YOLOv5 detections class for inference results 352 | def __init__(self, imgs, pred, files, times=None, names=None, shape=None): 353 | super().__init__() 354 | d = pred[0].device # device 355 | gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations 356 | self.imgs = imgs # list of images as numpy arrays 357 | self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) 358 | self.names = names # class names 359 | self.ascii = is_ascii(names) # names are ascii (use PIL for UTF-8) 360 | self.files = files # image filenames 361 | self.xyxy = pred # xyxy pixels 362 | self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels 363 | self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized 364 | self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized 365 | self.n = len(self.pred) # number of images (batch size) 366 | self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms) 367 | self.s = shape # inference BCHW shape 368 | 369 | def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')): 370 | for i, (im, pred) in enumerate(zip(self.imgs, self.pred)): 371 | str = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' 372 | if pred.shape[0]: 373 | for c in pred[:, -1].unique(): 374 | n = (pred[:, -1] == c).sum() # detections per class 375 | str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string 376 | if show or save or render or crop: 377 | annotator = Annotator(im, pil=not self.ascii) 378 | for *box, conf, cls in reversed(pred): # xyxy, confidence, class 379 | label = f'{self.names[int(cls)]} {conf:.2f}' 380 | if crop: 381 | save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i]) 382 | else: # all others 383 | annotator.box_label(box, label, color=colors(cls)) 384 | im = annotator.im 385 | else: 386 | str += '(no detections)' 387 | 388 | im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np 389 | if pprint: 390 | LOGGER.info(str.rstrip(', ')) 391 | if show: 392 | im.show(self.files[i]) # show 393 | if save: 394 | f = self.files[i] 395 | im.save(save_dir / f) # save 396 | if i == self.n - 1: 397 | LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") 398 | if render: 399 | self.imgs[i] = np.asarray(im) 400 | 401 | def print(self): 402 | self.display(pprint=True) # print results 403 | LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % 404 | self.t) 405 | 406 | def show(self): 407 | self.display(show=True) # show results 408 | 409 | def save(self, save_dir='runs/detect/exp'): 410 | save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir 411 | self.display(save=True, save_dir=save_dir) # save results 412 | 413 | def crop(self, save_dir='runs/detect/exp'): 414 
| save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir 415 | self.display(crop=True, save_dir=save_dir) # crop results 416 | LOGGER.info(f'Saved results to {save_dir}\n') 417 | 418 | def render(self): 419 | self.display(render=True) # render results 420 | return self.imgs 421 | 422 | def pandas(self): 423 | # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0]) 424 | new = copy(self) # return copy 425 | ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns 426 | cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns 427 | for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]): 428 | a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update 429 | setattr(new, k, [pd.DataFrame(x, columns=c) for x in a]) 430 | return new 431 | 432 | def tolist(self): 433 | # return a list of Detections objects, i.e. 'for result in results.tolist():' 434 | x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], times=(0, 0, 0, 0), names=self.names, shape=self.s) for i in range(self.n)] # pass files explicitly; dummy times keep __init__'s timing math valid 435 | for d in x: 436 | for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: 437 | setattr(d, k, getattr(d, k)[0]) # pop out of list 438 | return x 439 | 440 | def __len__(self): 441 | return self.n 442 | 443 | 444 | class Classify(nn.Module): 445 | # Classification head, i.e. x(b,c1,20,20) to x(b,c2) 446 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups 447 | super().__init__() 448 | self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 449 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1) 450 | self.flat = nn.Flatten() 451 | 452 | def forward(self, x): 453 | z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list 454 | return self.flat(self.conv(z)) # flatten to x(b,c2) 455 | 456 | class h_sigmoid(nn.Module): 457 | def __init__(self, inplace=True): 458 | super(h_sigmoid, self).__init__() 459 | self.relu = nn.ReLU6(inplace=inplace) 460 | 461 | def forward(self, x): 462 | return self.relu(x + 3) / 6 463 | 464 | 465 | class h_swish(nn.Module): 466 | def __init__(self, inplace=True): 467 | super(h_swish, self).__init__() 468 | self.sigmoid = h_sigmoid(inplace=inplace) 469 | 470 | def forward(self, x): 471 | return x * self.sigmoid(x) 472 | 473 | 474 | class CoordinateLayer(nn.Module): 475 | def __init__(self, inp, oup, reduction=32): 476 | super(CoordinateLayer, self).__init__() 477 | "Adaptive average pooling converted to global average pooling" 478 | # inputsz = np.array([20, 20]) 479 | # outputsz_h = np.array([20, 1]) 480 | # outputsz_w = np.array([1, 20]) 481 | # 482 | # stridesz_h = np.floor(inputsz / outputsz_h).astype(np.int32) 483 | # kernelsz_h = inputsz - (outputsz_h - 1) * stridesz_h # 484 | # # self.pool_h = nn.AvgPool2d(kernel_size=list(kernelsz_h), stride=list(stridesz_h)) 485 | # self.pool_h = nn.AvgPool2d(kernel_size=[1, 20], stride=[1, 20]) 486 | # 487 | # stridesz_w = np.floor(inputsz / outputsz_w).astype(np.int32) 488 | # kernelsz_w = inputsz - (outputsz_w - 1) * stridesz_w 489 | # # self.pool_w = nn.AvgPool2d(kernel_size=list(kernelsz_w), stride=list(stridesz_w)) 490 | # self.pool_w = nn.AvgPool2d(kernel_size=[20, 1], stride=[20, 1]) 491 | 492 | 493 | mip = max(8, inp // reduction) 494 | 495 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 496 | self.bn1 = nn.BatchNorm2d(mip) 497 | self.act = h_swish() 498 | 499 | self.conv_h = nn.Conv2d(mip, oup, 
kernel_size=1, stride=1, padding=0) 500 | self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0) 501 | 502 | def forward(self, x): 503 | identity = x 504 | 505 | n, c, h, w = x.size() 506 | # # print(n, c, h, w) 507 | # x_h = self.pool_h(x) # [n,c,h,w]-->[n,c,h,1] 508 | # x_w = self.pool_w(x).permute(0, 1, 3, 2) # [n,c,h,w]-->[n,c,1,w]-->[n,c,w,1] 509 | # pool_h = nn.AvgPool2d(kernel_size=(1, 20), stride=(1, 20)) 510 | # pool_w = nn.AvgPool2d(kernel_size=(20, 1), stride=(20, 1)) 511 | # x_h = pool_h(x) 512 | # x_w = pool_w(x).permute(0, 1, 3, 2) 513 | 514 | x_h = torch.flatten(x, start_dim=2, end_dim=2).mean(dim=2) 515 | x_h = x_h.unsqueeze(3) 516 | 517 | x_w = torch.flatten(x, start_dim=3, end_dim=3).mean(dim=3) 518 | x_w = x_w.unsqueeze(2).permute(0, 1, 3, 2) 519 | 520 | # if torch.is_tensor(h): 521 | # h = h.item() # fix: convert traced tensor dim to int 522 | # w = w.item() # fix: convert traced tensor dim to int 523 | # pool_h = nn.AdaptiveAvgPool2d((h, 1)) 524 | # pool_w = nn.AdaptiveAvgPool2d((1, w)) 525 | # 526 | # x_h = pool_h(x) 527 | # x_w = pool_w(x).permute(0, 1, 3, 2) 528 | 529 | y = torch.cat([x_h, x_w], dim=2) # -->[n,c,(h+w),1] 530 | y = self.conv1(y) # [n,c,(h+w),1]-->[n,mip,(h+w),1] 531 | y = self.bn1(y) # [n,mip,(h+w),1] 532 | y = self.act(y) # [n,mip,(h+w),1] 533 | 534 | if torch.is_tensor(h): 535 | h = h.item() 536 | w = w.item() 537 | x_h, x_w = torch.split(y, [h, w], dim=2) # [n,mip,(h+w),1]-->[n,mip,h,1] & [n,mip,w,1] 538 | x_w = x_w.permute(0, 1, 3, 2) # [n,mip,w,1]-->[n,mip,1,w] 539 | 540 | a_h = self.conv_h(x_h).sigmoid() # [n,mip,h,1]-->[n,oup,h,1]--> probability distribution along the x axis 541 | a_w = self.conv_w(x_w).sigmoid() # [n,mip,w,1]-->[n,oup,w,1]--> probability distribution along the y axis 542 | 543 | # print(a_w.shape) 544 | # print(a_h.shape) 545 | out = identity * a_w * a_h 546 | 547 | return out 548 | 549 | 550 | class sa_layer(nn.Module): 551 | """Constructs a Channel Spatial Group module. 
552 | Args: 553 | k_size: Adaptive selection of kernel size 554 | """ 555 | 556 | def __init__(self, channel, groups=64): 557 | super(sa_layer, self).__init__() 558 | self.groups = groups 559 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 560 | self.cweight = Parameter(torch.zeros(1, channel // (1 * groups), 1, 1)) 561 | self.cbias = Parameter(torch.ones(1, channel // (1 * groups), 1, 1)) 562 | self.sweight = Parameter(torch.zeros(1, channel // (1 * groups), 1, 1)) 563 | self.sbias = Parameter(torch.ones(1, channel // (1 * groups), 1, 1)) 564 | 565 | self.sigmoid = nn.Sigmoid() 566 | # self.gn = nn.GroupNorm(channel // (1 * groups), channel // (1 * groups)) 567 | self.gn = nn.GroupNorm(8, 8) 568 | 569 | @staticmethod 570 | def channel_shuffle(x, groups): 571 | b, c, h, w = x.shape 572 | 573 | x = x.reshape(b, groups, -1, h, w) 574 | x = x.permute(0, 2, 1, 3, 4) 575 | 576 | # flatten 577 | x = x.reshape(b, -1, h, w) 578 | 579 | return x 580 | 581 | def forward(self, x): # 1, 512, 8, 8 582 | # print(x.shape) 583 | b, c, h, w = x.shape # 1, 512, 8, 8 584 | 585 | x = x.reshape(b * 32, -1, h, w) 586 | # print(x) 587 | # print(x.shape) 588 | x_0, x_1 = x.chunk(2, dim=1) 589 | 590 | # channel attention 591 | xn = self.avg_pool(x_0) 592 | xn = self.cweight * xn + self.cbias 593 | xn = x_0 * self.sigmoid(xn) 594 | 595 | # spatial attention 596 | # print(x_1.shape) 597 | xs = self.gn(x_1) 598 | xs = self.sweight * xs + self.sbias 599 | xs = x_1 * self.sigmoid(xs) 600 | 601 | # concatenate along channel axis 602 | out = torch.cat([xn, xs], dim=1) 603 | out = out.reshape(b, -1, h, w) 604 | 605 | out = self.channel_shuffle(out, 2) 606 | return out 607 | 608 | 609 | #-------------------------------------PP_LCNet------------------------------------------------------ 610 | NET_CONFIG = { 611 | "blocks2": 612 | # k, in_c, out_c, s, use_se 613 | [[3, 16, 32, 1, False]], 614 | "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], 615 | "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], 616 | "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], 617 | [5, 256, 256, 1, False], [5, 256, 256, 1, False], 618 | [5, 256, 256, 1, False], [5, 256, 256, 1, False]], 619 | "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] 620 | } 621 | BLOCK_LIST = ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"] 622 | 623 | def make_divisible_LC(v, divisor=8, min_value=None): 624 | if min_value is None: 625 | min_value = divisor 626 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 627 | if new_v < 0.9 * v: 628 | new_v += divisor 629 | return new_v 630 | 631 | 632 | class HardSwish(nn.Module): 633 | def __init__(self, inplace=True): 634 | super(HardSwish, self).__init__() 635 | self.relu6 = nn.ReLU6(inplace=inplace) 636 | 637 | def forward(self, x): 638 | return x * self.relu6(x+3) / 6 639 | 640 | 641 | class HardSigmoid(nn.Module): 642 | def __init__(self, inplace=True): 643 | super(HardSigmoid, self).__init__() 644 | self.relu6 = nn.ReLU6(inplace=inplace) 645 | 646 | def forward(self, x): 647 | return (self.relu6(x+3)) / 6 648 | 649 | 650 | class SELayer(nn.Module): 651 | def __init__(self, channel, reduction=16): 652 | super(SELayer, self).__init__() 653 | self.avgpool = nn.AdaptiveAvgPool2d(1) 654 | self.fc = nn.Sequential( 655 | nn.Linear(channel, channel // reduction, bias=False), 656 | nn.ReLU(inplace=True), 657 | nn.Linear(channel // reduction, channel, bias=False), 658 | HardSigmoid() 659 | ) 660 | 661 | def forward(self, x): 662 | b, c, h, w = x.size() 663 | y = 
self.avgpool(x).view(b, c) 664 | y = self.fc(y).view(b, c, 1, 1) 665 | return x * y.expand_as(x) 666 | 667 | 668 | class DepthwiseSeparable(nn.Module): 669 | def __init__(self, inp, oup, dw_size, stride, use_se=False): 670 | super(DepthwiseSeparable, self).__init__() 671 | self.use_se = use_se 672 | self.stride = stride 673 | self.inp = inp 674 | self.oup = oup 675 | self.dw_size = dw_size 676 | self.dw_sp = nn.Sequential( 677 | nn.Conv2d(self.inp, self.inp, kernel_size=self.dw_size, stride=self.stride, 678 | padding=autopad(self.dw_size, None), groups=self.inp, bias=False), 679 | nn.BatchNorm2d(self.inp), 680 | HardSwish(), 681 | 682 | nn.Conv2d(self.inp, self.oup, kernel_size=1, stride=1, padding=0, bias=False), 683 | nn.BatchNorm2d(self.oup), 684 | HardSwish(), 685 | ) 686 | self.se = SELayer(self.oup) 687 | 688 | def forward(self, x): 689 | x = self.dw_sp(x) 690 | if self.use_se: 691 | x = self.se(x) 692 | return x 693 | 694 | class PPLC_Conv(nn.Module): 695 | def __init__(self, scale): 696 | super(PPLC_Conv, self).__init__() 697 | self.scale = scale 698 | self.conv = nn.Conv2d(3, out_channels=make_divisible_LC(16 * self.scale), 699 | kernel_size=3, stride=2, padding=1, bias=False) 700 | def forward(self, x): 701 | return self.conv(x) 702 | 703 | class PPLC_Block(nn.Module): 704 | def __init__(self, scale, block_num): 705 | super(PPLC_Block, self).__init__() 706 | self.scale = scale 707 | self.block_num = BLOCK_LIST[block_num] 708 | self.block = nn.Sequential(*[ 709 | DepthwiseSeparable(inp=make_divisible_LC(in_c * self.scale), 710 | oup=make_divisible_LC(out_c * self.scale), 711 | dw_size=k, stride=s, use_se=use_se) 712 | for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG[self.block_num]) 713 | ]) 714 | def forward(self, x): 715 | return self.block(x) 716 | 717 | 718 | 719 | 720 | 721 | if __name__ == '__main__': 722 | input_tensor = torch.rand(1, 512, 20, 20) 723 | 724 | a = CoordinateLayer(inp=512, oup=4) 725 | 726 | output_tensor = a(input_tensor) 727 | # print(output_tensor) 728 | print(output_tensor.shape) -------------------------------------------------------------------------------- /PP-LCNet-Yolov5/models/yolo.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | YOLO-specific modules 4 | 5 | Usage: 6 | $ python path/to/models/yolo.py --cfg yolov5s.yaml 7 | """ 8 | 9 | import argparse 10 | import sys 11 | from copy import deepcopy 12 | from pathlib import Path 13 | 14 | FILE = Path(__file__).absolute() 15 | sys.path.append(FILE.parents[1].as_posix()) # add yolov5/ to path 16 | 17 | from models.common import * 18 | from models.experimental import * 19 | from utils.autoanchor import check_anchor_order 20 | from utils.general import make_divisible, check_file, set_logging 21 | from utils.plots import feature_visualization 22 | from utils.torch_utils import time_sync, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 23 | select_device, copy_attr 24 | 25 | try: 26 | import thop # for FLOPs computation 27 | except ImportError: 28 | thop = None 29 | 30 | LOGGER = logging.getLogger(__name__) 31 | 32 | 33 | class Detect(nn.Module): 34 | stride = None # strides computed during build 35 | onnx_dynamic = False # ONNX export parameter 36 | 37 | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer 38 | super().__init__() 39 | self.nc = nc # number of classes 40 | self.no = nc + 5 # number of outputs per anchor 41 | self.nl = len(anchors) # number of 
detection layers 42 | self.na = len(anchors[0]) // 2 # number of anchors 43 | self.grid = [torch.zeros(1)] * self.nl # init grid 44 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 45 | self.register_buffer('anchors', a) # shape(nl,na,2) 46 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 47 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 48 | self.inplace = inplace # use in-place ops (e.g. slice assignment) 49 | 50 | def forward(self, x): 51 | z = [] # inference output 52 | for i in range(self.nl): 53 | x[i] = self.m[i](x[i]) # conv 54 | # bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 55 | # x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 56 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 57 | bs = -1 58 | ny = int(ny) 59 | nx = int(nx) 60 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 61 | 62 | if not self.training: # inference 63 | if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: 64 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 65 | 66 | y = x[i].sigmoid() 67 | if self.inplace: 68 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 69 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 70 | else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 71 | xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 72 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh 73 | y = torch.cat((xy, wh, y[..., 4:]), -1) 74 | # z.append(y.view(bs, -1, self.no)) 75 | z.append(y.view(bs, self.na * ny * nx, self.no)) 76 | 77 | return x if self.training else (torch.cat(z, 1), x) 78 | 79 | @staticmethod 80 | def _make_grid(nx=20, ny=20): 81 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 82 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 83 | 84 | 85 | class Model(nn.Module): 86 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes 87 | super().__init__() 88 | if isinstance(cfg, dict): 89 | self.yaml = cfg # model dict 90 | else: # is *.yaml 91 | import yaml # for torch hub 92 | self.yaml_file = Path(cfg).name 93 | with open(cfg) as f: 94 | self.yaml = yaml.safe_load(f) # model dict 95 | 96 | # Define model 97 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 98 | if nc and nc != self.yaml['nc']: 99 | LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") 100 | self.yaml['nc'] = nc # override yaml value 101 | if anchors: 102 | LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}') 103 | self.yaml['anchors'] = round(anchors) # override yaml value 104 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 105 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 106 | self.inplace = self.yaml.get('inplace', True) 107 | # LOGGER.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 108 | 109 | # Build strides, anchors 110 | m = self.model[-1] # Detect() 111 | if isinstance(m, Detect): 112 | s = 256 # 2x min stride 113 | m.inplace = self.inplace 114 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 115 | m.anchors /= m.stride.view(-1, 1, 1) 116 | check_anchor_order(m) 117 | self.stride = m.stride 118 | 
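        # Stride note: the block above runs a dummy 1 x ch x 256 x 256 forward pass, sets each
        # Detect stride to 256 / feature-map height (8/16/32 for a standard P3/P4/P5 head), and
        # divides the pixel-space anchors by the stride so they are expressed in grid units.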
self._initialize_biases() # only run once 119 | # LOGGER.info('Strides: %s' % m.stride.tolist()) 120 | 121 | # Init weights, biases 122 | initialize_weights(self) 123 | self.info() 124 | LOGGER.info('') 125 | 126 | def forward(self, x, augment=False, profile=False, visualize=False): 127 | if augment: 128 | return self.forward_augment(x) # augmented inference, None 129 | return self.forward_once(x, profile, visualize) # single-scale inference, train 130 | 131 | def forward_augment(self, x): 132 | img_size = x.shape[-2:] # height, width 133 | s = [1, 0.83, 0.67] # scales 134 | f = [None, 3, None] # flips (2-ud, 3-lr) 135 | y = [] # outputs 136 | for si, fi in zip(s, f): 137 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) 138 | yi = self.forward_once(xi)[0] # forward 139 | # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 140 | yi = self._descale_pred(yi, fi, si, img_size) 141 | y.append(yi) 142 | return torch.cat(y, 1), None # augmented inference, train 143 | 144 | def forward_once(self, x, profile=False, visualize=False): 145 | y, dt = [], [] # outputs 146 | for m in self.model: 147 | if m.f != -1: # if not from previous layer 148 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 149 | 150 | if profile: 151 | c = isinstance(m, Detect) # copy input as inplace fix 152 | o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs 153 | t = time_sync() 154 | for _ in range(10): 155 | m(x.copy() if c else x) 156 | dt.append((time_sync() - t) * 100) 157 | if m == self.model[0]: 158 | LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}") 159 | LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') 160 | 161 | x = m(x) # run 162 | y.append(x if m.i in self.save else None) # save output 163 | 164 | if visualize: 165 | feature_visualization(x, m.type, m.i, save_dir=visualize) 166 | 167 | if profile: 168 | LOGGER.info('%.1fms total' % sum(dt)) 169 | return x 170 | 171 | def _descale_pred(self, p, flips, scale, img_size): 172 | # de-scale predictions following augmented inference (inverse operation) 173 | if self.inplace: 174 | p[..., :4] /= scale # de-scale 175 | if flips == 2: 176 | p[..., 1] = img_size[0] - p[..., 1] # de-flip ud 177 | elif flips == 3: 178 | p[..., 0] = img_size[1] - p[..., 0] # de-flip lr 179 | else: 180 | x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale 181 | if flips == 2: 182 | y = img_size[0] - y # de-flip ud 183 | elif flips == 3: 184 | x = img_size[1] - x # de-flip lr 185 | p = torch.cat((x, y, wh, p[..., 4:]), -1) 186 | return p 187 | 188 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 189 | # https://arxiv.org/abs/1708.02002 section 3.3 190 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 
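        # Worked example of the prior below, assuming stride s=8 on a 640px image: the grid has
        # (640/8)^2 = 6400 cells, so expecting ~8 objects gives an objectness prior of
        # 8/6400 = 0.00125, i.e. a bias shift of log(0.00125) ~ -6.7; class biases likewise
        # start near log(0.6/nc), a ~0.6/nc per-class prior unless frequencies cf are supplied.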
191 | m = self.model[-1] # Detect() module 192 | for mi, s in zip(m.m, m.stride): # from 193 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 194 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 195 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 196 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 197 | 198 | def _print_biases(self): 199 | m = self.model[-1] # Detect() module 200 | for mi in m.m: # from 201 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 202 | LOGGER.info( 203 | ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 204 | 205 | # def _print_weights(self): 206 | # for m in self.model.modules(): 207 | # if type(m) is Bottleneck: 208 | # LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 209 | 210 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 211 | LOGGER.info('Fusing layers... ') 212 | for m in self.model.modules(): 213 | if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): 214 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 215 | delattr(m, 'bn') # remove batchnorm 216 | m.forward = m.forward_fuse # update forward 217 | self.info() 218 | return self 219 | 220 | def autoshape(self): # add AutoShape module 221 | LOGGER.info('Adding AutoShape... ') 222 | m = AutoShape(self) # wrap model 223 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 224 | return m 225 | 226 | def info(self, verbose=False, img_size=640): # print model information 227 | model_info(self, verbose, img_size) 228 | 229 | 230 | def parse_model(d, ch): # model_dict, input_channels(3) 231 | LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 232 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 233 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 234 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 235 | 236 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 237 | # layers: stores the built module for every layer 238 | # save: records the indices of layers whose 'from' is not -1, so their outputs are kept for later reuse 239 | # c2: stores the current layer's output channel count 240 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args: source layer index, default module depth, module type, module arguments 241 | m = eval(m) if isinstance(m, str) else m # eval strings 242 | for j, a in enumerate(args): 243 | try: 244 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 245 | except: 246 | pass 247 | 248 | n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain 249 | if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, 250 | BottleneckCSP, C3, C3TR, C3SPP, C3Ghost]: 251 | c1, c2 = ch[f], args[0] # c2 is this layer's output channel count 252 | if c2 != no: # if not output 253 | c2 = make_divisible(c2 * gw, 8) 254 | 255 | # update the original args: prepend this layer's input channels and the width-scaled output channels 256 | # [in_channel, out_channel, *args[1:]] 257 | args = [c1, c2, *args[1:]] 258 | 259 | if m in [BottleneckCSP, C3, C3TR, C3Ghost]: 260 | args.insert(2, n) # number of repeats 261 | n = 1 262 | elif m is nn.BatchNorm2d: 263 | args = [ch[f]] 264 | elif m is Concat: 265 | c2 = sum([ch[x] for x in f]) 266 | elif m is Detect: 267 | args.append([ch[x] for x in f]) 268 | if isinstance(args[1], int): # number of anchors 269 | args[1] = [list(range(args[1] * 2))] * len(f) 270 | elif m is Contract: 271 | c2 = ch[f] * args[0] ** 2 272 | elif m is Expand: 273 | 
296 | if __name__ == '__main__':
297 |     parser = argparse.ArgumentParser()
298 |     parser.add_argument('--cfg', type=str, default='yolov5_LCNet.yaml', help='model.yaml')
299 |     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
300 |     parser.add_argument('--profile', action='store_true', help='profile model speed')
301 |     opt = parser.parse_args()
302 |     opt.cfg = check_file(opt.cfg)  # check file
303 |     set_logging()
304 |     device = select_device(opt.device)
305 | 
306 |     # Create model
307 |     model = Model(opt.cfg).to(device)
308 |     model.train()
309 | 
310 |     # Profile
311 |     if opt.profile:
312 |         # img = torch.rand(8 if torch.cuda.is_available() else 2, 3, 640, 640).to(device)
313 |         img = torch.randn(2, 3, 640, 640).to(device)  # dummy batch on the same device as the model
314 |         y = model(img, profile=True)
315 |         print([yi.shape for yi in y])  # in train mode the model returns one tensor per detection scale
316 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_0.25.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [8, 0.25]],
15 |    [-1, 1, PPLC_Block, [8, 0.25, 0]],
16 |    [-1, 1, PPLC_Block, [16, 0.25, 1]],
17 |    [-1, 1, PPLC_Block, [32, 0.25, 2]],
18 |    [-1, 1, PPLC_Block, [64, 0.25, 3]],
19 |    [-1, 1, PPLC_Block, [128, 0.25, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
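# Editor's note (added for clarity; not in the original file): this and the seven
# sibling yolov5_LCNet_*.yaml files that follow differ only in the PP-LCNet width
# scale (the second PPLC_Conv/PPLC_Block argument) and the matching channel
# counts in the first argument; anchors, head layout and Detect indices are
# identical throughout.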
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_0.35.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [8, 0.35]],
15 |    [-1, 1, PPLC_Block, [16, 0.35, 0]],
16 |    [-1, 1, PPLC_Block, [24, 0.35, 1]],
17 |    [-1, 1, PPLC_Block, [48, 0.35, 2]],
18 |    [-1, 1, PPLC_Block, [88, 0.35, 3]],
19 |    [-1, 1, PPLC_Block, [176, 0.35, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_0.5.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [8, 0.5]],
15 |    [-1, 1, PPLC_Block, [16, 0.5, 0]],
16 |    [-1, 1, PPLC_Block, [32, 0.5, 1]],
17 |    [-1, 1, PPLC_Block, [64, 0.5, 2]],
18 |    [-1, 1, PPLC_Block, [128, 0.5, 3]],
19 |    [-1, 1, PPLC_Block, [256, 0.5, 4]],
20 |   ]
21 | 
22 | 
23 | # YOLOv5 head
24 | head:
25 |   [[-1, 1, Conv, [512, 1, 1]],
26 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
28 |    [-1, 3, C3, [512, False]], # 9
29 | 
30 |    [-1, 1, Conv, [256, 1, 1]],
31 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
33 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
34 | 
35 |    [-1, 1, Conv, [256, 3, 2]],
36 |    [[-1, 10], 1, Concat, [1]], # cat head P4
37 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
38 | 
39 |    [-1, 1, Conv, [512, 3, 2]],
40 |    [[-1, 6], 1, Concat, [1]], # cat head P5
41 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
42 | 
43 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
44 |   ]
45 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_0.75.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [16, 0.75]],
15 |    [-1, 1, PPLC_Block, [24, 0.75, 0]],
16 |    [-1, 1, PPLC_Block, [48, 0.75, 1]],
17 |    [-1, 1, PPLC_Block, [96, 0.75, 2]],
18 |    [-1, 1, PPLC_Block, [192, 0.75, 3]],
19 |    [-1, 1, PPLC_Block, [384, 0.75, 4]],
20 |   ]
21 | 
22 | 
23 | # YOLOv5 head
24 | head:
25 |   [[-1, 1, Conv, [512, 1, 1]],
26 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
28 |    [-1, 3, C3, [512, False]], # 9
29 | 
30 |    [-1, 1, Conv, [256, 1, 1]],
31 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
33 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
34 | 
35 |    [-1, 1, Conv, [256, 3, 2]],
36 |    [[-1, 10], 1, Concat, [1]], # cat head P4
37 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
38 | 
39 |    [-1, 1, Conv, [512, 3, 2]],
40 |    [[-1, 6], 1, Concat, [1]], # cat head P5
41 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
42 | 
43 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
44 |   ]
45 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_1.5.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [24, 1.5]],
15 |    [-1, 1, PPLC_Block, [48, 1.5, 0]],
16 |    [-1, 1, PPLC_Block, [96, 1.5, 1]],
17 |    [-1, 1, PPLC_Block, [192, 1.5, 2]],
18 |    [-1, 1, PPLC_Block, [384, 1.5, 3]],
19 |    [-1, 1, PPLC_Block, [768, 1.5, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_1.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [16, 1]],
15 |    [-1, 1, PPLC_Block, [32, 1, 0]],
16 |    [-1, 1, PPLC_Block, [64, 1, 1]],
17 |    [-1, 1, PPLC_Block, [128, 1, 2]],
18 |    [-1, 1, PPLC_Block, [256, 1, 3]],
19 |    [-1, 1, PPLC_Block, [512, 1, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_2.5.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [40, 2.5]],
15 |    [-1, 1, PPLC_Block, [80, 2.5, 0]],
16 |    [-1, 1, PPLC_Block, [160, 2.5, 1]],
17 |    [-1, 1, PPLC_Block, [320, 2.5, 2]],
18 |    [-1, 1, PPLC_Block, [640, 2.5, 3]],
19 |    [-1, 1, PPLC_Block, [1280, 2.5, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/models/yolov5_LCNet_2.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   [[-1, 1, PPLC_Conv, [32, 2]],
15 |    [-1, 1, PPLC_Block, [64, 2, 0]],
16 |    [-1, 1, PPLC_Block, [128, 2, 1]],
17 |    [-1, 1, PPLC_Block, [256, 2, 2]],
18 |    [-1, 1, PPLC_Block, [512, 2, 3]],
19 |    [-1, 1, PPLC_Block, [1024, 2, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [512, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 4], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [512, False]], # 9
28 | 
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 3], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [256, False]], # 13 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [256, 3, 2]],
35 |    [[-1, 10], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [512, False]], # 16 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [512, 3, 2]],
39 |    [[-1, 6], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [1024, False]], # 19 (P5/32-large)
41 | 
42 |    [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/PP-LCNet-Yolov5/test.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # YoloV5-Flexible-and-Inference
2 | A collection of YOLOv5 modifications and related deployment solutions
3 | 
4 | 
5 | 
6 | Welcome to follow our WeChat official account and join the discussion:
7 | ![Alt text](https://github.com/OutBreak-hui/YoloV5-Flexible-and-Inference/blob//main/pic.png)
8 | 
--------------------------------------------------------------------------------
/RepLKNet-Yolov5/models/yolo.py:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | """
3 | YOLO-specific modules
4 | 
5 | Usage:
6 |     $ python path/to/models/yolo.py --cfg yolov5s.yaml
7 | """
8 | 
9 | import argparse
10 | import sys
11 | from copy import deepcopy
12 | from pathlib import Path
13 | 
14 | FILE = Path(__file__).absolute()
15 | sys.path.append(FILE.parents[1].as_posix())  # add yolov5/ to path
16 | 
17 | from models.common import *
18 | from models.experimental import *
19 | from utils.autoanchor import check_anchor_order
20 | from utils.general import make_divisible, check_file, set_logging
21 | from utils.plots import feature_visualization
22 | from utils.torch_utils import time_sync, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
23 |     select_device, copy_attr
24 | 
25 | try:
26 |     import thop  # for FLOPs computation
27 | except ImportError:
28 |     thop = None
29 | 
30 | LOGGER = logging.getLogger(__name__)
31 | 
32 | 
33 | class Detect(nn.Module):
34 |     stride = None  # strides computed during build
35 |     onnx_dynamic = False  # ONNX export parameter
36 | 
37 |     def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
38 |         super().__init__()
39 |         self.nc = nc  # number of classes
40 |         self.no = nc + 5  # number of outputs per anchor
41 |         self.nl = len(anchors)  # number of detection layers
42 |         self.na = len(anchors[0]) // 2  # number of anchors
43 |         self.grid = [torch.zeros(1)] * self.nl  # init grid
44 |         a = torch.tensor(anchors).float().view(self.nl, -1, 2)
45 |         self.register_buffer('anchors', a)  # shape(nl,na,2)
46 |         self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
47 |         self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
48 |         self.inplace = inplace  # use in-place ops (e.g. slice assignment)
49 | 
50 |     def forward(self, x):
51 |         z = []  # inference output
52 |         for i in range(self.nl):
53 |             x[i] = self.m[i](x[i])  # conv
54 |             # bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
55 |             # x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
56 |             bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
57 |             bs = -1
58 |             ny = int(ny)
59 |             nx = int(nx)
60 |             x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
61 | 
62 |             if not self.training:  # inference
63 |                 if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
64 |                     self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
65 | 
66 |                 y = x[i].sigmoid()
67 |                 if self.inplace:
68 |                     y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
69 |                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
70 |                 else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
71 |                     xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
72 |                     wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2)  # wh
73 |                     y = torch.cat((xy, wh, y[..., 4:]), -1)
74 |                 # z.append(y.view(bs, -1, self.no))
75 |                 z.append(y.view(bs, self.na * ny * nx, self.no))
76 | 
77 |         return x if self.training else (torch.cat(z, 1), x)
78 | 
79 |     @staticmethod
80 |     def _make_grid(nx=20, ny=20):
81 |         yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
82 |         return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
83 | 
84 | 
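# --- Editor's illustration (not part of the original source): the box decoding ---
# --- done in Detect.forward() above, written out for a single prediction.      ---
# With sigmoid outputs y in (0, 1), grid cell (cx, cy), stride s and anchor (aw, ah):
#   bx = (2 * y[0] - 0.5 + cx) * s    # centre can fall up to half a cell outside its cell
#   by = (2 * y[1] - 0.5 + cy) * s
#   bw = (2 * y[2]) ** 2 * aw         # width bounded to (0, 4 * aw)
#   bh = (2 * y[3]) ** 2 * ah
# Example: y = [0.5, 0.5, 0.5, 0.5] at cell (10, 10) with stride 8 and anchor (16, 30)
# decodes to a 16x30 box centred at (84, 84) pixels.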
85 | class Model(nn.Module):
86 |     def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
87 |         super().__init__()
88 |         if isinstance(cfg, dict):
89 |             self.yaml = cfg  # model dict
90 |         else:  # is *.yaml
91 |             import yaml  # for torch hub
92 |             self.yaml_file = Path(cfg).name
93 |             with open(cfg) as f:
94 |                 self.yaml = yaml.safe_load(f)  # model dict
95 | 
96 |         # Define model
97 |         ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
98 |         if nc and nc != self.yaml['nc']:
99 |             LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
100 |             self.yaml['nc'] = nc  # override yaml value
101 |         if anchors:
102 |             LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
103 |             self.yaml['anchors'] = round(anchors)  # override yaml value
104 |         self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
105 |         self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
106 |         self.inplace = self.yaml.get('inplace', True)
107 |         # LOGGER.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
108 | 
109 |         # Build strides, anchors
110 |         m = self.model[-1]  # Detect()
111 |         if isinstance(m, Detect):
112 |             s = 256  # 2x min stride
113 |             m.inplace = self.inplace
114 |             m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
115 |             m.anchors /= m.stride.view(-1, 1, 1)
116 |             check_anchor_order(m)
117 |             self.stride = m.stride
118 |             self._initialize_biases()  # only run once
119 |             # LOGGER.info('Strides: %s' % m.stride.tolist())
120 | 
121 |         # Init weights, biases
122 |         initialize_weights(self)
123 |         self.info()
124 |         LOGGER.info('')
125 | 
126 |     def forward(self, x, augment=False, profile=False, visualize=False):
127 |         if augment:
128 |             return self.forward_augment(x)  # augmented inference, None
129 |         return self.forward_once(x, profile, visualize)  # single-scale inference, train
130 | 
131 |     def forward_augment(self, x):
132 |         img_size = x.shape[-2:]  # height, width
133 |         s = [1, 0.83, 0.67]  # scales
134 |         f = [None, 3, None]  # flips (2-ud, 3-lr)
135 |         y = []  # outputs
136 |         for si, fi in zip(s, f):
137 |             xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
138 |             yi = self.forward_once(xi)[0]  # forward
139 |             # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
140 |             yi = self._descale_pred(yi, fi, si, img_size)
141 |             y.append(yi)
142 |         return torch.cat(y, 1), None  # augmented inference, train
143 | 
144 |     def forward_once(self, x, profile=False, visualize=False):
145 |         y, dt = [], []  # outputs
146 |         for m in self.model:
147 |             if m.f != -1:  # if not from previous layer
148 |                 x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
149 | 
150 |             if profile:
151 |                 c = isinstance(m, Detect)  # copy input as inplace fix
152 |                 o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs
153 |                 t = time_sync()
154 |                 for _ in range(10):
155 |                     m(x.copy() if c else x)
156 |                 dt.append((time_sync() - t) * 100)
157 |                 if m == self.model[0]:
158 |                     LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}")
159 |                 LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
160 | 
161 |             x = m(x)  # run
162 |             y.append(x if m.i in self.save else None)  # save output
163 | 
164 |             if visualize:
165 |                 feature_visualization(x, m.type, m.i, save_dir=visualize)
166 | 
167 |         if profile:
168 |             LOGGER.info('%.1fms total' % sum(dt))
169 |         return x
170 | 
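    # --- Editor's illustration (not part of the original source): the y / self.save ---
    # --- bookkeeping in forward_once() above.                                        ---
    # Only layers whose index i appears in self.save keep their output in y; every
    # other layer appends None. For yolov5_RepLKNet.yaml below, parse_model() yields
    # self.save == [1, 2, 5, 9, 12, 15, 18]: backbone stages 1 and 2 (read by the
    # head's Concat layers) plus the head layers later consumed by Concat/Detect.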
171 |     def _descale_pred(self, p, flips, scale, img_size):
172 |         # de-scale predictions following augmented inference (inverse operation)
173 |         if self.inplace:
174 |             p[..., :4] /= scale  # de-scale
175 |             if flips == 2:
176 |                 p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
177 |             elif flips == 3:
178 |                 p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
179 |         else:
180 |             x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale  # de-scale
181 |             if flips == 2:
182 |                 y = img_size[0] - y  # de-flip ud
183 |             elif flips == 3:
184 |                 x = img_size[1] - x  # de-flip lr
185 |             p = torch.cat((x, y, wh, p[..., 4:]), -1)
186 |         return p
187 | 
188 |     def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
189 |         # https://arxiv.org/abs/1708.02002 section 3.3
190 |         # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
191 |         m = self.model[-1]  # Detect() module
192 |         for mi, s in zip(m.m, m.stride):  # from
193 |             b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
194 |             b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
195 |             b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
196 |             mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
197 | 
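    # --- Editor's illustration (not part of the original source): the objectness ---
    # --- prior written by _initialize_biases() above, evaluated at 640x640.      ---
    #   stride  8 (P3, 80x80 grid): math.log(8 / (640 / 8) ** 2)  = log(8 / 6400) ~ -6.69
    #   stride 16 (P4, 40x40 grid): math.log(8 / (640 / 16) ** 2) = log(8 / 1600) ~ -5.30
    #   stride 32 (P5, 20x20 grid): math.log(8 / (640 / 32) ** 2) = log(8 / 400)  ~ -3.91
    # i.e. each scale starts out expecting roughly 8 objects per 640x640 image, the
    # rare-foreground prior of https://arxiv.org/abs/1708.02002, section 3.3.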
198 |     def _print_biases(self):
199 |         m = self.model[-1]  # Detect() module
200 |         for mi in m.m:  # from
201 |             b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
202 |             LOGGER.info(
203 |                 ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
204 | 
205 |     # def _print_weights(self):
206 |     #     for m in self.model.modules():
207 |     #         if type(m) is Bottleneck:
208 |     #             LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights
209 | 
210 |     def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
211 |         LOGGER.info('Fusing layers... ')
212 |         for m in self.model.modules():
213 |             if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
214 |                 m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
215 |                 delattr(m, 'bn')  # remove batchnorm
216 |                 m.forward = m.forward_fuse  # update forward
217 |         self.info()
218 |         return self
219 | 
220 |     def autoshape(self):  # add AutoShape module
221 |         LOGGER.info('Adding AutoShape... ')
222 |         m = AutoShape(self)  # wrap model
223 |         copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=())  # copy attributes
224 |         return m
225 | 
226 |     def info(self, verbose=False, img_size=640):  # print model information
227 |         model_info(self, verbose, img_size)
228 | 
229 | 
230 | def parse_model(d, ch):  # model_dict, input_channels(3)
231 |     LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
232 |     anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
233 |     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
234 |     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
235 | 
236 |     layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
237 |     # layers: the module built for each config row
238 |     # save: indices of layers whose outputs are referenced by a 'from' field other than -1
239 |     # c2: output channel count of the current layer
240 |     for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args: input layer(s), repeat count, module type, module arguments
241 |         m = eval(m) if isinstance(m, str) else m  # eval strings
242 |         for j, a in enumerate(args):
243 |             try:
244 |                 args[j] = eval(a) if isinstance(a, str) else a  # eval strings
245 |             except NameError:  # leave non-evaluable strings such as 'nearest' as-is
246 |                 pass
247 | 
248 |         n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
249 |         if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
250 |                  BottleneckCSP, C3, C3TR, C3SPP, C3Ghost]:
251 |             c1, c2 = ch[f], args[0]  # c2 is this layer's output channel count
252 |             if c2 != no:  # if not output
253 |                 c2 = make_divisible(c2 * gw, 8)
254 | 
255 |             # rebuild args, prepending the current layer's input channels
256 |             # [in_channel, out_channel, *args[1:]]
257 |             args = [c1, c2, *args[1:]]
258 | 
259 |             if m in [BottleneckCSP, C3, C3TR, C3Ghost]:
260 |                 args.insert(2, n)  # number of repeats
261 |                 n = 1
262 |         elif m is nn.BatchNorm2d:
263 |             args = [ch[f]]
264 |         elif m is Concat:
265 |             c2 = sum([ch[x] for x in f])
266 |         elif m is Detect:
267 |             args.append([ch[x] for x in f])
268 |             if isinstance(args[1], int):  # number of anchors
269 |                 args[1] = [list(range(args[1] * 2))] * len(f)
270 |         elif m is Contract:
271 |             c2 = ch[f] * args[0] ** 2
272 |         elif m is Expand:
273 |             c2 = ch[f] // args[0] ** 2
274 |         elif m in [RepLKNet_Stem, RepLKNet_stage1, RepLKNet_stage2, RepLKNet_stage3, RepLKNet_stage4]:
275 |             c2 = args[0]
276 |             args = args[1:]
277 |         else:
278 |             c2 = ch[f]
279 | 
280 |         m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
281 |         t = str(m)[8:-2].replace('__main__.', '')  # module type
282 |         np = sum([x.numel() for x in m_.parameters()])  # number params
283 |         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
284 |         LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n_, np, t, args))  # print
285 |         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
286 |         layers.append(m_)
287 |         if i == 0:
288 |             ch = []
289 |         ch.append(c2)
290 |     return nn.Sequential(*layers), sorted(save)
291 | 
292 | 
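# --- Editor's illustration (not part of the original source): how parse_model() ---
# --- above assembles the Detect() layer for yolov5_RepLKNet.yaml below.         ---
# The config row [[12, 15, 18], 1, Detect, [nc, anchors]] hits the Detect branch
# with f = [12, 15, 18], so args.append([ch[x] for x in f]) makes the final call
#   Detect(80, anchors, [256, 512, 1024])
# where 256/512/1024 are the C3 output widths at layers 12/15/18 after the 0.50
# width_multiple is applied (512 -> 256, 1024 -> 512, 2048 -> 1024).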
293 | if __name__ == '__main__':
294 |     parser = argparse.ArgumentParser()
295 |     parser.add_argument('--cfg', type=str, default='yolov5_RepLKNet.yaml', help='model.yaml')
296 |     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
297 |     parser.add_argument('--profile', action='store_true', help='profile model speed')
298 |     opt = parser.parse_args()
299 |     opt.cfg = check_file(opt.cfg)  # check file
300 |     set_logging()
301 |     device = select_device(opt.device)
302 | 
303 |     # Create model
304 |     model = Model(opt.cfg).to(device)
305 |     model.train()
306 | 
307 |     # Profile
308 |     if opt.profile:
309 |         # img = torch.rand(8 if torch.cuda.is_available() else 2, 3, 640, 640).to(device)
310 |         img = torch.randn(2, 3, 640, 640).to(device)  # dummy batch on the same device as the model
311 |         y = model(img, profile=True)
312 |         print([yi.shape for yi in y])  # in train mode the model returns one tensor per detection scale
313 | 
--------------------------------------------------------------------------------
/RepLKNet-Yolov5/models/yolov5_RepLKNet.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 | 
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | 
13 | # YOLOv5 backbone
14 | backbone:
15 |   [[-1, 1, RepLKNet_Stem, [128, 3, [128,256,512,1024]]],
16 |    [-1, 1, RepLKNet_stage1, [256, [128,256,512,1024], [31,29,27,13], [2,2,18,2], 0.3, 5, 1, 4]],
17 |    [-1, 1, RepLKNet_stage2, [512, [128,256,512,1024], [31,29,27,13], [2,2,18,2], 0.3, 5, 1, 4]],
18 |    [-1, 1, RepLKNet_stage3, [1024, [128,256,512,1024], [31,29,27,13], [2,2,18,2], 0.3, 5, 1, 4]],
19 |    [-1, 1, RepLKNet_stage4, [1024, [128,256,512,1024], [31,29,27,13], [2,2,18,2], 0.3, 5, 1, 4]],
20 |   ]
21 | 
22 | # YOLOv5 head
23 | head:
24 |   [[-1, 1, Conv, [1024, 1, 1]],
25 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 |    [[-1, 2], 1, Concat, [1]], # cat backbone P4
27 |    [-1, 3, C3, [1024, False]], # 8
28 | 
29 |    [-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 1], 1, Concat, [1]], # cat backbone P3
32 |    [-1, 3, C3, [512, False]], # 12 (P3/8-small)
33 | 
34 |    [-1, 1, Conv, [512, 3, 2]],
35 |    [[-1, 9], 1, Concat, [1]], # cat head P4
36 |    [-1, 3, C3, [1024, False]], # 15 (P4/16-medium)
37 | 
38 |    [-1, 1, Conv, [1024, 3, 2]],
39 |    [[-1, 5], 1, Concat, [1]], # cat head P5
40 |    [-1, 3, C3, [2048, False]], # 18 (P5/32-large)
41 | 
42 |    [[12, 15, 18], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 |   ]
44 | 
--------------------------------------------------------------------------------
/pic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OutBreak-hui/YoloV5-Flexible-and-Inference/115a140839ba71e2dfa491bdc685bc3bd6f2ae2b/pic.png
--------------------------------------------------------------------------------
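Editor's note — a minimal smoke test for the variants above (an illustrative sketch only: it assumes you run it from inside one of the model directories so that models/ and utils/ are importable, and that the chosen yaml's custom modules exist in that directory's models/common.py):

import torch
from models.yolo import Model

model = Model('models/yolov5_RepLKNet.yaml')  # or any of the LCNet / ConvNeXt yamls above
model.eval()  # in eval mode Detect() also returns the decoded, concatenated predictions
with torch.no_grad():
    pred, feats = model(torch.zeros(1, 3, 640, 640))
print(pred.shape)  # (1, 25200, 85) for an 80-class model at 640x640
model.fuse()  # fold BatchNorm into the preceding convolutions for faster inference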