├── README.md ├── yolov5l.yaml ├── yolov5m.yaml ├── yolov5s.yaml ├── yolov5x.yaml ├── convert_yaml2py.py └── common.py /README.md: -------------------------------------------------------------------------------- 1 | # yolov5.yaml转换成yolov5.py 2 | 运行convert_yaml2py.py,它会读取.yaml文件,然后生成.py文件 3 | -------------------------------------------------------------------------------- /yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /convert_yaml2py.py: -------------------------------------------------------------------------------- 1 | from common import * 2 | import yaml 3 | import os 4 | 5 | def convert_list2str(s): 6 | out = [] 7 | for x in s: 8 | if not isinstance(x, str): 9 | out.append(str(x)) 10 | else: 11 | out.append('\'' + x + '\'') 12 | return ', '.join(out) 13 | 14 | def conver_listtostr(l): 15 | out = [] 16 | for i, data in enumerate(l): 17 | if isinstance(data, list): 18 | out.append('['+', '.join(map(str, data))+']') 19 | else: 20 | out.append(str(data)) 21 | return '[' + ', '.join(out) + ']' 22 | 23 | def parse_model2py(d, ch, model_name, upsample_concat_use_nn_module=False): # model_dict, input_channels(3) 24 | print('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 25 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 26 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 27 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 28 | 29 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 30 | wtxt = open(model_name + '.py', 'w') 31 | wtxt.write('from common import *\n' + '\n') 32 | wtxt.write('class My_YOLO(nn.Module):\n') 33 | init_str = ' def __init__(self, num_classes='+str(nc)+', anchors='+conver_listtostr(anchors)+', training=False):\n' 34 | # wtxt.write(' def __init__(self):\n') 35 | wtxt.write(init_str) 36 | wtxt.write(' super().__init__()\n') 37 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 38 | m = eval(m) if isinstance(m, str) else m # eval strings 39 | for j, a in enumerate(args): 40 | try: 41 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 42 | except: 43 | pass 44 | 45 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 46 | if m in [Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]: 47 | c1, c2 = ch[f], args[0] 48 | 49 | # Normal 50 | # if i > 0 and args[0] != no: # channel expansion factor 51 | # ex = 1.75 # exponential (default 2.0) 52 | # e = math.log(c2 / ch[1]) / math.log(2) 53 | # c2 = int(ch[1] * ex ** e) 54 | # if m != Focus: 55 | 56 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 57 | 58 | # Experimental 59 | # if i > 0 and args[0] != no: # channel expansion factor 60 | # ex = 1 + gw # exponential (default 2.0) 61 | # ch1 = 32 # ch[1] 62 | # e = math.log(c2 / ch1) / math.log(2) # level 1-n 63 | # c2 = int(ch1 * ex ** e) 64 | # if m != Focus: 65 | # c2 = make_divisible(c2, 8) if c2 != no else c2 66 | 67 | args = [c1, c2, *args[1:]] 68 | if m in [BottleneckCSP, C3]: 69 | args.insert(2, n) 70 | n = 1 71 | elif m is nn.BatchNorm2d: 72 | args = [ch[f]] 73 | elif m is Concat: 74 | c2 = sum([ch[-1 if x == -1 else x + 1] for x in f]) 75 | elif m is Detect: 76 | args.append([ch[x + 1] for x in f]) 77 | if isinstance(args[1], int): # number of anchors 78 | args[1] = [list(range(args[1] * 2))] * len(f) 79 | else: 80 | c2 = ch[f] 81 | 82 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 83 | t = str(m)[8:-2].replace('__main__.', '') # module type 84 | np = sum([x.numel() for x in m_.parameters()]) # number params 85 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 86 | print('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 87 | 88 | fname = t[t.rfind('.') + 1:] 89 | if t.startswith('common.') and m is not Detect: 90 | if fname not in ('Upsample', 'Concat') or upsample_concat_use_nn_module: 91 | wtxt.write(' self.seq' + str(i) + '_' + fname + ' = ' + fname + '(' + convert_list2str(args) + ')\n') 92 | elif t.startswith('torch.nn.modules') and m is not Detect: 93 | if fname not in ('Upsample', 'Concat') or upsample_concat_use_nn_module: 94 | wtxt.write(' self.seq' + str(i) + '_' + fname + ' = nn.' + fname + '(' + convert_list2str(args) + ')\n') 95 | elif m is Detect: 96 | wtxt.write(' self.yolo_layers = ' + fname + '(nc=num_classes, anchors=anchors, ch=' + conver_listtostr(args[2]) + ', training=training)\n') 97 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 98 | layers.append(m_) 99 | ch.append(c2) 100 | wtxt.write(' def forward(self, x):\n') 101 | wtxt.write(' x = self.seq0_Focus(x)\n') 102 | wtxt.write(' x = self.seq1_Conv(x)\n') 103 | wtxt.write(' x = self.seq2_BottleneckCSP(x)\n') 104 | wtxt.write(' x = self.seq3_Conv(x)\n') 105 | wtxt.write(' xRt0 = self.seq4_BottleneckCSP(x)\n') 106 | wtxt.write(' x = self.seq5_Conv(xRt0)\n') 107 | wtxt.write(' xRt1 = self.seq6_BottleneckCSP(x)\n') 108 | wtxt.write(' x = self.seq7_Conv(xRt1)\n') 109 | wtxt.write(' x = self.seq8_SPP(x)\n') 110 | wtxt.write(' x = self.seq9_BottleneckCSP(x)\n') 111 | wtxt.write(' xRt2 = self.seq10_Conv(x)\n') 112 | if upsample_concat_use_nn_module: 113 | wtxt.write(' route = self.seq11_Upsample(xRt2)\n') 114 | wtxt.write(' x = self.seq12_Concat([route, xRt1])\n') 115 | else: 116 | wtxt.write(' route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode=\'nearest\')\n') 117 | wtxt.write(' x = torch.cat([route, xRt1], dim=1)\n') 118 | wtxt.write(' x = self.seq13_BottleneckCSP(x)\n') 119 | wtxt.write(' xRt3 = self.seq14_Conv(x)\n') 120 | if upsample_concat_use_nn_module: 121 | wtxt.write(' route = self.seq15_Upsample(xRt3)\n') 122 | wtxt.write(' x = self.seq16_Concat([route, xRt0])\n') 123 | else: 124 | wtxt.write(' route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode=\'nearest\')\n') 125 | wtxt.write(' x = torch.cat([route, xRt0], dim=1)\n') 126 | wtxt.write(' out1 = self.seq17_BottleneckCSP(x)\n') 127 | wtxt.write(' route = self.seq18_Conv(out1)\n') 128 | if upsample_concat_use_nn_module: 129 | wtxt.write(' x = self.seq19_Concat([route, xRt3])\n') 130 | else: 131 | wtxt.write(' x = torch.cat([route, xRt3], dim=1)\n') 132 | wtxt.write(' out2 = self.seq20_BottleneckCSP(x)\n') 133 | wtxt.write(' route = self.seq21_Conv(out2)\n') 134 | if upsample_concat_use_nn_module: 135 | wtxt.write(' x = self.seq22_Concat([route, xRt2])\n') 136 | else: 137 | wtxt.write(' x = torch.cat([route, xRt2], dim=1)\n') 138 | wtxt.write(' out3 = self.seq23_BottleneckCSP(x)\n') 139 | wtxt.write(' output = self.yolo_layers([out1, out2, out3])\n') 140 | wtxt.write(' return output\n') 141 | wtxt.close() 142 | return nn.Sequential(*layers), sorted(save) 143 | 144 | if __name__ == '__main__': 145 | choices = ['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x'] 146 | model_cfg, ch = choices[0]+'.yaml', 3 147 | with open(model_cfg) as f: 148 | md = yaml.load(f, Loader=yaml.FullLoader) 149 | model_name = os.path.splitext(os.path.basename(model_cfg))[0] 150 | model, save = parse_model2py(md, [ch], model_name, upsample_concat_use_nn_module=False) 151 | print('generate '+model_name+'.py successfully') -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | import numpy as np 5 | import torch.nn.functional as F 6 | 7 | def autopad(k, p=None): # kernel, padding 8 | # Pad to 'same' 9 | if p is None: 10 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 11 | return p 12 | 13 | 14 | def DWConv(c1, c2, k=1, s=1, act=True): 15 | # Depthwise convolution 16 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 17 | 18 | 19 | class Conv(nn.Module): 20 | # Standard convolution 21 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 22 | super(Conv, self).__init__() 23 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 24 | self.bn = nn.BatchNorm2d(c2) 25 | # self.act = nn.Hardswish() if act else nn.Identity() 26 | self.act = nn.LeakyReLU(0.1, inplace=True) if act else nn.Identity() 27 | 28 | def forward(self, x): 29 | return self.act(self.bn(self.conv(x))) 30 | 31 | def fuseforward(self, x): 32 | return self.act(self.conv(x)) 33 | 34 | 35 | class Bottleneck(nn.Module): 36 | # Standard bottleneck 37 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 38 | super(Bottleneck, self).__init__() 39 | c_ = int(c2 * e) # hidden channels 40 | self.cv1 = Conv(c1, c_, 1, 1) 41 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 42 | self.add = shortcut and c1 == c2 43 | 44 | def forward(self, x): 45 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 46 | 47 | 48 | class BottleneckCSP(nn.Module): 49 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 50 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 51 | super(BottleneckCSP, self).__init__() 52 | c_ = int(c2 * e) # hidden channels 53 | self.cv1 = Conv(c1, c_, 1, 1) 54 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 55 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 56 | self.cv4 = Conv(2 * c_, c2, 1, 1) 57 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 58 | self.act = nn.LeakyReLU(0.1, inplace=True) 59 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 60 | 61 | def forward(self, x): 62 | y1 = self.cv3(self.m(self.cv1(x))) 63 | y2 = self.cv2(x) 64 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 65 | 66 | 67 | class SPP(nn.Module): 68 | # Spatial pyramid pooling layer used in YOLOv3-SPP 69 | def __init__(self, c1, c2, k=(5, 9, 13)): 70 | super(SPP, self).__init__() 71 | c_ = c1 // 2 # hidden channels 72 | self.cv1 = Conv(c1, c_, 1, 1) 73 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 74 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 75 | 76 | def forward(self, x): 77 | x = self.cv1(x) 78 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 79 | 80 | 81 | class Focus(nn.Module): 82 | # Focus wh information into c-space 83 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 84 | super(Focus, self).__init__() 85 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 86 | 87 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 88 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 89 | 90 | 91 | class Concat(nn.Module): 92 | # Concatenate a list of tensors along dimension 93 | def __init__(self, dimension=1): 94 | super(Concat, self).__init__() 95 | self.d = dimension 96 | 97 | def forward(self, x): 98 | return torch.cat(x, self.d) 99 | 100 | class Flatten(nn.Module): 101 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions 102 | @staticmethod 103 | def forward(x): 104 | return x.view(x.size(0), -1) 105 | 106 | class CrossConv(nn.Module): 107 | # Cross Convolution Downsample 108 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 109 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 110 | super(CrossConv, self).__init__() 111 | c_ = int(c2 * e) # hidden channels 112 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 113 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 114 | self.add = shortcut and c1 == c2 115 | 116 | def forward(self, x): 117 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 118 | 119 | class C3(nn.Module): 120 | # Cross Convolution CSP 121 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 122 | super(C3, self).__init__() 123 | c_ = int(c2 * e) # hidden channels 124 | self.cv1 = Conv(c1, c_, 1, 1) 125 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 126 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 127 | self.cv4 = Conv(2 * c_, c2, 1, 1) 128 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 129 | self.act = nn.LeakyReLU(0.1, inplace=True) 130 | self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 131 | 132 | def forward(self, x): 133 | y1 = self.cv3(self.m(self.cv1(x))) 134 | y2 = self.cv2(x) 135 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 136 | 137 | class Classify(nn.Module): 138 | # Classification head, i.e. x(b,c1,20,20) to x(b,c2) 139 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups 140 | super(Classify, self).__init__() 141 | self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 142 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) # to x(b,c2,1,1) 143 | self.flat = Flatten() 144 | 145 | def forward(self, x): 146 | z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list 147 | return self.flat(self.conv(z)) # flatten to x(b,c2) 148 | 149 | class MixConv2d(nn.Module): 150 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 151 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 152 | super(MixConv2d, self).__init__() 153 | groups = len(k) 154 | if equal_ch: # equal c_ per group 155 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 156 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 157 | else: # equal weight.numel() per group 158 | b = [c2] + [0] * groups 159 | a = np.eye(groups + 1, groups, k=-1) 160 | a -= np.roll(a, 1, axis=1) 161 | a *= np.array(k) ** 2 162 | a[0] = 1 163 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 164 | 165 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 166 | self.bn = nn.BatchNorm2d(c2) 167 | self.act = nn.LeakyReLU(0.1, inplace=True) 168 | 169 | def forward(self, x): 170 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 171 | 172 | def make_divisible(x, divisor): 173 | # Returns x evenly divisible by divisor 174 | return math.ceil(x / divisor) * divisor 175 | 176 | class Detect(nn.Module): 177 | def __init__(self, nc=80, anchors=(), ch=(), training=False): # detection layer 178 | super(Detect, self).__init__() 179 | self.stride = torch.tensor([8., 16., 32.]) 180 | self.nc = nc # number of classes 181 | self.no = nc + 5 # number of outputs per anchor 182 | self.nl = len(anchors) # number of detection layers 183 | self.na = len(anchors[0]) // 2 # number of anchors 184 | self.grid = [torch.zeros(1)] * self.nl # init grid 185 | self.training = training 186 | # a = torch.tensor(anchors).float().view(self.nl, -1, 2) 187 | # self.register_buffer('anchors', a) # shape(nl,na,2) 188 | # self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 189 | 190 | self.anchors = torch.tensor(anchors).float().view(self.nl, -1, 2) 191 | self.anchor_grid = self.anchors.view(self.nl, 1, -1, 1, 1, 2) 192 | 193 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 194 | 195 | def forward(self, x): 196 | # x = x.copy() # for profiling 197 | z = [] # inference output 198 | for i in range(self.nl): 199 | x[i] = self.m[i](x[i]) # conv 200 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 201 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 202 | 203 | if not self.training: # inference 204 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 205 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 206 | 207 | y = x[i].sigmoid() 208 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 209 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 210 | z.append(y.view(bs, -1, self.no)) 211 | 212 | return x if self.training else (torch.cat(z, 1), x) 213 | 214 | @staticmethod 215 | def _make_grid(nx=20, ny=20): 216 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 217 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 218 | 219 | def weights_init_normal(m): 220 | classname = m.__class__.__name__ 221 | if classname.find("Conv") != -1: 222 | torch.nn.init.normal_(m.weight.data, 0.0, 0.02) 223 | elif classname.find("BatchNorm2d") != -1: 224 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 225 | torch.nn.init.constant_(m.bias.data, 0.0) --------------------------------------------------------------------------------