├── README.md
├── yolov5l.yaml
├── yolov5m.yaml
├── yolov5s.yaml
├── yolov5x.yaml
├── convert_yaml2py.py
└── common.py


/README.md:
--------------------------------------------------------------------------------
1 | # yolov5.yaml转换成yolov5.py
2 | 运行convert_yaml2py.py，它会读取.yaml文件，然后生成.py文件
3 | 


--------------------------------------------------------------------------------
/yolov5l.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 80  # number of classes
 3 | depth_multiple: 1.0  # model depth multiple
 4 | width_multiple: 1.0  # layer channel multiple
 5 | 
 6 | # anchors
 7 | anchors:
 8 |   - [10,13, 16,30, 33,23]  # P3/8
 9 |   - [30,61, 62,45, 59,119]  # P4/16
10 |   - [116,90, 156,198, 373,326]  # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Focus, [64, 3]],  # 0-P1/2
16 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
17 |    [-1, 3, BottleneckCSP, [128]],
18 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
19 |    [-1, 9, BottleneckCSP, [256]],
20 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
21 |    [-1, 9, BottleneckCSP, [512]],
22 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
23 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
24 |    [-1, 3, BottleneckCSP, [1024, False]],  # 9
25 |   ]
26 | 
27 | # YOLOv5 head
28 | head:
29 |   [[-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
32 |    [-1, 3, BottleneckCSP, [512, False]],  # 13
33 | 
34 |    [-1, 1, Conv, [256, 1, 1]],
35 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
37 |    [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
38 | 
39 |    [-1, 1, Conv, [256, 3, 2]],
40 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
41 |    [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
42 | 
43 |    [-1, 1, Conv, [512, 3, 2]],
44 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
45 |    [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
46 | 
47 |    [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
48 |   ]
49 | 


--------------------------------------------------------------------------------
/yolov5m.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 80  # number of classes
 3 | depth_multiple: 0.67  # model depth multiple
 4 | width_multiple: 0.75  # layer channel multiple
 5 | 
 6 | # anchors
 7 | anchors:
 8 |   - [10,13, 16,30, 33,23]  # P3/8
 9 |   - [30,61, 62,45, 59,119]  # P4/16
10 |   - [116,90, 156,198, 373,326]  # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Focus, [64, 3]],  # 0-P1/2
16 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
17 |    [-1, 3, BottleneckCSP, [128]],
18 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
19 |    [-1, 9, BottleneckCSP, [256]],
20 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
21 |    [-1, 9, BottleneckCSP, [512]],
22 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
23 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
24 |    [-1, 3, BottleneckCSP, [1024, False]],  # 9
25 |   ]
26 | 
27 | # YOLOv5 head
28 | head:
29 |   [[-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
32 |    [-1, 3, BottleneckCSP, [512, False]],  # 13
33 | 
34 |    [-1, 1, Conv, [256, 1, 1]],
35 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
37 |    [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
38 | 
39 |    [-1, 1, Conv, [256, 3, 2]],
40 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
41 |    [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
42 | 
43 |    [-1, 1, Conv, [512, 3, 2]],
44 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
45 |    [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
46 | 
47 |    [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
48 |   ]
49 | 


--------------------------------------------------------------------------------
/yolov5s.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 80  # number of classes
 3 | depth_multiple: 0.33  # model depth multiple
 4 | width_multiple: 0.50  # layer channel multiple
 5 | 
 6 | # anchors
 7 | anchors:
 8 |   - [10,13, 16,30, 33,23]  # P3/8
 9 |   - [30,61, 62,45, 59,119]  # P4/16
10 |   - [116,90, 156,198, 373,326]  # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Focus, [64, 3]],  # 0-P1/2
16 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
17 |    [-1, 3, BottleneckCSP, [128]],
18 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
19 |    [-1, 9, BottleneckCSP, [256]],
20 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
21 |    [-1, 9, BottleneckCSP, [512]],
22 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
23 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
24 |    [-1, 3, BottleneckCSP, [1024, False]],  # 9
25 |   ]
26 | 
27 | # YOLOv5 head
28 | head:
29 |   [[-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
32 |    [-1, 3, BottleneckCSP, [512, False]],  # 13
33 | 
34 |    [-1, 1, Conv, [256, 1, 1]],
35 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
37 |    [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
38 | 
39 |    [-1, 1, Conv, [256, 3, 2]],
40 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
41 |    [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
42 | 
43 |    [-1, 1, Conv, [512, 3, 2]],
44 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
45 |    [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
46 | 
47 |    [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
48 |   ]
49 | 


--------------------------------------------------------------------------------
/yolov5x.yaml:
--------------------------------------------------------------------------------
 1 | # parameters
 2 | nc: 80  # number of classes
 3 | depth_multiple: 1.33  # model depth multiple
 4 | width_multiple: 1.25  # layer channel multiple
 5 | 
 6 | # anchors
 7 | anchors:
 8 |   - [10,13, 16,30, 33,23]  # P3/8
 9 |   - [30,61, 62,45, 59,119]  # P4/16
10 |   - [116,90, 156,198, 373,326]  # P5/32
11 | 
12 | # YOLOv5 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Focus, [64, 3]],  # 0-P1/2
16 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
17 |    [-1, 3, BottleneckCSP, [128]],
18 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
19 |    [-1, 9, BottleneckCSP, [256]],
20 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
21 |    [-1, 9, BottleneckCSP, [512]],
22 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
23 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
24 |    [-1, 3, BottleneckCSP, [1024, False]],  # 9
25 |   ]
26 | 
27 | # YOLOv5 head
28 | head:
29 |   [[-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
32 |    [-1, 3, BottleneckCSP, [512, False]],  # 13
33 | 
34 |    [-1, 1, Conv, [256, 1, 1]],
35 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
37 |    [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
38 | 
39 |    [-1, 1, Conv, [256, 3, 2]],
40 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
41 |    [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
42 | 
43 |    [-1, 1, Conv, [512, 3, 2]],
44 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
45 |    [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
46 | 
47 |    [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
48 |   ]
49 | 


--------------------------------------------------------------------------------
/convert_yaml2py.py:
--------------------------------------------------------------------------------
  1 | from common import *
  2 | import yaml
  3 | import os
  4 | 
  5 | def convert_list2str(s):
  6 |     out = []
  7 |     for x in s:
  8 |         if not isinstance(x, str):
  9 |             out.append(str(x))
 10 |         else:
 11 |             out.append('\'' + x + '\'')
 12 |     return ', '.join(out)
 13 | 
 14 | def conver_listtostr(l):
 15 |     out = []
 16 |     for i, data in enumerate(l):
 17 |         if isinstance(data, list):
 18 |             out.append('['+', '.join(map(str, data))+']')
 19 |         else:
 20 |             out.append(str(data))
 21 |     return '[' + ', '.join(out) + ']'
 22 | 
 23 | def parse_model2py(d, ch, model_name, upsample_concat_use_nn_module=False):  # model_dict, input_channels(3)
 24 |     print('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
 25 |     anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
 26 |     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
 27 |     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
 28 | 
 29 |     layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
 30 |     wtxt = open(model_name + '.py', 'w')
 31 |     wtxt.write('from common import *\n' + '\n')
 32 |     wtxt.write('class My_YOLO(nn.Module):\n')
 33 |     init_str = '    def __init__(self, num_classes='+str(nc)+', anchors='+conver_listtostr(anchors)+', training=False):\n'
 34 |     # wtxt.write('    def __init__(self):\n')
 35 |     wtxt.write(init_str)
 36 |     wtxt.write('        super().__init__()\n')
 37 |     for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
 38 |         m = eval(m) if isinstance(m, str) else m  # eval strings
 39 |         for j, a in enumerate(args):
 40 |             try:
 41 |                 args[j] = eval(a) if isinstance(a, str) else a  # eval strings
 42 |             except:
 43 |                 pass
 44 | 
 45 |         n = max(round(n * gd), 1) if n > 1 else n  # depth gain
 46 |         if m in [Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
 47 |             c1, c2 = ch[f], args[0]
 48 | 
 49 |             # Normal
 50 |             # if i > 0 and args[0] != no:  # channel expansion factor
 51 |             #     ex = 1.75  # exponential (default 2.0)
 52 |             #     e = math.log(c2 / ch[1]) / math.log(2)
 53 |             #     c2 = int(ch[1] * ex ** e)
 54 |             # if m != Focus:
 55 | 
 56 |             c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
 57 | 
 58 |             # Experimental
 59 |             # if i > 0 and args[0] != no:  # channel expansion factor
 60 |             #     ex = 1 + gw  # exponential (default 2.0)
 61 |             #     ch1 = 32  # ch[1]
 62 |             #     e = math.log(c2 / ch1) / math.log(2)  # level 1-n
 63 |             #     c2 = int(ch1 * ex ** e)
 64 |             # if m != Focus:
 65 |             #     c2 = make_divisible(c2, 8) if c2 != no else c2
 66 | 
 67 |             args = [c1, c2, *args[1:]]
 68 |             if m in [BottleneckCSP, C3]:
 69 |                 args.insert(2, n)
 70 |                 n = 1
 71 |         elif m is nn.BatchNorm2d:
 72 |             args = [ch[f]]
 73 |         elif m is Concat:
 74 |             c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
 75 |         elif m is Detect:
 76 |             args.append([ch[x + 1] for x in f])
 77 |             if isinstance(args[1], int):  # number of anchors
 78 |                 args[1] = [list(range(args[1] * 2))] * len(f)
 79 |         else:
 80 |             c2 = ch[f]
 81 | 
 82 |         m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
 83 |         t = str(m)[8:-2].replace('__main__.', '')  # module type
 84 |         np = sum([x.numel() for x in m_.parameters()])  # number params
 85 |         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
 86 |         print('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n, np, t, args))  # print
 87 | 
 88 |         fname = t[t.rfind('.') + 1:]
 89 |         if t.startswith('common.') and m is not Detect:
 90 |             if fname not in ('Upsample', 'Concat') or upsample_concat_use_nn_module:
 91 |                 wtxt.write('        self.seq' + str(i) + '_' + fname + ' = ' + fname + '(' + convert_list2str(args) + ')\n')
 92 |         elif t.startswith('torch.nn.modules') and m is not Detect:
 93 |             if fname not in ('Upsample', 'Concat') or upsample_concat_use_nn_module:
 94 |                 wtxt.write('        self.seq' + str(i) + '_' + fname + ' = nn.' + fname + '(' + convert_list2str(args) + ')\n')
 95 |         elif m is Detect:
 96 |             wtxt.write('        self.yolo_layers = ' + fname + '(nc=num_classes, anchors=anchors, ch=' + conver_listtostr(args[2]) + ', training=training)\n')
 97 |         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
 98 |         layers.append(m_)
 99 |         ch.append(c2)
100 |     wtxt.write('    def forward(self, x):\n')
101 |     wtxt.write('        x = self.seq0_Focus(x)\n')
102 |     wtxt.write('        x = self.seq1_Conv(x)\n')
103 |     wtxt.write('        x = self.seq2_BottleneckCSP(x)\n')
104 |     wtxt.write('        x = self.seq3_Conv(x)\n')
105 |     wtxt.write('        xRt0 = self.seq4_BottleneckCSP(x)\n')
106 |     wtxt.write('        x = self.seq5_Conv(xRt0)\n')
107 |     wtxt.write('        xRt1 = self.seq6_BottleneckCSP(x)\n')
108 |     wtxt.write('        x = self.seq7_Conv(xRt1)\n')
109 |     wtxt.write('        x = self.seq8_SPP(x)\n')
110 |     wtxt.write('        x = self.seq9_BottleneckCSP(x)\n')
111 |     wtxt.write('        xRt2 = self.seq10_Conv(x)\n')
112 |     if upsample_concat_use_nn_module:
113 |         wtxt.write('        route = self.seq11_Upsample(xRt2)\n')
114 |         wtxt.write('        x = self.seq12_Concat([route, xRt1])\n')
115 |     else:
116 |         wtxt.write('        route = F.interpolate(xRt2, size=(int(xRt2.shape[2] * 2), int(xRt2.shape[3] * 2)), mode=\'nearest\')\n')
117 |         wtxt.write('        x = torch.cat([route, xRt1], dim=1)\n')
118 |     wtxt.write('        x = self.seq13_BottleneckCSP(x)\n')
119 |     wtxt.write('        xRt3 = self.seq14_Conv(x)\n')
120 |     if upsample_concat_use_nn_module:
121 |         wtxt.write('        route = self.seq15_Upsample(xRt3)\n')
122 |         wtxt.write('        x = self.seq16_Concat([route, xRt0])\n')
123 |     else:
124 |         wtxt.write('        route = F.interpolate(xRt3, size=(int(xRt3.shape[2] * 2), int(xRt3.shape[3] * 2)), mode=\'nearest\')\n')
125 |         wtxt.write('        x = torch.cat([route, xRt0], dim=1)\n')
126 |     wtxt.write('        out1 = self.seq17_BottleneckCSP(x)\n')
127 |     wtxt.write('        route = self.seq18_Conv(out1)\n')
128 |     if upsample_concat_use_nn_module:
129 |         wtxt.write('        x = self.seq19_Concat([route, xRt3])\n')
130 |     else:
131 |         wtxt.write('        x = torch.cat([route, xRt3], dim=1)\n')
132 |     wtxt.write('        out2 = self.seq20_BottleneckCSP(x)\n')
133 |     wtxt.write('        route = self.seq21_Conv(out2)\n')
134 |     if upsample_concat_use_nn_module:
135 |         wtxt.write('        x = self.seq22_Concat([route, xRt2])\n')
136 |     else:
137 |         wtxt.write('        x = torch.cat([route, xRt2], dim=1)\n')
138 |     wtxt.write('        out3 = self.seq23_BottleneckCSP(x)\n')
139 |     wtxt.write('        output = self.yolo_layers([out1, out2, out3])\n')
140 |     wtxt.write('        return output\n')
141 |     wtxt.close()
142 |     return nn.Sequential(*layers), sorted(save)
143 | 
if __name__ == '__main__':
    # Available configs; the first entry is the one that gets converted.
    choices = ['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x']
    ch = 3  # input channels handed to parse_model2py
    model_cfg = choices[0] + '.yaml'
    with open(model_cfg) as f:
        md = yaml.load(f, Loader=yaml.FullLoader)
    # The generated file is named after the config file without its extension.
    cfg_basename = os.path.basename(model_cfg)
    model_name = os.path.splitext(cfg_basename)[0]
    model, save = parse_model2py(md, [ch], model_name, upsample_concat_use_nn_module=False)
    print('generate ' + model_name + '.py successfully')


--------------------------------------------------------------------------------
/common.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import math
  4 | import numpy as np
  5 | import torch.nn.functional as F
  6 | 
  7 | def autopad(k, p=None):  # kernel, padding
  8 |     # Pad to 'same'
  9 |     if p is None:
 10 |         p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
 11 |     return p
 12 | 
 13 | 
 14 | def DWConv(c1, c2, k=1, s=1, act=True):
 15 |     # Depthwise convolution
 16 |     return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
 17 | 
 18 | 
 19 | class Conv(nn.Module):
 20 |     # Standard convolution
 21 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
 22 |         super(Conv, self).__init__()
 23 |         self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
 24 |         self.bn = nn.BatchNorm2d(c2)
 25 |         # self.act = nn.Hardswish() if act else nn.Identity()
 26 |         self.act = nn.LeakyReLU(0.1, inplace=True) if act else nn.Identity()
 27 | 
 28 |     def forward(self, x):
 29 |         return self.act(self.bn(self.conv(x)))
 30 | 
 31 |     def fuseforward(self, x):
 32 |         return self.act(self.conv(x))
 33 | 
 34 | 
 35 | class Bottleneck(nn.Module):
 36 |     # Standard bottleneck
 37 |     def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
 38 |         super(Bottleneck, self).__init__()
 39 |         c_ = int(c2 * e)  # hidden channels
 40 |         self.cv1 = Conv(c1, c_, 1, 1)
 41 |         self.cv2 = Conv(c_, c2, 3, 1, g=g)
 42 |         self.add = shortcut and c1 == c2
 43 | 
 44 |     def forward(self, x):
 45 |         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
 46 | 
 47 | 
 48 | class BottleneckCSP(nn.Module):
 49 |     # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
 50 |     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
 51 |         super(BottleneckCSP, self).__init__()
 52 |         c_ = int(c2 * e)  # hidden channels
 53 |         self.cv1 = Conv(c1, c_, 1, 1)
 54 |         self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
 55 |         self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
 56 |         self.cv4 = Conv(2 * c_, c2, 1, 1)
 57 |         self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
 58 |         self.act = nn.LeakyReLU(0.1, inplace=True)
 59 |         self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
 60 | 
 61 |     def forward(self, x):
 62 |         y1 = self.cv3(self.m(self.cv1(x)))
 63 |         y2 = self.cv2(x)
 64 |         return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
 65 | 
 66 | 
 67 | class SPP(nn.Module):
 68 |     # Spatial pyramid pooling layer used in YOLOv3-SPP
 69 |     def __init__(self, c1, c2, k=(5, 9, 13)):
 70 |         super(SPP, self).__init__()
 71 |         c_ = c1 // 2  # hidden channels
 72 |         self.cv1 = Conv(c1, c_, 1, 1)
 73 |         self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
 74 |         self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
 75 | 
 76 |     def forward(self, x):
 77 |         x = self.cv1(x)
 78 |         return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
 79 | 
 80 | 
 81 | class Focus(nn.Module):
 82 |     # Focus wh information into c-space
 83 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
 84 |         super(Focus, self).__init__()
 85 |         self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
 86 | 
 87 |     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
 88 |         return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
 89 | 
 90 | 
 91 | class Concat(nn.Module):
 92 |     # Concatenate a list of tensors along dimension
 93 |     def __init__(self, dimension=1):
 94 |         super(Concat, self).__init__()
 95 |         self.d = dimension
 96 | 
 97 |     def forward(self, x):
 98 |         return torch.cat(x, self.d)
 99 | 
class Flatten(nn.Module):
    """Reshape a tensor to ``(x.size(0), -1)``.

    Use after nn.AdaptiveAvgPool2d(1) to remove the last 2 dimensions.
    """

    @staticmethod
    def forward(x):
        """Keep the first dimension and collapse all remaining ones."""
        leading = x.size(0)
        return x.view(leading, -1)
105 | 
class CrossConv(nn.Module):
    """Cross convolution: a ``(1, k)`` ``Conv`` followed by a ``(k, 1)`` ``Conv``.

    Strides are ``(1, s)`` and ``(s, 1)`` respectively and the hidden width is
    ``int(c2 * e)``. The input is added to the output only when ``shortcut``
    is truthy and ``c1 == c2``.
    """

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Run both convolutions and add the input back when ``self.add`` is set."""
        y = self.cv2(self.cv1(x))
        if self.add:
            return x + y
        return y
118 | 
class C3(nn.Module):
    """Cross Convolution CSP block.

    Same layout as ``BottleneckCSP`` in this file, except that the stacked
    inner blocks are ``CrossConv(hidden, hidden, 3, 1, g, 1.0, shortcut)``.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)
        self.cv4 = Conv(2 * hidden, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        blocks = [CrossConv(hidden, hidden, 3, 1, g, 1.0, shortcut) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        """Compute both branches, merge them and apply the fusing convolution."""
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        merged = torch.cat((y1, y2), dim=1)
        merged = self.act(self.bn(merged))
        return self.cv4(merged)
136 | 
class Classify(nn.Module):
    """Classification head, i.e. x(b,c1,20,20) to x(b,c2).

    Each input is average-pooled to 1x1, the pooled tensors are concatenated
    on dim 1, convolved without bias, and flattened.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        padding = autopad(k, p)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, bias=False)  # to x(b,c2,1,1)
        self.flat = Flatten()

    def forward(self, x):
        """Accept a single tensor or a list of tensors and return x(b,c2)."""
        inputs = x if isinstance(x, list) else [x]
        pooled = [self.aap(y) for y in inputs]
        z = torch.cat(pooled, 1)  # cat if list
        return self.flat(self.conv(z))
148 | 
class MixConv2d(nn.Module):
    """Mixed Depthwise Conv https://arxiv.org/abs/1907.09595

    One bias-free ``nn.Conv2d`` is created per kernel size in ``k``. The ``c2``
    output channels are divided between them either evenly (``equal_ch``) or
    by solving a least-squares system weighted by the squared kernel sizes.
    ``forward`` adds the input to the activated result.
    """

    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        super().__init__()
        groups = len(k)
        if equal_ch:  # equal c_ per group
            idx = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
            c_ = [(idx == g).sum() for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            solution = np.linalg.lstsq(a, b, rcond=None)[0]  # solve for equal weight indices, ax = b
            c_ = solution.round()

        branches = []
        for g in range(groups):
            branches.append(nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False))
        self.m = nn.ModuleList(branches)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        """Concatenate all branch outputs on dim 1, normalise, activate, add ``x``."""
        mixed = torch.cat([branch(x) for branch in self.m], 1)
        return x + self.act(self.bn(mixed))
171 | 
def make_divisible(x, divisor):
    """Round ``x`` up to the nearest multiple of ``divisor``."""
    multiples = math.ceil(x / divisor)
    return multiples * divisor
175 | 
class Detect(nn.Module):
    """Detection head: one 1x1 conv per input feature map plus box decoding.

    Args:
        nc: number of classes.
        anchors: one flat ``[w0, h0, w1, h1, ...]`` list per detection layer.
        ch: input channel count of each detection layer.
        training: initial value of ``self.training``. When it is true,
            ``forward`` returns the raw reshaped maps without decoding.
    """

    def __init__(self, nc=80, anchors=(), ch=(), training=False):  # detection layer
        super(Detect, self).__init__()
        self.stride = torch.tensor([8., 16., 32.])
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        self.training = training

        # Anchors are kept as plain tensor attributes (not registered buffers),
        # so Module.to()/cuda() does not move them; forward() moves the slice
        # it needs onto the input's device.
        self.anchors = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.anchor_grid = self.anchors.view(self.nl, 1, -1, 1, 1, 2)

        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv

    def forward(self, x):
        """Run the head on a list of feature maps.

        ``x`` is modified in place: each entry is replaced by its conv output
        reshaped to ``(bs, na, ny, nx, no)``.

        Returns:
            ``x`` when ``self.training`` is true; otherwise a tuple
            ``(decoded, x)`` where ``decoded`` concatenates the decoded
            predictions of all layers as ``(bs, -1, no)``.
        """
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                device = x[i].device
                # Rebuild the cached cell grid whenever the map size changes.
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(device)

                y = x[i].sigmoid()
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(device)) * self.stride[i]  # xy
                # anchor_grid is not a buffer, so it may still live on the CPU
                # after the model was moved; put it on the same device as y.
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].to(device)  # wh
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a ``(1, 1, ny, nx, 2)`` float tensor of ``(x, y)`` cell indices."""
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
218 | 
def weights_init_normal(m):
    """Initialise conv and BatchNorm2d modules in place (for ``Module.apply``).

    Modules are matched by class name: names containing "Conv" get weights
    drawn from N(0.0, 0.02); names containing "BatchNorm2d" get weights from
    N(1.0, 0.02) and a zero bias.

    Modules that match by name but carry no ``weight`` (for example the
    ``Conv`` / ``CrossConv`` / ``MixConv2d`` wrappers in this file, whose
    parameters live in child modules, or ``BatchNorm2d(affine=False)``) are
    skipped; their children are still initialised when ``apply`` visits them.
    """
    classname = m.__class__.__name__
    # nn.Module raises AttributeError for unknown attributes, so getattr with a
    # default is the safe way to probe for optional parameters.
    weight = getattr(m, 'weight', None)
    if classname.find("Conv") != -1:
        if weight is not None:
            torch.nn.init.normal_(weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        if weight is not None:
            torch.nn.init.normal_(weight.data, 1.0, 0.02)
        bias = getattr(m, 'bias', None)
        if bias is not None:
            torch.nn.init.constant_(bias.data, 0.0)


--------------------------------------------------------------------------------