├── dog.jpg
├── person.jpg
├── ink.color
├── compare_output.py
├── coco.names
├── README.md
├── save_sized_image.py
├── detect.py
├── convert_weights_to_caffemodel.py
├── detect_tool.py
└── yolo.prototxt
/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hustzxd/z1/HEAD/dog.jpg
--------------------------------------------------------------------------------
/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hustzxd/z1/HEAD/person.jpg
--------------------------------------------------------------------------------
/ink.color:
--------------------------------------------------------------------------------
1 | 255, 242, 35
2 | 255, 75, 58
3 | 78, 231, 41
4 | 228, 47, 255
--------------------------------------------------------------------------------
/compare_output.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys
3 |
4 | import struct
5 |
6 | if __name__ == '__main__':
7 |     if len(sys.argv) != 3:
8 |         print "Usage: python %s layer_output_in_caffe layer_output_in_darknet" % sys.argv[0]
9 |         exit(1)
10 |     caffe_filename = sys.argv[1]
11 |     darknet_filename = sys.argv[2]
12 |     output_caffe = np.load(caffe_filename)
13 |     print 'output_caffe shape: ' + str(output_caffe.shape)
14 |     output_caffe = output_caffe.reshape(-1, )
15 |     fileData = open(darknet_filename, 'rb')
16 |     print 'comparing...'
17 |     total = 0
18 |     error = 0
19 |     for one_in_caffe in output_caffe:
20 |         total += 1
21 |         one_in_darknet = struct.unpack('f', fileData.read(4))[0]  # unpack returns a tuple; take the float
22 |         if abs(one_in_caffe - one_in_darknet) / max(abs(one_in_caffe), abs(one_in_darknet)) > 0.08:
23 |             error += 1
24 |     rate = 1 - float(error) / total
25 |     print '%d / %d = %f' % (total - error, total, rate)
26 |
--------------------------------------------------------------------------------
/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### Key files used in porting YOLOv2 to the Caffe framework
2 |
3 | 0. [yolo-in-caffe source code](https://github.com/hustzxd/yolo2_in_caffe)<br>
4 | 1. [yolo.prototxt](https://github.com/hustzxd/yolo2_to_caffe_tool/blob/master/yolo.prototxt)
5 | The YOLOv2 network definition for Caffe, written to match [yolo.cfg](https://github.com/pjreddie/darknet/blob/master/cfg/yolo.cfg) from darknet.
6 | 2. [convert_weights_to_caffemodel.py](https://github.com/hustzxd/yolo2_to_caffe_tool/blob/master/convert_weights_to_caffemodel.py)
7 | Converts the YOLOv2 weights file into a caffemodel file.
8 | You may need to change the prototxt and weights paths.
9 | 3. [compare_output.py](https://github.com/hustzxd/yolo2_to_caffe_tool/blob/master/compare_output.py)
10 | Compares the per-layer outputs of darknet and Caffe. Comparing with this script is reliable, but you can also plot the data, which makes the comparison easier and avoids worrying about strict numerical equality; two short sketches at the end of this README show how to dump a Caffe blob to .npy and how to plot the two dumps.<br>
11 | `python compare_output.py layer_output_in_caffe layer_output_in_darknet` 12 | 4. [save_sized_image.py](https://github.com/hustzxd/yolo2_to_caffe_tool/blob/master/save_sized_image.py)
13 | Saves the resized image to sized_image.npy inside the Caffe framework. One issue remains: the sized_image.npy saved by Caffe and the sized_image saved by darknet are not exactly identical. Comparing the two with compare_output.py, the similarity (for lack of a better term) within the error tolerance is a little over 90%, which should not noticeably affect the feature extraction that follows.<br>
14 | 5. Demo<br>
15 | ![image](https://github.com/hustzxd/yolo2_to_caffe_tool/blob/master/person.jpg)
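The minimal sketch below is not part of the repo; it shows how a Caffe blob can be dumped to .npy so it can be fed to compare_output.py. The `'conv1'` blob name and all paths are placeholders, and the preprocessing mirrors save_sized_image.py / detect.py.

```python
import caffe
import numpy as np

caffe.set_mode_cpu()
net = caffe.Net('yolo.prototxt', 'yolo.caffemodel', caffe.TEST)

# same preprocessing as save_sized_image.py / detect.py
image = caffe.io.load_image('dog.jpg')
transformer = caffe.io.Transformer({'data': (1, 3, 416, 416)})
transformer.set_transpose('data', (2, 0, 1))
net.blobs['data'].data[...] = transformer.preprocess('data', image)
net.forward()

# 'conv1' is only an example; any top name from yolo.prototxt works
np.save('layer_output_in_caffe', net.blobs['conv1'].data[0])
```

The saved file can then be checked against the darknet dump with `python compare_output.py layer_output_in_caffe.npy layer_output_in_darknet`.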

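And a rough plotting sketch for the graphical comparison mentioned in step 3, assuming the darknet side wrote a raw float32 dump (the same format compare_output.py reads with struct.unpack); file names are placeholders.

```python
import numpy as np
import matplotlib.pyplot as plt

caffe_out = np.load('layer_output_in_caffe.npy').reshape(-1)
darknet_out = np.fromfile('layer_output_in_darknet', dtype=np.float32)[:caffe_out.size]

plt.figure(figsize=(10, 4))
plt.plot(caffe_out[:500], label='caffe')                  # the first few hundred values are usually
plt.plot(darknet_out[:500], label='darknet', alpha=0.7)   # enough to spot a systematic mismatch
plt.legend()
plt.title('layer output: caffe vs darknet')
plt.show()
```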
16 |
--------------------------------------------------------------------------------
/save_sized_image.py:
--------------------------------------------------------------------------------
1 | # set up Python environment: numpy for numerical routines, and matplotlib for plotting
2 | import caffe
3 | import numpy as np
4 | caffe.set_mode_cpu()
5 | # If you get "No module named _caffe", either you have not built pycaffe or you have the wrong path.
6 | image = caffe.io.load_image('/home/zxd/projects/darknet-master/data/dog.jpg')
7 | # print image.shape
8 | # print image.dtype
9 | transformer = caffe.io.Transformer({'data': (1, 3, 416, 416)})
10 |
11 | transformer.set_transpose('data', (2, 0, 1))  # move image channels to outermost dimension
12 | # transformer.set_mean('data', ) # subtract the dataset-mean value in each channel
13 | # transformer.set_raw_scale('data', 255) # rescale from [0, 1] to [0, 255]
14 | # transformer.set_channel_swap('data', (2, 1, 0)) # swap channels from RGB to BGR
15 | transformed_image = transformer.preprocess('data', image)
16 | # np.save('/home/zxd/data/layer_output_caffe/image', image)
17 | # array_from_file = np.load('/home/zxd/data/layer_output_caffe/image.npy');
18 | # print array_from_file.shape
19 | # create transformer for the input called 'data'
20 | np.save('/home/zxd/data/layer_output_caffe/sized_image', transformed_image)
21 | print transformed_image.shape
22 |
--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
1 | import caffe
2 | import numpy as np
3 |
4 | import detect_tool as tool
5 |
6 | pic_name = '/home/zxd/projects/darknet-master/data/person.jpg'
7 | # caffe.set_device(0)
8 | # caffe.set_mode_gpu()
9 | caffe.set_mode_cpu()
10 | image = caffe.io.load_image(pic_name)
11 | transformer = caffe.io.Transformer({'data': (1, 3, 416, 416)})
12 | transformer.set_transpose('data', (2, 0, 1))  # move image channels to outermost dimension
13 | transformed_image = transformer.preprocess('data', image)
14 | print transformed_image.shape
15 |
16 | model_def = '/home/zxd/projects/test/prototxt_test/yolo.prototxt'
17 | model_weights = '/home/zxd/projects/PycharmProjects/test_caffe/yolo.caffemodel'
18 |
19 | net = caffe.Net(model_def, model_weights, caffe.TEST)
20 | net.blobs['data'].reshape(1, 3, 416, 416)
21 | net.blobs['data'].data[...] 
= transformed_image 22 | output = net.forward() 23 | feat = net.blobs['region1'].data[0] 24 | print feat.shape 25 | 26 | boxes_of_each_grid = 5 27 | classes = 80 28 | thread = 0.45 29 | biases = np.array([0.738768, 0.874946, 2.42204, 2.65704, 4.30971, 7.04493, 10.246, 4.59428, 12.6868, 11.8741]) 30 | boxes = tool.get_region_boxes(feat, boxes_of_each_grid, classes, thread, biases) 31 | 32 | for box in boxes: 33 | print box 34 | 35 | tool.draw_image(pic_name, boxes=boxes, namelist_file='coco.names') 36 | -------------------------------------------------------------------------------- /convert_weights_to_caffemodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import caffe 3 | import numpy as np 4 | 5 | caffe.set_device(0) # if we have multiple GPUs, pick the first one 6 | caffe.set_mode_gpu() 7 | model_filename = 'yolo.prototxt' 8 | yoloweight_filename = 'yolo.weights' 9 | caffemodel_filename = 'yolo.caffemodel' 10 | print 'model file is ', model_filename 11 | print 'weight file is ', yoloweight_filename 12 | print 'output caffemodel file is ', caffemodel_filename 13 | net = caffe.Net(model_filename, caffe.TEST) 14 | net.forward() 15 | # for each layer, show the output shape 16 | for layer_name, blob in net.blobs.iteritems(): 17 | print layer_name + '\t' + str(blob.data.shape) 18 | count = 0 19 | for layer_name, param in net.params.iteritems(): 20 | print layer_name + '\t', 21 | for i in range(len(param)): 22 | count += np.prod(param[i].data.shape) 23 | print str(param[i].data.shape) + '\t', 24 | print 25 | print 'count=', str(count) 26 | params = net.params.keys() 27 | # read weights from file and assign to the network 28 | netWeightsInt = np.fromfile(yoloweight_filename, dtype=np.int32) 29 | transFlag = (netWeightsInt[0] > 1000 or netWeightsInt[1] > 1000) 30 | # transpose flag, the first 4 entries are major, minor, revision and net.seen 31 | print 'transFlag = %r' % transFlag 32 | netWeightsFloat = np.fromfile(yoloweight_filename, dtype=np.float32) 33 | netWeights = netWeightsFloat[4:] 34 | # start from the 5th entry, the first 4 entries are major, minor, revision and net.seen 35 | print netWeights.shape 36 | count = 0 37 | for pr in params: 38 | lidx = list(net._layer_names).index(pr) 39 | layer = net.layers[lidx] 40 | # conv_bias = None 41 | if count == netWeights.shape[0]: 42 | print "WARNING: no weights left for %s" % pr 43 | break 44 | if layer.type == 'Convolution': 45 | print pr + "(conv)" 46 | # bias 47 | if len(net.params[pr]) > 1: 48 | bias_dim = net.params[pr][1].data.shape 49 | else: 50 | bias_dim = (net.params[pr][0].data.shape[0],) 51 | biasSize = np.prod(bias_dim) 52 | conv_bias = np.reshape(netWeights[count:count + biasSize], bias_dim) 53 | if len(net.params[pr]) > 1: 54 | assert (bias_dim == net.params[pr][1].data.shape) 55 | net.params[pr][1].data[...] = conv_bias 56 | conv_bias = None 57 | count += biasSize 58 | # batch_norm 59 | next_layer = net.layers[lidx + 1] 60 | if next_layer.type == 'BatchNorm': 61 | bn_dims = (3, net.params[pr][0].data.shape[0]) 62 | bnSize = np.prod(bn_dims) 63 | batch_norm = np.reshape(netWeights[count:count + bnSize], bn_dims) 64 | count += bnSize 65 | # weights 66 | dims = net.params[pr][0].data.shape 67 | weightSize = np.prod(dims) 68 | net.params[pr][0].data[...] = np.reshape(netWeights[count:count + weightSize], dims) 69 | count += weightSize 70 | elif layer.type == 'BatchNorm': 71 | print pr + "(batchnorm)" 72 | net.params[pr][0].data[...] 
= batch_norm[1] # mean 73 | net.params[pr][1].data[...] = batch_norm[2] # variance 74 | net.params[pr][2].data[...] = 1.0 # scale factor 75 | elif layer.type == 'Scale': 76 | print pr + "(scale)" 77 | net.params[pr][0].data[...] = batch_norm[0] # scale 78 | batch_norm = None 79 | if len(net.params[pr]) > 1: 80 | net.params[pr][1].data[...] = conv_bias # bias 81 | conv_bias = None 82 | else: 83 | print "WARNING: unsupported layer, " + pr 84 | if np.prod(netWeights.shape) != count: 85 | print "ERROR: size mismatch: %d" % count 86 | else: 87 | print "you are right." 88 | net.save(caffemodel_filename) 89 | -------------------------------------------------------------------------------- /detect_tool.py: -------------------------------------------------------------------------------- 1 | import Image 2 | import ImageDraw 3 | import ImageFont 4 | import math 5 | 6 | import matplotlib.image as mpimg 7 | 8 | 9 | class Point(object): 10 | def __init__(self, x=0., y=0.): 11 | self.x = x 12 | self.y = y 13 | 14 | def __str__(self): 15 | return "({},{})".format(self.x, self.y) 16 | 17 | 18 | class Rectangle(object): 19 | def __init__(self, posn, w, h): 20 | self.corner = posn 21 | self.width = w 22 | self.height = h 23 | 24 | def __str__(self): 25 | return "({0},{1},{2})".format(self.corner, self.width, self.height) 26 | 27 | def iou(self, rect): 28 | return self.intersection(rect) / self.union(rect) 29 | 30 | def intersection(self, rect): 31 | w = overlap(self.corner.x, self.width, rect.corner.x, rect.width) 32 | h = overlap(self.corner.y, self.height, rect.corner.y, rect.height) 33 | if w < 0 or h < 0: 34 | return 0 35 | area = w * h 36 | return area 37 | 38 | def union(self, rect): 39 | i = self.intersection(rect) 40 | u = self.width * self.height + rect.width * rect.height - i 41 | return u 42 | 43 | 44 | class Box(object): 45 | def __init__(self, rect, prob=0.0, category=-1): 46 | self.rect = rect 47 | self.prob = prob 48 | self.category = category 49 | 50 | def __str__(self): 51 | return "({0},{1},{2})".format(self.rect, self.prob, self.category) 52 | 53 | def iou(self, box2): 54 | return self.rect.iou(box2.rect) 55 | 56 | 57 | def overlap(x1, w1, x2, w2): 58 | l1 = x1 - w1 / 2 59 | l2 = x2 - w2 / 2 60 | left = l1 if l1 > l2 else l2 61 | r1 = x1 + w1 / 2 62 | r2 = x2 + w2 / 2 63 | right = r1 if r1 < r2 else r2 64 | return right - left 65 | 66 | 67 | def logistic_activate(x): 68 | return 1. / (1. 
+ math.exp(-x)) 69 | 70 | 71 | def get_region_box(x, biases, n, index, i, j, w, h): 72 | rect = Rectangle(Point((i + logistic_activate(x[index + 0])) / w, 73 | (j + logistic_activate(x[index + 1])) / h), 74 | math.exp(x[index + 2]) * biases[2 * n] / w, 75 | math.exp(x[index + 3]) * biases[2 * n + 1] / h) 76 | box = Box(rect) 77 | return box 78 | 79 | 80 | def get_region_boxes(feat, boxes_of_each_grid, classes, thresh, biases, nms=0.4): 81 | boxes = [] 82 | channel, height, width = feat.shape 83 | predictions = feat.reshape(-1) 84 | for i in xrange(height * width): 85 | row = i / width 86 | col = i % width 87 | for n in xrange(boxes_of_each_grid): 88 | index = i * boxes_of_each_grid + n 89 | p_index = index * (classes + 5) + 4 90 | scale = predictions[p_index] 91 | box_index = index * (classes + 5) 92 | box_tmp = get_region_box(predictions, biases, n, box_index, col, row, width, height) 93 | class_index = index * (classes + 5) + 5 94 | for j in xrange(classes): 95 | prob = scale * predictions[class_index + j] 96 | if prob > thresh: 97 | box_tmp.category = j 98 | box_tmp.prob = prob 99 | boxes.append(box_tmp) 100 | result = [] 101 | for i in xrange(boxes.__len__()): 102 | for j in xrange(i + 1, boxes.__len__()): 103 | if boxes[i].iou(boxes[j]) > nms: 104 | if boxes[i].prob > boxes[j].prob: 105 | boxes[j].prob = 0 106 | else: 107 | boxes[i].prob = 0 108 | for box in boxes: 109 | if box.prob > 0: 110 | result.append(box) 111 | del boxes 112 | return result 113 | 114 | 115 | def get_names_from_file(filename): 116 | result = [] 117 | fd = file(filename, 'r') 118 | for line in fd.readlines(): 119 | result.append(line.replace('\n', '')) 120 | return result 121 | 122 | 123 | def get_color_from_file(filename): 124 | colors = [] 125 | fd = file(filename, 'r') 126 | for line in fd.readlines(): 127 | words = line.split(r',') 128 | color = (int(words[0]), int(words[1]), int(words[2])) 129 | colors.append(color) 130 | return colors 131 | 132 | 133 | def draw_image(pic_name, boxes, namelist_file): 134 | name_list = get_names_from_file(namelist_file) 135 | color_list = get_color_from_file('ink.color') 136 | im = Image.open(pic_name) 137 | draw = ImageDraw.Draw(im) 138 | lena = mpimg.imread(pic_name) 139 | height, width = lena.shape[:2] 140 | for box in boxes: 141 | x = box.rect.corner.x 142 | y = box.rect.corner.y 143 | w = box.rect.width 144 | h = box.rect.height 145 | left = (x - w / 2) * width 146 | right = (x + w / 2) * width 147 | top = (y - h / 2) * height 148 | bot = (y + h / 2) * height 149 | if left < 0: 150 | left = 0 151 | if right > width - 1: 152 | right = width - 1 153 | if top < 0: 154 | top = 0 155 | if bot > height - 1: 156 | bot = height - 1 157 | category = name_list[box.category] 158 | color = color_list[box.category % color_list.__len__()] 159 | draw.line((left, top, right, top), fill=color, width=5) 160 | draw.line((right, top, right, bot), fill=color, width=5) 161 | draw.line((left, top, left, bot), fill=color, width=5) 162 | draw.line((left, bot, right, bot), fill=color, width=5) 163 | font_size = 20 164 | my_font = ImageFont.truetype("/usr/share/fonts/truetype/ubuntu-font-family/Ubuntu-M.ttf", size=font_size) 165 | draw.text([left + 5, top], category, font=my_font, fill=color) 166 | im.show() 167 | 168 | 169 | def show_image(pic_name): 170 | im = Image.open(pic_name) 171 | # rect = [(0, 0), (300, 300)] 172 | # draw.rectangle([(0, 0), (200, 200)], outline='yellow', width=20) 173 | # draw.line((100, 2, 200, 500), fill='yellow', width=5) 174 | # im.save(pic_name, "PNG") 175 | 
im.show() 176 | # lena = mpimg.imread(pic_name) 177 | # print lena.shape 178 | # plt.imshow(lena) 179 | # plt.axis('off') 180 | # plt.show() 181 | -------------------------------------------------------------------------------- /yolo.prototxt: -------------------------------------------------------------------------------- 1 | name: "YOLONET" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 1 dim: 3 dim: 416 dim: 416 } } 7 | } 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | convolution_param { 14 | num_output: 32 15 | kernel_size: 3 16 | pad: 1 17 | stride: 1 18 | bias_term: false 19 | } 20 | } 21 | layer { 22 | name: "bn1" 23 | type: "BatchNorm" 24 | bottom: "conv1" 25 | top: "bn1" 26 | } 27 | layer { 28 | name: "scale1" 29 | type: "Scale" 30 | bottom: "bn1" 31 | top: "scale1" 32 | scale_param { 33 | bias_term: true 34 | } 35 | } 36 | layer { 37 | name: "relu1" 38 | type: "ReLU" 39 | bottom: "scale1" 40 | top: "scale1" 41 | relu_param{ 42 | negative_slope: 0.1 43 | } 44 | } 45 | layer { 46 | name: "pool1" 47 | type: "Pooling" 48 | bottom: "scale1" 49 | top: "pool1" 50 | pooling_param { 51 | pool: MAX 52 | kernel_size: 2 53 | stride: 2 54 | } 55 | } 56 | layer{ 57 | name: "conv2" 58 | type: "Convolution" 59 | bottom: "pool1" 60 | top: "conv2" 61 | convolution_param { 62 | num_output: 64 63 | kernel_size: 3 64 | pad: 1 65 | stride: 1 66 | bias_term: false 67 | } 68 | } 69 | layer { 70 | name: "bn2" 71 | type: "BatchNorm" 72 | bottom: "conv2" 73 | top: "bn2" 74 | param { 75 | lr_mult: 0 76 | } 77 | param { 78 | lr_mult: 0 79 | } 80 | param { 81 | lr_mult: 0 82 | } 83 | } 84 | layer { 85 | name: "scale2" 86 | type: "Scale" 87 | bottom: "bn2" 88 | top: "scale2" 89 | scale_param { 90 | bias_term: true 91 | } 92 | } 93 | layer { 94 | name: "relu2" 95 | type: "ReLU" 96 | bottom: "scale2" 97 | top: "scale2" 98 | relu_param{ 99 | negative_slope: 0.1 100 | } 101 | } 102 | layer { 103 | name: "pool2" 104 | type: "Pooling" 105 | bottom: "scale2" 106 | top: "pool2" 107 | pooling_param { 108 | pool: MAX 109 | kernel_size: 2 110 | stride: 2 111 | } 112 | } 113 | 114 | layer{ 115 | name: "conv3" 116 | type: "Convolution" 117 | bottom: "pool2" 118 | top: "conv3" 119 | convolution_param { 120 | num_output: 128 121 | kernel_size: 3 122 | pad: 1 123 | stride: 1 124 | bias_term: false 125 | } 126 | } 127 | layer { 128 | name: "bn3" 129 | type: "BatchNorm" 130 | bottom: "conv3" 131 | top: "bn3" 132 | param { 133 | lr_mult: 0 134 | } 135 | param { 136 | lr_mult: 0 137 | } 138 | param { 139 | lr_mult: 0 140 | } 141 | } 142 | layer { 143 | name: "scale3" 144 | type: "Scale" 145 | bottom: "bn3" 146 | top: "scale3" 147 | scale_param { 148 | bias_term: true 149 | } 150 | } 151 | layer { 152 | name: "relu3" 153 | type: "ReLU" 154 | bottom: "scale3" 155 | top: "scale3" 156 | relu_param{ 157 | negative_slope: 0.1 158 | } 159 | } 160 | 161 | 162 | layer{ 163 | name: "conv4" 164 | type: "Convolution" 165 | bottom: "scale3" 166 | top: "conv4" 167 | convolution_param { 168 | num_output: 64 169 | kernel_size: 1 170 | pad: 0 #?? 
171 | stride: 1 172 | bias_term: false 173 | } 174 | } 175 | layer { 176 | name: "bn4" 177 | type: "BatchNorm" 178 | bottom: "conv4" 179 | top: "bn4" 180 | param { 181 | lr_mult: 0 182 | } 183 | param { 184 | lr_mult: 0 185 | } 186 | param { 187 | lr_mult: 0 188 | } 189 | } 190 | layer { 191 | name: "scale4" 192 | type: "Scale" 193 | bottom: "bn4" 194 | top: "scale4" 195 | scale_param { 196 | bias_term: true 197 | } 198 | } 199 | layer { 200 | name: "relu4" 201 | type: "ReLU" 202 | bottom: "scale4" 203 | top: "scale4" 204 | relu_param{ 205 | negative_slope: 0.1 206 | } 207 | } 208 | 209 | layer{ 210 | name: "conv5" 211 | type: "Convolution" 212 | bottom: "scale4" 213 | top: "conv5" 214 | convolution_param { 215 | num_output: 128 216 | kernel_size: 3 217 | pad: 1 218 | stride: 1 219 | bias_term: false 220 | } 221 | } 222 | layer { 223 | name: "bn5" 224 | type: "BatchNorm" 225 | bottom: "conv5" 226 | top: "bn5" 227 | param { 228 | lr_mult: 0 229 | } 230 | param { 231 | lr_mult: 0 232 | } 233 | param { 234 | lr_mult: 0 235 | } 236 | } 237 | layer { 238 | name: "scale5" 239 | type: "Scale" 240 | bottom: "bn5" 241 | top: "scale5" 242 | scale_param { 243 | bias_term: true 244 | } 245 | } 246 | layer { 247 | name: "relu5" 248 | type: "ReLU" 249 | bottom: "scale5" 250 | top: "scale5" 251 | relu_param{ 252 | negative_slope: 0.1 253 | } 254 | } 255 | layer { 256 | name: "pool5" 257 | type: "Pooling" 258 | bottom: "scale5" 259 | top: "pool5" 260 | pooling_param { 261 | pool: MAX 262 | kernel_size: 2 263 | stride: 2 264 | } 265 | } 266 | 267 | layer{ 268 | name: "conv6" 269 | type: "Convolution" 270 | bottom: "pool5" 271 | top: "conv6" 272 | convolution_param { 273 | num_output: 256 274 | kernel_size: 3 275 | pad: 1 276 | stride: 1 277 | bias_term: false 278 | } 279 | } 280 | layer { 281 | name: "bn6" 282 | type: "BatchNorm" 283 | bottom: "conv6" 284 | top: "bn6" 285 | param { 286 | lr_mult: 0 287 | } 288 | param { 289 | lr_mult: 0 290 | } 291 | param { 292 | lr_mult: 0 293 | } 294 | } 295 | layer { 296 | name: "scale6" 297 | type: "Scale" 298 | bottom: "bn6" 299 | top: "scale6" 300 | scale_param { 301 | bias_term: true 302 | } 303 | } 304 | layer { 305 | name: "relu6" 306 | type: "ReLU" 307 | bottom: "scale6" 308 | top: "scale6" 309 | relu_param{ 310 | negative_slope: 0.1 311 | } 312 | } 313 | 314 | layer{ 315 | name: "conv7" 316 | type: "Convolution" 317 | bottom: "scale6" 318 | top: "conv7" 319 | convolution_param { 320 | num_output: 128 321 | kernel_size: 1 322 | pad: 0 323 | stride: 1 324 | bias_term: false 325 | } 326 | } 327 | layer { 328 | name: "bn7" 329 | type: "BatchNorm" 330 | bottom: "conv7" 331 | top: "bn7" 332 | param { 333 | lr_mult: 0 334 | } 335 | param { 336 | lr_mult: 0 337 | } 338 | param { 339 | lr_mult: 0 340 | } 341 | } 342 | layer { 343 | name: "scale7" 344 | type: "Scale" 345 | bottom: "bn7" 346 | top: "scale7" 347 | scale_param { 348 | bias_term: true 349 | } 350 | } 351 | layer { 352 | name: "relu7" 353 | type: "ReLU" 354 | bottom: "scale7" 355 | top: "scale7" 356 | relu_param{ 357 | negative_slope: 0.1 358 | } 359 | } 360 | 361 | layer{ 362 | name: "conv8" 363 | type: "Convolution" 364 | bottom: "scale7" 365 | top: "conv8" 366 | convolution_param { 367 | num_output: 256 368 | kernel_size: 3 369 | pad: 1 370 | stride: 1 371 | bias_term: false 372 | } 373 | } 374 | layer { 375 | name: "bn8" 376 | type: "BatchNorm" 377 | bottom: "conv8" 378 | top: "bn8" 379 | param { 380 | lr_mult: 0 381 | } 382 | param { 383 | lr_mult: 0 384 | } 385 | param { 386 | lr_mult: 0 387 | } 388 | } 
389 | layer { 390 | name: "scale8" 391 | type: "Scale" 392 | bottom: "bn8" 393 | top: "scale8" 394 | scale_param { 395 | bias_term: true 396 | } 397 | } 398 | layer { 399 | name: "relu8" 400 | type: "ReLU" 401 | bottom: "scale8" 402 | top: "scale8" 403 | relu_param{ 404 | negative_slope: 0.1 405 | } 406 | } 407 | layer { 408 | name: "pool8" 409 | type: "Pooling" 410 | bottom: "scale8" 411 | top: "pool8" 412 | pooling_param { 413 | pool: MAX 414 | kernel_size: 2 415 | stride: 2 416 | } 417 | } 418 | 419 | layer{ 420 | name: "conv9" 421 | type: "Convolution" 422 | bottom: "pool8" 423 | top: "conv9" 424 | convolution_param { 425 | num_output: 512 426 | kernel_size: 3 427 | pad: 1 428 | stride: 1 429 | bias_term: false 430 | } 431 | } 432 | layer { 433 | name: "bn9" 434 | type: "BatchNorm" 435 | bottom: "conv9" 436 | top: "bn9" 437 | param { 438 | lr_mult: 0 439 | } 440 | param { 441 | lr_mult: 0 442 | } 443 | param { 444 | lr_mult: 0 445 | } 446 | } 447 | layer { 448 | name: "scale9" 449 | type: "Scale" 450 | bottom: "bn9" 451 | top: "scale9" 452 | scale_param { 453 | bias_term: true 454 | } 455 | } 456 | layer { 457 | name: "relu9" 458 | type: "ReLU" 459 | bottom: "scale9" 460 | top: "scale9" 461 | relu_param{ 462 | negative_slope: 0.1 463 | } 464 | } 465 | 466 | layer{ 467 | name: "conv10" 468 | type: "Convolution" 469 | bottom: "scale9" 470 | top: "conv10" 471 | convolution_param { 472 | num_output: 256 473 | kernel_size: 1 474 | pad: 0 475 | stride: 1 476 | bias_term: false 477 | } 478 | } 479 | layer { 480 | name: "bn10" 481 | type: "BatchNorm" 482 | bottom: "conv10" 483 | top: "bn10" 484 | param { 485 | lr_mult: 0 486 | } 487 | param { 488 | lr_mult: 0 489 | } 490 | param { 491 | lr_mult: 0 492 | } 493 | } 494 | layer { 495 | name: "scale10" 496 | type: "Scale" 497 | bottom: "bn10" 498 | top: "scale10" 499 | scale_param { 500 | bias_term: true 501 | } 502 | } 503 | layer { 504 | name: "relu10" 505 | type: "ReLU" 506 | bottom: "scale10" 507 | top: "scale10" 508 | relu_param{ 509 | negative_slope: 0.1 510 | } 511 | } 512 | 513 | layer{ 514 | name: "conv11" 515 | type: "Convolution" 516 | bottom: "scale10" 517 | top: "conv11" 518 | convolution_param { 519 | num_output: 512 520 | kernel_size: 3 521 | pad: 1 522 | stride: 1 523 | bias_term: false 524 | } 525 | } 526 | layer { 527 | name: "bn11" 528 | type: "BatchNorm" 529 | bottom: "conv11" 530 | top: "bn11" 531 | param { 532 | lr_mult: 0 533 | } 534 | param { 535 | lr_mult: 0 536 | } 537 | param { 538 | lr_mult: 0 539 | } 540 | } 541 | layer { 542 | name: "scale11" 543 | type: "Scale" 544 | bottom: "bn11" 545 | top: "scale11" 546 | scale_param { 547 | bias_term: true 548 | } 549 | } 550 | layer { 551 | name: "relu11" 552 | type: "ReLU" 553 | bottom: "scale11" 554 | top: "scale11" 555 | relu_param{ 556 | negative_slope: 0.1 557 | } 558 | } 559 | 560 | layer{ 561 | name: "conv12" 562 | type: "Convolution" 563 | bottom: "scale11" 564 | top: "conv12" 565 | convolution_param { 566 | num_output: 256 567 | kernel_size: 1 568 | pad: 0 569 | stride: 1 570 | bias_term: false 571 | } 572 | } 573 | layer { 574 | name: "bn12" 575 | type: "BatchNorm" 576 | bottom: "conv12" 577 | top: "bn12" 578 | param { 579 | lr_mult: 0 580 | } 581 | param { 582 | lr_mult: 0 583 | } 584 | param { 585 | lr_mult: 0 586 | } 587 | } 588 | layer { 589 | name: "scale12" 590 | type: "Scale" 591 | bottom: "bn12" 592 | top: "scale12" 593 | scale_param { 594 | bias_term: true 595 | } 596 | } 597 | layer { 598 | name: "relu12" 599 | type: "ReLU" 600 | bottom: "scale12" 601 | top: 
"scale12" 602 | relu_param{ 603 | negative_slope: 0.1 604 | } 605 | } 606 | 607 | layer{ 608 | name: "conv13" 609 | type: "Convolution" 610 | bottom: "scale12" 611 | top: "conv13" 612 | convolution_param { 613 | num_output: 512 614 | kernel_size: 3 615 | pad: 1 616 | stride: 1 617 | bias_term: false 618 | } 619 | } 620 | layer { 621 | name: "bn13" 622 | type: "BatchNorm" 623 | bottom: "conv13" 624 | top: "bn13" 625 | param { 626 | lr_mult: 0 627 | } 628 | param { 629 | lr_mult: 0 630 | } 631 | param { 632 | lr_mult: 0 633 | } 634 | } 635 | layer { 636 | name: "scale13" 637 | type: "Scale" 638 | bottom: "bn13" 639 | top: "scale13" 640 | scale_param { 641 | bias_term: true 642 | } 643 | } 644 | layer { 645 | name: "relu13" 646 | type: "ReLU" 647 | bottom: "scale13" 648 | top: "scale13" 649 | relu_param{ 650 | negative_slope: 0.1 651 | } 652 | } 653 | layer { 654 | name: "pool13" 655 | type: "Pooling" 656 | bottom: "scale13" 657 | top: "pool13" 658 | pooling_param { 659 | pool: MAX 660 | kernel_size: 2 661 | stride: 2 662 | } 663 | } 664 | 665 | layer{ 666 | name: "conv14" 667 | type: "Convolution" 668 | bottom: "pool13" 669 | top: "conv14" 670 | convolution_param { 671 | num_output: 1024 672 | kernel_size: 3 673 | pad: 1 674 | stride: 1 675 | bias_term: false 676 | } 677 | } 678 | layer { 679 | name: "bn14" 680 | type: "BatchNorm" 681 | bottom: "conv14" 682 | top: "bn14" 683 | param { 684 | lr_mult: 0 685 | } 686 | param { 687 | lr_mult: 0 688 | } 689 | param { 690 | lr_mult: 0 691 | } 692 | } 693 | layer { 694 | name: "scale14" 695 | type: "Scale" 696 | bottom: "bn14" 697 | top: "scale14" 698 | scale_param { 699 | bias_term: true 700 | } 701 | } 702 | layer { 703 | name: "relu14" 704 | type: "ReLU" 705 | bottom: "scale14" 706 | top: "scale14" 707 | relu_param{ 708 | negative_slope: 0.1 709 | } 710 | } 711 | 712 | layer{ 713 | name: "conv15" 714 | type: "Convolution" 715 | bottom: "scale14" 716 | top: "conv15" 717 | convolution_param { 718 | num_output: 512 719 | kernel_size: 1 720 | pad: 0 721 | stride: 1 722 | bias_term: false 723 | } 724 | } 725 | layer { 726 | name: "bn15" 727 | type: "BatchNorm" 728 | bottom: "conv15" 729 | top: "bn15" 730 | param { 731 | lr_mult: 0 732 | } 733 | param { 734 | lr_mult: 0 735 | } 736 | param { 737 | lr_mult: 0 738 | } 739 | } 740 | layer { 741 | name: "scale15" 742 | type: "Scale" 743 | bottom: "bn15" 744 | top: "scale15" 745 | scale_param { 746 | bias_term: true 747 | } 748 | } 749 | layer { 750 | name: "relu15" 751 | type: "ReLU" 752 | bottom: "scale15" 753 | top: "scale15" 754 | relu_param{ 755 | negative_slope: 0.1 756 | } 757 | } 758 | 759 | 760 | layer{ 761 | name: "conv16" 762 | type: "Convolution" 763 | bottom: "scale15" 764 | top: "conv16" 765 | convolution_param { 766 | num_output: 1024 767 | kernel_size: 3 768 | pad: 1 769 | stride: 1 770 | bias_term: false 771 | } 772 | } 773 | layer { 774 | name: "bn16" 775 | type: "BatchNorm" 776 | bottom: "conv16" 777 | top: "bn16" 778 | param { 779 | lr_mult: 0 780 | } 781 | param { 782 | lr_mult: 0 783 | } 784 | param { 785 | lr_mult: 0 786 | } 787 | } 788 | layer { 789 | name: "scale16" 790 | type: "Scale" 791 | bottom: "bn16" 792 | top: "scale16" 793 | scale_param { 794 | bias_term: true 795 | } 796 | } 797 | layer { 798 | name: "relu16" 799 | type: "ReLU" 800 | bottom: "scale16" 801 | top: "scale16" 802 | relu_param{ 803 | negative_slope: 0.1 804 | } 805 | } 806 | 807 | layer{ 808 | name: "conv17" 809 | type: "Convolution" 810 | bottom: "scale16" 811 | top: "conv17" 812 | convolution_param { 813 | 
num_output: 512 814 | kernel_size: 1 815 | pad: 0 816 | stride: 1 817 | bias_term: false 818 | } 819 | } 820 | layer { 821 | name: "bn17" 822 | type: "BatchNorm" 823 | bottom: "conv17" 824 | top: "bn17" 825 | param { 826 | lr_mult: 0 827 | } 828 | param { 829 | lr_mult: 0 830 | } 831 | param { 832 | lr_mult: 0 833 | } 834 | } 835 | layer { 836 | name: "scale17" 837 | type: "Scale" 838 | bottom: "bn17" 839 | top: "scale17" 840 | scale_param { 841 | bias_term: true 842 | } 843 | } 844 | layer { 845 | name: "relu17" 846 | type: "ReLU" 847 | bottom: "scale17" 848 | top: "scale17" 849 | relu_param{ 850 | negative_slope: 0.1 851 | } 852 | } 853 | 854 | 855 | layer{ 856 | name: "conv18" 857 | type: "Convolution" 858 | bottom: "scale17" 859 | top: "conv18" 860 | convolution_param { 861 | num_output: 1024 862 | kernel_size: 3 863 | pad: 1 864 | stride: 1 865 | bias_term: false 866 | } 867 | } 868 | layer { 869 | name: "bn18" 870 | type: "BatchNorm" 871 | bottom: "conv18" 872 | top: "bn18" 873 | param { 874 | lr_mult: 0 875 | } 876 | param { 877 | lr_mult: 0 878 | } 879 | param { 880 | lr_mult: 0 881 | } 882 | } 883 | layer { 884 | name: "scale18" 885 | type: "Scale" 886 | bottom: "bn18" 887 | top: "scale18" 888 | scale_param { 889 | bias_term: true 890 | } 891 | } 892 | layer { 893 | name: "relu18" 894 | type: "ReLU" 895 | bottom: "scale18" 896 | top: "scale18" 897 | relu_param{ 898 | negative_slope: 0.1 899 | } 900 | } 901 | 902 | 903 | 904 | layer{ 905 | name: "conv19" 906 | type: "Convolution" 907 | bottom: "scale18" 908 | top: "conv19" 909 | convolution_param { 910 | num_output: 1024 911 | kernel_size: 3 912 | pad: 1 913 | stride: 1 914 | bias_term: false 915 | } 916 | } 917 | layer { 918 | name: "bn19" 919 | type: "BatchNorm" 920 | bottom: "conv19" 921 | top: "bn19" 922 | param { 923 | lr_mult: 0 924 | } 925 | param { 926 | lr_mult: 0 927 | } 928 | param { 929 | lr_mult: 0 930 | } 931 | } 932 | layer { 933 | name: "scale19" 934 | type: "Scale" 935 | bottom: "bn19" 936 | top: "scale19" 937 | scale_param { 938 | bias_term: true 939 | } 940 | } 941 | layer { 942 | name: "relu19" 943 | type: "ReLU" 944 | bottom: "scale19" 945 | top: "scale19" 946 | relu_param{ 947 | negative_slope: 0.1 948 | } 949 | } 950 | 951 | 952 | 953 | layer{ 954 | name: "conv20" 955 | type: "Convolution" 956 | bottom: "scale19" 957 | top: "conv20" 958 | convolution_param { 959 | num_output: 1024 960 | kernel_size: 3 961 | pad: 1 962 | stride: 1 963 | bias_term: false 964 | } 965 | 966 | } 967 | layer { 968 | name: "bn20" 969 | type: "BatchNorm" 970 | bottom: "conv20" 971 | top: "bn20" 972 | param { 973 | lr_mult: 0 974 | } 975 | param { 976 | lr_mult: 0 977 | } 978 | param { 979 | lr_mult: 0 980 | } 981 | } 982 | layer { 983 | name: "scale20" 984 | type: "Scale" 985 | bottom: "bn20" 986 | top: "scale20" 987 | scale_param { 988 | bias_term: true 989 | } 990 | } 991 | layer { 992 | name: "relu20" 993 | type: "ReLU" 994 | bottom: "scale20" 995 | top: "scale20" 996 | relu_param { 997 | negative_slope: 0.1 998 | } 999 | } 1000 | 1001 | layer { 1002 | name: "concat1" 1003 | type: "Concat" 1004 | bottom: "scale13" 1005 | top: "concat1" 1006 | } 1007 | 1008 | layer { 1009 | name: "reorg1" 1010 | type: "Reorg" 1011 | bottom: "concat1" 1012 | top: "reorg1" 1013 | reorg_param { 1014 | stride: 2 1015 | } 1016 | } 1017 | 1018 | layer { 1019 | name: "concat2" 1020 | type: "Concat" 1021 | bottom: "reorg1" 1022 | bottom: "scale20" 1023 | top: "concat2" 1024 | } 1025 | 1026 | layer{ 1027 | name: "conv21" 1028 | type: "Convolution" 1029 | 
bottom: "concat2" 1030 | top: "conv21" 1031 | convolution_param { 1032 | num_output: 1024 1033 | kernel_size: 3 1034 | pad: 1 1035 | stride: 1 1036 | bias_term: false 1037 | } 1038 | } 1039 | layer { 1040 | name: "bn21" 1041 | type: "BatchNorm" 1042 | bottom: "conv21" 1043 | top: "bn21" 1044 | param { 1045 | lr_mult: 0 1046 | } 1047 | param { 1048 | lr_mult: 0 1049 | } 1050 | param { 1051 | lr_mult: 0 1052 | } 1053 | } 1054 | layer { 1055 | name: "scale21" 1056 | type: "Scale" 1057 | bottom: "bn21" 1058 | top: "scale21" 1059 | scale_param { 1060 | bias_term: true 1061 | } 1062 | } 1063 | layer { 1064 | name: "relu21" 1065 | type: "ReLU" 1066 | bottom: "scale21" 1067 | top: "scale21" 1068 | relu_param{ 1069 | negative_slope: 0.1 1070 | } 1071 | } 1072 | 1073 | layer { 1074 | name: "conv22" 1075 | type: "Convolution" 1076 | bottom: "scale21" 1077 | top: "conv22" 1078 | convolution_param { 1079 | num_output: 425 1080 | kernel_size: 1 1081 | pad: 0 1082 | stride: 1 1083 | } 1084 | } 1085 | 1086 | layer { 1087 | name: "region1" 1088 | type: "Region" 1089 | bottom: "conv22" 1090 | top: "region1" 1091 | region_param { 1092 | classes: 80 1093 | coords: 4 1094 | boxes_of_each_grid: 5 1095 | softmax: true 1096 | } 1097 | } 1098 | #http://ethereon.github.io/netscope/#/gist/9640ecb59a75f230446e7c70d2f8bcf3 --------------------------------------------------------------------------------
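A small sanity check of the Region layer configuration above (plain arithmetic, not part of the repo; it mirrors the indexing used by detect_tool.get_region_boxes):

```python
boxes_of_each_grid = 5
coords = 4
classes = 80

# conv22 must output boxes_of_each_grid * (coords + 1 + classes) channels per grid cell
print(boxes_of_each_grid * (coords + 1 + classes))  # 425, the num_output of conv22

# layout assumed by detect_tool.get_region_boxes for box n of grid cell i:
#   box_index   = (i * boxes_of_each_grid + n) * (classes + 5)  -> x, y, w, h
#   p_index     = box_index + 4                                 -> objectness score
#   class_index = box_index + 5                                 -> 80 class scores
```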