├── py_examples
    ├── utils
    │   ├── __init__.py
    │   └── app_utils.py
    ├── yolo_example.py
    └── object_detection_app.py
├── images
    ├── dog.jpg
    ├── camera.png
    └── yolo_dog.png
├── LICENSE
├── README.md
├── create_yolo_caffemodel.py
├── prototxt
    ├── yolo_tiny_deploy.prototxt
    ├── yolo_deploy.prototxt
    ├── yolo_small_deploy.prototxt
    ├── yolo_tiny_train_val.prototxt
    ├── yolo_train_val.prototxt
    └── yolo_small_train_val.prototxt
├── yolo_main.py
└── create_yolo_prototxt.py


/py_examples/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/images/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gudovskiy/yoloNCS/HEAD/images/dog.jpg


--------------------------------------------------------------------------------
/images/camera.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gudovskiy/yoloNCS/HEAD/images/camera.png


--------------------------------------------------------------------------------
/images/yolo_dog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gudovskiy/yoloNCS/HEAD/images/yolo_dog.png


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 gudovskiy
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # YOLO for Intel/Movidius Neural Compute Stick (NCS)
 2 | 
 3 | ## News
 4 | 
 5 | * Camera App is working.
 6 | * YOLOv1 Tiny is working.
 7 | 
 8 | ## Protobuf Model files
 9 | 
10 | ./prototxt/
11 | 
12 | ## Download Pretrained Caffe Models to ./weights/
13 | 
14 | * YOLO_tiny: https://drive.google.com/file/d/0Bzy9LxvTYIgKNFEzOEdaZ3U0Nms/view?usp=sharing
15 | 
16 | ## Compilation
17 | 
18 | * Compile .prototxt and corresponding .caffemodel (with the same name) to get NCS graph file. For example: "mvNCCompile prototxt/yolo_tiny_deploy.prototxt -w weights/yolo_tiny_deploy.caffemodel -s 12"
19 | * After this step, the compiled binary file "graph" must be in the main folder of the repository.
20 | 
21 | ## Single Image Script
22 | 
23 | * Run "yolo_example.py" to process a single image. For example: "python3 py_examples/yolo_example.py images/dog.jpg" to get detections as below.
24 | 
25 | ![](/images/yolo_dog.png)
26 | 
27 | ## Camera Input Script
28 | 
29 | * Run "object_detection_app.py" to process video from your camera. For example: "python3 py_examples/object_detection_app.py" to get camera detections as below.
30 | * Modify script arguments if needed.
31 | * Press "q" to exit app.
32 | 
33 | ![](/images/camera.png)
34 | 


--------------------------------------------------------------------------------
/create_yolo_caffemodel.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Fri Apr 29 16:10:21 2016
  4 | 
  5 | @author: xingw
  6 | """
  7 | import sys,os
  8 | caffe_root = os.environ["CAFFE_ROOT"]
  9 | os.chdir(caffe_root)
 10 | print caffe_root
 11 | sys.path.insert(0, caffe_root + '/python')
 12 | import caffe
 13 | import numpy as np
 14 | import sys, getopt
 15 | 
def main(argv):
	"""Copy Darknet YOLO weights into a Caffe net and save it as a .caffemodel.

	argv is the option list without the program name:
	-m <model_file> (prototxt), -w <yoloweight_filename>,
	-o <caffemodel_output>, or -h to print usage.

	The weight file is consumed strictly in order through the running
	cursor ``count``; the order in which values are read for each layer
	(bias, optional batch-norm block, then weights) must not be changed.
	"""
	model_filename = ''
	yoloweight_filename = ''
	caffemodel_filename = ''
	try:
		opts, args = getopt.getopt(argv, "hm:w:o:")
		print opts
	except getopt.GetoptError:
		print 'create_yolo_caffemodel.py -m <model_file> -w <yoloweight_filename> -o <caffemodel_output>'
		sys.exit(2)
	for opt, arg in opts:
		if opt == '-h':
			print 'create_yolo_caffemodel.py -m <model_file> -w <yoloweight_filename> -o <caffemodel_output>'
			sys.exit()
		elif opt == "-m":
			model_filename = arg
		elif opt == "-w":
			yoloweight_filename = arg
		elif opt == "-o":
			caffemodel_filename = arg

	# NOTE(review): options that were not supplied stay '' and are passed on
	# as-is; there is no check for missing required arguments.
	print 'model file is ', model_filename
	print 'weight file is ', yoloweight_filename
	print 'output caffemodel file is ', caffemodel_filename
	net = caffe.Net(model_filename, caffe.TEST)
	params = net.params.keys()

	# read weights from file and assign to the network
	# The file is first viewed as int32 only to inspect its header.
	netWeightsInt = np.fromfile(yoloweight_filename, dtype=np.int32)
	transFlag = (netWeightsInt[0]>1000 or netWeightsInt[1]>1000) # transpose flag, the first 4 entries are major, minor, revision and net.seen
	print transFlag

	# The same file re-read as float32 gives the actual weight values.
	netWeightsFloat = np.fromfile(yoloweight_filename, dtype=np.float32)
	netWeights = netWeightsFloat[4:] # start from the 5th entry, the first 4 entries are major, minor, revision and net.seen
	print netWeights.shape
	count = 0	# index of the next unread value in netWeights
	for pr in params:
		lidx = list(net._layer_names).index(pr)
		layer = net.layers[lidx]
		print layer,transFlag
		# Stop once the file is exhausted; BatchNorm/Scale layers are exempt
		# because they take their values from the preceding conv's
		# batch_norm block rather than from the file.
		if count == netWeights.shape[0] and (layer.type != 'BatchNorm' and layer.type != 'Scale'):
			print "WARNING: no weights left for %s" % pr
			break
		if layer.type == 'Convolution':
			print pr+"(conv)"
			# bias
			# One bias value per output channel is always read from the file,
			# even when the Caffe layer itself has no bias blob.
			if len(net.params[pr]) > 1:
				bias_dim = net.params[pr][1].data.shape
			else:
				bias_dim = (net.params[pr][0].data.shape[0], )
			biasSize = np.prod(bias_dim)
			conv_bias = np.reshape(netWeights[count:count+biasSize], bias_dim)
			if len(net.params[pr]) > 1:
				assert(bias_dim == net.params[pr][1].data.shape)
				net.params[pr][1].data[...] = conv_bias
				conv_bias = None
			# Otherwise conv_bias is kept for the bias of a later Scale layer.
			count = count + biasSize
			# batch_norm
			# NOTE(review): net.layers[lidx+1] looks like it would raise
			# IndexError if a Convolution is the last layer - confirm.
			next_layer = net.layers[lidx+1]
			if next_layer.type == 'BatchNorm':
				# Three rows of per-channel values; row 0 is used as the scale,
				# row 1 as the mean and row 2 as the variance further below.
				bn_dims = (3, net.params[pr][0].data.shape[0])
				bnSize = np.prod(bn_dims)
				batch_norm = np.reshape(netWeights[count:count+bnSize], bn_dims)
				count = count + bnSize
			# weights
			dims = net.params[pr][0].data.shape
			weightSize = np.prod(dims)
			net.params[pr][0].data[...] = np.reshape(netWeights[count:count+weightSize], dims)
			count = count + weightSize
		elif layer.type == 'InnerProduct':
			print pr+"(fc)"
			# bias
			biasSize = np.prod(net.params[pr][1].data.shape)
			net.params[pr][1].data[...] = np.reshape(netWeights[count:count+biasSize], net.params[pr][1].data.shape)
			count = count + biasSize
			# weights
			dims = net.params[pr][0].data.shape
			weightSize = np.prod(dims)
			if transFlag:
				# Values are read as a (dims[1], dims[0]) matrix and transposed
				# into the blob's (dims[0], dims[1]) layout.
				net.params[pr][0].data[...] = np.reshape(netWeights[count:count+weightSize], (dims[1], dims[0])).transpose()
			else:
				print dims, count, weightSize, netWeights.shape
				net.params[pr][0].data[...] = np.reshape(netWeights[count:count+weightSize], dims)
			count = count + weightSize
		elif layer.type == 'BatchNorm':
			# NOTE(review): batch_norm is only assigned in the Convolution
			# branch; this assumes a conv followed by BatchNorm came first.
			print pr+"(batchnorm)"
			net.params[pr][0].data[...] = batch_norm[1]	# mean
			net.params[pr][1].data[...] = batch_norm[2]	# variance
			net.params[pr][2].data[...] = 1.0	# scale factor
		elif layer.type == 'Scale':
			print pr+"(scale)"
			net.params[pr][0].data[...] = batch_norm[0]	# scale
			batch_norm = None
			if len(net.params[pr]) > 1:
				net.params[pr][1].data[...] = conv_bias	# bias
				conv_bias = None
		else:
			print "WARNING: unsupported layer, "+pr
	# A leftover or shortfall is only reported; the model is saved regardless.
	if np.prod(netWeights.shape) != count:
		print "ERROR: size mismatch: %d" % count
	net.save(caffemodel_filename)

if __name__=='__main__':
	main(sys.argv[1:])
120 | 


--------------------------------------------------------------------------------
/prototxt/yolo_tiny_deploy.prototxt:
--------------------------------------------------------------------------------
  1 | name: "YOLONet"
  2 | input: "data"
  3 | input_shape {
  4 |   dim: 1
  5 |   dim: 3
  6 |   dim: 448
  7 |   dim: 448
  8 | }
  9 | 
 10 | layer {
 11 |   name: "conv1"
 12 |   type: "Convolution"
 13 |   bottom: "data"
 14 |   top: "conv1"
 15 |   convolution_param {
 16 |     num_output: 16
 17 |     kernel_size: 3
 18 |     pad: 1
 19 |     stride: 1
 20 |   }
 21 | }
 22 | layer {
 23 |   name: "relu1"
 24 |   type: "ReLU"
 25 |   bottom: "conv1"
 26 |   top: "conv1"
 27 |   relu_param{
 28 |     negative_slope: 0.1
 29 |   }
 30 | }
 31 | layer {
 32 |   name: "pool1"
 33 |   type: "Pooling"
 34 |   bottom: "conv1"
 35 |   top: "pool1"
 36 |   pooling_param {
 37 |     pool: MAX
 38 |     kernel_size: 2
 39 |     stride: 2
 40 |   }
 41 | }
 42 | layer{
 43 |   name: "conv2"
 44 |   type: "Convolution"
 45 |   bottom: "pool1"
 46 |   top: "conv2"
 47 |   convolution_param {
 48 |     num_output: 32
 49 |     kernel_size: 3
 50 |     pad: 1
 51 |     stride: 1
 52 |   }
 53 | }
 54 | layer {
 55 |   name: "relu2"
 56 |   type: "ReLU"
 57 |   bottom: "conv2"
 58 |   top: "conv2"
 59 |   relu_param{
 60 |     negative_slope: 0.1
 61 |   }
 62 | }
 63 | layer {
 64 |   name: "pool2"
 65 |   type: "Pooling"
 66 |   bottom: "conv2"
 67 |   top: "pool2"
 68 |   pooling_param {
 69 |     pool: MAX
 70 |     kernel_size: 2
 71 |     stride: 2
 72 |   }
 73 | }
 74 | layer{
 75 |   name: "conv3"
 76 |   type: "Convolution"
 77 |   bottom: "pool2"
 78 |   top: "conv3"
 79 |   convolution_param {
 80 |     num_output: 64
 81 |     kernel_size: 3
 82 |     pad: 1
 83 |     stride: 1
 84 |   }
 85 | }
 86 | layer {
 87 |   name: "relu3"
 88 |   type: "ReLU"
 89 |   bottom: "conv3"
 90 |   top: "conv3"
 91 |   relu_param{
 92 |     negative_slope: 0.1
 93 |   }
 94 | }
 95 | layer{
 96 |   name: "pool3"
 97 |   type: "Pooling"
 98 |   bottom: "conv3"
 99 |   top: "pool3"
100 |   pooling_param {
101 |     pool: MAX
102 |     kernel_size: 2
103 |     stride: 2
104 |   }
105 | }
106 | layer{
107 |   name: "conv4"
108 |   type: "Convolution"
109 |   bottom: "pool3"
110 |   top: "conv4"
111 |   convolution_param {
112 |     num_output: 128
113 |     kernel_size: 3
114 |     pad: 1
115 |     stride: 1
116 |   }
117 | }
118 | layer {
119 |   name: "relu4"
120 |   type: "ReLU"
121 |   bottom: "conv4"
122 |   top: "conv4"
123 |   relu_param{
124 |     negative_slope: 0.1
125 |   }
126 | }
127 | layer {
128 |   name: "pool4"
129 |   type: "Pooling"
130 |   bottom: "conv4"
131 |   top: "pool4"
132 |   pooling_param {
133 |     pool: MAX
134 |     kernel_size: 2
135 |     stride: 2
136 |   }
137 | }
138 | layer{
139 |   name: "conv5"
140 |   type: "Convolution"
141 |   bottom: "pool4"
142 |   top: "conv5"
143 |   convolution_param {
144 |     num_output: 256
145 |     kernel_size: 3
146 |     pad: 1
147 |     stride: 1
148 |   }
149 | }
150 | layer {
151 |   name: "relu5"
152 |   type: "ReLU"
153 |   bottom: "conv5"
154 |   top: "conv5"
155 |   relu_param{
156 |     negative_slope: 0.1
157 |   }
158 | }
159 | layer {
160 |   name: "pool5"
161 |   type: "Pooling"
162 |   bottom: "conv5"
163 |   top: "pool5"
164 |   pooling_param {
165 |     pool: MAX
166 |     kernel_size: 2
167 |     stride: 2
168 |   }
169 | }
170 | layer{
171 |   name: "conv6"
172 |   type: "Convolution"
173 |   bottom: "pool5"
174 |   top: "conv6"
175 |   convolution_param {
176 |     num_output: 512
177 |     kernel_size: 3
178 |     pad: 1
179 |     stride: 1
180 |   }
181 | }
182 | layer {
183 |   name: "relu6"
184 |   type: "ReLU"
185 |   bottom: "conv6"
186 |   top: "conv6"
187 |   relu_param{
188 |     negative_slope: 0.1
189 |   }
190 | }
191 | layer {
192 |   name: "pool6"
193 |   type: "Pooling"
194 |   bottom: "conv6"
195 |   top: "pool6"
196 |   pooling_param {
197 |     pool: MAX
198 |     kernel_size: 2
199 |     stride: 2
200 |   }
201 | }
202 | layer{
203 |   name: "conv7"
204 |   type: "Convolution"
205 |   bottom: "pool6"
206 |   top: "conv7"
207 |   convolution_param {
208 |     num_output: 1024
209 |     pad: 1
210 |     kernel_size: 3
211 |     stride: 1
212 |   }
213 | }
214 | layer {
215 |   name: "relu7"
216 |   type: "ReLU"
217 |   bottom: "conv7"
218 |   top: "conv7"
219 |   relu_param{
220 |     negative_slope: 0.1
221 |   }
222 | }
223 | layer{
224 |   name: "conv8"
225 |   type: "Convolution"
226 |   bottom: "conv7"
227 |   top: "conv8"
228 |   convolution_param {
229 |     num_output: 1024
230 |     kernel_size: 3
231 |     pad: 1
232 |     stride: 1
233 |   }
234 | }
235 | layer {
236 |   name: "relu8"
237 |   type: "ReLU"
238 |   bottom: "conv8"
239 |   top: "conv8"
240 |   relu_param{
241 |     negative_slope: 0.1
242 |   }
243 | }
244 | layer{
245 |   name: "conv9"
246 |   type: "Convolution"
247 |   bottom: "conv8"
248 |   top: "conv9"
249 |   convolution_param {
250 |     num_output: 1024
251 |     kernel_size: 3
252 |     pad: 1
253 |     stride: 1
254 |   }
255 | }
256 | layer {
257 |   name: "relu9"
258 |   type: "ReLU"
259 |   bottom: "conv9"
260 |   top: "conv9"
261 |   relu_param{
262 |     negative_slope: 0.1
263 |   }
264 | }
265 | layer{
266 |   name: "fc10"
267 |   type: "InnerProduct"
268 |   bottom: "conv9"
269 |   top: "fc10"
270 |   inner_product_param {
271 |     num_output: 256
272 |   }
273 | }
274 | layer {
275 |   name: "fc11"
276 |   type: "InnerProduct"
277 |   bottom: "fc10"
278 |   top: "fc11"
279 |   inner_product_param {
280 |     num_output: 4096
281 |   }
282 | }
283 | layer {
284 |   name: "relu11"
285 |   type: "ReLU"
286 |   bottom: "fc11"
287 |   top: "fc11"
288 |   relu_param{
289 |     negative_slope: 0.1
290 |   }
291 | }
292 | 
293 | layer {
294 |   name: "fc12"
295 |   type: "InnerProduct"
296 |   bottom: "fc11"
297 |   top: "fc12"
298 |   inner_product_param {
299 |     num_output: 1470
300 |   }
301 | }
302 | 


--------------------------------------------------------------------------------
/py_examples/yolo_example.py:
--------------------------------------------------------------------------------
  1 | from mvnc import mvncapi as mvnc
  2 | import sys,os,time,csv,getopt,cv2
  3 | import numpy as np
  4 | from datetime import datetime
  5 | from skimage.transform import resize
  6 | 
  7 | def interpret_output(output, img_width, img_height):
  8 | 	classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
  9 | 	w_img = img_width
 10 | 	h_img = img_height
 11 | 	print ((w_img, h_img))
 12 | 	threshold = 0.2
 13 | 	iou_threshold = 0.5
 14 | 	num_class = 20
 15 | 	num_box = 2
 16 | 	grid_size = 7
 17 | 	probs = np.zeros((7,7,2,20))
 18 | 	class_probs = (np.reshape(output[0:980],(7,7,20)))#.copy()
 19 | 	#print(class_probs)
 20 | 	scales = (np.reshape(output[980:1078],(7,7,2)))#.copy()
 21 | 	#print(scales)
 22 | 	boxes = (np.reshape(output[1078:],(7,7,2,4)))#.copy()
 23 | 	offset = np.transpose(np.reshape(np.array([np.arange(7)]*14),(2,7,7)),(1,2,0))
 24 | 	#boxes.setflags(write=1)
 25 | 	boxes[:,:,:,0] += offset
 26 | 	boxes[:,:,:,1] += np.transpose(offset,(1,0,2))
 27 | 	boxes[:,:,:,0:2] = boxes[:,:,:,0:2] / 7.0
 28 | 	boxes[:,:,:,2] = np.multiply(boxes[:,:,:,2],boxes[:,:,:,2])
 29 | 	boxes[:,:,:,3] = np.multiply(boxes[:,:,:,3],boxes[:,:,:,3])
 30 | 
 31 | 	boxes[:,:,:,0] *= w_img
 32 | 	boxes[:,:,:,1] *= h_img
 33 | 	boxes[:,:,:,2] *= w_img
 34 | 	boxes[:,:,:,3] *= h_img
 35 | 
 36 | 	for i in range(2):
 37 | 		for j in range(20):
 38 | 			probs[:,:,i,j] = np.multiply(class_probs[:,:,j],scales[:,:,i])
 39 | 	#print (probs)
 40 | 	filter_mat_probs = np.array(probs>=threshold,dtype='bool')
 41 | 	filter_mat_boxes = np.nonzero(filter_mat_probs)
 42 | 	boxes_filtered = boxes[filter_mat_boxes[0],filter_mat_boxes[1],filter_mat_boxes[2]]
 43 | 	probs_filtered = probs[filter_mat_probs]
 44 | 	classes_num_filtered = np.argmax(probs,axis=3)[filter_mat_boxes[0],filter_mat_boxes[1],filter_mat_boxes[2]]
 45 | 
 46 | 	argsort = np.array(np.argsort(probs_filtered))[::-1]
 47 | 	boxes_filtered = boxes_filtered[argsort]
 48 | 	probs_filtered = probs_filtered[argsort]
 49 | 	classes_num_filtered = classes_num_filtered[argsort]
 50 | 
 51 | 	for i in range(len(boxes_filtered)):
 52 | 		if probs_filtered[i] == 0 : continue
 53 | 		for j in range(i+1,len(boxes_filtered)):
 54 | 			if iou(boxes_filtered[i],boxes_filtered[j]) > iou_threshold :
 55 | 				probs_filtered[j] = 0.0
 56 | 
 57 | 	filter_iou = np.array(probs_filtered>0.0,dtype='bool')
 58 | 	boxes_filtered = boxes_filtered[filter_iou]
 59 | 	probs_filtered = probs_filtered[filter_iou]
 60 | 	classes_num_filtered = classes_num_filtered[filter_iou]
 61 | 
 62 | 	result = []
 63 | 	for i in range(len(boxes_filtered)):
 64 | 		result.append([classes[classes_num_filtered[i]],boxes_filtered[i][0],boxes_filtered[i][1],boxes_filtered[i][2],boxes_filtered[i][3],probs_filtered[i]])
 65 | 
 66 | 	return result
 67 | 
 68 | def iou(box1,box2):
 69 | 	tb = min(box1[0]+0.5*box1[2],box2[0]+0.5*box2[2])-max(box1[0]-0.5*box1[2],box2[0]-0.5*box2[2])
 70 | 	lr = min(box1[1]+0.5*box1[3],box2[1]+0.5*box2[3])-max(box1[1]-0.5*box1[3],box2[1]-0.5*box2[3])
 71 | 	if tb < 0 or lr < 0 : intersection = 0
 72 | 	else : intersection =  tb*lr
 73 | 	return intersection / (box1[2]*box1[3] + box2[2]*box2[3] - intersection)
 74 | 
 75 | 
 76 | def show_results(img, results, img_width, img_height):
 77 | 	img_cp = img.copy()
 78 | 	disp_console = True
 79 | 	imshow = True
 80 | #	if self.filewrite_txt :
 81 | #		ftxt = open(self.tofile_txt,'w')
 82 | 	for i in range(len(results)):
 83 | 		x = int(results[i][1])
 84 | 		y = int(results[i][2])
 85 | 		w = int(results[i][3])//2
 86 | 		h = int(results[i][4])//2
 87 | 		if disp_console : print ('    class : ' + results[i][0] + ' , [x,y,w,h]=[' + str(x) + ',' + str(y) + ',' + str(int(results[i][3])) + ',' + str(int(results[i][4]))+'], Confidence = ' + str(results[i][5]) )
 88 | 		xmin = x-w
 89 | 		xmax = x+w
 90 | 		ymin = y-h
 91 | 		ymax = y+h
 92 | 		if xmin<0:
 93 | 			xmin = 0
 94 | 		if ymin<0:
 95 | 			ymin = 0
 96 | 		if xmax>img_width:
 97 | 			xmax = img_width
 98 | 		if ymax>img_height:
 99 | 			ymax = img_height
100 | 		if  imshow:
101 | 			cv2.rectangle(img_cp,(xmin,ymin),(xmax,ymax),(0,255,0),2)
102 | 			#print ((xmin, ymin, xmax, ymax))
103 | 			cv2.rectangle(img_cp,(xmin,ymin-20),(xmax,ymin),(125,125,125),-1)
104 | 			cv2.putText(img_cp,results[i][0] + ' : %.2f' % results[i][5],(xmin+5,ymin-7),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,0),1)
105 | 	if imshow :
106 | 		cv2.imshow('YOLO detection',img_cp)
107 | 		cv2.waitKey(1000)
108 | 
# Script body: run one image through the compiled NCS graph and show the
# detections.
if len(sys.argv) != 2:
	print ("YOLOv1 Tiny example: python3 py_examples/yolo_example.py images/dog.jpg")
	sys.exit()

network_blob='graph'
dim=(448,448)

# Load the image before touching the device so a bad path fails cleanly.
img = cv2.imread(sys.argv[1])
if img is None:
	# cv2.imread signals an unreadable file by returning None.
	sys.exit('Could not read image: ' + sys.argv[1])
# image preprocess: scale to [0,1], resize to the network input size and
# reverse the channel order.
im = resize(img.copy()/255.0,dim,1)
im = im[:,:,(2,1,0)]

# configuration NCS
mvnc.SetGlobalOption(mvnc.GlobalOption.LOG_LEVEL, 2)
devices = mvnc.EnumerateDevices()
if len(devices) == 0:
	print('No devices found')
	sys.exit()
device = mvnc.Device(devices[0])
device.OpenDevice()
try:
	# load blob
	with open(network_blob, mode='rb') as f:
		blob = f.read()
	graph = device.AllocateGraph(blob)
	try:
		graph.SetGraphOption(mvnc.GraphOption.ITERATIONS, 1)
		start = datetime.now()
		graph.LoadTensor(im.astype(np.float16), 'user object')
		out, userobj = graph.GetResult()
		end = datetime.now()
		elapsedTime = end-start
		print ('total time is %.2f milliseconds' % (elapsedTime.total_seconds()*1000))
		results = interpret_output(out.astype(np.float32), img.shape[1], img.shape[0]) # fc27 instead of fc12 for yolo_small
		show_results(img, results, img.shape[1], img.shape[0])
		cv2.waitKey(10000)
	finally:
		# Always release the graph, even if inference or display failed.
		graph.DeallocateGraph()
finally:
	# Always release the device, even if the graph could not be loaded.
	device.CloseDevice()
153 | 


--------------------------------------------------------------------------------
/yolo_main.py:
--------------------------------------------------------------------------------
  1 | import sys,os
  2 | caffe_root = os.environ["CAFFE_ROOT"]
  3 | os.chdir(caffe_root)
  4 | print caffe_root
  5 | sys.path.insert(0, caffe_root + '/python')
  6 | import caffe
  7 | GPU_ID = 0 # Switch between 0 and 1 depending on the GPU you want to use.
  8 | caffe.set_mode_gpu()
  9 | caffe.set_device(GPU_ID)
 10 | # caffe.set_mode_cpu()
 11 | from datetime import datetime
 12 | import numpy as np
 13 | import sys, getopt
 14 | import cv2
 15 | 
 16 | def interpret_output(output, img_width, img_height):
 17 | 	classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
 18 | 	w_img = img_width
 19 | 	h_img = img_height
 20 | 	print w_img, h_img
 21 | 	threshold = 0.2
 22 | 	iou_threshold = 0.5
 23 | 	num_class = 20
 24 | 	num_box = 2
 25 | 	grid_size = 7
 26 | 	probs = np.zeros((7,7,2,20))
 27 | 	class_probs = np.reshape(output[0:980],(7,7,20))
 28 | #	print class_probs
 29 | 	scales = np.reshape(output[980:1078],(7,7,2))
 30 | #	print scales
 31 | 	boxes = np.reshape(output[1078:],(7,7,2,4))
 32 | 	offset = np.transpose(np.reshape(np.array([np.arange(7)]*14),(2,7,7)),(1,2,0))
 33 | 
 34 | 	boxes[:,:,:,0] += offset
 35 | 	boxes[:,:,:,1] += np.transpose(offset,(1,0,2))
 36 | 	boxes[:,:,:,0:2] = boxes[:,:,:,0:2] / 7.0
 37 | 	boxes[:,:,:,2] = np.multiply(boxes[:,:,:,2],boxes[:,:,:,2])
 38 | 	boxes[:,:,:,3] = np.multiply(boxes[:,:,:,3],boxes[:,:,:,3])
 39 | 
 40 | 	boxes[:,:,:,0] *= w_img
 41 | 	boxes[:,:,:,1] *= h_img
 42 | 	boxes[:,:,:,2] *= w_img
 43 | 	boxes[:,:,:,3] *= h_img
 44 | 
 45 | 	for i in range(2):
 46 | 		for j in range(20):
 47 | 			probs[:,:,i,j] = np.multiply(class_probs[:,:,j],scales[:,:,i])
 48 | 	filter_mat_probs = np.array(probs>=threshold,dtype='bool')
 49 | 	filter_mat_boxes = np.nonzero(filter_mat_probs)
 50 | 	boxes_filtered = boxes[filter_mat_boxes[0],filter_mat_boxes[1],filter_mat_boxes[2]]
 51 | 	probs_filtered = probs[filter_mat_probs]
 52 | 	classes_num_filtered = np.argmax(probs,axis=3)[filter_mat_boxes[0],filter_mat_boxes[1],filter_mat_boxes[2]]
 53 | 
 54 | 	argsort = np.array(np.argsort(probs_filtered))[::-1]
 55 | 	boxes_filtered = boxes_filtered[argsort]
 56 | 	probs_filtered = probs_filtered[argsort]
 57 | 	classes_num_filtered = classes_num_filtered[argsort]
 58 | 
 59 | 	for i in range(len(boxes_filtered)):
 60 | 		if probs_filtered[i] == 0 : continue
 61 | 		for j in range(i+1,len(boxes_filtered)):
 62 | 			if iou(boxes_filtered[i],boxes_filtered[j]) > iou_threshold :
 63 | 				probs_filtered[j] = 0.0
 64 | 
 65 | 	filter_iou = np.array(probs_filtered>0.0,dtype='bool')
 66 | 	boxes_filtered = boxes_filtered[filter_iou]
 67 | 	probs_filtered = probs_filtered[filter_iou]
 68 | 	classes_num_filtered = classes_num_filtered[filter_iou]
 69 | 
 70 | 	result = []
 71 | 	for i in range(len(boxes_filtered)):
 72 | 		result.append([classes[classes_num_filtered[i]],boxes_filtered[i][0],boxes_filtered[i][1],boxes_filtered[i][2],boxes_filtered[i][3],probs_filtered[i]])
 73 | 
 74 | 	return result
 75 | 
 76 | def iou(box1,box2):
 77 | 	tb = min(box1[0]+0.5*box1[2],box2[0]+0.5*box2[2])-max(box1[0]-0.5*box1[2],box2[0]-0.5*box2[2])
 78 | 	lr = min(box1[1]+0.5*box1[3],box2[1]+0.5*box2[3])-max(box1[1]-0.5*box1[3],box2[1]-0.5*box2[3])
 79 | 	if tb < 0 or lr < 0 : intersection = 0
 80 | 	else : intersection =  tb*lr
 81 | 	return intersection / (box1[2]*box1[3] + box2[2]*box2[3] - intersection)
 82 | 
 83 | 
 84 | def show_results(img,results, img_width, img_height):
 85 | 	img_cp = img.copy()
 86 | 	disp_console = True
 87 | 	imshow = True
 88 | #	if self.filewrite_txt :
 89 | #		ftxt = open(self.tofile_txt,'w')
 90 | 	for i in range(len(results)):
 91 | 		x = int(results[i][1])
 92 | 		y = int(results[i][2])
 93 | 		w = int(results[i][3])//2
 94 | 		h = int(results[i][4])//2
 95 | 		if disp_console : print '    class : ' + results[i][0] + ' , [x,y,w,h]=[' + str(x) + ',' + str(y) + ',' + str(int(results[i][3])) + ',' + str(int(results[i][4]))+'], Confidence = ' + str(results[i][5])
 96 | 		xmin = x-w
 97 | 		xmax = x+w
 98 | 		ymin = y-h
 99 | 		ymax = y+h
100 | 		if xmin<0:
101 | 			xmin = 0
102 | 		if ymin<0:
103 | 			ymin = 0
104 | 		if xmax>img_width:
105 | 			xmax = img_width
106 | 		if ymax>img_height:
107 | 			ymax = img_height
108 | 		if  imshow:
109 | 			cv2.rectangle(img_cp,(xmin,ymin),(xmax,ymax),(0,255,0),2)
110 | 			print xmin, ymin, xmax, ymax
111 | 			cv2.rectangle(img_cp,(xmin,ymin-20),(xmax,ymin),(125,125,125),-1)
112 | 			cv2.putText(img_cp,results[i][0] + ' : %.2f' % results[i][5],(xmin+5,ymin-7),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,0),1)
113 | 	if imshow :
114 | 		cv2.imshow('YOLO detection',img_cp)
115 | 		cv2.waitKey(1000)
116 | 
117 | 
118 | 
def main(argv):
	"""Run a YOLO Caffe model on one image and display the detections.

	argv is the option list without the program name:
	-m <model_file>, -w <weight_file>, -i <img_file>, or -h for usage.
	Exits with status 2 on an unknown option or when one of the three
	required options is missing.
	"""
	usage = 'yolo_main.py -m <model_file> -w <weight_file> -i <img_file>'
	model_filename = ''
	weight_filename = ''
	img_filename = ''
	try:
		opts, args = getopt.getopt(argv, "hm:w:i:")
		print(opts)
	except getopt.GetoptError:
		print(usage)
		sys.exit(2)
	for opt, arg in opts:
		if opt == '-h':
			print(usage)
			sys.exit()
		elif opt == "-m":
			model_filename = arg
		elif opt == "-w":
			weight_filename = arg
		elif opt == "-i":
			img_filename = arg
	if not (model_filename and weight_filename and img_filename):
		# Fail early with the usage text instead of inside Caffe.
		print(usage)
		sys.exit(2)
	print('model file is "%s"' % model_filename)
	print('weight file is "%s"' % weight_filename)
	print('image file is "%s"' % img_filename)
	net = caffe.Net(model_filename, weight_filename, caffe.TEST)
	img = caffe.io.load_image(img_filename) # load the image using caffe io
	inputs = img
	transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
	transformer.set_transpose('data', (2,0,1))
	start = datetime.now()
	out = net.forward_all(data=np.asarray([transformer.preprocess('data', inputs)]))
	end = datetime.now()
	elapsedTime = end-start
	print('total time is %.2f milliseconds' % (elapsedTime.total_seconds()*1000))
	# Show which output blobs came back and their shapes.
	print(dict((name, blob.shape) for name, blob in out.items()))
	img_cv = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
	# Prefer a blob named 'result'; otherwise use the net's first output
	# blob (e.g. 'fc12' for the tiny deploy model).
	output_name = 'result' if 'result' in out else net.outputs[0]
	results = interpret_output(out[output_name][0], img.shape[1], img.shape[0])
	show_results(img_cv,results, img.shape[1], img.shape[0])
	cv2.waitKey(10000)



if __name__=='__main__':
	main(sys.argv[1:])
162 | 


--------------------------------------------------------------------------------
/py_examples/object_detection_app.py:
--------------------------------------------------------------------------------
  1 | import os,time,cv2,argparse,multiprocessing
  2 | import numpy as np
  3 | from mvnc import mvncapi as mvnc
  4 | from skimage.transform import resize
  5 | from utils.app_utils import FPS, WebcamVideoStream
  6 | from multiprocessing import Queue, Pool
  7 | 
  8 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
  9 | dim=(448,448)
 10 | threshold = 0.2
 11 | iou_threshold = 0.5
 12 | num_class = 20
 13 | num_box = 2
 14 | grid_size = 7
 15 | 
 16 | def show_results(img, results, img_width, img_height):
 17 |     img_cp = img
 18 |     disp_console = False
 19 |     imshow = True
 20 |     for i in range(len(results)):
 21 |         x = int(results[i][1])
 22 |         y = int(results[i][2])
 23 |         w = int(results[i][3])//2
 24 |         h = int(results[i][4])//2
 25 |         if disp_console : print ('    class : ' + results[i][0] + ' , [x,y,w,h]=[' + str(x) + ',' + str(y) + ',' + str(int(results[i][3])) + ',' + str(int(results[i][4]))+'], Confidence = ' + str(results[i][5]) )
 26 |         xmin = x-w
 27 |         xmax = x+w
 28 |         ymin = y-h
 29 |         ymax = y+h
 30 |         if xmin<0:
 31 |         	xmin = 0
 32 |         if ymin<0:
 33 |         	ymin = 0
 34 |         if xmax>img_width:
 35 |         	xmax = img_width
 36 |         if ymax>img_height:
 37 |         	ymax = img_height
 38 |         if  imshow:
 39 |         	cv2.rectangle(img_cp,(xmin,ymin),(xmax,ymax),(0,255,0),2)
 40 |         	#print ((xmin, ymin, xmax, ymax))
 41 |         	cv2.rectangle(img_cp,(xmin,ymin-20),(xmax,ymin),(125,125,125),-1)
 42 |         	cv2.putText(img_cp,results[i][0] + ' : %.2f' % results[i][5],(xmin+5,ymin-7),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,0),1)
 43 |     #
 44 |     cv2.imshow('YOLO detection',img_cp)
 45 | 
 46 | 
 47 | def interpret_output(output, img_width, img_height):
 48 |     w_img = img_width
 49 |     h_img = img_height
 50 |     probs = np.zeros((7,7,2,20))
 51 |     class_probs = (np.reshape(output[0:980],(7,7,20)))
 52 |     #print(class_probs)
 53 |     scales = (np.reshape(output[980:1078],(7,7,2)))
 54 |     #print(scales)
 55 |     boxes = (np.reshape(output[1078:],(7,7,2,4)))
 56 |     offset = np.transpose(np.reshape(np.array([np.arange(7)]*14),(2,7,7)),(1,2,0))
 57 |     #boxes.setflags(write=1)
 58 |     boxes[:,:,:,0] += offset
 59 |     boxes[:,:,:,1] += np.transpose(offset,(1,0,2))
 60 |     boxes[:,:,:,0:2] = boxes[:,:,:,0:2] / 7.0
 61 |     boxes[:,:,:,2] = np.multiply(boxes[:,:,:,2],boxes[:,:,:,2])
 62 |     boxes[:,:,:,3] = np.multiply(boxes[:,:,:,3],boxes[:,:,:,3])
 63 | 
 64 |     boxes[:,:,:,0] *= w_img
 65 |     boxes[:,:,:,1] *= h_img
 66 |     boxes[:,:,:,2] *= w_img
 67 |     boxes[:,:,:,3] *= h_img
 68 | 
 69 |     for i in range(2):
 70 |     	for j in range(20):
 71 |     		probs[:,:,i,j] = np.multiply(class_probs[:,:,j],scales[:,:,i])
 72 |     #print (probs)
 73 |     filter_mat_probs = np.array(probs>=threshold,dtype='bool')
 74 |     filter_mat_boxes = np.nonzero(filter_mat_probs)
 75 |     boxes_filtered = boxes[filter_mat_boxes[0],filter_mat_boxes[1],filter_mat_boxes[2]]
 76 |     probs_filtered = probs[filter_mat_probs]
 77 |     classes_num_filtered = np.argmax(probs,axis=3)[filter_mat_boxes[0],filter_mat_boxes[1],filter_mat_boxes[2]]
 78 | 
 79 |     argsort = np.array(np.argsort(probs_filtered))[::-1]
 80 |     boxes_filtered = boxes_filtered[argsort]
 81 |     probs_filtered = probs_filtered[argsort]
 82 |     classes_num_filtered = classes_num_filtered[argsort]
 83 | 
 84 |     for i in range(len(boxes_filtered)):
 85 |     	if probs_filtered[i] == 0 : continue
 86 |     	for j in range(i+1,len(boxes_filtered)):
 87 |     		if iou(boxes_filtered[i],boxes_filtered[j]) > iou_threshold :
 88 |     			probs_filtered[j] = 0.0
 89 | 
 90 |     filter_iou = np.array(probs_filtered>0.0,dtype='bool')
 91 |     boxes_filtered = boxes_filtered[filter_iou]
 92 |     probs_filtered = probs_filtered[filter_iou]
 93 |     classes_num_filtered = classes_num_filtered[filter_iou]
 94 | 
 95 |     result = []
 96 |     for i in range(len(boxes_filtered)):
 97 |     	result.append([classes[classes_num_filtered[i]],boxes_filtered[i][0],boxes_filtered[i][1],boxes_filtered[i][2],boxes_filtered[i][3],probs_filtered[i]])
 98 | 
 99 |     return result
100 | 
def iou(box1, box2):
    """Return the intersection-over-union of two boxes.

    Each box is an indexable ``(x_center, y_center, width, height)``.

    Returns:
      ``intersection / union`` of the two rectangles, or ``0.0`` when the
      union has no area (e.g. both boxes are degenerate), instead of
      dividing by zero.
    """
    overlap_w = (min(box1[0] + 0.5 * box1[2], box2[0] + 0.5 * box2[2])
                 - max(box1[0] - 0.5 * box1[2], box2[0] - 0.5 * box2[2]))
    overlap_h = (min(box1[1] + 0.5 * box1[3], box2[1] + 0.5 * box2[3])
                 - max(box1[1] - 0.5 * box1[3], box2[1] - 0.5 * box2[3]))
    if overlap_w < 0 or overlap_h < 0:
        intersection = 0
    else:
        intersection = overlap_w * overlap_h

    union = box1[2] * box1[3] + box2[2] * box2[3] - intersection
    # ``not union > 0`` also catches NaN, which ``union <= 0`` would miss.
    if not union > 0:
        return 0.0
    return intersection / union
107 | 
def worker(graph, input_q, output_q):
    """Run inference forever on frames taken from ``input_q``.

    For every frame: scale pixel values by 1/255, resize with the
    module-level ``resize``/``dim``, reverse the channel order (presumably
    BGR -> RGB; confirm against the capture source), cast to float16, run it
    through ``graph`` and decode the result with ``interpret_output``.

    Args:
      graph: object exposing ``LoadTensor`` and ``GetResult``.
      input_q: queue the frames are read from (blocking ``get``).
      output_q: queue receiving ``(frame, results, width, height)`` tuples,
        where width/height are ``frame.shape[1]`` / ``frame.shape[0]``.
    """
    fps = FPS().start()
    try:
        while True:
            fps.update()
            frame = input_q.get()
            tensor = resize(frame / 255.0, dim, 1)[:, :, (2, 1, 0)].astype(np.float16)
            graph.LoadTensor(tensor, 'user object')
            out, userobj = graph.GetResult()
            results = interpret_output(out.astype(np.float32), frame.shape[1], frame.shape[0])
            output_q.put((frame, results, frame.shape[1], frame.shape[0]))
    finally:
        # The loop only ends through an exception; stop the timer on the way
        # out (this call used to sit after the loop and was unreachable).
        fps.stop()
122 | 
if __name__ == '__main__':
    # Script entry point: parse options, open the first NCS device, load the
    # compiled network from the file 'graph', then loop: grab a camera frame,
    # hand it to the worker pool, and display the decoded detections until
    # 'q' is pressed.
    parser = argparse.ArgumentParser()
    parser.add_argument('-src', '--source', dest='video_source', type=int,
                        default=0, help='Device index of the camera.')
    parser.add_argument('-wd', '--width', dest='width', type=int,
                        default=800, help='Width of the frames in the video stream.')
    parser.add_argument('-ht', '--height', dest='height', type=int,
                        default=600, help='Height of the frames in the video stream.')
    parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int,
                        default=2, help='Number of workers.')
    parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int,
                        default=5, help='Size of the queue.')
    args = parser.parse_args()

    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)

    input_q = Queue(maxsize=args.queue_size)
    output_q = Queue(maxsize=args.queue_size)

    # configuration NCS
    network_blob = 'graph'
    mvnc.SetGlobalOption(mvnc.GlobalOption.LOG_LEVEL, 2)
    devices = mvnc.EnumerateDevices()
    if len(devices) == 0:
        print('No devices found')
        # Non-zero status so callers can tell the run failed; quit() is a
        # site-module helper meant for the interactive interpreter.
        raise SystemExit(1)
    device = mvnc.Device(devices[0])
    device.OpenDevice()

    graph = None
    pool = None
    video_capture = None
    try:
        opt = device.GetDeviceOption(mvnc.DeviceOption.OPTIMISATION_LIST)
        # load blob
        with open(network_blob, mode='rb') as f:
            blob = f.read()
        graph = device.AllocateGraph(blob)
        graph.SetGraphOption(mvnc.GraphOption.ITERATIONS, 1)
        iterations = graph.GetGraphOption(mvnc.GraphOption.ITERATIONS)

        # Each pool process runs worker(graph, input_q, output_q) as its
        # initializer, which never returns.
        pool = Pool(args.num_workers, worker, (graph, input_q, output_q))

        video_capture = WebcamVideoStream(src=args.video_source,
                                          width=args.width,
                                          height=args.height).start()
        fps = FPS().start()

        while True:  # fps._numFrames < 120
            frame = video_capture.read()
            if frame is None:
                # No frame available: queueing None would crash the worker
                # and leave this loop blocked on output_q.get() forever.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                continue
            input_q.put(frame)
            t = time.time()
            (img, results, img_width, img_height) = output_q.get()
            show_results(img, results, img_width, img_height)
            fps.update()
            print('[INFO] elapsed time: {:.2f}'.format(time.time() - t))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        fps.stop()
        print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
        print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
    finally:
        # Release everything that was acquired, even when the loop above
        # raised, so the device is not left open.
        if pool is not None:
            pool.terminate()
        if video_capture is not None:
            video_capture.stop()
        cv2.destroyAllWindows()
        if graph is not None:
            graph.DeallocateGraph()
        device.CloseDevice()
188 | 


--------------------------------------------------------------------------------
/py_examples/utils/app_utils.py:
--------------------------------------------------------------------------------
  1 | # From http://www.pyimagesearch.com/2015/12/21/increasing-webcam-fps-with-python-and-opencv/
  2 | 
  3 | import struct
  4 | import six
  5 | import collections
  6 | import cv2
  7 | import datetime
  8 | from threading import Thread
  9 | from matplotlib import colors
 10 | 
 11 | 
 12 | class FPS:
 13 |     def __init__(self):
 14 |         # store the start time, end time, and total number of frames
 15 |         # that were examined between the start and end intervals
 16 |         self._start = None
 17 |         self._end = None
 18 |         self._numFrames = 0
 19 | 
 20 |     def start(self):
 21 |         # start the timer
 22 |         self._start = datetime.datetime.now()
 23 |         return self
 24 | 
 25 |     def stop(self):
 26 |         # stop the timer
 27 |         self._end = datetime.datetime.now()
 28 | 
 29 |     def update(self):
 30 |         # increment the total number of frames examined during the
 31 |         # start and end intervals
 32 |         self._numFrames += 1
 33 | 
 34 |     def elapsed(self):
 35 |         # return the total number of seconds between the start and
 36 |         # end interval
 37 |         return (self._end - self._start).total_seconds()
 38 | 
 39 |     def fps(self):
 40 |         # compute the (approximate) frames per second
 41 |         return self._numFrames / self.elapsed()
 42 | 
 43 | 
 44 | class WebcamVideoStream:
 45 |     def __init__(self, src, width, height):
 46 |         # initialize the video camera stream and read the first frame
 47 |         # from the stream
 48 |         #print(src)
 49 |         self.stream = cv2.VideoCapture(src)
 50 |         self.stream.set(cv2.CAP_PROP_FRAME_WIDTH, width)
 51 |         self.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
 52 |         (self.grabbed, self.frame) = self.stream.read()
 53 | 
 54 |         # initialize the variable used to indicate if the thread should
 55 |         # be stopped
 56 |         self.stopped = False
 57 | 
 58 |     def start(self):
 59 |         # start the thread to read frames from the video stream
 60 |         Thread(target=self.update, args=()).start()
 61 |         return self
 62 | 
 63 |     def update(self):
 64 |         # keep looping infinitely until the thread is stopped
 65 |         while True:
 66 |             # if the thread indicator variable is set, stop the thread
 67 |             if self.stopped:
 68 |                 return
 69 | 
 70 |             # otherwise, read the next frame from the stream
 71 |             (self.grabbed, self.frame) = self.stream.read()
 72 | 
 73 |     def read(self):
 74 |         # return the frame most recently read
 75 |         return self.frame
 76 | 
 77 |     def stop(self):
 78 |         # indicate that the thread should be stopped
 79 |         self.stopped = True
 80 | 
 81 | 
 82 | def standard_colors():
 83 |     colors = [
 84 |         'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
 85 |         'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
 86 |         'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
 87 |         'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
 88 |         'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
 89 |         'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
 90 |         'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
 91 |         'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
 92 |         'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
 93 |         'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
 94 |         'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
 95 |         'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
 96 |         'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
 97 |         'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
 98 |         'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
 99 |         'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
100 |         'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
101 |         'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
102 |         'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
103 |         'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
104 |         'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
105 |         'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
106 |         'WhiteSmoke', 'Yellow', 'YellowGreen'
107 |     ]
108 |     return colors
109 | 
110 | 
def color_name_to_rgb():
    """Build a dict mapping each name in ``colors.cnames`` to an RGB tuple.

    ``colors`` is the module imported from matplotlib at the top of the
    file. Each hex string has its '#' removed and is decoded into three
    unsigned bytes, giving a ``(r, g, b)`` tuple of ints.
    """
    return {
        name: struct.unpack('BBB', bytes.fromhex(hex_code.replace('#', '')))
        for name, hex_code in colors.cnames.items()
    }
116 | 
117 | 
def draw_boxes_and_labels(
        boxes,
        classes,
        scores,
        category_index,
        instance_masks=None,
        keypoints=None,
        max_boxes_to_draw=20,
        min_score_thresh=.5,
        agnostic_mode=False):
    """Returns boxes coordinates, class names and colors

    Args:
      boxes: a numpy array of shape [N, 4]; each row is unpacked as
        (ymin, xmin, ymax, xmax).
      classes: a numpy array of shape [N]
      scores: a numpy array of shape [N] or None.  If scores=None, then
        this function assumes that the boxes to be plotted are groundtruth
        boxes and plot all boxes as black with no classes or scores.
      category_index: a dict containing category dictionaries (each holding
        category index `id` and category name `name`) keyed by category indices.
      instance_masks: a numpy array of shape [N, image_height, image_width], can
        be None
      keypoints: a numpy array of shape [N, num_keypoints, 2], can
        be None
      max_boxes_to_draw: maximum number of boxes to visualize.  If None, draw
        all boxes.
      min_score_thresh: minimum score threshold for a box to be visualized
      agnostic_mode: boolean (default: False) controlling whether to evaluate in
        class-agnostic mode or not.  This mode will display scores but ignore
        classes.

    Returns:
      A tuple ``(rect_points, class_names, class_colors)`` of parallel lists
      with one entry per distinct box: a dict with keys ``ymin``, ``xmin``,
      ``ymax``, ``xmax``; the list of display strings for that box; and the
      box color as an RGB tuple.
    """
    # Create a display string (and color) for every box location, group any boxes
    # that correspond to the same location.
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)
    # The mask and keypoint maps are collected but are not part of the
    # return value.
    box_to_instance_masks_map = {}
    box_to_keypoints_map = collections.defaultdict(list)
    if not max_boxes_to_draw:
        max_boxes_to_draw = boxes.shape[0]

    # Build the palette once instead of twice per box.
    palette = standard_colors()

    for i in range(min(max_boxes_to_draw, boxes.shape[0])):
        if scores is not None and not scores[i] > min_score_thresh:
            continue
        box = tuple(boxes[i].tolist())
        if instance_masks is not None:
            box_to_instance_masks_map[box] = instance_masks[i]
        if keypoints is not None:
            box_to_keypoints_map[box].extend(keypoints[i])
        if scores is None:
            box_to_color_map[box] = 'black'
            continue

        if agnostic_mode:
            display_str = 'score: {}%'.format(int(100 * scores[i]))
            color = 'DarkOrange'
        else:
            if classes[i] in category_index:
                class_name = category_index[classes[i]]['name']
            else:
                class_name = 'N/A'
            display_str = '{}: {}%'.format(
                class_name,
                int(100 * scores[i]))
            color = palette[classes[i] % len(palette)]
        box_to_display_str_map[box].append(display_str)
        box_to_color_map[box] = color

    # Store all the coordinates of the boxes, class names and colors
    color_rgb = color_name_to_rgb()
    rect_points = []
    class_names = []
    class_colors = []
    for box, color in box_to_color_map.items():
        ymin, xmin, ymax, xmax = box
        rect_points.append(dict(ymin=ymin, xmin=xmin, ymax=ymax, xmax=xmax))
        class_names.append(box_to_display_str_map[box])
        # The RGB table is looked up with the lower-cased color name.
        class_colors.append(color_rgb[color.lower()])
    return rect_points, class_names, class_colors
195 | 


--------------------------------------------------------------------------------
/prototxt/yolo_deploy.prototxt:
--------------------------------------------------------------------------------
  1 | name: "YOLONet"
  2 | input: "data"
  3 | input_shape {
  4 |   dim: 1
  5 |   dim: 3
  6 |   dim: 448
  7 |   dim: 448
  8 | }
  9 | 
 10 | layer {
 11 |   name: "conv1"
 12 |   type: "Convolution"
 13 |   bottom: "data"
 14 |   top: "conv1"
 15 |   convolution_param {
 16 |     num_output: 64
 17 |     kernel_size: 7
 18 |     pad: 3
 19 |     stride: 2
 20 |   }
 21 | }
 22 | layer {
 23 |   name: "relu1"
 24 |   type: "ReLU"
 25 |   bottom: "conv1"
 26 |   top: "conv1"
 27 |   relu_param{
 28 |     negative_slope: 0.1
 29 |   }
 30 | }
 31 | layer {
 32 |   name: "pool1"
 33 |   type: "Pooling"
 34 |   bottom: "conv1"
 35 |   top: "pool1"
 36 |   pooling_param {
 37 |     pool: MAX
 38 |     kernel_size: 2
 39 |     stride: 2
 40 |   }
 41 | }
 42 | 
 43 | layer{
 44 |   name: "conv2"
 45 |   type: "Convolution"
 46 |   bottom: "pool1"
 47 |   top: "conv2"
 48 |   convolution_param {
 49 |     num_output: 192
 50 |     kernel_size: 3
 51 |     pad: 1
 52 |     stride: 1
 53 |   }
 54 | }
 55 | layer {
 56 |   name: "relu2"
 57 |   type: "ReLU"
 58 |   bottom: "conv2"
 59 |   top: "conv2"
 60 |   relu_param{
 61 |     negative_slope: 0.1
 62 |   }
 63 | }
 64 | layer {
 65 |   name: "pool2"
 66 |   type: "Pooling"
 67 |   bottom: "conv2"
 68 |   top: "pool2"
 69 |   pooling_param {
 70 |     pool: MAX
 71 |     kernel_size: 2
 72 |     stride: 2
 73 |   }
 74 | }
 75 | 
 76 | layer{
 77 |   name: "conv3"
 78 |   type: "Convolution"
 79 |   bottom: "pool2"
 80 |   top: "conv3"
 81 |   convolution_param {
 82 |     num_output: 128
 83 |     kernel_size: 1
 84 |     pad: 0
 85 |     stride: 1
 86 |   }
 87 | }
 88 | layer {
 89 |   name: "relu3"
 90 |   type: "ReLU"
 91 |   bottom: "conv3"
 92 |   top: "conv3"
 93 |   relu_param{
 94 |     negative_slope: 0.1
 95 |   }
 96 | }
 97 | 
 98 | 
 99 | layer{
100 |   name: "conv4"
101 |   type: "Convolution"
102 |   bottom: "conv3"
103 |   top: "conv4"
104 |   convolution_param {
105 |     num_output: 256
106 |     kernel_size: 3
107 |     pad: 1
108 |     stride: 1
109 |   }
110 | }
111 | layer {
112 |   name: "relu4"
113 |   type: "ReLU"
114 |   bottom: "conv4"
115 |   top: "conv4"
116 |   relu_param{
117 |     negative_slope: 0.1
118 |   }
119 | }
120 | 
121 | layer{
122 |   name: "conv5"
123 |   type: "Convolution"
124 |   bottom: "conv4"
125 |   top: "conv5"
126 |   convolution_param {
127 |     num_output: 256
128 |     kernel_size: 1
129 |     pad: 0
130 |     stride: 1
131 |   }
132 | }
133 | layer {
134 |   name: "relu5"
135 |   type: "ReLU"
136 |   bottom: "conv5"
137 |   top: "conv5"
138 |   relu_param{
139 |     negative_slope: 0.1
140 |   }
141 | }
142 | 
143 | layer{
144 |   name: "conv6"
145 |   type: "Convolution"
146 |   bottom: "conv5"
147 |   top: "conv6"
148 |   convolution_param {
149 |     num_output: 512
150 |     kernel_size: 3
151 |     pad: 1
152 |     stride: 1
153 |   }
154 | }
155 | layer {
156 |   name: "relu6"
157 |   type: "ReLU"
158 |   bottom: "conv6"
159 |   top: "conv6"
160 |   relu_param{
161 |     negative_slope: 0.1
162 |   }
163 | }
164 | layer {
165 |   name: "pool6"
166 |   type: "Pooling"
167 |   bottom: "conv6"
168 |   top: "pool6"
169 |   pooling_param {
170 |     pool: MAX
171 |     kernel_size: 2
172 |     stride: 2
173 |   }
174 | }
175 | 
176 | layer{
177 |   name: "conv7"
178 |   type: "Convolution"
179 |   bottom: "pool6"
180 |   top: "conv7"
181 |   convolution_param {
182 |     num_output: 256
183 |     kernel_size: 1
184 |     pad: 0
185 |     stride: 1
186 |   }
187 | }
188 | layer {
189 |   name: "relu7"
190 |   type: "ReLU"
191 |   bottom: "conv7"
192 |   top: "conv7"
193 |   relu_param{
194 |     negative_slope: 0.1
195 |   }
196 | }
197 | 
198 | layer{
199 |   name: "conv8"
200 |   type: "Convolution"
201 |   bottom: "conv7"
202 |   top: "conv8"
203 |   convolution_param {
204 |     num_output: 512
205 |     kernel_size: 3
206 |     pad: 1
207 |     stride: 1
208 |   }
209 | }
210 | layer {
211 |   name: "relu8"
212 |   type: "ReLU"
213 |   bottom: "conv8"
214 |   top: "conv8"
215 |   relu_param{
216 |     negative_slope: 0.1
217 |   }
218 | }
219 | 
220 | layer{
221 |   name: "conv9"
222 |   type: "Convolution"
223 |   bottom: "conv8"
224 |   top: "conv9"
225 |   convolution_param {
226 |     num_output: 256
227 |     kernel_size: 1
228 |     pad: 0
229 |     stride: 1
230 |   }
231 | }
232 | layer {
233 |   name: "relu9"
234 |   type: "ReLU"
235 |   bottom: "conv9"
236 |   top: "conv9"
237 |   relu_param{
238 |     negative_slope: 0.1
239 |   }
240 | }
241 | 
242 | layer{
243 |   name: "conv10"
244 |   type: "Convolution"
245 |   bottom: "conv9"
246 |   top: "conv10"
247 |   convolution_param {
248 |     num_output: 512
249 |     kernel_size: 3
250 |     pad: 1
251 |     stride: 1
252 |   }
253 | }
254 | layer {
255 |   name: "relu10"
256 |   type: "ReLU"
257 |   bottom: "conv10"
258 |   top: "conv10"
259 |   relu_param{
260 |     negative_slope: 0.1
261 |   }
262 | }
263 | 
264 | layer{
265 |   name: "conv11"
266 |   type: "Convolution"
267 |   bottom: "conv10"
268 |   top: "conv11"
269 |   convolution_param {
270 |     num_output: 256
271 |     kernel_size: 1
272 |     pad: 0
273 |     stride: 1
274 |   }
275 | }
276 | layer {
277 |   name: "relu11"
278 |   type: "ReLU"
279 |   bottom: "conv11"
280 |   top: "conv11"
281 |   relu_param{
282 |     negative_slope: 0.1
283 |   }
284 | }
285 | 
286 | 
287 | layer{
288 |   name: "conv12"
289 |   type: "Convolution"
290 |   bottom: "conv11"
291 |   top: "conv12"
292 |   convolution_param {
293 |     num_output: 512
294 |     kernel_size: 3
295 |     pad: 1
296 |     stride: 1
297 |   }
298 | }
299 | layer {
300 |   name: "relu12"
301 |   type: "ReLU"
302 |   bottom: "conv12"
303 |   top: "conv12"
304 |   relu_param{
305 |     negative_slope: 0.1
306 |   }
307 | }
308 | 
309 | 
310 | layer{
311 |   name: "conv13"
312 |   type: "Convolution"
313 |   bottom: "conv12"
314 |   top: "conv13"
315 |   convolution_param {
316 |     num_output: 256
317 |     kernel_size: 1
318 |     pad: 0
319 |     stride: 1
320 |   }
321 | }
322 | layer {
323 |   name: "relu13"
324 |   type: "ReLU"
325 |   bottom: "conv13"
326 |   top: "conv13"
327 |   relu_param{
328 |     negative_slope: 0.1
329 |   }
330 | }
331 | 
332 | layer{
333 |   name: "conv14"
334 |   type: "Convolution"
335 |   bottom: "conv13"
336 |   top: "conv14"
337 |   convolution_param {
338 |     num_output: 512
339 |     kernel_size: 3
340 |     pad: 1
341 |     stride: 1
342 |   }
343 | }
344 | layer {
345 |   name: "relu14"
346 |   type: "ReLU"
347 |   bottom: "conv14"
348 |   top: "conv14"
349 |   relu_param{
350 |     negative_slope: 0.1
351 |   }
352 | }
353 | 
354 | layer{
355 |   name: "conv15"
356 |   type: "Convolution"
357 |   bottom: "conv14"
358 |   top: "conv15"
359 |   convolution_param {
360 |     num_output: 512
361 |     kernel_size: 1
362 |     pad: 0
363 |     stride: 1
364 |   }
365 | }
366 | layer {
367 |   name: "relu15"
368 |   type: "ReLU"
369 |   bottom: "conv15"
370 |   top: "conv15"
371 |   relu_param{
372 |     negative_slope: 0.1
373 |   }
374 | }
375 | 
376 | 
377 | layer{
378 |   name: "conv16"
379 |   type: "Convolution"
380 |   bottom: "conv15"
381 |   top: "conv16"
382 |   convolution_param {
383 |     num_output: 1024
384 |     kernel_size: 3
385 |     pad: 1
386 |     stride: 1
387 |   }
388 | }
389 | layer {
390 |   name: "relu16"
391 |   type: "ReLU"
392 |   bottom: "conv16"
393 |   top: "conv16"
394 |   relu_param{
395 |     negative_slope: 0.1
396 |   }
397 | }
398 | 
399 | layer {
400 |   name: "pool16"
401 |   type: "Pooling"
402 |   bottom: "conv16"
403 |   top: "pool16"
404 |   pooling_param {
405 |     pool: MAX
406 |     kernel_size: 2
407 |     stride: 2
408 |   }
409 | }
410 | 
411 | 
412 | layer{
413 |   name: "conv17"
414 |   type: "Convolution"
415 |   bottom: "pool16"
416 |   top: "conv17"
417 |   convolution_param {
418 |     num_output: 512
419 |     kernel_size: 1
420 |     pad: 0
421 |     stride: 1
422 |   }
423 | }
424 | layer {
425 |   name: "relu17"
426 |   type: "ReLU"
427 |   bottom: "conv17"
428 |   top: "conv17"
429 |   relu_param{
430 |     negative_slope: 0.1
431 |   }
432 | }
433 | 
434 | 
435 | layer{
436 |   name: "conv18"
437 |   type: "Convolution"
438 |   bottom: "conv17"
439 |   top: "conv18"
440 |   convolution_param {
441 |     num_output: 1024
442 |     kernel_size: 3
443 |     pad: 1
444 |     stride: 1
445 |   }
446 | }
447 | layer {
448 |   name: "relu18"
449 |   type: "ReLU"
450 |   bottom: "conv18"
451 |   top: "conv18"
452 |   relu_param{
453 |     negative_slope: 0.1
454 |   }
455 | }
456 | 
457 | 
458 | 
459 | layer{
460 |   name: "conv19"
461 |   type: "Convolution"
462 |   bottom: "conv18"
463 |   top: "conv19"
464 |   convolution_param {
465 |     num_output: 512
466 |     kernel_size: 1
467 |     pad: 0
468 |     stride: 1
469 |   }
470 | }
471 | layer {
472 |   name: "relu19"
473 |   type: "ReLU"
474 |   bottom: "conv19"
475 |   top: "conv19"
476 |   relu_param{
477 |     negative_slope: 0.1
478 |   }
479 | }
480 | 
481 | 
482 | 
483 | layer{
484 |   name: "conv20"
485 |   type: "Convolution"
486 |   bottom: "conv19"
487 |   top: "conv20"
488 |   convolution_param {
489 |     num_output: 1024
490 |     kernel_size: 3
491 |     pad: 1
492 |     stride: 1
493 |   }
494 | }
495 | layer {
496 |   name: "relu20"
497 |   type: "ReLU"
498 |   bottom: "conv20"
499 |   top: "conv20"
500 |   relu_param{
501 |     negative_slope: 0.1
502 |   }
503 | }
504 | 
505 | 
506 | 
507 | layer{
508 |   name: "conv21"
509 |   type: "Convolution"
510 |   bottom: "conv20"
511 |   top: "conv21"
512 |   convolution_param {
513 |     num_output: 1024
514 |     kernel_size: 3
515 |     pad: 1
516 |     stride: 1
517 |   }
518 | }
519 | layer {
520 |   name: "relu21"
521 |   type: "ReLU"
522 |   bottom: "conv21"
523 |   top: "conv21"
524 |   relu_param{
525 |     negative_slope: 0.1
526 |   }
527 | }
528 | 
529 | 
530 | layer{
531 |   name: "conv22"
532 |   type: "Convolution"
533 |   bottom: "conv21"
534 |   top: "conv22"
535 |   convolution_param {
536 |     num_output: 1024
537 |     kernel_size: 3
538 |     pad: 1
539 |     stride: 2
540 |   }
541 | }
542 | layer {
543 |   name: "relu22"
544 |   type: "ReLU"
545 |   bottom: "conv22"
546 |   top: "conv22"
547 |   relu_param{
548 |     negative_slope: 0.1
549 |   }
550 | }
551 | 
552 | 
553 | 
554 | layer{
555 |   name: "conv23"
556 |   type: "Convolution"
557 |   bottom: "conv22"
558 |   top: "conv23"
559 |   convolution_param {
560 |     num_output: 1024
561 |     kernel_size: 3
562 |     pad: 1
563 |     stride: 1
564 |   }
565 | }
566 | layer {
567 |   name: "relu23"
568 |   type: "ReLU"
569 |   bottom: "conv23"
570 |   top: "conv23"
571 |   relu_param{
572 |     negative_slope: 0.1
573 |   }
574 | }
575 | 
576 | 
577 | layer{
578 |   name: "conv24"
579 |   type: "Convolution"
580 |   bottom: "conv23"
581 |   top: "conv24"
582 |   convolution_param {
583 |     num_output: 1024
584 |     kernel_size: 3
585 |     pad: 1
586 |     stride: 1
587 |   }
588 | }
589 | layer {
590 |   name: "relu24"
591 |   type: "ReLU"
592 |   bottom: "conv24"
593 |   top: "conv24"
594 |   relu_param{
595 |     negative_slope: 0.1
596 |   }
597 | }
598 | 
599 | 
600 | 
601 | 
602 | layer{
603 |   name: "fc25"
604 |   type: "InnerProduct"
605 |   bottom: "conv24"
606 |   top: "fc25"
607 |   inner_product_param {
608 |     num_output: 4096
609 |   }
610 | }
611 | layer {
612 |   name: "relu25"
613 |   type: "ReLU"
614 |   bottom: "fc25"
615 |   top: "fc25"
616 |   relu_param{
617 |     negative_slope: 0.1
618 |   }
619 | }
620 | 
621 | 
622 | layer{
623 |   name: "fc26"
624 |   type: "InnerProduct"
625 |   bottom: "fc25"
626 |   top: "fc26"
627 |   inner_product_param {
628 |     num_output: 1470
629 |   }
630 | }
631 | 


--------------------------------------------------------------------------------
/prototxt/yolo_small_deploy.prototxt:
--------------------------------------------------------------------------------
  1 | name: "YOLONet"
  2 | input: "data"
  3 | input_shape {
  4 |   dim: 1
  5 |   dim: 3
  6 |   dim: 448
  7 |   dim: 448
  8 | }
  9 | 
 10 | layer {
 11 |   name: "conv1"
 12 |   type: "Convolution"
 13 |   bottom: "data"
 14 |   top: "conv1"
 15 |   convolution_param {
 16 |     num_output: 64
 17 |     kernel_size: 7
 18 |     pad: 3
 19 |     stride: 2
 20 |   }
 21 | }
 22 | layer {
 23 |   name: "relu1"
 24 |   type: "ReLU"
 25 |   bottom: "conv1"
 26 |   top: "conv1"
 27 |   relu_param{
 28 |     negative_slope: 0.1
 29 |   }
 30 | }
 31 | layer {
 32 |   name: "pool1"
 33 |   type: "Pooling"
 34 |   bottom: "conv1"
 35 |   top: "pool1"
 36 |   pooling_param {
 37 |     pool: MAX
 38 |     kernel_size: 2
 39 |     stride: 2
 40 |   }
 41 | }
 42 | 
 43 | layer{
 44 |   name: "conv2"
 45 |   type: "Convolution"
 46 |   bottom: "pool1"
 47 |   top: "conv2"
 48 |   convolution_param {
 49 |     num_output: 192
 50 |     kernel_size: 3
 51 |     pad: 1
 52 |     stride: 1
 53 |   }
 54 | }
 55 | layer {
 56 |   name: "relu2"
 57 |   type: "ReLU"
 58 |   bottom: "conv2"
 59 |   top: "conv2"
 60 |   relu_param{
 61 |     negative_slope: 0.1
 62 |   }
 63 | }
 64 | layer {
 65 |   name: "pool2"
 66 |   type: "Pooling"
 67 |   bottom: "conv2"
 68 |   top: "pool2"
 69 |   pooling_param {
 70 |     pool: MAX
 71 |     kernel_size: 2
 72 |     stride: 2
 73 |   }
 74 | }
 75 | 
 76 | layer{
 77 |   name: "conv3"
 78 |   type: "Convolution"
 79 |   bottom: "pool2"
 80 |   top: "conv3"
 81 |   convolution_param {
 82 |     num_output: 128
 83 |     kernel_size: 1
 84 |     pad: 0
 85 |     stride: 1
 86 |   }
 87 | }
 88 | layer {
 89 |   name: "relu3"
 90 |   type: "ReLU"
 91 |   bottom: "conv3"
 92 |   top: "conv3"
 93 |   relu_param{
 94 |     negative_slope: 0.1
 95 |   }
 96 | }
 97 | 
 98 | 
 99 | layer{
100 |   name: "conv4"
101 |   type: "Convolution"
102 |   bottom: "conv3"
103 |   top: "conv4"
104 |   convolution_param {
105 |     num_output: 256
106 |     kernel_size: 3
107 |     pad: 1
108 |     stride: 1
109 |   }
110 | }
111 | layer {
112 |   name: "relu4"
113 |   type: "ReLU"
114 |   bottom: "conv4"
115 |   top: "conv4"
116 |   relu_param{
117 |     negative_slope: 0.1
118 |   }
119 | }
120 | 
121 | layer{
122 |   name: "conv5"
123 |   type: "Convolution"
124 |   bottom: "conv4"
125 |   top: "conv5"
126 |   convolution_param {
127 |     num_output: 256
128 |     kernel_size: 1
129 |     pad: 0
130 |     stride: 1
131 |   }
132 | }
133 | layer {
134 |   name: "relu5"
135 |   type: "ReLU"
136 |   bottom: "conv5"
137 |   top: "conv5"
138 |   relu_param{
139 |     negative_slope: 0.1
140 |   }
141 | }
142 | 
143 | layer{
144 |   name: "conv6"
145 |   type: "Convolution"
146 |   bottom: "conv5"
147 |   top: "conv6"
148 |   convolution_param {
149 |     num_output: 512
150 |     kernel_size: 3
151 |     pad: 1
152 |     stride: 1
153 |   }
154 | }
155 | layer {
156 |   name: "relu6"
157 |   type: "ReLU"
158 |   bottom: "conv6"
159 |   top: "conv6"
160 |   relu_param{
161 |     negative_slope: 0.1
162 |   }
163 | }
164 | layer {
165 |   name: "pool6"
166 |   type: "Pooling"
167 |   bottom: "conv6"
168 |   top: "pool6"
169 |   pooling_param {
170 |     pool: MAX
171 |     kernel_size: 2
172 |     stride: 2
173 |   }
174 | }
175 | 
176 | layer{
177 |   name: "conv7"
178 |   type: "Convolution"
179 |   bottom: "pool6"
180 |   top: "conv7"
181 |   convolution_param {
182 |     num_output: 256
183 |     kernel_size: 1
184 |     pad: 0
185 |     stride: 1
186 |   }
187 | }
188 | layer {
189 |   name: "relu7"
190 |   type: "ReLU"
191 |   bottom: "conv7"
192 |   top: "conv7"
193 |   relu_param{
194 |     negative_slope: 0.1
195 |   }
196 | }
197 | 
198 | layer{
199 |   name: "conv8"
200 |   type: "Convolution"
201 |   bottom: "conv7"
202 |   top: "conv8"
203 |   convolution_param {
204 |     num_output: 512
205 |     kernel_size: 3
206 |     pad: 1
207 |     stride: 1
208 |   }
209 | }
210 | layer {
211 |   name: "relu8"
212 |   type: "ReLU"
213 |   bottom: "conv8"
214 |   top: "conv8"
215 |   relu_param{
216 |     negative_slope: 0.1
217 |   }
218 | }
219 | 
220 | layer{
221 |   name: "conv9"
222 |   type: "Convolution"
223 |   bottom: "conv8"
224 |   top: "conv9"
225 |   convolution_param {
226 |     num_output: 256
227 |     kernel_size: 1
228 |     pad: 0
229 |     stride: 1
230 |   }
231 | }
232 | layer {
233 |   name: "relu9"
234 |   type: "ReLU"
235 |   bottom: "conv9"
236 |   top: "conv9"
237 |   relu_param{
238 |     negative_slope: 0.1
239 |   }
240 | }
241 | 
242 | layer{
243 |   name: "conv10"
244 |   type: "Convolution"
245 |   bottom: "conv9"
246 |   top: "conv10"
247 |   convolution_param {
248 |     num_output: 512
249 |     kernel_size: 3
250 |     pad: 1
251 |     stride: 1
252 |   }
253 | }
254 | layer {
255 |   name: "relu10"
256 |   type: "ReLU"
257 |   bottom: "conv10"
258 |   top: "conv10"
259 |   relu_param{
260 |     negative_slope: 0.1
261 |   }
262 | }
263 | 
264 | layer{
265 |   name: "conv11"
266 |   type: "Convolution"
267 |   bottom: "conv10"
268 |   top: "conv11"
269 |   convolution_param {
270 |     num_output: 256
271 |     kernel_size: 1
272 |     pad: 0
273 |     stride: 1
274 |   }
275 | }
276 | layer {
277 |   name: "relu11"
278 |   type: "ReLU"
279 |   bottom: "conv11"
280 |   top: "conv11"
281 |   relu_param{
282 |     negative_slope: 0.1
283 |   }
284 | }
285 | 
286 | 
287 | layer{
288 |   name: "conv12"
289 |   type: "Convolution"
290 |   bottom: "conv11"
291 |   top: "conv12"
292 |   convolution_param {
293 |     num_output: 512
294 |     kernel_size: 3
295 |     pad: 1
296 |     stride: 1
297 |   }
298 | }
299 | layer {
300 |   name: "relu12"
301 |   type: "ReLU"
302 |   bottom: "conv12"
303 |   top: "conv12"
304 |   relu_param{
305 |     negative_slope: 0.1
306 |   }
307 | }
308 | 
309 | 
310 | layer{
311 |   name: "conv13"
312 |   type: "Convolution"
313 |   bottom: "conv12"
314 |   top: "conv13"
315 |   convolution_param {
316 |     num_output: 256
317 |     kernel_size: 1
318 |     pad: 0
319 |     stride: 1
320 |   }
321 | }
322 | layer {
323 |   name: "relu13"
324 |   type: "ReLU"
325 |   bottom: "conv13"
326 |   top: "conv13"
327 |   relu_param{
328 |     negative_slope: 0.1
329 |   }
330 | }
331 | 
332 | layer{
333 |   name: "conv14"
334 |   type: "Convolution"
335 |   bottom: "conv13"
336 |   top: "conv14"
337 |   convolution_param {
338 |     num_output: 512
339 |     kernel_size: 3
340 |     pad: 1
341 |     stride: 1
342 |   }
343 | }
344 | layer {
345 |   name: "relu14"
346 |   type: "ReLU"
347 |   bottom: "conv14"
348 |   top: "conv14"
349 |   relu_param{
350 |     negative_slope: 0.1
351 |   }
352 | }
353 | 
354 | layer{
355 |   name: "conv15"
356 |   type: "Convolution"
357 |   bottom: "conv14"
358 |   top: "conv15"
359 |   convolution_param {
360 |     num_output: 512
361 |     kernel_size: 1
362 |     pad: 0
363 |     stride: 1
364 |   }
365 | }
366 | layer {
367 |   name: "relu15"
368 |   type: "ReLU"
369 |   bottom: "conv15"
370 |   top: "conv15"
371 |   relu_param{
372 |     negative_slope: 0.1
373 |   }
374 | }
375 | 
376 | 
377 | layer{
378 |   name: "conv16"
379 |   type: "Convolution"
380 |   bottom: "conv15"
381 |   top: "conv16"
382 |   convolution_param {
383 |     num_output: 1024
384 |     kernel_size: 3
385 |     pad: 1
386 |     stride: 1
387 |   }
388 | }
389 | layer {
390 |   name: "relu16"
391 |   type: "ReLU"
392 |   bottom: "conv16"
393 |   top: "conv16"
394 |   relu_param{
395 |     negative_slope: 0.1
396 |   }
397 | }
398 | 
399 | layer {
400 |   name: "pool16"
401 |   type: "Pooling"
402 |   bottom: "conv16"
403 |   top: "pool16"
404 |   pooling_param {
405 |     pool: MAX
406 |     kernel_size: 2
407 |     stride: 2
408 |   }
409 | }
410 | 
411 | 
412 | layer{
413 |   name: "conv17"
414 |   type: "Convolution"
415 |   bottom: "pool16"
416 |   top: "conv17"
417 |   convolution_param {
418 |     num_output: 512
419 |     kernel_size: 1
420 |     pad: 0
421 |     stride: 1
422 |   }
423 | }
424 | layer {
425 |   name: "relu17"
426 |   type: "ReLU"
427 |   bottom: "conv17"
428 |   top: "conv17"
429 |   relu_param{
430 |     negative_slope: 0.1
431 |   }
432 | }
433 | 
434 | 
435 | layer{
436 |   name: "conv18"
437 |   type: "Convolution"
438 |   bottom: "conv17"
439 |   top: "conv18"
440 |   convolution_param {
441 |     num_output: 1024
442 |     kernel_size: 3
443 |     pad: 1
444 |     stride: 1
445 |   }
446 | }
447 | layer {
448 |   name: "relu18"
449 |   type: "ReLU"
450 |   bottom: "conv18"
451 |   top: "conv18"
452 |   relu_param{
453 |     negative_slope: 0.1
454 |   }
455 | }
456 | 
457 | 
458 | 
459 | layer{
460 |   name: "conv19"
461 |   type: "Convolution"
462 |   bottom: "conv18"
463 |   top: "conv19"
464 |   convolution_param {
465 |     num_output: 512
466 |     kernel_size: 1
467 |     pad: 0
468 |     stride: 1
469 |   }
470 | }
471 | layer {
472 |   name: "relu19"
473 |   type: "ReLU"
474 |   bottom: "conv19"
475 |   top: "conv19"
476 |   relu_param{
477 |     negative_slope: 0.1
478 |   }
479 | }
480 | 
481 | 
482 | 
483 | layer{
484 |   name: "conv20"
485 |   type: "Convolution"
486 |   bottom: "conv19"
487 |   top: "conv20"
488 |   convolution_param {
489 |     num_output: 1024
490 |     kernel_size: 3
491 |     pad: 1
492 |     stride: 1
493 |   }
494 | }
495 | layer {
496 |   name: "relu20"
497 |   type: "ReLU"
498 |   bottom: "conv20"
499 |   top: "conv20"
500 |   relu_param{
501 |     negative_slope: 0.1
502 |   }
503 | }
504 | 
505 | 
506 | 
507 | layer{
508 |   name: "conv21"
509 |   type: "Convolution"
510 |   bottom: "conv20"
511 |   top: "conv21"
512 |   convolution_param {
513 |     num_output: 1024
514 |     kernel_size: 3
515 |     pad: 1
516 |     stride: 1
517 |   }
518 | }
519 | layer {
520 |   name: "relu21"
521 |   type: "ReLU"
522 |   bottom: "conv21"
523 |   top: "conv21"
524 |   relu_param{
525 |     negative_slope: 0.1
526 |   }
527 | }
528 | 
529 | 
530 | layer{
531 |   name: "conv22"
532 |   type: "Convolution"
533 |   bottom: "conv21"
534 |   top: "conv22"
535 |   convolution_param {
536 |     num_output: 1024
537 |     kernel_size: 3
538 |     pad: 1
539 |     stride: 2
540 |   }
541 | }
542 | layer {
543 |   name: "relu22"
544 |   type: "ReLU"
545 |   bottom: "conv22"
546 |   top: "conv22"
547 |   relu_param{
548 |     negative_slope: 0.1
549 |   }
550 | }
551 | 
552 | 
553 | 
554 | layer{
555 |   name: "conv23"
556 |   type: "Convolution"
557 |   bottom: "conv22"
558 |   top: "conv23"
559 |   convolution_param {
560 |     num_output: 1024
561 |     kernel_size: 3
562 |     pad: 1
563 |     stride: 1
564 |   }
565 | }
566 | layer {
567 |   name: "relu23"
568 |   type: "ReLU"
569 |   bottom: "conv23"
570 |   top: "conv23"
571 |   relu_param{
572 |     negative_slope: 0.1
573 |   }
574 | }
575 | 
576 | 
577 | layer{
578 |   name: "conv24"
579 |   type: "Convolution"
580 |   bottom: "conv23"
581 |   top: "conv24"
582 |   convolution_param {
583 |     num_output: 1024
584 |     kernel_size: 3
585 |     pad: 1
586 |     stride: 1
587 |   }
588 | }
589 | layer {
590 |   name: "relu24"
591 |   type: "ReLU"
592 |   bottom: "conv24"
593 |   top: "conv24"
594 |   relu_param{
595 |     negative_slope: 0.1
596 |   }
597 | }
598 | 
599 | 
600 | 
601 | 
602 | layer{
603 |   name: "fc25"
604 |   type: "InnerProduct"
605 |   bottom: "conv24"
606 |   top: "fc25"
607 |   inner_product_param {
608 |     num_output: 512
609 |   }
610 | }
611 | layer {
612 |   name: "relu25"
613 |   type: "ReLU"
614 |   bottom: "fc25"
615 |   top: "fc25"
616 |   relu_param{
617 |     negative_slope: 0.1
618 |   }
619 | }
620 | 
621 | 
622 | layer{
623 |   name: "fc26"
624 |   type: "InnerProduct"
625 |   bottom: "fc25"
626 |   top: "fc26"
627 |   inner_product_param {
628 |     num_output: 4096
629 |   }
630 | }
631 | layer {
632 |   name: "relu26"
633 |   type: "ReLU"
634 |   bottom: "fc26"
635 |   top: "fc26"
636 |   relu_param{
637 |     negative_slope: 0.1
638 |   }
639 | }
640 | 
641 | 
642 | layer{
643 |   name: "fc27"
644 |   type: "InnerProduct"
645 |   bottom: "fc26"
646 |   top: "fc27"
647 |   inner_product_param {
648 |     num_output: 1470
649 |   }
650 | }
651 | 


--------------------------------------------------------------------------------
/create_yolo_prototxt.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from ConfigParser import ConfigParser
  3 | from collections import OrderedDict
  4 | import argparse
  5 | import logging
  6 | import os
  7 | import sys
  8 | 
  9 | class CaffeLayerGenerator(object):
 10 |     def __init__(self, name, ltype):
 11 |         self.name = name
 12 |         self.bottom = []
 13 |         self.top = []
 14 |         self.type = ltype
 15 |     def get_template(self):
 16 |         return """
 17 | layer {{{{
 18 |   name: "{}"
 19 |   type: "{}"
 20 |   bottom: "{}"
 21 |   top: "{}"{{}}
 22 | }}}}""".format(self.name, self.type, self.bottom[0], self.top[0])
 23 | 
 24 | class CaffeInputLayer(CaffeLayerGenerator):
 25 |     def __init__(self, name, channels, width, height):
 26 |         super(CaffeInputLayer, self).__init__(name, 'Input')
 27 |         self.channels = channels
 28 |         self.width = width
 29 |         self.height = height
 30 |     def write(self, f):
 31 |         f.write("""
 32 | input: "{}"
 33 | input_shape {{
 34 |   dim: 1
 35 |   dim: {}
 36 |   dim: {}
 37 |   dim: {}
 38 | }}""".format(self.name, self.channels, self.width, self.height))
 39 | 
 40 | class CaffeConvolutionLayer(CaffeLayerGenerator):
 41 |     def __init__(self, name, filters, ksize=None, stride=None, pad=None, bias=True):
 42 |         super(CaffeConvolutionLayer, self).__init__(name, 'Convolution')
 43 |         self.filters = filters
 44 |         self.ksize = ksize
 45 |         self.stride = stride
 46 |         self.pad = pad
 47 |         self.bias = bias
 48 |     def write(self, f):
 49 |         opts = ['']
 50 |         if self.ksize is not None: opts.append('kernel_size: {}'.format(self.ksize))
 51 |         if self.stride is not None: opts.append('stride: {}'.format(self.stride))
 52 |         if self.pad is not None: opts.append('pad: {}'.format(self.pad))
 53 |         if not self.bias: opts.append('bias_term: false')
 54 |         param_str = """
 55 |   convolution_param {{
 56 |     num_output: {}{}
 57 |   }}""".format(self.filters, '\n    '.join(opts))
 58 |         f.write(self.get_template().format(param_str))
 59 | 
 60 | class CaffePoolingLayer(CaffeLayerGenerator):
 61 |     def __init__(self, name, pooltype, ksize=None, stride=None, pad=None, global_pooling=None):
 62 |         super(CaffePoolingLayer, self).__init__(name, 'Pooling')
 63 |         self.pooltype = pooltype
 64 |         self.ksize = ksize
 65 |         self.stride = stride
 66 |         self.pad = pad
 67 |         self.global_pooling = global_pooling
 68 |     def write(self, f):
 69 |         opts = ['']
 70 |         if self.ksize is not None: opts.append('kernel_size: {}'.format(self.ksize))
 71 |         if self.stride is not None: opts.append('stride: {}'.format(self.stride))
 72 |         if self.pad is not None: opts.append('pad: {}'.format(self.pad))
 73 |         if self.global_pooling is not None: opts.append('global_pooling: {}'.format('True' if self.global_pooling else 'False'))
 74 |         param_str = """
 75 |   pooling_param {{
 76 |     pool: {}{}
 77 |   }}""".format(self.pooltype, '\n    '.join(opts))
 78 |         f.write(self.get_template().format(param_str))
 79 | 
 80 | class CaffeInnerProductLayer(CaffeLayerGenerator):
 81 |     def __init__(self, name, num_output):
 82 |         super(CaffeInnerProductLayer, self).__init__(name, 'InnerProduct')
 83 |         self.num_output = num_output
 84 |     def write(self, f):
 85 |         param_str = """
 86 |   inner_product_param {{
 87 |     num_output: {}
 88 |   }}""".format(self.num_output)
 89 |         f.write(self.get_template().format(param_str))
 90 | 
 91 | class CaffeBatchNormLayer(CaffeLayerGenerator):
 92 |     def __init__(self, name):
 93 |         super(CaffeBatchNormLayer, self).__init__(name, 'BatchNorm')
 94 |     def write(self, f):
 95 |         param_str = """
 96 |   batch_norm_param {
 97 |     use_global_stats: true
 98 |   }"""
 99 |         f.write(self.get_template().format(param_str))
100 | 
class CaffeScaleLayer(CaffeLayerGenerator):
    """Scale layer that is always written with a bias term."""

    def __init__(self, name):
        super(CaffeScaleLayer, self).__init__(name, 'Scale')

    def write(self, f):
        """Write the layer with ``bias_term: true`` to ``f``."""
        # Passed as an argument to format(), so single braces are literal.
        param_text = '\n'.join((
            '',
            '  scale_param {',
            '    bias_term: true',
            '  }',
        ))
        f.write(self.get_template().format(param_text))
110 | 
class CaffeReluLayer(CaffeLayerGenerator):
    """ReLU activation layer with an optional negative slope."""

    def __init__(self, name, negslope=None):
        """Store the layer name and optional ``negative_slope`` value.

        The layer type is spelled ``ReLU``: Caffe layer type names are
        case-sensitive and ``Relu`` is not a registered type.
        """
        super(CaffeReluLayer, self).__init__(name, 'ReLU')
        self.negslope = negslope

    def write(self, f):
        """Write the layer to ``f``.

        A ``relu_param`` block with ``negative_slope`` is emitted only when
        ``negslope`` is not ``None``.
        """
        param_str = ""
        if self.negslope is not None:
            param_str = """
  relu_param {{
    negative_slope: {}
  }}""".format(self.negslope)
        f.write(self.get_template().format(param_str))
123 | 
class CaffeDropoutLayer(CaffeLayerGenerator):
    """Dropout layer with a configurable dropout ratio."""

    def __init__(self, name, prob):
        """Store the layer name and the dropout ratio ``prob``."""
        super(CaffeDropoutLayer, self).__init__(name, 'Dropout')
        self.prob = prob

    def write(self, f):
        """Write the layer with its ``dropout_param`` to ``f``."""
        param_lines = (
            '',
            '  dropout_param {{',
            '    dropout_ratio: {}',
            '  }}',
        )
        param_text = '\n'.join(param_lines).format(self.prob)
        f.write(self.get_template().format(param_text))
134 | 
class CaffeSoftmaxLayer(CaffeLayerGenerator):
    """Softmax layer; it has no layer-specific parameters."""

    def __init__(self, name):
        super(CaffeSoftmaxLayer, self).__init__(name, 'Softmax')

    def write(self, f):
        """Write the bare layer skeleton (no parameter block) to ``f``."""
        no_params = ""
        skeleton = self.get_template()
        f.write(skeleton.format(no_params))
140 | 
class CaffeProtoGenerator:
    """Builds an ordered list of Caffe layers and writes them as a prototxt.

    Layers are chained linearly: every ``add_*`` method uses the first top
    blob of the most recently added layer as the bottom of the new one.
    ``lnum`` is the running index used in generated layer names. Only
    convolution and inner-product layers advance it, so the batch-norm,
    scale, ReLU, pooling and dropout layers added after them share its value.

    The ``items`` argument of the ``add_*`` methods is a dict of cfg options
    for one section; values may be strings, as returned by ConfigParser.
    """

    # negative_slope written for Darknet's ``leaky`` activation.
    LEAKY_SLOPE = 0.1

    def __init__(self, name):
        self.name = name
        self.sections = []
        self.lnum = 0
        self.layer = None

    @staticmethod
    def _flag(items, key):
        """Return True when option ``key`` is present and non-zero.

        Option values read from a cfg file are strings, so ``'0'`` must be
        parsed as a number; testing the string for truthiness would treat it
        as enabled.
        """
        value = items.get(key)
        if value is None:
            return False
        try:
            return int(value) != 0
        except (TypeError, ValueError):
            return bool(value)

    @staticmethod
    def _conv_padding(items):
        """Translate Darknet padding options into a Caffe ``pad`` value.

        In Darknet, ``pad`` is a flag: when non-zero the padding is
        ``size / 2`` (``size`` defaults to 1). An explicit ``padding`` option
        gives the amount directly. Returns ``None`` when neither is present.
        """
        if CaffeProtoGenerator._flag(items, 'pad'):
            return int(items.get('size', 1)) // 2
        if 'padding' in items:
            return items['padding']
        # ``pad`` present but zero: keep writing it as given.
        return items.get('pad')

    def _chain(self, layer, loopback=False):
        """Connect ``layer`` after the current layer and make it current.

        With ``loopback`` the layer writes to the blob it reads from
        (in-place); otherwise its top blob is named after the layer.
        """
        prev_blob = self.layer.top[0]
        layer.bottom.append(prev_blob)
        layer.top.append(prev_blob if loopback else layer.name)
        self.layer = layer
        self.add_layer(layer)

    def add_layer(self, l):
        """Append an already configured layer object to the output list."""
        self.sections.append(l)

    def add_input_layer(self, items):
        """Add the ``data`` input from ``channels``/``width``/``height``.

        Resets the layer counter to 0.
        """
        self.lnum = 0
        lname = "data"
        self.layer = CaffeInputLayer(lname, items['channels'], items['width'], items['height'])
        self.layer.top.append(lname)
        self.add_layer(self.layer)

    def add_convolution_layer(self, items):
        """Add ``conv<N>`` from ``filters``, ``size``, ``stride`` and padding.

        The bias term is disabled when ``batch_normalize`` is non-zero.
        """
        self.lnum += 1
        layer = CaffeConvolutionLayer(
            "conv" + str(self.lnum),
            items['filters'],
            ksize=items.get('size'),
            stride=items.get('stride'),
            pad=self._conv_padding(items),
            bias=not self._flag(items, 'batch_normalize'),
        )
        self._chain(layer)

    def add_innerproduct_layer(self, items):
        """Add ``fc<N>`` with ``items['output']`` outputs."""
        self.lnum += 1
        self._chain(CaffeInnerProductLayer("fc" + str(self.lnum), items['output']))

    def add_pooling_layer(self, ltype, items, global_pooling=None):
        """Add ``pool<N>`` of pooling type ``ltype`` (e.g. ``MAX``, ``AVE``)."""
        layer = CaffePoolingLayer(
            "pool" + str(self.lnum),
            ltype,
            ksize=items.get('size'),
            stride=items.get('stride'),
            pad=items.get('pad'),
            global_pooling=global_pooling,
        )
        self._chain(layer)

    def add_batchnorm_layer(self, items):
        """Add ``bn<N>`` after the current layer; ``items`` is unused."""
        self._chain(CaffeBatchNormLayer("bn" + str(self.lnum)))

    def add_scale_layer(self, items):
        """Add ``scale<N>`` after the current layer; ``items`` is unused."""
        self._chain(CaffeScaleLayer("scale" + str(self.lnum)))

    def add_relu_layer(self, items):
        """Add an in-place ``relu<N>`` layer.

        A ``leaky`` activation is written with ``negative_slope``
        ``LEAKY_SLOPE``; any other activation becomes a plain ReLU.
        """
        negslope = self.LEAKY_SLOPE if items.get('activation') == 'leaky' else None
        self._chain(CaffeReluLayer("relu" + str(self.lnum), negslope), loopback=True)

    def add_dropout_layer(self, items):
        """Add an in-place ``drop<N>`` layer with ``items['probability']``."""
        layer = CaffeDropoutLayer("drop" + str(self.lnum), items['probability'])
        self._chain(layer, loopback=True)

    def add_softmax_layer(self, items):
        """Add the ``prob`` softmax layer; ``items`` is unused."""
        self._chain(CaffeSoftmaxLayer("prob"))

    def finalize(self, name):
        """Rename the first top blob of the most recently added layer."""
        self.layer.top[0] = name    # replace

    def write(self, fname):
        """Write the net name and every collected layer to file ``fname``."""
        with open(fname, 'w') as f:
            # No trailing newline: each layer's text starts with one.
            f.write('name: "{}"'.format(self.name))
            for sec in self.sections:
                sec.write(f)
        logging.info('{} is generated'.format(fname))
230 | 
231 | ###################################################################33
class uniqdict(OrderedDict):
    """OrderedDict that stores each OrderedDict value under a unique key.

    When the value being assigned is itself an ``OrderedDict``, a running
    counter is incremented and ``"_<counter>"`` is appended to the key, so
    assigning the same key repeatedly creates separate entries. Any other
    value is stored under the key unchanged.
    """

    _unique = 0

    def __setitem__(self, key, val):
        if isinstance(val, OrderedDict):
            # Reads the class-level default the first time, then keeps a
            # per-instance counter.
            self._unique = self._unique + 1
            suffix = "_" + str(self._unique)
            key = key + suffix
        OrderedDict.__setitem__(self, key, val)
239 | 
def convert(cfgfile, ptxtfile):
    """Convert the cfg file ``cfgfile`` into the Caffe prototxt ``ptxtfile``.

    Sections are processed in file order. ``crop`` and ``cost`` sections are
    skipped, and any other unsupported section type is reported through
    ``logging.error`` and otherwise ignored. The net name is the cfg file's
    base name up to its first dot.

    Raises:
        IOError: if ``cfgfile`` cannot be read.
    """
    parser = ConfigParser(dict_type=uniqdict)
    # read() skips unreadable files silently and returns the list of files
    # it parsed; without this check a missing cfg yields an empty prototxt.
    if not parser.read(cfgfile):
        raise IOError("cannot read cfg file: {}".format(cfgfile))
    netname = os.path.basename(cfgfile).split('.')[0]
    gen = CaffeProtoGenerator(netname)
    for section in parser.sections():
        # uniqdict appends "_<n>" to section names; strip it to get the type.
        _section = section.split('_')[0]
        if _section in ["crop", "cost"]:
            continue
        #
        items = dict(parser.items(section))
        # Option values are strings, so "0" must be parsed as a number:
        # a non-empty string is always truthy.
        batchnorm_followed = False
        if 'batch_normalize' in items:
            try:
                batchnorm_followed = int(items['batch_normalize']) != 0
            except ValueError:
                batchnorm_followed = bool(items['batch_normalize'])
        relu_followed = 'activation' in items and items['activation'] != 'linear'
        #
        if _section == 'net':
            gen.add_input_layer(items)
        elif _section == 'convolutional':
            gen.add_convolution_layer(items)
            if batchnorm_followed:
                gen.add_batchnorm_layer(items)
                gen.add_scale_layer(items)
            if relu_followed:
                gen.add_relu_layer(items)
        elif _section == 'connected':
            gen.add_innerproduct_layer(items)
            if relu_followed:
                gen.add_relu_layer(items)
        elif _section == 'maxpool':
            gen.add_pooling_layer('MAX', items)
        elif _section == 'avgpool':
            gen.add_pooling_layer('AVE', items, global_pooling=True)
        elif _section == 'dropout':
            gen.add_dropout_layer(items)
        elif _section == 'softmax':
            gen.add_softmax_layer(items)
        else:
            logging.error("{} layer is not supported".format(_section))
    #gen.finalize('result')
    gen.write(ptxtfile)
285 | 
def main():
    """Command-line entry point: read the two paths and run the conversion.

    Expects two positional arguments, the input cfg path and the output
    prototxt path.
    """
    cli = argparse.ArgumentParser(description='Convert YOLO cfg to Caffe prototxt')
    positional = (
        ('cfg', 'YOLO cfg'),
        ('prototxt', 'Caffe prototxt'),
    )
    for arg_name, arg_help in positional:
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()

    convert(options.cfg, options.prototxt)
293 | 
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
296 | 
297 | # vim:sw=4:ts=4:et
298 | 


--------------------------------------------------------------------------------
/prototxt/yolo_tiny_train_val.prototxt:
--------------------------------------------------------------------------------
  1 | name: "YOLONet"
  2 | input: "data"
  3 | input_shape {
  4 |   dim: 1
  5 |   dim: 3
  6 |   dim: 448
  7 |   dim: 448
  8 | }
  9 | 
 10 | layer {
 11 |   name: "conv1"
 12 |   type: "Convolution"
 13 |   bottom: "data"
 14 |   top: "conv1"
 15 |   param {
 16 |     lr_mult: 1
 17 |     decay_mult: 1
 18 |   }
 19 |   convolution_param {
 20 |     num_output: 16
 21 |     kernel_size: 3
 22 |     pad: 1
 23 |     bias_term: false
 24 |     weight_filler {
 25 |       type: "xavier"
 26 |     }
 27 |   }
 28 | }
 29 | layer {
 30 |   name: "bn1"
 31 |   type: "BatchNorm"
 32 |   bottom: "conv1"
 33 |   top: "bn1"
 34 |   include { phase: TRAIN }
 35 |   param {
 36 |     lr_mult: 0
 37 |     decay_mult: 0
 38 |   }
 39 |   param {
 40 |     lr_mult: 0
 41 |     decay_mult: 0
 42 |   }
 43 |   param {
 44 |     lr_mult: 0
 45 |     decay_mult: 0
 46 |   }
 47 |   batch_norm_param {
 48 |     use_global_stats: false
 49 |   }
 50 | }
 51 | layer {
 52 |   name: "bn1"
 53 |   type: "BatchNorm"
 54 |   bottom: "conv1"
 55 |   top: "bn1"
 56 |   include { phase: TEST }
 57 |   batch_norm_param {
 58 |     use_global_stats: true
 59 |   }
 60 | }
 61 | layer {
 62 |   name: "scale1"
 63 |   type: "Scale"
 64 |   bottom: "bn1"
 65 |   top: "scale1"
 66 |   param {
 67 |     lr_mult: 0
 68 |     decay_mult: 0
 69 |   }
 70 |   param {
 71 |     lr_mult: 0
 72 |     decay_mult: 0
 73 |   }
 74 |   scale_param {
 75 |     bias_term: true
 76 |   }
 77 | }
 78 | layer {
 79 |   name: "relu1"
 80 |   type: "ReLU"
 81 |   bottom: "scale1"
 82 |   top: "scale1"
 83 |   relu_param {
 84 |     negative_slope: 0.1
 85 |   }
 86 | }
 87 | layer {
 88 |   name: "pool1"
 89 |   type: "Pooling"
 90 |   bottom: "scale1"
 91 |   top: "pool1"
 92 |   pooling_param {
 93 |     pool: MAX
 94 |     kernel_size: 2
 95 |     stride: 2
 96 |   }
 97 | }
 98 | 
 99 | layer {
100 |   name: "conv2"
101 |   type: "Convolution"
102 |   bottom: "pool1"
103 |   top: "conv2"
104 |   param {
105 |     lr_mult: 1
106 |     decay_mult: 1
107 |   }
108 |   convolution_param {
109 |     num_output: 32
110 |     kernel_size: 3
111 |     pad: 1
112 |     bias_term: false
113 |     weight_filler {
114 |       type: "xavier"
115 |     }
116 |   }
117 | }
118 | layer {
119 |   name: "bn2"
120 |   type: "BatchNorm"
121 |   bottom: "conv2"
122 |   top: "bn2"
123 |   include { phase: TRAIN }
124 |   param {
125 |     lr_mult: 0
126 |     decay_mult: 0
127 |   }
128 |   param {
129 |     lr_mult: 0
130 |     decay_mult: 0
131 |   }
132 |   param {
133 |     lr_mult: 0
134 |     decay_mult: 0
135 |   }
136 |   batch_norm_param {
137 |     use_global_stats: false
138 |   }
139 | }
140 | layer {
141 |   name: "bn2"
142 |   type: "BatchNorm"
143 |   bottom: "conv2"
144 |   top: "bn2"
145 |   include { phase: TEST }
146 |   batch_norm_param {
147 |     use_global_stats: true
148 |   }
149 | }
150 | layer {
151 |   name: "scale2"
152 |   type: "Scale"
153 |   bottom: "bn2"
154 |   top: "scale2"
155 |   param {
156 |     lr_mult: 0
157 |     decay_mult: 0
158 |   }
159 |   param {
160 |     lr_mult: 0
161 |     decay_mult: 0
162 |   }
163 |   scale_param {
164 |     bias_term: true
165 |   }
166 | }
167 | layer {
168 |   name: "relu2"
169 |   type: "ReLU"
170 |   bottom: "scale2"
171 |   top: "scale2"
172 |   relu_param {
173 |     negative_slope: 0.1
174 |   }
175 | }
176 | layer {
177 |   name: "pool2"
178 |   type: "Pooling"
179 |   bottom: "scale2"
180 |   top: "pool2"
181 |   pooling_param {
182 |     pool: MAX
183 |     kernel_size: 2
184 |     stride: 2
185 |   }
186 | }
187 | 
188 | layer {
189 |   name: "conv3"
190 |   type: "Convolution"
191 |   bottom: "pool2"
192 |   top: "conv3"
193 |   param {
194 |     lr_mult: 1
195 |     decay_mult: 1
196 |   }
197 |   convolution_param {
198 |     num_output: 64
199 |     kernel_size: 3
200 |     pad: 1
201 |     bias_term: false
202 |     weight_filler {
203 |       type: "xavier"
204 |     }
205 |   }
206 | }
207 | layer {
208 |   name: "bn3"
209 |   type: "BatchNorm"
210 |   bottom: "conv3"
211 |   top: "bn3"
212 |   include { phase: TRAIN }
213 |   param {
214 |     lr_mult: 0
215 |     decay_mult: 0
216 |   }
217 |   param {
218 |     lr_mult: 0
219 |     decay_mult: 0
220 |   }
221 |   param {
222 |     lr_mult: 0
223 |     decay_mult: 0
224 |   }
225 |   batch_norm_param {
226 |     use_global_stats: false
227 |   }
228 | }
229 | layer {
230 |   name: "bn3"
231 |   type: "BatchNorm"
232 |   bottom: "conv3"
233 |   top: "bn3"
234 |   include { phase: TEST }
235 |   batch_norm_param {
236 |     use_global_stats: true
237 |   }
238 | }
239 | layer {
240 |   name: "scale3"
241 |   type: "Scale"
242 |   bottom: "bn3"
243 |   top: "scale3"
244 |   param {
245 |     lr_mult: 0
246 |     decay_mult: 0
247 |   }
248 |   param {
249 |     lr_mult: 0
250 |     decay_mult: 0
251 |   }
252 |   scale_param {
253 |     bias_term: true
254 |   }
255 | }
256 | layer {
257 |   name: "relu3"
258 |   type: "ReLU"
259 |   bottom: "scale3"
260 |   top: "scale3"
261 |   relu_param {
262 |     negative_slope: 0.1
263 |   }
264 | }
265 | layer {
266 |   name: "pool3"
267 |   type: "Pooling"
268 |   bottom: "scale3"
269 |   top: "pool3"
270 |   pooling_param {
271 |     pool: MAX
272 |     kernel_size: 2
273 |     stride: 2
274 |   }
275 | }
276 | 
277 | layer {
278 |   name: "conv4"
279 |   type: "Convolution"
280 |   bottom: "pool3"
281 |   top: "conv4"
282 |   param {
283 |     lr_mult: 1
284 |     decay_mult: 1
285 |   }
286 |   convolution_param {
287 |     num_output: 128
288 |     kernel_size: 3
289 |     pad: 1
290 |     bias_term: false
291 |     weight_filler {
292 |       type: "xavier"
293 |     }
294 |   }
295 | }
296 | layer {
297 |   name: "bn4"
298 |   type: "BatchNorm"
299 |   bottom: "conv4"
300 |   top: "bn4"
301 |   include { phase: TRAIN }
302 |   param {
303 |     lr_mult: 0
304 |     decay_mult: 0
305 |   }
306 |   param {
307 |     lr_mult: 0
308 |     decay_mult: 0
309 |   }
310 |   param {
311 |     lr_mult: 0
312 |     decay_mult: 0
313 |   }
314 |   batch_norm_param {
315 |     use_global_stats: false
316 |   }
317 | }
318 | layer {
319 |   name: "bn4"
320 |   type: "BatchNorm"
321 |   bottom: "conv4"
322 |   top: "bn4"
323 |   include { phase: TEST }
324 |   batch_norm_param {
325 |     use_global_stats: true
326 |   }
327 | }
328 | layer {
329 |   name: "scale4"
330 |   type: "Scale"
331 |   bottom: "bn4"
332 |   top: "scale4"
333 |   param {
334 |     lr_mult: 0
335 |     decay_mult: 0
336 |   }
337 |   param {
338 |     lr_mult: 0
339 |     decay_mult: 0
340 |   }
341 |   scale_param {
342 |     bias_term: true
343 |   }
344 | }
345 | layer {
346 |   name: "relu4"
347 |   type: "ReLU"
348 |   bottom: "scale4"
349 |   top: "scale4"
350 |   relu_param {
351 |     negative_slope: 0.1
352 |   }
353 | }
354 | layer {
355 |   name: "pool4"
356 |   type: "Pooling"
357 |   bottom: "scale4"
358 |   top: "pool4"
359 |   pooling_param {
360 |     pool: MAX
361 |     kernel_size: 2
362 |     stride: 2
363 |   }
364 | }
365 | 
366 | layer {
367 |   name: "conv5"
368 |   type: "Convolution"
369 |   bottom: "pool4"
370 |   top: "conv5"
371 |   param {
372 |     lr_mult: 1
373 |     decay_mult: 1
374 |   }
375 |   convolution_param {
376 |     num_output: 256
377 |     kernel_size: 3
378 |     pad: 1
379 |     bias_term: false
380 |     weight_filler {
381 |       type: "xavier"
382 |     }
383 |   }
384 | }
385 | layer {
386 |   name: "bn5"
387 |   type: "BatchNorm"
388 |   bottom: "conv5"
389 |   top: "bn5"
390 |   include { phase: TRAIN }
391 |   param {
392 |     lr_mult: 0
393 |     decay_mult: 0
394 |   }
395 |   param {
396 |     lr_mult: 0
397 |     decay_mult: 0
398 |   }
399 |   param {
400 |     lr_mult: 0
401 |     decay_mult: 0
402 |   }
403 |   batch_norm_param {
404 |     use_global_stats: false
405 |   }
406 | }
407 | layer {
408 |   name: "bn5"
409 |   type: "BatchNorm"
410 |   bottom: "conv5"
411 |   top: "bn5"
412 |   include { phase: TEST }
413 |   batch_norm_param {
414 |     use_global_stats: true
415 |   }
416 | }
417 | layer {
418 |   name: "scale5"
419 |   type: "Scale"
420 |   bottom: "bn5"
421 |   top: "scale5"
422 |   param {
423 |     lr_mult: 0
424 |     decay_mult: 0
425 |   }
426 |   param {
427 |     lr_mult: 0
428 |     decay_mult: 0
429 |   }
430 |   scale_param {
431 |     bias_term: true
432 |   }
433 | }
434 | layer {
435 |   name: "relu5"
436 |   type: "ReLU"
437 |   bottom: "scale5"
438 |   top: "scale5"
439 |   relu_param {
440 |     negative_slope: 0.1
441 |   }
442 | }
443 | layer {
444 |   name: "pool5"
445 |   type: "Pooling"
446 |   bottom: "scale5"
447 |   top: "pool5"
448 |   pooling_param {
449 |     pool: MAX
450 |     kernel_size: 2
451 |     stride: 2
452 |   }
453 | }
454 | 
455 | layer {
456 |   name: "conv6"
457 |   type: "Convolution"
458 |   bottom: "pool5"
459 |   top: "conv6"
460 |   param {
461 |     lr_mult: 1
462 |     decay_mult: 1
463 |   }
464 |   convolution_param {
465 |     num_output: 512
466 |     kernel_size: 3
467 |     pad: 1
468 |     bias_term: false
469 |     weight_filler {
470 |       type: "xavier"
471 |     }
472 |   }
473 | }
474 | layer {
475 |   name: "bn6"
476 |   type: "BatchNorm"
477 |   bottom: "conv6"
478 |   top: "bn6"
479 |   include { phase: TRAIN }
480 |   param {
481 |     lr_mult: 0
482 |     decay_mult: 0
483 |   }
484 |   param {
485 |     lr_mult: 0
486 |     decay_mult: 0
487 |   }
488 |   param {
489 |     lr_mult: 0
490 |     decay_mult: 0
491 |   }
492 |   batch_norm_param {
493 |     use_global_stats: false
494 |   }
495 | }
496 | layer {
497 |   name: "bn6"
498 |   type: "BatchNorm"
499 |   bottom: "conv6"
500 |   top: "bn6"
501 |   include { phase: TEST }
502 |   batch_norm_param {
503 |     use_global_stats: true
504 |   }
505 | }
506 | layer {
507 |   name: "scale6"
508 |   type: "Scale"
509 |   bottom: "bn6"
510 |   top: "scale6"
511 |   param {
512 |     lr_mult: 0
513 |     decay_mult: 0
514 |   }
515 |   param {
516 |     lr_mult: 0
517 |     decay_mult: 0
518 |   }
519 |   scale_param {
520 |     bias_term: true
521 |   }
522 | }
523 | layer {
524 |   name: "relu6"
525 |   type: "ReLU"
526 |   bottom: "scale6"
527 |   top: "scale6"
528 |   relu_param {
529 |     negative_slope: 0.1
530 |   }
531 | }
532 | layer {
533 |   name: "pool6"
534 |   type: "Pooling"
535 |   bottom: "scale6"
536 |   top: "pool6"
537 |   pooling_param {
538 |     pool: MAX
539 |     kernel_size: 2
540 |     stride: 2
541 |   }
542 | }
543 | 
544 | layer {
545 |   name: "conv7"
546 |   type: "Convolution"
547 |   bottom: "pool6"
548 |   top: "conv7"
549 |   param {
550 |     lr_mult: 1
551 |     decay_mult: 1
552 |   }
553 |   convolution_param {
554 |     num_output: 1024
555 |     kernel_size: 3
556 |     pad: 1
557 |     bias_term: false
558 |     weight_filler {
559 |       type: "xavier"
560 |     }
561 |   }
562 | }
563 | layer {
564 |   name: "bn7"
565 |   type: "BatchNorm"
566 |   bottom: "conv7"
567 |   top: "bn7"
568 |   include { phase: TRAIN }
569 |   param {
570 |     lr_mult: 0
571 |     decay_mult: 0
572 |   }
573 |   param {
574 |     lr_mult: 0
575 |     decay_mult: 0
576 |   }
577 |   param {
578 |     lr_mult: 0
579 |     decay_mult: 0
580 |   }
581 |   batch_norm_param {
582 |     use_global_stats: false
583 |   }
584 | }
585 | layer {
586 |   name: "bn7"
587 |   type: "BatchNorm"
588 |   bottom: "conv7"
589 |   top: "bn7"
590 |   include { phase: TEST }
591 |   batch_norm_param {
592 |     use_global_stats: true
593 |   }
594 | }
595 | layer {
596 |   name: "scale7"
597 |   type: "Scale"
598 |   bottom: "bn7"
599 |   top: "scale7"
600 |   param {
601 |     lr_mult: 0
602 |     decay_mult: 0
603 |   }
604 |   param {
605 |     lr_mult: 0
606 |     decay_mult: 0
607 |   }
608 |   scale_param {
609 |     bias_term: true
610 |   }
611 | }
612 | layer {
613 |   name: "relu7"
614 |   type: "ReLU"
615 |   bottom: "scale7"
616 |   top: "scale7"
617 |   relu_param {
618 |     negative_slope: 0.1
619 |   }
620 | }
621 | #layer {
622 | #  name: "drop7"
623 | #  type: "Dropout"
624 | #  bottom: "scale7"
625 | #  top: "scale7"
626 | #  dropout_param {
627 | #    dropout_ratio: 0.5
628 | #  }
629 | #}
630 | 
631 | layer {  # conv8_y: 3x3 convolution with 256 filters on the activated "scale7" blob
632 |   name: "conv8_y"  # NOTE(review): layer name differs from its top blob "conv8"; presumably renamed so weights are not matched by name when loading a pretrained model - confirm
633 |   type: "Convolution"
634 |   bottom: "scale7"
635 |   top: "conv8"
636 |   param {  # multipliers for the weight blob (the only learnable blob, since bias_term is false)
637 |     lr_mult: 1
638 |     decay_mult: 1
639 |   }
640 |   convolution_param {
641 |     num_output: 256
642 |     kernel_size: 3
643 |     pad: 1
644 |     bias_term: false  # no bias here; the "scale8" layer after batch norm has bias_term: true
645 |     weight_filler {
646 |       type: "xavier"
647 |     }
648 |   }
649 | }
650 | layer {  # bn8 (TRAIN variant): batch normalization of "conv8"; only instantiated in the TRAIN phase
651 |   name: "bn8"
652 |   type: "BatchNorm"
653 |   bottom: "conv8"
654 |   top: "bn8"
655 |   include { phase: TRAIN }
656 |   param {  # the three param entries set lr_mult/decay_mult to 0 so the solver never updates the BatchNorm blobs
657 |     lr_mult: 0
658 |     decay_mult: 0
659 |   }
660 |   param {
661 |     lr_mult: 0
662 |     decay_mult: 0
663 |   }
664 |   param {
665 |     lr_mult: 0
666 |     decay_mult: 0
667 |   }
668 |   batch_norm_param {
669 |     use_global_stats: false  # normalize with the statistics of the current mini-batch
670 |   }
671 | }
672 | layer {  # bn8 (TEST variant): same name, bottom and top as above, selected in the TEST phase instead
673 |   name: "bn8"
674 |   type: "BatchNorm"
675 |   bottom: "conv8"
676 |   top: "bn8"
677 |   include { phase: TEST }
678 |   batch_norm_param {
679 |     use_global_stats: true  # normalize with the stored (accumulated) mean and variance
680 |   }
681 | }
682 | layer {  # scale8: per-channel scale and bias applied to the normalized "bn8" output
683 |   name: "scale8"
684 |   type: "Scale"
685 |   bottom: "bn8"
686 |   top: "scale8"
687 |   param {  # first blob (scale factors): lr_mult 0 means it is not updated during training
688 |     lr_mult: 0
689 |     decay_mult: 0
690 |   }
691 |   param {  # second blob (bias, present because bias_term is true): also frozen
692 |     lr_mult: 0
693 |     decay_mult: 0
694 |   }
695 |   scale_param {
696 |     bias_term: true
697 |   }
698 | }
699 | layer {  # relu8: leaky ReLU computed in place (bottom and top are both "scale8")
700 |   name: "relu8"
701 |   type: "ReLU"
702 |   bottom: "scale8"
703 |   top: "scale8"
704 |   relu_param {
705 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
706 |   }
707 | }
708 | 
709 | layer {  # fc9: fully connected output layer; maps "scale8" to the 1470-element "result" blob
710 |   name: "fc9"
711 |   type: "InnerProduct"
712 |   bottom: "scale8"
713 |   top: "result"
714 |   param {  # weight blob: base learning rate, weight decay applied
715 |     lr_mult: 1
716 |     decay_mult: 1
717 |   }
718 |   param {  # bias blob: twice the base learning rate, no weight decay
719 |     lr_mult: 2
720 |     decay_mult: 0
721 |   }
722 |   inner_product_param {
723 |     num_output: 1470  # NOTE(review): presumably 7 x 7 x 30 YOLO grid predictions - confirm against the output decoder
724 |     weight_filler {
725 |       type: "gaussian"
726 |       std: 0.01
727 |     }
728 |     bias_filler {
729 |       type: "constant"
730 |       value: 0
731 |     }
732 |   }
733 | }
734 | 


--------------------------------------------------------------------------------
/prototxt/yolo_train_val.prototxt:
--------------------------------------------------------------------------------
  1 | name: "YOLONet"
  2 | input: "data"  # the net is fed through a bare input blob; this file declares no data-loading or loss layer
  3 | input_shape {  # shape of "data" in Caffe blob order: N x C x H x W
  4 |   dim: 1  # batch size
  5 |   dim: 3  # channels
  6 |   dim: 448  # height
  7 |   dim: 448  # width
  8 | }
  9 | 
 10 | layer {  # conv1: 7x7 convolution on the input blob "data", 64 filters
 11 |   name: "conv1"
 12 |   type: "Convolution"
 13 |   bottom: "data"
 14 |   top: "conv1"
 15 |   convolution_param {
 16 |     num_output: 64
 17 |     kernel_size: 7
 18 |     pad: 3
 19 |     stride: 2  # stride 2 halves the spatial resolution
 20 |     weight_filler {  # weights start as Gaussian noise with std 0.01
 21 |       type: "gaussian"
 22 |       std: 0.01
 23 |     }
 24 |     bias_filler {  # biases start at 0
 25 |       type: "constant"
 26 |       value: 0
 27 |     }
 28 |   }
 29 | }
 30 | layer {  # relu1: leaky ReLU computed in place (bottom and top are both "conv1")
 31 |   name: "relu1"
 32 |   type: "ReLU"
 33 |   bottom: "conv1"
 34 |   top: "conv1"
 35 |   relu_param{
 36 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
 37 |   }		
 38 | }
 39 | layer {  # pool1: 2x2 max pooling with stride 2
 40 |   name: "pool1"
 41 |   type: "Pooling"
 42 |   bottom: "conv1"
 43 |   top: "pool1"
 44 |   pooling_param {
 45 |     pool: MAX
 46 |     kernel_size: 2
 47 |     stride: 2
 48 |   }
 49 | }
 50 | 
 51 | layer{  # conv2: 3x3 convolution on "pool1", 192 filters; stride 1 with pad 1 keeps the spatial size
 52 |   name: "conv2"
 53 |   type: "Convolution"
 54 |   bottom: "pool1"
 55 |   top: "conv2"
 56 |   convolution_param {
 57 |     num_output: 192
 58 |     kernel_size: 3
 59 |     pad: 1
 60 |     stride: 1
 61 |     weight_filler {
 62 |       type: "gaussian"
 63 |       std: 0.01
 64 |     }
 65 |     bias_filler {
 66 |       type: "constant"
 67 |       value: 0
 68 |     }
 69 |   }
 70 | }
 71 | layer {  # relu2: leaky ReLU computed in place (bottom and top are both "conv2")
 72 |   name: "relu2"
 73 |   type: "ReLU"
 74 |   bottom: "conv2"
 75 |   top: "conv2"
 76 |   relu_param{
 77 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
 78 |   }		
 79 | }
 80 | layer {  # pool2: 2x2 max pooling with stride 2
 81 |   name: "pool2"
 82 |   type: "Pooling"
 83 |   bottom: "conv2"
 84 |   top: "pool2"
 85 |   pooling_param {
 86 |     pool: MAX
 87 |     kernel_size: 2
 88 |     stride: 2
 89 |   }
 90 | }
 91 | 
 92 | layer{  # conv3: 1x1 convolution on "pool2", 128 filters (mixes channels only; spatial size unchanged)
 93 |   name: "conv3"
 94 |   type: "Convolution"
 95 |   bottom: "pool2"
 96 |   top: "conv3"
 97 |   convolution_param {
 98 |     num_output: 128
 99 |     kernel_size: 1
100 |     pad: 0
101 |     stride: 1
102 |     weight_filler {
103 |       type: "gaussian"
104 |       std: 0.01
105 |     }
106 |     bias_filler {
107 |       type: "constant"
108 |       value: 0
109 |     }
110 |   }
111 | }
112 | layer {  # relu3: leaky ReLU computed in place (bottom and top are both "conv3")
113 |   name: "relu3"
114 |   type: "ReLU"
115 |   bottom: "conv3"
116 |   top: "conv3"
117 |   relu_param{
118 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
119 |   }		
120 | }
121 | 
122 | 
123 | layer{  # conv4: 3x3 convolution on "conv3", 256 filters; stride 1 with pad 1 keeps the spatial size
124 |   name: "conv4"
125 |   type: "Convolution"
126 |   bottom: "conv3"
127 |   top: "conv4"
128 |   convolution_param {
129 |     num_output: 256
130 |     kernel_size: 3
131 |     pad: 1
132 |     stride: 1
133 |     weight_filler {
134 |       type: "gaussian"
135 |       std: 0.01
136 |     }
137 |     bias_filler {
138 |       type: "constant"
139 |       value: 0
140 |     }
141 |   }
142 | }
143 | layer {  # relu4: leaky ReLU computed in place (bottom and top are both "conv4")
144 |   name: "relu4"
145 |   type: "ReLU"
146 |   bottom: "conv4"
147 |   top: "conv4"
148 |   relu_param{
149 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
150 |   }		
151 | }
152 | 
153 | layer{  # conv5: 1x1 convolution on "conv4", 256 filters (mixes channels only; spatial size unchanged)
154 |   name: "conv5"
155 |   type: "Convolution"
156 |   bottom: "conv4"
157 |   top: "conv5"
158 |   convolution_param {
159 |     num_output: 256
160 |     kernel_size: 1
161 |     pad: 0
162 |     stride: 1
163 |     weight_filler {
164 |       type: "gaussian"
165 |       std: 0.01
166 |     }
167 |     bias_filler {
168 |       type: "constant"
169 |       value: 0
170 |     }
171 |   }
172 | }
173 | layer {  # relu5: leaky ReLU computed in place (bottom and top are both "conv5")
174 |   name: "relu5"
175 |   type: "ReLU"
176 |   bottom: "conv5"
177 |   top: "conv5"
178 |   relu_param{
179 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
180 |   }		
181 | }
182 | 
183 | layer{  # conv6: 3x3 convolution on "conv5", 512 filters; stride 1 with pad 1 keeps the spatial size
184 |   name: "conv6"
185 |   type: "Convolution"
186 |   bottom: "conv5"
187 |   top: "conv6"
188 |   convolution_param {
189 |     num_output: 512
190 |     kernel_size: 3
191 |     pad: 1
192 |     stride: 1
193 |     weight_filler {
194 |       type: "gaussian"
195 |       std: 0.01
196 |     }
197 |     bias_filler {
198 |       type: "constant"
199 |       value: 0
200 |     }
201 |   }
202 | }
203 | layer {  # relu6: leaky ReLU computed in place (bottom and top are both "conv6")
204 |   name: "relu6"
205 |   type: "ReLU"
206 |   bottom: "conv6"
207 |   top: "conv6"
208 |   relu_param{
209 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
210 |   }		
211 | }
212 | layer {  # pool6: 2x2 max pooling with stride 2
213 |   name: "pool6"
214 |   type: "Pooling"
215 |   bottom: "conv6"
216 |   top: "pool6"
217 |   pooling_param {
218 |     pool: MAX
219 |     kernel_size: 2
220 |     stride: 2
221 |   }
222 | }
223 | 
224 | layer{  # conv7: 1x1 convolution on "pool6", 256 filters (mixes channels only; spatial size unchanged)
225 |   name: "conv7"
226 |   type: "Convolution"
227 |   bottom: "pool6"
228 |   top: "conv7"
229 |   convolution_param {
230 |     num_output: 256
231 |     kernel_size: 1
232 |     pad: 0
233 |     stride: 1
234 |     weight_filler {
235 |       type: "gaussian"
236 |       std: 0.01
237 |     }
238 |     bias_filler {
239 |       type: "constant"
240 |       value: 0
241 |     }
242 |   }
243 | }
244 | layer {  # relu7: leaky ReLU computed in place (bottom and top are both "conv7")
245 |   name: "relu7"
246 |   type: "ReLU"
247 |   bottom: "conv7"
248 |   top: "conv7"
249 |   relu_param{
250 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
251 |   }		
252 | }
253 | 
254 | layer{  # conv8: 3x3 convolution on "conv7", 512 filters; stride 1 with pad 1 keeps the spatial size
255 |   name: "conv8"
256 |   type: "Convolution"
257 |   bottom: "conv7"
258 |   top: "conv8"
259 |   convolution_param {
260 |     num_output: 512
261 |     kernel_size: 3
262 |     pad: 1
263 |     stride: 1
264 |     weight_filler {
265 |       type: "gaussian"
266 |       std: 0.01
267 |     }
268 |     bias_filler {
269 |       type: "constant"
270 |       value: 0
271 |     }
272 |   }
273 | }
274 | layer {  # relu8: leaky ReLU computed in place (bottom and top are both "conv8")
275 |   name: "relu8"
276 |   type: "ReLU"
277 |   bottom: "conv8"
278 |   top: "conv8"
279 |   relu_param{
280 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
281 |   }		
282 | }
283 | 
284 | layer{  # conv9: 1x1 convolution on "conv8", 256 filters (mixes channels only; spatial size unchanged)
285 |   name: "conv9"
286 |   type: "Convolution"
287 |   bottom: "conv8"
288 |   top: "conv9"
289 |   convolution_param {
290 |     num_output: 256
291 |     kernel_size: 1
292 |     pad: 0
293 |     stride: 1
294 |     weight_filler {
295 |       type: "gaussian"
296 |       std: 0.01
297 |     }
298 |     bias_filler {
299 |       type: "constant"
300 |       value: 0
301 |     }
302 |   }
303 | }
304 | layer {  # relu9: leaky ReLU computed in place (bottom and top are both "conv9")
305 |   name: "relu9"
306 |   type: "ReLU"
307 |   bottom: "conv9"
308 |   top: "conv9"
309 |   relu_param{
310 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
311 |   }		
312 | }
313 | 
314 | layer{  # conv10: 3x3 convolution on "conv9", 512 filters; stride 1 with pad 1 keeps the spatial size
315 |   name: "conv10"
316 |   type: "Convolution"
317 |   bottom: "conv9"
318 |   top: "conv10"
319 |   convolution_param {
320 |     num_output: 512
321 |     kernel_size: 3
322 |     pad: 1
323 |     stride: 1
324 |     weight_filler {
325 |       type: "gaussian"
326 |       std: 0.01
327 |     }
328 |     bias_filler {
329 |       type: "constant"
330 |       value: 0
331 |     }
332 |   }
333 | }
334 | layer {  # relu10: leaky ReLU computed in place (bottom and top are both "conv10")
335 |   name: "relu10"
336 |   type: "ReLU"
337 |   bottom: "conv10"
338 |   top: "conv10"
339 |   relu_param{
340 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
341 |   }		
342 | }
343 | 
344 | layer{  # conv11: 1x1 convolution on "conv10", 256 filters (mixes channels only; spatial size unchanged)
345 |   name: "conv11"
346 |   type: "Convolution"
347 |   bottom: "conv10"
348 |   top: "conv11"
349 |   convolution_param {
350 |     num_output: 256
351 |     kernel_size: 1
352 |     pad: 0
353 |     stride: 1
354 |     weight_filler {
355 |       type: "gaussian"
356 |       std: 0.01
357 |     }
358 |     bias_filler {
359 |       type: "constant"
360 |       value: 0
361 |     }
362 |   }
363 | }
364 | layer {  # relu11: leaky ReLU computed in place (bottom and top are both "conv11")
365 |   name: "relu11"
366 |   type: "ReLU"
367 |   bottom: "conv11"
368 |   top: "conv11"
369 |   relu_param{
370 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
371 |   }		
372 | }
373 | 
374 | 
375 | layer{  # conv12: 3x3 convolution on "conv11", 512 filters; stride 1 with pad 1 keeps the spatial size
376 |   name: "conv12"
377 |   type: "Convolution"
378 |   bottom: "conv11"
379 |   top: "conv12"
380 |   convolution_param {
381 |     num_output: 512
382 |     kernel_size: 3
383 |     pad: 1
384 |     stride: 1
385 |     weight_filler {
386 |       type: "gaussian"
387 |       std: 0.01
388 |     }
389 |     bias_filler {
390 |       type: "constant"
391 |       value: 0
392 |     }
393 |   }
394 | }
395 | layer {  # relu12: leaky ReLU computed in place (bottom and top are both "conv12")
396 |   name: "relu12"
397 |   type: "ReLU"
398 |   bottom: "conv12"
399 |   top: "conv12"
400 |   relu_param{
401 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
402 |   }		
403 | }
404 | 
405 | 
406 | layer{  # conv13: 1x1 convolution on "conv12", 256 filters (mixes channels only; spatial size unchanged)
407 |   name: "conv13"
408 |   type: "Convolution"
409 |   bottom: "conv12"
410 |   top: "conv13"
411 |   convolution_param {
412 |     num_output: 256
413 |     kernel_size: 1
414 |     pad: 0
415 |     stride: 1
416 |     weight_filler {
417 |       type: "gaussian"
418 |       std: 0.01
419 |     }
420 |     bias_filler {
421 |       type: "constant"
422 |       value: 0
423 |     }
424 |   }
425 | }
426 | layer {  # relu13: leaky ReLU computed in place (bottom and top are both "conv13")
427 |   name: "relu13"
428 |   type: "ReLU"
429 |   bottom: "conv13"
430 |   top: "conv13"
431 |   relu_param{
432 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
433 |   }		
434 | }
435 | 
436 | layer{  # conv14: 3x3 convolution on "conv13", 512 filters; stride 1 with pad 1 keeps the spatial size
437 |   name: "conv14"
438 |   type: "Convolution"
439 |   bottom: "conv13"
440 |   top: "conv14"
441 |   convolution_param {
442 |     num_output: 512
443 |     kernel_size: 3
444 |     pad: 1
445 |     stride: 1
446 |     weight_filler {
447 |       type: "gaussian"
448 |       std: 0.01
449 |     }
450 |     bias_filler {
451 |       type: "constant"
452 |       value: 0
453 |     }
454 |   }
455 | }
456 | layer {  # relu14: leaky ReLU computed in place (bottom and top are both "conv14")
457 |   name: "relu14"
458 |   type: "ReLU"
459 |   bottom: "conv14"
460 |   top: "conv14"
461 |   relu_param{
462 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
463 |   }		
464 | }
465 | 
466 | layer{  # conv15: 1x1 convolution on "conv14", 512 filters (mixes channels only; spatial size unchanged)
467 |   name: "conv15"
468 |   type: "Convolution"
469 |   bottom: "conv14"
470 |   top: "conv15"
471 |   convolution_param {
472 |     num_output: 512
473 |     kernel_size: 1
474 |     pad: 0
475 |     stride: 1
476 |     weight_filler {
477 |       type: "gaussian"
478 |       std: 0.01
479 |     }
480 |     bias_filler {
481 |       type: "constant"
482 |       value: 0
483 |     }
484 |   }
485 | }
486 | layer {  # relu15: leaky ReLU computed in place (bottom and top are both "conv15")
487 |   name: "relu15"
488 |   type: "ReLU"
489 |   bottom: "conv15"
490 |   top: "conv15"
491 |   relu_param{
492 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
493 |   }		
494 | }
495 | 
496 | 
497 | layer{  # conv16: 3x3 convolution on "conv15", 1024 filters; stride 1 with pad 1 keeps the spatial size
498 |   name: "conv16"
499 |   type: "Convolution"
500 |   bottom: "conv15"
501 |   top: "conv16"
502 |   convolution_param {
503 |     num_output: 1024
504 |     kernel_size: 3
505 |     pad: 1
506 |     stride: 1
507 |     weight_filler {
508 |       type: "gaussian"
509 |       std: 0.01
510 |     }
511 |     bias_filler {
512 |       type: "constant"
513 |       value: 0
514 |     }
515 |   }
516 | }
517 | layer {  # relu16: leaky ReLU computed in place (bottom and top are both "conv16")
518 |   name: "relu16"
519 |   type: "ReLU"
520 |   bottom: "conv16"
521 |   top: "conv16"
522 |   relu_param{
523 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
524 |   }		
525 | }
526 | 
527 | layer {  # pool16: 2x2 max pooling with stride 2 over the activated "conv16" blob
528 |   name: "pool16"
529 |   type: "Pooling"
530 |   bottom: "conv16"
531 |   top: "pool16"
532 |   pooling_param {
533 |     pool: MAX
534 |     kernel_size: 2
535 |     stride: 2  # stride equals the kernel size, so pooling windows do not overlap
536 |   }
537 | }
538 | 
539 | 
540 | layer{  # conv17: 1x1 convolution on "pool16", 512 filters (mixes channels only; spatial size unchanged)
541 |   name: "conv17"
542 |   type: "Convolution"
543 |   bottom: "pool16"
544 |   top: "conv17"
545 |   convolution_param {
546 |     num_output: 512
547 |     kernel_size: 1
548 |     pad: 0
549 |     stride: 1
550 |     weight_filler {
551 |       type: "gaussian"
552 |       std: 0.01
553 |     }
554 |     bias_filler {
555 |       type: "constant"
556 |       value: 0
557 |     }
558 |   }
559 | }
560 | layer {  # relu17: leaky ReLU computed in place (bottom and top are both "conv17")
561 |   name: "relu17"
562 |   type: "ReLU"
563 |   bottom: "conv17"
564 |   top: "conv17"
565 |   relu_param{
566 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
567 |   }		
568 | }
569 | 
570 | 
571 | layer{  # conv18: 3x3 convolution on "conv17", 1024 filters; stride 1 with pad 1 keeps the spatial size
572 |   name: "conv18"
573 |   type: "Convolution"
574 |   bottom: "conv17"
575 |   top: "conv18"
576 |   convolution_param {
577 |     num_output: 1024
578 |     kernel_size: 3
579 |     pad: 1
580 |     stride: 1
581 |     weight_filler {
582 |       type: "gaussian"
583 |       std: 0.01
584 |     }
585 |     bias_filler {
586 |       type: "constant"
587 |       value: 0
588 |     }
589 |   }
590 | }
591 | layer {  # relu18: leaky ReLU computed in place (bottom and top are both "conv18")
592 |   name: "relu18"
593 |   type: "ReLU"
594 |   bottom: "conv18"
595 |   top: "conv18"
596 |   relu_param{
597 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
598 |   }		
599 | }
600 | 
601 | 
602 | 
603 | layer{  # conv19: 1x1 convolution on "conv18", 512 filters (mixes channels only; spatial size unchanged)
604 |   name: "conv19"
605 |   type: "Convolution"
606 |   bottom: "conv18"
607 |   top: "conv19"
608 |   convolution_param {
609 |     num_output: 512
610 |     kernel_size: 1
611 |     pad: 0
612 |     stride: 1
613 |     weight_filler {
614 |       type: "gaussian"
615 |       std: 0.01
616 |     }
617 |     bias_filler {
618 |       type: "constant"
619 |       value: 0
620 |     }
621 |   }
622 | }
623 | layer {  # relu19: leaky ReLU computed in place (bottom and top are both "conv19")
624 |   name: "relu19"
625 |   type: "ReLU"
626 |   bottom: "conv19"
627 |   top: "conv19"
628 |   relu_param{
629 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
630 |   }		
631 | }
632 | 
633 | 
634 | 
635 | layer{  # conv20: 3x3 convolution on "conv19", 1024 filters; stride 1 with pad 1 keeps the spatial size
636 |   name: "conv20"
637 |   type: "Convolution"
638 |   bottom: "conv19"
639 |   top: "conv20"
640 |   convolution_param {
641 |     num_output: 1024
642 |     kernel_size: 3
643 |     pad: 1
644 |     stride: 1
645 |     weight_filler {
646 |       type: "gaussian"
647 |       std: 0.01
648 |     }
649 |     bias_filler {
650 |       type: "constant"
651 |       value: 0
652 |     }
653 |   }
654 | }
655 | layer {  # relu20: leaky ReLU computed in place (bottom and top are both "conv20")
656 |   name: "relu20"
657 |   type: "ReLU"
658 |   bottom: "conv20"
659 |   top: "conv20"
660 |   relu_param{
661 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
662 |   }		
663 | }
664 | 
665 | 
666 | 
667 | layer{  # conv21: 3x3 convolution on "conv20", 1024 filters; stride 1 with pad 1 keeps the spatial size
668 |   name: "conv21"
669 |   type: "Convolution"
670 |   bottom: "conv20"
671 |   top: "conv21"
672 |   convolution_param {
673 |     num_output: 1024
674 |     kernel_size: 3
675 |     pad: 1
676 |     stride: 1
677 |     weight_filler {
678 |       type: "gaussian"
679 |       std: 0.01
680 |     }
681 |     bias_filler {
682 |       type: "constant"
683 |       value: 0
684 |     }
685 |   }
686 | }
687 | layer {  # relu21: leaky ReLU computed in place (bottom and top are both "conv21")
688 |   name: "relu21"
689 |   type: "ReLU"
690 |   bottom: "conv21"
691 |   top: "conv21"
692 |   relu_param{
693 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
694 |   }		
695 | }
696 | 
697 | 
698 | layer{  # conv22: 3x3 convolution on "conv21", 1024 filters, strided so it also downsamples
699 |   name: "conv22"
700 |   type: "Convolution"
701 |   bottom: "conv21"
702 |   top: "conv22"
703 |   convolution_param {
704 |     num_output: 1024
705 |     kernel_size: 3
706 |     pad: 1
707 |     stride: 2  # the only stride-2 convolution besides conv1; halves the spatial resolution
708 |     weight_filler {
709 |       type: "gaussian"
710 |       std: 0.01
711 |     }
712 |     bias_filler {
713 |       type: "constant"
714 |       value: 0
715 |     }
716 |   }
717 | }
718 | layer {  # relu22: leaky ReLU computed in place (bottom and top are both "conv22")
719 |   name: "relu22"
720 |   type: "ReLU"
721 |   bottom: "conv22"
722 |   top: "conv22"
723 |   relu_param{
724 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
725 |   }		
726 | }
727 | 
728 | 
729 | 
730 | layer{  # conv23: 3x3 convolution on "conv22", 1024 filters; stride 1 with pad 1 keeps the spatial size
731 |   name: "conv23"
732 |   type: "Convolution"
733 |   bottom: "conv22"
734 |   top: "conv23"
735 |   convolution_param {
736 |     num_output: 1024
737 |     kernel_size: 3
738 |     pad: 1
739 |     stride: 1
740 |     weight_filler {
741 |       type: "gaussian"
742 |       std: 0.01
743 |     }
744 |     bias_filler {
745 |       type: "constant"
746 |       value: 0
747 |     }
748 |   }
749 | }
750 | layer {  # relu23: leaky ReLU computed in place (bottom and top are both "conv23")
751 |   name: "relu23"
752 |   type: "ReLU"
753 |   bottom: "conv23"
754 |   top: "conv23"
755 |   relu_param{
756 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
757 |   }		
758 | }
759 | 
760 | 
761 | layer{  # conv24: last convolution; 3x3 on "conv23", 1024 filters, stride 1 with pad 1 keeps the spatial size
762 |   name: "conv24"
763 |   type: "Convolution"
764 |   bottom: "conv23"
765 |   top: "conv24"
766 |   convolution_param {
767 |     num_output: 1024
768 |     kernel_size: 3
769 |     pad: 1
770 |     stride: 1
771 |     weight_filler {
772 |       type: "gaussian"
773 |       std: 0.01
774 |     }
775 |     bias_filler {
776 |       type: "constant"
777 |       value: 0
778 |     }
779 |   }
780 | }
781 | layer {  # relu24: leaky ReLU computed in place (bottom and top are both "conv24")
782 |   name: "relu24"
783 |   type: "ReLU"
784 |   bottom: "conv24"
785 |   top: "conv24"
786 |   relu_param{
787 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
788 |   }		
789 | }
790 | 
791 | 
792 | 
793 | 
794 | layer{  # fc25: fully connected layer taking the "conv24" feature map to 4096 outputs
795 |   name: "fc25"
796 |   type: "InnerProduct"
797 |   bottom: "conv24"
798 |   top: "fc25"
799 |   inner_product_param {
800 |     num_output: 4096
801 |     weight_filler {
802 |       type: "gaussian"
803 |       std: 0.01
804 |     }
805 |     bias_filler {
806 |       type: "constant"
807 |       value: 0
808 |     }
809 |   }
810 | }
811 | layer {  # relu25: leaky ReLU computed in place (bottom and top are both "fc25")
812 |   name: "relu25"
813 |   type: "ReLU"
814 |   bottom: "fc25"
815 |   top: "fc25"
816 |   relu_param{
817 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
818 |   }		
819 | }
820 | 
821 | 
822 | layer{  # fc26: final fully connected layer; writes the network output blob "result" (no activation follows)
823 |   name: "fc26"
824 |   type: "InnerProduct"
825 |   bottom: "fc25"
826 |   top: "result"
827 |   inner_product_param {
828 |     num_output: 1470  # NOTE(review): presumably 7 x 7 x 30 YOLO grid predictions - confirm against the output decoder
829 |     weight_filler {
830 |       type: "gaussian"
831 |       std: 0.01
832 |     }
833 |     bias_filler {
834 |       type: "constant"
835 |       value: 0
836 |     }
837 |   }
838 | }
839 | 
840 | 


--------------------------------------------------------------------------------
/prototxt/yolo_small_train_val.prototxt:
--------------------------------------------------------------------------------
  1 | name: "YOLONet"
  2 | input: "data"  # the net is fed through a bare input blob named "data"
  3 | input_shape {  # shape of "data" in Caffe blob order: N x C x H x W
  4 |   dim: 1  # batch size
  5 |   dim: 3  # channels
  6 |   dim: 448  # height
  7 |   dim: 448  # width
  8 | }
  9 | 
 10 | layer {  # conv1: 7x7 convolution on the input blob "data", 64 filters
 11 |   name: "conv1"
 12 |   type: "Convolution"
 13 |   bottom: "data"
 14 |   top: "conv1"
 15 |   convolution_param {
 16 |     num_output: 64
 17 |     kernel_size: 7
 18 |     pad: 3
 19 |     stride: 2  # stride 2 halves the spatial resolution
 20 |     weight_filler {  # weights start as Gaussian noise with std 0.01
 21 |       type: "gaussian"
 22 |       std: 0.01
 23 |     }
 24 |     bias_filler {  # biases start at 0
 25 |       type: "constant"
 26 |       value: 0
 27 |     }
 28 |   }
 29 | }
 30 | layer {  # relu1: leaky ReLU computed in place (bottom and top are both "conv1")
 31 |   name: "relu1"
 32 |   type: "ReLU"
 33 |   bottom: "conv1"
 34 |   top: "conv1"
 35 |   relu_param{
 36 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
 37 |   }		
 38 | }
 39 | layer {  # pool1: 2x2 max pooling with stride 2
 40 |   name: "pool1"
 41 |   type: "Pooling"
 42 |   bottom: "conv1"
 43 |   top: "pool1"
 44 |   pooling_param {
 45 |     pool: MAX
 46 |     kernel_size: 2
 47 |     stride: 2
 48 |   }
 49 | }
 50 | 
 51 | layer{  # conv2: 3x3 convolution on "pool1", 192 filters; stride 1 with pad 1 keeps the spatial size
 52 |   name: "conv2"
 53 |   type: "Convolution"
 54 |   bottom: "pool1"
 55 |   top: "conv2"
 56 |   convolution_param {
 57 |     num_output: 192
 58 |     kernel_size: 3
 59 |     pad: 1
 60 |     stride: 1
 61 |     weight_filler {
 62 |       type: "gaussian"
 63 |       std: 0.01
 64 |     }
 65 |     bias_filler {
 66 |       type: "constant"
 67 |       value: 0
 68 |     }
 69 |   }
 70 | }
 71 | layer {  # relu2: leaky ReLU computed in place (bottom and top are both "conv2")
 72 |   name: "relu2"
 73 |   type: "ReLU"
 74 |   bottom: "conv2"
 75 |   top: "conv2"
 76 |   relu_param{
 77 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
 78 |   }		
 79 | }
 80 | layer {  # pool2: 2x2 max pooling with stride 2
 81 |   name: "pool2"
 82 |   type: "Pooling"
 83 |   bottom: "conv2"
 84 |   top: "pool2"
 85 |   pooling_param {
 86 |     pool: MAX
 87 |     kernel_size: 2
 88 |     stride: 2
 89 |   }
 90 | }
 91 | 
 92 | layer{  # conv3: 1x1 convolution on "pool2", 128 filters (mixes channels only; spatial size unchanged)
 93 |   name: "conv3"
 94 |   type: "Convolution"
 95 |   bottom: "pool2"
 96 |   top: "conv3"
 97 |   convolution_param {
 98 |     num_output: 128
 99 |     kernel_size: 1
100 |     pad: 0
101 |     stride: 1
102 |     weight_filler {
103 |       type: "gaussian"
104 |       std: 0.01
105 |     }
106 |     bias_filler {
107 |       type: "constant"
108 |       value: 0
109 |     }
110 |   }
111 | }
112 | layer {  # relu3: leaky ReLU computed in place (bottom and top are both "conv3")
113 |   name: "relu3"
114 |   type: "ReLU"
115 |   bottom: "conv3"
116 |   top: "conv3"
117 |   relu_param{
118 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
119 |   }		
120 | }
121 | 
122 | 
123 | layer{  # conv4: 3x3 convolution on "conv3", 256 filters; stride 1 with pad 1 keeps the spatial size
124 |   name: "conv4"
125 |   type: "Convolution"
126 |   bottom: "conv3"
127 |   top: "conv4"
128 |   convolution_param {
129 |     num_output: 256
130 |     kernel_size: 3
131 |     pad: 1
132 |     stride: 1
133 |     weight_filler {
134 |       type: "gaussian"
135 |       std: 0.01
136 |     }
137 |     bias_filler {
138 |       type: "constant"
139 |       value: 0
140 |     }
141 |   }
142 | }
143 | layer {  # relu4: leaky ReLU computed in place (bottom and top are both "conv4")
144 |   name: "relu4"
145 |   type: "ReLU"
146 |   bottom: "conv4"
147 |   top: "conv4"
148 |   relu_param{
149 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
150 |   }		
151 | }
152 | 
153 | layer{  # conv5: 1x1 convolution on "conv4", 256 filters (mixes channels only; spatial size unchanged)
154 |   name: "conv5"
155 |   type: "Convolution"
156 |   bottom: "conv4"
157 |   top: "conv5"
158 |   convolution_param {
159 |     num_output: 256
160 |     kernel_size: 1
161 |     pad: 0
162 |     stride: 1
163 |     weight_filler {
164 |       type: "gaussian"
165 |       std: 0.01
166 |     }
167 |     bias_filler {
168 |       type: "constant"
169 |       value: 0
170 |     }
171 |   }
172 | }
173 | layer {  # relu5: leaky ReLU computed in place (bottom and top are both "conv5")
174 |   name: "relu5"
175 |   type: "ReLU"
176 |   bottom: "conv5"
177 |   top: "conv5"
178 |   relu_param{
179 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
180 |   }		
181 | }
182 | 
183 | layer{  # conv6: 3x3 convolution on "conv5", 512 filters; stride 1 with pad 1 keeps the spatial size
184 |   name: "conv6"
185 |   type: "Convolution"
186 |   bottom: "conv5"
187 |   top: "conv6"
188 |   convolution_param {
189 |     num_output: 512
190 |     kernel_size: 3
191 |     pad: 1
192 |     stride: 1
193 |     weight_filler {
194 |       type: "gaussian"
195 |       std: 0.01
196 |     }
197 |     bias_filler {
198 |       type: "constant"
199 |       value: 0
200 |     }
201 |   }
202 | }
203 | layer {  # relu6: leaky ReLU computed in place (bottom and top are both "conv6")
204 |   name: "relu6"
205 |   type: "ReLU"
206 |   bottom: "conv6"
207 |   top: "conv6"
208 |   relu_param{
209 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
210 |   }		
211 | }
212 | layer {  # pool6: 2x2 max pooling with stride 2
213 |   name: "pool6"
214 |   type: "Pooling"
215 |   bottom: "conv6"
216 |   top: "pool6"
217 |   pooling_param {
218 |     pool: MAX
219 |     kernel_size: 2
220 |     stride: 2
221 |   }
222 | }
223 | 
224 | layer{  # conv7: 1x1 convolution on "pool6", 256 filters (mixes channels only; spatial size unchanged)
225 |   name: "conv7"
226 |   type: "Convolution"
227 |   bottom: "pool6"
228 |   top: "conv7"
229 |   convolution_param {
230 |     num_output: 256
231 |     kernel_size: 1
232 |     pad: 0
233 |     stride: 1
234 |     weight_filler {
235 |       type: "gaussian"
236 |       std: 0.01
237 |     }
238 |     bias_filler {
239 |       type: "constant"
240 |       value: 0
241 |     }
242 |   }
243 | }
244 | layer {  # relu7: leaky ReLU computed in place (bottom and top are both "conv7")
245 |   name: "relu7"
246 |   type: "ReLU"
247 |   bottom: "conv7"
248 |   top: "conv7"
249 |   relu_param{
250 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
251 |   }		
252 | }
253 | 
254 | layer{  # conv8: 3x3 convolution on "conv7", 512 filters; stride 1 with pad 1 keeps the spatial size
255 |   name: "conv8"
256 |   type: "Convolution"
257 |   bottom: "conv7"
258 |   top: "conv8"
259 |   convolution_param {
260 |     num_output: 512
261 |     kernel_size: 3
262 |     pad: 1
263 |     stride: 1
264 |     weight_filler {
265 |       type: "gaussian"
266 |       std: 0.01
267 |     }
268 |     bias_filler {
269 |       type: "constant"
270 |       value: 0
271 |     }
272 |   }
273 | }
274 | layer {  # relu8: leaky ReLU computed in place (bottom and top are both "conv8")
275 |   name: "relu8"
276 |   type: "ReLU"
277 |   bottom: "conv8"
278 |   top: "conv8"
279 |   relu_param{
280 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
281 |   }		
282 | }
283 | 
284 | layer{  # conv9: 1x1 convolution on "conv8", 256 filters (mixes channels only; spatial size unchanged)
285 |   name: "conv9"
286 |   type: "Convolution"
287 |   bottom: "conv8"
288 |   top: "conv9"
289 |   convolution_param {
290 |     num_output: 256
291 |     kernel_size: 1
292 |     pad: 0
293 |     stride: 1
294 |     weight_filler {
295 |       type: "gaussian"
296 |       std: 0.01
297 |     }
298 |     bias_filler {
299 |       type: "constant"
300 |       value: 0
301 |     }
302 |   }
303 | }
304 | layer {  # relu9: leaky ReLU computed in place (bottom and top are both "conv9")
305 |   name: "relu9"
306 |   type: "ReLU"
307 |   bottom: "conv9"
308 |   top: "conv9"
309 |   relu_param{
310 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
311 |   }		
312 | }
313 | 
314 | layer{  # conv10: 3x3 convolution on "conv9", 512 filters; stride 1 with pad 1 keeps the spatial size
315 |   name: "conv10"
316 |   type: "Convolution"
317 |   bottom: "conv9"
318 |   top: "conv10"
319 |   convolution_param {
320 |     num_output: 512
321 |     kernel_size: 3
322 |     pad: 1
323 |     stride: 1
324 |     weight_filler {
325 |       type: "gaussian"
326 |       std: 0.01
327 |     }
328 |     bias_filler {
329 |       type: "constant"
330 |       value: 0
331 |     }
332 |   }
333 | }
334 | layer {  # relu10: leaky ReLU computed in place (bottom and top are both "conv10")
335 |   name: "relu10"
336 |   type: "ReLU"
337 |   bottom: "conv10"
338 |   top: "conv10"
339 |   relu_param{
340 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
341 |   }		
342 | }
343 | 
344 | layer{  # conv11: 1x1 convolution on "conv10", 256 filters (mixes channels only; spatial size unchanged)
345 |   name: "conv11"
346 |   type: "Convolution"
347 |   bottom: "conv10"
348 |   top: "conv11"
349 |   convolution_param {
350 |     num_output: 256
351 |     kernel_size: 1
352 |     pad: 0
353 |     stride: 1
354 |     weight_filler {
355 |       type: "gaussian"
356 |       std: 0.01
357 |     }
358 |     bias_filler {
359 |       type: "constant"
360 |       value: 0
361 |     }
362 |   }
363 | }
364 | layer {  # relu11: leaky ReLU computed in place (bottom and top are both "conv11")
365 |   name: "relu11"
366 |   type: "ReLU"
367 |   bottom: "conv11"
368 |   top: "conv11"
369 |   relu_param{
370 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
371 |   }		
372 | }
373 | 
374 | 
375 | layer{  # conv12: 3x3 convolution on "conv11", 512 filters; stride 1 with pad 1 keeps the spatial size
376 |   name: "conv12"
377 |   type: "Convolution"
378 |   bottom: "conv11"
379 |   top: "conv12"
380 |   convolution_param {
381 |     num_output: 512
382 |     kernel_size: 3
383 |     pad: 1
384 |     stride: 1
385 |     weight_filler {
386 |       type: "gaussian"
387 |       std: 0.01
388 |     }
389 |     bias_filler {
390 |       type: "constant"
391 |       value: 0
392 |     }
393 |   }
394 | }
395 | layer {  # relu12: leaky ReLU computed in place (bottom and top are both "conv12")
396 |   name: "relu12"
397 |   type: "ReLU"
398 |   bottom: "conv12"
399 |   top: "conv12"
400 |   relu_param{
401 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
402 |   }		
403 | }
404 | 
405 | 
406 | layer{  # conv13: 1x1 convolution on "conv12", 256 filters (mixes channels only; spatial size unchanged)
407 |   name: "conv13"
408 |   type: "Convolution"
409 |   bottom: "conv12"
410 |   top: "conv13"
411 |   convolution_param {
412 |     num_output: 256
413 |     kernel_size: 1
414 |     pad: 0
415 |     stride: 1
416 |     weight_filler {
417 |       type: "gaussian"
418 |       std: 0.01
419 |     }
420 |     bias_filler {
421 |       type: "constant"
422 |       value: 0
423 |     }
424 |   }
425 | }
426 | layer {  # relu13: leaky ReLU computed in place (bottom and top are both "conv13")
427 |   name: "relu13"
428 |   type: "ReLU"
429 |   bottom: "conv13"
430 |   top: "conv13"
431 |   relu_param{
432 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
433 |   }		
434 | }
435 | 
436 | layer{  # conv14: 3x3 convolution on "conv13", 512 filters; stride 1 with pad 1 keeps the spatial size
437 |   name: "conv14"
438 |   type: "Convolution"
439 |   bottom: "conv13"
440 |   top: "conv14"
441 |   convolution_param {
442 |     num_output: 512
443 |     kernel_size: 3
444 |     pad: 1
445 |     stride: 1
446 |     weight_filler {
447 |       type: "gaussian"
448 |       std: 0.01
449 |     }
450 |     bias_filler {
451 |       type: "constant"
452 |       value: 0
453 |     }
454 |   }
455 | }
456 | layer {  # relu14: leaky ReLU computed in place (bottom and top are both "conv14")
457 |   name: "relu14"
458 |   type: "ReLU"
459 |   bottom: "conv14"
460 |   top: "conv14"
461 |   relu_param{
462 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
463 |   }		
464 | }
465 | 
466 | layer{  # conv15: 1x1 convolution on "conv14", 512 filters (mixes channels only; spatial size unchanged)
467 |   name: "conv15"
468 |   type: "Convolution"
469 |   bottom: "conv14"
470 |   top: "conv15"
471 |   convolution_param {
472 |     num_output: 512
473 |     kernel_size: 1
474 |     pad: 0
475 |     stride: 1
476 |     weight_filler {
477 |       type: "gaussian"
478 |       std: 0.01
479 |     }
480 |     bias_filler {
481 |       type: "constant"
482 |       value: 0
483 |     }
484 |   }
485 | }
486 | layer {  # relu15: leaky ReLU computed in place (bottom and top are both "conv15")
487 |   name: "relu15"
488 |   type: "ReLU"
489 |   bottom: "conv15"
490 |   top: "conv15"
491 |   relu_param{
492 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
493 |   }		
494 | }
495 | 
496 | 
497 | layer{  # conv16: 3x3 convolution on "conv15", 1024 filters; stride 1 with pad 1 keeps the spatial size
498 |   name: "conv16"
499 |   type: "Convolution"
500 |   bottom: "conv15"
501 |   top: "conv16"
502 |   convolution_param {
503 |     num_output: 1024
504 |     kernel_size: 3
505 |     pad: 1
506 |     stride: 1
507 |     weight_filler {
508 |       type: "gaussian"
509 |       std: 0.01
510 |     }
511 |     bias_filler {
512 |       type: "constant"
513 |       value: 0
514 |     }
515 |   }
516 | }
517 | layer {  # relu16: leaky ReLU computed in place (bottom and top are both "conv16")
518 |   name: "relu16"
519 |   type: "ReLU"
520 |   bottom: "conv16"
521 |   top: "conv16"
522 |   relu_param{
523 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
524 |   }		
525 | }
526 | 
527 | layer {  # pool16: 2x2 max pooling with stride 2 over the activated "conv16" blob
528 |   name: "pool16"
529 |   type: "Pooling"
530 |   bottom: "conv16"
531 |   top: "pool16"
532 |   pooling_param {
533 |     pool: MAX
534 |     kernel_size: 2
535 |     stride: 2  # stride equals the kernel size, so pooling windows do not overlap
536 |   }
537 | }
538 | 
539 | 
540 | layer{  # conv17: 1x1 convolution on "pool16", 512 filters (mixes channels only; spatial size unchanged)
541 |   name: "conv17"
542 |   type: "Convolution"
543 |   bottom: "pool16"
544 |   top: "conv17"
545 |   convolution_param {
546 |     num_output: 512
547 |     kernel_size: 1
548 |     pad: 0
549 |     stride: 1
550 |     weight_filler {
551 |       type: "gaussian"
552 |       std: 0.01
553 |     }
554 |     bias_filler {
555 |       type: "constant"
556 |       value: 0
557 |     }
558 |   }
559 | }
560 | layer {  # relu17: leaky ReLU computed in place (bottom and top are both "conv17")
561 |   name: "relu17"
562 |   type: "ReLU"
563 |   bottom: "conv17"
564 |   top: "conv17"
565 |   relu_param{
566 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
567 |   }		
568 | }
569 | 
570 | 
571 | layer{  # conv18: 3x3 convolution on "conv17", 1024 filters; stride 1 with pad 1 keeps the spatial size
572 |   name: "conv18"
573 |   type: "Convolution"
574 |   bottom: "conv17"
575 |   top: "conv18"
576 |   convolution_param {
577 |     num_output: 1024
578 |     kernel_size: 3
579 |     pad: 1
580 |     stride: 1
581 |     weight_filler {
582 |       type: "gaussian"
583 |       std: 0.01
584 |     }
585 |     bias_filler {
586 |       type: "constant"
587 |       value: 0
588 |     }
589 |   }
590 | }
591 | layer {  # relu18: leaky ReLU computed in place (bottom and top are both "conv18")
592 |   name: "relu18"
593 |   type: "ReLU"
594 |   bottom: "conv18"
595 |   top: "conv18"
596 |   relu_param{
597 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
598 |   }		
599 | }
600 | 
601 | 
602 | 
603 | layer{  # conv19: 1x1 convolution on "conv18", 512 filters (mixes channels only; spatial size unchanged)
604 |   name: "conv19"
605 |   type: "Convolution"
606 |   bottom: "conv18"
607 |   top: "conv19"
608 |   convolution_param {
609 |     num_output: 512
610 |     kernel_size: 1
611 |     pad: 0
612 |     stride: 1
613 |     weight_filler {
614 |       type: "gaussian"
615 |       std: 0.01
616 |     }
617 |     bias_filler {
618 |       type: "constant"
619 |       value: 0
620 |     }
621 |   }
622 | }
623 | layer {  # relu19: leaky ReLU computed in place (bottom and top are both "conv19")
624 |   name: "relu19"
625 |   type: "ReLU"
626 |   bottom: "conv19"
627 |   top: "conv19"
628 |   relu_param{
629 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
630 |   }		
631 | }
632 | 
633 | 
634 | 
635 | layer{  # conv20: 3x3 convolution on "conv19", 1024 filters; stride 1 with pad 1 keeps the spatial size
636 |   name: "conv20"
637 |   type: "Convolution"
638 |   bottom: "conv19"
639 |   top: "conv20"
640 |   convolution_param {
641 |     num_output: 1024
642 |     kernel_size: 3
643 |     pad: 1
644 |     stride: 1
645 |     weight_filler {
646 |       type: "gaussian"
647 |       std: 0.01
648 |     }
649 |     bias_filler {
650 |       type: "constant"
651 |       value: 0
652 |     }
653 |   }
654 | }
655 | layer {  # relu20: leaky ReLU computed in place (bottom and top are both "conv20")
656 |   name: "relu20"
657 |   type: "ReLU"
658 |   bottom: "conv20"
659 |   top: "conv20"
660 |   relu_param{
661 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being zeroed
662 |   }		
663 | }
664 | 
665 | 
666 | 
667 | layer{  # conv21: 3x3 convolution over conv20 producing 1024 feature maps
668 |   name: "conv21"
669 |   type: "Convolution"
670 |   bottom: "conv20"
671 |   top: "conv21"
672 |   convolution_param {
673 |     num_output: 1024
674 |     kernel_size: 3
675 |     pad: 1  # with kernel_size 3 and stride 1, pad 1 keeps the spatial size unchanged
676 |     stride: 1
677 |     weight_filler {  # initial weights: Gaussian with std 0.01
678 |       type: "gaussian"
679 |       std: 0.01
680 |     }
681 |     bias_filler {  # initial biases: constant 0
682 |       type: "constant"
683 |       value: 0
684 |     }
685 |   }
686 | }
687 | layer {  # relu21: leaky ReLU activation applied to the conv21 blob
688 |   name: "relu21"
689 |   type: "ReLU"
690 |   bottom: "conv21"
691 |   top: "conv21"  # top == bottom, so the activation overwrites conv21 in place
692 |   relu_param{
693 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being clamped to 0
694 |   }		
695 | }
696 | 
697 | 
698 | layer{  # conv22: strided 3x3 convolution over conv21 producing 1024 feature maps
699 |   name: "conv22"
700 |   type: "Convolution"
701 |   bottom: "conv21"
702 |   top: "conv22"
703 |   convolution_param {
704 |     num_output: 1024
705 |     kernel_size: 3
706 |     pad: 1
707 |     stride: 2  # the only stride-2 conv in this section: out = floor((in + 2*1 - 3) / 2) + 1, i.e. roughly half the input size
708 |     weight_filler {  # initial weights: Gaussian with std 0.01
709 |       type: "gaussian"
710 |       std: 0.01
711 |     }
712 |     bias_filler {  # initial biases: constant 0
713 |       type: "constant"
714 |       value: 0
715 |     }
716 |   }
717 | }
718 | layer {  # relu22: leaky ReLU activation applied to the conv22 blob
719 |   name: "relu22"
720 |   type: "ReLU"
721 |   bottom: "conv22"
722 |   top: "conv22"  # top == bottom, so the activation overwrites conv22 in place
723 |   relu_param{
724 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being clamped to 0
725 |   }		
726 | }
727 | 
728 | 
729 | 
730 | layer{  # conv23: 3x3 convolution over conv22 producing 1024 feature maps
731 |   name: "conv23"
732 |   type: "Convolution"
733 |   bottom: "conv22"
734 |   top: "conv23"
735 |   convolution_param {
736 |     num_output: 1024
737 |     kernel_size: 3
738 |     pad: 1  # with kernel_size 3 and stride 1, pad 1 keeps the spatial size unchanged
739 |     stride: 1
740 |     weight_filler {  # initial weights: Gaussian with std 0.01
741 |       type: "gaussian"
742 |       std: 0.01
743 |     }
744 |     bias_filler {  # initial biases: constant 0
745 |       type: "constant"
746 |       value: 0
747 |     }
748 |   }
749 | }
750 | layer {  # relu23: leaky ReLU activation applied to the conv23 blob
751 |   name: "relu23"
752 |   type: "ReLU"
753 |   bottom: "conv23"
754 |   top: "conv23"  # top == bottom, so the activation overwrites conv23 in place
755 |   relu_param{
756 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being clamped to 0
757 |   }		
758 | }
759 | 
760 | 
761 | layer{  # conv24: 3x3 convolution over conv23 producing 1024 feature maps; its output feeds fc25
762 |   name: "conv24"
763 |   type: "Convolution"
764 |   bottom: "conv23"
765 |   top: "conv24"
766 |   convolution_param {
767 |     num_output: 1024
768 |     kernel_size: 3
769 |     pad: 1  # with kernel_size 3 and stride 1, pad 1 keeps the spatial size unchanged
770 |     stride: 1
771 |     weight_filler {  # initial weights: Gaussian with std 0.01
772 |       type: "gaussian"
773 |       std: 0.01
774 |     }
775 |     bias_filler {  # initial biases: constant 0
776 |       type: "constant"
777 |       value: 0
778 |     }
779 |   }
780 | }
781 | layer {  # relu24: leaky ReLU activation applied to the conv24 blob
782 |   name: "relu24"
783 |   type: "ReLU"
784 |   bottom: "conv24"
785 |   top: "conv24"  # top == bottom, so the activation overwrites conv24 in place
786 |   relu_param{
787 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being clamped to 0
788 |   }		
789 | }
790 | 
791 | 
792 | 
793 | 
794 | layer{  # fc25: fully connected (InnerProduct) layer over the conv24 blob with 512 outputs
795 |   name: "fc25"
796 |   type: "InnerProduct"
797 |   bottom: "conv24"  # a convolutional blob; InnerProduct treats it as one flat vector per image
798 |   top: "fc25"
799 |   inner_product_param {
800 |     num_output: 512
801 |     weight_filler {  # initial weights: Gaussian with std 0.01
802 |       type: "gaussian"
803 |       std: 0.01
804 |     }
805 |     bias_filler {  # initial biases: constant 0
806 |       type: "constant"
807 |       value: 0
808 |     }
809 |   }
810 | }
811 | layer {  # relu25: leaky ReLU activation applied to the fc25 blob
812 |   name: "relu25"
813 |   type: "ReLU"
814 |   bottom: "fc25"
815 |   top: "fc25"  # top == bottom, so the activation overwrites fc25 in place
816 |   relu_param{
817 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being clamped to 0
818 |   }		
819 | }
820 | 
821 | 
822 | layer{  # fc26: fully connected (InnerProduct) layer over fc25 (512 values) with 4096 outputs
823 |   name: "fc26"
824 |   type: "InnerProduct"
825 |   bottom: "fc25"
826 |   top: "fc26"
827 |   inner_product_param {
828 |     num_output: 4096
829 |     weight_filler {  # initial weights: Gaussian with std 0.01
830 |       type: "gaussian"
831 |       std: 0.01
832 |     }
833 |     bias_filler {  # initial biases: constant 0
834 |       type: "constant"
835 |       value: 0
836 |     }
837 |   }
838 | }
839 | layer {  # relu26: leaky ReLU activation applied to the fc26 blob
840 |   name: "relu26"
841 |   type: "ReLU"
842 |   bottom: "fc26"
843 |   top: "fc26"  # top == bottom, so the activation overwrites fc26 in place
844 |   relu_param{
845 |     negative_slope: 0.1  # negative inputs are scaled by 0.1 instead of being clamped to 0
846 |   }		
847 | }
848 | 
849 | 
850 | layer{  # fc27: final fully connected (InnerProduct) layer over fc26; no activation layer follows it here
851 |   name: "fc27"
852 |   type: "InnerProduct"
853 |   bottom: "fc26"
854 |   top: "result"  # output blob is named "result", not "fc27"; consumers must look it up under that name
855 |   inner_product_param {
856 |     num_output: 1470  # NOTE(review): presumably 7*7*30 (7x7 grid x (20 classes + 2 boxes x 5 values)) as in YOLO v1 - confirm against the output decoder
857 |     weight_filler {  # initial weights: Gaussian with std 0.01
858 |       type: "gaussian"
859 |       std: 0.01
860 |     }
861 |     bias_filler {  # initial biases: constant 0
862 |       type: "constant"
863 |       value: 0
864 |     }
865 |   }
866 | }
867 | 
868 | 


--------------------------------------------------------------------------------