├── 0_raw_data
└── Code
│ ├── divide_tr_te.py
│ └── draw_point.py
├── 1_level_1
└── Code
│ ├── 0_gen_data
│ └── gen_l1_data.py
│ ├── 1_draw_img
│ └── draw_l1_point.py
│ ├── 2_train
│ ├── l1_mobilenet.prototxt
│ ├── solver.prototxt
│ └── train.sh
│ ├── 3_inference
│ ├── inferencen.py
│ └── l1_deploy.prototxt
│ ├── 4_evaluate
│ ├── evaluate_test.py
│ └── evaluate_train.py
│ └── 5_crop_img
│ ├── crop_test_img.py
│ └── crop_train_img.py
├── 2_level_2
└── Code
│ ├── 0_train
│ ├── l2_mobilenet.prototxt
│ ├── solver.prototxt
│ └── train.sh
│ ├── 1_inference
│ ├── inferencen.py
│ └── l2_deploy.prototxt
│ └── 2_evaluate
│ ├── evaluate_test.py
│ └── evaluate_train.py
├── 3_demo
├── Code
│ └── inferencen.py
└── Data
│ ├── demo.txt
│ ├── img
│ ├── 000054.jpg
│ ├── 000133.jpg
│ ├── 000167.jpg
│ ├── 000275.jpg
│ ├── 000335.jpg
│ ├── 000765.jpg
│ ├── 001102.jpg
│ └── 001557.jpg
│ ├── l1_deploy.prototxt
│ ├── l1_net.caffemodel
│ ├── l2_deploy.prototxt
│ └── l2_net.caffemodel
├── README.md
├── caffe_need
├── conv_dw_layer.cpp
├── conv_dw_layer.cu
├── conv_dw_layer.hpp
├── image_data_layer.cpp
├── image_data_layer.hpp
└── readme.txt
├── readme_img
├── ccnntexie.PNG
├── l1.PNG
└── l2.PNG
└── util
└── tools.py
/0_raw_data/Code/divide_tr_te.py:
--------------------------------------------------------------------------------
1 | # divide celebA dataset
2 | import sys
3 | sys.path.append('../../util')
4 | import tools
5 | import os
6 | import random
7 | import shutil
8 |
9 | raw_txt = '../Data/celeba_label.txt'
10 | relative_path = '../Data/img_celeba/' # for find the img
11 | train_txt = '../Result/raw_train_label.txt' # target txt
12 | test_txt = '../Result/raw_test_label.txt'
13 | train_img_fold = '../Result/train/'
14 | test_img_fold = '../Result/test/'
15 | tools.makedir(train_img_fold)
16 | tools.makedir(test_img_fold)
17 |
18 | per = 0.8 # percentage of train set
19 | line_num = 0
20 | train_num = 0
21 | test_num = 0
22 | train_f = open(train_txt,"w")
23 | test_f = open(test_txt,"w")
24 | for line in open(raw_txt):
25 | if line.isspace() : continue # skip empty line
26 | line_num += 1
27 | img_name = line.split()[0]
28 | full_img_path = relative_path + img_name
29 | a_rand = random.uniform(0,1)
30 | # train set
31 | if a_rand <= per:
32 | train_f.write(line)
33 | train_img_path = train_img_fold + img_name
34 | shutil.copy(full_img_path,train_img_path)
35 | train_num += 1
36 | # test set
37 | else:
38 | test_f.write(line)
39 | test_img_path = test_img_fold + img_name
40 | shutil.copy(full_img_path,test_img_path)
41 | test_num +=1
42 | print 'img : ', line_num
43 | train_f.close()
44 | test_f.close()
45 |
46 |
47 |
48 | print 'train set have ', train_num ,' examples.'
49 | print 'test set have ', test_num , ' examples.'
50 | print train_num ,' + ' ,test_num ,' = ', train_num+test_num
51 | print 'line_num is ', line_num
--------------------------------------------------------------------------------
/0_raw_data/Code/draw_point.py:
--------------------------------------------------------------------------------
# Draw the raw landmark points onto the original celebA images for visual
# inspection (NOTE(review): the old header said "generate img and txt" — that
# was copied from gen_l1_data; this script only draws).
# The point order in the raw txt is x1,x2,x3...
import sys
sys.path.append('../../util')
import tools
import os
import numpy as np
import cv2

train_txt = '../Result/raw_train_label.txt' # raw label txt produced by divide_tr_te.py
test_txt = '../Result/raw_test_label.txt'
relative_path = '../Data/img_celeba/' # where the source images live
draw_dir = '../Result/draw_img/' # annotated images are written under here
tools.makedir(draw_dir)

n_p = 5 # number of landmark points per face
17 |
def myint(numb):
    """Round *numb* (a number or numeric string) to the nearest integer."""
    value = float(numb)
    return int(round(value))
def drawpoint(raw_txt, o_dir):
    """Draw the 2*n_p landmark values of every image listed in raw_txt and
    save the annotated copies under o_dir/<split>/.

    '<split>' is recovered from the txt file name,
    e.g. .../raw_train_label.txt -> 'train'.
    """
    # hoisted out of the loop: the output folder is the same for every line
    sub_flod = o_dir + raw_txt.split('_')[-2]
    tools.makedir(sub_flod)
    # 'with' closes the handle we iterate; the old open(raw_txt).close() was a
    # no-op that opened a *second* handle and leaked the first
    with open(raw_txt) as f:
        for line in f:
            if line.isspace():
                continue  # skip empty lines
            parts = line.split()
            img_name = parts[0]
            raw_land = parts[1:2 * n_p + 1]  # landmark values as strings

            full_img_path = relative_path + img_name
            img = cv2.imread(full_img_path)
            draw_img = img.copy()
            draw_img = tools.drawpoints_0(draw_img, raw_land)

            # output img
            draw_img_path = sub_flod + '/' + img_name
            print(draw_img_path)
            cv2.imwrite(draw_img_path, draw_img)
    print(raw_txt, ' done!')
39 |
40 | drawpoint(test_txt,draw_dir)
41 | drawpoint(train_txt,draw_dir)
--------------------------------------------------------------------------------
/1_level_1/Code/0_gen_data/gen_l1_data.py:
--------------------------------------------------------------------------------
# generate img and txt for level_1
# The point order in the generated txt is x1,y1,x2,... (interleaved pairs)
import sys
sys.path.append('../../../util')
import tools
import os
import numpy as np
import cv2
train_txt = '../../../raw_data/Result/raw_train_label.txt' # raw label txt (input)
test_txt = '../../../raw_data/Result/raw_test_label.txt'
l1_data_dir = '../../Data/' # target dir
l1_train_txt = l1_data_dir + 'l1_train_label.txt' # target txt
l1_test_txt = l1_data_dir + 'l1_test_label.txt'
relative_path = '../../../raw_data/Data/img_celeba/' # for finding the source images
tools.makedir(l1_data_dir)

net_1_w = 48  # level-1 network input width
net_1_h = 48  # level-1 network input height
n_p = 5 # num of points
def gendata(target_txt, raw_txt):
    """Generate the level-1 dataset from one raw label file.

    For every image listed in raw_txt: write a line into target_txt with the
    n_p landmark coordinates normalized to [-1, 1] (written as interleaved
    x1 y1 x2 y2 ... pairs), and save a net_1_w x net_1_h resized copy of the
    image under l1_data_dir/<split>/.
    """
    # '<split>' is recovered from the txt file name,
    # e.g. .../raw_train_label.txt -> 'train'; hoisted out of the loop
    sub_flod = l1_data_dir + raw_txt.split('_')[2] + '/'
    tools.makedir(sub_flod)
    # 'with' closes both handles; the old open(raw_txt).close() was a no-op
    # that opened a second handle and leaked the iterated one
    with open(target_txt, "w") as f, open(raw_txt) as raw_f:
        for line in raw_f:
            if line.isspace():
                continue  # skip empty lines
            parts = line.split()
            img_name = parts[0]
            full_img_path = relative_path + img_name
            print(full_img_path)
            img = cv2.imread(full_img_path)

            w = img.shape[1]  # width is the x axis
            h = img.shape[0]  # height is the y axis
            w1 = (w - 1) / 2.0  # image center; 2.0 keeps the division float on Python 2 too
            h1 = (h - 1) / 2.0

            raw_land = parts[1:2 * n_p + 1]
            new_line = img_name
            for i in range(n_p):
                x_ = round((float(raw_land[2 * i + 0]) - w1) / w1, 4)  # value in [-1, 1]
                y_ = round((float(raw_land[2 * i + 1]) - h1) / h1, 4)
                new_line = new_line + ' ' + str(x_)  # interleaved: x then y
                new_line = new_line + ' ' + str(y_)
            print('new_line: ', new_line)
            f.write(new_line + '\n')

            # resize the image to the level-1 network input size
            scale_img = cv2.resize(img, (net_1_w, net_1_h))
            scale_img_path = sub_flod + img_name
            print('output path ', scale_img_path)
            cv2.imwrite(scale_img_path, scale_img)
54 | gendata(l1_test_txt,test_txt)
55 | gendata(l1_train_txt,train_txt)
56 |
57 |
--------------------------------------------------------------------------------
/1_level_1/Code/1_draw_img/draw_l1_point.py:
--------------------------------------------------------------------------------
# draw points for level_1: visualize the normalized landmarks on the
# resized level-1 images to sanity-check gen_l1_data.py's output
import sys
sys.path.append('../../../util')
import tools
import os
import numpy as np
import cv2

relative_path = '../../Data/' # for finding the img
relative_train_path = '../../Data/train/'
relative_test_path = '../../Data/test/'

train_txt = relative_path + 'l1_train_label.txt' # label txt produced by gen_l1_data.py
test_txt = relative_path + 'l1_test_label.txt'

draw_dir = relative_path + 'draw_img/' # annotated images are written under here
tools.makedir(draw_dir)
n_p = 5 # num of points
19 |
def drawpoint(raw_txt, o_dir, relative_img_path):
    """Draw the n_p normalized ([-1, 1]) landmarks of every image listed in
    raw_txt and save the annotated copies under o_dir/<split>/.

    '<split>' is recovered from the txt file name,
    e.g. .../l1_train_label.txt -> 'train'.
    """
    # hoisted out of the loop: the output folder is the same for every line
    sub_flod = o_dir + raw_txt.split('_')[-2] + '/'
    tools.makedir(sub_flod)
    # 'with' closes the handle we iterate; the old open(raw_txt).close() was a
    # no-op that opened a second handle and leaked the first
    with open(raw_txt) as f:
        for line in f:
            if line.isspace():
                continue  # skip empty lines
            parts = line.split()
            img_name = parts[0]
            full_img_path = relative_img_path + img_name
            img = cv2.imread(full_img_path)
            draw_img = img.copy()

            w = img.shape[1]  # width is the x axis
            h = img.shape[0]  # height is the y axis
            w1 = (w - 1) / 2.0  # image center; 2.0 keeps the division float on Python 2 too
            h1 = (h - 1) / 2.0

            raw_land = parts[1:2 * n_p + 1]
            for i in range(n_p):  # draw key points
                x_ = tools.convert_point(raw_land[2 * i + 0], w1)
                y_ = tools.convert_point(raw_land[2 * i + 1], h1)
                cv2.circle(draw_img, (x_, y_), 2, (0, 255, 0))
            # output img
            draw_img_path = sub_flod + img_name
            print('draw ima path ', draw_img_path)
            cv2.imwrite(draw_img_path, draw_img)
    print(raw_txt, ' done!')
46 | drawpoint(train_txt,draw_dir,relative_train_path)
47 | drawpoint(test_txt,draw_dir,relative_test_path)
48 |
--------------------------------------------------------------------------------
/1_level_1/Code/2_train/l1_mobilenet.prototxt:
--------------------------------------------------------------------------------
1 | name: "level_1"
2 |
3 | layer {
4 | name: "data"
5 | type: "ImageData"
6 | top: "data"
7 | top: "label"
8 | include{
9 | phase: TRAIN
10 | }
11 | transform_param {
12 | mean_value: 127.5
13 | mean_value: 127.5
14 | mean_value: 127.5
15 | # scale: 0.0039215
16 | mirror: false
17 | }
18 | image_data_param{
19 | root_folder: "../../Data/train/"
20 | source: "../../Data/l1_train_label.txt"
21 | batch_size: 128
22 | shuffle: true
23 | is_color: true
24 | new_height: 48
25 | new_width: 48
26 | }
27 | }
# input data layer used during the TEST phase
29 | layer {
30 | name: "data"
31 | type: "ImageData"
32 | top: "data"
33 | top: "label"
34 | include{
35 | phase: TEST
36 | }
37 | transform_param {
38 | mean_value: 127.5
39 | mean_value: 127.5
40 | mean_value: 127.5
41 | # scale: 0.0039215
42 | mirror: false
43 |
44 | }
45 | image_data_param{
46 | root_folder: "../../Data/test/"
47 | source: "../../Data/l1_test_label.txt"
48 | batch_size: 128
49 | shuffle: true
50 | is_color: true
51 | new_height: 48
52 | new_width: 48
53 | }
54 | }
55 |
56 | #------------------------------------------- conv1 input: 48*48*3 output: 24*24*16
57 | layer {
58 | name: "conv1_new"
59 | type: "Convolution"
60 | bottom: "data"
61 | top: "conv1_new"
62 | param {
63 | lr_mult: 1
64 | decay_mult: 1
65 | }
66 | convolution_param {
67 | num_output: 16
68 | bias_term: false
69 | pad: 1
70 | kernel_size: 3
71 | stride: 2
72 | weight_filler {
73 | type: "msra"
74 | }
75 | }
76 | }
77 | layer {
78 | name: "conv1/bn_new"
79 | type: "BatchNorm"
80 | bottom: "conv1_new"
81 | top: "conv1_new"
82 | param {
83 | lr_mult: 0
84 | decay_mult: 0
85 | }
86 | param {
87 | lr_mult: 0
88 | decay_mult: 0
89 | }
90 | param {
91 | lr_mult: 0
92 | decay_mult: 0
93 | }
94 | }
95 | layer {
96 | name: "conv1/scale_new"
97 | type: "Scale"
98 | bottom: "conv1_new"
99 | top: "conv1_new"
100 | scale_param {
101 | filler {
102 | value: 1
103 | }
104 | bias_term: true
105 | bias_filler {
106 | value: 0
107 | }
108 | }
109 | }
110 | layer {
111 | name: "relu1_new"
112 | type: "ReLU"
113 | bottom: "conv1_new"
114 | top: "conv1_new"
115 | }
116 |
117 | #----------- bottleneck_1 1_1(s=2) 1_2(s=1) input: 24*24*16 output: 12*12*24
118 |
119 | layer {
120 | name: "conv1_1/in/pw_new"
121 | type: "Convolution"
122 | bottom: "conv1_new"
123 | top: "conv1_1/in/pw_new"
124 | param {
125 | lr_mult: 1
126 | decay_mult: 1
127 | }
128 | convolution_param {
129 | num_output: 96
130 | bias_term: false
131 | pad: 0
132 | kernel_size: 1
133 | engine: CAFFE
134 | stride: 1
135 | weight_filler {
136 | type: "msra"
137 | }
138 | }
139 | }
140 | layer {
141 | name: "conv1_1/in/pw/bn_new"
142 | type: "BatchNorm"
143 | bottom: "conv1_1/in/pw_new"
144 | top: "conv1_1/in/pw_new"
145 | param {
146 | lr_mult: 0
147 | decay_mult: 0
148 | }
149 | param {
150 | lr_mult: 0
151 | decay_mult: 0
152 | }
153 | param {
154 | lr_mult: 0
155 | decay_mult: 0
156 | }
157 | }
158 | layer {
159 | name: "conv1_1/in/pw/scale_new"
160 | type: "Scale"
161 | bottom: "conv1_1/in/pw_new"
162 | top: "conv1_1/in/pw_new"
163 | scale_param {
164 | filler {
165 | value: 1
166 | }
167 | bias_term: true
168 | bias_filler {
169 | value: 0
170 | }
171 | }
172 | }
173 | layer {
174 | name: "relu1_1/in/pw_new"
175 | type: "ReLU"
176 | bottom: "conv1_1/in/pw_new"
177 | top: "conv1_1/in/pw_new"
178 | }
179 |
180 |
181 |
182 | # 1_1 dw conv
183 | layer {
184 | name: "conv1_1/dw_new"
185 | type: "ConvolutionDepthwise"
186 | bottom: "conv1_1/in/pw_new"
187 | top: "conv1_1/dw_new"
188 | param {
189 | lr_mult: 1
190 | decay_mult: 0
191 | }
192 | convolution_param {
193 | num_output: 96
194 | bias_term: false
195 | pad: 1
196 | kernel_size: 3
197 | engine: CAFFE
198 | stride: 2
199 | weight_filler {
200 | type: "msra"
201 | }
202 | }
203 | }
204 | layer {
205 | name: "conv1_1/dw/bn_new"
206 | type: "BatchNorm"
207 | bottom: "conv1_1/dw_new"
208 | top: "conv1_1/dw_new"
209 | param {
210 | lr_mult: 0
211 | decay_mult: 0
212 | }
213 | param {
214 | lr_mult: 0
215 | decay_mult: 0
216 | }
217 | param {
218 | lr_mult: 0
219 | decay_mult: 0
220 | }
221 | }
222 | layer {
223 | name: "conv1_1/dw/scale_new"
224 | type: "Scale"
225 | bottom: "conv1_1/dw_new"
226 | top: "conv1_1/dw_new"
227 | scale_param {
228 | filler {
229 | value: 1
230 | }
231 | bias_term: true
232 | bias_filler {
233 | value: 0
234 | }
235 | }
236 | }
237 | layer {
238 | name: "relu1_1/dw_new"
239 | type: "ReLU"
240 | bottom: "conv1_1/dw_new"
241 | top: "conv1_1/dw_new"
242 | }
243 |
244 | # 1_1 out
245 | layer {
246 | name: "conv1_1/out/pw_new"
247 | type: "Convolution"
248 | bottom: "conv1_1/dw_new"
249 | top: "conv1_1/out/pw_new"
250 | param {
251 | lr_mult: 1
252 | decay_mult: 1
253 | }
254 | convolution_param {
255 | num_output: 24
256 | bias_term: false
257 | pad: 0
258 | kernel_size: 1
259 | engine: CAFFE
260 | stride: 1
261 | weight_filler {
262 | type: "msra"
263 | }
264 | }
265 | }
266 | layer {
267 | name: "conv1_1/out/pw/bn_new"
268 | type: "BatchNorm"
269 | bottom: "conv1_1/out/pw_new"
270 | top: "conv1_1/out/pw_new"
271 | param {
272 | lr_mult: 0
273 | decay_mult: 0
274 | }
275 | param {
276 | lr_mult: 0
277 | decay_mult: 0
278 | }
279 | param {
280 | lr_mult: 0
281 | decay_mult: 0
282 | }
283 | }
284 | layer {
285 | name: "conv1_1/out/pw/scale_new"
286 | type: "Scale"
287 | bottom: "conv1_1/out/pw_new"
288 | top: "conv1_1/out/pw_new"
289 | scale_param {
290 | filler {
291 | value: 1
292 | }
293 | bias_term: true
294 | bias_filler {
295 | value: 0
296 | }
297 | }
298 | }
299 | # 1_2 in
300 |
301 | layer {
302 | name: "conv1_2/in/pw_new"
303 | type: "Convolution"
304 | bottom: "conv1_1/out/pw_new"
305 | top: "conv1_2/in/pw_new"
306 | param {
307 | lr_mult: 1
308 | decay_mult: 1
309 | }
310 | convolution_param {
311 | num_output: 144
312 | bias_term: false
313 | pad: 0
314 | kernel_size: 1
315 | engine: CAFFE
316 | stride: 1
317 | weight_filler {
318 | type: "msra"
319 | }
320 | }
321 | }
322 | layer {
323 | name: "conv1_2/in/pw/bn_new"
324 | type: "BatchNorm"
325 | bottom: "conv1_2/in/pw_new"
326 | top: "conv1_2/in/pw_new"
327 | param {
328 | lr_mult: 0
329 | decay_mult: 0
330 | }
331 | param {
332 | lr_mult: 0
333 | decay_mult: 0
334 | }
335 | param {
336 | lr_mult: 0
337 | decay_mult: 0
338 | }
339 | }
340 | layer {
341 | name: "conv1_2/in/pw/scale_new"
342 | type: "Scale"
343 | bottom: "conv1_2/in/pw_new"
344 | top: "conv1_2/in/pw_new"
345 | scale_param {
346 | filler {
347 | value: 1
348 | }
349 | bias_term: true
350 | bias_filler {
351 | value: 0
352 | }
353 | }
354 | }
355 | layer {
356 | name: "relu1_2/in/pw_new"
357 | type: "ReLU"
358 | bottom: "conv1_2/in/pw_new"
359 | top: "conv1_2/in/pw_new"
360 | }
361 |
362 | # 1_2 dw
363 |
364 | layer {
365 | name: "conv1_2/dw_new"
366 | type: "ConvolutionDepthwise"
367 | bottom: "conv1_2/in/pw_new"
368 | top: "conv1_2/dw_new"
369 | param {
370 | lr_mult: 1
371 | decay_mult: 0
372 | }
373 | convolution_param {
374 | num_output: 144
375 | bias_term: false
376 | pad: 1
377 | kernel_size: 3
378 | engine: CAFFE
379 | stride: 1
380 | weight_filler {
381 | type: "msra"
382 | }
383 | }
384 | }
385 | layer {
386 | name: "conv1_2/dw/bn_new"
387 | type: "BatchNorm"
388 | bottom: "conv1_2/dw_new"
389 | top: "conv1_2/dw_new"
390 | param {
391 | lr_mult: 0
392 | decay_mult: 0
393 | }
394 | param {
395 | lr_mult: 0
396 | decay_mult: 0
397 | }
398 | param {
399 | lr_mult: 0
400 | decay_mult: 0
401 | }
402 | }
403 | layer {
404 | name: "conv1_2/dw/scale_new"
405 | type: "Scale"
406 | bottom: "conv1_2/dw_new"
407 | top: "conv1_2/dw_new"
408 | scale_param {
409 | filler {
410 | value: 1
411 | }
412 | bias_term: true
413 | bias_filler {
414 | value: 0
415 | }
416 | }
417 | }
418 | layer {
419 | name: "relu1_2/dw_new"
420 | type: "ReLU"
421 | bottom: "conv1_2/dw_new"
422 | top: "conv1_2/dw_new"
423 | }
424 |
425 | # 1_2 out 12*12*24
426 | layer {
427 | name: "conv1_2/out/pw_new"
428 | type: "Convolution"
429 | bottom: "conv1_2/dw_new"
430 | top: "conv1_2/out/pw_new"
431 | param {
432 | lr_mult: 1
433 | decay_mult: 1
434 | }
435 | convolution_param {
436 | num_output: 24
437 | bias_term: false
438 | pad: 0
439 | kernel_size: 1
440 | engine: CAFFE
441 | stride: 1
442 | weight_filler {
443 | type: "msra"
444 | }
445 | }
446 | }
447 | layer {
448 | name: "conv1_2/out/pw/bn_new"
449 | type: "BatchNorm"
450 | bottom: "conv1_2/out/pw_new"
451 | top: "conv1_2/out/pw_new"
452 | param {
453 | lr_mult: 0
454 | decay_mult: 0
455 | }
456 | param {
457 | lr_mult: 0
458 | decay_mult: 0
459 | }
460 | param {
461 | lr_mult: 0
462 | decay_mult: 0
463 | }
464 | }
465 | layer {
466 | name: "conv1_2/out/pw/scale_new"
467 | type: "Scale"
468 | bottom: "conv1_2/out/pw_new"
469 | top: "conv1_2/out/pw_new"
470 | scale_param {
471 | filler {
472 | value: 1
473 | }
474 | bias_term: true
475 | bias_filler {
476 | value: 0
477 | }
478 | }
479 | }
480 | layer {
481 | name: "fuse_conv1_2"
482 | type: "Eltwise"
483 | bottom: "conv1_1/out/pw_new"
484 | bottom: "conv1_2/out/pw_new"
485 | top: "fuse_conv1_2"
486 | eltwise_param {
487 | operation: SUM
488 | }
489 | }
490 |
491 | #--------------------------bottleneck2 2_1(s=2) 2_2(s=1) input: 12*12*24 output: 6*6*32
492 | # 2_1 in
493 | layer {
494 | name: "conv2_1/in/pw_new"
495 | type: "Convolution"
496 | bottom: "fuse_conv1_2"
497 | top: "conv2_1/in/pw_new"
498 | param {
499 | lr_mult: 1
500 | decay_mult: 1
501 | }
502 | convolution_param {
503 | num_output: 144
504 | bias_term: false
505 | pad: 0
506 | kernel_size: 1
507 | engine: CAFFE
508 | stride: 1
509 | weight_filler {
510 | type: "msra"
511 | }
512 | }
513 | }
514 | layer {
515 | name: "conv2_1/in/pw/bn_new"
516 | type: "BatchNorm"
517 | bottom: "conv2_1/in/pw_new"
518 | top: "conv2_1/in/pw_new"
519 | param {
520 | lr_mult: 0
521 | decay_mult: 0
522 | }
523 | param {
524 | lr_mult: 0
525 | decay_mult: 0
526 | }
527 | param {
528 | lr_mult: 0
529 | decay_mult: 0
530 | }
531 | }
532 | layer {
533 | name: "conv2_1/in/pw/scale_new"
534 | type: "Scale"
535 | bottom: "conv2_1/in/pw_new"
536 | top: "conv2_1/in/pw_new"
537 | scale_param {
538 | filler {
539 | value: 1
540 | }
541 | bias_term: true
542 | bias_filler {
543 | value: 0
544 | }
545 | }
546 | }
547 | layer {
548 | name: "relu2_1/in/pw_new"
549 | type: "ReLU"
550 | bottom: "conv2_1/in/pw_new"
551 | top: "conv2_1/in/pw_new"
552 | }
553 |
554 | # 2_1 dw
555 | layer {
556 | name: "conv2_1/dw_new"
557 | type: "ConvolutionDepthwise"
558 | bottom: "conv2_1/in/pw_new"
559 | top: "conv2_1/dw_new"
560 | param {
561 | lr_mult: 1
562 | decay_mult: 0
563 | }
564 | convolution_param {
565 | num_output: 144
566 | bias_term: false
567 | pad: 1
568 | kernel_size: 3
569 | engine: CAFFE
570 | stride: 2
571 | weight_filler {
572 | type: "msra"
573 | }
574 | }
575 | }
576 | layer {
577 | name: "conv2_1/dw/bn_new"
578 | type: "BatchNorm"
579 | bottom: "conv2_1/dw_new"
580 | top: "conv2_1/dw_new"
581 | param {
582 | lr_mult: 0
583 | decay_mult: 0
584 | }
585 | param {
586 | lr_mult: 0
587 | decay_mult: 0
588 | }
589 | param {
590 | lr_mult: 0
591 | decay_mult: 0
592 | }
593 | }
594 | layer {
595 | name: "conv2_1/dw/scale_new"
596 | type: "Scale"
597 | bottom: "conv2_1/dw_new"
598 | top: "conv2_1/dw_new"
599 | scale_param {
600 | filler {
601 | value: 1
602 | }
603 | bias_term: true
604 | bias_filler {
605 | value: 0
606 | }
607 | }
608 | }
609 | layer {
610 | name: "relu2_1/dw_new"
611 | type: "ReLU"
612 | bottom: "conv2_1/dw_new"
613 | top: "conv2_1/dw_new"
614 | }
615 |
616 | # 2_1 out
617 | layer {
618 | name: "conv2_1/out/pw_new"
619 | type: "Convolution"
620 | bottom: "conv2_1/dw_new"
621 | top: "conv2_1/out/pw_new"
622 | param {
623 | lr_mult: 1
624 | decay_mult: 1
625 | }
626 | convolution_param {
627 | num_output: 32
628 | bias_term: false
629 | pad: 0
630 | kernel_size: 1
631 | engine: CAFFE
632 | stride: 1
633 | weight_filler {
634 | type: "msra"
635 | }
636 | }
637 | }
638 | layer {
639 | name: "conv2_1/out/pw/bn_new"
640 | type: "BatchNorm"
641 | bottom: "conv2_1/out/pw_new"
642 | top: "conv2_1/out/pw_new"
643 | param {
644 | lr_mult: 0
645 | decay_mult: 0
646 | }
647 | param {
648 | lr_mult: 0
649 | decay_mult: 0
650 | }
651 | param {
652 | lr_mult: 0
653 | decay_mult: 0
654 | }
655 | }
656 | layer {
657 | name: "conv2_1/out/pw/scale_new"
658 | type: "Scale"
659 | bottom: "conv2_1/out/pw_new"
660 | top: "conv2_1/out/pw_new"
661 | scale_param {
662 | filler {
663 | value: 1
664 | }
665 | bias_term: true
666 | bias_filler {
667 | value: 0
668 | }
669 | }
670 | }
671 |
672 | # 2_2 in
673 |
674 | layer {
675 | name: "conv2_2/in/pw_new"
676 | type: "Convolution"
677 | bottom: "conv2_1/out/pw_new"
678 | top: "conv2_2/in/pw_new"
679 | param {
680 | lr_mult: 1
681 | decay_mult: 1
682 | }
683 | convolution_param {
684 | num_output: 192
685 | bias_term: false
686 | pad: 0
687 | kernel_size: 1
688 | engine: CAFFE
689 | stride: 1
690 | weight_filler {
691 | type: "msra"
692 | }
693 | }
694 | }
695 | layer {
696 | name: "conv2_2/in/pw/bn_new"
697 | type: "BatchNorm"
698 | bottom: "conv2_2/in/pw_new"
699 | top: "conv2_2/in/pw_new"
700 | param {
701 | lr_mult: 0
702 | decay_mult: 0
703 | }
704 | param {
705 | lr_mult: 0
706 | decay_mult: 0
707 | }
708 | param {
709 | lr_mult: 0
710 | decay_mult: 0
711 | }
712 | }
713 | layer {
714 | name: "conv2_2/in/pw/scale_new"
715 | type: "Scale"
716 | bottom: "conv2_2/in/pw_new"
717 | top: "conv2_2/in/pw_new"
718 | scale_param {
719 | filler {
720 | value: 1
721 | }
722 | bias_term: true
723 | bias_filler {
724 | value: 0
725 | }
726 | }
727 | }
728 | layer {
729 | name: "relu2_2/in/pw_new"
730 | type: "ReLU"
731 | bottom: "conv2_2/in/pw_new"
732 | top: "conv2_2/in/pw_new"
733 | }
734 |
735 | # 2_2 dw
736 | layer {
737 | name: "conv2_2/dw_new"
738 | type: "ConvolutionDepthwise"
739 | bottom: "conv2_2/in/pw_new"
740 | top: "conv2_2/dw_new"
741 | param {
742 | lr_mult: 1
743 | decay_mult: 0
744 | }
745 | convolution_param {
746 | num_output: 192
747 | bias_term: false
748 | pad: 1
749 | kernel_size: 3
750 | engine: CAFFE
751 | stride: 1
752 | weight_filler {
753 | type: "msra"
754 | }
755 | }
756 | }
757 | layer {
758 | name: "conv2_2/dw/bn_new"
759 | type: "BatchNorm"
760 | bottom: "conv2_2/dw_new"
761 | top: "conv2_2/dw_new"
762 | param {
763 | lr_mult: 0
764 | decay_mult: 0
765 | }
766 | param {
767 | lr_mult: 0
768 | decay_mult: 0
769 | }
770 | param {
771 | lr_mult: 0
772 | decay_mult: 0
773 | }
774 | }
775 | layer {
776 | name: "conv2_2/dw/scale_new"
777 | type: "Scale"
778 | bottom: "conv2_2/dw_new"
779 | top: "conv2_2/dw_new"
780 | scale_param {
781 | filler {
782 | value: 1
783 | }
784 | bias_term: true
785 | bias_filler {
786 | value: 0
787 | }
788 | }
789 | }
790 | layer {
791 | name: "relu2_2/dw_new"
792 | type: "ReLU"
793 | bottom: "conv2_2/dw_new"
794 | top: "conv2_2/dw_new"
795 | }
796 |
797 |
798 | # 2_2 out
799 |
800 | layer {
801 | name: "conv2_2/out/pw_new"
802 | type: "Convolution"
803 | bottom: "conv2_2/dw_new"
804 | top: "conv2_2/out/pw_new"
805 | param {
806 | lr_mult: 1
807 | decay_mult: 1
808 | }
809 | convolution_param {
810 | num_output: 32
811 | bias_term: false
812 | pad: 0
813 | kernel_size: 1
814 | engine: CAFFE
815 | stride: 1
816 | weight_filler {
817 | type: "msra"
818 | }
819 | }
820 | }
821 | layer {
822 | name: "conv2_2/out/pw/bn_new"
823 | type: "BatchNorm"
824 | bottom: "conv2_2/out/pw_new"
825 | top: "conv2_2/out/pw_new"
826 | param {
827 | lr_mult: 0
828 | decay_mult: 0
829 | }
830 | param {
831 | lr_mult: 0
832 | decay_mult: 0
833 | }
834 | param {
835 | lr_mult: 0
836 | decay_mult: 0
837 | }
838 | }
839 | layer {
840 | name: "conv2_2/out/pw/scale_new"
841 | type: "Scale"
842 | bottom: "conv2_2/out/pw_new"
843 | top: "conv2_2/out/pw_new"
844 | scale_param {
845 | filler {
846 | value: 1
847 | }
848 | bias_term: true
849 | bias_filler {
850 | value: 0
851 | }
852 | }
853 | }
854 | layer {
855 | name: "fuse_conv2_2"
856 | type: "Eltwise"
857 | bottom: "conv2_1/out/pw_new"
858 | bottom: "conv2_2/out/pw_new"
859 | top: "fuse_conv2_2"
860 | eltwise_param {
861 | operation: SUM
862 | }
863 | }
864 |
865 | #--------------------------bottleneck3 3_1(s=2) 3_2(s=1) input: 6*6*32 output: 3*3*64
866 | # 3_1 in
867 | layer {
868 | name: "conv3_1/in/pw_new"
869 | type: "Convolution"
870 | bottom: "fuse_conv2_2"
871 | top: "conv3_1/in/pw_new"
872 | param {
873 | lr_mult: 1
874 | decay_mult: 1
875 | }
876 | convolution_param {
877 | num_output: 192
878 | bias_term: false
879 | pad: 0
880 | kernel_size: 1
881 | engine: CAFFE
882 | stride: 1
883 | weight_filler {
884 | type: "msra"
885 | }
886 | }
887 | }
888 | layer {
889 | name: "conv3_1/in/pw/bn_new"
890 | type: "BatchNorm"
891 | bottom: "conv3_1/in/pw_new"
892 | top: "conv3_1/in/pw_new"
893 | param {
894 | lr_mult: 0
895 | decay_mult: 0
896 | }
897 | param {
898 | lr_mult: 0
899 | decay_mult: 0
900 | }
901 | param {
902 | lr_mult: 0
903 | decay_mult: 0
904 | }
905 | }
906 | layer {
907 | name: "conv3_1/in/pw/scale_new"
908 | type: "Scale"
909 | bottom: "conv3_1/in/pw_new"
910 | top: "conv3_1/in/pw_new"
911 | scale_param {
912 | filler {
913 | value: 1
914 | }
915 | bias_term: true
916 | bias_filler {
917 | value: 0
918 | }
919 | }
920 | }
921 | layer {
922 | name: "relu3_1/in/pw_new"
923 | type: "ReLU"
924 | bottom: "conv3_1/in/pw_new"
925 | top: "conv3_1/in/pw_new"
926 | }
927 |
928 | # 3_1 dw
929 | layer {
930 | name: "conv3_1/dw_new"
931 | type: "ConvolutionDepthwise"
932 | bottom: "conv3_1/in/pw_new"
933 | top: "conv3_1/dw_new"
934 | param {
935 | lr_mult: 1
936 | decay_mult: 0
937 | }
938 | convolution_param {
939 | num_output: 192
940 | bias_term: false
941 | pad: 1
942 | kernel_size: 3
943 | engine: CAFFE
944 | stride: 2
945 | weight_filler {
946 | type: "msra"
947 | }
948 | }
949 | }
950 | layer {
951 | name: "conv3_1/dw/bn_new"
952 | type: "BatchNorm"
953 | bottom: "conv3_1/dw_new"
954 | top: "conv3_1/dw_new"
955 | param {
956 | lr_mult: 0
957 | decay_mult: 0
958 | }
959 | param {
960 | lr_mult: 0
961 | decay_mult: 0
962 | }
963 | param {
964 | lr_mult: 0
965 | decay_mult: 0
966 | }
967 | }
968 | layer {
969 | name: "conv3_1/dw/scale_new"
970 | type: "Scale"
971 | bottom: "conv3_1/dw_new"
972 | top: "conv3_1/dw_new"
973 | scale_param {
974 | filler {
975 | value: 1
976 | }
977 | bias_term: true
978 | bias_filler {
979 | value: 0
980 | }
981 | }
982 | }
983 | layer {
984 | name: "relu3_1/dw_new"
985 | type: "ReLU"
986 | bottom: "conv3_1/dw_new"
987 | top: "conv3_1/dw_new"
988 | }
989 |
990 | # 3_1 out
991 | layer {
992 | name: "conv3_1/out/pw_new"
993 | type: "Convolution"
994 | bottom: "conv3_1/dw_new"
995 | top: "conv3_1/out/pw_new"
996 | param {
997 | lr_mult: 1
998 | decay_mult: 1
999 | }
1000 | convolution_param {
1001 | num_output: 64
1002 | bias_term: false
1003 | pad: 0
1004 | kernel_size: 1
1005 | engine: CAFFE
1006 | stride: 1
1007 | weight_filler {
1008 | type: "msra"
1009 | }
1010 | }
1011 | }
1012 | layer {
1013 | name: "conv3_1/out/pw/bn_new"
1014 | type: "BatchNorm"
1015 | bottom: "conv3_1/out/pw_new"
1016 | top: "conv3_1/out/pw_new"
1017 | param {
1018 | lr_mult: 0
1019 | decay_mult: 0
1020 | }
1021 | param {
1022 | lr_mult: 0
1023 | decay_mult: 0
1024 | }
1025 | param {
1026 | lr_mult: 0
1027 | decay_mult: 0
1028 | }
1029 | }
1030 | layer {
1031 | name: "conv3_1/out/pw/scale_new"
1032 | type: "Scale"
1033 | bottom: "conv3_1/out/pw_new"
1034 | top: "conv3_1/out/pw_new"
1035 | scale_param {
1036 | filler {
1037 | value: 1
1038 | }
1039 | bias_term: true
1040 | bias_filler {
1041 | value: 0
1042 | }
1043 | }
1044 | }
1045 |
1046 | # 3_2 in
1047 |
1048 | layer {
1049 | name: "conv3_2/in/pw_new"
1050 | type: "Convolution"
1051 | bottom: "conv3_1/out/pw_new"
1052 | top: "conv3_2/in/pw_new"
1053 | param {
1054 | lr_mult: 1
1055 | decay_mult: 1
1056 | }
1057 | convolution_param {
1058 | num_output: 192
1059 | bias_term: false
1060 | pad: 0
1061 | kernel_size: 1
1062 | engine: CAFFE
1063 | stride: 1
1064 | weight_filler {
1065 | type: "msra"
1066 | }
1067 | }
1068 | }
1069 | layer {
1070 | name: "conv3_2/in/pw/bn_new"
1071 | type: "BatchNorm"
1072 | bottom: "conv3_2/in/pw_new"
1073 | top: "conv3_2/in/pw_new"
1074 | param {
1075 | lr_mult: 0
1076 | decay_mult: 0
1077 | }
1078 | param {
1079 | lr_mult: 0
1080 | decay_mult: 0
1081 | }
1082 | param {
1083 | lr_mult: 0
1084 | decay_mult: 0
1085 | }
1086 | }
1087 | layer {
1088 | name: "conv3_2/in/pw/scale_new"
1089 | type: "Scale"
1090 | bottom: "conv3_2/in/pw_new"
1091 | top: "conv3_2/in/pw_new"
1092 | scale_param {
1093 | filler {
1094 | value: 1
1095 | }
1096 | bias_term: true
1097 | bias_filler {
1098 | value: 0
1099 | }
1100 | }
1101 | }
1102 | layer {
1103 | name: "relu3_2/in/pw_new"
1104 | type: "ReLU"
1105 | bottom: "conv3_2/in/pw_new"
1106 | top: "conv3_2/in/pw_new"
1107 | }
1108 |
1109 | # 3_2 dw
1110 | layer {
1111 | name: "conv3_2/dw_new"
1112 | type: "ConvolutionDepthwise"
1113 | bottom: "conv3_2/in/pw_new"
1114 | top: "conv3_2/dw_new"
1115 | param {
1116 | lr_mult: 1
1117 | decay_mult: 0
1118 | }
1119 | convolution_param {
1120 | num_output: 192
1121 | bias_term: false
1122 | pad: 1
1123 | kernel_size: 3
1124 | engine: CAFFE
1125 | stride: 1
1126 | weight_filler {
1127 | type: "msra"
1128 | }
1129 | }
1130 | }
1131 | layer {
1132 | name: "conv3_2/dw/bn_new"
1133 | type: "BatchNorm"
1134 | bottom: "conv3_2/dw_new"
1135 | top: "conv3_2/dw_new"
1136 | param {
1137 | lr_mult: 0
1138 | decay_mult: 0
1139 | }
1140 | param {
1141 | lr_mult: 0
1142 | decay_mult: 0
1143 | }
1144 | param {
1145 | lr_mult: 0
1146 | decay_mult: 0
1147 | }
1148 | }
1149 | layer {
1150 | name: "conv3_2/dw/scale_new"
1151 | type: "Scale"
1152 | bottom: "conv3_2/dw_new"
1153 | top: "conv3_2/dw_new"
1154 | scale_param {
1155 | filler {
1156 | value: 1
1157 | }
1158 | bias_term: true
1159 | bias_filler {
1160 | value: 0
1161 | }
1162 | }
1163 | }
1164 | layer {
1165 | name: "relu3_2/dw_new"
1166 | type: "ReLU"
1167 | bottom: "conv3_2/dw_new"
1168 | top: "conv3_2/dw_new"
1169 | }
1170 |
1171 |
1172 | # 3_2 out
1173 |
1174 | layer {
1175 | name: "conv3_2/out/pw_new"
1176 | type: "Convolution"
1177 | bottom: "conv3_2/dw_new"
1178 | top: "conv3_2/out/pw_new"
1179 | param {
1180 | lr_mult: 1
1181 | decay_mult: 1
1182 | }
1183 | convolution_param {
1184 | num_output: 64
1185 | bias_term: false
1186 | pad: 0
1187 | kernel_size: 1
1188 | engine: CAFFE
1189 | stride: 1
1190 | weight_filler {
1191 | type: "msra"
1192 | }
1193 | }
1194 | }
1195 | layer {
1196 | name: "conv3_2/out/pw/bn_new"
1197 | type: "BatchNorm"
1198 | bottom: "conv3_2/out/pw_new"
1199 | top: "conv3_2/out/pw_new"
1200 | param {
1201 | lr_mult: 0
1202 | decay_mult: 0
1203 | }
1204 | param {
1205 | lr_mult: 0
1206 | decay_mult: 0
1207 | }
1208 | param {
1209 | lr_mult: 0
1210 | decay_mult: 0
1211 | }
1212 | }
1213 | layer {
1214 | name: "conv3_2/out/pw/scale_new"
1215 | type: "Scale"
1216 | bottom: "conv3_2/out/pw_new"
1217 | top: "conv3_2/out/pw_new"
1218 | scale_param {
1219 | filler {
1220 | value: 1
1221 | }
1222 | bias_term: true
1223 | bias_filler {
1224 | value: 0
1225 | }
1226 | }
1227 | }
1228 | layer {
1229 | name: "fuse_conv3_2"
1230 | type: "Eltwise"
1231 | bottom: "conv3_1/out/pw_new"
1232 | bottom: "conv3_2/out/pw_new"
1233 | top: "fuse_conv3_2"
1234 | eltwise_param {
1235 | operation: SUM
1236 | }
1237 | }
1238 |
1239 |
1240 |
1241 |
1242 |
1243 |
1244 |
1245 | #------------------------- fc1
1246 | layer {
1247 | name: "fc1"
1248 | type: "InnerProduct"
1249 | bottom: "fuse_conv3_2"
1250 | top: "fc1"
1251 | param {
1252 | lr_mult: 1
1253 | decay_mult: 1
1254 | }
1255 | param {
1256 | lr_mult: 2
1257 | decay_mult: 1
1258 | }
1259 | inner_product_param {
1260 | num_output: 256
1261 | weight_filler {
1262 | type: "gaussian"
1263 | std: 0.01
1264 | }
1265 | bias_filler {
1266 | type: "constant"
1267 | value: 0
1268 | }
1269 | }
1270 | }
1271 | layer {
1272 | name: "relu_fc1"
1273 | type: "ReLU"
1274 | bottom: "fc1"
1275 | top: "fc1"
1276 | }
1277 | layer {
1278 | name: "drop_fc1"
1279 | type: "Dropout"
1280 | bottom: "fc1"
1281 | top: "fc1"
1282 | dropout_param{
1283 | dropout_ratio: 0.3
1284 | }
1285 | }
1286 |
1287 | #------------------------- fc2
1288 | layer {
1289 | name: "fc2"
1290 | type: "InnerProduct"
1291 | bottom: "fc1"
1292 | top: "fc2"
1293 | param {
1294 | lr_mult: 1
1295 | decay_mult: 1
1296 | }
1297 | param {
1298 | lr_mult: 2
1299 | decay_mult: 1
1300 | }
1301 | inner_product_param {
1302 | num_output: 10
1303 | weight_filler {
1304 | type: "gaussian"
1305 | std: 0.01
1306 | }
1307 | bias_filler {
1308 | type: "constant"
1309 | value: 0
1310 | }
1311 | }
1312 | }
1313 |
1314 |
1315 | layer {
1316 | name: "loss"
1317 | type: "EuclideanLoss"
1318 | bottom: "fc2"
1319 | bottom: "label"
1320 | top: "loss"
1321 | loss_weight: 100
1322 | }
--------------------------------------------------------------------------------
/1_level_1/Code/2_train/solver.prototxt:
--------------------------------------------------------------------------------
1 | net: "l1_mobilenet.prototxt"
2 |
3 | test_iter: 160 # bs = 128 * 2
4 | test_interval: 1250
5 |
6 | #base_lr: 0.0001
7 | base_lr: 0.001
8 | momentum: 0.9
9 | weight_decay: 0.0004
10 |
11 | type: "Adam"
12 |
13 | lr_policy: "multistep"
14 | #gamma: 0.9
15 | gamma:0.1
16 | stepvalue: 80000
17 | stepvalue: 100000
18 | #stepvalue: 250000
19 |
20 | display: 1000
21 | max_iter: 200000
22 |
23 | snapshot: 50000
24 | snapshot_prefix: "../../Result/solver_state/"
25 | solver_mode: GPU
26 |
--------------------------------------------------------------------------------
/1_level_1/Code/2_train/train.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env sh
# Launch level-1 training on GPUs 0,1 and tee the console output to a
# timestamped log file under ../../Result/log/.
set -e
# Compute the timestamp once and reuse it: the original computed `postfix`
# but never used it, calling `date` a second time for the log name.
postfix=`date +"%F-%H-%M-%S"`
/***your_caffe_path***/build/tools/caffe train \
--solver=./solver.prototxt -gpu 0,1 \
2>&1 | tee ../../Result/log/${postfix}.log $@
--------------------------------------------------------------------------------
/1_level_1/Code/3_inference/inferencen.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | sys.path.append('../../../util')
4 | sys.path.append('/***your_caffe_path***/python')
5 | sys.path.append('/***your_caffe_path***/python/caffe')
6 | import tools
7 | import caffe
8 | import numpy as np
9 | import cv2
10 | import time
11 |
# Paths to the deployed level-1 network definition and trained weights.
l1_deploy = './l1_deploy.prototxt'
l1_model = '../../Result/solver_state/_iter_100000.caffemodel'

# Input label files (one image name + landmarks per line).
# NOTE(review): "flod" looks like a typo for "fold(er)" but the spelling is
# used consistently across the project scripts, so it is kept as-is.
txt_flod = '../../Data/'
train_txt = txt_flod + 'l1_train_label.txt'
test_txt = txt_flod + 'l1_test_label.txt'

relative_path = '../../../raw_data/Data/img_celeba/' # find the image

# Output files: the network's predicted landmarks per image.
l1_out_train_txt = '../../Result/l1_out_train_label.txt'
l1_out_test_txt = '../../Result/l1_out_test_label.txt'

# Network input resolution (l1_deploy.prototxt declares 1x3x48x48).
w_net = 48
h_net = 48
#--------------------------------------------------------------------------- cnn initialization
caffe.set_mode_gpu()
caffe.set_device(0)
# load model
net = caffe.Net(l1_deploy,l1_model,caffe.TEST)
# image preprocess: subtract a constant 127.5 from every pixel of every channel
# NOTE(review): np.float is removed in numpy>=1.24 -- use plain float or
# np.float32 if the environment is ever upgraded.
mu = np.ones((3,w_net,h_net), dtype=np.float) * 127.5
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1)) # (h,w,c) --> (c,h,w)
transformer.set_mean('data', mu) # pixel-wise
transformer.set_raw_scale('data', 255 ) # [0,1] --> [0,255]
transformer.set_channel_swap('data', (2,1,0)) # RGB --> BGR
38 | #----------------------------------------------------------------------------- forward
def l1_forward(input_txt, output_txt):
    """Run the level-1 net on every image listed in input_txt.

    Each non-blank line of input_txt starts with an image file name; the
    image is loaded, preprocessed and forwarded through the module-level
    `net`, and the 10 values of the 'fc2' landmark output are written to
    output_txt as "<image name> <v1> ... <v10>".
    """
    # `with` guarantees both handles are closed even if a forward pass or
    # image load raises (the original leaked them on error).
    with open(output_txt, 'w') as out_f, open(input_txt) as in_f:
        for line in in_f:
            if line.isspace():
                continue
            img_name = line.split()[0]
            full_img_path = relative_path + img_name
            #------------------------------------------------------------------------- cnn forward
            im = caffe.io.load_image(full_img_path)  # RGB, float in [0, 1]
            net.blobs['data'].data[...] = transformer.preprocess('data', im)
            # time.time() gives wall-clock time; time.clock() measured CPU
            # time on Linux and was removed in Python 3.8.
            time_s = time.time()
            net.forward()
            time_e = time.time()
            print('%s forward : %s ms' % (img_name, round((time_e - time_s) * 1000, 1)))
            out_landmark = net.blobs['fc2'].data[0].flatten()
            #------------------------------------------------------------------------- write txt
            # str(ndarray) -> strip the newlines and brackets, keep the numbers
            values = str(out_landmark).replace("\n", "").strip('[]')
            out_f.write(img_name + ' ' + values + '\n')
60 |
61 | l1_forward(test_txt,l1_out_test_txt)
62 | l1_forward(train_txt,l1_out_train_txt)
--------------------------------------------------------------------------------
/1_level_1/Code/3_inference/l1_deploy.prototxt:
--------------------------------------------------------------------------------
1 | name: "level_1"
2 | input: "data"
3 | input_shape { dim: 1 dim: 3 dim: 48 dim: 48 }
4 |
5 |
6 | #------------------------------------------- conv1 input: 48*48*3 output: 24*24*16
7 | layer {
8 | name: "conv1_new"
9 | type: "Convolution"
10 | bottom: "data"
11 | top: "conv1_new"
12 | param {
13 | lr_mult: 1
14 | decay_mult: 1
15 | }
16 | convolution_param {
17 | num_output: 16
18 | bias_term: false
19 | pad: 1
20 | kernel_size: 3
21 | stride: 2
22 | weight_filler {
23 | type: "msra"
24 | }
25 | }
26 | }
27 | layer {
28 | name: "conv1/bn_new"
29 | type: "BatchNorm"
30 | bottom: "conv1_new"
31 | top: "conv1_new"
32 | param {
33 | lr_mult: 0
34 | decay_mult: 0
35 | }
36 | param {
37 | lr_mult: 0
38 | decay_mult: 0
39 | }
40 | param {
41 | lr_mult: 0
42 | decay_mult: 0
43 | }
44 | }
45 | layer {
46 | name: "conv1/scale_new"
47 | type: "Scale"
48 | bottom: "conv1_new"
49 | top: "conv1_new"
50 | scale_param {
51 | filler {
52 | value: 1
53 | }
54 | bias_term: true
55 | bias_filler {
56 | value: 0
57 | }
58 | }
59 | }
60 | layer {
61 | name: "relu1_new"
62 | type: "ReLU"
63 | bottom: "conv1_new"
64 | top: "conv1_new"
65 | }
66 |
67 | #----------- bottleneck_1 1_1(s=2) 1_2(s=1) input: 24*24*16 output: 12*12*24
68 |
69 | layer {
70 | name: "conv1_1/in/pw_new"
71 | type: "Convolution"
72 | bottom: "conv1_new"
73 | top: "conv1_1/in/pw_new"
74 | param {
75 | lr_mult: 1
76 | decay_mult: 1
77 | }
78 | convolution_param {
79 | num_output: 96
80 | bias_term: false
81 | pad: 0
82 | kernel_size: 1
83 | engine: CAFFE
84 | stride: 1
85 | weight_filler {
86 | type: "msra"
87 | }
88 | }
89 | }
90 | layer {
91 | name: "conv1_1/in/pw/bn_new"
92 | type: "BatchNorm"
93 | bottom: "conv1_1/in/pw_new"
94 | top: "conv1_1/in/pw_new"
95 | param {
96 | lr_mult: 0
97 | decay_mult: 0
98 | }
99 | param {
100 | lr_mult: 0
101 | decay_mult: 0
102 | }
103 | param {
104 | lr_mult: 0
105 | decay_mult: 0
106 | }
107 | }
108 | layer {
109 | name: "conv1_1/in/pw/scale_new"
110 | type: "Scale"
111 | bottom: "conv1_1/in/pw_new"
112 | top: "conv1_1/in/pw_new"
113 | scale_param {
114 | filler {
115 | value: 1
116 | }
117 | bias_term: true
118 | bias_filler {
119 | value: 0
120 | }
121 | }
122 | }
123 | layer {
124 | name: "relu1_1/in/pw_new"
125 | type: "ReLU"
126 | bottom: "conv1_1/in/pw_new"
127 | top: "conv1_1/in/pw_new"
128 | }
129 |
130 |
131 |
132 | # 1_1 dw conv
133 | layer {
134 | name: "conv1_1/dw_new"
135 | type: "ConvolutionDepthwise"
136 | bottom: "conv1_1/in/pw_new"
137 | top: "conv1_1/dw_new"
138 | param {
139 | lr_mult: 1
140 | decay_mult: 0
141 | }
142 | convolution_param {
143 | num_output: 96
144 | bias_term: false
145 | pad: 1
146 | kernel_size: 3
147 | engine: CAFFE
148 | stride: 2
149 | weight_filler {
150 | type: "msra"
151 | }
152 | }
153 | }
154 | layer {
155 | name: "conv1_1/dw/bn_new"
156 | type: "BatchNorm"
157 | bottom: "conv1_1/dw_new"
158 | top: "conv1_1/dw_new"
159 | param {
160 | lr_mult: 0
161 | decay_mult: 0
162 | }
163 | param {
164 | lr_mult: 0
165 | decay_mult: 0
166 | }
167 | param {
168 | lr_mult: 0
169 | decay_mult: 0
170 | }
171 | }
172 | layer {
173 | name: "conv1_1/dw/scale_new"
174 | type: "Scale"
175 | bottom: "conv1_1/dw_new"
176 | top: "conv1_1/dw_new"
177 | scale_param {
178 | filler {
179 | value: 1
180 | }
181 | bias_term: true
182 | bias_filler {
183 | value: 0
184 | }
185 | }
186 | }
187 | layer {
188 | name: "relu1_1/dw_new"
189 | type: "ReLU"
190 | bottom: "conv1_1/dw_new"
191 | top: "conv1_1/dw_new"
192 | }
193 |
194 | # 1_1 out
195 | layer {
196 | name: "conv1_1/out/pw_new"
197 | type: "Convolution"
198 | bottom: "conv1_1/dw_new"
199 | top: "conv1_1/out/pw_new"
200 | param {
201 | lr_mult: 1
202 | decay_mult: 1
203 | }
204 | convolution_param {
205 | num_output: 24
206 | bias_term: false
207 | pad: 0
208 | kernel_size: 1
209 | engine: CAFFE
210 | stride: 1
211 | weight_filler {
212 | type: "msra"
213 | }
214 | }
215 | }
216 | layer {
217 | name: "conv1_1/out/pw/bn_new"
218 | type: "BatchNorm"
219 | bottom: "conv1_1/out/pw_new"
220 | top: "conv1_1/out/pw_new"
221 | param {
222 | lr_mult: 0
223 | decay_mult: 0
224 | }
225 | param {
226 | lr_mult: 0
227 | decay_mult: 0
228 | }
229 | param {
230 | lr_mult: 0
231 | decay_mult: 0
232 | }
233 | }
234 | layer {
235 | name: "conv1_1/out/pw/scale_new"
236 | type: "Scale"
237 | bottom: "conv1_1/out/pw_new"
238 | top: "conv1_1/out/pw_new"
239 | scale_param {
240 | filler {
241 | value: 1
242 | }
243 | bias_term: true
244 | bias_filler {
245 | value: 0
246 | }
247 | }
248 | }
249 | # 1_2 in
250 |
251 | layer {
252 | name: "conv1_2/in/pw_new"
253 | type: "Convolution"
254 | bottom: "conv1_1/out/pw_new"
255 | top: "conv1_2/in/pw_new"
256 | param {
257 | lr_mult: 1
258 | decay_mult: 1
259 | }
260 | convolution_param {
261 | num_output: 144
262 | bias_term: false
263 | pad: 0
264 | kernel_size: 1
265 | engine: CAFFE
266 | stride: 1
267 | weight_filler {
268 | type: "msra"
269 | }
270 | }
271 | }
272 | layer {
273 | name: "conv1_2/in/pw/bn_new"
274 | type: "BatchNorm"
275 | bottom: "conv1_2/in/pw_new"
276 | top: "conv1_2/in/pw_new"
277 | param {
278 | lr_mult: 0
279 | decay_mult: 0
280 | }
281 | param {
282 | lr_mult: 0
283 | decay_mult: 0
284 | }
285 | param {
286 | lr_mult: 0
287 | decay_mult: 0
288 | }
289 | }
290 | layer {
291 | name: "conv1_2/in/pw/scale_new"
292 | type: "Scale"
293 | bottom: "conv1_2/in/pw_new"
294 | top: "conv1_2/in/pw_new"
295 | scale_param {
296 | filler {
297 | value: 1
298 | }
299 | bias_term: true
300 | bias_filler {
301 | value: 0
302 | }
303 | }
304 | }
305 | layer {
306 | name: "relu1_2/in/pw_new"
307 | type: "ReLU"
308 | bottom: "conv1_2/in/pw_new"
309 | top: "conv1_2/in/pw_new"
310 | }
311 |
312 | # 1_2 dw
313 |
314 | layer {
315 | name: "conv1_2/dw_new"
316 | type: "ConvolutionDepthwise"
317 | bottom: "conv1_2/in/pw_new"
318 | top: "conv1_2/dw_new"
319 | param {
320 | lr_mult: 1
321 | decay_mult: 0
322 | }
323 | convolution_param {
324 | num_output: 144
325 | bias_term: false
326 | pad: 1
327 | kernel_size: 3
328 | engine: CAFFE
329 | stride: 1
330 | weight_filler {
331 | type: "msra"
332 | }
333 | }
334 | }
335 | layer {
336 | name: "conv1_2/dw/bn_new"
337 | type: "BatchNorm"
338 | bottom: "conv1_2/dw_new"
339 | top: "conv1_2/dw_new"
340 | param {
341 | lr_mult: 0
342 | decay_mult: 0
343 | }
344 | param {
345 | lr_mult: 0
346 | decay_mult: 0
347 | }
348 | param {
349 | lr_mult: 0
350 | decay_mult: 0
351 | }
352 | }
353 | layer {
354 | name: "conv1_2/dw/scale_new"
355 | type: "Scale"
356 | bottom: "conv1_2/dw_new"
357 | top: "conv1_2/dw_new"
358 | scale_param {
359 | filler {
360 | value: 1
361 | }
362 | bias_term: true
363 | bias_filler {
364 | value: 0
365 | }
366 | }
367 | }
368 | layer {
369 | name: "relu1_2/dw_new"
370 | type: "ReLU"
371 | bottom: "conv1_2/dw_new"
372 | top: "conv1_2/dw_new"
373 | }
374 |
375 | # 1_2 out 12*12*24
376 | layer {
377 | name: "conv1_2/out/pw_new"
378 | type: "Convolution"
379 | bottom: "conv1_2/dw_new"
380 | top: "conv1_2/out/pw_new"
381 | param {
382 | lr_mult: 1
383 | decay_mult: 1
384 | }
385 | convolution_param {
386 | num_output: 24
387 | bias_term: false
388 | pad: 0
389 | kernel_size: 1
390 | engine: CAFFE
391 | stride: 1
392 | weight_filler {
393 | type: "msra"
394 | }
395 | }
396 | }
397 | layer {
398 | name: "conv1_2/out/pw/bn_new"
399 | type: "BatchNorm"
400 | bottom: "conv1_2/out/pw_new"
401 | top: "conv1_2/out/pw_new"
402 | param {
403 | lr_mult: 0
404 | decay_mult: 0
405 | }
406 | param {
407 | lr_mult: 0
408 | decay_mult: 0
409 | }
410 | param {
411 | lr_mult: 0
412 | decay_mult: 0
413 | }
414 | }
415 | layer {
416 | name: "conv1_2/out/pw/scale_new"
417 | type: "Scale"
418 | bottom: "conv1_2/out/pw_new"
419 | top: "conv1_2/out/pw_new"
420 | scale_param {
421 | filler {
422 | value: 1
423 | }
424 | bias_term: true
425 | bias_filler {
426 | value: 0
427 | }
428 | }
429 | }
430 | layer {
431 | name: "fuse_conv1_2"
432 | type: "Eltwise"
433 | bottom: "conv1_1/out/pw_new"
434 | bottom: "conv1_2/out/pw_new"
435 | top: "fuse_conv1_2"
436 | eltwise_param {
437 | operation: SUM
438 | }
439 | }
440 |
441 | #--------------------------bottleneck2 2_1(s=2) 2_2(s=1) input: 12*12*24 output: 6*6*32
442 | # 2_1 in
443 | layer {
444 | name: "conv2_1/in/pw_new"
445 | type: "Convolution"
446 | bottom: "fuse_conv1_2"
447 | top: "conv2_1/in/pw_new"
448 | param {
449 | lr_mult: 1
450 | decay_mult: 1
451 | }
452 | convolution_param {
453 | num_output: 144
454 | bias_term: false
455 | pad: 0
456 | kernel_size: 1
457 | engine: CAFFE
458 | stride: 1
459 | weight_filler {
460 | type: "msra"
461 | }
462 | }
463 | }
464 | layer {
465 | name: "conv2_1/in/pw/bn_new"
466 | type: "BatchNorm"
467 | bottom: "conv2_1/in/pw_new"
468 | top: "conv2_1/in/pw_new"
469 | param {
470 | lr_mult: 0
471 | decay_mult: 0
472 | }
473 | param {
474 | lr_mult: 0
475 | decay_mult: 0
476 | }
477 | param {
478 | lr_mult: 0
479 | decay_mult: 0
480 | }
481 | }
482 | layer {
483 | name: "conv2_1/in/pw/scale_new"
484 | type: "Scale"
485 | bottom: "conv2_1/in/pw_new"
486 | top: "conv2_1/in/pw_new"
487 | scale_param {
488 | filler {
489 | value: 1
490 | }
491 | bias_term: true
492 | bias_filler {
493 | value: 0
494 | }
495 | }
496 | }
497 | layer {
498 | name: "relu2_1/in/pw_new"
499 | type: "ReLU"
500 | bottom: "conv2_1/in/pw_new"
501 | top: "conv2_1/in/pw_new"
502 | }
503 |
504 | # 2_1 dw
505 | layer {
506 | name: "conv2_1/dw_new"
507 | type: "ConvolutionDepthwise"
508 | bottom: "conv2_1/in/pw_new"
509 | top: "conv2_1/dw_new"
510 | param {
511 | lr_mult: 1
512 | decay_mult: 0
513 | }
514 | convolution_param {
515 | num_output: 144
516 | bias_term: false
517 | pad: 1
518 | kernel_size: 3
519 | engine: CAFFE
520 | stride: 2
521 | weight_filler {
522 | type: "msra"
523 | }
524 | }
525 | }
526 | layer {
527 | name: "conv2_1/dw/bn_new"
528 | type: "BatchNorm"
529 | bottom: "conv2_1/dw_new"
530 | top: "conv2_1/dw_new"
531 | param {
532 | lr_mult: 0
533 | decay_mult: 0
534 | }
535 | param {
536 | lr_mult: 0
537 | decay_mult: 0
538 | }
539 | param {
540 | lr_mult: 0
541 | decay_mult: 0
542 | }
543 | }
544 | layer {
545 | name: "conv2_1/dw/scale_new"
546 | type: "Scale"
547 | bottom: "conv2_1/dw_new"
548 | top: "conv2_1/dw_new"
549 | scale_param {
550 | filler {
551 | value: 1
552 | }
553 | bias_term: true
554 | bias_filler {
555 | value: 0
556 | }
557 | }
558 | }
559 | layer {
560 | name: "relu2_1/dw_new"
561 | type: "ReLU"
562 | bottom: "conv2_1/dw_new"
563 | top: "conv2_1/dw_new"
564 | }
565 |
566 | # 2_1 out
567 | layer {
568 | name: "conv2_1/out/pw_new"
569 | type: "Convolution"
570 | bottom: "conv2_1/dw_new"
571 | top: "conv2_1/out/pw_new"
572 | param {
573 | lr_mult: 1
574 | decay_mult: 1
575 | }
576 | convolution_param {
577 | num_output: 32
578 | bias_term: false
579 | pad: 0
580 | kernel_size: 1
581 | engine: CAFFE
582 | stride: 1
583 | weight_filler {
584 | type: "msra"
585 | }
586 | }
587 | }
588 | layer {
589 | name: "conv2_1/out/pw/bn_new"
590 | type: "BatchNorm"
591 | bottom: "conv2_1/out/pw_new"
592 | top: "conv2_1/out/pw_new"
593 | param {
594 | lr_mult: 0
595 | decay_mult: 0
596 | }
597 | param {
598 | lr_mult: 0
599 | decay_mult: 0
600 | }
601 | param {
602 | lr_mult: 0
603 | decay_mult: 0
604 | }
605 | }
606 | layer {
607 | name: "conv2_1/out/pw/scale_new"
608 | type: "Scale"
609 | bottom: "conv2_1/out/pw_new"
610 | top: "conv2_1/out/pw_new"
611 | scale_param {
612 | filler {
613 | value: 1
614 | }
615 | bias_term: true
616 | bias_filler {
617 | value: 0
618 | }
619 | }
620 | }
621 |
622 | # 2_2 in
623 |
624 | layer {
625 | name: "conv2_2/in/pw_new"
626 | type: "Convolution"
627 | bottom: "conv2_1/out/pw_new"
628 | top: "conv2_2/in/pw_new"
629 | param {
630 | lr_mult: 1
631 | decay_mult: 1
632 | }
633 | convolution_param {
634 | num_output: 192
635 | bias_term: false
636 | pad: 0
637 | kernel_size: 1
638 | engine: CAFFE
639 | stride: 1
640 | weight_filler {
641 | type: "msra"
642 | }
643 | }
644 | }
645 | layer {
646 | name: "conv2_2/in/pw/bn_new"
647 | type: "BatchNorm"
648 | bottom: "conv2_2/in/pw_new"
649 | top: "conv2_2/in/pw_new"
650 | param {
651 | lr_mult: 0
652 | decay_mult: 0
653 | }
654 | param {
655 | lr_mult: 0
656 | decay_mult: 0
657 | }
658 | param {
659 | lr_mult: 0
660 | decay_mult: 0
661 | }
662 | }
663 | layer {
664 | name: "conv2_2/in/pw/scale_new"
665 | type: "Scale"
666 | bottom: "conv2_2/in/pw_new"
667 | top: "conv2_2/in/pw_new"
668 | scale_param {
669 | filler {
670 | value: 1
671 | }
672 | bias_term: true
673 | bias_filler {
674 | value: 0
675 | }
676 | }
677 | }
678 | layer {
679 | name: "relu2_2/in/pw_new"
680 | type: "ReLU"
681 | bottom: "conv2_2/in/pw_new"
682 | top: "conv2_2/in/pw_new"
683 | }
684 |
685 | # 2_2 dw
686 | layer {
687 | name: "conv2_2/dw_new"
688 | type: "ConvolutionDepthwise"
689 | bottom: "conv2_2/in/pw_new"
690 | top: "conv2_2/dw_new"
691 | param {
692 | lr_mult: 1
693 | decay_mult: 0
694 | }
695 | convolution_param {
696 | num_output: 192
697 | bias_term: false
698 | pad: 1
699 | kernel_size: 3
700 | engine: CAFFE
701 | stride: 1
702 | weight_filler {
703 | type: "msra"
704 | }
705 | }
706 | }
707 | layer {
708 | name: "conv2_2/dw/bn_new"
709 | type: "BatchNorm"
710 | bottom: "conv2_2/dw_new"
711 | top: "conv2_2/dw_new"
712 | param {
713 | lr_mult: 0
714 | decay_mult: 0
715 | }
716 | param {
717 | lr_mult: 0
718 | decay_mult: 0
719 | }
720 | param {
721 | lr_mult: 0
722 | decay_mult: 0
723 | }
724 | }
725 | layer {
726 | name: "conv2_2/dw/scale_new"
727 | type: "Scale"
728 | bottom: "conv2_2/dw_new"
729 | top: "conv2_2/dw_new"
730 | scale_param {
731 | filler {
732 | value: 1
733 | }
734 | bias_term: true
735 | bias_filler {
736 | value: 0
737 | }
738 | }
739 | }
740 | layer {
741 | name: "relu2_2/dw_new"
742 | type: "ReLU"
743 | bottom: "conv2_2/dw_new"
744 | top: "conv2_2/dw_new"
745 | }
746 |
747 |
748 | # 2_2 out
749 |
750 | layer {
751 | name: "conv2_2/out/pw_new"
752 | type: "Convolution"
753 | bottom: "conv2_2/dw_new"
754 | top: "conv2_2/out/pw_new"
755 | param {
756 | lr_mult: 1
757 | decay_mult: 1
758 | }
759 | convolution_param {
760 | num_output: 32
761 | bias_term: false
762 | pad: 0
763 | kernel_size: 1
764 | engine: CAFFE
765 | stride: 1
766 | weight_filler {
767 | type: "msra"
768 | }
769 | }
770 | }
771 | layer {
772 | name: "conv2_2/out/pw/bn_new"
773 | type: "BatchNorm"
774 | bottom: "conv2_2/out/pw_new"
775 | top: "conv2_2/out/pw_new"
776 | param {
777 | lr_mult: 0
778 | decay_mult: 0
779 | }
780 | param {
781 | lr_mult: 0
782 | decay_mult: 0
783 | }
784 | param {
785 | lr_mult: 0
786 | decay_mult: 0
787 | }
788 | }
789 | layer {
790 | name: "conv2_2/out/pw/scale_new"
791 | type: "Scale"
792 | bottom: "conv2_2/out/pw_new"
793 | top: "conv2_2/out/pw_new"
794 | scale_param {
795 | filler {
796 | value: 1
797 | }
798 | bias_term: true
799 | bias_filler {
800 | value: 0
801 | }
802 | }
803 | }
804 | layer {
805 | name: "fuse_conv2_2"
806 | type: "Eltwise"
807 | bottom: "conv2_1/out/pw_new"
808 | bottom: "conv2_2/out/pw_new"
809 | top: "fuse_conv2_2"
810 | eltwise_param {
811 | operation: SUM
812 | }
813 | }
814 |
815 | #--------------------------bottleneck3 3_1(s=2) 3_2(s=1) input: 6*6*32 output: 3*3*64
816 | # 3_1 in
817 | layer {
818 | name: "conv3_1/in/pw_new"
819 | type: "Convolution"
820 | bottom: "fuse_conv2_2"
821 | top: "conv3_1/in/pw_new"
822 | param {
823 | lr_mult: 1
824 | decay_mult: 1
825 | }
826 | convolution_param {
827 | num_output: 192
828 | bias_term: false
829 | pad: 0
830 | kernel_size: 1
831 | engine: CAFFE
832 | stride: 1
833 | weight_filler {
834 | type: "msra"
835 | }
836 | }
837 | }
838 | layer {
839 | name: "conv3_1/in/pw/bn_new"
840 | type: "BatchNorm"
841 | bottom: "conv3_1/in/pw_new"
842 | top: "conv3_1/in/pw_new"
843 | param {
844 | lr_mult: 0
845 | decay_mult: 0
846 | }
847 | param {
848 | lr_mult: 0
849 | decay_mult: 0
850 | }
851 | param {
852 | lr_mult: 0
853 | decay_mult: 0
854 | }
855 | }
856 | layer {
857 | name: "conv3_1/in/pw/scale_new"
858 | type: "Scale"
859 | bottom: "conv3_1/in/pw_new"
860 | top: "conv3_1/in/pw_new"
861 | scale_param {
862 | filler {
863 | value: 1
864 | }
865 | bias_term: true
866 | bias_filler {
867 | value: 0
868 | }
869 | }
870 | }
871 | layer {
872 | name: "relu3_1/in/pw_new"
873 | type: "ReLU"
874 | bottom: "conv3_1/in/pw_new"
875 | top: "conv3_1/in/pw_new"
876 | }
877 |
878 | # 3_1 dw
879 | layer {
880 | name: "conv3_1/dw_new"
881 | type: "ConvolutionDepthwise"
882 | bottom: "conv3_1/in/pw_new"
883 | top: "conv3_1/dw_new"
884 | param {
885 | lr_mult: 1
886 | decay_mult: 0
887 | }
888 | convolution_param {
889 | num_output: 192
890 | bias_term: false
891 | pad: 1
892 | kernel_size: 3
893 | engine: CAFFE
894 | stride: 2
895 | weight_filler {
896 | type: "msra"
897 | }
898 | }
899 | }
900 | layer {
901 | name: "conv3_1/dw/bn_new"
902 | type: "BatchNorm"
903 | bottom: "conv3_1/dw_new"
904 | top: "conv3_1/dw_new"
905 | param {
906 | lr_mult: 0
907 | decay_mult: 0
908 | }
909 | param {
910 | lr_mult: 0
911 | decay_mult: 0
912 | }
913 | param {
914 | lr_mult: 0
915 | decay_mult: 0
916 | }
917 | }
918 | layer {
919 | name: "conv3_1/dw/scale_new"
920 | type: "Scale"
921 | bottom: "conv3_1/dw_new"
922 | top: "conv3_1/dw_new"
923 | scale_param {
924 | filler {
925 | value: 1
926 | }
927 | bias_term: true
928 | bias_filler {
929 | value: 0
930 | }
931 | }
932 | }
933 | layer {
934 | name: "relu3_1/dw_new"
935 | type: "ReLU"
936 | bottom: "conv3_1/dw_new"
937 | top: "conv3_1/dw_new"
938 | }
939 |
940 | # 3_1 out
941 | layer {
942 | name: "conv3_1/out/pw_new"
943 | type: "Convolution"
944 | bottom: "conv3_1/dw_new"
945 | top: "conv3_1/out/pw_new"
946 | param {
947 | lr_mult: 1
948 | decay_mult: 1
949 | }
950 | convolution_param {
951 | num_output: 64
952 | bias_term: false
953 | pad: 0
954 | kernel_size: 1
955 | engine: CAFFE
956 | stride: 1
957 | weight_filler {
958 | type: "msra"
959 | }
960 | }
961 | }
962 | layer {
963 | name: "conv3_1/out/pw/bn_new"
964 | type: "BatchNorm"
965 | bottom: "conv3_1/out/pw_new"
966 | top: "conv3_1/out/pw_new"
967 | param {
968 | lr_mult: 0
969 | decay_mult: 0
970 | }
971 | param {
972 | lr_mult: 0
973 | decay_mult: 0
974 | }
975 | param {
976 | lr_mult: 0
977 | decay_mult: 0
978 | }
979 | }
980 | layer {
981 | name: "conv3_1/out/pw/scale_new"
982 | type: "Scale"
983 | bottom: "conv3_1/out/pw_new"
984 | top: "conv3_1/out/pw_new"
985 | scale_param {
986 | filler {
987 | value: 1
988 | }
989 | bias_term: true
990 | bias_filler {
991 | value: 0
992 | }
993 | }
994 | }
995 |
996 | # 3_2 in
997 |
998 | layer {
999 | name: "conv3_2/in/pw_new"
1000 | type: "Convolution"
1001 | bottom: "conv3_1/out/pw_new"
1002 | top: "conv3_2/in/pw_new"
1003 | param {
1004 | lr_mult: 1
1005 | decay_mult: 1
1006 | }
1007 | convolution_param {
1008 | num_output: 192
1009 | bias_term: false
1010 | pad: 0
1011 | kernel_size: 1
1012 | engine: CAFFE
1013 | stride: 1
1014 | weight_filler {
1015 | type: "msra"
1016 | }
1017 | }
1018 | }
1019 | layer {
1020 | name: "conv3_2/in/pw/bn_new"
1021 | type: "BatchNorm"
1022 | bottom: "conv3_2/in/pw_new"
1023 | top: "conv3_2/in/pw_new"
1024 | param {
1025 | lr_mult: 0
1026 | decay_mult: 0
1027 | }
1028 | param {
1029 | lr_mult: 0
1030 | decay_mult: 0
1031 | }
1032 | param {
1033 | lr_mult: 0
1034 | decay_mult: 0
1035 | }
1036 | }
1037 | layer {
1038 | name: "conv3_2/in/pw/scale_new"
1039 | type: "Scale"
1040 | bottom: "conv3_2/in/pw_new"
1041 | top: "conv3_2/in/pw_new"
1042 | scale_param {
1043 | filler {
1044 | value: 1
1045 | }
1046 | bias_term: true
1047 | bias_filler {
1048 | value: 0
1049 | }
1050 | }
1051 | }
1052 | layer {
1053 | name: "relu3_2/in/pw_new"
1054 | type: "ReLU"
1055 | bottom: "conv3_2/in/pw_new"
1056 | top: "conv3_2/in/pw_new"
1057 | }
1058 |
1059 | # 3_2 dw
1060 | layer {
1061 | name: "conv3_2/dw_new"
1062 | type: "ConvolutionDepthwise"
1063 | bottom: "conv3_2/in/pw_new"
1064 | top: "conv3_2/dw_new"
1065 | param {
1066 | lr_mult: 1
1067 | decay_mult: 0
1068 | }
1069 | convolution_param {
1070 | num_output: 192
1071 | bias_term: false
1072 | pad: 1
1073 | kernel_size: 3
1074 | engine: CAFFE
1075 | stride: 1
1076 | weight_filler {
1077 | type: "msra"
1078 | }
1079 | }
1080 | }
1081 | layer {
1082 | name: "conv3_2/dw/bn_new"
1083 | type: "BatchNorm"
1084 | bottom: "conv3_2/dw_new"
1085 | top: "conv3_2/dw_new"
1086 | param {
1087 | lr_mult: 0
1088 | decay_mult: 0
1089 | }
1090 | param {
1091 | lr_mult: 0
1092 | decay_mult: 0
1093 | }
1094 | param {
1095 | lr_mult: 0
1096 | decay_mult: 0
1097 | }
1098 | }
1099 | layer {
1100 | name: "conv3_2/dw/scale_new"
1101 | type: "Scale"
1102 | bottom: "conv3_2/dw_new"
1103 | top: "conv3_2/dw_new"
1104 | scale_param {
1105 | filler {
1106 | value: 1
1107 | }
1108 | bias_term: true
1109 | bias_filler {
1110 | value: 0
1111 | }
1112 | }
1113 | }
1114 | layer {
1115 | name: "relu3_2/dw_new"
1116 | type: "ReLU"
1117 | bottom: "conv3_2/dw_new"
1118 | top: "conv3_2/dw_new"
1119 | }
1120 |
1121 |
1122 | # 3_2 out
1123 |
1124 | layer {
1125 | name: "conv3_2/out/pw_new"
1126 | type: "Convolution"
1127 | bottom: "conv3_2/dw_new"
1128 | top: "conv3_2/out/pw_new"
1129 | param {
1130 | lr_mult: 1
1131 | decay_mult: 1
1132 | }
1133 | convolution_param {
1134 | num_output: 64
1135 | bias_term: false
1136 | pad: 0
1137 | kernel_size: 1
1138 | engine: CAFFE
1139 | stride: 1
1140 | weight_filler {
1141 | type: "msra"
1142 | }
1143 | }
1144 | }
1145 | layer {
1146 | name: "conv3_2/out/pw/bn_new"
1147 | type: "BatchNorm"
1148 | bottom: "conv3_2/out/pw_new"
1149 | top: "conv3_2/out/pw_new"
1150 | param {
1151 | lr_mult: 0
1152 | decay_mult: 0
1153 | }
1154 | param {
1155 | lr_mult: 0
1156 | decay_mult: 0
1157 | }
1158 | param {
1159 | lr_mult: 0
1160 | decay_mult: 0
1161 | }
1162 | }
1163 | layer {
1164 | name: "conv3_2/out/pw/scale_new"
1165 | type: "Scale"
1166 | bottom: "conv3_2/out/pw_new"
1167 | top: "conv3_2/out/pw_new"
1168 | scale_param {
1169 | filler {
1170 | value: 1
1171 | }
1172 | bias_term: true
1173 | bias_filler {
1174 | value: 0
1175 | }
1176 | }
1177 | }
1178 | layer {
1179 | name: "fuse_conv3_2"
1180 | type: "Eltwise"
1181 | bottom: "conv3_1/out/pw_new"
1182 | bottom: "conv3_2/out/pw_new"
1183 | top: "fuse_conv3_2"
1184 | eltwise_param {
1185 | operation: SUM
1186 | }
1187 | }
1188 |
1189 |
1190 |
1191 |
1192 |
1193 |
1194 |
1195 | #------------------------- fc1
1196 | layer {
1197 | name: "fc1"
1198 | type: "InnerProduct"
1199 | bottom: "fuse_conv3_2"
1200 | top: "fc1"
1201 | param {
1202 | lr_mult: 1
1203 | decay_mult: 1
1204 | }
1205 | param {
1206 | lr_mult: 2
1207 | decay_mult: 1
1208 | }
1209 | inner_product_param {
1210 | num_output: 256
1211 | weight_filler {
1212 | type: "gaussian"
1213 | std: 0.01
1214 | }
1215 | bias_filler {
1216 | type: "constant"
1217 | value: 0
1218 | }
1219 | }
1220 | }
1221 | layer {
1222 | name: "relu_fc1"
1223 | type: "ReLU"
1224 | bottom: "fc1"
1225 | top: "fc1"
1226 | }
1227 | layer {
1228 | name: "drop_fc1"
1229 | type: "Dropout"
1230 | bottom: "fc1"
1231 | top: "fc1"
1232 | dropout_param{
1233 | dropout_ratio: 0.3
1234 | }
1235 | }
1236 |
1237 | #------------------------- fc2
1238 | layer {
1239 | name: "fc2"
1240 | type: "InnerProduct"
1241 | bottom: "fc1"
1242 | top: "fc2"
1243 | param {
1244 | lr_mult: 1
1245 | decay_mult: 1
1246 | }
1247 | param {
1248 | lr_mult: 2
1249 | decay_mult: 1
1250 | }
1251 | inner_product_param {
1252 | num_output: 10
1253 | weight_filler {
1254 | type: "gaussian"
1255 | std: 0.01
1256 | }
1257 | bias_filler {
1258 | type: "constant"
1259 | value: 0
1260 | }
1261 | }
1262 | }
--------------------------------------------------------------------------------
/1_level_1/Code/4_evaluate/evaluate_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | sys.path.append('../../../util')
4 | import tools
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import os
8 | import cv2
9 |
l1_out_test_label = '../../Result/l1_out_test_label.txt' # network predictions (written by 3_inference)
l1_raw_test_label = '../../Data/l1_test_label.txt' # ground-truth landmark labels
relative_path = '../../../raw_data/Data/img_celeba/' # find the image from txt
draw_img_flod = '../../Result/l1_out_draw/test/' # images below the error threshold
drop_img_flod = '../../Result/l1_drop/test/' # images at/above the error threshold

n_p = 5 # number of landmark points (eyes, nose, mouth corners)
# ----------------------------------------------------------------------- load label
# Both label files are expected to be line-aligned: line i of each file
# refers to the same image.
l1_raw_fid = open(l1_raw_test_label)
l1_raw_lines = l1_raw_fid.readlines()
l1_raw_fid.close()
l1_out_fid = open(l1_out_test_label)
l1_out_lines = l1_out_fid.readlines()
l1_out_fid.close()
24 |
# ------------------------- per-image error computation -------------------------
# err_mat collects the 5 per-landmark normalized errors for every image.
# Images whose mean error reaches `threshold` are rendered into the "drop"
# folder; the rest go to the regular draw folder. Both branches of the
# original did the same drawing work, so only the target folder varies here.
err_mat = []
threshold = 0.1  # 10% of the image diagonal
count_drop = 0
for idx in range(len(l1_out_lines)):
    print(idx)
    r_ = l1_raw_lines[idx]  # ground-truth line: "<name> x1 y1 ... x5 y5"
    o_ = l1_out_lines[idx]  # network-output line, same layout
    r_name = r_.split()[0]
    o_name = o_.split()[0]
    if r_name != o_name:
        # the two label files are expected to be line-aligned
        print('found an error, idx: %d' % idx)
        continue
    img = cv2.imread(relative_path + r_name)
    # err_1 is the mean normalized error, err_5 the per-landmark errors;
    # landmarks are in [-1, 1], normalized by the image diagonal.
    err_1, err_5 = tools.cal_error_nor_diag(img, r_, o_)
    err_mat.append(err_5)
    # list comprehension instead of np.array(map(...)): map() returns an
    # iterator on Python 3, which numpy would wrap as a 0-d object array
    out_land = np.array([float(v) for v in o_.split()[1:2 * n_p + 1]])

    if err_1 >= threshold:
        count_drop += 1
        target_fold = drop_img_flod
    else:
        target_fold = draw_img_flod
    draw_img = tools.drawpoints(img.copy(), out_land)
    tools.makedir(target_fold)
    # prefix the file name with its error so bad cases sort together
    cv2.imwrite(target_fold + str(err_1) + '_' + r_name, draw_img)
# -------------------------------------------------------------- print result
# Per-landmark mean normalized error (MNE) over the whole test set.
err_mat = np.array(err_mat)
err_mat = np.reshape(err_mat,(-1,5))
MNE_5 = []
for i in range(n_p):
    MNE_5.append(err_mat[:,i].mean())
print 'err >= 10% have ' , count_drop
# ------------------------------------------------------------- plot
# Bar chart: one bar per landmark, annotated with its MNE in percent.
fig = plt.figure('test_MNE_5')
ax1 =plt.subplot(111)
data = np.array(MNE_5)
width = 0.2
x_bar = np.arange(5)
# print('x_bar type ',type(x_bar))
# NOTE(review): the `left=` keyword of Axes.bar was removed in matplotlib 3
# (renamed to the positional `x`) -- confirm the pinned matplotlib version.
rect = ax1.bar(left=x_bar,height=data,width=width, color="blue")
for rec in rect:
    x= rec.get_x()
    height = round(rec.get_height()*100,2)  # MNE as a percentage
    mne_text = str(height) + '%'
    # print('mne text',mne_text)
    ax1.text(x+0.05,1.02*height/100,mne_text)
    # print('height',height)
MNE_5_mean = np.round(np.array(MNE_5).mean() *100,2)
MNE_5_mean_text = 'The mean normalized error :' +str(MNE_5_mean) + '%'
ax1.text(1 ,1.5*MNE_5_mean/100 ,MNE_5_mean_text,color="red")

ax1.set_xticks(x_bar + width)
ax1.set_xticklabels(("left eye","right eye","nose","left mouth","right mouth"))
ax1.set_ylabel("MNE")
ax1.set_title(" MNE")
ax1.grid(True)
ax1.set_ylim(0,0.025) # max y axis
plt.show()



# Histogram of the per-image mean error (distribution over the test set).
print 'The mean error normalized by dist_diag is : ', err_mat.mean()
fig2 = plt.figure("test_distribution")
ax2 = plt.subplot(111)
ax2.set_title("The mean error normalized by dist_diag :")
data =err_mat.mean(axis=1)
# NOTE(review): hist(normed=...) is deprecated and removed in modern
# matplotlib in favor of density= -- confirm the pinned version.
n, bins, patches = plt.hist(data ,bins=200, normed=False, facecolor='blue', alpha=0.75)
err_mat_mean = np.round(np.array(err_mat).mean() *100 ,2)
mean_text = 'The mean error normalized by dist_diag : ' + str(err_mat_mean) + '%'
ax2.text(0.1,len(err_mat)/10 ,mean_text,color="red")
plt.show()
--------------------------------------------------------------------------------
/1_level_1/Code/4_evaluate/evaluate_train.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Evaluate level-1 landmark predictions on the TRAINING split.

Compares the network's output labels against the ground-truth labels,
computes the per-point error normalized by the image diagonal
(via tools.cal_error_nor_diag), saves annotated images split into an
"ok" folder and a "drop" folder by an error threshold, then plots the
per-landmark mean normalized error (MNE) and the error distribution.

NOTE(review): despite the *_test_label variable names below, this
script reads the *train* label files (copy-paste from evaluate_test.py).
"""
import sys
sys.path.append('../../../util')
import tools
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2

# Predicted landmarks written by the inference step, and the raw ground truth.
l1_out_test_label = '../../Result/l1_out_train_label.txt'
l1_raw_test_label = '../../Data/l1_train_label.txt'
relative_path = '../../../raw_data/Data/img_celeba/' # find the image from txt
draw_img_flod = '../../Result/l1_out_draw/train/'  # annotated images below threshold
drop_img_flod = '../../Result/l1_drop/train/'      # annotated images at/above threshold

n_p = 5  # number of landmark points per face
# ----------------------------------------------------------------------- load label
l1_raw_fid = open(l1_raw_test_label)
l1_raw_lines = l1_raw_fid.readlines()
l1_raw_fid.close()
l1_out_fid = open(l1_out_test_label)
l1_out_lines = l1_out_fid.readlines()
l1_out_fid.close()

err_mat = []        # per-image list of the 5 per-point normalized errors
threshold = 0.1     # mean-error cutoff: >= 0.1 counts as a failure case
count_drop = 0
for idx in range(len(l1_out_lines)):
	print idx
	# Both files are expected to be index-aligned: "<name> x1 y1 ... x5 y5".
	r_ = l1_raw_lines[idx]
	o_ = l1_out_lines[idx]
	r_name = r_.split()[0]
	o_name = o_.split()[0]
	if r_name != o_name:
		# Misaligned entry: report and skip this index.
		print 'find a error,idx: ', idx
		continue
	full_img_path = relative_path + r_name
	img = cv2.imread(full_img_path)
	h,w,c = img.shape

	# err_1 is the mean normalized error over the 5 points; err_5 holds
	# the per-point errors.
	err_1,err_5 = tools.cal_error_nor_diag(img,r_,o_) # r_ have img name , range of [-1,1] err_1 is mean
	err_mat.append(err_5)
	out_land = np.array(map(float,o_.split()[1:2*n_p+1]))
	if err_1 >= threshold :
		# Failure case: save the annotated image under the drop folder.
		count_drop = count_drop + 1
		draw_img = img.copy()
		draw_img = tools.drawpoints(draw_img,out_land)
		tools.makedir(drop_img_flod)
		# Prefix the file name with the error so files sort by quality.
		draw_img_name = str(err_1) + '_' + r_name
		draw_img_path = drop_img_flod + draw_img_name
		cv2.imwrite(draw_img_path, draw_img)
	else:
		draw_img = img.copy()
		draw_img = tools.drawpoints(draw_img,out_land)
		tools.makedir(draw_img_flod)
		draw_img_name = str(err_1) + '_' + r_name
		draw_img_path = draw_img_flod + draw_img_name
		cv2.imwrite(draw_img_path, draw_img)
# print a
# -------------------------------------------------------------- print result
err_mat = np.array(err_mat)
err_mat = np.reshape(err_mat,(-1,5))
MNE_5 = []  # mean normalized error per landmark over all evaluated images
for i in range(n_p):
	MNE_5.append(err_mat[:,i].mean())
print 'err >= 10% have ' , count_drop
# print 'MNE of left eye: ', MNE_5[0]
# print 'MNE of right eye: ', MNE_5[1]
# print 'MNE of nose: ', MNE_5[2]
# print 'MNE of left mouth: ', MNE_5[3]
# print 'MNE of right mouth: ', MNE_5[4]
# print 'MNE : ' , np.array(MNE_5).mean()

# ------------------------------------------------------------- plot
fig = plt.figure('train_MNE_5')
ax1 =plt.subplot(111)
data = np.array(MNE_5)
width = 0.2
x_bar = np.arange(5)
# print('x_bar type ',type(x_bar))
# NOTE(review): bar(left=...) is the pre-2.0 matplotlib keyword; newer
# versions renamed it to bar(x=...) -- confirm the installed version.
rect = ax1.bar(left=x_bar,height=data,width=width, color="blue")
for rec in rect:
	# Annotate each bar with its MNE expressed as a percentage.
	x= rec.get_x()
	height = round(rec.get_height()*100,2)
	mne_text = str(height) + '%'
	# print('mne text',mne_text)
	ax1.text(x+0.05,1.02*height/100,mne_text)
	# print('height',height)
MNE_5_mean = np.round(np.array(MNE_5).mean() *100,2)
MNE_5_mean_text = 'The mean normalized error :' +str(MNE_5_mean) + '%'
ax1.text(1 ,1.5*MNE_5_mean/100 ,MNE_5_mean_text,color="red")

ax1.set_xticks(x_bar + width)
ax1.set_xticklabels(("left eye","right eye","nose","left mouth","right mouth"))
ax1.set_ylabel("MNE")
ax1.set_title(" MNE")
ax1.grid(True)
ax1.set_ylim(0,0.025) # max y axis
plt.show()



print 'The mean error normalized by dist_diag is : ', err_mat.mean()
# print a
fig2 = plt.figure("train_distribution")
ax2 = plt.subplot(111)
ax2.set_title("The mean error normalized by dist_diag :")
data =err_mat.mean(axis=1)
# NOTE(review): hist(normed=...) was removed in matplotlib 3.1; newer
# versions use density= instead -- confirm the installed version.
n, bins, patches = plt.hist(data ,bins=200, normed=False, facecolor='blue', alpha=0.75)
err_mat_mean = np.round(np.array(err_mat).mean() *100 ,2)
mean_text = 'The mean error normalized by dist_diag : ' + str(err_mat_mean) + '%'
ax2.text(0.1,len(err_mat)/10 ,mean_text,color="red")
plt.show()
--------------------------------------------------------------------------------
/1_level_1/Code/5_crop_img/crop_test_img.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Crop level-2 TEST inputs from the level-1 predictions.

For every test image whose level-1 mean normalized error is below a
threshold, crops a square region centred on the predicted nose point
(side = 4 * distance(nose, left eye), clamped to the image), remaps the
ground-truth landmarks into the crop's [-1, 1] coordinate frame, and
writes the crop, a debug image with the points drawn, and the new
label line for level-2 training/evaluation.
"""
import sys
sys.path.append('../../../util')
import tools
import numpy as np
import os
import cv2

l1_out_label = '../../Result/l1_out_test_label.txt'
l1_raw_label = '../../Data/l1_test_label.txt'
relative_path = '../../../raw_data/Data/img_celeba/' # find the image from txt

crop_img_flod = '../../../level_2/Data/l1_crop/test/'

crop_label_flod = '../../../level_2/Data/l1_crop/'
crop_label_txt = crop_label_flod + 'l1_crop_test_label.txt'
crop_draw_img_flod = '../../../level_2/Data/l1_crop_draw/test/'
tools.makedir(crop_img_flod)

n_p = 5  # number of landmark points per face
# ----------------------------------------------------------------------- load label
l1_raw_fid = open(l1_raw_label)
l1_raw_lines = l1_raw_fid.readlines()
l1_raw_fid.close()
l1_out_fid = open(l1_out_label)
l1_out_lines = l1_out_fid.readlines()
l1_out_fid.close()
err_mat = []

threshold = 0.1       # only images with err_1 below this are cropped
count_threshold = 0   # NOTE(review): never incremented in this script
fid = open(crop_label_txt,'w')
for idx in range(len(l1_out_lines)):
	print idx
	# Both files are expected to be index-aligned: "<name> x1 y1 ... x5 y5".
	r_ = l1_raw_lines[idx]
	o_ = l1_out_lines[idx]
	r_name = r_.split()[0]
	o_name = o_.split()[0]
	if r_name != o_name:
		print 'find a error,idx: ', idx
		continue
	full_img_path = relative_path + r_name
	img = cv2.imread(full_img_path)
	h,w,c = img.shape
	# ---------------------------------------------------------------------- calculate error
	err_1,err_5 = tools.cal_error_nor_diag(img,r_,o_) # r_ have img name , range of [-1,1] err_1 is mean
	err_mat.append(err_5)

	raw_land = np.array(map(float,r_.split()[1:2*n_p+1])) # nparray float
	out_land = np.array(map(float,o_.split()[1:2*n_p+1]))

	if err_1 < threshold :
		# ------------------------------------------------------------ calculate w,h for crop img
		# Convert [-1, 1] labels to pixel coordinates.
		raw_pix_land = tools.label2points(raw_land,w,h)
		out_pix_land = tools.label2points(out_land,w,h)
		# print 'raw pix land',raw_pix_land
		p_nose = out_pix_land[4:6]
		p_lefteye = out_pix_land[0:2]
		d_nose_lefteye = tools.cal_eucldist(p_nose,p_lefteye)

		# Square crop centred on the predicted nose, half-side = 2 * dist.
		w_start = np.round(p_nose[0] - 2*d_nose_lefteye).astype(int)
		w_end = np.round(p_nose[0] + 2*d_nose_lefteye).astype(int)
		h_start = np.round(p_nose[1] - 2*d_nose_lefteye).astype(int)
		h_end = np.round(p_nose[1] + 2*d_nose_lefteye).astype(int)

		# Clamp the crop window to the image bounds.
		if w_start < 0: w_start = 0
		if h_start < 0: h_start = 0
		if w_end > w: w_end = w
		if h_end > h: h_end = h
		# print ('w,w_end h,h_end',w_start,w_end,h_start,h_end)
		# ------------------------------------------------------------ calculate new label
		# Shift the ground-truth pixel landmarks into crop coordinates.
		crop_pix_land = raw_pix_land.copy()
		crop_pix_land[0::2] = crop_pix_land[0::2] - w_start # x
		crop_pix_land[1::2] = crop_pix_land[1::2] - h_start # y
		# print ('crop pix land ', crop_pix_land)

		# Re-normalize crop-pixel landmarks back into [-1, 1].
		crop_w = w_end - w_start
		crop_h = h_end - h_start
		w1 = (crop_w-1)/2
		h1 = (crop_h-1)/2
		crop_land = crop_pix_land.copy()
		crop_land[0::2] = (crop_pix_land[0::2] - w1) / w1
		crop_land[1::2] = (crop_pix_land[1::2] - h1) / h1

		# print('crop land ', crop_land)
		# ----------------------------------------------------------- output crop img
		crop_img = img.copy()
		crop_img = crop_img[h_start:h_end+1,w_start:w_end+1,:]
		crop_img_name = r_name
		crop_img_path = crop_img_flod + crop_img_name
		tools.makedir(crop_img_flod)
		cv2.imwrite(crop_img_path,crop_img)
		# ----------------------------------------------------------- output crop draw img
		crop_draw_img = crop_img.copy()
		crop_draw_img = tools.drawpoints(crop_draw_img, crop_land)
		crop_draw_img_name = r_name
		crop_draw_img_path = crop_draw_img_flod + crop_draw_img_name
		tools.makedir(crop_draw_img_flod)
		cv2.imwrite(crop_draw_img_path,crop_draw_img)
		# ----------------------------------------------------------- output label
		# Serialize the numpy array through str() and re-split to get the
		# individual coordinate strings.
		new_line = r_name
		str_0 = str(crop_land)
		str_1 = str_0.replace("\n","")
		str_2 = str_1.strip('[]')
		str_3 = str_2.split()
		for i in range(n_p):
			x_ = str_3[2*i+0] # value is [-1,1]
			y_ = str_3[2*i+1]

			new_line = new_line + ' ' + str(x_) # note: the point order has changed: x1,y1,x2...
			new_line = new_line + ' ' + str(y_)
		new_line = new_line + '\n'
		fid.write(new_line)
fid.close()
--------------------------------------------------------------------------------
/1_level_1/Code/5_crop_img/crop_train_img.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Crop level-2 TRAIN inputs from the level-1 predictions.

For every training image whose level-1 mean normalized error is below a
threshold, crops a square region centred on the predicted nose point
(side = 4 * distance(nose, left eye), clamped to the image), remaps the
ground-truth landmarks into the crop's [-1, 1] coordinate frame, and
writes the crop, a debug image with the points drawn, and the new
label line for level-2 training.
"""
import sys
sys.path.append('../../../util')
import tools
import numpy as np
# NOTE(review): argparse, glob and matplotlib are imported but unused in
# this script.
import argparse
import glob
import matplotlib.pyplot as plt
import os
import cv2

l1_out_label = '../../Result/l1_out_train_label.txt'
l1_raw_label = '../../Data/l1_train_label.txt'
relative_path = '../../../raw_data/Data/img_celeba/' # find the image from txt

crop_img_flod = '../../../level_2/Data/l1_crop/train/'

crop_label_flod = '../../../level_2/Data/l1_crop/'
crop_label_txt = crop_label_flod + 'l1_crop_train_label.txt'
crop_draw_img_flod = '../../../level_2/Data/l1_crop_draw/train/'
tools.makedir(crop_img_flod)

n_p = 5  # number of landmark points per face
# ----------------------------------------------------------------------- load label
l1_raw_fid = open(l1_raw_label)
l1_raw_lines = l1_raw_fid.readlines()
l1_raw_fid.close()
l1_out_fid = open(l1_out_label)
l1_out_lines = l1_out_fid.readlines()
l1_out_fid.close()
err_mat = []

threshold = 0.1       # only images with err_1 below this are cropped
count_threshold = 0   # NOTE(review): never incremented in this script
fid = open(crop_label_txt,'w')
for idx in range(len(l1_out_lines)):
	print idx
	# Both files are expected to be index-aligned: "<name> x1 y1 ... x5 y5".
	r_ = l1_raw_lines[idx]
	o_ = l1_out_lines[idx]
	r_name = r_.split()[0]
	o_name = o_.split()[0]
	if r_name != o_name:
		print 'find a error,idx: ', idx
		continue
	full_img_path = relative_path + r_name
	img = cv2.imread(full_img_path)
	h,w,c = img.shape
	# ---------------------------------------------------------------------- calculate error
	err_1,err_5 = tools.cal_error_nor_diag(img,r_,o_) # r_ have img name , range of [-1,1] err_1 is mean
	err_mat.append(err_5)

	raw_land = np.array(map(float,r_.split()[1:2*n_p+1])) # nparray float
	out_land = np.array(map(float,o_.split()[1:2*n_p+1]))

	if err_1 < threshold :
		# ------------------------------------------------------------ calculate w,h for crop img
		# Convert [-1, 1] labels to pixel coordinates.
		raw_pix_land = tools.label2points(raw_land,w,h)
		out_pix_land = tools.label2points(out_land,w,h)

		p_nose = out_pix_land[4:6]
		p_lefteye = out_pix_land[0:2]
		d_nose_lefteye = tools.cal_eucldist(p_nose,p_lefteye)

		# Square crop centred on the predicted nose, half-side = 2 * dist.
		w_start = np.round(p_nose[0] - 2*d_nose_lefteye).astype(int)
		w_end = np.round(p_nose[0] + 2*d_nose_lefteye).astype(int)
		h_start = np.round(p_nose[1] - 2*d_nose_lefteye).astype(int)
		h_end = np.round(p_nose[1] + 2*d_nose_lefteye).astype(int)

		# Clamp the crop window to the image bounds.
		if w_start < 0: w_start = 0
		if h_start < 0: h_start = 0
		if w_end > w: w_end = w
		if h_end > h: h_end = h
		# ------------------------------------------------------------ calculate new label
		# Shift the ground-truth pixel landmarks into crop coordinates.
		crop_pix_land = raw_pix_land.copy()
		crop_pix_land[0::2] = crop_pix_land[0::2] - w_start # x
		crop_pix_land[1::2] = crop_pix_land[1::2] - h_start # y

		# Re-normalize crop-pixel landmarks back into [-1, 1].
		crop_w = w_end - w_start
		crop_h = h_end - h_start
		w1 = (crop_w-1)/2
		h1 = (crop_h-1)/2
		crop_land = crop_pix_land.copy()
		crop_land[0::2] = (crop_pix_land[0::2] - w1) / w1
		crop_land[1::2] = (crop_pix_land[1::2] - h1) / h1
		# ----------------------------------------------------------- output crop img
		crop_img = img.copy()
		crop_img = crop_img[h_start:h_end+1,w_start:w_end+1,:]
		crop_img_name = r_name
		crop_img_path = crop_img_flod + crop_img_name
		tools.makedir(crop_img_flod)
		cv2.imwrite(crop_img_path,crop_img)
		# ----------------------------------------------------------- output crop draw img
		crop_draw_img = crop_img.copy()
		crop_draw_img = tools.drawpoints(crop_draw_img, crop_land)
		crop_draw_img_name = r_name
		crop_draw_img_path = crop_draw_img_flod + crop_draw_img_name
		tools.makedir(crop_draw_img_flod)
		cv2.imwrite(crop_draw_img_path,crop_draw_img)
		# ----------------------------------------------------------- output label
		# Serialize the numpy array through str() and re-split to get the
		# individual coordinate strings.
		new_line = r_name
		str_0 = str(crop_land)
		str_1 = str_0.replace("\n","")
		str_2 = str_1.strip('[]')
		str_3 = str_2.split()
		for i in range(n_p):
			x_ = str_3[2*i+0] # value is [-1,1]
			y_ = str_3[2*i+1]

			new_line = new_line + ' ' + str(x_) # note: the point order has changed: x1,y1,x2...
			new_line = new_line + ' ' + str(y_)
		new_line = new_line + '\n'
		fid.write(new_line)
fid.close()
--------------------------------------------------------------------------------
/2_level_2/Code/0_train/solver.prototxt:
--------------------------------------------------------------------------------
# Caffe solver configuration for the level-2 landmark network
# (Adam optimizer with a multistep learning-rate schedule).
net: "l2_mobilenet.prototxt"

# 160 test iterations per test phase.
test_iter: 160 # bs = 128 * 2
test_interval: 1250

#base_lr: 0.0001
base_lr: 0.001
momentum: 0.9
weight_decay: 0.0004

type: "Adam"

# Multiply the learning rate by gamma at each stepvalue iteration.
lr_policy: "multistep"
#gamma: 0.9
gamma:0.1
stepvalue: 80000 # 40iter = 1 epoch
stepvalue: 100000
#stepvalue: 250000

display: 1000
max_iter: 120000

# Snapshot weights + solver state every 50k iterations.
snapshot: 50000
snapshot_prefix: "../../Result/solver_state/"
solver_mode: GPU

--------------------------------------------------------------------------------
/2_level_2/Code/0_train/train.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env sh
# Train the level-2 network with Caffe on GPUs 0 and 1, tee-ing all
# output to a timestamped log file under ../../Result/log/.
set -e
# Take the timestamp once and reuse it for the log name. The original
# computed $postfix but never used it, calling `date` a second time in
# the tee argument (which could roll over to a different minute).
postfix=`date +"%F-%H-%M-%S"`
/***your_caffe_path***/build/tools/caffe train \
--solver=./solver.prototxt -gpu 0,1 \
2>&1 | tee ../../Result/log/${postfix}.log "$@"
--------------------------------------------------------------------------------
/2_level_2/Code/1_inference/inferencen.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Level-2 inference: run the trained 48x48 landmark net over the
level-1 crops and write predicted landmarks for both splits."""
import sys
sys.path.append('../../../util')
sys.path.append('/***your_caffe_path***/python')
sys.path.append('/***your_caffe_path***/python/caffe')
import tools
import caffe
import numpy as np
import argparse
import cv2
import time

# Deploy prototxt and trained weights for the level-2 network.
l2_deploy = './l2_deploy.prototxt'
l2_model = '../../Result/solver_state/_iter_100000.caffemodel'

txt_flod = '../../Data/l1_crop/'
train_txt = txt_flod + 'l1_crop_train_label.txt'
test_txt = txt_flod + 'l1_crop_test_label.txt'

relative_path = '../../Data/l1_crop/' # find the image

l2_out_train_txt = '../../Result/l2_out_train_label.txt'
l2_out_test_txt = '../../Result/l2_out_test_label.txt'

# Network input size: the level-2 net takes 48x48 crops.
w_net = 48
h_net = 48

#--------------------------------------------------------------------------- cnn initialization
caffe.set_mode_gpu()
caffe.set_device(0)
# load model
net = caffe.Net(l2_deploy,l2_model,caffe.TEST)
# image preprocess
# Constant per-pixel mean of 127.5 for every channel. Use the builtin
# float instead of np.float: that alias was deprecated in NumPy 1.20 and
# removed in 1.24, so the original line crashes on current NumPy.
mu = np.ones((3,w_net,h_net), dtype=float) * 127.5
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1)) # (w,h,c)--> (c,w,h)
transformer.set_mean('data', mu) # pixel-wise
transformer.set_raw_scale('data', 255 ) # [0,1] --> [0,255]
transformer.set_channel_swap('data', (2,1,0)) # RGB --> BGR
40 | #----------------------------------------------------------------------------- forward
41 | def l2_forward(input_txt,output_txt,status='train'):
42 | out_f = open(output_txt,'w')
43 | for line in open(input_txt):
44 | if line.isspace() : continue
45 | img_name = line.split()[0]
46 | full_img_path = relative_path + status +'/'+ img_name
47 | # print full_img_path
48 | # print a
49 | #------------------------------------------------------------------------- cnn forward
50 | im=caffe.io.load_image(full_img_path) # im is RGB with 0~1 float
51 | net.blobs['data'].data[...]=transformer.preprocess('data',im)
52 | time_s = time.clock()
53 | n_out = net.forward()
54 | time_e = time.clock()
55 | print img_name,'forward : ',round((time_e-time_s)*1000,1) ,'ms'
56 | out_landmark = net.blobs['fc2'].data[0].flatten()
57 | #------------------------------------------------------------------------- write txt
58 | str_0 = str(out_landmark)
59 | str_1 = str_0.replace("\n","")
60 | str_2 = str_1.strip('[]')
61 | new_line = img_name +' '+ str_2 +'\n'
62 | out_f.write(new_line)
63 | out_f.close()
64 |
65 | l2_forward(test_txt,l2_out_test_txt,status='test')
66 | l2_forward(train_txt,l2_out_train_txt,status='train')
--------------------------------------------------------------------------------
/2_level_2/Code/1_inference/l2_deploy.prototxt:
--------------------------------------------------------------------------------
1 | name: "level_2"
2 | input: "data"
3 | input_shape { dim: 1 dim: 3 dim: 48 dim: 48 }
4 |
5 |
6 | #------------------------------------------- conv1 input: 48*48*3 output: 24*24*16
7 | layer {
8 | name: "conv1_new"
9 | type: "Convolution"
10 | bottom: "data"
11 | top: "conv1_new"
12 | param {
13 | lr_mult: 1
14 | decay_mult: 1
15 | }
16 | convolution_param {
17 | num_output: 16
18 | bias_term: false
19 | pad: 1
20 | kernel_size: 3
21 | stride: 2
22 | weight_filler {
23 | type: "msra"
24 | }
25 | }
26 | }
27 | layer {
28 | name: "conv1/bn_new"
29 | type: "BatchNorm"
30 | bottom: "conv1_new"
31 | top: "conv1_new"
32 | param {
33 | lr_mult: 0
34 | decay_mult: 0
35 | }
36 | param {
37 | lr_mult: 0
38 | decay_mult: 0
39 | }
40 | param {
41 | lr_mult: 0
42 | decay_mult: 0
43 | }
44 | }
45 | layer {
46 | name: "conv1/scale_new"
47 | type: "Scale"
48 | bottom: "conv1_new"
49 | top: "conv1_new"
50 | scale_param {
51 | filler {
52 | value: 1
53 | }
54 | bias_term: true
55 | bias_filler {
56 | value: 0
57 | }
58 | }
59 | }
60 | layer {
61 | name: "relu1_new"
62 | type: "ReLU"
63 | bottom: "conv1_new"
64 | top: "conv1_new"
65 | }
66 |
67 | #----------- bottleneck_1 1_1(s=2) 1_2(s=1) input: 24*24*16 output: 12*12*24
68 |
69 | layer {
70 | name: "conv1_1/in/pw_new"
71 | type: "Convolution"
72 | bottom: "conv1_new"
73 | top: "conv1_1/in/pw_new"
74 | param {
75 | lr_mult: 1
76 | decay_mult: 1
77 | }
78 | convolution_param {
79 | num_output: 96
80 | bias_term: false
81 | pad: 0
82 | kernel_size: 1
83 | engine: CAFFE
84 | stride: 1
85 | weight_filler {
86 | type: "msra"
87 | }
88 | }
89 | }
90 | layer {
91 | name: "conv1_1/in/pw/bn_new"
92 | type: "BatchNorm"
93 | bottom: "conv1_1/in/pw_new"
94 | top: "conv1_1/in/pw_new"
95 | param {
96 | lr_mult: 0
97 | decay_mult: 0
98 | }
99 | param {
100 | lr_mult: 0
101 | decay_mult: 0
102 | }
103 | param {
104 | lr_mult: 0
105 | decay_mult: 0
106 | }
107 | }
108 | layer {
109 | name: "conv1_1/in/pw/scale_new"
110 | type: "Scale"
111 | bottom: "conv1_1/in/pw_new"
112 | top: "conv1_1/in/pw_new"
113 | scale_param {
114 | filler {
115 | value: 1
116 | }
117 | bias_term: true
118 | bias_filler {
119 | value: 0
120 | }
121 | }
122 | }
123 | layer {
124 | name: "relu1_1/in/pw_new"
125 | type: "ReLU"
126 | bottom: "conv1_1/in/pw_new"
127 | top: "conv1_1/in/pw_new"
128 | }
129 |
130 |
131 |
132 | # 1_1 dw conv
133 | layer {
134 | name: "conv1_1/dw_new"
135 | type: "ConvolutionDepthwise"
136 | bottom: "conv1_1/in/pw_new"
137 | top: "conv1_1/dw_new"
138 | param {
139 | lr_mult: 1
140 | decay_mult: 0
141 | }
142 | convolution_param {
143 | num_output: 96
144 | bias_term: false
145 | pad: 1
146 | kernel_size: 3
147 | engine: CAFFE
148 | stride: 2
149 | weight_filler {
150 | type: "msra"
151 | }
152 | }
153 | }
154 | layer {
155 | name: "conv1_1/dw/bn_new"
156 | type: "BatchNorm"
157 | bottom: "conv1_1/dw_new"
158 | top: "conv1_1/dw_new"
159 | param {
160 | lr_mult: 0
161 | decay_mult: 0
162 | }
163 | param {
164 | lr_mult: 0
165 | decay_mult: 0
166 | }
167 | param {
168 | lr_mult: 0
169 | decay_mult: 0
170 | }
171 | }
172 | layer {
173 | name: "conv1_1/dw/scale_new"
174 | type: "Scale"
175 | bottom: "conv1_1/dw_new"
176 | top: "conv1_1/dw_new"
177 | scale_param {
178 | filler {
179 | value: 1
180 | }
181 | bias_term: true
182 | bias_filler {
183 | value: 0
184 | }
185 | }
186 | }
187 | layer {
188 | name: "relu1_1/dw_new"
189 | type: "ReLU"
190 | bottom: "conv1_1/dw_new"
191 | top: "conv1_1/dw_new"
192 | }
193 |
194 | # 1_1 out
195 | layer {
196 | name: "conv1_1/out/pw_new"
197 | type: "Convolution"
198 | bottom: "conv1_1/dw_new"
199 | top: "conv1_1/out/pw_new"
200 | param {
201 | lr_mult: 1
202 | decay_mult: 1
203 | }
204 | convolution_param {
205 | num_output: 24
206 | bias_term: false
207 | pad: 0
208 | kernel_size: 1
209 | engine: CAFFE
210 | stride: 1
211 | weight_filler {
212 | type: "msra"
213 | }
214 | }
215 | }
216 | layer {
217 | name: "conv1_1/out/pw/bn_new"
218 | type: "BatchNorm"
219 | bottom: "conv1_1/out/pw_new"
220 | top: "conv1_1/out/pw_new"
221 | param {
222 | lr_mult: 0
223 | decay_mult: 0
224 | }
225 | param {
226 | lr_mult: 0
227 | decay_mult: 0
228 | }
229 | param {
230 | lr_mult: 0
231 | decay_mult: 0
232 | }
233 | }
234 | layer {
235 | name: "conv1_1/out/pw/scale_new"
236 | type: "Scale"
237 | bottom: "conv1_1/out/pw_new"
238 | top: "conv1_1/out/pw_new"
239 | scale_param {
240 | filler {
241 | value: 1
242 | }
243 | bias_term: true
244 | bias_filler {
245 | value: 0
246 | }
247 | }
248 | }
249 | # 1_2 in
250 |
251 | layer {
252 | name: "conv1_2/in/pw_new"
253 | type: "Convolution"
254 | bottom: "conv1_1/out/pw_new"
255 | top: "conv1_2/in/pw_new"
256 | param {
257 | lr_mult: 1
258 | decay_mult: 1
259 | }
260 | convolution_param {
261 | num_output: 144
262 | bias_term: false
263 | pad: 0
264 | kernel_size: 1
265 | engine: CAFFE
266 | stride: 1
267 | weight_filler {
268 | type: "msra"
269 | }
270 | }
271 | }
272 | layer {
273 | name: "conv1_2/in/pw/bn_new"
274 | type: "BatchNorm"
275 | bottom: "conv1_2/in/pw_new"
276 | top: "conv1_2/in/pw_new"
277 | param {
278 | lr_mult: 0
279 | decay_mult: 0
280 | }
281 | param {
282 | lr_mult: 0
283 | decay_mult: 0
284 | }
285 | param {
286 | lr_mult: 0
287 | decay_mult: 0
288 | }
289 | }
290 | layer {
291 | name: "conv1_2/in/pw/scale_new"
292 | type: "Scale"
293 | bottom: "conv1_2/in/pw_new"
294 | top: "conv1_2/in/pw_new"
295 | scale_param {
296 | filler {
297 | value: 1
298 | }
299 | bias_term: true
300 | bias_filler {
301 | value: 0
302 | }
303 | }
304 | }
305 | layer {
306 | name: "relu1_2/in/pw_new"
307 | type: "ReLU"
308 | bottom: "conv1_2/in/pw_new"
309 | top: "conv1_2/in/pw_new"
310 | }
311 |
312 | # 1_2 dw
313 |
314 | layer {
315 | name: "conv1_2/dw_new"
316 | type: "ConvolutionDepthwise"
317 | bottom: "conv1_2/in/pw_new"
318 | top: "conv1_2/dw_new"
319 | param {
320 | lr_mult: 1
321 | decay_mult: 0
322 | }
323 | convolution_param {
324 | num_output: 144
325 | bias_term: false
326 | pad: 1
327 | kernel_size: 3
328 | engine: CAFFE
329 | stride: 1
330 | weight_filler {
331 | type: "msra"
332 | }
333 | }
334 | }
335 | layer {
336 | name: "conv1_2/dw/bn_new"
337 | type: "BatchNorm"
338 | bottom: "conv1_2/dw_new"
339 | top: "conv1_2/dw_new"
340 | param {
341 | lr_mult: 0
342 | decay_mult: 0
343 | }
344 | param {
345 | lr_mult: 0
346 | decay_mult: 0
347 | }
348 | param {
349 | lr_mult: 0
350 | decay_mult: 0
351 | }
352 | }
353 | layer {
354 | name: "conv1_2/dw/scale_new"
355 | type: "Scale"
356 | bottom: "conv1_2/dw_new"
357 | top: "conv1_2/dw_new"
358 | scale_param {
359 | filler {
360 | value: 1
361 | }
362 | bias_term: true
363 | bias_filler {
364 | value: 0
365 | }
366 | }
367 | }
368 | layer {
369 | name: "relu1_2/dw_new"
370 | type: "ReLU"
371 | bottom: "conv1_2/dw_new"
372 | top: "conv1_2/dw_new"
373 | }
374 |
375 | # 1_2 out 12*12*24
376 | layer {
377 | name: "conv1_2/out/pw_new"
378 | type: "Convolution"
379 | bottom: "conv1_2/dw_new"
380 | top: "conv1_2/out/pw_new"
381 | param {
382 | lr_mult: 1
383 | decay_mult: 1
384 | }
385 | convolution_param {
386 | num_output: 24
387 | bias_term: false
388 | pad: 0
389 | kernel_size: 1
390 | engine: CAFFE
391 | stride: 1
392 | weight_filler {
393 | type: "msra"
394 | }
395 | }
396 | }
397 | layer {
398 | name: "conv1_2/out/pw/bn_new"
399 | type: "BatchNorm"
400 | bottom: "conv1_2/out/pw_new"
401 | top: "conv1_2/out/pw_new"
402 | param {
403 | lr_mult: 0
404 | decay_mult: 0
405 | }
406 | param {
407 | lr_mult: 0
408 | decay_mult: 0
409 | }
410 | param {
411 | lr_mult: 0
412 | decay_mult: 0
413 | }
414 | }
415 | layer {
416 | name: "conv1_2/out/pw/scale_new"
417 | type: "Scale"
418 | bottom: "conv1_2/out/pw_new"
419 | top: "conv1_2/out/pw_new"
420 | scale_param {
421 | filler {
422 | value: 1
423 | }
424 | bias_term: true
425 | bias_filler {
426 | value: 0
427 | }
428 | }
429 | }
430 | layer {
431 | name: "fuse_conv1_2"
432 | type: "Eltwise"
433 | bottom: "conv1_1/out/pw_new"
434 | bottom: "conv1_2/out/pw_new"
435 | top: "fuse_conv1_2"
436 | eltwise_param {
437 | operation: SUM
438 | }
439 | }
440 |
441 | #--------------------------bottleneck2 2_1(s=2) 2_2(s=1) input: 12*12*24 output: 6*6*32
442 | # 2_1 in
443 | layer {
444 | name: "conv2_1/in/pw_new"
445 | type: "Convolution"
446 | bottom: "fuse_conv1_2"
447 | top: "conv2_1/in/pw_new"
448 | param {
449 | lr_mult: 1
450 | decay_mult: 1
451 | }
452 | convolution_param {
453 | num_output: 144
454 | bias_term: false
455 | pad: 0
456 | kernel_size: 1
457 | engine: CAFFE
458 | stride: 1
459 | weight_filler {
460 | type: "msra"
461 | }
462 | }
463 | }
464 | layer {
465 | name: "conv2_1/in/pw/bn_new"
466 | type: "BatchNorm"
467 | bottom: "conv2_1/in/pw_new"
468 | top: "conv2_1/in/pw_new"
469 | param {
470 | lr_mult: 0
471 | decay_mult: 0
472 | }
473 | param {
474 | lr_mult: 0
475 | decay_mult: 0
476 | }
477 | param {
478 | lr_mult: 0
479 | decay_mult: 0
480 | }
481 | }
482 | layer {
483 | name: "conv2_1/in/pw/scale_new"
484 | type: "Scale"
485 | bottom: "conv2_1/in/pw_new"
486 | top: "conv2_1/in/pw_new"
487 | scale_param {
488 | filler {
489 | value: 1
490 | }
491 | bias_term: true
492 | bias_filler {
493 | value: 0
494 | }
495 | }
496 | }
497 | layer {
498 | name: "relu2_1/in/pw_new"
499 | type: "ReLU"
500 | bottom: "conv2_1/in/pw_new"
501 | top: "conv2_1/in/pw_new"
502 | }
503 |
504 | # 2_1 dw
505 | layer {
506 | name: "conv2_1/dw_new"
507 | type: "ConvolutionDepthwise"
508 | bottom: "conv2_1/in/pw_new"
509 | top: "conv2_1/dw_new"
510 | param {
511 | lr_mult: 1
512 | decay_mult: 0
513 | }
514 | convolution_param {
515 | num_output: 144
516 | bias_term: false
517 | pad: 1
518 | kernel_size: 3
519 | engine: CAFFE
520 | stride: 2
521 | weight_filler {
522 | type: "msra"
523 | }
524 | }
525 | }
526 | layer {
527 | name: "conv2_1/dw/bn_new"
528 | type: "BatchNorm"
529 | bottom: "conv2_1/dw_new"
530 | top: "conv2_1/dw_new"
531 | param {
532 | lr_mult: 0
533 | decay_mult: 0
534 | }
535 | param {
536 | lr_mult: 0
537 | decay_mult: 0
538 | }
539 | param {
540 | lr_mult: 0
541 | decay_mult: 0
542 | }
543 | }
544 | layer {
545 | name: "conv2_1/dw/scale_new"
546 | type: "Scale"
547 | bottom: "conv2_1/dw_new"
548 | top: "conv2_1/dw_new"
549 | scale_param {
550 | filler {
551 | value: 1
552 | }
553 | bias_term: true
554 | bias_filler {
555 | value: 0
556 | }
557 | }
558 | }
559 | layer {
560 | name: "relu2_1/dw_new"
561 | type: "ReLU"
562 | bottom: "conv2_1/dw_new"
563 | top: "conv2_1/dw_new"
564 | }
565 |
566 | # 2_1 out
567 | layer {
568 | name: "conv2_1/out/pw_new"
569 | type: "Convolution"
570 | bottom: "conv2_1/dw_new"
571 | top: "conv2_1/out/pw_new"
572 | param {
573 | lr_mult: 1
574 | decay_mult: 1
575 | }
576 | convolution_param {
577 | num_output: 32
578 | bias_term: false
579 | pad: 0
580 | kernel_size: 1
581 | engine: CAFFE
582 | stride: 1
583 | weight_filler {
584 | type: "msra"
585 | }
586 | }
587 | }
588 | layer {
589 | name: "conv2_1/out/pw/bn_new"
590 | type: "BatchNorm"
591 | bottom: "conv2_1/out/pw_new"
592 | top: "conv2_1/out/pw_new"
593 | param {
594 | lr_mult: 0
595 | decay_mult: 0
596 | }
597 | param {
598 | lr_mult: 0
599 | decay_mult: 0
600 | }
601 | param {
602 | lr_mult: 0
603 | decay_mult: 0
604 | }
605 | }
606 | layer {
607 | name: "conv2_1/out/pw/scale_new"
608 | type: "Scale"
609 | bottom: "conv2_1/out/pw_new"
610 | top: "conv2_1/out/pw_new"
611 | scale_param {
612 | filler {
613 | value: 1
614 | }
615 | bias_term: true
616 | bias_filler {
617 | value: 0
618 | }
619 | }
620 | }
621 |
622 | # 2_2 in
623 |
624 | layer {
625 | name: "conv2_2/in/pw_new"
626 | type: "Convolution"
627 | bottom: "conv2_1/out/pw_new"
628 | top: "conv2_2/in/pw_new"
629 | param {
630 | lr_mult: 1
631 | decay_mult: 1
632 | }
633 | convolution_param {
634 | num_output: 192
635 | bias_term: false
636 | pad: 0
637 | kernel_size: 1
638 | engine: CAFFE
639 | stride: 1
640 | weight_filler {
641 | type: "msra"
642 | }
643 | }
644 | }
645 | layer {
646 | name: "conv2_2/in/pw/bn_new"
647 | type: "BatchNorm"
648 | bottom: "conv2_2/in/pw_new"
649 | top: "conv2_2/in/pw_new"
650 | param {
651 | lr_mult: 0
652 | decay_mult: 0
653 | }
654 | param {
655 | lr_mult: 0
656 | decay_mult: 0
657 | }
658 | param {
659 | lr_mult: 0
660 | decay_mult: 0
661 | }
662 | }
663 | layer {
664 | name: "conv2_2/in/pw/scale_new"
665 | type: "Scale"
666 | bottom: "conv2_2/in/pw_new"
667 | top: "conv2_2/in/pw_new"
668 | scale_param {
669 | filler {
670 | value: 1
671 | }
672 | bias_term: true
673 | bias_filler {
674 | value: 0
675 | }
676 | }
677 | }
678 | layer {
679 | name: "relu2_2/in/pw_new"
680 | type: "ReLU"
681 | bottom: "conv2_2/in/pw_new"
682 | top: "conv2_2/in/pw_new"
683 | }
684 |
685 | # 2_2 dw
686 | layer {
687 | name: "conv2_2/dw_new"
688 | type: "ConvolutionDepthwise"
689 | bottom: "conv2_2/in/pw_new"
690 | top: "conv2_2/dw_new"
691 | param {
692 | lr_mult: 1
693 | decay_mult: 0
694 | }
695 | convolution_param {
696 | num_output: 192
697 | bias_term: false
698 | pad: 1
699 | kernel_size: 3
700 | engine: CAFFE
701 | stride: 1
702 | weight_filler {
703 | type: "msra"
704 | }
705 | }
706 | }
707 | layer {
708 | name: "conv2_2/dw/bn_new"
709 | type: "BatchNorm"
710 | bottom: "conv2_2/dw_new"
711 | top: "conv2_2/dw_new"
712 | param {
713 | lr_mult: 0
714 | decay_mult: 0
715 | }
716 | param {
717 | lr_mult: 0
718 | decay_mult: 0
719 | }
720 | param {
721 | lr_mult: 0
722 | decay_mult: 0
723 | }
724 | }
725 | layer {
726 | name: "conv2_2/dw/scale_new"
727 | type: "Scale"
728 | bottom: "conv2_2/dw_new"
729 | top: "conv2_2/dw_new"
730 | scale_param {
731 | filler {
732 | value: 1
733 | }
734 | bias_term: true
735 | bias_filler {
736 | value: 0
737 | }
738 | }
739 | }
740 | layer {
741 | name: "relu2_2/dw_new"
742 | type: "ReLU"
743 | bottom: "conv2_2/dw_new"
744 | top: "conv2_2/dw_new"
745 | }
746 |
747 |
748 | # 2_2 out
749 |
750 | layer {
751 | name: "conv2_2/out/pw_new"
752 | type: "Convolution"
753 | bottom: "conv2_2/dw_new"
754 | top: "conv2_2/out/pw_new"
755 | param {
756 | lr_mult: 1
757 | decay_mult: 1
758 | }
759 | convolution_param {
760 | num_output: 32
761 | bias_term: false
762 | pad: 0
763 | kernel_size: 1
764 | engine: CAFFE
765 | stride: 1
766 | weight_filler {
767 | type: "msra"
768 | }
769 | }
770 | }
771 | layer {
772 | name: "conv2_2/out/pw/bn_new"
773 | type: "BatchNorm"
774 | bottom: "conv2_2/out/pw_new"
775 | top: "conv2_2/out/pw_new"
776 | param {
777 | lr_mult: 0
778 | decay_mult: 0
779 | }
780 | param {
781 | lr_mult: 0
782 | decay_mult: 0
783 | }
784 | param {
785 | lr_mult: 0
786 | decay_mult: 0
787 | }
788 | }
789 | layer {
790 | name: "conv2_2/out/pw/scale_new"
791 | type: "Scale"
792 | bottom: "conv2_2/out/pw_new"
793 | top: "conv2_2/out/pw_new"
794 | scale_param {
795 | filler {
796 | value: 1
797 | }
798 | bias_term: true
799 | bias_filler {
800 | value: 0
801 | }
802 | }
803 | }
804 | layer {
805 | name: "fuse_conv2_2"
806 | type: "Eltwise"
807 | bottom: "conv2_1/out/pw_new"
808 | bottom: "conv2_2/out/pw_new"
809 | top: "fuse_conv2_2"
810 | eltwise_param {
811 | operation: SUM
812 | }
813 | }
814 |
815 | #--------------------------bottleneck3 3_1(s=2) 3_2(s=1) input: 6*6*32 output: 3*3*64
816 | # 3_1 in
817 | layer {
818 | name: "conv3_1/in/pw_new"
819 | type: "Convolution"
820 | bottom: "fuse_conv2_2"
821 | top: "conv3_1/in/pw_new"
822 | param {
823 | lr_mult: 1
824 | decay_mult: 1
825 | }
826 | convolution_param {
827 | num_output: 192
828 | bias_term: false
829 | pad: 0
830 | kernel_size: 1
831 | engine: CAFFE
832 | stride: 1
833 | weight_filler {
834 | type: "msra"
835 | }
836 | }
837 | }
838 | layer {
839 | name: "conv3_1/in/pw/bn_new"
840 | type: "BatchNorm"
841 | bottom: "conv3_1/in/pw_new"
842 | top: "conv3_1/in/pw_new"
843 | param {
844 | lr_mult: 0
845 | decay_mult: 0
846 | }
847 | param {
848 | lr_mult: 0
849 | decay_mult: 0
850 | }
851 | param {
852 | lr_mult: 0
853 | decay_mult: 0
854 | }
855 | }
856 | layer {
857 | name: "conv3_1/in/pw/scale_new"
858 | type: "Scale"
859 | bottom: "conv3_1/in/pw_new"
860 | top: "conv3_1/in/pw_new"
861 | scale_param {
862 | filler {
863 | value: 1
864 | }
865 | bias_term: true
866 | bias_filler {
867 | value: 0
868 | }
869 | }
870 | }
871 | layer {
872 | name: "relu3_1/in/pw_new"
873 | type: "ReLU"
874 | bottom: "conv3_1/in/pw_new"
875 | top: "conv3_1/in/pw_new"
876 | }
877 |
878 | # 3_1 dw
879 | layer {
880 | name: "conv3_1/dw_new"
881 | type: "ConvolutionDepthwise"
882 | bottom: "conv3_1/in/pw_new"
883 | top: "conv3_1/dw_new"
884 | param {
885 | lr_mult: 1
886 | decay_mult: 0
887 | }
888 | convolution_param {
889 | num_output: 192
890 | bias_term: false
891 | pad: 1
892 | kernel_size: 3
893 | engine: CAFFE
894 | stride: 2
895 | weight_filler {
896 | type: "msra"
897 | }
898 | }
899 | }
900 | layer {
901 | name: "conv3_1/dw/bn_new"
902 | type: "BatchNorm"
903 | bottom: "conv3_1/dw_new"
904 | top: "conv3_1/dw_new"
905 | param {
906 | lr_mult: 0
907 | decay_mult: 0
908 | }
909 | param {
910 | lr_mult: 0
911 | decay_mult: 0
912 | }
913 | param {
914 | lr_mult: 0
915 | decay_mult: 0
916 | }
917 | }
918 | layer {
919 | name: "conv3_1/dw/scale_new"
920 | type: "Scale"
921 | bottom: "conv3_1/dw_new"
922 | top: "conv3_1/dw_new"
923 | scale_param {
924 | filler {
925 | value: 1
926 | }
927 | bias_term: true
928 | bias_filler {
929 | value: 0
930 | }
931 | }
932 | }
933 | layer {
934 | name: "relu3_1/dw_new"
935 | type: "ReLU"
936 | bottom: "conv3_1/dw_new"
937 | top: "conv3_1/dw_new"
938 | }
939 |
940 | # 3_1 out
941 | layer {
942 | name: "conv3_1/out/pw_new"
943 | type: "Convolution"
944 | bottom: "conv3_1/dw_new"
945 | top: "conv3_1/out/pw_new"
946 | param {
947 | lr_mult: 1
948 | decay_mult: 1
949 | }
950 | convolution_param {
951 | num_output: 64
952 | bias_term: false
953 | pad: 0
954 | kernel_size: 1
955 | engine: CAFFE
956 | stride: 1
957 | weight_filler {
958 | type: "msra"
959 | }
960 | }
961 | }
962 | layer {
963 | name: "conv3_1/out/pw/bn_new"
964 | type: "BatchNorm"
965 | bottom: "conv3_1/out/pw_new"
966 | top: "conv3_1/out/pw_new"
967 | param {
968 | lr_mult: 0
969 | decay_mult: 0
970 | }
971 | param {
972 | lr_mult: 0
973 | decay_mult: 0
974 | }
975 | param {
976 | lr_mult: 0
977 | decay_mult: 0
978 | }
979 | }
980 | layer {
981 | name: "conv3_1/out/pw/scale_new"
982 | type: "Scale"
983 | bottom: "conv3_1/out/pw_new"
984 | top: "conv3_1/out/pw_new"
985 | scale_param {
986 | filler {
987 | value: 1
988 | }
989 | bias_term: true
990 | bias_filler {
991 | value: 0
992 | }
993 | }
994 | }
995 |
996 | # 3_2 in
997 |
998 | layer {
999 | name: "conv3_2/in/pw_new"
1000 | type: "Convolution"
1001 | bottom: "conv3_1/out/pw_new"
1002 | top: "conv3_2/in/pw_new"
1003 | param {
1004 | lr_mult: 1
1005 | decay_mult: 1
1006 | }
1007 | convolution_param {
1008 | num_output: 192
1009 | bias_term: false
1010 | pad: 0
1011 | kernel_size: 1
1012 | engine: CAFFE
1013 | stride: 1
1014 | weight_filler {
1015 | type: "msra"
1016 | }
1017 | }
1018 | }
1019 | layer {
1020 | name: "conv3_2/in/pw/bn_new"
1021 | type: "BatchNorm"
1022 | bottom: "conv3_2/in/pw_new"
1023 | top: "conv3_2/in/pw_new"
1024 | param {
1025 | lr_mult: 0
1026 | decay_mult: 0
1027 | }
1028 | param {
1029 | lr_mult: 0
1030 | decay_mult: 0
1031 | }
1032 | param {
1033 | lr_mult: 0
1034 | decay_mult: 0
1035 | }
1036 | }
1037 | layer {
1038 | name: "conv3_2/in/pw/scale_new"
1039 | type: "Scale"
1040 | bottom: "conv3_2/in/pw_new"
1041 | top: "conv3_2/in/pw_new"
1042 | scale_param {
1043 | filler {
1044 | value: 1
1045 | }
1046 | bias_term: true
1047 | bias_filler {
1048 | value: 0
1049 | }
1050 | }
1051 | }
1052 | layer {
1053 | name: "relu3_2/in/pw_new"
1054 | type: "ReLU"
1055 | bottom: "conv3_2/in/pw_new"
1056 | top: "conv3_2/in/pw_new"
1057 | }
1058 |
1059 | # 3_2 dw
1060 | layer {
1061 | name: "conv3_2/dw_new"
1062 | type: "ConvolutionDepthwise"
1063 | bottom: "conv3_2/in/pw_new"
1064 | top: "conv3_2/dw_new"
1065 | param {
1066 | lr_mult: 1
1067 | decay_mult: 0
1068 | }
1069 | convolution_param {
1070 | num_output: 192
1071 | bias_term: false
1072 | pad: 1
1073 | kernel_size: 3
1074 | engine: CAFFE
1075 | stride: 1
1076 | weight_filler {
1077 | type: "msra"
1078 | }
1079 | }
1080 | }
1081 | layer {
1082 | name: "conv3_2/dw/bn_new"
1083 | type: "BatchNorm"
1084 | bottom: "conv3_2/dw_new"
1085 | top: "conv3_2/dw_new"
1086 | param {
1087 | lr_mult: 0
1088 | decay_mult: 0
1089 | }
1090 | param {
1091 | lr_mult: 0
1092 | decay_mult: 0
1093 | }
1094 | param {
1095 | lr_mult: 0
1096 | decay_mult: 0
1097 | }
1098 | }
1099 | layer {
1100 | name: "conv3_2/dw/scale_new"
1101 | type: "Scale"
1102 | bottom: "conv3_2/dw_new"
1103 | top: "conv3_2/dw_new"
1104 | scale_param {
1105 | filler {
1106 | value: 1
1107 | }
1108 | bias_term: true
1109 | bias_filler {
1110 | value: 0
1111 | }
1112 | }
1113 | }
1114 | layer {
1115 | name: "relu3_2/dw_new"
1116 | type: "ReLU"
1117 | bottom: "conv3_2/dw_new"
1118 | top: "conv3_2/dw_new"
1119 | }
1120 |
1121 |
1122 | # 3_2 out
1123 |
1124 | layer {
1125 | name: "conv3_2/out/pw_new"
1126 | type: "Convolution"
1127 | bottom: "conv3_2/dw_new"
1128 | top: "conv3_2/out/pw_new"
1129 | param {
1130 | lr_mult: 1
1131 | decay_mult: 1
1132 | }
1133 | convolution_param {
1134 | num_output: 64
1135 | bias_term: false
1136 | pad: 0
1137 | kernel_size: 1
1138 | engine: CAFFE
1139 | stride: 1
1140 | weight_filler {
1141 | type: "msra"
1142 | }
1143 | }
1144 | }
1145 | layer {
1146 | name: "conv3_2/out/pw/bn_new"
1147 | type: "BatchNorm"
1148 | bottom: "conv3_2/out/pw_new"
1149 | top: "conv3_2/out/pw_new"
1150 | param {
1151 | lr_mult: 0
1152 | decay_mult: 0
1153 | }
1154 | param {
1155 | lr_mult: 0
1156 | decay_mult: 0
1157 | }
1158 | param {
1159 | lr_mult: 0
1160 | decay_mult: 0
1161 | }
1162 | }
1163 | layer {
1164 | name: "conv3_2/out/pw/scale_new"
1165 | type: "Scale"
1166 | bottom: "conv3_2/out/pw_new"
1167 | top: "conv3_2/out/pw_new"
1168 | scale_param {
1169 | filler {
1170 | value: 1
1171 | }
1172 | bias_term: true
1173 | bias_filler {
1174 | value: 0
1175 | }
1176 | }
1177 | }
1178 | layer {
1179 | name: "fuse_conv3_2"
1180 | type: "Eltwise"
1181 | bottom: "conv3_1/out/pw_new"
1182 | bottom: "conv3_2/out/pw_new"
1183 | top: "fuse_conv3_2"
1184 | eltwise_param {
1185 | operation: SUM
1186 | }
1187 | }
1188 |
1189 |
1190 |
1191 |
1192 |
1193 |
1194 |
1195 | #------------------------- fc1
1196 | layer {
1197 | name: "fc1"
1198 | type: "InnerProduct"
1199 | bottom: "fuse_conv3_2"
1200 | top: "fc1"
1201 | param {
1202 | lr_mult: 1
1203 | decay_mult: 1
1204 | }
1205 | param {
1206 | lr_mult: 2
1207 | decay_mult: 1
1208 | }
1209 | inner_product_param {
1210 | num_output: 256
1211 | weight_filler {
1212 | type: "gaussian"
1213 | std: 0.01
1214 | }
1215 | bias_filler {
1216 | type: "constant"
1217 | value: 0
1218 | }
1219 | }
1220 | }
1221 | layer {
1222 | name: "relu_fc1"
1223 | type: "ReLU"
1224 | bottom: "fc1"
1225 | top: "fc1"
1226 | }
1227 | layer {
1228 | name: "drop_fc1"
1229 | type: "Dropout"
1230 | bottom: "fc1"
1231 | top: "fc1"
1232 | dropout_param{
1233 | dropout_ratio: 0.3
1234 | }
1235 | }
1236 |
1237 | #------------------------- fc2
1238 | layer {
1239 | name: "fc2"
1240 | type: "InnerProduct"
1241 | bottom: "fc1"
1242 | top: "fc2"
1243 | param {
1244 | lr_mult: 1
1245 | decay_mult: 1
1246 | }
1247 | param {
1248 | lr_mult: 2
1249 | decay_mult: 1
1250 | }
1251 | inner_product_param {
1252 | num_output: 10
1253 | weight_filler {
1254 | type: "gaussian"
1255 | std: 0.01
1256 | }
1257 | bias_filler {
1258 | type: "constant"
1259 | value: 0
1260 | }
1261 | }
1262 | }
--------------------------------------------------------------------------------
/2_level_2/Code/2_evaluate/evaluate_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | sys.path.append('../../../util')
4 | import tools
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import os
8 | import cv2
9 |
10 | l2_out_test_label = '../../Result/l2_out_test_label.txt'
11 | l2_raw_test_label = '../../Data/l1_crop/l1_crop_test_label.txt'
12 | relative_path = '../../Data/l1_crop/test/' # find the image from txt
13 | draw_img_flod = '../../Result/l2_out_draw/test/'
14 | drop_img_flod = '../../Result/l2_drop/test/'
15 |
16 | n_p = 5
17 | # ----------------------------------------------------------------------- load label
18 | l2_raw_fid = open(l2_raw_test_label)
19 | l2_raw_lines = l2_raw_fid.readlines()
20 | l2_raw_fid.close()
21 | l2_out_fid = open(l2_out_test_label)
22 | l2_out_lines = l2_out_fid.readlines()
23 | l2_out_fid.close()
24 |
25 | err_mat = []
26 | threshold = 1
27 | count_drop = 0
28 | for idx in range(len(l2_out_lines)):
29 | print idx
30 | r_ = l2_raw_lines[idx]
31 | o_ = l2_out_lines[idx]
32 | r_name = r_.split()[0]
33 | o_name = o_.split()[0]
34 | if r_name != o_name:
35 | print 'find a error,idx: ', idx
36 | continue
37 | full_img_path = relative_path + r_name
38 | img = cv2.imread(full_img_path)
39 | h,w,c = img.shape
40 |
41 | err_1,err_5 = tools.cal_error_nor_diag(img,r_,o_) # r_ have img name , range of [-1,1] err_1 is mean
42 | err_mat.append(err_5)
43 | out_land = np.array(map(float,o_.split()[1:2*n_p+1]))
44 |
45 | if err_1 >= threshold :
46 | count_drop = count_drop + 1
47 | draw_img = img.copy()
48 | draw_img = tools.drawpoints(draw_img,out_land)
49 | tools.makedir(drop_img_flod)
50 | draw_img_name = str(err_1) + '_' + r_name
51 | draw_img_path = drop_img_flod + draw_img_name
52 | cv2.imwrite(draw_img_path, draw_img)
53 | else:
54 | draw_img = img.copy()
55 | draw_img = tools.drawpoints(draw_img,out_land)
56 | tools.makedir(draw_img_flod)
57 | draw_img_name = str(err_1) + '_' + r_name
58 | draw_img_path = draw_img_flod + draw_img_name
59 | cv2.imwrite(draw_img_path, draw_img)
60 | # -------------------------------------------------------------- print result
61 | err_mat = np.array(err_mat)
62 | err_mat = np.reshape(err_mat,(-1,5))
63 | MNE_5 = []
64 | for i in range(n_p):
65 | MNE_5.append(err_mat[:,i].mean())
66 | print 'err >= 10% have ' , count_drop
67 | # ------------------------------------------------------------- plot
68 | fig = plt.figure('test_MNE_5')
69 | ax1 =plt.subplot(111)
70 | data = np.array(MNE_5)
71 | width = 0.2
72 | x_bar = np.arange(5)
73 | # print('x_bar type ',type(x_bar))
74 | rect = ax1.bar(left=x_bar,height=data,width=width, color="blue")
75 | for rec in rect:
76 | x= rec.get_x()
77 | height = round(rec.get_height()*100,2)
78 | mne_text = str(height) + '%'
79 | # print('mne text',mne_text)
80 | ax1.text(x+0.05,1.02*height/100,mne_text)
81 | # print('height',height)
82 | MNE_5_mean = np.round(np.array(MNE_5).mean() *100,2)
83 | MNE_5_mean_text = 'The mean normalized error :' +str(MNE_5_mean) + '%'
84 | ax1.text(1 ,1.5*MNE_5_mean/100 ,MNE_5_mean_text,color="red")
85 |
86 | ax1.set_xticks(x_bar + width)
87 | ax1.set_xticklabels(("left eye","right eye","nose","left mouth","right mouth"))
88 | ax1.set_ylabel("MNE")
89 | ax1.set_title(" MNE")
90 | ax1.grid(True)
91 | ax1.set_ylim(0,0.025) # max y axis
92 | plt.show()
93 |
94 |
95 |
96 | print 'The mean error normalized by dist_diag is : ', err_mat.mean()
97 | # print a
98 | fig2 = plt.figure("test_distribution")
99 | ax2 = plt.subplot(111)
100 | ax2.set_title("The mean error normalized by dist_diag :")
101 | data =err_mat.mean(axis=1)
102 | n, bins, patches = plt.hist(data ,bins=200, normed=False, facecolor='blue', alpha=0.75)
103 | err_mat_mean = np.round(np.array(err_mat).mean() *100 ,2)
104 | mean_text = 'The mean error normalized by dist_diag : ' + str(err_mat_mean) + '%'
105 | ax2.text(0.1,len(err_mat)/10 ,mean_text,color="red")
106 | plt.show()
--------------------------------------------------------------------------------
/2_level_2/Code/2_evaluate/evaluate_train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | sys.path.append('../../../util')
4 | import tools
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import os
8 | import cv2
9 |
10 | l2_out_train_label = '../../Result/l2_out_train_label.txt'
11 | l2_raw_train_label = '../../Data/l1_crop/l1_crop_train_label.txt'
12 | relative_path = '../../Data/l1_crop/train/' # find the image from txt
13 | draw_img_flod = '../../Result/l2_out_draw/train/'
14 | drop_img_flod = '../../Result/l2_drop/train/'
15 |
16 | n_p = 5
17 | # ----------------------------------------------------------------------- load label
18 | l2_raw_fid = open(l2_raw_train_label)
19 | l2_raw_lines = l2_raw_fid.readlines()
20 | l2_raw_fid.close()
21 | l2_out_fid = open(l2_out_train_label)
22 | l2_out_lines = l2_out_fid.readlines()
23 | l2_out_fid.close()
24 |
25 | err_mat = []
26 | threshold = 1
27 | count_drop = 0
28 | for idx in range(len(l2_out_lines)):
29 | print idx
30 | r_ = l2_raw_lines[idx]
31 | o_ = l2_out_lines[idx]
32 | r_name = r_.split()[0]
33 | o_name = o_.split()[0]
34 | if r_name != o_name:
35 | print 'find a error,idx: ', idx
36 | continue
37 | full_img_path = relative_path + r_name
38 | img = cv2.imread(full_img_path)
39 | h,w,c = img.shape
40 |
41 | err_1,err_5 = tools.cal_error_nor_diag(img,r_,o_) # r_ have img name , range of [-1,1] err_1 is mean
42 | err_mat.append(err_5)
43 | out_land = np.array(map(float,o_.split()[1:2*n_p+1]))
44 |
45 | if err_1 >= threshold :
46 | count_drop = count_drop + 1
47 | draw_img = img.copy()
48 | draw_img = tools.drawpoints(draw_img,out_land)
49 | tools.makedir(drop_img_flod)
50 | draw_img_name = str(err_1) + '_' + r_name
51 | draw_img_path = drop_img_flod + draw_img_name
52 | cv2.imwrite(draw_img_path, draw_img)
53 | else:
54 | draw_img = img.copy()
55 | draw_img = tools.drawpoints(draw_img,out_land)
56 | tools.makedir(draw_img_flod)
57 | draw_img_name = str(err_1) + '_' + r_name
58 | draw_img_path = draw_img_flod + draw_img_name
59 | cv2.imwrite(draw_img_path, draw_img)
60 | # -------------------------------------------------------------- print result
61 | err_mat = np.array(err_mat)
62 | err_mat = np.reshape(err_mat,(-1,5))
63 | MNE_5 = []
64 | for i in range(n_p):
65 | MNE_5.append(err_mat[:,i].mean())
66 | print 'err >= 10% have ' , count_drop
67 | # ------------------------------------------------------------- plot
68 | fig = plt.figure('train_MNE_5')
69 | ax1 =plt.subplot(111)
70 | data = np.array(MNE_5)
71 | width = 0.2
72 | x_bar = np.arange(5)
73 | # print('x_bar type ',type(x_bar))
74 | rect = ax1.bar(left=x_bar,height=data,width=width, color="blue")
75 | for rec in rect:
76 | x= rec.get_x()
77 | height = round(rec.get_height()*100,2)
78 | mne_text = str(height) + '%'
79 | # print('mne text',mne_text)
80 | ax1.text(x+0.05,1.02*height/100,mne_text)
81 | # print('height',height)
82 | MNE_5_mean = np.round(np.array(MNE_5).mean() *100,2)
83 | MNE_5_mean_text = 'The mean normalized error :' +str(MNE_5_mean) + '%'
84 | ax1.text(1 ,1.5*MNE_5_mean/100 ,MNE_5_mean_text,color="red")
85 |
86 | ax1.set_xticks(x_bar + width)
87 | ax1.set_xticklabels(("left eye","right eye","nose","left mouth","right mouth"))
88 | ax1.set_ylabel("MNE")
89 | ax1.set_title(" MNE")
90 | ax1.grid(True)
91 | ax1.set_ylim(0,0.025) # max y axis
92 | plt.show()
93 |
94 |
95 |
96 | print 'The mean error normalized by dist_diag is : ', err_mat.mean()
97 | # print a
98 | fig2 = plt.figure("train_distribution")
99 | ax2 = plt.subplot(111)
100 | ax2.set_title("The mean error normalized by dist_diag :")
101 | data =err_mat.mean(axis=1)
102 | n, bins, patches = plt.hist(data ,bins=200, normed=False, facecolor='blue', alpha=0.75)
103 | err_mat_mean = np.round(np.array(err_mat).mean() *100 ,2)
104 | mean_text = 'The mean error normalized by dist_diag : ' + str(err_mat_mean) + '%'
105 | ax2.text(0.1,len(err_mat)/10 ,mean_text,color="red")
106 | plt.show()
--------------------------------------------------------------------------------
/3_demo/Code/inferencen.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Demo of the cascaded two-level landmark detector:
#   level-1 predicts 5 coarse facial landmarks on the full 48x48-resized image,
#   the face region is cropped around those points (tools.crop_img), and
#   level-2 refines the landmarks on the crop.
# Ground truth, level-1 output and level-2 output are all drawn on each image.
import sys
sys.path.append('../../util')
sys.path.append('/***your_caffe_path***/python')
sys.path.append('/***your_caffe_path***/python/caffe')
import tools
import caffe
import numpy as np
import argparse
import cv2
import time

l1_deploy = '../Data/l1_deploy.prototxt'
l1_model = '../Data/l1_net.caffemodel'
l2_deploy = '../Data/l2_deploy.prototxt'
l2_model = '../Data/l2_net.caffemodel'
raw_txt = '../Data/demo.txt'
relative_path = '../Data/img/' # find the image
draw_img_flod = '../Result/draw_img/'
w_net = 48   # network input width
h_net = 48   # network input height
n_p = 5      # number of landmark points

#--------------------------------------------------------------------------- cnn initialization
# load both cascade stages in inference (TEST) mode
l1_net = caffe.Net(l1_deploy,l1_model,caffe.TEST)
l2_net = caffe.Net(l2_deploy,l2_model,caffe.TEST)

caffe.set_mode_gpu()
caffe.set_device(0)

# image preprocess: one shared transformer (both nets take a 48x48 input),
# per-pixel mean of 127.5 on every channel
mu = np.ones((3,w_net,h_net), dtype=np.float) * 127.5
transformer = caffe.io.Transformer({'data': l1_net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1)) # (w,h,c)--> (c,w,h)
transformer.set_mean('data', mu) # pixel-wise
transformer.set_raw_scale('data', 255 ) # [0,1] --> [0,255]
transformer.set_channel_swap('data', (2,1,0)) # RGB --> BGR
#----------------------------------------------------------------------------- forward
for line in open(raw_txt):
    if line.isspace() : continue   # skip blank lines in demo.txt
    img_name = line.split()[0]
    full_img_path = relative_path + img_name
    img = cv2.imread(full_img_path)
    draw_img = img.copy()          # BGR copy used only for drawing results
    #----------------------------------------------------------------------- l1 forward
    l1_input_img=caffe.io.load_image(full_img_path) # im is RGB with 0~1 float
    h_img,w_img,c = l1_input_img.shape

    l1_net.blobs['data'].data[...]=transformer.preprocess('data',l1_input_img)
    time_s = time.clock()
    l1_out = l1_net.forward()
    time_e = time.clock()
    print img_name,'l1_forward : ',round((time_e-time_s)*1000,1) ,'ms'
    l1_out_land = l1_net.blobs['fc2'].data[0].flatten()
    # crop img for level_2


    # map the normalized level-1 output to pixel coordinates of the full image
    l1_out_pix_land = tools.label2points(l1_out_land,w_img,h_img)
    # ---------------------------------------------------------------------------- crop img
    # (w_start, h_start) is the crop origin, needed to map level-2 output back
    crop_img,w_start,h_start = tools.crop_img(l1_input_img,l1_out_pix_land)
    #----------------------------------------------------------------------- l2 forward
    l2_input_img = crop_img
    h_l2,w_l2,c = l2_input_img.shape
    l2_net.blobs['data'].data[...]=transformer.preprocess('data',l2_input_img)
    time_s = time.clock()
    l2_out = l2_net.forward()
    time_e = time.clock()
    print img_name,'l2_forward : ',round((time_e-time_s)*1000,1) ,'ms'
    l2_out_land = l2_net.blobs['fc2'].data[0].flatten()
    l2_out_pix_land = tools.label2points(l2_out_land,w_l2,h_l2)

    # shift level-2 points from crop coordinates back to full-image coordinates
    l2_out_pix_land[0::2] = l2_out_pix_land[0::2] + w_start # x
    l2_out_pix_land[1::2] = l2_out_pix_land[1::2] + h_start # y

    # -------------------------------------------------------------------- draw img
    # ground-truth pixel coordinates come straight from demo.txt
    raw_land = list(line.split())[1:2*n_p+1]
    draw_img = tools.drawpoints_0(draw_img, raw_land)
    # NOTE(review): drawpoints_1 receives the raw fc2 output (l1_out_land),
    # not l1_out_pix_land — confirm tools.drawpoints_1 scales internally
    draw_img = tools.drawpoints_1(draw_img, l1_out_land)
    draw_img = tools.drawpoints_2(draw_img, l2_out_pix_land)

    # --------------------------------------------------------------------- output img
    draw_img_path = draw_img_flod + img_name
    tools.makedir(draw_img_flod)
    cv2.imwrite(draw_img_path,draw_img)


--------------------------------------------------------------------------------
/3_demo/Data/demo.txt:
--------------------------------------------------------------------------------
1 | 000054.jpg 945 340 1038 340 988 397 941 423 1042 426
2 | 000133.jpg 271 150 332 150 302 195 269 210 328 212
3 | 000167.jpg 202 97 224 105 220 118 197 130 215 131
4 | 000275.jpg 157 115 209 107 184 142 175 168 220 158
5 | 000335.jpg 111 196 183 196 150 248 115 293 185 292
6 | 000765.jpg 326 153 382 145 358 185 336 205 376 199
7 | 001102.jpg 189 163 240 140 221 187 222 213 259 196
8 | 001557.jpg 96 86 135 86 127 111 99 134 131 132
--------------------------------------------------------------------------------
/3_demo/Data/img/000054.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000054.jpg
--------------------------------------------------------------------------------
/3_demo/Data/img/000133.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000133.jpg
--------------------------------------------------------------------------------
/3_demo/Data/img/000167.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000167.jpg
--------------------------------------------------------------------------------
/3_demo/Data/img/000275.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000275.jpg
--------------------------------------------------------------------------------
/3_demo/Data/img/000335.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000335.jpg
--------------------------------------------------------------------------------
/3_demo/Data/img/000765.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000765.jpg
--------------------------------------------------------------------------------
/3_demo/Data/img/001102.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/001102.jpg
--------------------------------------------------------------------------------
/3_demo/Data/img/001557.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/001557.jpg
--------------------------------------------------------------------------------
/3_demo/Data/l1_deploy.prototxt:
--------------------------------------------------------------------------------
1 | name: "level_1"
2 | input: "data"
3 | input_shape { dim: 1 dim: 3 dim: 48 dim: 48 }
4 |
5 |
6 | #------------------------------------------- conv1 input: 48*48*3 output: 24*24*16
7 | layer {
8 | name: "conv1_new"
9 | type: "Convolution"
10 | bottom: "data"
11 | top: "conv1_new"
12 | param {
13 | lr_mult: 1
14 | decay_mult: 1
15 | }
16 | convolution_param {
17 | num_output: 16
18 | bias_term: false
19 | pad: 1
20 | kernel_size: 3
21 | stride: 2
22 | weight_filler {
23 | type: "msra"
24 | }
25 | }
26 | }
27 | layer {
28 | name: "conv1/bn_new"
29 | type: "BatchNorm"
30 | bottom: "conv1_new"
31 | top: "conv1_new"
32 | param {
33 | lr_mult: 0
34 | decay_mult: 0
35 | }
36 | param {
37 | lr_mult: 0
38 | decay_mult: 0
39 | }
40 | param {
41 | lr_mult: 0
42 | decay_mult: 0
43 | }
44 | }
45 | layer {
46 | name: "conv1/scale_new"
47 | type: "Scale"
48 | bottom: "conv1_new"
49 | top: "conv1_new"
50 | scale_param {
51 | filler {
52 | value: 1
53 | }
54 | bias_term: true
55 | bias_filler {
56 | value: 0
57 | }
58 | }
59 | }
60 | layer {
61 | name: "relu1_new"
62 | type: "ReLU"
63 | bottom: "conv1_new"
64 | top: "conv1_new"
65 | }
66 |
67 | #----------- bottleneck_1 1_1(s=2) 1_2(s=1) input: 24*24*16 output: 12*12*24
68 |
69 | layer {
70 | name: "conv1_1/in/pw_new"
71 | type: "Convolution"
72 | bottom: "conv1_new"
73 | top: "conv1_1/in/pw_new"
74 | param {
75 | lr_mult: 1
76 | decay_mult: 1
77 | }
78 | convolution_param {
79 | num_output: 96
80 | bias_term: false
81 | pad: 0
82 | kernel_size: 1
83 | engine: CAFFE
84 | stride: 1
85 | weight_filler {
86 | type: "msra"
87 | }
88 | }
89 | }
90 | layer {
91 | name: "conv1_1/in/pw/bn_new"
92 | type: "BatchNorm"
93 | bottom: "conv1_1/in/pw_new"
94 | top: "conv1_1/in/pw_new"
95 | param {
96 | lr_mult: 0
97 | decay_mult: 0
98 | }
99 | param {
100 | lr_mult: 0
101 | decay_mult: 0
102 | }
103 | param {
104 | lr_mult: 0
105 | decay_mult: 0
106 | }
107 | }
108 | layer {
109 | name: "conv1_1/in/pw/scale_new"
110 | type: "Scale"
111 | bottom: "conv1_1/in/pw_new"
112 | top: "conv1_1/in/pw_new"
113 | scale_param {
114 | filler {
115 | value: 1
116 | }
117 | bias_term: true
118 | bias_filler {
119 | value: 0
120 | }
121 | }
122 | }
123 | layer {
124 | name: "relu1_1/in/pw_new"
125 | type: "ReLU"
126 | bottom: "conv1_1/in/pw_new"
127 | top: "conv1_1/in/pw_new"
128 | }
129 |
130 |
131 |
132 | # 1_1 dw conv
133 | layer {
134 | name: "conv1_1/dw_new"
135 | type: "ConvolutionDepthwise"
136 | bottom: "conv1_1/in/pw_new"
137 | top: "conv1_1/dw_new"
138 | param {
139 | lr_mult: 1
140 | decay_mult: 0
141 | }
142 | convolution_param {
143 | num_output: 96
144 | bias_term: false
145 | pad: 1
146 | kernel_size: 3
147 | engine: CAFFE
148 | stride: 2
149 | weight_filler {
150 | type: "msra"
151 | }
152 | }
153 | }
154 | layer {
155 | name: "conv1_1/dw/bn_new"
156 | type: "BatchNorm"
157 | bottom: "conv1_1/dw_new"
158 | top: "conv1_1/dw_new"
159 | param {
160 | lr_mult: 0
161 | decay_mult: 0
162 | }
163 | param {
164 | lr_mult: 0
165 | decay_mult: 0
166 | }
167 | param {
168 | lr_mult: 0
169 | decay_mult: 0
170 | }
171 | }
172 | layer {
173 | name: "conv1_1/dw/scale_new"
174 | type: "Scale"
175 | bottom: "conv1_1/dw_new"
176 | top: "conv1_1/dw_new"
177 | scale_param {
178 | filler {
179 | value: 1
180 | }
181 | bias_term: true
182 | bias_filler {
183 | value: 0
184 | }
185 | }
186 | }
187 | layer {
188 | name: "relu1_1/dw_new"
189 | type: "ReLU"
190 | bottom: "conv1_1/dw_new"
191 | top: "conv1_1/dw_new"
192 | }
193 |
194 | # 1_1 out
195 | layer {
196 | name: "conv1_1/out/pw_new"
197 | type: "Convolution"
198 | bottom: "conv1_1/dw_new"
199 | top: "conv1_1/out/pw_new"
200 | param {
201 | lr_mult: 1
202 | decay_mult: 1
203 | }
204 | convolution_param {
205 | num_output: 24
206 | bias_term: false
207 | pad: 0
208 | kernel_size: 1
209 | engine: CAFFE
210 | stride: 1
211 | weight_filler {
212 | type: "msra"
213 | }
214 | }
215 | }
216 | layer {
217 | name: "conv1_1/out/pw/bn_new"
218 | type: "BatchNorm"
219 | bottom: "conv1_1/out/pw_new"
220 | top: "conv1_1/out/pw_new"
221 | param {
222 | lr_mult: 0
223 | decay_mult: 0
224 | }
225 | param {
226 | lr_mult: 0
227 | decay_mult: 0
228 | }
229 | param {
230 | lr_mult: 0
231 | decay_mult: 0
232 | }
233 | }
234 | layer {
235 | name: "conv1_1/out/pw/scale_new"
236 | type: "Scale"
237 | bottom: "conv1_1/out/pw_new"
238 | top: "conv1_1/out/pw_new"
239 | scale_param {
240 | filler {
241 | value: 1
242 | }
243 | bias_term: true
244 | bias_filler {
245 | value: 0
246 | }
247 | }
248 | }
249 | # 1_2 in
250 |
251 | layer {
252 | name: "conv1_2/in/pw_new"
253 | type: "Convolution"
254 | bottom: "conv1_1/out/pw_new"
255 | top: "conv1_2/in/pw_new"
256 | param {
257 | lr_mult: 1
258 | decay_mult: 1
259 | }
260 | convolution_param {
261 | num_output: 144
262 | bias_term: false
263 | pad: 0
264 | kernel_size: 1
265 | engine: CAFFE
266 | stride: 1
267 | weight_filler {
268 | type: "msra"
269 | }
270 | }
271 | }
272 | layer {
273 | name: "conv1_2/in/pw/bn_new"
274 | type: "BatchNorm"
275 | bottom: "conv1_2/in/pw_new"
276 | top: "conv1_2/in/pw_new"
277 | param {
278 | lr_mult: 0
279 | decay_mult: 0
280 | }
281 | param {
282 | lr_mult: 0
283 | decay_mult: 0
284 | }
285 | param {
286 | lr_mult: 0
287 | decay_mult: 0
288 | }
289 | }
290 | layer {
291 | name: "conv1_2/in/pw/scale_new"
292 | type: "Scale"
293 | bottom: "conv1_2/in/pw_new"
294 | top: "conv1_2/in/pw_new"
295 | scale_param {
296 | filler {
297 | value: 1
298 | }
299 | bias_term: true
300 | bias_filler {
301 | value: 0
302 | }
303 | }
304 | }
305 | layer {
306 | name: "relu1_2/in/pw_new"
307 | type: "ReLU"
308 | bottom: "conv1_2/in/pw_new"
309 | top: "conv1_2/in/pw_new"
310 | }
311 |
312 | # 1_2 dw
313 |
314 | layer {
315 | name: "conv1_2/dw_new"
316 | type: "ConvolutionDepthwise"
317 | bottom: "conv1_2/in/pw_new"
318 | top: "conv1_2/dw_new"
319 | param {
320 | lr_mult: 1
321 | decay_mult: 0
322 | }
323 | convolution_param {
324 | num_output: 144
325 | bias_term: false
326 | pad: 1
327 | kernel_size: 3
328 | engine: CAFFE
329 | stride: 1
330 | weight_filler {
331 | type: "msra"
332 | }
333 | }
334 | }
335 | layer {
336 | name: "conv1_2/dw/bn_new"
337 | type: "BatchNorm"
338 | bottom: "conv1_2/dw_new"
339 | top: "conv1_2/dw_new"
340 | param {
341 | lr_mult: 0
342 | decay_mult: 0
343 | }
344 | param {
345 | lr_mult: 0
346 | decay_mult: 0
347 | }
348 | param {
349 | lr_mult: 0
350 | decay_mult: 0
351 | }
352 | }
353 | layer {
354 | name: "conv1_2/dw/scale_new"
355 | type: "Scale"
356 | bottom: "conv1_2/dw_new"
357 | top: "conv1_2/dw_new"
358 | scale_param {
359 | filler {
360 | value: 1
361 | }
362 | bias_term: true
363 | bias_filler {
364 | value: 0
365 | }
366 | }
367 | }
368 | layer {
369 | name: "relu1_2/dw_new"
370 | type: "ReLU"
371 | bottom: "conv1_2/dw_new"
372 | top: "conv1_2/dw_new"
373 | }
374 |
375 | # 1_2 out 12*12*24
376 | layer {
377 | name: "conv1_2/out/pw_new"
378 | type: "Convolution"
379 | bottom: "conv1_2/dw_new"
380 | top: "conv1_2/out/pw_new"
381 | param {
382 | lr_mult: 1
383 | decay_mult: 1
384 | }
385 | convolution_param {
386 | num_output: 24
387 | bias_term: false
388 | pad: 0
389 | kernel_size: 1
390 | engine: CAFFE
391 | stride: 1
392 | weight_filler {
393 | type: "msra"
394 | }
395 | }
396 | }
397 | layer {
398 | name: "conv1_2/out/pw/bn_new"
399 | type: "BatchNorm"
400 | bottom: "conv1_2/out/pw_new"
401 | top: "conv1_2/out/pw_new"
402 | param {
403 | lr_mult: 0
404 | decay_mult: 0
405 | }
406 | param {
407 | lr_mult: 0
408 | decay_mult: 0
409 | }
410 | param {
411 | lr_mult: 0
412 | decay_mult: 0
413 | }
414 | }
415 | layer {
416 | name: "conv1_2/out/pw/scale_new"
417 | type: "Scale"
418 | bottom: "conv1_2/out/pw_new"
419 | top: "conv1_2/out/pw_new"
420 | scale_param {
421 | filler {
422 | value: 1
423 | }
424 | bias_term: true
425 | bias_filler {
426 | value: 0
427 | }
428 | }
429 | }
430 | layer {
431 | name: "fuse_conv1_2"
432 | type: "Eltwise"
433 | bottom: "conv1_1/out/pw_new"
434 | bottom: "conv1_2/out/pw_new"
435 | top: "fuse_conv1_2"
436 | eltwise_param {
437 | operation: SUM
438 | }
439 | }
440 |
441 | #--------------------------bottleneck2 2_1(s=2) 2_2(s=1) input: 12*12*24 output: 6*6*32
442 | # 2_1 in
443 | layer {
444 | name: "conv2_1/in/pw_new"
445 | type: "Convolution"
446 | bottom: "fuse_conv1_2"
447 | top: "conv2_1/in/pw_new"
448 | param {
449 | lr_mult: 1
450 | decay_mult: 1
451 | }
452 | convolution_param {
453 | num_output: 144
454 | bias_term: false
455 | pad: 0
456 | kernel_size: 1
457 | engine: CAFFE
458 | stride: 1
459 | weight_filler {
460 | type: "msra"
461 | }
462 | }
463 | }
464 | layer {
465 | name: "conv2_1/in/pw/bn_new"
466 | type: "BatchNorm"
467 | bottom: "conv2_1/in/pw_new"
468 | top: "conv2_1/in/pw_new"
469 | param {
470 | lr_mult: 0
471 | decay_mult: 0
472 | }
473 | param {
474 | lr_mult: 0
475 | decay_mult: 0
476 | }
477 | param {
478 | lr_mult: 0
479 | decay_mult: 0
480 | }
481 | }
482 | layer {
483 | name: "conv2_1/in/pw/scale_new"
484 | type: "Scale"
485 | bottom: "conv2_1/in/pw_new"
486 | top: "conv2_1/in/pw_new"
487 | scale_param {
488 | filler {
489 | value: 1
490 | }
491 | bias_term: true
492 | bias_filler {
493 | value: 0
494 | }
495 | }
496 | }
497 | layer {
498 | name: "relu2_1/in/pw_new"
499 | type: "ReLU"
500 | bottom: "conv2_1/in/pw_new"
501 | top: "conv2_1/in/pw_new"
502 | }
503 |
504 | # 2_1 dw
505 | layer {
506 | name: "conv2_1/dw_new"
507 | type: "ConvolutionDepthwise"
508 | bottom: "conv2_1/in/pw_new"
509 | top: "conv2_1/dw_new"
510 | param {
511 | lr_mult: 1
512 | decay_mult: 0
513 | }
514 | convolution_param {
515 | num_output: 144
516 | bias_term: false
517 | pad: 1
518 | kernel_size: 3
519 | engine: CAFFE
520 | stride: 2
521 | weight_filler {
522 | type: "msra"
523 | }
524 | }
525 | }
526 | layer {
527 | name: "conv2_1/dw/bn_new"
528 | type: "BatchNorm"
529 | bottom: "conv2_1/dw_new"
530 | top: "conv2_1/dw_new"
531 | param {
532 | lr_mult: 0
533 | decay_mult: 0
534 | }
535 | param {
536 | lr_mult: 0
537 | decay_mult: 0
538 | }
539 | param {
540 | lr_mult: 0
541 | decay_mult: 0
542 | }
543 | }
544 | layer {
545 | name: "conv2_1/dw/scale_new"
546 | type: "Scale"
547 | bottom: "conv2_1/dw_new"
548 | top: "conv2_1/dw_new"
549 | scale_param {
550 | filler {
551 | value: 1
552 | }
553 | bias_term: true
554 | bias_filler {
555 | value: 0
556 | }
557 | }
558 | }
559 | layer {
560 | name: "relu2_1/dw_new"
561 | type: "ReLU"
562 | bottom: "conv2_1/dw_new"
563 | top: "conv2_1/dw_new"
564 | }
565 |
566 | # 2_1 out
567 | layer {
568 | name: "conv2_1/out/pw_new"
569 | type: "Convolution"
570 | bottom: "conv2_1/dw_new"
571 | top: "conv2_1/out/pw_new"
572 | param {
573 | lr_mult: 1
574 | decay_mult: 1
575 | }
576 | convolution_param {
577 | num_output: 32
578 | bias_term: false
579 | pad: 0
580 | kernel_size: 1
581 | engine: CAFFE
582 | stride: 1
583 | weight_filler {
584 | type: "msra"
585 | }
586 | }
587 | }
588 | layer {
589 | name: "conv2_1/out/pw/bn_new"
590 | type: "BatchNorm"
591 | bottom: "conv2_1/out/pw_new"
592 | top: "conv2_1/out/pw_new"
593 | param {
594 | lr_mult: 0
595 | decay_mult: 0
596 | }
597 | param {
598 | lr_mult: 0
599 | decay_mult: 0
600 | }
601 | param {
602 | lr_mult: 0
603 | decay_mult: 0
604 | }
605 | }
606 | layer {
607 | name: "conv2_1/out/pw/scale_new"
608 | type: "Scale"
609 | bottom: "conv2_1/out/pw_new"
610 | top: "conv2_1/out/pw_new"
611 | scale_param {
612 | filler {
613 | value: 1
614 | }
615 | bias_term: true
616 | bias_filler {
617 | value: 0
618 | }
619 | }
620 | }
621 |
622 | # 2_2 in
623 |
624 | layer {
625 | name: "conv2_2/in/pw_new"
626 | type: "Convolution"
627 | bottom: "conv2_1/out/pw_new"
628 | top: "conv2_2/in/pw_new"
629 | param {
630 | lr_mult: 1
631 | decay_mult: 1
632 | }
633 | convolution_param {
634 | num_output: 192
635 | bias_term: false
636 | pad: 0
637 | kernel_size: 1
638 | engine: CAFFE
639 | stride: 1
640 | weight_filler {
641 | type: "msra"
642 | }
643 | }
644 | }
645 | layer {
646 | name: "conv2_2/in/pw/bn_new"
647 | type: "BatchNorm"
648 | bottom: "conv2_2/in/pw_new"
649 | top: "conv2_2/in/pw_new"
650 | param {
651 | lr_mult: 0
652 | decay_mult: 0
653 | }
654 | param {
655 | lr_mult: 0
656 | decay_mult: 0
657 | }
658 | param {
659 | lr_mult: 0
660 | decay_mult: 0
661 | }
662 | }
663 | layer {
664 | name: "conv2_2/in/pw/scale_new"
665 | type: "Scale"
666 | bottom: "conv2_2/in/pw_new"
667 | top: "conv2_2/in/pw_new"
668 | scale_param {
669 | filler {
670 | value: 1
671 | }
672 | bias_term: true
673 | bias_filler {
674 | value: 0
675 | }
676 | }
677 | }
678 | layer {
679 | name: "relu2_2/in/pw_new"
680 | type: "ReLU"
681 | bottom: "conv2_2/in/pw_new"
682 | top: "conv2_2/in/pw_new"
683 | }
684 |
685 | # 2_2 dw
686 | layer {
687 | name: "conv2_2/dw_new"
688 | type: "ConvolutionDepthwise"
689 | bottom: "conv2_2/in/pw_new"
690 | top: "conv2_2/dw_new"
691 | param {
692 | lr_mult: 1
693 | decay_mult: 0
694 | }
695 | convolution_param {
696 | num_output: 192
697 | bias_term: false
698 | pad: 1
699 | kernel_size: 3
700 | engine: CAFFE
701 | stride: 1
702 | weight_filler {
703 | type: "msra"
704 | }
705 | }
706 | }
707 | layer {
708 | name: "conv2_2/dw/bn_new"
709 | type: "BatchNorm"
710 | bottom: "conv2_2/dw_new"
711 | top: "conv2_2/dw_new"
712 | param {
713 | lr_mult: 0
714 | decay_mult: 0
715 | }
716 | param {
717 | lr_mult: 0
718 | decay_mult: 0
719 | }
720 | param {
721 | lr_mult: 0
722 | decay_mult: 0
723 | }
724 | }
725 | layer {
726 | name: "conv2_2/dw/scale_new"
727 | type: "Scale"
728 | bottom: "conv2_2/dw_new"
729 | top: "conv2_2/dw_new"
730 | scale_param {
731 | filler {
732 | value: 1
733 | }
734 | bias_term: true
735 | bias_filler {
736 | value: 0
737 | }
738 | }
739 | }
740 | layer {
741 | name: "relu2_2/dw_new"
742 | type: "ReLU"
743 | bottom: "conv2_2/dw_new"
744 | top: "conv2_2/dw_new"
745 | }
746 |
747 |
748 | # 2_2 out
749 |
750 | layer {
751 | name: "conv2_2/out/pw_new"
752 | type: "Convolution"
753 | bottom: "conv2_2/dw_new"
754 | top: "conv2_2/out/pw_new"
755 | param {
756 | lr_mult: 1
757 | decay_mult: 1
758 | }
759 | convolution_param {
760 | num_output: 32
761 | bias_term: false
762 | pad: 0
763 | kernel_size: 1
764 | engine: CAFFE
765 | stride: 1
766 | weight_filler {
767 | type: "msra"
768 | }
769 | }
770 | }
771 | layer {
772 | name: "conv2_2/out/pw/bn_new"
773 | type: "BatchNorm"
774 | bottom: "conv2_2/out/pw_new"
775 | top: "conv2_2/out/pw_new"
776 | param {
777 | lr_mult: 0
778 | decay_mult: 0
779 | }
780 | param {
781 | lr_mult: 0
782 | decay_mult: 0
783 | }
784 | param {
785 | lr_mult: 0
786 | decay_mult: 0
787 | }
788 | }
789 | layer {
790 | name: "conv2_2/out/pw/scale_new"
791 | type: "Scale"
792 | bottom: "conv2_2/out/pw_new"
793 | top: "conv2_2/out/pw_new"
794 | scale_param {
795 | filler {
796 | value: 1
797 | }
798 | bias_term: true
799 | bias_filler {
800 | value: 0
801 | }
802 | }
803 | }
804 | layer {
805 | name: "fuse_conv2_2"
806 | type: "Eltwise"
807 | bottom: "conv2_1/out/pw_new"
808 | bottom: "conv2_2/out/pw_new"
809 | top: "fuse_conv2_2"
810 | eltwise_param {
811 | operation: SUM
812 | }
813 | }
814 |
815 | #--------------------------bottleneck3 3_1(s=2) 3_2(s=1) input: 6*6*32 output: 3*3*64
816 | # 3_1 in
817 | layer {
818 | name: "conv3_1/in/pw_new"
819 | type: "Convolution"
820 | bottom: "fuse_conv2_2"
821 | top: "conv3_1/in/pw_new"
822 | param {
823 | lr_mult: 1
824 | decay_mult: 1
825 | }
826 | convolution_param {
827 | num_output: 192
828 | bias_term: false
829 | pad: 0
830 | kernel_size: 1
831 | engine: CAFFE
832 | stride: 1
833 | weight_filler {
834 | type: "msra"
835 | }
836 | }
837 | }
838 | layer {
839 | name: "conv3_1/in/pw/bn_new"
840 | type: "BatchNorm"
841 | bottom: "conv3_1/in/pw_new"
842 | top: "conv3_1/in/pw_new"
843 | param {
844 | lr_mult: 0
845 | decay_mult: 0
846 | }
847 | param {
848 | lr_mult: 0
849 | decay_mult: 0
850 | }
851 | param {
852 | lr_mult: 0
853 | decay_mult: 0
854 | }
855 | }
856 | layer {
857 | name: "conv3_1/in/pw/scale_new"
858 | type: "Scale"
859 | bottom: "conv3_1/in/pw_new"
860 | top: "conv3_1/in/pw_new"
861 | scale_param {
862 | filler {
863 | value: 1
864 | }
865 | bias_term: true
866 | bias_filler {
867 | value: 0
868 | }
869 | }
870 | }
871 | layer {
872 | name: "relu3_1/in/pw_new"
873 | type: "ReLU"
874 | bottom: "conv3_1/in/pw_new"
875 | top: "conv3_1/in/pw_new"
876 | }
877 |
878 | # 3_1 dw
879 | layer {
880 | name: "conv3_1/dw_new"
881 | type: "ConvolutionDepthwise"
882 | bottom: "conv3_1/in/pw_new"
883 | top: "conv3_1/dw_new"
884 | param {
885 | lr_mult: 1
886 | decay_mult: 0
887 | }
888 | convolution_param {
889 | num_output: 192
890 | bias_term: false
891 | pad: 1
892 | kernel_size: 3
893 | engine: CAFFE
894 | stride: 2
895 | weight_filler {
896 | type: "msra"
897 | }
898 | }
899 | }
900 | layer {
901 | name: "conv3_1/dw/bn_new"
902 | type: "BatchNorm"
903 | bottom: "conv3_1/dw_new"
904 | top: "conv3_1/dw_new"
905 | param {
906 | lr_mult: 0
907 | decay_mult: 0
908 | }
909 | param {
910 | lr_mult: 0
911 | decay_mult: 0
912 | }
913 | param {
914 | lr_mult: 0
915 | decay_mult: 0
916 | }
917 | }
918 | layer {
919 | name: "conv3_1/dw/scale_new"
920 | type: "Scale"
921 | bottom: "conv3_1/dw_new"
922 | top: "conv3_1/dw_new"
923 | scale_param {
924 | filler {
925 | value: 1
926 | }
927 | bias_term: true
928 | bias_filler {
929 | value: 0
930 | }
931 | }
932 | }
933 | layer {
934 | name: "relu3_1/dw_new"
935 | type: "ReLU"
936 | bottom: "conv3_1/dw_new"
937 | top: "conv3_1/dw_new"
938 | }
939 |
940 | # 3_1 out
941 | layer {
942 | name: "conv3_1/out/pw_new"
943 | type: "Convolution"
944 | bottom: "conv3_1/dw_new"
945 | top: "conv3_1/out/pw_new"
946 | param {
947 | lr_mult: 1
948 | decay_mult: 1
949 | }
950 | convolution_param {
951 | num_output: 64
952 | bias_term: false
953 | pad: 0
954 | kernel_size: 1
955 | engine: CAFFE
956 | stride: 1
957 | weight_filler {
958 | type: "msra"
959 | }
960 | }
961 | }
962 | layer {
963 | name: "conv3_1/out/pw/bn_new"
964 | type: "BatchNorm"
965 | bottom: "conv3_1/out/pw_new"
966 | top: "conv3_1/out/pw_new"
967 | param {
968 | lr_mult: 0
969 | decay_mult: 0
970 | }
971 | param {
972 | lr_mult: 0
973 | decay_mult: 0
974 | }
975 | param {
976 | lr_mult: 0
977 | decay_mult: 0
978 | }
979 | }
980 | layer {
981 | name: "conv3_1/out/pw/scale_new"
982 | type: "Scale"
983 | bottom: "conv3_1/out/pw_new"
984 | top: "conv3_1/out/pw_new"
985 | scale_param {
986 | filler {
987 | value: 1
988 | }
989 | bias_term: true
990 | bias_filler {
991 | value: 0
992 | }
993 | }
994 | }
995 |
996 | # 3_2 in
997 |
998 | layer {
999 | name: "conv3_2/in/pw_new"
1000 | type: "Convolution"
1001 | bottom: "conv3_1/out/pw_new"
1002 | top: "conv3_2/in/pw_new"
1003 | param {
1004 | lr_mult: 1
1005 | decay_mult: 1
1006 | }
1007 | convolution_param {
1008 | num_output: 192
1009 | bias_term: false
1010 | pad: 0
1011 | kernel_size: 1
1012 | engine: CAFFE
1013 | stride: 1
1014 | weight_filler {
1015 | type: "msra"
1016 | }
1017 | }
1018 | }
1019 | layer {
1020 | name: "conv3_2/in/pw/bn_new"
1021 | type: "BatchNorm"
1022 | bottom: "conv3_2/in/pw_new"
1023 | top: "conv3_2/in/pw_new"
1024 | param {
1025 | lr_mult: 0
1026 | decay_mult: 0
1027 | }
1028 | param {
1029 | lr_mult: 0
1030 | decay_mult: 0
1031 | }
1032 | param {
1033 | lr_mult: 0
1034 | decay_mult: 0
1035 | }
1036 | }
1037 | layer {
1038 | name: "conv3_2/in/pw/scale_new"
1039 | type: "Scale"
1040 | bottom: "conv3_2/in/pw_new"
1041 | top: "conv3_2/in/pw_new"
1042 | scale_param {
1043 | filler {
1044 | value: 1
1045 | }
1046 | bias_term: true
1047 | bias_filler {
1048 | value: 0
1049 | }
1050 | }
1051 | }
1052 | layer {
1053 | name: "relu3_2/in/pw_new"
1054 | type: "ReLU"
1055 | bottom: "conv3_2/in/pw_new"
1056 | top: "conv3_2/in/pw_new"
1057 | }
1058 |
1059 | # 3_2 dw
1060 | layer {
1061 | name: "conv3_2/dw_new"
1062 | type: "ConvolutionDepthwise"
1063 | bottom: "conv3_2/in/pw_new"
1064 | top: "conv3_2/dw_new"
1065 | param {
1066 | lr_mult: 1
1067 | decay_mult: 0
1068 | }
1069 | convolution_param {
1070 | num_output: 192
1071 | bias_term: false
1072 | pad: 1
1073 | kernel_size: 3
1074 | engine: CAFFE
1075 | stride: 1
1076 | weight_filler {
1077 | type: "msra"
1078 | }
1079 | }
1080 | }
1081 | layer {
1082 | name: "conv3_2/dw/bn_new"
1083 | type: "BatchNorm"
1084 | bottom: "conv3_2/dw_new"
1085 | top: "conv3_2/dw_new"
1086 | param {
1087 | lr_mult: 0
1088 | decay_mult: 0
1089 | }
1090 | param {
1091 | lr_mult: 0
1092 | decay_mult: 0
1093 | }
1094 | param {
1095 | lr_mult: 0
1096 | decay_mult: 0
1097 | }
1098 | }
1099 | layer {
1100 | name: "conv3_2/dw/scale_new"
1101 | type: "Scale"
1102 | bottom: "conv3_2/dw_new"
1103 | top: "conv3_2/dw_new"
1104 | scale_param {
1105 | filler {
1106 | value: 1
1107 | }
1108 | bias_term: true
1109 | bias_filler {
1110 | value: 0
1111 | }
1112 | }
1113 | }
1114 | layer {
1115 | name: "relu3_2/dw_new"
1116 | type: "ReLU"
1117 | bottom: "conv3_2/dw_new"
1118 | top: "conv3_2/dw_new"
1119 | }
1120 |
1121 |
1122 | # 3_2 out
1123 |
1124 | layer {
1125 | name: "conv3_2/out/pw_new"
1126 | type: "Convolution"
1127 | bottom: "conv3_2/dw_new"
1128 | top: "conv3_2/out/pw_new"
1129 | param {
1130 | lr_mult: 1
1131 | decay_mult: 1
1132 | }
1133 | convolution_param {
1134 | num_output: 64
1135 | bias_term: false
1136 | pad: 0
1137 | kernel_size: 1
1138 | engine: CAFFE
1139 | stride: 1
1140 | weight_filler {
1141 | type: "msra"
1142 | }
1143 | }
1144 | }
1145 | layer {
1146 | name: "conv3_2/out/pw/bn_new"
1147 | type: "BatchNorm"
1148 | bottom: "conv3_2/out/pw_new"
1149 | top: "conv3_2/out/pw_new"
1150 | param {
1151 | lr_mult: 0
1152 | decay_mult: 0
1153 | }
1154 | param {
1155 | lr_mult: 0
1156 | decay_mult: 0
1157 | }
1158 | param {
1159 | lr_mult: 0
1160 | decay_mult: 0
1161 | }
1162 | }
1163 | layer {
1164 | name: "conv3_2/out/pw/scale_new"
1165 | type: "Scale"
1166 | bottom: "conv3_2/out/pw_new"
1167 | top: "conv3_2/out/pw_new"
1168 | scale_param {
1169 | filler {
1170 | value: 1
1171 | }
1172 | bias_term: true
1173 | bias_filler {
1174 | value: 0
1175 | }
1176 | }
1177 | }
1178 | layer {
1179 | name: "fuse_conv3_2"
1180 | type: "Eltwise"
1181 | bottom: "conv3_1/out/pw_new"
1182 | bottom: "conv3_2/out/pw_new"
1183 | top: "fuse_conv3_2"
1184 | eltwise_param {
1185 | operation: SUM
1186 | }
1187 | }
1188 |
1189 |
1190 |
1191 |
1192 |
1193 |
1194 |
1195 | #------------------------- fc1
1196 | layer {
1197 | name: "fc1"
1198 | type: "InnerProduct"
1199 | bottom: "fuse_conv3_2"
1200 | top: "fc1"
1201 | param {
1202 | lr_mult: 1
1203 | decay_mult: 1
1204 | }
1205 | param {
1206 | lr_mult: 2
1207 | decay_mult: 1
1208 | }
1209 | inner_product_param {
1210 | num_output: 256
1211 | weight_filler {
1212 | type: "gaussian"
1213 | std: 0.01
1214 | }
1215 | bias_filler {
1216 | type: "constant"
1217 | value: 0
1218 | }
1219 | }
1220 | }
1221 | layer {
1222 | name: "relu_fc1"
1223 | type: "ReLU"
1224 | bottom: "fc1"
1225 | top: "fc1"
1226 | }
1227 | layer {
1228 | name: "drop_fc1"
1229 | type: "Dropout"
1230 | bottom: "fc1"
1231 | top: "fc1"
1232 | dropout_param{
1233 | dropout_ratio: 0.3
1234 | }
1235 | }
1236 |
1237 | #------------------------- fc2
1238 | layer {
1239 | name: "fc2"
1240 | type: "InnerProduct"
1241 | bottom: "fc1"
1242 | top: "fc2"
1243 | param {
1244 | lr_mult: 1
1245 | decay_mult: 1
1246 | }
1247 | param {
1248 | lr_mult: 2
1249 | decay_mult: 1
1250 | }
1251 | inner_product_param {
1252 | num_output: 10
1253 | weight_filler {
1254 | type: "gaussian"
1255 | std: 0.01
1256 | }
1257 | bias_filler {
1258 | type: "constant"
1259 | value: 0
1260 | }
1261 | }
1262 | }
--------------------------------------------------------------------------------
/3_demo/Data/l1_net.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/l1_net.caffemodel
--------------------------------------------------------------------------------
/3_demo/Data/l2_deploy.prototxt:
--------------------------------------------------------------------------------
1 | name: "level_2"
2 | input: "data"
3 | input_shape { dim: 1 dim: 3 dim: 48 dim: 48 }
4 |
5 |
6 | #------------------------------------------- conv1 input: 48*48*3 output: 24*24*16
7 | layer {
8 | name: "conv1_new"
9 | type: "Convolution"
10 | bottom: "data"
11 | top: "conv1_new"
12 | param {
13 | lr_mult: 1
14 | decay_mult: 1
15 | }
16 | convolution_param {
17 | num_output: 16
18 | bias_term: false
19 | pad: 1
20 | kernel_size: 3
21 | stride: 2
22 | weight_filler {
23 | type: "msra"
24 | }
25 | }
26 | }
27 | layer {
28 | name: "conv1/bn_new"
29 | type: "BatchNorm"
30 | bottom: "conv1_new"
31 | top: "conv1_new"
32 | param {
33 | lr_mult: 0
34 | decay_mult: 0
35 | }
36 | param {
37 | lr_mult: 0
38 | decay_mult: 0
39 | }
40 | param {
41 | lr_mult: 0
42 | decay_mult: 0
43 | }
44 | }
45 | layer {
46 | name: "conv1/scale_new"
47 | type: "Scale"
48 | bottom: "conv1_new"
49 | top: "conv1_new"
50 | scale_param {
51 | filler {
52 | value: 1
53 | }
54 | bias_term: true
55 | bias_filler {
56 | value: 0
57 | }
58 | }
59 | }
60 | layer {
61 | name: "relu1_new"
62 | type: "ReLU"
63 | bottom: "conv1_new"
64 | top: "conv1_new"
65 | }
66 |
67 | #----------- bottleneck_1 1_1(s=2) 1_2(s=1) input: 24*24*16 output: 12*12*24
68 |
69 | layer {
70 | name: "conv1_1/in/pw_new"
71 | type: "Convolution"
72 | bottom: "conv1_new"
73 | top: "conv1_1/in/pw_new"
74 | param {
75 | lr_mult: 1
76 | decay_mult: 1
77 | }
78 | convolution_param {
79 | num_output: 96
80 | bias_term: false
81 | pad: 0
82 | kernel_size: 1
83 | engine: CAFFE
84 | stride: 1
85 | weight_filler {
86 | type: "msra"
87 | }
88 | }
89 | }
90 | layer {
91 | name: "conv1_1/in/pw/bn_new"
92 | type: "BatchNorm"
93 | bottom: "conv1_1/in/pw_new"
94 | top: "conv1_1/in/pw_new"
95 | param {
96 | lr_mult: 0
97 | decay_mult: 0
98 | }
99 | param {
100 | lr_mult: 0
101 | decay_mult: 0
102 | }
103 | param {
104 | lr_mult: 0
105 | decay_mult: 0
106 | }
107 | }
108 | layer {
109 | name: "conv1_1/in/pw/scale_new"
110 | type: "Scale"
111 | bottom: "conv1_1/in/pw_new"
112 | top: "conv1_1/in/pw_new"
113 | scale_param {
114 | filler {
115 | value: 1
116 | }
117 | bias_term: true
118 | bias_filler {
119 | value: 0
120 | }
121 | }
122 | }
123 | layer {
124 | name: "relu1_1/in/pw_new"
125 | type: "ReLU"
126 | bottom: "conv1_1/in/pw_new"
127 | top: "conv1_1/in/pw_new"
128 | }
129 |
130 |
131 |
132 | # 1_1 dw conv
133 | layer {
134 | name: "conv1_1/dw_new"
135 | type: "ConvolutionDepthwise"
136 | bottom: "conv1_1/in/pw_new"
137 | top: "conv1_1/dw_new"
138 | param {
139 | lr_mult: 1
140 | decay_mult: 0
141 | }
142 | convolution_param {
143 | num_output: 96
144 | bias_term: false
145 | pad: 1
146 | kernel_size: 3
147 | engine: CAFFE
148 | stride: 2
149 | weight_filler {
150 | type: "msra"
151 | }
152 | }
153 | }
154 | layer {
155 | name: "conv1_1/dw/bn_new"
156 | type: "BatchNorm"
157 | bottom: "conv1_1/dw_new"
158 | top: "conv1_1/dw_new"
159 | param {
160 | lr_mult: 0
161 | decay_mult: 0
162 | }
163 | param {
164 | lr_mult: 0
165 | decay_mult: 0
166 | }
167 | param {
168 | lr_mult: 0
169 | decay_mult: 0
170 | }
171 | }
172 | layer {
173 | name: "conv1_1/dw/scale_new"
174 | type: "Scale"
175 | bottom: "conv1_1/dw_new"
176 | top: "conv1_1/dw_new"
177 | scale_param {
178 | filler {
179 | value: 1
180 | }
181 | bias_term: true
182 | bias_filler {
183 | value: 0
184 | }
185 | }
186 | }
187 | layer {
188 | name: "relu1_1/dw_new"
189 | type: "ReLU"
190 | bottom: "conv1_1/dw_new"
191 | top: "conv1_1/dw_new"
192 | }
193 |
194 | # 1_1 out
195 | layer {
196 | name: "conv1_1/out/pw_new"
197 | type: "Convolution"
198 | bottom: "conv1_1/dw_new"
199 | top: "conv1_1/out/pw_new"
200 | param {
201 | lr_mult: 1
202 | decay_mult: 1
203 | }
204 | convolution_param {
205 | num_output: 24
206 | bias_term: false
207 | pad: 0
208 | kernel_size: 1
209 | engine: CAFFE
210 | stride: 1
211 | weight_filler {
212 | type: "msra"
213 | }
214 | }
215 | }
216 | layer {
217 | name: "conv1_1/out/pw/bn_new"
218 | type: "BatchNorm"
219 | bottom: "conv1_1/out/pw_new"
220 | top: "conv1_1/out/pw_new"
221 | param {
222 | lr_mult: 0
223 | decay_mult: 0
224 | }
225 | param {
226 | lr_mult: 0
227 | decay_mult: 0
228 | }
229 | param {
230 | lr_mult: 0
231 | decay_mult: 0
232 | }
233 | }
234 | layer {
235 | name: "conv1_1/out/pw/scale_new"
236 | type: "Scale"
237 | bottom: "conv1_1/out/pw_new"
238 | top: "conv1_1/out/pw_new"
239 | scale_param {
240 | filler {
241 | value: 1
242 | }
243 | bias_term: true
244 | bias_filler {
245 | value: 0
246 | }
247 | }
248 | }
249 | # 1_2 in
250 |
251 | layer {
252 | name: "conv1_2/in/pw_new"
253 | type: "Convolution"
254 | bottom: "conv1_1/out/pw_new"
255 | top: "conv1_2/in/pw_new"
256 | param {
257 | lr_mult: 1
258 | decay_mult: 1
259 | }
260 | convolution_param {
261 | num_output: 144
262 | bias_term: false
263 | pad: 0
264 | kernel_size: 1
265 | engine: CAFFE
266 | stride: 1
267 | weight_filler {
268 | type: "msra"
269 | }
270 | }
271 | }
272 | layer {
273 | name: "conv1_2/in/pw/bn_new"
274 | type: "BatchNorm"
275 | bottom: "conv1_2/in/pw_new"
276 | top: "conv1_2/in/pw_new"
277 | param {
278 | lr_mult: 0
279 | decay_mult: 0
280 | }
281 | param {
282 | lr_mult: 0
283 | decay_mult: 0
284 | }
285 | param {
286 | lr_mult: 0
287 | decay_mult: 0
288 | }
289 | }
290 | layer {
291 | name: "conv1_2/in/pw/scale_new"
292 | type: "Scale"
293 | bottom: "conv1_2/in/pw_new"
294 | top: "conv1_2/in/pw_new"
295 | scale_param {
296 | filler {
297 | value: 1
298 | }
299 | bias_term: true
300 | bias_filler {
301 | value: 0
302 | }
303 | }
304 | }
305 | layer {
306 | name: "relu1_2/in/pw_new"
307 | type: "ReLU"
308 | bottom: "conv1_2/in/pw_new"
309 | top: "conv1_2/in/pw_new"
310 | }
311 |
312 | # 1_2 dw
313 |
314 | layer {
315 | name: "conv1_2/dw_new"
316 | type: "ConvolutionDepthwise"
317 | bottom: "conv1_2/in/pw_new"
318 | top: "conv1_2/dw_new"
319 | param {
320 | lr_mult: 1
321 | decay_mult: 0
322 | }
323 | convolution_param {
324 | num_output: 144
325 | bias_term: false
326 | pad: 1
327 | kernel_size: 3
328 | engine: CAFFE
329 | stride: 1
330 | weight_filler {
331 | type: "msra"
332 | }
333 | }
334 | }
335 | layer {
336 | name: "conv1_2/dw/bn_new"
337 | type: "BatchNorm"
338 | bottom: "conv1_2/dw_new"
339 | top: "conv1_2/dw_new"
340 | param {
341 | lr_mult: 0
342 | decay_mult: 0
343 | }
344 | param {
345 | lr_mult: 0
346 | decay_mult: 0
347 | }
348 | param {
349 | lr_mult: 0
350 | decay_mult: 0
351 | }
352 | }
353 | layer {
354 | name: "conv1_2/dw/scale_new"
355 | type: "Scale"
356 | bottom: "conv1_2/dw_new"
357 | top: "conv1_2/dw_new"
358 | scale_param {
359 | filler {
360 | value: 1
361 | }
362 | bias_term: true
363 | bias_filler {
364 | value: 0
365 | }
366 | }
367 | }
368 | layer {
369 | name: "relu1_2/dw_new"
370 | type: "ReLU"
371 | bottom: "conv1_2/dw_new"
372 | top: "conv1_2/dw_new"
373 | }
374 |
375 | # 1_2 out 12*12*24
376 | layer {
377 | name: "conv1_2/out/pw_new"
378 | type: "Convolution"
379 | bottom: "conv1_2/dw_new"
380 | top: "conv1_2/out/pw_new"
381 | param {
382 | lr_mult: 1
383 | decay_mult: 1
384 | }
385 | convolution_param {
386 | num_output: 24
387 | bias_term: false
388 | pad: 0
389 | kernel_size: 1
390 | engine: CAFFE
391 | stride: 1
392 | weight_filler {
393 | type: "msra"
394 | }
395 | }
396 | }
397 | layer {
398 | name: "conv1_2/out/pw/bn_new"
399 | type: "BatchNorm"
400 | bottom: "conv1_2/out/pw_new"
401 | top: "conv1_2/out/pw_new"
402 | param {
403 | lr_mult: 0
404 | decay_mult: 0
405 | }
406 | param {
407 | lr_mult: 0
408 | decay_mult: 0
409 | }
410 | param {
411 | lr_mult: 0
412 | decay_mult: 0
413 | }
414 | }
415 | layer {
416 | name: "conv1_2/out/pw/scale_new"
417 | type: "Scale"
418 | bottom: "conv1_2/out/pw_new"
419 | top: "conv1_2/out/pw_new"
420 | scale_param {
421 | filler {
422 | value: 1
423 | }
424 | bias_term: true
425 | bias_filler {
426 | value: 0
427 | }
428 | }
429 | }
430 | layer {
431 | name: "fuse_conv1_2"
432 | type: "Eltwise"
433 | bottom: "conv1_1/out/pw_new"
434 | bottom: "conv1_2/out/pw_new"
435 | top: "fuse_conv1_2"
436 | eltwise_param {
437 | operation: SUM
438 | }
439 | }
440 |
441 | #--------------------------bottleneck2 2_1(s=2) 2_2(s=1) input: 12*12*24 output: 6*6*32
442 | # 2_1 in
443 | layer {
444 | name: "conv2_1/in/pw_new"
445 | type: "Convolution"
446 | bottom: "fuse_conv1_2"
447 | top: "conv2_1/in/pw_new"
448 | param {
449 | lr_mult: 1
450 | decay_mult: 1
451 | }
452 | convolution_param {
453 | num_output: 144
454 | bias_term: false
455 | pad: 0
456 | kernel_size: 1
457 | engine: CAFFE
458 | stride: 1
459 | weight_filler {
460 | type: "msra"
461 | }
462 | }
463 | }
464 | layer {
465 | name: "conv2_1/in/pw/bn_new"
466 | type: "BatchNorm"
467 | bottom: "conv2_1/in/pw_new"
468 | top: "conv2_1/in/pw_new"
469 | param {
470 | lr_mult: 0
471 | decay_mult: 0
472 | }
473 | param {
474 | lr_mult: 0
475 | decay_mult: 0
476 | }
477 | param {
478 | lr_mult: 0
479 | decay_mult: 0
480 | }
481 | }
482 | layer {
483 | name: "conv2_1/in/pw/scale_new"
484 | type: "Scale"
485 | bottom: "conv2_1/in/pw_new"
486 | top: "conv2_1/in/pw_new"
487 | scale_param {
488 | filler {
489 | value: 1
490 | }
491 | bias_term: true
492 | bias_filler {
493 | value: 0
494 | }
495 | }
496 | }
497 | layer {
498 | name: "relu2_1/in/pw_new"
499 | type: "ReLU"
500 | bottom: "conv2_1/in/pw_new"
501 | top: "conv2_1/in/pw_new"
502 | }
503 |
504 | # 2_1 dw
505 | layer {
506 | name: "conv2_1/dw_new"
507 | type: "ConvolutionDepthwise"
508 | bottom: "conv2_1/in/pw_new"
509 | top: "conv2_1/dw_new"
510 | param {
511 | lr_mult: 1
512 | decay_mult: 0
513 | }
514 | convolution_param {
515 | num_output: 144
516 | bias_term: false
517 | pad: 1
518 | kernel_size: 3
519 | engine: CAFFE
520 | stride: 2
521 | weight_filler {
522 | type: "msra"
523 | }
524 | }
525 | }
526 | layer {
527 | name: "conv2_1/dw/bn_new"
528 | type: "BatchNorm"
529 | bottom: "conv2_1/dw_new"
530 | top: "conv2_1/dw_new"
531 | param {
532 | lr_mult: 0
533 | decay_mult: 0
534 | }
535 | param {
536 | lr_mult: 0
537 | decay_mult: 0
538 | }
539 | param {
540 | lr_mult: 0
541 | decay_mult: 0
542 | }
543 | }
544 | layer {
545 | name: "conv2_1/dw/scale_new"
546 | type: "Scale"
547 | bottom: "conv2_1/dw_new"
548 | top: "conv2_1/dw_new"
549 | scale_param {
550 | filler {
551 | value: 1
552 | }
553 | bias_term: true
554 | bias_filler {
555 | value: 0
556 | }
557 | }
558 | }
559 | layer {
560 | name: "relu2_1/dw_new"
561 | type: "ReLU"
562 | bottom: "conv2_1/dw_new"
563 | top: "conv2_1/dw_new"
564 | }
565 |
566 | # 2_1 out
567 | layer {
568 | name: "conv2_1/out/pw_new"
569 | type: "Convolution"
570 | bottom: "conv2_1/dw_new"
571 | top: "conv2_1/out/pw_new"
572 | param {
573 | lr_mult: 1
574 | decay_mult: 1
575 | }
576 | convolution_param {
577 | num_output: 32
578 | bias_term: false
579 | pad: 0
580 | kernel_size: 1
581 | engine: CAFFE
582 | stride: 1
583 | weight_filler {
584 | type: "msra"
585 | }
586 | }
587 | }
588 | layer {
589 | name: "conv2_1/out/pw/bn_new"
590 | type: "BatchNorm"
591 | bottom: "conv2_1/out/pw_new"
592 | top: "conv2_1/out/pw_new"
593 | param {
594 | lr_mult: 0
595 | decay_mult: 0
596 | }
597 | param {
598 | lr_mult: 0
599 | decay_mult: 0
600 | }
601 | param {
602 | lr_mult: 0
603 | decay_mult: 0
604 | }
605 | }
606 | layer {
607 | name: "conv2_1/out/pw/scale_new"
608 | type: "Scale"
609 | bottom: "conv2_1/out/pw_new"
610 | top: "conv2_1/out/pw_new"
611 | scale_param {
612 | filler {
613 | value: 1
614 | }
615 | bias_term: true
616 | bias_filler {
617 | value: 0
618 | }
619 | }
620 | }
621 |
622 | # 2_2 in
623 |
624 | layer {
625 | name: "conv2_2/in/pw_new"
626 | type: "Convolution"
627 | bottom: "conv2_1/out/pw_new"
628 | top: "conv2_2/in/pw_new"
629 | param {
630 | lr_mult: 1
631 | decay_mult: 1
632 | }
633 | convolution_param {
634 | num_output: 192
635 | bias_term: false
636 | pad: 0
637 | kernel_size: 1
638 | engine: CAFFE
639 | stride: 1
640 | weight_filler {
641 | type: "msra"
642 | }
643 | }
644 | }
645 | layer {
646 | name: "conv2_2/in/pw/bn_new"
647 | type: "BatchNorm"
648 | bottom: "conv2_2/in/pw_new"
649 | top: "conv2_2/in/pw_new"
650 | param {
651 | lr_mult: 0
652 | decay_mult: 0
653 | }
654 | param {
655 | lr_mult: 0
656 | decay_mult: 0
657 | }
658 | param {
659 | lr_mult: 0
660 | decay_mult: 0
661 | }
662 | }
663 | layer {
664 | name: "conv2_2/in/pw/scale_new"
665 | type: "Scale"
666 | bottom: "conv2_2/in/pw_new"
667 | top: "conv2_2/in/pw_new"
668 | scale_param {
669 | filler {
670 | value: 1
671 | }
672 | bias_term: true
673 | bias_filler {
674 | value: 0
675 | }
676 | }
677 | }
678 | layer {
679 | name: "relu2_2/in/pw_new"
680 | type: "ReLU"
681 | bottom: "conv2_2/in/pw_new"
682 | top: "conv2_2/in/pw_new"
683 | }
684 |
685 | # 2_2 dw
686 | layer {
687 | name: "conv2_2/dw_new"
688 | type: "ConvolutionDepthwise"
689 | bottom: "conv2_2/in/pw_new"
690 | top: "conv2_2/dw_new"
691 | param {
692 | lr_mult: 1
693 | decay_mult: 0
694 | }
695 | convolution_param {
696 | num_output: 192
697 | bias_term: false
698 | pad: 1
699 | kernel_size: 3
700 | engine: CAFFE
701 | stride: 1
702 | weight_filler {
703 | type: "msra"
704 | }
705 | }
706 | }
707 | layer {
708 | name: "conv2_2/dw/bn_new"
709 | type: "BatchNorm"
710 | bottom: "conv2_2/dw_new"
711 | top: "conv2_2/dw_new"
712 | param {
713 | lr_mult: 0
714 | decay_mult: 0
715 | }
716 | param {
717 | lr_mult: 0
718 | decay_mult: 0
719 | }
720 | param {
721 | lr_mult: 0
722 | decay_mult: 0
723 | }
724 | }
725 | layer {
726 | name: "conv2_2/dw/scale_new"
727 | type: "Scale"
728 | bottom: "conv2_2/dw_new"
729 | top: "conv2_2/dw_new"
730 | scale_param {
731 | filler {
732 | value: 1
733 | }
734 | bias_term: true
735 | bias_filler {
736 | value: 0
737 | }
738 | }
739 | }
740 | layer {
741 | name: "relu2_2/dw_new"
742 | type: "ReLU"
743 | bottom: "conv2_2/dw_new"
744 | top: "conv2_2/dw_new"
745 | }
746 |
747 |
748 | # 2_2 out
749 |
750 | layer {
751 | name: "conv2_2/out/pw_new"
752 | type: "Convolution"
753 | bottom: "conv2_2/dw_new"
754 | top: "conv2_2/out/pw_new"
755 | param {
756 | lr_mult: 1
757 | decay_mult: 1
758 | }
759 | convolution_param {
760 | num_output: 32
761 | bias_term: false
762 | pad: 0
763 | kernel_size: 1
764 | engine: CAFFE
765 | stride: 1
766 | weight_filler {
767 | type: "msra"
768 | }
769 | }
770 | }
771 | layer {
772 | name: "conv2_2/out/pw/bn_new"
773 | type: "BatchNorm"
774 | bottom: "conv2_2/out/pw_new"
775 | top: "conv2_2/out/pw_new"
776 | param {
777 | lr_mult: 0
778 | decay_mult: 0
779 | }
780 | param {
781 | lr_mult: 0
782 | decay_mult: 0
783 | }
784 | param {
785 | lr_mult: 0
786 | decay_mult: 0
787 | }
788 | }
789 | layer {
790 | name: "conv2_2/out/pw/scale_new"
791 | type: "Scale"
792 | bottom: "conv2_2/out/pw_new"
793 | top: "conv2_2/out/pw_new"
794 | scale_param {
795 | filler {
796 | value: 1
797 | }
798 | bias_term: true
799 | bias_filler {
800 | value: 0
801 | }
802 | }
803 | }
804 | layer {
805 | name: "fuse_conv2_2"
806 | type: "Eltwise"
807 | bottom: "conv2_1/out/pw_new"
808 | bottom: "conv2_2/out/pw_new"
809 | top: "fuse_conv2_2"
810 | eltwise_param {
811 | operation: SUM
812 | }
813 | }
814 |
815 | #--------------------------bottleneck3 3_1(s=2) 3_2(s=1) input: 6*6*32 output: 3*3*64
816 | # 3_1 in
817 | layer {
818 | name: "conv3_1/in/pw_new"
819 | type: "Convolution"
820 | bottom: "fuse_conv2_2"
821 | top: "conv3_1/in/pw_new"
822 | param {
823 | lr_mult: 1
824 | decay_mult: 1
825 | }
826 | convolution_param {
827 | num_output: 192
828 | bias_term: false
829 | pad: 0
830 | kernel_size: 1
831 | engine: CAFFE
832 | stride: 1
833 | weight_filler {
834 | type: "msra"
835 | }
836 | }
837 | }
838 | layer {
839 | name: "conv3_1/in/pw/bn_new"
840 | type: "BatchNorm"
841 | bottom: "conv3_1/in/pw_new"
842 | top: "conv3_1/in/pw_new"
843 | param {
844 | lr_mult: 0
845 | decay_mult: 0
846 | }
847 | param {
848 | lr_mult: 0
849 | decay_mult: 0
850 | }
851 | param {
852 | lr_mult: 0
853 | decay_mult: 0
854 | }
855 | }
856 | layer {
857 | name: "conv3_1/in/pw/scale_new"
858 | type: "Scale"
859 | bottom: "conv3_1/in/pw_new"
860 | top: "conv3_1/in/pw_new"
861 | scale_param {
862 | filler {
863 | value: 1
864 | }
865 | bias_term: true
866 | bias_filler {
867 | value: 0
868 | }
869 | }
870 | }
871 | layer {
872 | name: "relu3_1/in/pw_new"
873 | type: "ReLU"
874 | bottom: "conv3_1/in/pw_new"
875 | top: "conv3_1/in/pw_new"
876 | }
877 |
878 | # 3_1 dw
879 | layer {
880 | name: "conv3_1/dw_new"
881 | type: "ConvolutionDepthwise"
882 | bottom: "conv3_1/in/pw_new"
883 | top: "conv3_1/dw_new"
884 | param {
885 | lr_mult: 1
886 | decay_mult: 0
887 | }
888 | convolution_param {
889 | num_output: 192
890 | bias_term: false
891 | pad: 1
892 | kernel_size: 3
893 | engine: CAFFE
894 | stride: 2
895 | weight_filler {
896 | type: "msra"
897 | }
898 | }
899 | }
900 | layer {
901 | name: "conv3_1/dw/bn_new"
902 | type: "BatchNorm"
903 | bottom: "conv3_1/dw_new"
904 | top: "conv3_1/dw_new"
905 | param {
906 | lr_mult: 0
907 | decay_mult: 0
908 | }
909 | param {
910 | lr_mult: 0
911 | decay_mult: 0
912 | }
913 | param {
914 | lr_mult: 0
915 | decay_mult: 0
916 | }
917 | }
918 | layer {
919 | name: "conv3_1/dw/scale_new"
920 | type: "Scale"
921 | bottom: "conv3_1/dw_new"
922 | top: "conv3_1/dw_new"
923 | scale_param {
924 | filler {
925 | value: 1
926 | }
927 | bias_term: true
928 | bias_filler {
929 | value: 0
930 | }
931 | }
932 | }
933 | layer {
934 | name: "relu3_1/dw_new"
935 | type: "ReLU"
936 | bottom: "conv3_1/dw_new"
937 | top: "conv3_1/dw_new"
938 | }
939 |
940 | # 3_1 out
941 | layer {
942 | name: "conv3_1/out/pw_new"
943 | type: "Convolution"
944 | bottom: "conv3_1/dw_new"
945 | top: "conv3_1/out/pw_new"
946 | param {
947 | lr_mult: 1
948 | decay_mult: 1
949 | }
950 | convolution_param {
951 | num_output: 64
952 | bias_term: false
953 | pad: 0
954 | kernel_size: 1
955 | engine: CAFFE
956 | stride: 1
957 | weight_filler {
958 | type: "msra"
959 | }
960 | }
961 | }
962 | layer {
963 | name: "conv3_1/out/pw/bn_new"
964 | type: "BatchNorm"
965 | bottom: "conv3_1/out/pw_new"
966 | top: "conv3_1/out/pw_new"
967 | param {
968 | lr_mult: 0
969 | decay_mult: 0
970 | }
971 | param {
972 | lr_mult: 0
973 | decay_mult: 0
974 | }
975 | param {
976 | lr_mult: 0
977 | decay_mult: 0
978 | }
979 | }
980 | layer {
981 | name: "conv3_1/out/pw/scale_new"
982 | type: "Scale"
983 | bottom: "conv3_1/out/pw_new"
984 | top: "conv3_1/out/pw_new"
985 | scale_param {
986 | filler {
987 | value: 1
988 | }
989 | bias_term: true
990 | bias_filler {
991 | value: 0
992 | }
993 | }
994 | }
995 |
996 | # 3_2 in
997 |
998 | layer {
999 | name: "conv3_2/in/pw_new"
1000 | type: "Convolution"
1001 | bottom: "conv3_1/out/pw_new"
1002 | top: "conv3_2/in/pw_new"
1003 | param {
1004 | lr_mult: 1
1005 | decay_mult: 1
1006 | }
1007 | convolution_param {
1008 | num_output: 192
1009 | bias_term: false
1010 | pad: 0
1011 | kernel_size: 1
1012 | engine: CAFFE
1013 | stride: 1
1014 | weight_filler {
1015 | type: "msra"
1016 | }
1017 | }
1018 | }
1019 | layer {
1020 | name: "conv3_2/in/pw/bn_new"
1021 | type: "BatchNorm"
1022 | bottom: "conv3_2/in/pw_new"
1023 | top: "conv3_2/in/pw_new"
1024 | param {
1025 | lr_mult: 0
1026 | decay_mult: 0
1027 | }
1028 | param {
1029 | lr_mult: 0
1030 | decay_mult: 0
1031 | }
1032 | param {
1033 | lr_mult: 0
1034 | decay_mult: 0
1035 | }
1036 | }
1037 | layer {
1038 | name: "conv3_2/in/pw/scale_new"
1039 | type: "Scale"
1040 | bottom: "conv3_2/in/pw_new"
1041 | top: "conv3_2/in/pw_new"
1042 | scale_param {
1043 | filler {
1044 | value: 1
1045 | }
1046 | bias_term: true
1047 | bias_filler {
1048 | value: 0
1049 | }
1050 | }
1051 | }
1052 | layer {
1053 | name: "relu3_2/in/pw_new"
1054 | type: "ReLU"
1055 | bottom: "conv3_2/in/pw_new"
1056 | top: "conv3_2/in/pw_new"
1057 | }
1058 |
1059 | # 3_2 dw
1060 | layer {
1061 | name: "conv3_2/dw_new"
1062 | type: "ConvolutionDepthwise"
1063 | bottom: "conv3_2/in/pw_new"
1064 | top: "conv3_2/dw_new"
1065 | param {
1066 | lr_mult: 1
1067 | decay_mult: 0
1068 | }
1069 | convolution_param {
1070 | num_output: 192
1071 | bias_term: false
1072 | pad: 1
1073 | kernel_size: 3
1074 | engine: CAFFE
1075 | stride: 1
1076 | weight_filler {
1077 | type: "msra"
1078 | }
1079 | }
1080 | }
1081 | layer {
1082 | name: "conv3_2/dw/bn_new"
1083 | type: "BatchNorm"
1084 | bottom: "conv3_2/dw_new"
1085 | top: "conv3_2/dw_new"
1086 | param {
1087 | lr_mult: 0
1088 | decay_mult: 0
1089 | }
1090 | param {
1091 | lr_mult: 0
1092 | decay_mult: 0
1093 | }
1094 | param {
1095 | lr_mult: 0
1096 | decay_mult: 0
1097 | }
1098 | }
1099 | layer {
1100 | name: "conv3_2/dw/scale_new"
1101 | type: "Scale"
1102 | bottom: "conv3_2/dw_new"
1103 | top: "conv3_2/dw_new"
1104 | scale_param {
1105 | filler {
1106 | value: 1
1107 | }
1108 | bias_term: true
1109 | bias_filler {
1110 | value: 0
1111 | }
1112 | }
1113 | }
1114 | layer {
1115 | name: "relu3_2/dw_new"
1116 | type: "ReLU"
1117 | bottom: "conv3_2/dw_new"
1118 | top: "conv3_2/dw_new"
1119 | }
1120 |
1121 |
1122 | # 3_2 out
1123 |
1124 | layer {
1125 | name: "conv3_2/out/pw_new"
1126 | type: "Convolution"
1127 | bottom: "conv3_2/dw_new"
1128 | top: "conv3_2/out/pw_new"
1129 | param {
1130 | lr_mult: 1
1131 | decay_mult: 1
1132 | }
1133 | convolution_param {
1134 | num_output: 64
1135 | bias_term: false
1136 | pad: 0
1137 | kernel_size: 1
1138 | engine: CAFFE
1139 | stride: 1
1140 | weight_filler {
1141 | type: "msra"
1142 | }
1143 | }
1144 | }
1145 | layer {
1146 | name: "conv3_2/out/pw/bn_new"
1147 | type: "BatchNorm"
1148 | bottom: "conv3_2/out/pw_new"
1149 | top: "conv3_2/out/pw_new"
1150 | param {
1151 | lr_mult: 0
1152 | decay_mult: 0
1153 | }
1154 | param {
1155 | lr_mult: 0
1156 | decay_mult: 0
1157 | }
1158 | param {
1159 | lr_mult: 0
1160 | decay_mult: 0
1161 | }
1162 | }
1163 | layer {
1164 | name: "conv3_2/out/pw/scale_new"
1165 | type: "Scale"
1166 | bottom: "conv3_2/out/pw_new"
1167 | top: "conv3_2/out/pw_new"
1168 | scale_param {
1169 | filler {
1170 | value: 1
1171 | }
1172 | bias_term: true
1173 | bias_filler {
1174 | value: 0
1175 | }
1176 | }
1177 | }
1178 | layer {
1179 | name: "fuse_conv3_2"
1180 | type: "Eltwise"
1181 | bottom: "conv3_1/out/pw_new"
1182 | bottom: "conv3_2/out/pw_new"
1183 | top: "fuse_conv3_2"
1184 | eltwise_param {
1185 | operation: SUM
1186 | }
1187 | }
1188 |
1189 |
1190 |
1191 |
1192 |
1193 |
1194 |
1195 | #------------------------- fc1
1196 | layer {
1197 | name: "fc1"
1198 | type: "InnerProduct"
1199 | bottom: "fuse_conv3_2"
1200 | top: "fc1"
1201 | param {
1202 | lr_mult: 1
1203 | decay_mult: 1
1204 | }
1205 | param {
1206 | lr_mult: 2
1207 | decay_mult: 1
1208 | }
1209 | inner_product_param {
1210 | num_output: 256
1211 | weight_filler {
1212 | type: "gaussian"
1213 | std: 0.01
1214 | }
1215 | bias_filler {
1216 | type: "constant"
1217 | value: 0
1218 | }
1219 | }
1220 | }
1221 | layer {
1222 | name: "relu_fc1"
1223 | type: "ReLU"
1224 | bottom: "fc1"
1225 | top: "fc1"
1226 | }
1227 | layer {
1228 | name: "drop_fc1"
1229 | type: "Dropout"
1230 | bottom: "fc1"
1231 | top: "fc1"
1232 | dropout_param{
1233 | dropout_ratio: 0.3
1234 | }
1235 | }
1236 |
1237 | #------------------------- fc2
1238 | layer {
1239 | name: "fc2"
1240 | type: "InnerProduct"
1241 | bottom: "fc1"
1242 | top: "fc2"
1243 | param {
1244 | lr_mult: 1
1245 | decay_mult: 1
1246 | }
1247 | param {
1248 | lr_mult: 2
1249 | decay_mult: 1
1250 | }
1251 | inner_product_param {
1252 | num_output: 10
1253 | weight_filler {
1254 | type: "gaussian"
1255 | std: 0.01
1256 | }
1257 | bias_filler {
1258 | type: "constant"
1259 | value: 0
1260 | }
1261 | }
1262 | }
--------------------------------------------------------------------------------
/3_demo/Data/l2_net.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/l2_net.caffemodel
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # cascaded_mobilenet-v2
2 | cascaded convolutional neural network for facial point detection
3 |
4 | `详细步骤讲解请参见博客:http://blog.csdn.net/u011995719/article/details/79435615`
5 |
6 | # 1.简介
7 | 本实验在caffe下,采用级联MobileNet-V2进行人脸关键点(5点)检测,单模型仅 956 KB,GTX1080上运行为6ms左右(可在移动端达到实时检测)
8 |
9 | 本实验采用两级MobileNet-V2进行,两级的MobileNet-V2采用相同的网络结构(因为懒),结构如下:
10 |
11 | | Input | Operator | t |c | n | s |
12 | | :--------:| :--------:| :--: |:--------:| :--------:| :--: |
13 | | 48x48x3 | conv2d | - | 16 | 1 | 2 |
14 | | 24x24x16 | bottleneck | 6 | 24 | 2 | 2 |
15 | | 12x12x24 | bottleneck | 6 | 32 | 2 | 2 |
16 | | 6x6x32 | bottleneck | 6 | 64 | 2 | 2 |
17 | | 3x3x64 | fc | - | 256 | - | - |
18 | | 1x1x256 | fc | - | 10 | - | - |
19 |
20 | t表示“扩张”倍数,c表示输出通道数,n表示重复次数,s表示步长stride
21 | (MobileNet-v2 原文: https://arxiv.org/abs/1801.04381)
22 | (可参考博客:http://blog.csdn.net/u011995719/article/details/79135818)
23 |
24 |
25 | 基本流程为,level_1负责初步检测,依据level_1得到的关键点,对原始图片进行裁剪,将裁剪后的图片输入到level_2,从而达到从粗到精的定位。
26 | ## level_1 流程为:
27 | 
28 |
29 | ## level_2 流程为
30 | 
31 |
32 | 面部放大,绿色点为landmark,红色为level_1检测到的点,蓝色为level_2检测到的点,可以看出蓝色点更靠近绿色点
33 |
34 | 
35 |
36 |
37 |
38 | 本实验初步验证MobileNet-V2的有效性以及级联CNN进行人脸关键点检测的有效性
39 |
40 | 数据来源:采用CelebA数据集,共计202599张图片,每张图片含5个关键点
41 | 官网:http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html
42 | 百度网盘下载:https://pan.baidu.com/s/1eSNpdRG#list/path=%2F
43 |
44 | 实验结果:请直接看demo跑出来的图片。由于CelebA的图片较为复杂,并且本实验不需要采用人脸检测,因此无法与之前实验进行比较
45 |
46 | # 2.运行demo
47 |
48 | ## (1) 修改 caffe源码
49 | 本实验基于MobileNet-V2,因此需要给caffe添加新的layer,即depth-wise convolution,并且需要修改image_data_layer,使得其支持多标签输入
50 | (感谢 hpp、cpp、cu、prototxt 的提供者 suzhenghang,git 地址:https://github.com/suzhenghang/MobileNetv2/tree/master/.gitignore)
51 |
52 | 步骤,进入caffe_need/文件夹下,
53 |
54 | 1. 将image_data_layer.hpp 替换掉 ***caffe_path***/include/caffe/layers 下的 image_data_layer.hpp
55 | 2. 将conv_dw_layer.hpp 复制到 ***caffe_path***/include/caffe/layers 下
56 | 3. 将image_data_layer.cpp 替换掉 ***caffe_path***/src/caffe/layers 下的image_data_layer.cpp
57 | 4. 将conv_dw_layer.cu
58 | conv_dw_layer.cpp 复制到 ***caffe_path***/src/caffe/layers 下
59 | 重新编译,并且配置python接口
60 |
61 |
62 | ## (2) 进入文件夹3_demo
63 | 进入 3_demo/Code/,打开 inferencen.py,更改你的caffe所在路径
64 | sys.path.append('/home/xxx your caffe xxx/python')
65 | sys.path.append('/home/xxx your caffe xxx/python/caffe')
66 | 然后运行 sudo python inferencen.py, 检测出的图片保存在 3_demo/Result/draw_img/ 下
67 |
68 | # 3.复现训练过程
69 | 简单介绍训练步骤,总共分三阶段,分别是 0_raw_data, 1_level_1, 2_level_2
70 |
71 | ## 第一阶段,数据准备阶段: 0_raw_data
72 | 1. 从百度网盘下载好CelebA数据集,将CelebA\Img\img_celeba 复制到 0_raw_data/Data/ 下面,将CelebA\Anno\list_landmarks_celeba.txt复制到 0_raw_data/Data/ 并且重命名为celeba_label.txt
73 | 2. 进入0_raw_data/, 运行divide_tr_te.py,将会划分好训练集,测试集,并且保存在0_raw_data/Data/ 下面
74 | 3. 运行 draw_point.py,将会在 0_raw_data/Result/draw_img/下获得 打上关键点的图片,用来检查图片以及标签是否正确
75 |
76 |
77 | ## 第二阶段, 训练level_1: 1_level_1
78 |
79 | 进入 1_level_1/Code/,依次执行 0_gen_data, 1_draw_img, 2_train, 3_inference, 4_evaluate, 5_crop_img
80 | 0_gen_data,主要是对图片进行resize,并且转换label,训练时的label是[-1,1]的
81 | 1_draw_img,用来检查图片以及标签是否正确
82 | 2_train,训练的solver等
83 | 3_inference,训练完毕,用训练好的caffemodel进行inference,将inference得到的标签 输出到 1_level_1/Result/下,用于评估和裁剪图片
84 | 4_evaluate,计算误差
85 | 5_crop_img, 采用level_1的输出标签 对原始图片进行裁剪,获得level_2的输入图片,并且制作level_2的标签
86 |
87 |
88 | ## 第三阶段,训练level_2: 2_level_2
89 | 由于 1_level_1/Code/5_crop_img 已经生成了 level_2所需的数据,并且打上关键点,供检查,因此 level_2直接从train开始
90 | 0_train, 同level_1
91 | 1_inference, 同level_1
92 | 2_evaluate,同level_1
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
--------------------------------------------------------------------------------
/caffe_need/conv_dw_layer.cpp:
--------------------------------------------------------------------------------
1 | #include <algorithm>
2 | #include <vector>
3 | #include "caffe/filler.hpp"
4 | #include "caffe/layers/conv_dw_layer.hpp"
5 |
6 | namespace caffe {
7 |
8 | template
9 | void ConvolutionDepthwiseLayer::LayerSetUp(const vector*>& bottom,
10 | const vector*>& top) {
11 | ConvolutionParameter conv_param = this->layer_param_.convolution_param();
12 | if (conv_param.has_kernel_h() && conv_param.has_kernel_w()) {
13 | kernel_h_ = conv_param.kernel_h();
14 | kernel_w_ = conv_param.kernel_w();
15 | } else {
16 | if (conv_param.kernel_size_size() == 1)
17 | {
18 | kernel_h_ = conv_param.kernel_size(0);
19 | kernel_w_ = conv_param.kernel_size(0);
20 | }
21 | else
22 | {
23 | kernel_h_ = conv_param.kernel_size(0);
24 | kernel_w_ = conv_param.kernel_size(1);
25 | }
26 | }
27 | if (conv_param.has_stride_h() && conv_param.has_stride_w()) {
28 | stride_h_ = conv_param.stride_h();
29 | stride_w_ = conv_param.stride_w();
30 | } else {
31 | if (conv_param.stride_size() == 1)
32 | {
33 | stride_h_ = conv_param.stride(0);
34 | stride_w_ = conv_param.stride(0);
35 | }
36 | else
37 | {
38 | stride_h_ = conv_param.stride(0);
39 | stride_w_ = conv_param.stride(1);
40 | }
41 | }
42 | if (conv_param.has_pad_h() && conv_param.has_pad_w()) {
43 | pad_h_ = conv_param.pad_h();
44 | pad_w_ = conv_param.pad_w();
45 | } else {
46 | if (conv_param.pad_size() == 1)
47 | {
48 | pad_h_ = conv_param.pad(0);
49 | pad_w_ = conv_param.pad(0);
50 | }
51 | else
52 | {
53 | pad_h_ = conv_param.pad(0);
54 | pad_w_ = conv_param.pad(1);
55 | }
56 | }
57 | if (conv_param.dilation_size() > 0)
58 | {
59 | if (conv_param.dilation_size() == 1)
60 | {
61 | dilation_h_ = conv_param.dilation(0);
62 | dilation_w_ = conv_param.dilation(0);
63 | }
64 | else
65 | {
66 | dilation_h_ = conv_param.dilation(0);
67 | dilation_w_ = conv_param.dilation(1);
68 | }
69 | }
70 | else
71 | {
72 | dilation_h_ = 1;
73 | dilation_w_ = 1;
74 | }
75 | vector weight_shape(4);
76 | weight_shape[0] = bottom[0]->channels();
77 | weight_shape[1] = 1;
78 | weight_shape[2] = kernel_h_;
79 | weight_shape[3] = kernel_w_;
80 | vector bias_shape;
81 | if (conv_param.bias_term())
82 | {
83 | bias_shape.push_back(bottom[0]->channels());
84 | }
85 | if (this->blobs_.size() == 0) {
86 | if (conv_param.bias_term()) {
87 | this->blobs_.resize(2);
88 | } else {
89 | this->blobs_.resize(1);
90 | }
91 | this->blobs_[0].reset(new Blob(weight_shape));
92 | shared_ptr > weight_filler(GetFiller(conv_param.weight_filler()));
93 | weight_filler->Fill(this->blobs_[0].get());
94 | if (conv_param.bias_term()) {
95 | this->blobs_[1].reset(new Blob(bias_shape));
96 | shared_ptr > bias_filler(GetFiller(conv_param.bias_filler()));
97 | bias_filler->Fill(this->blobs_[1].get());
98 | }
99 | }
100 | this->param_propagate_down_.resize(this->blobs_.size(), true);
101 | }
102 |
103 | template
104 | void ConvolutionDepthwiseLayer::Reshape(const vector*>& bottom,
105 | const vector*>& top) {
106 | vector top_shape;
107 | top_shape.push_back(bottom[0]->num());
108 | top_shape.push_back(bottom[0]->channels());
109 | top_shape.push_back((bottom[0]->height() + 2 * pad_h_ - (dilation_h_ * (kernel_h_ - 1) + 1)) / stride_h_ + 1);
110 | top_shape.push_back((bottom[0]->width() + 2 * pad_w_ - (dilation_w_ * (kernel_w_ - 1) + 1)) / stride_w_ + 1);
111 | top[0]->Reshape(top_shape);
112 | vector weight_buffer_shape;
113 | weight_buffer_shape.push_back(bottom[0]->channels());
114 | weight_buffer_shape.push_back(kernel_h_);
115 | weight_buffer_shape.push_back(kernel_w_);
116 | weight_buffer_shape.push_back(bottom[0]->num());
117 | weight_buffer_shape.push_back(top[0]->height());
118 | weight_buffer_shape.push_back(top[0]->width());
119 | weight_buffer_.Reshape(weight_buffer_shape);
120 | vector weight_multiplier_shape;
121 | weight_multiplier_shape.push_back(bottom[0]->num());
122 | weight_multiplier_shape.push_back(top[0]->height());
123 | weight_multiplier_shape.push_back(top[0]->width());
124 | weight_multiplier_.Reshape(weight_multiplier_shape);
125 | caffe_gpu_set(weight_multiplier_.count(), Dtype(1), weight_multiplier_.mutable_gpu_data());
126 | if (this->layer_param_.convolution_param().bias_term())
127 | {
128 | vector bias_buffer_shape;
129 | bias_buffer_shape.push_back(bottom[0]->channels());
130 | bias_buffer_shape.push_back(bottom[0]->num());
131 | bias_buffer_shape.push_back(top[0]->height());
132 | bias_buffer_shape.push_back(top[0]->width());
133 | bias_buffer_.Reshape(bias_buffer_shape);
134 | vector bias_multiplier_shape;
135 | bias_multiplier_shape.push_back(bottom[0]->num());
136 | bias_multiplier_shape.push_back(top[0]->height());
137 | bias_multiplier_shape.push_back(top[0]->width());
138 | bias_multiplier_.Reshape(bias_multiplier_shape);
139 | caffe_gpu_set(bias_multiplier_.count(), Dtype(1), bias_multiplier_.mutable_gpu_data());
140 | }
141 | }
142 |
143 | template
144 | void ConvolutionDepthwiseLayer::Forward_cpu(const vector*>& bottom,
145 | const vector*>& top)
146 | {
147 | const int num = top[0]->num();
148 | const int channels = top[0]->channels();
149 | const int top_height = top[0]->height();
150 | const int top_width = top[0]->width();
151 | const int bottom_height = bottom[0]->height();
152 | const int bottom_width = bottom[0]->width();
153 | const Dtype* bottom_data = bottom[0]->cpu_data();
154 | const Dtype* weight_data_base = this->blobs_[0]->cpu_data();
155 | Dtype* top_data = top[0]->mutable_cpu_data();
156 | for (int n = 0; n < num; ++n)
157 | {
158 | for (int c = 0; c < channels; ++c)
159 | {
160 | for (int h = 0; h < top_height; ++h)
161 | {
162 | for (int w = 0; w < top_width; ++w)
163 | {
164 | const Dtype* weight_data = weight_data_base + c * kernel_h_ * kernel_w_;
165 | Dtype value = 0;
166 | for (int kh = 0; kh < kernel_h_; ++kh)
167 | {
168 | for (int kw = 0; kw < kernel_w_; ++kw)
169 | {
170 | int h_in = -pad_h_ + h * stride_h_ + kh * dilation_h_;
171 | int w_in = -pad_w_ + w * stride_w_ + kw * dilation_w_;
172 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width))
173 | {
174 | int offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in;
175 | value += (*weight_data) * bottom_data[offset];
176 | }
177 | ++weight_data;
178 | }
179 | }
180 | *top_data++ = value;
181 | }
182 | }
183 | }
184 | }
185 | if (this->layer_param_.convolution_param().bias_term())
186 | {
187 | top_data = top[0]->mutable_cpu_data();
188 | for (int n = 0; n < num; ++n)
189 | {
190 | const Dtype* bias_data = this->blobs_[1]->cpu_data();
191 | for (int c = 0; c < channels; ++c)
192 | {
193 | for (int h = 0; h < top_height; ++h)
194 | {
195 | for (int w = 0; w < top_width; ++w)
196 | {
197 | *top_data += *bias_data;
198 | ++top_data;
199 | }
200 | }
201 | ++bias_data;
202 | }
203 | }
204 | }
205 | }
206 |
207 | template
208 | void ConvolutionDepthwiseLayer::Backward_cpu(const vector*>& top,
209 | const vector& propagate_down, const vector*>& bottom)
210 | {
211 | const int num = top[0]->num();
212 | const int channels = top[0]->channels();
213 | const int top_height = top[0]->height();
214 | const int top_width = top[0]->width();
215 | const int bottom_height = bottom[0]->height();
216 | const int bottom_width = bottom[0]->width();
217 | caffe_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_cpu_diff());
218 | if (this->layer_param_.convolution_param().bias_term() && this->param_propagate_down_[1])
219 | {
220 | const Dtype* top_diff = top[0]->cpu_diff();
221 | for (int n = 0; n < num; ++n)
222 | {
223 | Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();
224 | for (int c = 0; c < channels; ++c)
225 | {
226 | for (int h = 0; h < top_height; ++h)
227 | {
228 | for (int w = 0; w < top_width; ++w)
229 | {
230 | *bias_diff += *top_diff;
231 | ++top_diff;
232 | }
233 | }
234 | ++bias_diff;
235 | }
236 | }
237 | }
238 | if (this->param_propagate_down_[0])
239 | {
240 | const Dtype* top_diff = top[0]->cpu_diff();
241 | const Dtype* bottom_data = bottom[0]->cpu_data();
242 | Dtype* weight_diff_base = this->blobs_[0]->mutable_cpu_diff();
243 | for (int n = 0; n < num; ++n)
244 | {
245 | for (int c = 0; c < channels; ++c)
246 | {
247 | for (int h = 0; h < top_height; ++h)
248 | {
249 | for (int w = 0; w < top_width; ++w)
250 | {
251 | Dtype* weight_diff = weight_diff_base + c * kernel_h_ * kernel_w_;
252 | for (int kh = 0; kh < kernel_h_; ++kh)
253 | {
254 | for (int kw = 0; kw < kernel_w_; ++kw)
255 | {
256 | int h_in = -pad_h_ + h * stride_h_ + kh * dilation_h_;
257 | int w_in = -pad_w_ + w * stride_w_ + kw * dilation_w_;
258 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width))
259 | {
260 | int offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in;
261 | *weight_diff += bottom_data[offset] * (*top_diff);
262 | }
263 | ++weight_diff;
264 | }
265 | }
266 | ++top_diff;
267 | }
268 | }
269 | }
270 | }
271 | }
272 | if (propagate_down[0])
273 | {
274 | const Dtype* top_diff = top[0]->cpu_diff();
275 | const Dtype* weight_data_base = this->blobs_[0]->cpu_data();
276 | Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
277 | for (int n = 0; n < num; ++n)
278 | {
279 | for (int c = 0; c < channels; ++c)
280 | {
281 | for (int h = 0; h < top_height; ++h)
282 | {
283 | for (int w = 0; w < top_width; ++w)
284 | {
285 | const Dtype* weight_data = weight_data_base + c * kernel_h_ * kernel_w_;
286 | for (int kh = 0; kh < kernel_h_; ++kh)
287 | {
288 | for (int kw = 0; kw < kernel_w_; ++kw)
289 | {
290 | int h_in = -pad_h_ + h * stride_h_ + kh * dilation_h_;
291 | int w_in = -pad_w_ + w * stride_w_ + kw * dilation_w_;
292 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width))
293 | {
294 | int offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in;
295 | bottom_diff[offset] += (*weight_data) * (*top_diff);
296 | }
297 | ++weight_data;
298 | }
299 | }
300 | ++top_diff;
301 | }
302 | }
303 | }
304 | }
305 | }
306 | }
307 |
308 | #ifdef CPU_ONLY
309 | STUB_GPU(ConvolutionDepthwiseLayer);
310 | #endif
311 |
312 | INSTANTIATE_CLASS(ConvolutionDepthwiseLayer);
313 | REGISTER_LAYER_CLASS(ConvolutionDepthwise);
314 |
315 | } // namespace caffe
316 |
--------------------------------------------------------------------------------
/caffe_need/conv_dw_layer.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include "caffe/layers/conv_dw_layer.hpp"
3 | #include "caffe/util/gpu_util.cuh"
4 |
5 | namespace caffe {
6 |
7 | template
8 | __global__ void ConvolutionDepthwiseWeightForward(const int nthreads,
9 | const Dtype* const bottom_data, const Dtype* const weight_data, const int num, const int channels,
10 | const int top_height, const int top_width, const int bottom_height, const int bottom_width,
11 | const int kernel_h, const int kernel_w, const int stride_h, const int stride_w,
12 | const int pad_h, const int pad_w, const int dilation_h, const int dilation_w,
13 | Dtype* const top_data) {
14 | CUDA_KERNEL_LOOP(index, nthreads) {
15 | const int n = index / channels / top_height / top_width;
16 | const int c = (index / top_height / top_width) % channels;
17 | const int h = (index / top_width) % top_height;
18 | const int w = index % top_width;
19 | const Dtype* weight = weight_data + c * kernel_h * kernel_w;
20 | Dtype value = 0;
21 | for (int kh = 0; kh < kernel_h; ++kh)
22 | {
23 | for (int kw = 0; kw < kernel_w; ++kw)
24 | {
25 | const int h_in = -pad_h + h * stride_h + kh * dilation_h;
26 | const int w_in = -pad_w + w * stride_w + kw * dilation_w;
27 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width))
28 | {
29 | const int offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in;
30 | value += (*weight) * bottom_data[offset];
31 | }
32 | ++weight;
33 | }
34 | }
35 | top_data[index] = value;
36 | }
37 | }
38 |
39 | template
40 | __global__ void ConvolutionDepthwiseBiasForward(const int nthreads,
41 | const Dtype* const bias_data, const int num, const int channels,
42 | const int top_height, const int top_width, Dtype* const top_data) {
43 | CUDA_KERNEL_LOOP(index, nthreads) {
44 | const int c = (index / top_height / top_width) % channels;
45 | top_data[index] += bias_data[c];
46 | }
47 | }
48 |
49 | template
50 | void ConvolutionDepthwiseLayer::Forward_gpu(const vector*>& bottom,
51 | const vector*>& top) {
52 | const Dtype* bottom_data = bottom[0]->gpu_data();
53 | Dtype* top_data = top[0]->mutable_gpu_data();
54 | const Dtype* weight_data = this->blobs_[0]->gpu_data();
55 | const int count = top[0]->count();
56 | const int num = top[0]->num();
57 | const int channels = top[0]->channels();
58 | const int top_height = top[0]->height();
59 | const int top_width = top[0]->width();
60 | const int bottom_height = bottom[0]->height();
61 | const int bottom_width = bottom[0]->width();
62 | ConvolutionDepthwiseWeightForward<<>>(
63 | count, bottom_data, weight_data, num, channels,
64 | top_height, top_width, bottom_height, bottom_width,
65 | kernel_h_, kernel_w_, stride_h_, stride_w_,
66 | pad_h_, pad_w_, dilation_h_, dilation_w_, top_data);
67 | if (this->layer_param_.convolution_param().bias_term())
68 | {
69 | const Dtype* bias_data = this->blobs_[1]->gpu_data();
70 | ConvolutionDepthwiseBiasForward<<>>(
71 | count, bias_data, num, channels,
72 | top_height, top_width, top_data);
73 | }
74 | }
75 |
76 | template
77 | __global__ void ConvolutionDepthwiseWeightBackward(const int nthreads,
78 | const Dtype* const top_diff, const Dtype* const bottom_data, const int num, const int channels,
79 | const int top_height, const int top_width, const int bottom_height, const int bottom_width,
80 | const int kernel_h, const int kernel_w, const int stride_h, const int stride_w,
81 | const int pad_h, const int pad_w, const int dilation_h, const int dilation_w,
82 | Dtype* const buffer_data) {
83 | CUDA_KERNEL_LOOP(index, nthreads) {
84 | const int h = (index / top_width) % top_height;
85 | const int w = index % top_width;
86 | const int kh = (index / kernel_w / num / top_height / top_width) % kernel_h;
87 | const int kw = (index / num / top_height / top_width) % kernel_w;
88 | const int h_in = -pad_h + h * stride_h + kh * dilation_h;
89 | const int w_in = -pad_w + w * stride_w + kw * dilation_w;
90 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width))
91 | {
92 | const int c = index / kernel_h / kernel_w / num / top_height / top_width;
93 | const int n = (index / top_height / top_width) % num;
94 | const int top_offset = ((n * channels + c) * top_height + h) * top_width + w;
95 | const int bottom_offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in;
96 | buffer_data[index] = top_diff[top_offset] * bottom_data[bottom_offset];
97 | }
98 | else
99 | {
100 | buffer_data[index] = 0;
101 | }
102 | }
103 | }
104 |
105 | template
106 | __global__ void ConvolutionDepthwiseBottomBackward(const int nthreads,
107 | const Dtype* const top_diff, const Dtype* const weight_data, const int num, const int channels,
108 | const int top_height, const int top_width, const int bottom_height, const int bottom_width,
109 | const int kernel_h, const int kernel_w, const int stride_h, const int stride_w,
110 | const int pad_h, const int pad_w, const int dilation_h, const int dilation_w,
111 | Dtype* const bottom_diff) {
112 | CUDA_KERNEL_LOOP(index, nthreads) {
113 | const int n = index / channels / bottom_height / bottom_width;
114 | const int c = (index / bottom_height / bottom_width) % channels;
115 | const int h = (index / bottom_width) % bottom_height;
116 | const int w = index % bottom_width;
117 | const Dtype* weight = weight_data + c * kernel_h * kernel_w;
118 | Dtype value = 0;
119 | for (int kh = 0; kh < kernel_h; ++kh)
120 | {
121 | for (int kw = 0; kw < kernel_w; ++kw)
122 | {
123 | const int h_out_s = h + pad_h - kh * dilation_h;
124 | const int w_out_s = w + pad_w - kw * dilation_w;
125 | if (((h_out_s % stride_h) == 0) && ((w_out_s % stride_w) == 0))
126 | {
127 | const int h_out = h_out_s / stride_h;
128 | const int w_out = w_out_s / stride_w;
129 | if ((h_out >= 0) && (h_out < top_height) && (w_out >= 0) && (w_out < top_width))
130 | {
131 | const int offset = ((n * channels + c) * top_height + h_out) * top_width + w_out;
132 | value += (*weight) * top_diff[offset];
133 | }
134 | }
135 | ++weight;
136 | }
137 | }
138 | bottom_diff[index] += value;
139 | }
140 | }
141 |
142 | template
143 | __global__ void ConvolutionDepthwiseBiasBackward(const int nthreads,
144 | const Dtype* const top_diff, const int num, const int channels,
145 | const int top_height, const int top_width, Dtype* const buffer_data) {
146 | CUDA_KERNEL_LOOP(index, nthreads) {
147 | const int c = index / num / top_height / top_width;
148 | const int n = (index / top_height / top_width) % num;
149 | const int h = (index / top_width) % top_height;
150 | const int w = index % top_width;
151 | const int offset = ((n * channels + c) * top_height + h) * top_width + w;
152 | buffer_data[index] = top_diff[offset];
153 | }
154 | }
155 |
// GPU backward pass for the depthwise convolution: computes (1) the bias
// gradient, (2) the per-channel filter gradient, and (3) the gradient with
// respect to the bottom blob, each via its own CUDA kernel; (1) and (2)
// finish with a gemv reduction over the batch/spatial positions.
// NOTE(review): template parameter lists and the CUDA <<<grid, block>>>
// launch configurations appear to have been stripped from this listing by
// the extraction step; the code is annotated as-is.
template
void ConvolutionDepthwiseLayer::Backward_gpu(const vector*>& top,
    const vector& propagate_down, const vector*>& bottom) {
  const Dtype* top_diff = top[0]->gpu_diff();
  const int bottom_count = bottom[0]->count();
  const int num = top[0]->num();
  const int channels = top[0]->channels();
  const int top_height = top[0]->height();
  const int top_width = top[0]->width();
  const int bottom_height = bottom[0]->height();
  const int bottom_width = bottom[0]->width();
  // Number of per-position buffer entries reduced by each gemv below.
  const int length = num * top_height * top_width;
  // Clear the input gradient before the bottom-backward kernel writes it.
  caffe_gpu_set(bottom_count, Dtype(0), bottom[0]->mutable_gpu_diff());
  // --- (1) bias gradient ---
  if (this->layer_param_.convolution_param().bias_term() && this->param_propagate_down_[1])
  {
    const int bias_buffer_count = bias_buffer_.count();
    Dtype* bias_buffer_mutable_data = bias_buffer_.mutable_gpu_data();
    // Scatter each top-diff element into the per-(channel, position) buffer.
    ConvolutionDepthwiseBiasBackward<<>>(
        bias_buffer_count, top_diff, num, channels,
        top_height, top_width, bias_buffer_mutable_data);
    const int bias_count = this->blobs_[1]->count();
    const Dtype* bias_buffer_data = bias_buffer_.gpu_data();
    Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff();
    const Dtype* bias_multiplier_data = bias_multiplier_.gpu_data();
    // Sum the buffer over `length` positions with a ones multiplier;
    // beta == 1 accumulates into any gradient already in bias_diff.
    caffe_gpu_gemv(CblasNoTrans, bias_count, length, Dtype(1), bias_buffer_data, bias_multiplier_data, Dtype(1), bias_diff);
  }
  // --- (2) filter (weight) gradient ---
  if (this->param_propagate_down_[0])
  {
    const int weight_buffer_count = weight_buffer_.count();
    const Dtype* bottom_data = bottom[0]->gpu_data();
    Dtype* weight_buffer_mutable_data = weight_buffer_.mutable_gpu_data();
    // Per-position weight-gradient contributions from top_diff and bottom_data.
    ConvolutionDepthwiseWeightBackward<<>>(
        weight_buffer_count, top_diff, bottom_data, num, channels,
        top_height, top_width, bottom_height, bottom_width,
        kernel_h_, kernel_w_, stride_h_, stride_w_,
        pad_h_, pad_w_, dilation_h_, dilation_w_, weight_buffer_mutable_data);
    const int weight_count = this->blobs_[0]->count();
    const Dtype* weight_buffer_data = weight_buffer_.gpu_data();
    Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
    const Dtype* weight_multiplier_data = weight_multiplier_.gpu_data();
    // Reduce the buffer to the filter gradient, accumulating (beta == 1).
    caffe_gpu_gemv(CblasNoTrans, weight_count, length, Dtype(1), weight_buffer_data, weight_multiplier_data, Dtype(1), weight_diff);
  }
  // --- (3) gradient w.r.t. the input ---
  if (propagate_down[0])
  {
    const Dtype* weight_data = this->blobs_[0]->gpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    ConvolutionDepthwiseBottomBackward<<>>(
        bottom_count, top_diff, weight_data, num, channels,
        top_height, top_width, bottom_height, bottom_width,
        kernel_h_, kernel_w_, stride_h_, stride_w_,
        pad_h_, pad_w_, dilation_h_, dilation_w_, bottom_diff);
  }
}
209 |
210 | INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionDepthwiseLayer);
211 |
212 | } // namespace caffe
213 |
--------------------------------------------------------------------------------
/caffe_need/conv_dw_layer.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CAFFE_CONV_DW_LAYER_HPP_
2 | #define CAFFE_CONV_DW_LAYER_HPP_
3 |
4 | #include
5 | #include "caffe/blob.hpp"
6 | #include "caffe/layer.hpp"
7 | #include "caffe/proto/caffe.pb.h"
8 |
9 | namespace caffe {
10 |
// Depthwise ("channel-wise") convolution layer: every input channel is
// convolved with its own filter.  Declares CPU and GPU Forward/Backward.
// NOTE(review): template parameter lists and the template arguments of
// Layer/Blob/vector appear to have been stripped from this listing by the
// extraction step; the declarations are annotated as-is.
template
class ConvolutionDepthwiseLayer : public Layer {
 public:
  explicit ConvolutionDepthwiseLayer(const LayerParameter& param)
      : Layer(param) {}
  virtual void LayerSetUp(const vector*>& bottom,
      const vector*>& top);
  virtual void Reshape(const vector*>& bottom,
      const vector*>& top);
  // Exactly one input blob and one output blob.
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }
  virtual inline const char* type() const { return "ConvolutionDepthwise"; }
 protected:
  virtual void Forward_cpu(const vector*>& bottom,
      const vector*>& top);
  virtual void Forward_gpu(const vector*>& bottom,
      const vector*>& top);
  virtual void Backward_cpu(const vector*>& top,
      const vector& propagate_down, const vector*>& bottom);
  virtual void Backward_gpu(const vector*>& top,
      const vector& propagate_down, const vector*>& bottom);
  // Convolution geometry, read from the layer's ConvolutionParameter.
  unsigned int kernel_h_;
  unsigned int kernel_w_;
  unsigned int stride_h_;
  unsigned int stride_w_;
  unsigned int pad_h_;
  unsigned int pad_w_;
  unsigned int dilation_h_;
  unsigned int dilation_w_;
  // Scratch blobs for the backward pass: per-position gradient
  // contributions plus the all-ones vectors used to reduce them via gemv.
  Blob weight_buffer_;
  Blob weight_multiplier_;
  Blob bias_buffer_;
  Blob bias_multiplier_;
};
45 |
46 | } // namespace caffe
47 |
48 | #endif // CAFFE_CONV_DW_LAYER_HPP_
49 |
--------------------------------------------------------------------------------
/caffe_need/image_data_layer.cpp:
--------------------------------------------------------------------------------
1 | #ifdef USE_OPENCV
2 | #include
3 |
4 | #include // NOLINT(readability/streams)
5 | #include // NOLINT(readability/streams)
6 | #include
7 | #include
8 | #include
9 |
10 | #include "caffe/data_transformer.hpp"
11 | #include "caffe/layers/base_data_layer.hpp"
12 | #include "caffe/layers/image_data_layer.hpp"
13 | #include "caffe/util/benchmark.hpp"
14 | #include "caffe/util/io.hpp"
15 | #include "caffe/util/math_functions.hpp"
16 | #include "caffe/util/rng.hpp"
17 |
18 | namespace caffe {
19 |
// Destructor: stop the background prefetch thread before member state is
// destroyed, so the thread never touches freed data.
template
ImageDataLayer::~ImageDataLayer() {
  this->StopInternalThread();
}
24 |
// Parse the source list file and initialize the data and label blobs.
// Each input line is an image path followed by a variable number of float
// labels (multi-label regression, e.g. landmark coordinates); the label
// dimensionality is taken from the first line and enforced on the rest.
// NOTE(review): template parameter lists and some template arguments appear
// to have been stripped from this listing by the extraction step, and the
// span that pushes (path, labels) into lines_ is collapsed into one garbled
// line below; annotated as-is.
template
void ImageDataLayer::DataLayerSetUp(const vector*>& bottom,
      const vector*>& top) {
  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width  = this->layer_param_.image_data_param().new_width();
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  string root_folder = this->layer_param_.image_data_param().root_folder();

  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";
  // Read the file with filenames and labels
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string line;
  int pos; // int pos ;
  int label_dim = 0 ;
  bool gfirst = true;
  while (std::getline(infile, line)) {
    // NOTE(review): this looks like an attempt to trim a trailing blank, but
    // the -1 offsets are suspicious (may drop a real character) -- confirm
    // against the expected list-file format.
    if(line.find_last_of(' ')==line.size()-2) line.erase(line.find_last_not_of(' ')-1);
    pos = line.find_first_of(' ');
    string img_path = line.substr(0, pos);
    int p0 = pos + 1;
    vector label_vec;
    // Split the remainder on spaces and parse each token as a float label.
    while (pos != -1){
      pos = line.find_first_of(' ', p0);
      float v = atof(line.substr(p0, pos).c_str());
      label_vec.push_back(v);
      p0 = pos + 1;
    }
    if (gfirst){
      // First line fixes the label dimensionality for the whole file.
      label_dim = label_vec.size();
      gfirst = false;
      LOG(INFO) << "label dim: " << label_dim;
    }
    // NOTE(review): the extraction collapsed several original lines here
    // (including the lines_.push_back of (img_path, label_vec) and the loop
    // close); preserved byte-for-byte.
    CHECK_EQ(label_vec.size(), label_dim) << "label dim not match in: " << lines_.size()<<", "<layer_param_.image_data_param().shuffle()) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  } else {
    if (this->phase_ == TRAIN && Caffe::solver_rank() > 0 &&
        this->layer_param_.image_data_param().rand_skip() == 0) {
      LOG(WARNING) << "Shuffling or skipping recommended for multi-GPU";
    }
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                    new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  const int batch_size = this->layer_param_.image_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label blob: batch_size x label_dim
  vector label_shape(2, batch_size);
  label_shape[1] = label_dim;
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->label_.Reshape(label_shape);
  }
}
117 |
// Reshuffle the (path, labels) list in place using the prefetch RNG so the
// epoch order is reproducible per seed and independent of other RNG users.
template
void ImageDataLayer::ShuffleImages() {
  caffe::rng_t* prefetch_rng =
      static_cast(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}
124 |
125 | // This function is called on prefetch thread
126 | template
127 | void ImageDataLayer::load_batch(Batch* batch) {
128 | CPUTimer batch_timer;
129 | batch_timer.Start();
130 | double read_time = 0;
131 | double trans_time = 0;
132 | CPUTimer timer;
133 | CHECK(batch->data_.count());
134 | CHECK(this->transformed_data_.count());
135 | ImageDataParameter image_data_param = this->layer_param_.image_data_param();
136 | const int batch_size = image_data_param.batch_size();
137 | const int new_height = image_data_param.new_height();
138 | const int new_width = image_data_param.new_width();
139 | const bool is_color = image_data_param.is_color();
140 | string root_folder = image_data_param.root_folder();
141 |
142 | // Reshape according to the first image of each batch
143 | // on single input batches allows for inputs of varying dimension.
144 | cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
145 | new_height, new_width, is_color);
146 | CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
147 | // Use data_transformer to infer the expected blob shape from a cv_img.
148 | vector top_shape = this->data_transformer_->InferBlobShape(cv_img);
149 | this->transformed_data_.Reshape(top_shape);
150 | // Reshape batch according to the batch_size.
151 | top_shape[0] = batch_size;
152 | batch->data_.Reshape(top_shape);///////////////////////////////////////////////
153 | vector top_shape1(4);
154 | top_shape1[0] = batch_size;
155 | top_shape1[1] = lines_[0].second.size();
156 | top_shape1[2] = 1;
157 | top_shape1[3] = 1;
158 | batch->data_.Reshape(top_shape);
159 |
160 | Dtype* prefetch_data = batch->data_.mutable_cpu_data();
161 | Dtype* prefetch_label = batch->label_.mutable_cpu_data();
162 |
163 | // datum scales
164 | const int lines_size = lines_.size();
165 | for (int item_id = 0; item_id < batch_size; ++item_id) {
166 | // get a blob
167 | timer.Start();
168 | CHECK_GT(lines_size, lines_id_);
169 | cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
170 | new_height, new_width, is_color);
171 | CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
172 | read_time += timer.MicroSeconds();
173 | timer.Start();
174 | // Apply transformations (mirror, crop...) to the image
175 | int offset = batch->data_.offset(item_id);
176 | this->transformed_data_.set_cpu_data(prefetch_data + offset);
177 | this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
178 | trans_time += timer.MicroSeconds();
179 |
180 | for (int i = 0; i < top_shape1[1]; i++) {
181 | prefetch_label[item_id*top_shape1[1] + i] = lines_[lines_id_].second[i];
182 | }
183 | // go to the next iter
184 | lines_id_++;
185 | if (lines_id_ >= lines_size) {
186 | // We have reached the end. Restart from the first.
187 | DLOG(INFO) << "Restarting data prefetching from start.";
188 | lines_id_ = 0;
189 | if (this->layer_param_.image_data_param().shuffle()) {
190 | ShuffleImages();
191 | }
192 | }
193 | }
194 | batch_timer.Stop();
195 | DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
196 | DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
197 | DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
198 | }
199 |
200 | INSTANTIATE_CLASS(ImageDataLayer);
201 | REGISTER_LAYER_CLASS(ImageData);
202 |
203 | } // namespace caffe
204 | #endif // USE_OPENCV
205 |
--------------------------------------------------------------------------------
/caffe_need/image_data_layer.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CAFFE_IMAGE_DATA_LAYER_HPP_
2 | #define CAFFE_IMAGE_DATA_LAYER_HPP_
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | #include "caffe/blob.hpp"
9 | #include "caffe/data_transformer.hpp"
10 | #include "caffe/internal_thread.hpp"
11 | #include "caffe/layer.hpp"
12 | #include "caffe/layers/base_data_layer.hpp"
13 | #include "caffe/proto/caffe.pb.h"
14 |
15 | namespace caffe {
16 |
17 | /**
18 | * @brief Provides data to the Net from image files.
19 | *
20 | * TODO(dox): thorough documentation for Forward and proto params.
21 | */
// NOTE(review): template parameter lists and template arguments appear to
// have been stripped from this listing by the extraction step.
template
class ImageDataLayer : public BasePrefetchingDataLayer {
 public:
  explicit ImageDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer(param) {}
  virtual ~ImageDataLayer();
  // Parses the source list file (image path followed by a variable number
  // of float labels per line) and sets up the data/label blob shapes.
  virtual void DataLayerSetUp(const vector*>& bottom,
      const vector*>& top);

  virtual inline const char* type() const { return "ImageData"; }
  // No bottom blobs; two tops: data and label.
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  shared_ptr prefetch_rng_;
  // Reshuffles lines_ with the prefetch RNG.
  virtual void ShuffleImages();
  // Fills one prefetch batch on the background thread.
  virtual void load_batch(Batch* batch);

  // edited by tingsong
  // vector > lines_;
  // Each entry: (image path, vector of float labels).
  vector > > lines_;
  // Index of the next line to read; wraps around at the end of lines_.
  int lines_id_;
};
45 |
46 |
47 | } // namespace caffe
48 |
49 | #endif // CAFFE_IMAGE_DATA_LAYER_HPP_
50 |
--------------------------------------------------------------------------------
/caffe_need/readme.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/caffe_need/readme.txt
--------------------------------------------------------------------------------
/readme_img/ccnntexie.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/readme_img/ccnntexie.PNG
--------------------------------------------------------------------------------
/readme_img/l1.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/readme_img/l1.PNG
--------------------------------------------------------------------------------
/readme_img/l2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/readme_img/l2.PNG
--------------------------------------------------------------------------------
/util/tools.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | import numpy as np
4 | import os
5 | import cv2
6 |
def convert_point(nor_p, factor):
    """Map a normalized coordinate in [-1, 1] to a pixel index in [0, 2*factor]."""
    pixel = float(nor_p) * factor + factor
    return int(round(pixel))
10 |
# draw points for level_1
def drawpoints_1(img_, landmark):
    """Draw level-1 landmarks on a copy of img_ and return the copy.

    landmark is a flat sequence x1, y1, x2, y2, ... with values normalized
    to [-1, 1]; they are mapped to pixel coordinates before drawing.
    """
    h, w, c = img_.shape
    w1 = (w - 1) / 2  # half-extent used to map [-1, 1] -> [0, w-1]
    h1 = (h - 1) / 2
    draw_img = img_.copy()
    # FIX: // keeps num_points an int on Python 3 (plain / yields a float,
    # which makes range() raise TypeError); same result on Python 2.
    num_points = len(landmark) // 2
    for i in range(num_points):
        x_ = convert_point(landmark[2 * i + 0], w1)
        y_ = convert_point(landmark[2 * i + 1], h1)
        cv2.circle(draw_img, (x_, y_), 3, (0, 0, 255))  # red, radius 3
    return draw_img
24 |
# draw points for level_2
def drawpoints_2(img_, landmark):
    """Draw level-2 landmarks on a copy of img_ and return the copy.

    landmark is a flat sequence x1, y1, x2, y2, ... already in pixel
    coordinates ([0, w] / [0, h]).
    """
    draw_img = img_.copy()
    # FIX: // keeps num_points an int on Python 3 (plain / yields a float,
    # which makes range() raise TypeError); same result on Python 2.
    num_points = len(landmark) // 2
    for i in range(num_points):
        x_ = landmark[2 * i + 0]
        y_ = landmark[2 * i + 1]
        cv2.circle(draw_img, (x_, y_), 3, (255, 0, 0))  # blue, radius 3
    return draw_img
35 |
# draw raw/ground-truth points (the original comment said "level_2",
# but this variant rounds via myint and draws in green)
def drawpoints_0(img_, landmark):
    """Draw landmarks (pixel coordinates, possibly float) in green on a copy.

    landmark is a flat sequence x1, y1, x2, y2, ...; each value is rounded
    to the nearest integer with myint before drawing.
    """
    draw_img = img_.copy()
    # FIX: // keeps num_points an int on Python 3 (plain / yields a float,
    # which makes range() raise TypeError); same result on Python 2.
    num_points = len(landmark) // 2
    for i in range(num_points):
        x_ = myint(landmark[2 * i + 0])
        y_ = myint(landmark[2 * i + 1])
        cv2.circle(draw_img, (x_, y_), 4, (0, 255, 0))  # green, radius 4
    return draw_img
46 |
def myint(numb):
    """Coerce a number (or numeric string) to the nearest integer."""
    value = float(numb)
    return int(round(value))
49 |
def cal_eucldist(v1, v2):
    """Euclidean distance between two equally-shaped numpy vectors."""
    diff = v1 - v2
    return np.sqrt(np.sum(diff ** 2))
52 |
def makedir(path):
    """Create path (including parents) unless it already exists."""
    if os.path.exists(path):
        return
    os.makedirs(path)
55 |
# label change to pixel
def label2points(l, w, h):
    """Convert normalized landmarks to pixel coordinates.

    l is a numpy array of interleaved x/y values in [-1, 1]; the result maps
    x onto [0, w-1] and y onto [0, h-1], rounded to whole pixels.  Returns a
    new array; l itself is not modified.
    (The unused num_points local from the original was removed.)
    """
    landmark = l.copy()  # don't mutate the caller's array
    w1 = (w - 1) / 2  # half-extent: maps [-1, 1] onto [0, w-1]
    h1 = (h - 1) / 2
    landmark[0::2] = landmark[0::2] * w1 + w1  # x coordinates
    landmark[1::2] = landmark[1::2] * h1 + h1  # y coordinates
    landmark = np.round(landmark)
    return landmark
67 |
def cal_dist_norm_eye(landmark):
    """Inter-ocular distance: Euclidean distance between the first two points
    (left eye at indices 0:2, right eye at indices 2:4)."""
    return cal_eucldist(landmark[0:2], landmark[2:4])
72 |
# r_l / o_l coordinates are in [-1,1]; err_1 is the mean error
def cal_error_nor_diag(img, r_l, o_l):
    """Landmark error between a ground-truth line r_l and a predicted line
    o_l, normalized by the image diagonal.

    Both lines are whitespace-separated: an id/path token followed by ten
    normalized coordinates in [-1, 1] (5 points, x/y interleaved).
    Returns (mean_error, per_point_errors).
    """
    h, w, c = img.shape
    n_p = 5
    # FIX: list(...) keeps this working on Python 3, where map() is lazy and
    # np.array(map(...)) would build a useless 0-d object array; a no-op on
    # Python 2.
    r_landmark = np.array(list(map(float, r_l.split()[1:2 * n_p + 1])))
    o_landmark = np.array(list(map(float, o_l.split()[1:2 * n_p + 1])))
    r_pix_landmark = label2points(r_landmark, w, h)
    o_pix_landmark = label2points(o_landmark, w, h)

    d_diag = np.sqrt(w * w + h * h)  # image diagonal, the normalizer
    err_all = 0
    err_5 = []
    for i in range(n_p):
        raw_point = r_pix_landmark[2 * i + 0:2 * i + 2]
        out_point = o_pix_landmark[2 * i + 0:2 * i + 2]
        err_ecul = cal_eucldist(raw_point, out_point) / d_diag
        err_all = err_all + err_ecul
        err_5.append(err_ecul)
    err_1 = round(err_all / n_p, 4)  # mean over the 5 points
    return err_1, err_5
95 |
96 |
97 | # crop_img for level_2
98 | def crop_img(in_img,in_land):
99 | p_nose = in_land[4:6]
100 | p_lefteye = in_land[0:2]
101 | d_nose_lefteye = cal_eucldist(p_nose,p_lefteye)
102 |
103 | w_start = np.round(p_nose[0] - 2*d_nose_lefteye).astype(int)
104 | w_end = np.round(p_nose[0] + 2*d_nose_lefteye).astype(int)
105 | h_start = np.round(p_nose[1] - 2*d_nose_lefteye).astype(int)
106 | h_end = np.round(p_nose[1] + 2*d_nose_lefteye).astype(int)
107 |
108 | h_img,w_img,c = in_img.shape
109 |
110 | if w_start < 0: w_start = 0
111 | if h_start < 0: h_start = 0
112 | if w_end > w_img: w_end = w_img
113 | if h_end > h_img: h_end = h_img
114 |
115 | crop_img = in_img.copy()
116 | crop_img = crop_img[h_start:h_end+1,w_start:w_end+1,:]
117 | return crop_img,w_start,h_start
118 |
# for AFLW: x1 x2 ... y1 y2 ... ---> x1 y1 x2 y2 ...
def change_order(in_land):
    """Reorder a landmark list from grouped (all x's, then all y's) to
    interleaved (x1, y1, x2, y2, ...).

    Generalized to any even-length list: the original hard-coded the y
    offset as 5 (only valid for 10 entries); using n_p is identical for
    that case and correct for every other length.  // also keeps n_p an
    int on Python 3, where plain / would break range().
    """
    n_p = len(in_land) // 2
    out_land = in_land[:]
    for i in range(n_p):
        out_land[2 * i + 0] = in_land[i]
        out_land[2 * i + 1] = in_land[i + n_p]
    return out_land
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
--------------------------------------------------------------------------------