├── 0_raw_data └── Code │ ├── divide_tr_te.py │ └── draw_point.py ├── 1_level_1 └── Code │ ├── 0_gen_data │ └── gen_l1_data.py │ ├── 1_draw_img │ └── draw_l1_point.py │ ├── 2_train │ ├── l1_mobilenet.prototxt │ ├── solver.prototxt │ └── train.sh │ ├── 3_inference │ ├── inferencen.py │ └── l1_deploy.prototxt │ ├── 4_evaluate │ ├── evaluate_test.py │ └── evaluate_train.py │ └── 5_crop_img │ ├── crop_test_img.py │ └── crop_train_img.py ├── 2_level_2 └── Code │ ├── 0_train │ ├── l2_mobilenet.prototxt │ ├── solver.prototxt │ └── train.sh │ ├── 1_inference │ ├── inferencen.py │ └── l2_deploy.prototxt │ └── 2_evaluate │ ├── evaluate_test.py │ └── evaluate_train.py ├── 3_demo ├── Code │ └── inferencen.py └── Data │ ├── demo.txt │ ├── img │ ├── 000054.jpg │ ├── 000133.jpg │ ├── 000167.jpg │ ├── 000275.jpg │ ├── 000335.jpg │ ├── 000765.jpg │ ├── 001102.jpg │ └── 001557.jpg │ ├── l1_deploy.prototxt │ ├── l1_net.caffemodel │ ├── l2_deploy.prototxt │ └── l2_net.caffemodel ├── README.md ├── caffe_need ├── conv_dw_layer.cpp ├── conv_dw_layer.cu ├── conv_dw_layer.hpp ├── image_data_layer.cpp ├── image_data_layer.hpp └── readme.txt ├── readme_img ├── ccnntexie.PNG ├── l1.PNG └── l2.PNG └── util └── tools.py /0_raw_data/Code/divide_tr_te.py: -------------------------------------------------------------------------------- 1 | # divide celebA dataset 2 | import sys 3 | sys.path.append('../../util') 4 | import tools 5 | import os 6 | import random 7 | import shutil 8 | 9 | raw_txt = '../Data/celeba_label.txt' 10 | relative_path = '../Data/img_celeba/' # for find the img 11 | train_txt = '../Result/raw_train_label.txt' # target txt 12 | test_txt = '../Result/raw_test_label.txt' 13 | train_img_fold = '../Result/train/' 14 | test_img_fold = '../Result/test/' 15 | tools.makedir(train_img_fold) 16 | tools.makedir(test_img_fold) 17 | 18 | per = 0.8 # percentage of train set 19 | line_num = 0 20 | train_num = 0 21 | test_num = 0 22 | train_f = open(train_txt,"w") 23 | test_f = 
open(test_txt,"w") 24 | for line in open(raw_txt): 25 | if line.isspace() : continue # skip empty line 26 | line_num += 1 27 | img_name = line.split()[0] 28 | full_img_path = relative_path + img_name 29 | a_rand = random.uniform(0,1) 30 | # train set 31 | if a_rand <= per: 32 | train_f.write(line) 33 | train_img_path = train_img_fold + img_name 34 | shutil.copy(full_img_path,train_img_path) 35 | train_num += 1 36 | # test set 37 | else: 38 | test_f.write(line) 39 | test_img_path = test_img_fold + img_name 40 | shutil.copy(full_img_path,test_img_path) 41 | test_num +=1 42 | print 'img : ', line_num 43 | train_f.close() 44 | test_f.close() 45 | 46 | 47 | 48 | print 'train set have ', train_num ,' examples.' 49 | print 'test set have ', test_num , ' examples.' 50 | print train_num ,' + ' ,test_num ,' = ', train_num+test_num 51 | print 'line_num is ', line_num -------------------------------------------------------------------------------- /0_raw_data/Code/draw_point.py: -------------------------------------------------------------------------------- 1 | # generate img and txt for level_1 2 | # The point order x1,x2,x3... 
3 | import sys 4 | sys.path.append('../../util') 5 | import tools 6 | import os 7 | import numpy as np 8 | import cv2 9 | 10 | train_txt = '../Result/raw_train_label.txt' # raw_txt 11 | test_txt = '../Result/raw_test_label.txt' 12 | relative_path = '../Data/img_celeba/' 13 | draw_dir = '../Result/draw_img/' # 14 | tools.makedir(draw_dir) 15 | 16 | n_p = 5 # num of points 17 | 18 | def myint(numb): 19 | return int(round(float(numb))) 20 | def drawpoint(raw_txt,o_dir): 21 | for line in open(raw_txt): 22 | if line.isspace() : continue # 23 | raw_land = list(line.split())[1:2*n_p+1] 24 | 25 | img_name = line.split()[0] 26 | full_img_path = relative_path + img_name 27 | img = cv2.imread(full_img_path) 28 | draw_img = img.copy() 29 | draw_img = tools.drawpoints_0(draw_img,raw_land) 30 | 31 | # output img 32 | sub_flod = o_dir + raw_txt.split('_')[-2] 33 | tools.makedir(sub_flod) 34 | draw_img_path = sub_flod + '/' + img_name 35 | print (draw_img_path) 36 | cv2.imwrite(draw_img_path,draw_img) 37 | open(raw_txt).close() 38 | print(raw_txt,' done!') 39 | 40 | drawpoint(test_txt,draw_dir) 41 | drawpoint(train_txt,draw_dir) -------------------------------------------------------------------------------- /1_level_1/Code/0_gen_data/gen_l1_data.py: -------------------------------------------------------------------------------- 1 | # generate img and txt for level_1 2 | # The point order has changed: x1,y1,x2... 
3 | import sys 4 | sys.path.append('../../../util') 5 | import tools 6 | import os 7 | import numpy as np 8 | import cv2 9 | train_txt = '../../../raw_data/Result/raw_train_label.txt' # raw_txt 10 | test_txt = '../../../raw_data/Result/raw_test_label.txt' 11 | l1_data_dir = '../../Data/' # target dir 12 | l1_train_txt = l1_data_dir + 'l1_train_label.txt' # target txt 13 | l1_test_txt = l1_data_dir + 'l1_test_label.txt' 14 | relative_path = '../../../raw_data/Data/img_celeba/' # for find the img 15 | tools.makedir(l1_data_dir) 16 | 17 | net_1_w = 48 18 | net_1_h = 48 19 | n_p = 5 # num of points 20 | def gendata(target_txt,raw_txt): 21 | with open(target_txt,"w") as f: 22 | for line in open(raw_txt): 23 | # txt 24 | if line.isspace() : continue 25 | img_name = line.split()[0] 26 | full_img_path = relative_path + img_name 27 | print full_img_path 28 | img = cv2.imread(full_img_path) 29 | 30 | w = img.shape[1] # weight is x axis 31 | h = img.shape[0] # height is y axis 32 | w1 = (w-1)/2 # for [-1,1] 33 | h1 = (h-1)/2 34 | 35 | raw_land = list(line.split())[1:2*n_p+1] 36 | new_line = img_name 37 | for i in range(n_p): 38 | x_ = round( (float(raw_land[2*i+0])-w1)/w1 , 4) # value is [-1,1] 39 | y_ = round( (float(raw_land[2*i+1])-h1)/h1 , 4) 40 | new_line = new_line + ' ' + str(x_) # note: The point order has changed: x1,y1,x2... 
41 | new_line = new_line + ' ' + str(y_) 42 | print('new_line: ', new_line) 43 | f.write(new_line + '\n') 44 | 45 | # image 46 | scale_img = cv2.resize(img,(net_1_w,net_1_h)) 47 | sub_flod = l1_data_dir + raw_txt.split('_')[2] + '/' 48 | tools.makedir(sub_flod) 49 | scale_img_path = sub_flod + img_name 50 | print 'output path ',scale_img_path 51 | cv2.imwrite(scale_img_path,scale_img) 52 | # print a 53 | open(raw_txt).close() 54 | gendata(l1_test_txt,test_txt) 55 | gendata(l1_train_txt,train_txt) 56 | 57 | -------------------------------------------------------------------------------- /1_level_1/Code/1_draw_img/draw_l1_point.py: -------------------------------------------------------------------------------- 1 | # draw points for level_1 2 | import sys 3 | sys.path.append('../../../util') 4 | import tools 5 | import os 6 | import numpy as np 7 | import cv2 8 | 9 | relative_path = '../../Data/' # for find the img 10 | relative_train_path = '../../Data/train/' 11 | relative_test_path = '../../Data/test/' 12 | 13 | train_txt = relative_path + 'l1_train_label.txt' # raw_txt 14 | test_txt = relative_path + 'l1_test_label.txt' 15 | 16 | draw_dir = relative_path + 'draw_img/' 17 | tools.makedir(draw_dir) 18 | n_p = 5 # num of points 19 | 20 | def drawpoint(raw_txt,o_dir,relative_img_path): 21 | for line in open(raw_txt): 22 | if line.isspace() : continue # 23 | img_name = line.split()[0] 24 | full_img_path = relative_img_path + img_name 25 | img = cv2.imread(full_img_path) 26 | draw_img = img.copy() 27 | 28 | w = img.shape[1] # width is x axis 29 | h = img.shape[0] # height is y axis 30 | w1 = (w-1)/2 # for [-1,1] 31 | h1 = (h-1)/2 32 | 33 | raw_land = list(line.split())[1:2*n_p+1] 34 | for i in range(n_p): # draw key points 35 | x_ = tools.convert_point(raw_land[2*i+0],w1) 36 | y_ = tools.convert_point(raw_land[2*i+1],h1) 37 | cv2.circle(draw_img,(x_,y_),2,(0,255,0)) 38 | # output img 39 | sub_flod = o_dir + raw_txt.split('_')[-2] + '/' 40 | tools.makedir(sub_flod) 41 | 
draw_img_path = sub_flod + img_name 42 | print 'draw ima path ', draw_img_path 43 | cv2.imwrite(draw_img_path,draw_img) 44 | open(raw_txt).close() 45 | print(raw_txt,' done!') 46 | drawpoint(train_txt,draw_dir,relative_train_path) 47 | drawpoint(test_txt,draw_dir,relative_test_path) 48 | -------------------------------------------------------------------------------- /1_level_1/Code/2_train/l1_mobilenet.prototxt: -------------------------------------------------------------------------------- 1 | name: "level_1" 2 | 3 | layer { 4 | name: "data" 5 | type: "ImageData" 6 | top: "data" 7 | top: "label" 8 | include{ 9 | phase: TRAIN 10 | } 11 | transform_param { 12 | mean_value: 127.5 13 | mean_value: 127.5 14 | mean_value: 127.5 15 | # scale: 0.0039215 16 | mirror: false 17 | } 18 | image_data_param{ 19 | root_folder: "../../Data/train/" 20 | source: "../../Data/l1_train_label.txt" 21 | batch_size: 128 22 | shuffle: true 23 | is_color: true 24 | new_height: 48 25 | new_width: 48 26 | } 27 | } 28 | #INPUT TEST 29 | layer { 30 | name: "data" 31 | type: "ImageData" 32 | top: "data" 33 | top: "label" 34 | include{ 35 | phase: TEST 36 | } 37 | transform_param { 38 | mean_value: 127.5 39 | mean_value: 127.5 40 | mean_value: 127.5 41 | # scale: 0.0039215 42 | mirror: false 43 | 44 | } 45 | image_data_param{ 46 | root_folder: "../../Data/test/" 47 | source: "../../Data/l1_test_label.txt" 48 | batch_size: 128 49 | shuffle: true 50 | is_color: true 51 | new_height: 48 52 | new_width: 48 53 | } 54 | } 55 | 56 | #------------------------------------------- conv1 input: 48*48*3 output: 24*24*16 57 | layer { 58 | name: "conv1_new" 59 | type: "Convolution" 60 | bottom: "data" 61 | top: "conv1_new" 62 | param { 63 | lr_mult: 1 64 | decay_mult: 1 65 | } 66 | convolution_param { 67 | num_output: 16 68 | bias_term: false 69 | pad: 1 70 | kernel_size: 3 71 | stride: 2 72 | weight_filler { 73 | type: "msra" 74 | } 75 | } 76 | } 77 | layer { 78 | name: "conv1/bn_new" 79 | type: "BatchNorm" 
80 | bottom: "conv1_new" 81 | top: "conv1_new" 82 | param { 83 | lr_mult: 0 84 | decay_mult: 0 85 | } 86 | param { 87 | lr_mult: 0 88 | decay_mult: 0 89 | } 90 | param { 91 | lr_mult: 0 92 | decay_mult: 0 93 | } 94 | } 95 | layer { 96 | name: "conv1/scale_new" 97 | type: "Scale" 98 | bottom: "conv1_new" 99 | top: "conv1_new" 100 | scale_param { 101 | filler { 102 | value: 1 103 | } 104 | bias_term: true 105 | bias_filler { 106 | value: 0 107 | } 108 | } 109 | } 110 | layer { 111 | name: "relu1_new" 112 | type: "ReLU" 113 | bottom: "conv1_new" 114 | top: "conv1_new" 115 | } 116 | 117 | #----------- bottleneck_1 1_1(s=2) 1_2(s=1) input: 24*24*16 output: 12*12*24 118 | 119 | layer { 120 | name: "conv1_1/in/pw_new" 121 | type: "Convolution" 122 | bottom: "conv1_new" 123 | top: "conv1_1/in/pw_new" 124 | param { 125 | lr_mult: 1 126 | decay_mult: 1 127 | } 128 | convolution_param { 129 | num_output: 96 130 | bias_term: false 131 | pad: 0 132 | kernel_size: 1 133 | engine: CAFFE 134 | stride: 1 135 | weight_filler { 136 | type: "msra" 137 | } 138 | } 139 | } 140 | layer { 141 | name: "conv1_1/in/pw/bn_new" 142 | type: "BatchNorm" 143 | bottom: "conv1_1/in/pw_new" 144 | top: "conv1_1/in/pw_new" 145 | param { 146 | lr_mult: 0 147 | decay_mult: 0 148 | } 149 | param { 150 | lr_mult: 0 151 | decay_mult: 0 152 | } 153 | param { 154 | lr_mult: 0 155 | decay_mult: 0 156 | } 157 | } 158 | layer { 159 | name: "conv1_1/in/pw/scale_new" 160 | type: "Scale" 161 | bottom: "conv1_1/in/pw_new" 162 | top: "conv1_1/in/pw_new" 163 | scale_param { 164 | filler { 165 | value: 1 166 | } 167 | bias_term: true 168 | bias_filler { 169 | value: 0 170 | } 171 | } 172 | } 173 | layer { 174 | name: "relu1_1/in/pw_new" 175 | type: "ReLU" 176 | bottom: "conv1_1/in/pw_new" 177 | top: "conv1_1/in/pw_new" 178 | } 179 | 180 | 181 | 182 | # 1_1 dw conv 183 | layer { 184 | name: "conv1_1/dw_new" 185 | type: "ConvolutionDepthwise" 186 | bottom: "conv1_1/in/pw_new" 187 | top: "conv1_1/dw_new" 188 | param { 
189 | lr_mult: 1 190 | decay_mult: 0 191 | } 192 | convolution_param { 193 | num_output: 96 194 | bias_term: false 195 | pad: 1 196 | kernel_size: 3 197 | engine: CAFFE 198 | stride: 2 199 | weight_filler { 200 | type: "msra" 201 | } 202 | } 203 | } 204 | layer { 205 | name: "conv1_1/dw/bn_new" 206 | type: "BatchNorm" 207 | bottom: "conv1_1/dw_new" 208 | top: "conv1_1/dw_new" 209 | param { 210 | lr_mult: 0 211 | decay_mult: 0 212 | } 213 | param { 214 | lr_mult: 0 215 | decay_mult: 0 216 | } 217 | param { 218 | lr_mult: 0 219 | decay_mult: 0 220 | } 221 | } 222 | layer { 223 | name: "conv1_1/dw/scale_new" 224 | type: "Scale" 225 | bottom: "conv1_1/dw_new" 226 | top: "conv1_1/dw_new" 227 | scale_param { 228 | filler { 229 | value: 1 230 | } 231 | bias_term: true 232 | bias_filler { 233 | value: 0 234 | } 235 | } 236 | } 237 | layer { 238 | name: "relu1_1/dw_new" 239 | type: "ReLU" 240 | bottom: "conv1_1/dw_new" 241 | top: "conv1_1/dw_new" 242 | } 243 | 244 | # 1_1 out 245 | layer { 246 | name: "conv1_1/out/pw_new" 247 | type: "Convolution" 248 | bottom: "conv1_1/dw_new" 249 | top: "conv1_1/out/pw_new" 250 | param { 251 | lr_mult: 1 252 | decay_mult: 1 253 | } 254 | convolution_param { 255 | num_output: 24 256 | bias_term: false 257 | pad: 0 258 | kernel_size: 1 259 | engine: CAFFE 260 | stride: 1 261 | weight_filler { 262 | type: "msra" 263 | } 264 | } 265 | } 266 | layer { 267 | name: "conv1_1/out/pw/bn_new" 268 | type: "BatchNorm" 269 | bottom: "conv1_1/out/pw_new" 270 | top: "conv1_1/out/pw_new" 271 | param { 272 | lr_mult: 0 273 | decay_mult: 0 274 | } 275 | param { 276 | lr_mult: 0 277 | decay_mult: 0 278 | } 279 | param { 280 | lr_mult: 0 281 | decay_mult: 0 282 | } 283 | } 284 | layer { 285 | name: "conv1_1/out/pw/scale_new" 286 | type: "Scale" 287 | bottom: "conv1_1/out/pw_new" 288 | top: "conv1_1/out/pw_new" 289 | scale_param { 290 | filler { 291 | value: 1 292 | } 293 | bias_term: true 294 | bias_filler { 295 | value: 0 296 | } 297 | } 298 | } 299 | # 1_2 
in 300 | 301 | layer { 302 | name: "conv1_2/in/pw_new" 303 | type: "Convolution" 304 | bottom: "conv1_1/out/pw_new" 305 | top: "conv1_2/in/pw_new" 306 | param { 307 | lr_mult: 1 308 | decay_mult: 1 309 | } 310 | convolution_param { 311 | num_output: 144 312 | bias_term: false 313 | pad: 0 314 | kernel_size: 1 315 | engine: CAFFE 316 | stride: 1 317 | weight_filler { 318 | type: "msra" 319 | } 320 | } 321 | } 322 | layer { 323 | name: "conv1_2/in/pw/bn_new" 324 | type: "BatchNorm" 325 | bottom: "conv1_2/in/pw_new" 326 | top: "conv1_2/in/pw_new" 327 | param { 328 | lr_mult: 0 329 | decay_mult: 0 330 | } 331 | param { 332 | lr_mult: 0 333 | decay_mult: 0 334 | } 335 | param { 336 | lr_mult: 0 337 | decay_mult: 0 338 | } 339 | } 340 | layer { 341 | name: "conv1_2/in/pw/scale_new" 342 | type: "Scale" 343 | bottom: "conv1_2/in/pw_new" 344 | top: "conv1_2/in/pw_new" 345 | scale_param { 346 | filler { 347 | value: 1 348 | } 349 | bias_term: true 350 | bias_filler { 351 | value: 0 352 | } 353 | } 354 | } 355 | layer { 356 | name: "relu1_2/in/pw_new" 357 | type: "ReLU" 358 | bottom: "conv1_2/in/pw_new" 359 | top: "conv1_2/in/pw_new" 360 | } 361 | 362 | # 1_2 dw 363 | 364 | layer { 365 | name: "conv1_2/dw_new" 366 | type: "ConvolutionDepthwise" 367 | bottom: "conv1_2/in/pw_new" 368 | top: "conv1_2/dw_new" 369 | param { 370 | lr_mult: 1 371 | decay_mult: 0 372 | } 373 | convolution_param { 374 | num_output: 144 375 | bias_term: false 376 | pad: 1 377 | kernel_size: 3 378 | engine: CAFFE 379 | stride: 1 380 | weight_filler { 381 | type: "msra" 382 | } 383 | } 384 | } 385 | layer { 386 | name: "conv1_2/dw/bn_new" 387 | type: "BatchNorm" 388 | bottom: "conv1_2/dw_new" 389 | top: "conv1_2/dw_new" 390 | param { 391 | lr_mult: 0 392 | decay_mult: 0 393 | } 394 | param { 395 | lr_mult: 0 396 | decay_mult: 0 397 | } 398 | param { 399 | lr_mult: 0 400 | decay_mult: 0 401 | } 402 | } 403 | layer { 404 | name: "conv1_2/dw/scale_new" 405 | type: "Scale" 406 | bottom: "conv1_2/dw_new" 407 
| top: "conv1_2/dw_new" 408 | scale_param { 409 | filler { 410 | value: 1 411 | } 412 | bias_term: true 413 | bias_filler { 414 | value: 0 415 | } 416 | } 417 | } 418 | layer { 419 | name: "relu1_2/dw_new" 420 | type: "ReLU" 421 | bottom: "conv1_2/dw_new" 422 | top: "conv1_2/dw_new" 423 | } 424 | 425 | # 1_2 out 12*12*24 426 | layer { 427 | name: "conv1_2/out/pw_new" 428 | type: "Convolution" 429 | bottom: "conv1_2/dw_new" 430 | top: "conv1_2/out/pw_new" 431 | param { 432 | lr_mult: 1 433 | decay_mult: 1 434 | } 435 | convolution_param { 436 | num_output: 24 437 | bias_term: false 438 | pad: 0 439 | kernel_size: 1 440 | engine: CAFFE 441 | stride: 1 442 | weight_filler { 443 | type: "msra" 444 | } 445 | } 446 | } 447 | layer { 448 | name: "conv1_2/out/pw/bn_new" 449 | type: "BatchNorm" 450 | bottom: "conv1_2/out/pw_new" 451 | top: "conv1_2/out/pw_new" 452 | param { 453 | lr_mult: 0 454 | decay_mult: 0 455 | } 456 | param { 457 | lr_mult: 0 458 | decay_mult: 0 459 | } 460 | param { 461 | lr_mult: 0 462 | decay_mult: 0 463 | } 464 | } 465 | layer { 466 | name: "conv1_2/out/pw/scale_new" 467 | type: "Scale" 468 | bottom: "conv1_2/out/pw_new" 469 | top: "conv1_2/out/pw_new" 470 | scale_param { 471 | filler { 472 | value: 1 473 | } 474 | bias_term: true 475 | bias_filler { 476 | value: 0 477 | } 478 | } 479 | } 480 | layer { 481 | name: "fuse_conv1_2" 482 | type: "Eltwise" 483 | bottom: "conv1_1/out/pw_new" 484 | bottom: "conv1_2/out/pw_new" 485 | top: "fuse_conv1_2" 486 | eltwise_param { 487 | operation: SUM 488 | } 489 | } 490 | 491 | #--------------------------bottleneck2 2_1(s=2) 2_2(s=1) input: 12*12*24 output: 6*6*32 492 | # 2_1 in 493 | layer { 494 | name: "conv2_1/in/pw_new" 495 | type: "Convolution" 496 | bottom: "fuse_conv1_2" 497 | top: "conv2_1/in/pw_new" 498 | param { 499 | lr_mult: 1 500 | decay_mult: 1 501 | } 502 | convolution_param { 503 | num_output: 144 504 | bias_term: false 505 | pad: 0 506 | kernel_size: 1 507 | engine: CAFFE 508 | stride: 1 509 | 
weight_filler { 510 | type: "msra" 511 | } 512 | } 513 | } 514 | layer { 515 | name: "conv2_1/in/pw/bn_new" 516 | type: "BatchNorm" 517 | bottom: "conv2_1/in/pw_new" 518 | top: "conv2_1/in/pw_new" 519 | param { 520 | lr_mult: 0 521 | decay_mult: 0 522 | } 523 | param { 524 | lr_mult: 0 525 | decay_mult: 0 526 | } 527 | param { 528 | lr_mult: 0 529 | decay_mult: 0 530 | } 531 | } 532 | layer { 533 | name: "conv2_1/in/pw/scale_new" 534 | type: "Scale" 535 | bottom: "conv2_1/in/pw_new" 536 | top: "conv2_1/in/pw_new" 537 | scale_param { 538 | filler { 539 | value: 1 540 | } 541 | bias_term: true 542 | bias_filler { 543 | value: 0 544 | } 545 | } 546 | } 547 | layer { 548 | name: "relu2_1/in/pw_new" 549 | type: "ReLU" 550 | bottom: "conv2_1/in/pw_new" 551 | top: "conv2_1/in/pw_new" 552 | } 553 | 554 | # 2_1 dw 555 | layer { 556 | name: "conv2_1/dw_new" 557 | type: "ConvolutionDepthwise" 558 | bottom: "conv2_1/in/pw_new" 559 | top: "conv2_1/dw_new" 560 | param { 561 | lr_mult: 1 562 | decay_mult: 0 563 | } 564 | convolution_param { 565 | num_output: 144 566 | bias_term: false 567 | pad: 1 568 | kernel_size: 3 569 | engine: CAFFE 570 | stride: 2 571 | weight_filler { 572 | type: "msra" 573 | } 574 | } 575 | } 576 | layer { 577 | name: "conv2_1/dw/bn_new" 578 | type: "BatchNorm" 579 | bottom: "conv2_1/dw_new" 580 | top: "conv2_1/dw_new" 581 | param { 582 | lr_mult: 0 583 | decay_mult: 0 584 | } 585 | param { 586 | lr_mult: 0 587 | decay_mult: 0 588 | } 589 | param { 590 | lr_mult: 0 591 | decay_mult: 0 592 | } 593 | } 594 | layer { 595 | name: "conv2_1/dw/scale_new" 596 | type: "Scale" 597 | bottom: "conv2_1/dw_new" 598 | top: "conv2_1/dw_new" 599 | scale_param { 600 | filler { 601 | value: 1 602 | } 603 | bias_term: true 604 | bias_filler { 605 | value: 0 606 | } 607 | } 608 | } 609 | layer { 610 | name: "relu2_1/dw_new" 611 | type: "ReLU" 612 | bottom: "conv2_1/dw_new" 613 | top: "conv2_1/dw_new" 614 | } 615 | 616 | # 2_1 out 617 | layer { 618 | name: 
"conv2_1/out/pw_new" 619 | type: "Convolution" 620 | bottom: "conv2_1/dw_new" 621 | top: "conv2_1/out/pw_new" 622 | param { 623 | lr_mult: 1 624 | decay_mult: 1 625 | } 626 | convolution_param { 627 | num_output: 32 628 | bias_term: false 629 | pad: 0 630 | kernel_size: 1 631 | engine: CAFFE 632 | stride: 1 633 | weight_filler { 634 | type: "msra" 635 | } 636 | } 637 | } 638 | layer { 639 | name: "conv2_1/out/pw/bn_new" 640 | type: "BatchNorm" 641 | bottom: "conv2_1/out/pw_new" 642 | top: "conv2_1/out/pw_new" 643 | param { 644 | lr_mult: 0 645 | decay_mult: 0 646 | } 647 | param { 648 | lr_mult: 0 649 | decay_mult: 0 650 | } 651 | param { 652 | lr_mult: 0 653 | decay_mult: 0 654 | } 655 | } 656 | layer { 657 | name: "conv2_1/out/pw/scale_new" 658 | type: "Scale" 659 | bottom: "conv2_1/out/pw_new" 660 | top: "conv2_1/out/pw_new" 661 | scale_param { 662 | filler { 663 | value: 1 664 | } 665 | bias_term: true 666 | bias_filler { 667 | value: 0 668 | } 669 | } 670 | } 671 | 672 | # 2_2 in 673 | 674 | layer { 675 | name: "conv2_2/in/pw_new" 676 | type: "Convolution" 677 | bottom: "conv2_1/out/pw_new" 678 | top: "conv2_2/in/pw_new" 679 | param { 680 | lr_mult: 1 681 | decay_mult: 1 682 | } 683 | convolution_param { 684 | num_output: 192 685 | bias_term: false 686 | pad: 0 687 | kernel_size: 1 688 | engine: CAFFE 689 | stride: 1 690 | weight_filler { 691 | type: "msra" 692 | } 693 | } 694 | } 695 | layer { 696 | name: "conv2_2/in/pw/bn_new" 697 | type: "BatchNorm" 698 | bottom: "conv2_2/in/pw_new" 699 | top: "conv2_2/in/pw_new" 700 | param { 701 | lr_mult: 0 702 | decay_mult: 0 703 | } 704 | param { 705 | lr_mult: 0 706 | decay_mult: 0 707 | } 708 | param { 709 | lr_mult: 0 710 | decay_mult: 0 711 | } 712 | } 713 | layer { 714 | name: "conv2_2/in/pw/scale_new" 715 | type: "Scale" 716 | bottom: "conv2_2/in/pw_new" 717 | top: "conv2_2/in/pw_new" 718 | scale_param { 719 | filler { 720 | value: 1 721 | } 722 | bias_term: true 723 | bias_filler { 724 | value: 0 725 | } 726 | } 
727 | } 728 | layer { 729 | name: "relu2_2/in/pw_new" 730 | type: "ReLU" 731 | bottom: "conv2_2/in/pw_new" 732 | top: "conv2_2/in/pw_new" 733 | } 734 | 735 | # 2_2 dw 736 | layer { 737 | name: "conv2_2/dw_new" 738 | type: "ConvolutionDepthwise" 739 | bottom: "conv2_2/in/pw_new" 740 | top: "conv2_2/dw_new" 741 | param { 742 | lr_mult: 1 743 | decay_mult: 0 744 | } 745 | convolution_param { 746 | num_output: 192 747 | bias_term: false 748 | pad: 1 749 | kernel_size: 3 750 | engine: CAFFE 751 | stride: 1 752 | weight_filler { 753 | type: "msra" 754 | } 755 | } 756 | } 757 | layer { 758 | name: "conv2_2/dw/bn_new" 759 | type: "BatchNorm" 760 | bottom: "conv2_2/dw_new" 761 | top: "conv2_2/dw_new" 762 | param { 763 | lr_mult: 0 764 | decay_mult: 0 765 | } 766 | param { 767 | lr_mult: 0 768 | decay_mult: 0 769 | } 770 | param { 771 | lr_mult: 0 772 | decay_mult: 0 773 | } 774 | } 775 | layer { 776 | name: "conv2_2/dw/scale_new" 777 | type: "Scale" 778 | bottom: "conv2_2/dw_new" 779 | top: "conv2_2/dw_new" 780 | scale_param { 781 | filler { 782 | value: 1 783 | } 784 | bias_term: true 785 | bias_filler { 786 | value: 0 787 | } 788 | } 789 | } 790 | layer { 791 | name: "relu2_2/dw_new" 792 | type: "ReLU" 793 | bottom: "conv2_2/dw_new" 794 | top: "conv2_2/dw_new" 795 | } 796 | 797 | 798 | # 2_2 out 799 | 800 | layer { 801 | name: "conv2_2/out/pw_new" 802 | type: "Convolution" 803 | bottom: "conv2_2/dw_new" 804 | top: "conv2_2/out/pw_new" 805 | param { 806 | lr_mult: 1 807 | decay_mult: 1 808 | } 809 | convolution_param { 810 | num_output: 32 811 | bias_term: false 812 | pad: 0 813 | kernel_size: 1 814 | engine: CAFFE 815 | stride: 1 816 | weight_filler { 817 | type: "msra" 818 | } 819 | } 820 | } 821 | layer { 822 | name: "conv2_2/out/pw/bn_new" 823 | type: "BatchNorm" 824 | bottom: "conv2_2/out/pw_new" 825 | top: "conv2_2/out/pw_new" 826 | param { 827 | lr_mult: 0 828 | decay_mult: 0 829 | } 830 | param { 831 | lr_mult: 0 832 | decay_mult: 0 833 | } 834 | param { 835 | 
lr_mult: 0 836 | decay_mult: 0 837 | } 838 | } 839 | layer { 840 | name: "conv2_2/out/pw/scale_new" 841 | type: "Scale" 842 | bottom: "conv2_2/out/pw_new" 843 | top: "conv2_2/out/pw_new" 844 | scale_param { 845 | filler { 846 | value: 1 847 | } 848 | bias_term: true 849 | bias_filler { 850 | value: 0 851 | } 852 | } 853 | } 854 | layer { 855 | name: "fuse_conv2_2" 856 | type: "Eltwise" 857 | bottom: "conv2_1/out/pw_new" 858 | bottom: "conv2_2/out/pw_new" 859 | top: "fuse_conv2_2" 860 | eltwise_param { 861 | operation: SUM 862 | } 863 | } 864 | 865 | #--------------------------bottleneck3 3_1(s=2) 3_2(s=1) input: 6*6*32 output: 3*3*64 866 | # 3_1 in 867 | layer { 868 | name: "conv3_1/in/pw_new" 869 | type: "Convolution" 870 | bottom: "fuse_conv2_2" 871 | top: "conv3_1/in/pw_new" 872 | param { 873 | lr_mult: 1 874 | decay_mult: 1 875 | } 876 | convolution_param { 877 | num_output: 192 878 | bias_term: false 879 | pad: 0 880 | kernel_size: 1 881 | engine: CAFFE 882 | stride: 1 883 | weight_filler { 884 | type: "msra" 885 | } 886 | } 887 | } 888 | layer { 889 | name: "conv3_1/in/pw/bn_new" 890 | type: "BatchNorm" 891 | bottom: "conv3_1/in/pw_new" 892 | top: "conv3_1/in/pw_new" 893 | param { 894 | lr_mult: 0 895 | decay_mult: 0 896 | } 897 | param { 898 | lr_mult: 0 899 | decay_mult: 0 900 | } 901 | param { 902 | lr_mult: 0 903 | decay_mult: 0 904 | } 905 | } 906 | layer { 907 | name: "conv3_1/in/pw/scale_new" 908 | type: "Scale" 909 | bottom: "conv3_1/in/pw_new" 910 | top: "conv3_1/in/pw_new" 911 | scale_param { 912 | filler { 913 | value: 1 914 | } 915 | bias_term: true 916 | bias_filler { 917 | value: 0 918 | } 919 | } 920 | } 921 | layer { 922 | name: "relu3_1/in/pw_new" 923 | type: "ReLU" 924 | bottom: "conv3_1/in/pw_new" 925 | top: "conv3_1/in/pw_new" 926 | } 927 | 928 | # 3_1 dw 929 | layer { 930 | name: "conv3_1/dw_new" 931 | type: "ConvolutionDepthwise" 932 | bottom: "conv3_1/in/pw_new" 933 | top: "conv3_1/dw_new" 934 | param { 935 | lr_mult: 1 936 | 
decay_mult: 0 937 | } 938 | convolution_param { 939 | num_output: 192 940 | bias_term: false 941 | pad: 1 942 | kernel_size: 3 943 | engine: CAFFE 944 | stride: 2 945 | weight_filler { 946 | type: "msra" 947 | } 948 | } 949 | } 950 | layer { 951 | name: "conv3_1/dw/bn_new" 952 | type: "BatchNorm" 953 | bottom: "conv3_1/dw_new" 954 | top: "conv3_1/dw_new" 955 | param { 956 | lr_mult: 0 957 | decay_mult: 0 958 | } 959 | param { 960 | lr_mult: 0 961 | decay_mult: 0 962 | } 963 | param { 964 | lr_mult: 0 965 | decay_mult: 0 966 | } 967 | } 968 | layer { 969 | name: "conv3_1/dw/scale_new" 970 | type: "Scale" 971 | bottom: "conv3_1/dw_new" 972 | top: "conv3_1/dw_new" 973 | scale_param { 974 | filler { 975 | value: 1 976 | } 977 | bias_term: true 978 | bias_filler { 979 | value: 0 980 | } 981 | } 982 | } 983 | layer { 984 | name: "relu3_1/dw_new" 985 | type: "ReLU" 986 | bottom: "conv3_1/dw_new" 987 | top: "conv3_1/dw_new" 988 | } 989 | 990 | # 3_1 out 991 | layer { 992 | name: "conv3_1/out/pw_new" 993 | type: "Convolution" 994 | bottom: "conv3_1/dw_new" 995 | top: "conv3_1/out/pw_new" 996 | param { 997 | lr_mult: 1 998 | decay_mult: 1 999 | } 1000 | convolution_param { 1001 | num_output: 64 1002 | bias_term: false 1003 | pad: 0 1004 | kernel_size: 1 1005 | engine: CAFFE 1006 | stride: 1 1007 | weight_filler { 1008 | type: "msra" 1009 | } 1010 | } 1011 | } 1012 | layer { 1013 | name: "conv3_1/out/pw/bn_new" 1014 | type: "BatchNorm" 1015 | bottom: "conv3_1/out/pw_new" 1016 | top: "conv3_1/out/pw_new" 1017 | param { 1018 | lr_mult: 0 1019 | decay_mult: 0 1020 | } 1021 | param { 1022 | lr_mult: 0 1023 | decay_mult: 0 1024 | } 1025 | param { 1026 | lr_mult: 0 1027 | decay_mult: 0 1028 | } 1029 | } 1030 | layer { 1031 | name: "conv3_1/out/pw/scale_new" 1032 | type: "Scale" 1033 | bottom: "conv3_1/out/pw_new" 1034 | top: "conv3_1/out/pw_new" 1035 | scale_param { 1036 | filler { 1037 | value: 1 1038 | } 1039 | bias_term: true 1040 | bias_filler { 1041 | value: 0 1042 | } 1043 | 
} 1044 | } 1045 | 1046 | # 3_2 in 1047 | 1048 | layer { 1049 | name: "conv3_2/in/pw_new" 1050 | type: "Convolution" 1051 | bottom: "conv3_1/out/pw_new" 1052 | top: "conv3_2/in/pw_new" 1053 | param { 1054 | lr_mult: 1 1055 | decay_mult: 1 1056 | } 1057 | convolution_param { 1058 | num_output: 192 1059 | bias_term: false 1060 | pad: 0 1061 | kernel_size: 1 1062 | engine: CAFFE 1063 | stride: 1 1064 | weight_filler { 1065 | type: "msra" 1066 | } 1067 | } 1068 | } 1069 | layer { 1070 | name: "conv3_2/in/pw/bn_new" 1071 | type: "BatchNorm" 1072 | bottom: "conv3_2/in/pw_new" 1073 | top: "conv3_2/in/pw_new" 1074 | param { 1075 | lr_mult: 0 1076 | decay_mult: 0 1077 | } 1078 | param { 1079 | lr_mult: 0 1080 | decay_mult: 0 1081 | } 1082 | param { 1083 | lr_mult: 0 1084 | decay_mult: 0 1085 | } 1086 | } 1087 | layer { 1088 | name: "conv3_2/in/pw/scale_new" 1089 | type: "Scale" 1090 | bottom: "conv3_2/in/pw_new" 1091 | top: "conv3_2/in/pw_new" 1092 | scale_param { 1093 | filler { 1094 | value: 1 1095 | } 1096 | bias_term: true 1097 | bias_filler { 1098 | value: 0 1099 | } 1100 | } 1101 | } 1102 | layer { 1103 | name: "relu3_2/in/pw_new" 1104 | type: "ReLU" 1105 | bottom: "conv3_2/in/pw_new" 1106 | top: "conv3_2/in/pw_new" 1107 | } 1108 | 1109 | # 3_2 dw 1110 | layer { 1111 | name: "conv3_2/dw_new" 1112 | type: "ConvolutionDepthwise" 1113 | bottom: "conv3_2/in/pw_new" 1114 | top: "conv3_2/dw_new" 1115 | param { 1116 | lr_mult: 1 1117 | decay_mult: 0 1118 | } 1119 | convolution_param { 1120 | num_output: 192 1121 | bias_term: false 1122 | pad: 1 1123 | kernel_size: 3 1124 | engine: CAFFE 1125 | stride: 1 1126 | weight_filler { 1127 | type: "msra" 1128 | } 1129 | } 1130 | } 1131 | layer { 1132 | name: "conv3_2/dw/bn_new" 1133 | type: "BatchNorm" 1134 | bottom: "conv3_2/dw_new" 1135 | top: "conv3_2/dw_new" 1136 | param { 1137 | lr_mult: 0 1138 | decay_mult: 0 1139 | } 1140 | param { 1141 | lr_mult: 0 1142 | decay_mult: 0 1143 | } 1144 | param { 1145 | lr_mult: 0 1146 | 
decay_mult: 0 1147 | } 1148 | } 1149 | layer { 1150 | name: "conv3_2/dw/scale_new" 1151 | type: "Scale" 1152 | bottom: "conv3_2/dw_new" 1153 | top: "conv3_2/dw_new" 1154 | scale_param { 1155 | filler { 1156 | value: 1 1157 | } 1158 | bias_term: true 1159 | bias_filler { 1160 | value: 0 1161 | } 1162 | } 1163 | } 1164 | layer { 1165 | name: "relu3_2/dw_new" 1166 | type: "ReLU" 1167 | bottom: "conv3_2/dw_new" 1168 | top: "conv3_2/dw_new" 1169 | } 1170 | 1171 | 1172 | # 3_2 out 1173 | 1174 | layer { 1175 | name: "conv3_2/out/pw_new" 1176 | type: "Convolution" 1177 | bottom: "conv3_2/dw_new" 1178 | top: "conv3_2/out/pw_new" 1179 | param { 1180 | lr_mult: 1 1181 | decay_mult: 1 1182 | } 1183 | convolution_param { 1184 | num_output: 64 1185 | bias_term: false 1186 | pad: 0 1187 | kernel_size: 1 1188 | engine: CAFFE 1189 | stride: 1 1190 | weight_filler { 1191 | type: "msra" 1192 | } 1193 | } 1194 | } 1195 | layer { 1196 | name: "conv3_2/out/pw/bn_new" 1197 | type: "BatchNorm" 1198 | bottom: "conv3_2/out/pw_new" 1199 | top: "conv3_2/out/pw_new" 1200 | param { 1201 | lr_mult: 0 1202 | decay_mult: 0 1203 | } 1204 | param { 1205 | lr_mult: 0 1206 | decay_mult: 0 1207 | } 1208 | param { 1209 | lr_mult: 0 1210 | decay_mult: 0 1211 | } 1212 | } 1213 | layer { 1214 | name: "conv3_2/out/pw/scale_new" 1215 | type: "Scale" 1216 | bottom: "conv3_2/out/pw_new" 1217 | top: "conv3_2/out/pw_new" 1218 | scale_param { 1219 | filler { 1220 | value: 1 1221 | } 1222 | bias_term: true 1223 | bias_filler { 1224 | value: 0 1225 | } 1226 | } 1227 | } 1228 | layer { 1229 | name: "fuse_conv3_2" 1230 | type: "Eltwise" 1231 | bottom: "conv3_1/out/pw_new" 1232 | bottom: "conv3_2/out/pw_new" 1233 | top: "fuse_conv3_2" 1234 | eltwise_param { 1235 | operation: SUM 1236 | } 1237 | } 1238 | 1239 | 1240 | 1241 | 1242 | 1243 | 1244 | 1245 | #------------------------- fc1 1246 | layer { 1247 | name: "fc1" 1248 | type: "InnerProduct" 1249 | bottom: "fuse_conv3_2" 1250 | top: "fc1" 1251 | param { 1252 | 
lr_mult: 1 1253 | decay_mult: 1 1254 | } 1255 | param { 1256 | lr_mult: 2 1257 | decay_mult: 1 1258 | } 1259 | inner_product_param { 1260 | num_output: 256 1261 | weight_filler { 1262 | type: "gaussian" 1263 | std: 0.01 1264 | } 1265 | bias_filler { 1266 | type: "constant" 1267 | value: 0 1268 | } 1269 | } 1270 | } 1271 | layer { 1272 | name: "relu_fc1" 1273 | type: "ReLU" 1274 | bottom: "fc1" 1275 | top: "fc1" 1276 | } 1277 | layer { 1278 | name: "drop_fc1" 1279 | type: "Dropout" 1280 | bottom: "fc1" 1281 | top: "fc1" 1282 | dropout_param{ 1283 | dropout_ratio: 0.3 1284 | } 1285 | } 1286 | 1287 | #------------------------- fc2 1288 | layer { 1289 | name: "fc2" 1290 | type: "InnerProduct" 1291 | bottom: "fc1" 1292 | top: "fc2" 1293 | param { 1294 | lr_mult: 1 1295 | decay_mult: 1 1296 | } 1297 | param { 1298 | lr_mult: 2 1299 | decay_mult: 1 1300 | } 1301 | inner_product_param { 1302 | num_output: 10 1303 | weight_filler { 1304 | type: "gaussian" 1305 | std: 0.01 1306 | } 1307 | bias_filler { 1308 | type: "constant" 1309 | value: 0 1310 | } 1311 | } 1312 | } 1313 | 1314 | 1315 | layer { 1316 | name: "loss" 1317 | type: "EuclideanLoss" 1318 | bottom: "fc2" 1319 | bottom: "label" 1320 | top: "loss" 1321 | loss_weight: 100 1322 | } -------------------------------------------------------------------------------- /1_level_1/Code/2_train/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "l1_mobilenet.prototxt" 2 | 3 | test_iter: 160 # bs = 128 * 2 4 | test_interval: 1250 5 | 6 | #base_lr: 0.0001 7 | base_lr: 0.001 8 | momentum: 0.9 9 | weight_decay: 0.0004 10 | 11 | type: "Adam" 12 | 13 | lr_policy: "multistep" 14 | #gamma: 0.9 15 | gamma:0.1 16 | stepvalue: 80000 17 | stepvalue: 100000 18 | #stepvalue: 250000 19 | 20 | display: 1000 21 | max_iter: 200000 22 | 23 | snapshot: 50000 24 | snapshot_prefix: "../../Result/solver_state/" 25 | solver_mode: GPU 26 | 
-------------------------------------------------------------------------------- /1_level_1/Code/2_train/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | postfix=`date +"%F-%H-%M-%S"` 4 | /***your_caffe_path***/build/tools/caffe train \ 5 | --solver=./solver.prototxt -gpu 0,1 \ 6 | 2>&1 | tee ../../Result/log/$(date +%Y-%m-%d-%H-%M.log) $@ -------------------------------------------------------------------------------- /1_level_1/Code/3_inference/inferencen.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | sys.path.append('../../../util') 4 | sys.path.append('/***your_caffe_path***/python') 5 | sys.path.append('/***your_caffe_path***/python/caffe') 6 | import tools 7 | import caffe 8 | import numpy as np 9 | import cv2 10 | import time 11 | 12 | l1_deploy = './l1_deploy.prototxt' 13 | l1_model = '../../Result/solver_state/_iter_100000.caffemodel' 14 | 15 | txt_flod = '../../Data/' 16 | train_txt = txt_flod + 'l1_train_label.txt' 17 | test_txt = txt_flod + 'l1_test_label.txt' 18 | 19 | relative_path = '../../../raw_data/Data/img_celeba/' # find the image 20 | 21 | l1_out_train_txt = '../../Result/l1_out_train_label.txt' 22 | l1_out_test_txt = '../../Result/l1_out_test_label.txt' 23 | 24 | w_net = 48 25 | h_net = 48 26 | #--------------------------------------------------------------------------- cnn initalization 27 | caffe.set_mode_gpu() 28 | caffe.set_device(0) 29 | # load model 30 | net = caffe.Net(l1_deploy,l1_model,caffe.TEST) 31 | # image preprocess 32 | mu = np.ones((3,w_net,h_net), dtype=np.float) * 127.5 33 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) 34 | transformer.set_transpose('data', (2,0,1)) # (w,h,c)--> (c,w,h) 35 | transformer.set_mean('data', mu) # pixel-wise 36 | transformer.set_raw_scale('data', 255 ) # [0,1] --> [0,255] 37 | 
transformer.set_channel_swap('data', (2,1,0)) # RGB --> BGR 38 | #----------------------------------------------------------------------------- forward 39 | def l1_forward(input_txt,output_txt): 40 | out_f = open(output_txt,'w') 41 | for line in open(input_txt): 42 | if line.isspace() : continue 43 | img_name = line.split()[0] 44 | full_img_path = relative_path + img_name 45 | #------------------------------------------------------------------------- cnn forward 46 | im=caffe.io.load_image(full_img_path) # im is RGB with 0~1 float 47 | net.blobs['data'].data[...]=transformer.preprocess('data',im) 48 | time_s = time.clock() 49 | n_out = net.forward() 50 | time_e = time.clock() 51 | print img_name,'forward : ',round((time_e-time_s)*1000,1) ,'ms' 52 | out_landmark = net.blobs['fc2'].data[0].flatten() 53 | #------------------------------------------------------------------------- write txt 54 | str_0 = str(out_landmark) 55 | str_1 = str_0.replace("\n","") 56 | str_2 = str_1.strip('[]') 57 | new_line = img_name +' '+ str_2 +'\n' 58 | out_f.write(new_line) 59 | out_f.close() 60 | 61 | l1_forward(test_txt,l1_out_test_txt) 62 | l1_forward(train_txt,l1_out_train_txt) -------------------------------------------------------------------------------- /1_level_1/Code/3_inference/l1_deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "level_1" 2 | input: "data" 3 | input_shape { dim: 1 dim: 3 dim: 48 dim: 48 } 4 | 5 | 6 | #------------------------------------------- conv1 input: 48*48*3 output: 24*24*16 7 | layer { 8 | name: "conv1_new" 9 | type: "Convolution" 10 | bottom: "data" 11 | top: "conv1_new" 12 | param { 13 | lr_mult: 1 14 | decay_mult: 1 15 | } 16 | convolution_param { 17 | num_output: 16 18 | bias_term: false 19 | pad: 1 20 | kernel_size: 3 21 | stride: 2 22 | weight_filler { 23 | type: "msra" 24 | } 25 | } 26 | } 27 | layer { 28 | name: "conv1/bn_new" 29 | type: "BatchNorm" 30 | bottom: "conv1_new" 31 | top: 
"conv1_new" 32 | param { 33 | lr_mult: 0 34 | decay_mult: 0 35 | } 36 | param { 37 | lr_mult: 0 38 | decay_mult: 0 39 | } 40 | param { 41 | lr_mult: 0 42 | decay_mult: 0 43 | } 44 | } 45 | layer { 46 | name: "conv1/scale_new" 47 | type: "Scale" 48 | bottom: "conv1_new" 49 | top: "conv1_new" 50 | scale_param { 51 | filler { 52 | value: 1 53 | } 54 | bias_term: true 55 | bias_filler { 56 | value: 0 57 | } 58 | } 59 | } 60 | layer { 61 | name: "relu1_new" 62 | type: "ReLU" 63 | bottom: "conv1_new" 64 | top: "conv1_new" 65 | } 66 | 67 | #----------- bottleneck_1 1_1(s=2) 1_2(s=1) input: 24*24*16 output: 12*12*24 68 | 69 | layer { 70 | name: "conv1_1/in/pw_new" 71 | type: "Convolution" 72 | bottom: "conv1_new" 73 | top: "conv1_1/in/pw_new" 74 | param { 75 | lr_mult: 1 76 | decay_mult: 1 77 | } 78 | convolution_param { 79 | num_output: 96 80 | bias_term: false 81 | pad: 0 82 | kernel_size: 1 83 | engine: CAFFE 84 | stride: 1 85 | weight_filler { 86 | type: "msra" 87 | } 88 | } 89 | } 90 | layer { 91 | name: "conv1_1/in/pw/bn_new" 92 | type: "BatchNorm" 93 | bottom: "conv1_1/in/pw_new" 94 | top: "conv1_1/in/pw_new" 95 | param { 96 | lr_mult: 0 97 | decay_mult: 0 98 | } 99 | param { 100 | lr_mult: 0 101 | decay_mult: 0 102 | } 103 | param { 104 | lr_mult: 0 105 | decay_mult: 0 106 | } 107 | } 108 | layer { 109 | name: "conv1_1/in/pw/scale_new" 110 | type: "Scale" 111 | bottom: "conv1_1/in/pw_new" 112 | top: "conv1_1/in/pw_new" 113 | scale_param { 114 | filler { 115 | value: 1 116 | } 117 | bias_term: true 118 | bias_filler { 119 | value: 0 120 | } 121 | } 122 | } 123 | layer { 124 | name: "relu1_1/in/pw_new" 125 | type: "ReLU" 126 | bottom: "conv1_1/in/pw_new" 127 | top: "conv1_1/in/pw_new" 128 | } 129 | 130 | 131 | 132 | # 1_1 dw conv 133 | layer { 134 | name: "conv1_1/dw_new" 135 | type: "ConvolutionDepthwise" 136 | bottom: "conv1_1/in/pw_new" 137 | top: "conv1_1/dw_new" 138 | param { 139 | lr_mult: 1 140 | decay_mult: 0 141 | } 142 | convolution_param { 143 | 
num_output: 96 144 | bias_term: false 145 | pad: 1 146 | kernel_size: 3 147 | engine: CAFFE 148 | stride: 2 149 | weight_filler { 150 | type: "msra" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "conv1_1/dw/bn_new" 156 | type: "BatchNorm" 157 | bottom: "conv1_1/dw_new" 158 | top: "conv1_1/dw_new" 159 | param { 160 | lr_mult: 0 161 | decay_mult: 0 162 | } 163 | param { 164 | lr_mult: 0 165 | decay_mult: 0 166 | } 167 | param { 168 | lr_mult: 0 169 | decay_mult: 0 170 | } 171 | } 172 | layer { 173 | name: "conv1_1/dw/scale_new" 174 | type: "Scale" 175 | bottom: "conv1_1/dw_new" 176 | top: "conv1_1/dw_new" 177 | scale_param { 178 | filler { 179 | value: 1 180 | } 181 | bias_term: true 182 | bias_filler { 183 | value: 0 184 | } 185 | } 186 | } 187 | layer { 188 | name: "relu1_1/dw_new" 189 | type: "ReLU" 190 | bottom: "conv1_1/dw_new" 191 | top: "conv1_1/dw_new" 192 | } 193 | 194 | # 1_1 out 195 | layer { 196 | name: "conv1_1/out/pw_new" 197 | type: "Convolution" 198 | bottom: "conv1_1/dw_new" 199 | top: "conv1_1/out/pw_new" 200 | param { 201 | lr_mult: 1 202 | decay_mult: 1 203 | } 204 | convolution_param { 205 | num_output: 24 206 | bias_term: false 207 | pad: 0 208 | kernel_size: 1 209 | engine: CAFFE 210 | stride: 1 211 | weight_filler { 212 | type: "msra" 213 | } 214 | } 215 | } 216 | layer { 217 | name: "conv1_1/out/pw/bn_new" 218 | type: "BatchNorm" 219 | bottom: "conv1_1/out/pw_new" 220 | top: "conv1_1/out/pw_new" 221 | param { 222 | lr_mult: 0 223 | decay_mult: 0 224 | } 225 | param { 226 | lr_mult: 0 227 | decay_mult: 0 228 | } 229 | param { 230 | lr_mult: 0 231 | decay_mult: 0 232 | } 233 | } 234 | layer { 235 | name: "conv1_1/out/pw/scale_new" 236 | type: "Scale" 237 | bottom: "conv1_1/out/pw_new" 238 | top: "conv1_1/out/pw_new" 239 | scale_param { 240 | filler { 241 | value: 1 242 | } 243 | bias_term: true 244 | bias_filler { 245 | value: 0 246 | } 247 | } 248 | } 249 | # 1_2 in 250 | 251 | layer { 252 | name: "conv1_2/in/pw_new" 253 | type: 
"Convolution" 254 | bottom: "conv1_1/out/pw_new" 255 | top: "conv1_2/in/pw_new" 256 | param { 257 | lr_mult: 1 258 | decay_mult: 1 259 | } 260 | convolution_param { 261 | num_output: 144 262 | bias_term: false 263 | pad: 0 264 | kernel_size: 1 265 | engine: CAFFE 266 | stride: 1 267 | weight_filler { 268 | type: "msra" 269 | } 270 | } 271 | } 272 | layer { 273 | name: "conv1_2/in/pw/bn_new" 274 | type: "BatchNorm" 275 | bottom: "conv1_2/in/pw_new" 276 | top: "conv1_2/in/pw_new" 277 | param { 278 | lr_mult: 0 279 | decay_mult: 0 280 | } 281 | param { 282 | lr_mult: 0 283 | decay_mult: 0 284 | } 285 | param { 286 | lr_mult: 0 287 | decay_mult: 0 288 | } 289 | } 290 | layer { 291 | name: "conv1_2/in/pw/scale_new" 292 | type: "Scale" 293 | bottom: "conv1_2/in/pw_new" 294 | top: "conv1_2/in/pw_new" 295 | scale_param { 296 | filler { 297 | value: 1 298 | } 299 | bias_term: true 300 | bias_filler { 301 | value: 0 302 | } 303 | } 304 | } 305 | layer { 306 | name: "relu1_2/in/pw_new" 307 | type: "ReLU" 308 | bottom: "conv1_2/in/pw_new" 309 | top: "conv1_2/in/pw_new" 310 | } 311 | 312 | # 1_2 dw 313 | 314 | layer { 315 | name: "conv1_2/dw_new" 316 | type: "ConvolutionDepthwise" 317 | bottom: "conv1_2/in/pw_new" 318 | top: "conv1_2/dw_new" 319 | param { 320 | lr_mult: 1 321 | decay_mult: 0 322 | } 323 | convolution_param { 324 | num_output: 144 325 | bias_term: false 326 | pad: 1 327 | kernel_size: 3 328 | engine: CAFFE 329 | stride: 1 330 | weight_filler { 331 | type: "msra" 332 | } 333 | } 334 | } 335 | layer { 336 | name: "conv1_2/dw/bn_new" 337 | type: "BatchNorm" 338 | bottom: "conv1_2/dw_new" 339 | top: "conv1_2/dw_new" 340 | param { 341 | lr_mult: 0 342 | decay_mult: 0 343 | } 344 | param { 345 | lr_mult: 0 346 | decay_mult: 0 347 | } 348 | param { 349 | lr_mult: 0 350 | decay_mult: 0 351 | } 352 | } 353 | layer { 354 | name: "conv1_2/dw/scale_new" 355 | type: "Scale" 356 | bottom: "conv1_2/dw_new" 357 | top: "conv1_2/dw_new" 358 | scale_param { 359 | filler { 360 | 
value: 1 361 | } 362 | bias_term: true 363 | bias_filler { 364 | value: 0 365 | } 366 | } 367 | } 368 | layer { 369 | name: "relu1_2/dw_new" 370 | type: "ReLU" 371 | bottom: "conv1_2/dw_new" 372 | top: "conv1_2/dw_new" 373 | } 374 | 375 | # 1_2 out 12*12*24 376 | layer { 377 | name: "conv1_2/out/pw_new" 378 | type: "Convolution" 379 | bottom: "conv1_2/dw_new" 380 | top: "conv1_2/out/pw_new" 381 | param { 382 | lr_mult: 1 383 | decay_mult: 1 384 | } 385 | convolution_param { 386 | num_output: 24 387 | bias_term: false 388 | pad: 0 389 | kernel_size: 1 390 | engine: CAFFE 391 | stride: 1 392 | weight_filler { 393 | type: "msra" 394 | } 395 | } 396 | } 397 | layer { 398 | name: "conv1_2/out/pw/bn_new" 399 | type: "BatchNorm" 400 | bottom: "conv1_2/out/pw_new" 401 | top: "conv1_2/out/pw_new" 402 | param { 403 | lr_mult: 0 404 | decay_mult: 0 405 | } 406 | param { 407 | lr_mult: 0 408 | decay_mult: 0 409 | } 410 | param { 411 | lr_mult: 0 412 | decay_mult: 0 413 | } 414 | } 415 | layer { 416 | name: "conv1_2/out/pw/scale_new" 417 | type: "Scale" 418 | bottom: "conv1_2/out/pw_new" 419 | top: "conv1_2/out/pw_new" 420 | scale_param { 421 | filler { 422 | value: 1 423 | } 424 | bias_term: true 425 | bias_filler { 426 | value: 0 427 | } 428 | } 429 | } 430 | layer { 431 | name: "fuse_conv1_2" 432 | type: "Eltwise" 433 | bottom: "conv1_1/out/pw_new" 434 | bottom: "conv1_2/out/pw_new" 435 | top: "fuse_conv1_2" 436 | eltwise_param { 437 | operation: SUM 438 | } 439 | } 440 | 441 | #--------------------------bottleneck2 2_1(s=2) 2_2(s=1) input: 12*12*24 output: 6*6*32 442 | # 2_1 in 443 | layer { 444 | name: "conv2_1/in/pw_new" 445 | type: "Convolution" 446 | bottom: "fuse_conv1_2" 447 | top: "conv2_1/in/pw_new" 448 | param { 449 | lr_mult: 1 450 | decay_mult: 1 451 | } 452 | convolution_param { 453 | num_output: 144 454 | bias_term: false 455 | pad: 0 456 | kernel_size: 1 457 | engine: CAFFE 458 | stride: 1 459 | weight_filler { 460 | type: "msra" 461 | } 462 | } 463 | } 464 | 
layer { 465 | name: "conv2_1/in/pw/bn_new" 466 | type: "BatchNorm" 467 | bottom: "conv2_1/in/pw_new" 468 | top: "conv2_1/in/pw_new" 469 | param { 470 | lr_mult: 0 471 | decay_mult: 0 472 | } 473 | param { 474 | lr_mult: 0 475 | decay_mult: 0 476 | } 477 | param { 478 | lr_mult: 0 479 | decay_mult: 0 480 | } 481 | } 482 | layer { 483 | name: "conv2_1/in/pw/scale_new" 484 | type: "Scale" 485 | bottom: "conv2_1/in/pw_new" 486 | top: "conv2_1/in/pw_new" 487 | scale_param { 488 | filler { 489 | value: 1 490 | } 491 | bias_term: true 492 | bias_filler { 493 | value: 0 494 | } 495 | } 496 | } 497 | layer { 498 | name: "relu2_1/in/pw_new" 499 | type: "ReLU" 500 | bottom: "conv2_1/in/pw_new" 501 | top: "conv2_1/in/pw_new" 502 | } 503 | 504 | # 2_1 dw 505 | layer { 506 | name: "conv2_1/dw_new" 507 | type: "ConvolutionDepthwise" 508 | bottom: "conv2_1/in/pw_new" 509 | top: "conv2_1/dw_new" 510 | param { 511 | lr_mult: 1 512 | decay_mult: 0 513 | } 514 | convolution_param { 515 | num_output: 144 516 | bias_term: false 517 | pad: 1 518 | kernel_size: 3 519 | engine: CAFFE 520 | stride: 2 521 | weight_filler { 522 | type: "msra" 523 | } 524 | } 525 | } 526 | layer { 527 | name: "conv2_1/dw/bn_new" 528 | type: "BatchNorm" 529 | bottom: "conv2_1/dw_new" 530 | top: "conv2_1/dw_new" 531 | param { 532 | lr_mult: 0 533 | decay_mult: 0 534 | } 535 | param { 536 | lr_mult: 0 537 | decay_mult: 0 538 | } 539 | param { 540 | lr_mult: 0 541 | decay_mult: 0 542 | } 543 | } 544 | layer { 545 | name: "conv2_1/dw/scale_new" 546 | type: "Scale" 547 | bottom: "conv2_1/dw_new" 548 | top: "conv2_1/dw_new" 549 | scale_param { 550 | filler { 551 | value: 1 552 | } 553 | bias_term: true 554 | bias_filler { 555 | value: 0 556 | } 557 | } 558 | } 559 | layer { 560 | name: "relu2_1/dw_new" 561 | type: "ReLU" 562 | bottom: "conv2_1/dw_new" 563 | top: "conv2_1/dw_new" 564 | } 565 | 566 | # 2_1 out 567 | layer { 568 | name: "conv2_1/out/pw_new" 569 | type: "Convolution" 570 | bottom: "conv2_1/dw_new" 571 | 
top: "conv2_1/out/pw_new" 572 | param { 573 | lr_mult: 1 574 | decay_mult: 1 575 | } 576 | convolution_param { 577 | num_output: 32 578 | bias_term: false 579 | pad: 0 580 | kernel_size: 1 581 | engine: CAFFE 582 | stride: 1 583 | weight_filler { 584 | type: "msra" 585 | } 586 | } 587 | } 588 | layer { 589 | name: "conv2_1/out/pw/bn_new" 590 | type: "BatchNorm" 591 | bottom: "conv2_1/out/pw_new" 592 | top: "conv2_1/out/pw_new" 593 | param { 594 | lr_mult: 0 595 | decay_mult: 0 596 | } 597 | param { 598 | lr_mult: 0 599 | decay_mult: 0 600 | } 601 | param { 602 | lr_mult: 0 603 | decay_mult: 0 604 | } 605 | } 606 | layer { 607 | name: "conv2_1/out/pw/scale_new" 608 | type: "Scale" 609 | bottom: "conv2_1/out/pw_new" 610 | top: "conv2_1/out/pw_new" 611 | scale_param { 612 | filler { 613 | value: 1 614 | } 615 | bias_term: true 616 | bias_filler { 617 | value: 0 618 | } 619 | } 620 | } 621 | 622 | # 2_2 in 623 | 624 | layer { 625 | name: "conv2_2/in/pw_new" 626 | type: "Convolution" 627 | bottom: "conv2_1/out/pw_new" 628 | top: "conv2_2/in/pw_new" 629 | param { 630 | lr_mult: 1 631 | decay_mult: 1 632 | } 633 | convolution_param { 634 | num_output: 192 635 | bias_term: false 636 | pad: 0 637 | kernel_size: 1 638 | engine: CAFFE 639 | stride: 1 640 | weight_filler { 641 | type: "msra" 642 | } 643 | } 644 | } 645 | layer { 646 | name: "conv2_2/in/pw/bn_new" 647 | type: "BatchNorm" 648 | bottom: "conv2_2/in/pw_new" 649 | top: "conv2_2/in/pw_new" 650 | param { 651 | lr_mult: 0 652 | decay_mult: 0 653 | } 654 | param { 655 | lr_mult: 0 656 | decay_mult: 0 657 | } 658 | param { 659 | lr_mult: 0 660 | decay_mult: 0 661 | } 662 | } 663 | layer { 664 | name: "conv2_2/in/pw/scale_new" 665 | type: "Scale" 666 | bottom: "conv2_2/in/pw_new" 667 | top: "conv2_2/in/pw_new" 668 | scale_param { 669 | filler { 670 | value: 1 671 | } 672 | bias_term: true 673 | bias_filler { 674 | value: 0 675 | } 676 | } 677 | } 678 | layer { 679 | name: "relu2_2/in/pw_new" 680 | type: "ReLU" 681 | 
bottom: "conv2_2/in/pw_new" 682 | top: "conv2_2/in/pw_new" 683 | } 684 | 685 | # 2_2 dw 686 | layer { 687 | name: "conv2_2/dw_new" 688 | type: "ConvolutionDepthwise" 689 | bottom: "conv2_2/in/pw_new" 690 | top: "conv2_2/dw_new" 691 | param { 692 | lr_mult: 1 693 | decay_mult: 0 694 | } 695 | convolution_param { 696 | num_output: 192 697 | bias_term: false 698 | pad: 1 699 | kernel_size: 3 700 | engine: CAFFE 701 | stride: 1 702 | weight_filler { 703 | type: "msra" 704 | } 705 | } 706 | } 707 | layer { 708 | name: "conv2_2/dw/bn_new" 709 | type: "BatchNorm" 710 | bottom: "conv2_2/dw_new" 711 | top: "conv2_2/dw_new" 712 | param { 713 | lr_mult: 0 714 | decay_mult: 0 715 | } 716 | param { 717 | lr_mult: 0 718 | decay_mult: 0 719 | } 720 | param { 721 | lr_mult: 0 722 | decay_mult: 0 723 | } 724 | } 725 | layer { 726 | name: "conv2_2/dw/scale_new" 727 | type: "Scale" 728 | bottom: "conv2_2/dw_new" 729 | top: "conv2_2/dw_new" 730 | scale_param { 731 | filler { 732 | value: 1 733 | } 734 | bias_term: true 735 | bias_filler { 736 | value: 0 737 | } 738 | } 739 | } 740 | layer { 741 | name: "relu2_2/dw_new" 742 | type: "ReLU" 743 | bottom: "conv2_2/dw_new" 744 | top: "conv2_2/dw_new" 745 | } 746 | 747 | 748 | # 2_2 out 749 | 750 | layer { 751 | name: "conv2_2/out/pw_new" 752 | type: "Convolution" 753 | bottom: "conv2_2/dw_new" 754 | top: "conv2_2/out/pw_new" 755 | param { 756 | lr_mult: 1 757 | decay_mult: 1 758 | } 759 | convolution_param { 760 | num_output: 32 761 | bias_term: false 762 | pad: 0 763 | kernel_size: 1 764 | engine: CAFFE 765 | stride: 1 766 | weight_filler { 767 | type: "msra" 768 | } 769 | } 770 | } 771 | layer { 772 | name: "conv2_2/out/pw/bn_new" 773 | type: "BatchNorm" 774 | bottom: "conv2_2/out/pw_new" 775 | top: "conv2_2/out/pw_new" 776 | param { 777 | lr_mult: 0 778 | decay_mult: 0 779 | } 780 | param { 781 | lr_mult: 0 782 | decay_mult: 0 783 | } 784 | param { 785 | lr_mult: 0 786 | decay_mult: 0 787 | } 788 | } 789 | layer { 790 | name: 
"conv2_2/out/pw/scale_new" 791 | type: "Scale" 792 | bottom: "conv2_2/out/pw_new" 793 | top: "conv2_2/out/pw_new" 794 | scale_param { 795 | filler { 796 | value: 1 797 | } 798 | bias_term: true 799 | bias_filler { 800 | value: 0 801 | } 802 | } 803 | } 804 | layer { 805 | name: "fuse_conv2_2" 806 | type: "Eltwise" 807 | bottom: "conv2_1/out/pw_new" 808 | bottom: "conv2_2/out/pw_new" 809 | top: "fuse_conv2_2" 810 | eltwise_param { 811 | operation: SUM 812 | } 813 | } 814 | 815 | #--------------------------bottleneck3 3_1(s=2) 3_2(s=1) input: 6*6*32 output: 3*3*64 816 | # 3_1 in 817 | layer { 818 | name: "conv3_1/in/pw_new" 819 | type: "Convolution" 820 | bottom: "fuse_conv2_2" 821 | top: "conv3_1/in/pw_new" 822 | param { 823 | lr_mult: 1 824 | decay_mult: 1 825 | } 826 | convolution_param { 827 | num_output: 192 828 | bias_term: false 829 | pad: 0 830 | kernel_size: 1 831 | engine: CAFFE 832 | stride: 1 833 | weight_filler { 834 | type: "msra" 835 | } 836 | } 837 | } 838 | layer { 839 | name: "conv3_1/in/pw/bn_new" 840 | type: "BatchNorm" 841 | bottom: "conv3_1/in/pw_new" 842 | top: "conv3_1/in/pw_new" 843 | param { 844 | lr_mult: 0 845 | decay_mult: 0 846 | } 847 | param { 848 | lr_mult: 0 849 | decay_mult: 0 850 | } 851 | param { 852 | lr_mult: 0 853 | decay_mult: 0 854 | } 855 | } 856 | layer { 857 | name: "conv3_1/in/pw/scale_new" 858 | type: "Scale" 859 | bottom: "conv3_1/in/pw_new" 860 | top: "conv3_1/in/pw_new" 861 | scale_param { 862 | filler { 863 | value: 1 864 | } 865 | bias_term: true 866 | bias_filler { 867 | value: 0 868 | } 869 | } 870 | } 871 | layer { 872 | name: "relu3_1/in/pw_new" 873 | type: "ReLU" 874 | bottom: "conv3_1/in/pw_new" 875 | top: "conv3_1/in/pw_new" 876 | } 877 | 878 | # 3_1 dw 879 | layer { 880 | name: "conv3_1/dw_new" 881 | type: "ConvolutionDepthwise" 882 | bottom: "conv3_1/in/pw_new" 883 | top: "conv3_1/dw_new" 884 | param { 885 | lr_mult: 1 886 | decay_mult: 0 887 | } 888 | convolution_param { 889 | num_output: 192 890 | 
bias_term: false 891 | pad: 1 892 | kernel_size: 3 893 | engine: CAFFE 894 | stride: 2 895 | weight_filler { 896 | type: "msra" 897 | } 898 | } 899 | } 900 | layer { 901 | name: "conv3_1/dw/bn_new" 902 | type: "BatchNorm" 903 | bottom: "conv3_1/dw_new" 904 | top: "conv3_1/dw_new" 905 | param { 906 | lr_mult: 0 907 | decay_mult: 0 908 | } 909 | param { 910 | lr_mult: 0 911 | decay_mult: 0 912 | } 913 | param { 914 | lr_mult: 0 915 | decay_mult: 0 916 | } 917 | } 918 | layer { 919 | name: "conv3_1/dw/scale_new" 920 | type: "Scale" 921 | bottom: "conv3_1/dw_new" 922 | top: "conv3_1/dw_new" 923 | scale_param { 924 | filler { 925 | value: 1 926 | } 927 | bias_term: true 928 | bias_filler { 929 | value: 0 930 | } 931 | } 932 | } 933 | layer { 934 | name: "relu3_1/dw_new" 935 | type: "ReLU" 936 | bottom: "conv3_1/dw_new" 937 | top: "conv3_1/dw_new" 938 | } 939 | 940 | # 3_1 out 941 | layer { 942 | name: "conv3_1/out/pw_new" 943 | type: "Convolution" 944 | bottom: "conv3_1/dw_new" 945 | top: "conv3_1/out/pw_new" 946 | param { 947 | lr_mult: 1 948 | decay_mult: 1 949 | } 950 | convolution_param { 951 | num_output: 64 952 | bias_term: false 953 | pad: 0 954 | kernel_size: 1 955 | engine: CAFFE 956 | stride: 1 957 | weight_filler { 958 | type: "msra" 959 | } 960 | } 961 | } 962 | layer { 963 | name: "conv3_1/out/pw/bn_new" 964 | type: "BatchNorm" 965 | bottom: "conv3_1/out/pw_new" 966 | top: "conv3_1/out/pw_new" 967 | param { 968 | lr_mult: 0 969 | decay_mult: 0 970 | } 971 | param { 972 | lr_mult: 0 973 | decay_mult: 0 974 | } 975 | param { 976 | lr_mult: 0 977 | decay_mult: 0 978 | } 979 | } 980 | layer { 981 | name: "conv3_1/out/pw/scale_new" 982 | type: "Scale" 983 | bottom: "conv3_1/out/pw_new" 984 | top: "conv3_1/out/pw_new" 985 | scale_param { 986 | filler { 987 | value: 1 988 | } 989 | bias_term: true 990 | bias_filler { 991 | value: 0 992 | } 993 | } 994 | } 995 | 996 | # 3_2 in 997 | 998 | layer { 999 | name: "conv3_2/in/pw_new" 1000 | type: "Convolution" 1001 | 
bottom: "conv3_1/out/pw_new" 1002 | top: "conv3_2/in/pw_new" 1003 | param { 1004 | lr_mult: 1 1005 | decay_mult: 1 1006 | } 1007 | convolution_param { 1008 | num_output: 192 1009 | bias_term: false 1010 | pad: 0 1011 | kernel_size: 1 1012 | engine: CAFFE 1013 | stride: 1 1014 | weight_filler { 1015 | type: "msra" 1016 | } 1017 | } 1018 | } 1019 | layer { 1020 | name: "conv3_2/in/pw/bn_new" 1021 | type: "BatchNorm" 1022 | bottom: "conv3_2/in/pw_new" 1023 | top: "conv3_2/in/pw_new" 1024 | param { 1025 | lr_mult: 0 1026 | decay_mult: 0 1027 | } 1028 | param { 1029 | lr_mult: 0 1030 | decay_mult: 0 1031 | } 1032 | param { 1033 | lr_mult: 0 1034 | decay_mult: 0 1035 | } 1036 | } 1037 | layer { 1038 | name: "conv3_2/in/pw/scale_new" 1039 | type: "Scale" 1040 | bottom: "conv3_2/in/pw_new" 1041 | top: "conv3_2/in/pw_new" 1042 | scale_param { 1043 | filler { 1044 | value: 1 1045 | } 1046 | bias_term: true 1047 | bias_filler { 1048 | value: 0 1049 | } 1050 | } 1051 | } 1052 | layer { 1053 | name: "relu3_2/in/pw_new" 1054 | type: "ReLU" 1055 | bottom: "conv3_2/in/pw_new" 1056 | top: "conv3_2/in/pw_new" 1057 | } 1058 | 1059 | # 3_2 dw 1060 | layer { 1061 | name: "conv3_2/dw_new" 1062 | type: "ConvolutionDepthwise" 1063 | bottom: "conv3_2/in/pw_new" 1064 | top: "conv3_2/dw_new" 1065 | param { 1066 | lr_mult: 1 1067 | decay_mult: 0 1068 | } 1069 | convolution_param { 1070 | num_output: 192 1071 | bias_term: false 1072 | pad: 1 1073 | kernel_size: 3 1074 | engine: CAFFE 1075 | stride: 1 1076 | weight_filler { 1077 | type: "msra" 1078 | } 1079 | } 1080 | } 1081 | layer { 1082 | name: "conv3_2/dw/bn_new" 1083 | type: "BatchNorm" 1084 | bottom: "conv3_2/dw_new" 1085 | top: "conv3_2/dw_new" 1086 | param { 1087 | lr_mult: 0 1088 | decay_mult: 0 1089 | } 1090 | param { 1091 | lr_mult: 0 1092 | decay_mult: 0 1093 | } 1094 | param { 1095 | lr_mult: 0 1096 | decay_mult: 0 1097 | } 1098 | } 1099 | layer { 1100 | name: "conv3_2/dw/scale_new" 1101 | type: "Scale" 1102 | bottom: 
"conv3_2/dw_new" 1103 | top: "conv3_2/dw_new" 1104 | scale_param { 1105 | filler { 1106 | value: 1 1107 | } 1108 | bias_term: true 1109 | bias_filler { 1110 | value: 0 1111 | } 1112 | } 1113 | } 1114 | layer { 1115 | name: "relu3_2/dw_new" 1116 | type: "ReLU" 1117 | bottom: "conv3_2/dw_new" 1118 | top: "conv3_2/dw_new" 1119 | } 1120 | 1121 | 1122 | # 3_2 out 1123 | 1124 | layer { 1125 | name: "conv3_2/out/pw_new" 1126 | type: "Convolution" 1127 | bottom: "conv3_2/dw_new" 1128 | top: "conv3_2/out/pw_new" 1129 | param { 1130 | lr_mult: 1 1131 | decay_mult: 1 1132 | } 1133 | convolution_param { 1134 | num_output: 64 1135 | bias_term: false 1136 | pad: 0 1137 | kernel_size: 1 1138 | engine: CAFFE 1139 | stride: 1 1140 | weight_filler { 1141 | type: "msra" 1142 | } 1143 | } 1144 | } 1145 | layer { 1146 | name: "conv3_2/out/pw/bn_new" 1147 | type: "BatchNorm" 1148 | bottom: "conv3_2/out/pw_new" 1149 | top: "conv3_2/out/pw_new" 1150 | param { 1151 | lr_mult: 0 1152 | decay_mult: 0 1153 | } 1154 | param { 1155 | lr_mult: 0 1156 | decay_mult: 0 1157 | } 1158 | param { 1159 | lr_mult: 0 1160 | decay_mult: 0 1161 | } 1162 | } 1163 | layer { 1164 | name: "conv3_2/out/pw/scale_new" 1165 | type: "Scale" 1166 | bottom: "conv3_2/out/pw_new" 1167 | top: "conv3_2/out/pw_new" 1168 | scale_param { 1169 | filler { 1170 | value: 1 1171 | } 1172 | bias_term: true 1173 | bias_filler { 1174 | value: 0 1175 | } 1176 | } 1177 | } 1178 | layer { 1179 | name: "fuse_conv3_2" 1180 | type: "Eltwise" 1181 | bottom: "conv3_1/out/pw_new" 1182 | bottom: "conv3_2/out/pw_new" 1183 | top: "fuse_conv3_2" 1184 | eltwise_param { 1185 | operation: SUM 1186 | } 1187 | } 1188 | 1189 | 1190 | 1191 | 1192 | 1193 | 1194 | 1195 | #------------------------- fc1 1196 | layer { 1197 | name: "fc1" 1198 | type: "InnerProduct" 1199 | bottom: "fuse_conv3_2" 1200 | top: "fc1" 1201 | param { 1202 | lr_mult: 1 1203 | decay_mult: 1 1204 | } 1205 | param { 1206 | lr_mult: 2 1207 | decay_mult: 1 1208 | } 1209 | 
inner_product_param { 1210 | num_output: 256 1211 | weight_filler { 1212 | type: "gaussian" 1213 | std: 0.01 1214 | } 1215 | bias_filler { 1216 | type: "constant" 1217 | value: 0 1218 | } 1219 | } 1220 | } 1221 | layer { 1222 | name: "relu_fc1" 1223 | type: "ReLU" 1224 | bottom: "fc1" 1225 | top: "fc1" 1226 | } 1227 | layer { 1228 | name: "drop_fc1" 1229 | type: "Dropout" 1230 | bottom: "fc1" 1231 | top: "fc1" 1232 | dropout_param{ 1233 | dropout_ratio: 0.3 1234 | } 1235 | } 1236 | 1237 | #------------------------- fc2 1238 | layer { 1239 | name: "fc2" 1240 | type: "InnerProduct" 1241 | bottom: "fc1" 1242 | top: "fc2" 1243 | param { 1244 | lr_mult: 1 1245 | decay_mult: 1 1246 | } 1247 | param { 1248 | lr_mult: 2 1249 | decay_mult: 1 1250 | } 1251 | inner_product_param { 1252 | num_output: 10 1253 | weight_filler { 1254 | type: "gaussian" 1255 | std: 0.01 1256 | } 1257 | bias_filler { 1258 | type: "constant" 1259 | value: 0 1260 | } 1261 | } 1262 | } -------------------------------------------------------------------------------- /1_level_1/Code/4_evaluate/evaluate_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | sys.path.append('../../../util') 4 | import tools 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import os 8 | import cv2 9 | 10 | l1_out_test_label = '../../Result/l1_out_test_label.txt' 11 | l1_raw_test_label = '../../Data/l1_test_label.txt' 12 | relative_path = '../../../raw_data/Data/img_celeba/' # find the image from txt 13 | draw_img_flod = '../../Result/l1_out_draw/test/' 14 | drop_img_flod = '../../Result/l1_drop/test/' 15 | 16 | n_p = 5 17 | # ----------------------------------------------------------------------- load label 18 | l1_raw_fid = open(l1_raw_test_label) 19 | l1_raw_lines = l1_raw_fid.readlines() 20 | l1_raw_fid.close() 21 | l1_out_fid = open(l1_out_test_label) 22 | l1_out_lines = l1_out_fid.readlines() 23 | l1_out_fid.close() 24 | 25 
| err_mat = [] 26 | threshold = 0.1 27 | count_drop = 0 28 | for idx in range(len(l1_out_lines)): 29 | print idx 30 | r_ = l1_raw_lines[idx] 31 | o_ = l1_out_lines[idx] 32 | r_name = r_.split()[0] 33 | o_name = o_.split()[0] 34 | if r_name != o_name: 35 | print 'find a error,idx: ', idx 36 | continue 37 | full_img_path = relative_path + r_name 38 | img = cv2.imread(full_img_path) 39 | h,w,c = img.shape 40 | 41 | err_1,err_5 = tools.cal_error_nor_diag(img,r_,o_) # r_ have img name , range of [-1,1] , err_1 is mean 42 | err_mat.append(err_5) 43 | out_land = np.array(map(float,o_.split()[1:2*n_p+1])) 44 | 45 | if err_1 >= threshold : 46 | count_drop = count_drop + 1 47 | draw_img = img.copy() 48 | draw_img = tools.drawpoints(draw_img,out_land) 49 | tools.makedir(drop_img_flod) 50 | draw_img_name = str(err_1) + '_' + r_name 51 | draw_img_path = drop_img_flod + draw_img_name 52 | cv2.imwrite(draw_img_path, draw_img) 53 | else: 54 | draw_img = img.copy() 55 | draw_img = tools.drawpoints(draw_img,out_land) 56 | tools.makedir(draw_img_flod) 57 | draw_img_name = str(err_1) + '_' + r_name 58 | draw_img_path = draw_img_flod + draw_img_name 59 | cv2.imwrite(draw_img_path, draw_img) 60 | # -------------------------------------------------------------- print result 61 | err_mat = np.array(err_mat) 62 | err_mat = np.reshape(err_mat,(-1,5)) 63 | MNE_5 = [] 64 | for i in range(n_p): 65 | MNE_5.append(err_mat[:,i].mean()) 66 | print 'err >= 10% have ' , count_drop 67 | # ------------------------------------------------------------- plot 68 | fig = plt.figure('test_MNE_5') 69 | ax1 =plt.subplot(111) 70 | data = np.array(MNE_5) 71 | width = 0.2 72 | x_bar = np.arange(5) 73 | # print('x_bar type ',type(x_bar)) 74 | rect = ax1.bar(left=x_bar,height=data,width=width, color="blue") 75 | for rec in rect: 76 | x= rec.get_x() 77 | height = round(rec.get_height()*100,2) 78 | mne_text = str(height) + '%' 79 | # print('mne text',mne_text) 80 | ax1.text(x+0.05,1.02*height/100,mne_text) 81 | # 
print('height',height) 82 | MNE_5_mean = np.round(np.array(MNE_5).mean() *100,2) 83 | MNE_5_mean_text = 'The mean normalized error :' +str(MNE_5_mean) + '%' 84 | ax1.text(1 ,1.5*MNE_5_mean/100 ,MNE_5_mean_text,color="red") 85 | 86 | ax1.set_xticks(x_bar + width) 87 | ax1.set_xticklabels(("left eye","right eye","nose","left mouth","right mouth")) 88 | ax1.set_ylabel("MNE") 89 | ax1.set_title(" MNE") 90 | ax1.grid(True) 91 | ax1.set_ylim(0,0.025) # max y axis 92 | plt.show() 93 | 94 | 95 | 96 | print 'The mean error normalized by dist_diag is : ', err_mat.mean() 97 | fig2 = plt.figure("test_distribution") 98 | ax2 = plt.subplot(111) 99 | ax2.set_title("The mean error normalized by dist_diag :") 100 | data =err_mat.mean(axis=1) 101 | n, bins, patches = plt.hist(data ,bins=200, normed=False, facecolor='blue', alpha=0.75) 102 | err_mat_mean = np.round(np.array(err_mat).mean() *100 ,2) 103 | mean_text = 'The mean error normalized by dist_diag : ' + str(err_mat_mean) + '%' 104 | ax2.text(0.1,len(err_mat)/10 ,mean_text,color="red") 105 | plt.show() -------------------------------------------------------------------------------- /1_level_1/Code/4_evaluate/evaluate_train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | sys.path.append('../../../util') 4 | import tools 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import os 8 | import cv2 9 | 10 | l1_out_test_label = '../../Result/l1_out_train_label.txt' 11 | l1_raw_test_label = '../../Data/l1_train_label.txt' 12 | relative_path = '../../../raw_data/Data/img_celeba/' # find the image from txt 13 | draw_img_flod = '../../Result/l1_out_draw/train/' 14 | drop_img_flod = '../../Result/l1_drop/train/' 15 | 16 | n_p = 5 17 | # ----------------------------------------------------------------------- load label 18 | l1_raw_fid = open(l1_raw_test_label) 19 | l1_raw_lines = l1_raw_fid.readlines() 20 | l1_raw_fid.close() 21 | l1_out_fid = 
open(l1_out_test_label) 22 | l1_out_lines = l1_out_fid.readlines() 23 | l1_out_fid.close() 24 | 25 | err_mat = [] 26 | threshold = 0.1 27 | count_drop = 0 28 | for idx in range(len(l1_out_lines)): 29 | print idx 30 | r_ = l1_raw_lines[idx] 31 | o_ = l1_out_lines[idx] 32 | r_name = r_.split()[0] 33 | o_name = o_.split()[0] 34 | if r_name != o_name: 35 | print 'find a error,idx: ', idx 36 | continue 37 | full_img_path = relative_path + r_name 38 | img = cv2.imread(full_img_path) 39 | h,w,c = img.shape 40 | 41 | err_1,err_5 = tools.cal_error_nor_diag(img,r_,o_) # r_ have img name , range of [-1,1] err_1 is mean 42 | err_mat.append(err_5) 43 | out_land = np.array(map(float,o_.split()[1:2*n_p+1])) 44 | if err_1 >= threshold : 45 | count_drop = count_drop + 1 46 | draw_img = img.copy() 47 | draw_img = tools.drawpoints(draw_img,out_land) 48 | tools.makedir(drop_img_flod) 49 | draw_img_name = str(err_1) + '_' + r_name 50 | draw_img_path = drop_img_flod + draw_img_name 51 | cv2.imwrite(draw_img_path, draw_img) 52 | else: 53 | draw_img = img.copy() 54 | draw_img = tools.drawpoints(draw_img,out_land) 55 | tools.makedir(draw_img_flod) 56 | draw_img_name = str(err_1) + '_' + r_name 57 | draw_img_path = draw_img_flod + draw_img_name 58 | cv2.imwrite(draw_img_path, draw_img) 59 | # print a 60 | # -------------------------------------------------------------- print result 61 | err_mat = np.array(err_mat) 62 | err_mat = np.reshape(err_mat,(-1,5)) 63 | MNE_5 = [] 64 | for i in range(n_p): 65 | MNE_5.append(err_mat[:,i].mean()) 66 | print 'err >= 10% have ' , count_drop 67 | # print 'MNE of left eye: ', MNE_5[0] 68 | # print 'MNE of right eye: ', MNE_5[1] 69 | # print 'MNE of nose: ', MNE_5[2] 70 | # print 'MNE of left mouth: ', MNE_5[3] 71 | # print 'MNE of right mouth: ', MNE_5[4] 72 | # print 'MNE : ' , np.array(MNE_5).mean() 73 | 74 | # ------------------------------------------------------------- plot 75 | fig = plt.figure('train_MNE_5') 76 | ax1 =plt.subplot(111) 77 | data = 
np.array(MNE_5) 78 | width = 0.2 79 | x_bar = np.arange(5) 80 | # print('x_bar type ',type(x_bar)) 81 | rect = ax1.bar(left=x_bar,height=data,width=width, color="blue") 82 | for rec in rect: 83 | x= rec.get_x() 84 | height = round(rec.get_height()*100,2) 85 | mne_text = str(height) + '%' 86 | # print('mne text',mne_text) 87 | ax1.text(x+0.05,1.02*height/100,mne_text) 88 | # print('height',height) 89 | MNE_5_mean = np.round(np.array(MNE_5).mean() *100,2) 90 | MNE_5_mean_text = 'The mean normalized error :' +str(MNE_5_mean) + '%' 91 | ax1.text(1 ,1.5*MNE_5_mean/100 ,MNE_5_mean_text,color="red") 92 | 93 | ax1.set_xticks(x_bar + width) 94 | ax1.set_xticklabels(("left eye","right eye","nose","left mouth","right mouth")) 95 | ax1.set_ylabel("MNE") 96 | ax1.set_title(" MNE") 97 | ax1.grid(True) 98 | ax1.set_ylim(0,0.025) # max y axis 99 | plt.show() 100 | 101 | 102 | 103 | print 'The mean error normalized by dist_diag is : ', err_mat.mean() 104 | # print a 105 | fig2 = plt.figure("train_distribution") 106 | ax2 = plt.subplot(111) 107 | ax2.set_title("The mean error normalized by dist_diag :") 108 | data =err_mat.mean(axis=1) 109 | n, bins, patches = plt.hist(data ,bins=200, normed=False, facecolor='blue', alpha=0.75) 110 | err_mat_mean = np.round(np.array(err_mat).mean() *100 ,2) 111 | mean_text = 'The mean error normalized by dist_diag : ' + str(err_mat_mean) + '%' 112 | ax2.text(0.1,len(err_mat)/10 ,mean_text,color="red") 113 | plt.show() -------------------------------------------------------------------------------- /1_level_1/Code/5_crop_img/crop_test_img.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | sys.path.append('../../../util') 4 | import tools 5 | import numpy as np 6 | import os 7 | import cv2 8 | 9 | l1_out_label = '../../Result/l1_out_test_label.txt' 10 | l1_raw_label = '../../Data/l1_test_label.txt' 11 | relative_path = '../../../raw_data/Data/img_celeba/' # find the image 
from txt 12 | 13 | crop_img_flod = '../../../level_2/Data/l1_crop/test/' 14 | 15 | crop_label_flod = '../../../level_2/Data/l1_crop/' 16 | crop_label_txt = crop_label_flod + 'l1_crop_test_label.txt' 17 | crop_draw_img_flod = '../../../level_2/Data/l1_crop_draw/test/' 18 | tools.makedir(crop_img_flod) 19 | 20 | n_p = 5 21 | # ----------------------------------------------------------------------- load label 22 | l1_raw_fid = open(l1_raw_label) 23 | l1_raw_lines = l1_raw_fid.readlines() 24 | l1_raw_fid.close() 25 | l1_out_fid = open(l1_out_label) 26 | l1_out_lines = l1_out_fid.readlines() 27 | l1_out_fid.close() 28 | err_mat = [] 29 | 30 | threshold = 0.1 31 | count_threshold = 0 32 | fid = open(crop_label_txt,'w') 33 | for idx in range(len(l1_out_lines)): 34 | print idx 35 | r_ = l1_raw_lines[idx] 36 | o_ = l1_out_lines[idx] 37 | r_name = r_.split()[0] 38 | o_name = o_.split()[0] 39 | if r_name != o_name: 40 | print 'find a error,idx: ', idx 41 | continue 42 | full_img_path = relative_path + r_name 43 | img = cv2.imread(full_img_path) 44 | h,w,c = img.shape 45 | # ---------------------------------------------------------------------- calculate error 46 | err_1,err_5 = tools.cal_error_nor_diag(img,r_,o_) # r_ have img name , range of [-1,1] err_1 is mean 47 | err_mat.append(err_5) 48 | 49 | raw_land = np.array(map(float,r_.split()[1:2*n_p+1])) # nparray float 50 | out_land = np.array(map(float,o_.split()[1:2*n_p+1])) 51 | 52 | if err_1 < threshold : 53 | # ------------------------------------------------------------ calculate w,h for crop img 54 | raw_pix_land = tools.label2points(raw_land,w,h) 55 | out_pix_land = tools.label2points(out_land,w,h) 56 | # print 'raw pix land',raw_pix_land 57 | p_nose = out_pix_land[4:6] 58 | p_lefteye = out_pix_land[0:2] 59 | d_nose_lefteye = tools.cal_eucldist(p_nose,p_lefteye) 60 | 61 | w_start = np.round(p_nose[0] - 2*d_nose_lefteye).astype(int) 62 | w_end = np.round(p_nose[0] + 2*d_nose_lefteye).astype(int) 63 | h_start = 
np.round(p_nose[1] - 2*d_nose_lefteye).astype(int) 64 | h_end = np.round(p_nose[1] + 2*d_nose_lefteye).astype(int) 65 | 66 | if w_start < 0: w_start = 0 67 | if h_start < 0: h_start = 0 68 | if w_end > w: w_end = w 69 | if h_end > h: h_end = h 70 | # print ('w,w_end h,h_end',w_start,w_end,h_start,h_end) 71 | # ------------------------------------------------------------ calculate new label 72 | crop_pix_land = raw_pix_land.copy() 73 | crop_pix_land[0::2] = crop_pix_land[0::2] - w_start # x 74 | crop_pix_land[1::2] = crop_pix_land[1::2] - h_start # y 75 | # print ('crop pix land ', crop_pix_land) 76 | 77 | crop_w = w_end - w_start 78 | crop_h = h_end - h_start 79 | w1 = (crop_w-1)/2 80 | h1 = (crop_h-1)/2 81 | crop_land = crop_pix_land.copy() 82 | crop_land[0::2] = (crop_pix_land[0::2] - w1) / w1 83 | crop_land[1::2] = (crop_pix_land[1::2] - h1) / h1 84 | 85 | # print('crop land ', crop_land) 86 | # ----------------------------------------------------------- output crop img 87 | crop_img = img.copy() 88 | crop_img = crop_img[h_start:h_end+1,w_start:w_end+1,:] 89 | crop_img_name = r_name 90 | crop_img_path = crop_img_flod + crop_img_name 91 | tools.makedir(crop_img_flod) 92 | cv2.imwrite(crop_img_path,crop_img) 93 | # ----------------------------------------------------------- output crop draw img 94 | crop_draw_img = crop_img.copy() 95 | crop_draw_img = tools.drawpoints(crop_draw_img, crop_land) 96 | crop_draw_img_name = r_name 97 | crop_draw_img_path = crop_draw_img_flod + crop_draw_img_name 98 | tools.makedir(crop_draw_img_flod) 99 | cv2.imwrite(crop_draw_img_path,crop_draw_img) 100 | # ----------------------------------------------------------- output label 101 | new_line = r_name 102 | str_0 = str(crop_land) 103 | str_1 = str_0.replace("\n","") 104 | str_2 = str_1.strip('[]') 105 | str_3 = str_2.split() 106 | for i in range(n_p): 107 | x_ = str_3[2*i+0] # value is [-1,1] 108 | y_ = str_3[2*i+1] 109 | 110 | new_line = new_line + ' ' + str(x_) # note: the point 
order has changed: x1,y1,x2... 111 | new_line = new_line + ' ' + str(y_) 112 | new_line = new_line + '\n' 113 | fid.write(new_line) 114 | fid.close() -------------------------------------------------------------------------------- /1_level_1/Code/5_crop_img/crop_train_img.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | sys.path.append('../../../util') 4 | import tools 5 | import numpy as np 6 | import argparse 7 | import glob 8 | import matplotlib.pyplot as plt 9 | import os 10 | import cv2 11 | 12 | l1_out_label = '../../Result/l1_out_train_label.txt' 13 | l1_raw_label = '../../Data/l1_train_label.txt' 14 | relative_path = '../../../raw_data/Data/img_celeba/' # find the image from txt 15 | 16 | crop_img_flod = '../../../level_2/Data/l1_crop/train/' 17 | 18 | crop_label_flod = '../../../level_2/Data/l1_crop/' 19 | crop_label_txt = crop_label_flod + 'l1_crop_train_label.txt' 20 | crop_draw_img_flod = '../../../level_2/Data/l1_crop_draw/train/' 21 | tools.makedir(crop_img_flod) 22 | 23 | n_p = 5 24 | # ----------------------------------------------------------------------- load label 25 | l1_raw_fid = open(l1_raw_label) 26 | l1_raw_lines = l1_raw_fid.readlines() 27 | l1_raw_fid.close() 28 | l1_out_fid = open(l1_out_label) 29 | l1_out_lines = l1_out_fid.readlines() 30 | l1_out_fid.close() 31 | err_mat = [] 32 | 33 | threshold = 0.1 34 | count_threshold = 0 35 | fid = open(crop_label_txt,'w') 36 | for idx in range(len(l1_out_lines)): 37 | print idx 38 | r_ = l1_raw_lines[idx] 39 | o_ = l1_out_lines[idx] 40 | r_name = r_.split()[0] 41 | o_name = o_.split()[0] 42 | if r_name != o_name: 43 | print 'find a error,idx: ', idx 44 | continue 45 | full_img_path = relative_path + r_name 46 | img = cv2.imread(full_img_path) 47 | h,w,c = img.shape 48 | # ---------------------------------------------------------------------- calculate error 49 | err_1,err_5 = tools.cal_error_nor_diag(img,r_,o_) # r_ 
have img name , range of [-1,1] err_1 is mean 50 | err_mat.append(err_5) 51 | 52 | raw_land = np.array(map(float,r_.split()[1:2*n_p+1])) # nparray float 53 | out_land = np.array(map(float,o_.split()[1:2*n_p+1])) 54 | 55 | if err_1 < threshold : 56 | # ------------------------------------------------------------ calculate w,h for crop img 57 | raw_pix_land = tools.label2points(raw_land,w,h) 58 | out_pix_land = tools.label2points(out_land,w,h) 59 | 60 | p_nose = out_pix_land[4:6] 61 | p_lefteye = out_pix_land[0:2] 62 | d_nose_lefteye = tools.cal_eucldist(p_nose,p_lefteye) 63 | 64 | w_start = np.round(p_nose[0] - 2*d_nose_lefteye).astype(int) 65 | w_end = np.round(p_nose[0] + 2*d_nose_lefteye).astype(int) 66 | h_start = np.round(p_nose[1] - 2*d_nose_lefteye).astype(int) 67 | h_end = np.round(p_nose[1] + 2*d_nose_lefteye).astype(int) 68 | 69 | if w_start < 0: w_start = 0 70 | if h_start < 0: h_start = 0 71 | if w_end > w: w_end = w 72 | if h_end > h: h_end = h 73 | # ------------------------------------------------------------ calculate new label 74 | crop_pix_land = raw_pix_land.copy() 75 | crop_pix_land[0::2] = crop_pix_land[0::2] - w_start # x 76 | crop_pix_land[1::2] = crop_pix_land[1::2] - h_start # y 77 | 78 | crop_w = w_end - w_start 79 | crop_h = h_end - h_start 80 | w1 = (crop_w-1)/2 81 | h1 = (crop_h-1)/2 82 | crop_land = crop_pix_land.copy() 83 | crop_land[0::2] = (crop_pix_land[0::2] - w1) / w1 84 | crop_land[1::2] = (crop_pix_land[1::2] - h1) / h1 85 | # ----------------------------------------------------------- output crop img 86 | crop_img = img.copy() 87 | crop_img = crop_img[h_start:h_end+1,w_start:w_end+1,:] 88 | crop_img_name = r_name 89 | crop_img_path = crop_img_flod + crop_img_name 90 | tools.makedir(crop_img_flod) 91 | cv2.imwrite(crop_img_path,crop_img) 92 | # ----------------------------------------------------------- output crop draw img 93 | crop_draw_img = crop_img.copy() 94 | crop_draw_img = tools.drawpoints(crop_draw_img, crop_land) 95 | 
crop_draw_img_name = r_name 96 | crop_draw_img_path = crop_draw_img_flod + crop_draw_img_name 97 | tools.makedir(crop_draw_img_flod) 98 | cv2.imwrite(crop_draw_img_path,crop_draw_img) 99 | # ----------------------------------------------------------- output label 100 | new_line = r_name 101 | str_0 = str(crop_land) 102 | str_1 = str_0.replace("\n","") 103 | str_2 = str_1.strip('[]') 104 | str_3 = str_2.split() 105 | for i in range(n_p): 106 | x_ = str_3[2*i+0] # value is [-1,1] 107 | y_ = str_3[2*i+1] 108 | 109 | new_line = new_line + ' ' + str(x_) # note: the point order has changed: x1,y1,x2... 110 | new_line = new_line + ' ' + str(y_) 111 | new_line = new_line + '\n' 112 | fid.write(new_line) 113 | fid.close() -------------------------------------------------------------------------------- /2_level_2/Code/0_train/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "l2_mobilenet.prototxt" 2 | 3 | test_iter: 160 # bs = 128 * 2 4 | test_interval: 1250 5 | 6 | #base_lr: 0.0001 7 | base_lr: 0.001 8 | momentum: 0.9 9 | weight_decay: 0.0004 10 | 11 | type: "Adam" 12 | 13 | lr_policy: "multistep" 14 | #gamma: 0.9 15 | gamma:0.1 16 | stepvalue: 80000 # 40iter = 1 epoch 17 | stepvalue: 100000 18 | #stepvalue: 250000 19 | 20 | display: 1000 21 | max_iter: 120000 22 | 23 | snapshot: 50000 24 | snapshot_prefix: "../../Result/solver_state/" 25 | solver_mode: GPU 26 | -------------------------------------------------------------------------------- /2_level_2/Code/0_train/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | postfix=`date +"%F-%H-%M-%S"` 4 | /***your_caffe_path***/build/tools/caffe train \ 5 | --solver=./solver.prototxt -gpu 0,1 \ 6 | 2>&1 | tee ../../Result/log/$(date +%Y-%m-%d-%H-%M.log) $@ -------------------------------------------------------------------------------- /2_level_2/Code/1_inference/inferencen.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | sys.path.append('../../../util') 4 | sys.path.append('/***your_caffe_path***/python') 5 | sys.path.append('/***your_caffe_path***/python/caffe') 6 | import tools 7 | import caffe 8 | import numpy as np 9 | import argparse 10 | import cv2 11 | import time 12 | 13 | l2_deploy = './l2_deploy.prototxt' 14 | l2_model = '../../Result/solver_state/_iter_100000.caffemodel' 15 | 16 | txt_flod = '../../Data/l1_crop/' 17 | train_txt = txt_flod + 'l1_crop_train_label.txt' 18 | test_txt = txt_flod + 'l1_crop_test_label.txt' 19 | 20 | relative_path = '../../Data/l1_crop/' # find the image 21 | 22 | l2_out_train_txt = '../../Result/l2_out_train_label.txt' 23 | l2_out_test_txt = '../../Result/l2_out_test_label.txt' 24 | 25 | w_net = 48 26 | h_net = 48 27 | 28 | #--------------------------------------------------------------------------- cnn initalization 29 | caffe.set_mode_gpu() 30 | caffe.set_device(0) 31 | # load model 32 | net = caffe.Net(l2_deploy,l2_model,caffe.TEST) 33 | # image preprocess 34 | mu = np.ones((3,w_net,h_net), dtype=np.float) * 127.5 35 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) 36 | transformer.set_transpose('data', (2,0,1)) # (w,h,c)--> (c,w,h) 37 | transformer.set_mean('data', mu) # pixel-wise 38 | transformer.set_raw_scale('data', 255 ) # [0,1] --> [0,255] 39 | transformer.set_channel_swap('data', (2,1,0)) # RGB --> BGR 40 | #----------------------------------------------------------------------------- forward 41 | def l2_forward(input_txt,output_txt,status='train'): 42 | out_f = open(output_txt,'w') 43 | for line in open(input_txt): 44 | if line.isspace() : continue 45 | img_name = line.split()[0] 46 | full_img_path = relative_path + status +'/'+ img_name 47 | # print full_img_path 48 | # print a 49 | #------------------------------------------------------------------------- cnn forward 50 | 
im=caffe.io.load_image(full_img_path) # im is RGB with 0~1 float 51 | net.blobs['data'].data[...]=transformer.preprocess('data',im) 52 | time_s = time.clock() 53 | n_out = net.forward() 54 | time_e = time.clock() 55 | print img_name,'forward : ',round((time_e-time_s)*1000,1) ,'ms' 56 | out_landmark = net.blobs['fc2'].data[0].flatten() 57 | #------------------------------------------------------------------------- write txt 58 | str_0 = str(out_landmark) 59 | str_1 = str_0.replace("\n","") 60 | str_2 = str_1.strip('[]') 61 | new_line = img_name +' '+ str_2 +'\n' 62 | out_f.write(new_line) 63 | out_f.close() 64 | 65 | l2_forward(test_txt,l2_out_test_txt,status='test') 66 | l2_forward(train_txt,l2_out_train_txt,status='train') -------------------------------------------------------------------------------- /2_level_2/Code/1_inference/l2_deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "level_2" 2 | input: "data" 3 | input_shape { dim: 1 dim: 3 dim: 48 dim: 48 } 4 | 5 | 6 | #------------------------------------------- conv1 input: 48*48*3 output: 24*24*16 7 | layer { 8 | name: "conv1_new" 9 | type: "Convolution" 10 | bottom: "data" 11 | top: "conv1_new" 12 | param { 13 | lr_mult: 1 14 | decay_mult: 1 15 | } 16 | convolution_param { 17 | num_output: 16 18 | bias_term: false 19 | pad: 1 20 | kernel_size: 3 21 | stride: 2 22 | weight_filler { 23 | type: "msra" 24 | } 25 | } 26 | } 27 | layer { 28 | name: "conv1/bn_new" 29 | type: "BatchNorm" 30 | bottom: "conv1_new" 31 | top: "conv1_new" 32 | param { 33 | lr_mult: 0 34 | decay_mult: 0 35 | } 36 | param { 37 | lr_mult: 0 38 | decay_mult: 0 39 | } 40 | param { 41 | lr_mult: 0 42 | decay_mult: 0 43 | } 44 | } 45 | layer { 46 | name: "conv1/scale_new" 47 | type: "Scale" 48 | bottom: "conv1_new" 49 | top: "conv1_new" 50 | scale_param { 51 | filler { 52 | value: 1 53 | } 54 | bias_term: true 55 | bias_filler { 56 | value: 0 57 | } 58 | } 59 | } 60 | layer { 61 | name: 
"relu1_new" 62 | type: "ReLU" 63 | bottom: "conv1_new" 64 | top: "conv1_new" 65 | } 66 | 67 | #----------- bottleneck_1 1_1(s=2) 1_2(s=1) input: 24*24*16 output: 12*12*24 68 | 69 | layer { 70 | name: "conv1_1/in/pw_new" 71 | type: "Convolution" 72 | bottom: "conv1_new" 73 | top: "conv1_1/in/pw_new" 74 | param { 75 | lr_mult: 1 76 | decay_mult: 1 77 | } 78 | convolution_param { 79 | num_output: 96 80 | bias_term: false 81 | pad: 0 82 | kernel_size: 1 83 | engine: CAFFE 84 | stride: 1 85 | weight_filler { 86 | type: "msra" 87 | } 88 | } 89 | } 90 | layer { 91 | name: "conv1_1/in/pw/bn_new" 92 | type: "BatchNorm" 93 | bottom: "conv1_1/in/pw_new" 94 | top: "conv1_1/in/pw_new" 95 | param { 96 | lr_mult: 0 97 | decay_mult: 0 98 | } 99 | param { 100 | lr_mult: 0 101 | decay_mult: 0 102 | } 103 | param { 104 | lr_mult: 0 105 | decay_mult: 0 106 | } 107 | } 108 | layer { 109 | name: "conv1_1/in/pw/scale_new" 110 | type: "Scale" 111 | bottom: "conv1_1/in/pw_new" 112 | top: "conv1_1/in/pw_new" 113 | scale_param { 114 | filler { 115 | value: 1 116 | } 117 | bias_term: true 118 | bias_filler { 119 | value: 0 120 | } 121 | } 122 | } 123 | layer { 124 | name: "relu1_1/in/pw_new" 125 | type: "ReLU" 126 | bottom: "conv1_1/in/pw_new" 127 | top: "conv1_1/in/pw_new" 128 | } 129 | 130 | 131 | 132 | # 1_1 dw conv 133 | layer { 134 | name: "conv1_1/dw_new" 135 | type: "ConvolutionDepthwise" 136 | bottom: "conv1_1/in/pw_new" 137 | top: "conv1_1/dw_new" 138 | param { 139 | lr_mult: 1 140 | decay_mult: 0 141 | } 142 | convolution_param { 143 | num_output: 96 144 | bias_term: false 145 | pad: 1 146 | kernel_size: 3 147 | engine: CAFFE 148 | stride: 2 149 | weight_filler { 150 | type: "msra" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "conv1_1/dw/bn_new" 156 | type: "BatchNorm" 157 | bottom: "conv1_1/dw_new" 158 | top: "conv1_1/dw_new" 159 | param { 160 | lr_mult: 0 161 | decay_mult: 0 162 | } 163 | param { 164 | lr_mult: 0 165 | decay_mult: 0 166 | } 167 | param { 168 | lr_mult: 0 169 
| decay_mult: 0 170 | } 171 | } 172 | layer { 173 | name: "conv1_1/dw/scale_new" 174 | type: "Scale" 175 | bottom: "conv1_1/dw_new" 176 | top: "conv1_1/dw_new" 177 | scale_param { 178 | filler { 179 | value: 1 180 | } 181 | bias_term: true 182 | bias_filler { 183 | value: 0 184 | } 185 | } 186 | } 187 | layer { 188 | name: "relu1_1/dw_new" 189 | type: "ReLU" 190 | bottom: "conv1_1/dw_new" 191 | top: "conv1_1/dw_new" 192 | } 193 | 194 | # 1_1 out 195 | layer { 196 | name: "conv1_1/out/pw_new" 197 | type: "Convolution" 198 | bottom: "conv1_1/dw_new" 199 | top: "conv1_1/out/pw_new" 200 | param { 201 | lr_mult: 1 202 | decay_mult: 1 203 | } 204 | convolution_param { 205 | num_output: 24 206 | bias_term: false 207 | pad: 0 208 | kernel_size: 1 209 | engine: CAFFE 210 | stride: 1 211 | weight_filler { 212 | type: "msra" 213 | } 214 | } 215 | } 216 | layer { 217 | name: "conv1_1/out/pw/bn_new" 218 | type: "BatchNorm" 219 | bottom: "conv1_1/out/pw_new" 220 | top: "conv1_1/out/pw_new" 221 | param { 222 | lr_mult: 0 223 | decay_mult: 0 224 | } 225 | param { 226 | lr_mult: 0 227 | decay_mult: 0 228 | } 229 | param { 230 | lr_mult: 0 231 | decay_mult: 0 232 | } 233 | } 234 | layer { 235 | name: "conv1_1/out/pw/scale_new" 236 | type: "Scale" 237 | bottom: "conv1_1/out/pw_new" 238 | top: "conv1_1/out/pw_new" 239 | scale_param { 240 | filler { 241 | value: 1 242 | } 243 | bias_term: true 244 | bias_filler { 245 | value: 0 246 | } 247 | } 248 | } 249 | # 1_2 in 250 | 251 | layer { 252 | name: "conv1_2/in/pw_new" 253 | type: "Convolution" 254 | bottom: "conv1_1/out/pw_new" 255 | top: "conv1_2/in/pw_new" 256 | param { 257 | lr_mult: 1 258 | decay_mult: 1 259 | } 260 | convolution_param { 261 | num_output: 144 262 | bias_term: false 263 | pad: 0 264 | kernel_size: 1 265 | engine: CAFFE 266 | stride: 1 267 | weight_filler { 268 | type: "msra" 269 | } 270 | } 271 | } 272 | layer { 273 | name: "conv1_2/in/pw/bn_new" 274 | type: "BatchNorm" 275 | bottom: "conv1_2/in/pw_new" 276 | top: 
"conv1_2/in/pw_new" 277 | param { 278 | lr_mult: 0 279 | decay_mult: 0 280 | } 281 | param { 282 | lr_mult: 0 283 | decay_mult: 0 284 | } 285 | param { 286 | lr_mult: 0 287 | decay_mult: 0 288 | } 289 | } 290 | layer { 291 | name: "conv1_2/in/pw/scale_new" 292 | type: "Scale" 293 | bottom: "conv1_2/in/pw_new" 294 | top: "conv1_2/in/pw_new" 295 | scale_param { 296 | filler { 297 | value: 1 298 | } 299 | bias_term: true 300 | bias_filler { 301 | value: 0 302 | } 303 | } 304 | } 305 | layer { 306 | name: "relu1_2/in/pw_new" 307 | type: "ReLU" 308 | bottom: "conv1_2/in/pw_new" 309 | top: "conv1_2/in/pw_new" 310 | } 311 | 312 | # 1_2 dw 313 | 314 | layer { 315 | name: "conv1_2/dw_new" 316 | type: "ConvolutionDepthwise" 317 | bottom: "conv1_2/in/pw_new" 318 | top: "conv1_2/dw_new" 319 | param { 320 | lr_mult: 1 321 | decay_mult: 0 322 | } 323 | convolution_param { 324 | num_output: 144 325 | bias_term: false 326 | pad: 1 327 | kernel_size: 3 328 | engine: CAFFE 329 | stride: 1 330 | weight_filler { 331 | type: "msra" 332 | } 333 | } 334 | } 335 | layer { 336 | name: "conv1_2/dw/bn_new" 337 | type: "BatchNorm" 338 | bottom: "conv1_2/dw_new" 339 | top: "conv1_2/dw_new" 340 | param { 341 | lr_mult: 0 342 | decay_mult: 0 343 | } 344 | param { 345 | lr_mult: 0 346 | decay_mult: 0 347 | } 348 | param { 349 | lr_mult: 0 350 | decay_mult: 0 351 | } 352 | } 353 | layer { 354 | name: "conv1_2/dw/scale_new" 355 | type: "Scale" 356 | bottom: "conv1_2/dw_new" 357 | top: "conv1_2/dw_new" 358 | scale_param { 359 | filler { 360 | value: 1 361 | } 362 | bias_term: true 363 | bias_filler { 364 | value: 0 365 | } 366 | } 367 | } 368 | layer { 369 | name: "relu1_2/dw_new" 370 | type: "ReLU" 371 | bottom: "conv1_2/dw_new" 372 | top: "conv1_2/dw_new" 373 | } 374 | 375 | # 1_2 out 12*12*24 376 | layer { 377 | name: "conv1_2/out/pw_new" 378 | type: "Convolution" 379 | bottom: "conv1_2/dw_new" 380 | top: "conv1_2/out/pw_new" 381 | param { 382 | lr_mult: 1 383 | decay_mult: 1 384 | } 385 | 
convolution_param { 386 | num_output: 24 387 | bias_term: false 388 | pad: 0 389 | kernel_size: 1 390 | engine: CAFFE 391 | stride: 1 392 | weight_filler { 393 | type: "msra" 394 | } 395 | } 396 | } 397 | layer { 398 | name: "conv1_2/out/pw/bn_new" 399 | type: "BatchNorm" 400 | bottom: "conv1_2/out/pw_new" 401 | top: "conv1_2/out/pw_new" 402 | param { 403 | lr_mult: 0 404 | decay_mult: 0 405 | } 406 | param { 407 | lr_mult: 0 408 | decay_mult: 0 409 | } 410 | param { 411 | lr_mult: 0 412 | decay_mult: 0 413 | } 414 | } 415 | layer { 416 | name: "conv1_2/out/pw/scale_new" 417 | type: "Scale" 418 | bottom: "conv1_2/out/pw_new" 419 | top: "conv1_2/out/pw_new" 420 | scale_param { 421 | filler { 422 | value: 1 423 | } 424 | bias_term: true 425 | bias_filler { 426 | value: 0 427 | } 428 | } 429 | } 430 | layer { 431 | name: "fuse_conv1_2" 432 | type: "Eltwise" 433 | bottom: "conv1_1/out/pw_new" 434 | bottom: "conv1_2/out/pw_new" 435 | top: "fuse_conv1_2" 436 | eltwise_param { 437 | operation: SUM 438 | } 439 | } 440 | 441 | #--------------------------bottleneck2 2_1(s=2) 2_2(s=1) input: 12*12*24 output: 6*6*32 442 | # 2_1 in 443 | layer { 444 | name: "conv2_1/in/pw_new" 445 | type: "Convolution" 446 | bottom: "fuse_conv1_2" 447 | top: "conv2_1/in/pw_new" 448 | param { 449 | lr_mult: 1 450 | decay_mult: 1 451 | } 452 | convolution_param { 453 | num_output: 144 454 | bias_term: false 455 | pad: 0 456 | kernel_size: 1 457 | engine: CAFFE 458 | stride: 1 459 | weight_filler { 460 | type: "msra" 461 | } 462 | } 463 | } 464 | layer { 465 | name: "conv2_1/in/pw/bn_new" 466 | type: "BatchNorm" 467 | bottom: "conv2_1/in/pw_new" 468 | top: "conv2_1/in/pw_new" 469 | param { 470 | lr_mult: 0 471 | decay_mult: 0 472 | } 473 | param { 474 | lr_mult: 0 475 | decay_mult: 0 476 | } 477 | param { 478 | lr_mult: 0 479 | decay_mult: 0 480 | } 481 | } 482 | layer { 483 | name: "conv2_1/in/pw/scale_new" 484 | type: "Scale" 485 | bottom: "conv2_1/in/pw_new" 486 | top: "conv2_1/in/pw_new" 487 | 
scale_param { 488 | filler { 489 | value: 1 490 | } 491 | bias_term: true 492 | bias_filler { 493 | value: 0 494 | } 495 | } 496 | } 497 | layer { 498 | name: "relu2_1/in/pw_new" 499 | type: "ReLU" 500 | bottom: "conv2_1/in/pw_new" 501 | top: "conv2_1/in/pw_new" 502 | } 503 | 504 | # 2_1 dw 505 | layer { 506 | name: "conv2_1/dw_new" 507 | type: "ConvolutionDepthwise" 508 | bottom: "conv2_1/in/pw_new" 509 | top: "conv2_1/dw_new" 510 | param { 511 | lr_mult: 1 512 | decay_mult: 0 513 | } 514 | convolution_param { 515 | num_output: 144 516 | bias_term: false 517 | pad: 1 518 | kernel_size: 3 519 | engine: CAFFE 520 | stride: 2 521 | weight_filler { 522 | type: "msra" 523 | } 524 | } 525 | } 526 | layer { 527 | name: "conv2_1/dw/bn_new" 528 | type: "BatchNorm" 529 | bottom: "conv2_1/dw_new" 530 | top: "conv2_1/dw_new" 531 | param { 532 | lr_mult: 0 533 | decay_mult: 0 534 | } 535 | param { 536 | lr_mult: 0 537 | decay_mult: 0 538 | } 539 | param { 540 | lr_mult: 0 541 | decay_mult: 0 542 | } 543 | } 544 | layer { 545 | name: "conv2_1/dw/scale_new" 546 | type: "Scale" 547 | bottom: "conv2_1/dw_new" 548 | top: "conv2_1/dw_new" 549 | scale_param { 550 | filler { 551 | value: 1 552 | } 553 | bias_term: true 554 | bias_filler { 555 | value: 0 556 | } 557 | } 558 | } 559 | layer { 560 | name: "relu2_1/dw_new" 561 | type: "ReLU" 562 | bottom: "conv2_1/dw_new" 563 | top: "conv2_1/dw_new" 564 | } 565 | 566 | # 2_1 out 567 | layer { 568 | name: "conv2_1/out/pw_new" 569 | type: "Convolution" 570 | bottom: "conv2_1/dw_new" 571 | top: "conv2_1/out/pw_new" 572 | param { 573 | lr_mult: 1 574 | decay_mult: 1 575 | } 576 | convolution_param { 577 | num_output: 32 578 | bias_term: false 579 | pad: 0 580 | kernel_size: 1 581 | engine: CAFFE 582 | stride: 1 583 | weight_filler { 584 | type: "msra" 585 | } 586 | } 587 | } 588 | layer { 589 | name: "conv2_1/out/pw/bn_new" 590 | type: "BatchNorm" 591 | bottom: "conv2_1/out/pw_new" 592 | top: "conv2_1/out/pw_new" 593 | param { 594 | lr_mult: 
0 595 | decay_mult: 0 596 | } 597 | param { 598 | lr_mult: 0 599 | decay_mult: 0 600 | } 601 | param { 602 | lr_mult: 0 603 | decay_mult: 0 604 | } 605 | } 606 | layer { 607 | name: "conv2_1/out/pw/scale_new" 608 | type: "Scale" 609 | bottom: "conv2_1/out/pw_new" 610 | top: "conv2_1/out/pw_new" 611 | scale_param { 612 | filler { 613 | value: 1 614 | } 615 | bias_term: true 616 | bias_filler { 617 | value: 0 618 | } 619 | } 620 | } 621 | 622 | # 2_2 in 623 | 624 | layer { 625 | name: "conv2_2/in/pw_new" 626 | type: "Convolution" 627 | bottom: "conv2_1/out/pw_new" 628 | top: "conv2_2/in/pw_new" 629 | param { 630 | lr_mult: 1 631 | decay_mult: 1 632 | } 633 | convolution_param { 634 | num_output: 192 635 | bias_term: false 636 | pad: 0 637 | kernel_size: 1 638 | engine: CAFFE 639 | stride: 1 640 | weight_filler { 641 | type: "msra" 642 | } 643 | } 644 | } 645 | layer { 646 | name: "conv2_2/in/pw/bn_new" 647 | type: "BatchNorm" 648 | bottom: "conv2_2/in/pw_new" 649 | top: "conv2_2/in/pw_new" 650 | param { 651 | lr_mult: 0 652 | decay_mult: 0 653 | } 654 | param { 655 | lr_mult: 0 656 | decay_mult: 0 657 | } 658 | param { 659 | lr_mult: 0 660 | decay_mult: 0 661 | } 662 | } 663 | layer { 664 | name: "conv2_2/in/pw/scale_new" 665 | type: "Scale" 666 | bottom: "conv2_2/in/pw_new" 667 | top: "conv2_2/in/pw_new" 668 | scale_param { 669 | filler { 670 | value: 1 671 | } 672 | bias_term: true 673 | bias_filler { 674 | value: 0 675 | } 676 | } 677 | } 678 | layer { 679 | name: "relu2_2/in/pw_new" 680 | type: "ReLU" 681 | bottom: "conv2_2/in/pw_new" 682 | top: "conv2_2/in/pw_new" 683 | } 684 | 685 | # 2_2 dw 686 | layer { 687 | name: "conv2_2/dw_new" 688 | type: "ConvolutionDepthwise" 689 | bottom: "conv2_2/in/pw_new" 690 | top: "conv2_2/dw_new" 691 | param { 692 | lr_mult: 1 693 | decay_mult: 0 694 | } 695 | convolution_param { 696 | num_output: 192 697 | bias_term: false 698 | pad: 1 699 | kernel_size: 3 700 | engine: CAFFE 701 | stride: 1 702 | weight_filler { 703 | type: 
"msra" 704 | } 705 | } 706 | } 707 | layer { 708 | name: "conv2_2/dw/bn_new" 709 | type: "BatchNorm" 710 | bottom: "conv2_2/dw_new" 711 | top: "conv2_2/dw_new" 712 | param { 713 | lr_mult: 0 714 | decay_mult: 0 715 | } 716 | param { 717 | lr_mult: 0 718 | decay_mult: 0 719 | } 720 | param { 721 | lr_mult: 0 722 | decay_mult: 0 723 | } 724 | } 725 | layer { 726 | name: "conv2_2/dw/scale_new" 727 | type: "Scale" 728 | bottom: "conv2_2/dw_new" 729 | top: "conv2_2/dw_new" 730 | scale_param { 731 | filler { 732 | value: 1 733 | } 734 | bias_term: true 735 | bias_filler { 736 | value: 0 737 | } 738 | } 739 | } 740 | layer { 741 | name: "relu2_2/dw_new" 742 | type: "ReLU" 743 | bottom: "conv2_2/dw_new" 744 | top: "conv2_2/dw_new" 745 | } 746 | 747 | 748 | # 2_2 out 749 | 750 | layer { 751 | name: "conv2_2/out/pw_new" 752 | type: "Convolution" 753 | bottom: "conv2_2/dw_new" 754 | top: "conv2_2/out/pw_new" 755 | param { 756 | lr_mult: 1 757 | decay_mult: 1 758 | } 759 | convolution_param { 760 | num_output: 32 761 | bias_term: false 762 | pad: 0 763 | kernel_size: 1 764 | engine: CAFFE 765 | stride: 1 766 | weight_filler { 767 | type: "msra" 768 | } 769 | } 770 | } 771 | layer { 772 | name: "conv2_2/out/pw/bn_new" 773 | type: "BatchNorm" 774 | bottom: "conv2_2/out/pw_new" 775 | top: "conv2_2/out/pw_new" 776 | param { 777 | lr_mult: 0 778 | decay_mult: 0 779 | } 780 | param { 781 | lr_mult: 0 782 | decay_mult: 0 783 | } 784 | param { 785 | lr_mult: 0 786 | decay_mult: 0 787 | } 788 | } 789 | layer { 790 | name: "conv2_2/out/pw/scale_new" 791 | type: "Scale" 792 | bottom: "conv2_2/out/pw_new" 793 | top: "conv2_2/out/pw_new" 794 | scale_param { 795 | filler { 796 | value: 1 797 | } 798 | bias_term: true 799 | bias_filler { 800 | value: 0 801 | } 802 | } 803 | } 804 | layer { 805 | name: "fuse_conv2_2" 806 | type: "Eltwise" 807 | bottom: "conv2_1/out/pw_new" 808 | bottom: "conv2_2/out/pw_new" 809 | top: "fuse_conv2_2" 810 | eltwise_param { 811 | operation: SUM 812 | } 813 | } 
814 | 815 | #--------------------------bottleneck3 3_1(s=2) 3_2(s=1) input: 6*6*32 output: 3*3*64 816 | # 3_1 in 817 | layer { 818 | name: "conv3_1/in/pw_new" 819 | type: "Convolution" 820 | bottom: "fuse_conv2_2" 821 | top: "conv3_1/in/pw_new" 822 | param { 823 | lr_mult: 1 824 | decay_mult: 1 825 | } 826 | convolution_param { 827 | num_output: 192 828 | bias_term: false 829 | pad: 0 830 | kernel_size: 1 831 | engine: CAFFE 832 | stride: 1 833 | weight_filler { 834 | type: "msra" 835 | } 836 | } 837 | } 838 | layer { 839 | name: "conv3_1/in/pw/bn_new" 840 | type: "BatchNorm" 841 | bottom: "conv3_1/in/pw_new" 842 | top: "conv3_1/in/pw_new" 843 | param { 844 | lr_mult: 0 845 | decay_mult: 0 846 | } 847 | param { 848 | lr_mult: 0 849 | decay_mult: 0 850 | } 851 | param { 852 | lr_mult: 0 853 | decay_mult: 0 854 | } 855 | } 856 | layer { 857 | name: "conv3_1/in/pw/scale_new" 858 | type: "Scale" 859 | bottom: "conv3_1/in/pw_new" 860 | top: "conv3_1/in/pw_new" 861 | scale_param { 862 | filler { 863 | value: 1 864 | } 865 | bias_term: true 866 | bias_filler { 867 | value: 0 868 | } 869 | } 870 | } 871 | layer { 872 | name: "relu3_1/in/pw_new" 873 | type: "ReLU" 874 | bottom: "conv3_1/in/pw_new" 875 | top: "conv3_1/in/pw_new" 876 | } 877 | 878 | # 3_1 dw 879 | layer { 880 | name: "conv3_1/dw_new" 881 | type: "ConvolutionDepthwise" 882 | bottom: "conv3_1/in/pw_new" 883 | top: "conv3_1/dw_new" 884 | param { 885 | lr_mult: 1 886 | decay_mult: 0 887 | } 888 | convolution_param { 889 | num_output: 192 890 | bias_term: false 891 | pad: 1 892 | kernel_size: 3 893 | engine: CAFFE 894 | stride: 2 895 | weight_filler { 896 | type: "msra" 897 | } 898 | } 899 | } 900 | layer { 901 | name: "conv3_1/dw/bn_new" 902 | type: "BatchNorm" 903 | bottom: "conv3_1/dw_new" 904 | top: "conv3_1/dw_new" 905 | param { 906 | lr_mult: 0 907 | decay_mult: 0 908 | } 909 | param { 910 | lr_mult: 0 911 | decay_mult: 0 912 | } 913 | param { 914 | lr_mult: 0 915 | decay_mult: 0 916 | } 917 | } 918 | layer 
{ 919 | name: "conv3_1/dw/scale_new" 920 | type: "Scale" 921 | bottom: "conv3_1/dw_new" 922 | top: "conv3_1/dw_new" 923 | scale_param { 924 | filler { 925 | value: 1 926 | } 927 | bias_term: true 928 | bias_filler { 929 | value: 0 930 | } 931 | } 932 | } 933 | layer { 934 | name: "relu3_1/dw_new" 935 | type: "ReLU" 936 | bottom: "conv3_1/dw_new" 937 | top: "conv3_1/dw_new" 938 | } 939 | 940 | # 3_1 out 941 | layer { 942 | name: "conv3_1/out/pw_new" 943 | type: "Convolution" 944 | bottom: "conv3_1/dw_new" 945 | top: "conv3_1/out/pw_new" 946 | param { 947 | lr_mult: 1 948 | decay_mult: 1 949 | } 950 | convolution_param { 951 | num_output: 64 952 | bias_term: false 953 | pad: 0 954 | kernel_size: 1 955 | engine: CAFFE 956 | stride: 1 957 | weight_filler { 958 | type: "msra" 959 | } 960 | } 961 | } 962 | layer { 963 | name: "conv3_1/out/pw/bn_new" 964 | type: "BatchNorm" 965 | bottom: "conv3_1/out/pw_new" 966 | top: "conv3_1/out/pw_new" 967 | param { 968 | lr_mult: 0 969 | decay_mult: 0 970 | } 971 | param { 972 | lr_mult: 0 973 | decay_mult: 0 974 | } 975 | param { 976 | lr_mult: 0 977 | decay_mult: 0 978 | } 979 | } 980 | layer { 981 | name: "conv3_1/out/pw/scale_new" 982 | type: "Scale" 983 | bottom: "conv3_1/out/pw_new" 984 | top: "conv3_1/out/pw_new" 985 | scale_param { 986 | filler { 987 | value: 1 988 | } 989 | bias_term: true 990 | bias_filler { 991 | value: 0 992 | } 993 | } 994 | } 995 | 996 | # 3_2 in 997 | 998 | layer { 999 | name: "conv3_2/in/pw_new" 1000 | type: "Convolution" 1001 | bottom: "conv3_1/out/pw_new" 1002 | top: "conv3_2/in/pw_new" 1003 | param { 1004 | lr_mult: 1 1005 | decay_mult: 1 1006 | } 1007 | convolution_param { 1008 | num_output: 192 1009 | bias_term: false 1010 | pad: 0 1011 | kernel_size: 1 1012 | engine: CAFFE 1013 | stride: 1 1014 | weight_filler { 1015 | type: "msra" 1016 | } 1017 | } 1018 | } 1019 | layer { 1020 | name: "conv3_2/in/pw/bn_new" 1021 | type: "BatchNorm" 1022 | bottom: "conv3_2/in/pw_new" 1023 | top: 
"conv3_2/in/pw_new" 1024 | param { 1025 | lr_mult: 0 1026 | decay_mult: 0 1027 | } 1028 | param { 1029 | lr_mult: 0 1030 | decay_mult: 0 1031 | } 1032 | param { 1033 | lr_mult: 0 1034 | decay_mult: 0 1035 | } 1036 | } 1037 | layer { 1038 | name: "conv3_2/in/pw/scale_new" 1039 | type: "Scale" 1040 | bottom: "conv3_2/in/pw_new" 1041 | top: "conv3_2/in/pw_new" 1042 | scale_param { 1043 | filler { 1044 | value: 1 1045 | } 1046 | bias_term: true 1047 | bias_filler { 1048 | value: 0 1049 | } 1050 | } 1051 | } 1052 | layer { 1053 | name: "relu3_2/in/pw_new" 1054 | type: "ReLU" 1055 | bottom: "conv3_2/in/pw_new" 1056 | top: "conv3_2/in/pw_new" 1057 | } 1058 | 1059 | # 3_2 dw 1060 | layer { 1061 | name: "conv3_2/dw_new" 1062 | type: "ConvolutionDepthwise" 1063 | bottom: "conv3_2/in/pw_new" 1064 | top: "conv3_2/dw_new" 1065 | param { 1066 | lr_mult: 1 1067 | decay_mult: 0 1068 | } 1069 | convolution_param { 1070 | num_output: 192 1071 | bias_term: false 1072 | pad: 1 1073 | kernel_size: 3 1074 | engine: CAFFE 1075 | stride: 1 1076 | weight_filler { 1077 | type: "msra" 1078 | } 1079 | } 1080 | } 1081 | layer { 1082 | name: "conv3_2/dw/bn_new" 1083 | type: "BatchNorm" 1084 | bottom: "conv3_2/dw_new" 1085 | top: "conv3_2/dw_new" 1086 | param { 1087 | lr_mult: 0 1088 | decay_mult: 0 1089 | } 1090 | param { 1091 | lr_mult: 0 1092 | decay_mult: 0 1093 | } 1094 | param { 1095 | lr_mult: 0 1096 | decay_mult: 0 1097 | } 1098 | } 1099 | layer { 1100 | name: "conv3_2/dw/scale_new" 1101 | type: "Scale" 1102 | bottom: "conv3_2/dw_new" 1103 | top: "conv3_2/dw_new" 1104 | scale_param { 1105 | filler { 1106 | value: 1 1107 | } 1108 | bias_term: true 1109 | bias_filler { 1110 | value: 0 1111 | } 1112 | } 1113 | } 1114 | layer { 1115 | name: "relu3_2/dw_new" 1116 | type: "ReLU" 1117 | bottom: "conv3_2/dw_new" 1118 | top: "conv3_2/dw_new" 1119 | } 1120 | 1121 | 1122 | # 3_2 out 1123 | 1124 | layer { 1125 | name: "conv3_2/out/pw_new" 1126 | type: "Convolution" 1127 | bottom: "conv3_2/dw_new" 
1128 | top: "conv3_2/out/pw_new" 1129 | param { 1130 | lr_mult: 1 1131 | decay_mult: 1 1132 | } 1133 | convolution_param { 1134 | num_output: 64 1135 | bias_term: false 1136 | pad: 0 1137 | kernel_size: 1 1138 | engine: CAFFE 1139 | stride: 1 1140 | weight_filler { 1141 | type: "msra" 1142 | } 1143 | } 1144 | } 1145 | layer { 1146 | name: "conv3_2/out/pw/bn_new" 1147 | type: "BatchNorm" 1148 | bottom: "conv3_2/out/pw_new" 1149 | top: "conv3_2/out/pw_new" 1150 | param { 1151 | lr_mult: 0 1152 | decay_mult: 0 1153 | } 1154 | param { 1155 | lr_mult: 0 1156 | decay_mult: 0 1157 | } 1158 | param { 1159 | lr_mult: 0 1160 | decay_mult: 0 1161 | } 1162 | } 1163 | layer { 1164 | name: "conv3_2/out/pw/scale_new" 1165 | type: "Scale" 1166 | bottom: "conv3_2/out/pw_new" 1167 | top: "conv3_2/out/pw_new" 1168 | scale_param { 1169 | filler { 1170 | value: 1 1171 | } 1172 | bias_term: true 1173 | bias_filler { 1174 | value: 0 1175 | } 1176 | } 1177 | } 1178 | layer { 1179 | name: "fuse_conv3_2" 1180 | type: "Eltwise" 1181 | bottom: "conv3_1/out/pw_new" 1182 | bottom: "conv3_2/out/pw_new" 1183 | top: "fuse_conv3_2" 1184 | eltwise_param { 1185 | operation: SUM 1186 | } 1187 | } 1188 | 1189 | 1190 | 1191 | 1192 | 1193 | 1194 | 1195 | #------------------------- fc1 1196 | layer { 1197 | name: "fc1" 1198 | type: "InnerProduct" 1199 | bottom: "fuse_conv3_2" 1200 | top: "fc1" 1201 | param { 1202 | lr_mult: 1 1203 | decay_mult: 1 1204 | } 1205 | param { 1206 | lr_mult: 2 1207 | decay_mult: 1 1208 | } 1209 | inner_product_param { 1210 | num_output: 256 1211 | weight_filler { 1212 | type: "gaussian" 1213 | std: 0.01 1214 | } 1215 | bias_filler { 1216 | type: "constant" 1217 | value: 0 1218 | } 1219 | } 1220 | } 1221 | layer { 1222 | name: "relu_fc1" 1223 | type: "ReLU" 1224 | bottom: "fc1" 1225 | top: "fc1" 1226 | } 1227 | layer { 1228 | name: "drop_fc1" 1229 | type: "Dropout" 1230 | bottom: "fc1" 1231 | top: "fc1" 1232 | dropout_param{ 1233 | dropout_ratio: 0.3 1234 | } 1235 | } 1236 | 
1237 | #------------------------- fc2 1238 | layer { 1239 | name: "fc2" 1240 | type: "InnerProduct" 1241 | bottom: "fc1" 1242 | top: "fc2" 1243 | param { 1244 | lr_mult: 1 1245 | decay_mult: 1 1246 | } 1247 | param { 1248 | lr_mult: 2 1249 | decay_mult: 1 1250 | } 1251 | inner_product_param { 1252 | num_output: 10 1253 | weight_filler { 1254 | type: "gaussian" 1255 | std: 0.01 1256 | } 1257 | bias_filler { 1258 | type: "constant" 1259 | value: 0 1260 | } 1261 | } 1262 | } -------------------------------------------------------------------------------- /2_level_2/Code/2_evaluate/evaluate_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | sys.path.append('../../../util') 4 | import tools 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import os 8 | import cv2 9 | 10 | l2_out_test_label = '../../Result/l2_out_test_label.txt' 11 | l2_raw_test_label = '../../Data/l1_crop/l1_crop_test_label.txt' 12 | relative_path = '../../Data/l1_crop/test/' # find the image from txt 13 | draw_img_flod = '../../Result/l2_out_draw/test/' 14 | drop_img_flod = '../../Result/l2_drop/test/' 15 | 16 | n_p = 5 17 | # ----------------------------------------------------------------------- load label 18 | l2_raw_fid = open(l2_raw_test_label) 19 | l2_raw_lines = l2_raw_fid.readlines() 20 | l2_raw_fid.close() 21 | l2_out_fid = open(l2_out_test_label) 22 | l2_out_lines = l2_out_fid.readlines() 23 | l2_out_fid.close() 24 | 25 | err_mat = [] 26 | threshold = 1 27 | count_drop = 0 28 | for idx in range(len(l2_out_lines)): 29 | print idx 30 | r_ = l2_raw_lines[idx] 31 | o_ = l2_out_lines[idx] 32 | r_name = r_.split()[0] 33 | o_name = o_.split()[0] 34 | if r_name != o_name: 35 | print 'find a error,idx: ', idx 36 | continue 37 | full_img_path = relative_path + r_name 38 | img = cv2.imread(full_img_path) 39 | h,w,c = img.shape 40 | 41 | err_1,err_5 = tools.cal_error_nor_diag(img,r_,o_) # r_ have img name 
, range of [-1,1] err_1 is mean 42 | err_mat.append(err_5) 43 | out_land = np.array(map(float,o_.split()[1:2*n_p+1])) 44 | 45 | if err_1 >= threshold : 46 | count_drop = count_drop + 1 47 | draw_img = img.copy() 48 | draw_img = tools.drawpoints(draw_img,out_land) 49 | tools.makedir(drop_img_flod) 50 | draw_img_name = str(err_1) + '_' + r_name 51 | draw_img_path = drop_img_flod + draw_img_name 52 | cv2.imwrite(draw_img_path, draw_img) 53 | else: 54 | draw_img = img.copy() 55 | draw_img = tools.drawpoints(draw_img,out_land) 56 | tools.makedir(draw_img_flod) 57 | draw_img_name = str(err_1) + '_' + r_name 58 | draw_img_path = draw_img_flod + draw_img_name 59 | cv2.imwrite(draw_img_path, draw_img) 60 | # -------------------------------------------------------------- print result 61 | err_mat = np.array(err_mat) 62 | err_mat = np.reshape(err_mat,(-1,5)) 63 | MNE_5 = [] 64 | for i in range(n_p): 65 | MNE_5.append(err_mat[:,i].mean()) 66 | print 'err >= 10% have ' , count_drop 67 | # ------------------------------------------------------------- plot 68 | fig = plt.figure('test_MNE_5') 69 | ax1 =plt.subplot(111) 70 | data = np.array(MNE_5) 71 | width = 0.2 72 | x_bar = np.arange(5) 73 | # print('x_bar type ',type(x_bar)) 74 | rect = ax1.bar(left=x_bar,height=data,width=width, color="blue") 75 | for rec in rect: 76 | x= rec.get_x() 77 | height = round(rec.get_height()*100,2) 78 | mne_text = str(height) + '%' 79 | # print('mne text',mne_text) 80 | ax1.text(x+0.05,1.02*height/100,mne_text) 81 | # print('height',height) 82 | MNE_5_mean = np.round(np.array(MNE_5).mean() *100,2) 83 | MNE_5_mean_text = 'The mean normalized error :' +str(MNE_5_mean) + '%' 84 | ax1.text(1 ,1.5*MNE_5_mean/100 ,MNE_5_mean_text,color="red") 85 | 86 | ax1.set_xticks(x_bar + width) 87 | ax1.set_xticklabels(("left eye","right eye","nose","left mouth","right mouth")) 88 | ax1.set_ylabel("MNE") 89 | ax1.set_title(" MNE") 90 | ax1.grid(True) 91 | ax1.set_ylim(0,0.025) # max y axis 92 | plt.show() 93 | 94 | 
95 | 96 | print 'The mean error normalized by dist_diag is : ', err_mat.mean() 97 | # print a 98 | fig2 = plt.figure("test_distribution") 99 | ax2 = plt.subplot(111) 100 | ax2.set_title("The mean error normalized by dist_diag :") 101 | data =err_mat.mean(axis=1) 102 | n, bins, patches = plt.hist(data ,bins=200, normed=False, facecolor='blue', alpha=0.75) 103 | err_mat_mean = np.round(np.array(err_mat).mean() *100 ,2) 104 | mean_text = 'The mean error normalized by dist_diag : ' + str(err_mat_mean) + '%' 105 | ax2.text(0.1,len(err_mat)/10 ,mean_text,color="red") 106 | plt.show() -------------------------------------------------------------------------------- /2_level_2/Code/2_evaluate/evaluate_train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | sys.path.append('../../../util') 4 | import tools 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import os 8 | import cv2 9 | 10 | l2_out_train_label = '../../Result/l2_out_train_label.txt' 11 | l2_raw_train_label = '../../Data/l1_crop/l1_crop_train_label.txt' 12 | relative_path = '../../Data/l1_crop/train/' # find the image from txt 13 | draw_img_flod = '../../Result/l2_out_draw/train/' 14 | drop_img_flod = '../../Result/l2_drop/train/' 15 | 16 | n_p = 5 17 | # ----------------------------------------------------------------------- load label 18 | l2_raw_fid = open(l2_raw_train_label) 19 | l2_raw_lines = l2_raw_fid.readlines() 20 | l2_raw_fid.close() 21 | l2_out_fid = open(l2_out_train_label) 22 | l2_out_lines = l2_out_fid.readlines() 23 | l2_out_fid.close() 24 | 25 | err_mat = [] 26 | threshold = 1 27 | count_drop = 0 28 | for idx in range(len(l2_out_lines)): 29 | print idx 30 | r_ = l2_raw_lines[idx] 31 | o_ = l2_out_lines[idx] 32 | r_name = r_.split()[0] 33 | o_name = o_.split()[0] 34 | if r_name != o_name: 35 | print 'find a error,idx: ', idx 36 | continue 37 | full_img_path = relative_path + r_name 38 | img = 
cv2.imread(full_img_path) 39 | h,w,c = img.shape 40 | 41 | err_1,err_5 = tools.cal_error_nor_diag(img,r_,o_) # r_ have img name , range of [-1,1] err_1 is mean 42 | err_mat.append(err_5) 43 | out_land = np.array(map(float,o_.split()[1:2*n_p+1])) 44 | 45 | if err_1 >= threshold : 46 | count_drop = count_drop + 1 47 | draw_img = img.copy() 48 | draw_img = tools.drawpoints(draw_img,out_land) 49 | tools.makedir(drop_img_flod) 50 | draw_img_name = str(err_1) + '_' + r_name 51 | draw_img_path = drop_img_flod + draw_img_name 52 | cv2.imwrite(draw_img_path, draw_img) 53 | else: 54 | draw_img = img.copy() 55 | draw_img = tools.drawpoints(draw_img,out_land) 56 | tools.makedir(draw_img_flod) 57 | draw_img_name = str(err_1) + '_' + r_name 58 | draw_img_path = draw_img_flod + draw_img_name 59 | cv2.imwrite(draw_img_path, draw_img) 60 | # -------------------------------------------------------------- print result 61 | err_mat = np.array(err_mat) 62 | err_mat = np.reshape(err_mat,(-1,5)) 63 | MNE_5 = [] 64 | for i in range(n_p): 65 | MNE_5.append(err_mat[:,i].mean()) 66 | print 'err >= 10% have ' , count_drop 67 | # ------------------------------------------------------------- plot 68 | fig = plt.figure('train_MNE_5') 69 | ax1 =plt.subplot(111) 70 | data = np.array(MNE_5) 71 | width = 0.2 72 | x_bar = np.arange(5) 73 | # print('x_bar type ',type(x_bar)) 74 | rect = ax1.bar(left=x_bar,height=data,width=width, color="blue") 75 | for rec in rect: 76 | x= rec.get_x() 77 | height = round(rec.get_height()*100,2) 78 | mne_text = str(height) + '%' 79 | # print('mne text',mne_text) 80 | ax1.text(x+0.05,1.02*height/100,mne_text) 81 | # print('height',height) 82 | MNE_5_mean = np.round(np.array(MNE_5).mean() *100,2) 83 | MNE_5_mean_text = 'The mean normalized error :' +str(MNE_5_mean) + '%' 84 | ax1.text(1 ,1.5*MNE_5_mean/100 ,MNE_5_mean_text,color="red") 85 | 86 | ax1.set_xticks(x_bar + width) 87 | ax1.set_xticklabels(("left eye","right eye","nose","left mouth","right mouth")) 88 | 
ax1.set_ylabel("MNE") 89 | ax1.set_title(" MNE") 90 | ax1.grid(True) 91 | ax1.set_ylim(0,0.025) # max y axis 92 | plt.show() 93 | 94 | 95 | 96 | print 'The mean error normalized by dist_diag is : ', err_mat.mean() 97 | # print a 98 | fig2 = plt.figure("train_distribution") 99 | ax2 = plt.subplot(111) 100 | ax2.set_title("The mean error normalized by dist_diag :") 101 | data =err_mat.mean(axis=1) 102 | n, bins, patches = plt.hist(data ,bins=200, normed=False, facecolor='blue', alpha=0.75) 103 | err_mat_mean = np.round(np.array(err_mat).mean() *100 ,2) 104 | mean_text = 'The mean error normalized by dist_diag : ' + str(err_mat_mean) + '%' 105 | ax2.text(0.1,len(err_mat)/10 ,mean_text,color="red") 106 | plt.show() -------------------------------------------------------------------------------- /3_demo/Code/inferencen.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | sys.path.append('../../util') 4 | sys.path.append('/***your_caffe_path***/python') 5 | sys.path.append('/***your_caffe_path***/python/caffe') 6 | import tools 7 | import caffe 8 | import numpy as np 9 | import argparse 10 | import cv2 11 | import time 12 | 13 | l1_deploy = '../Data/l1_deploy.prototxt' 14 | l1_model = '../Data/l1_net.caffemodel' 15 | l2_deploy = '../Data/l2_deploy.prototxt' 16 | l2_model = '../Data/l2_net.caffemodel' 17 | raw_txt = '../Data/demo.txt' 18 | relative_path = '../Data/img/' # find the image 19 | draw_img_flod = '../Result/draw_img/' 20 | w_net = 48 21 | h_net = 48 22 | n_p = 5 23 | 24 | #--------------------------------------------------------------------------- cnn initalization 25 | # load model 26 | l1_net = caffe.Net(l1_deploy,l1_model,caffe.TEST) 27 | l2_net = caffe.Net(l2_deploy,l2_model,caffe.TEST) 28 | 29 | caffe.set_mode_gpu() 30 | caffe.set_device(0) 31 | 32 | # image preprocess 33 | mu = np.ones((3,w_net,h_net), dtype=np.float) * 127.5 34 | transformer = caffe.io.Transformer({'data': 
l1_net.blobs['data'].data.shape}) 35 | transformer.set_transpose('data', (2,0,1)) # (w,h,c)--> (c,w,h) 36 | transformer.set_mean('data', mu) # pixel-wise 37 | transformer.set_raw_scale('data', 255 ) # [0,1] --> [0,255] 38 | transformer.set_channel_swap('data', (2,1,0)) # RGB --> BGR 39 | #----------------------------------------------------------------------------- forward 40 | for line in open(raw_txt): 41 | if line.isspace() : continue 42 | img_name = line.split()[0] 43 | full_img_path = relative_path + img_name 44 | img = cv2.imread(full_img_path) 45 | draw_img = img.copy() 46 | #----------------------------------------------------------------------- l1 forward 47 | l1_input_img=caffe.io.load_image(full_img_path) # im is RGB with 0~1 float 48 | h_img,w_img,c = l1_input_img.shape 49 | 50 | l1_net.blobs['data'].data[...]=transformer.preprocess('data',l1_input_img) 51 | time_s = time.clock() 52 | l1_out = l1_net.forward() 53 | time_e = time.clock() 54 | print img_name,'l1_forward : ',round((time_e-time_s)*1000,1) ,'ms' 55 | l1_out_land = l1_net.blobs['fc2'].data[0].flatten() 56 | # crop img for level_2 57 | 58 | 59 | l1_out_pix_land = tools.label2points(l1_out_land,w_img,h_img) 60 | # ---------------------------------------------------------------------------- crop img 61 | crop_img,w_start,h_start = tools.crop_img(l1_input_img,l1_out_pix_land) 62 | #----------------------------------------------------------------------- l2 forward 63 | l2_input_img = crop_img 64 | h_l2,w_l2,c = l2_input_img.shape 65 | l2_net.blobs['data'].data[...]=transformer.preprocess('data',l2_input_img) 66 | time_s = time.clock() 67 | l2_out = l2_net.forward() 68 | time_e = time.clock() 69 | print img_name,'l2_forward : ',round((time_e-time_s)*1000,1) ,'ms' 70 | l2_out_land = l2_net.blobs['fc2'].data[0].flatten() 71 | l2_out_pix_land = tools.label2points(l2_out_land,w_l2,h_l2) 72 | 73 | l2_out_pix_land[0::2] = l2_out_pix_land[0::2] + w_start # x 74 | l2_out_pix_land[1::2] = 
l2_out_pix_land[1::2] + h_start # y 75 | 76 | # -------------------------------------------------------------------- draw img 77 | raw_land = list(line.split())[1:2*n_p+1] 78 | draw_img = tools.drawpoints_0(draw_img, raw_land) 79 | draw_img = tools.drawpoints_1(draw_img, l1_out_land) 80 | draw_img = tools.drawpoints_2(draw_img, l2_out_pix_land) 81 | 82 | # --------------------------------------------------------------------- output img 83 | draw_img_path = draw_img_flod + img_name 84 | tools.makedir(draw_img_flod) 85 | cv2.imwrite(draw_img_path,draw_img) 86 | 87 | -------------------------------------------------------------------------------- /3_demo/Data/demo.txt: -------------------------------------------------------------------------------- 1 | 000054.jpg 945 340 1038 340 988 397 941 423 1042 426 2 | 000133.jpg 271 150 332 150 302 195 269 210 328 212 3 | 000167.jpg 202 97 224 105 220 118 197 130 215 131 4 | 000275.jpg 157 115 209 107 184 142 175 168 220 158 5 | 000335.jpg 111 196 183 196 150 248 115 293 185 292 6 | 000765.jpg 326 153 382 145 358 185 336 205 376 199 7 | 001102.jpg 189 163 240 140 221 187 222 213 259 196 8 | 001557.jpg 96 86 135 86 127 111 99 134 131 132 -------------------------------------------------------------------------------- /3_demo/Data/img/000054.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000054.jpg -------------------------------------------------------------------------------- /3_demo/Data/img/000133.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000133.jpg -------------------------------------------------------------------------------- /3_demo/Data/img/000167.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000167.jpg -------------------------------------------------------------------------------- /3_demo/Data/img/000275.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000275.jpg -------------------------------------------------------------------------------- /3_demo/Data/img/000335.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000335.jpg -------------------------------------------------------------------------------- /3_demo/Data/img/000765.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/000765.jpg -------------------------------------------------------------------------------- /3_demo/Data/img/001102.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/001102.jpg -------------------------------------------------------------------------------- /3_demo/Data/img/001557.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/img/001557.jpg -------------------------------------------------------------------------------- /3_demo/Data/l1_deploy.prototxt: 
-------------------------------------------------------------------------------- 1 | name: "level_1" 2 | input: "data" 3 | input_shape { dim: 1 dim: 3 dim: 48 dim: 48 } 4 | 5 | 6 | #------------------------------------------- conv1 input: 48*48*3 output: 24*24*16 7 | layer { 8 | name: "conv1_new" 9 | type: "Convolution" 10 | bottom: "data" 11 | top: "conv1_new" 12 | param { 13 | lr_mult: 1 14 | decay_mult: 1 15 | } 16 | convolution_param { 17 | num_output: 16 18 | bias_term: false 19 | pad: 1 20 | kernel_size: 3 21 | stride: 2 22 | weight_filler { 23 | type: "msra" 24 | } 25 | } 26 | } 27 | layer { 28 | name: "conv1/bn_new" 29 | type: "BatchNorm" 30 | bottom: "conv1_new" 31 | top: "conv1_new" 32 | param { 33 | lr_mult: 0 34 | decay_mult: 0 35 | } 36 | param { 37 | lr_mult: 0 38 | decay_mult: 0 39 | } 40 | param { 41 | lr_mult: 0 42 | decay_mult: 0 43 | } 44 | } 45 | layer { 46 | name: "conv1/scale_new" 47 | type: "Scale" 48 | bottom: "conv1_new" 49 | top: "conv1_new" 50 | scale_param { 51 | filler { 52 | value: 1 53 | } 54 | bias_term: true 55 | bias_filler { 56 | value: 0 57 | } 58 | } 59 | } 60 | layer { 61 | name: "relu1_new" 62 | type: "ReLU" 63 | bottom: "conv1_new" 64 | top: "conv1_new" 65 | } 66 | 67 | #----------- bottleneck_1 1_1(s=2) 1_2(s=1) input: 24*24*16 output: 12*12*24 68 | 69 | layer { 70 | name: "conv1_1/in/pw_new" 71 | type: "Convolution" 72 | bottom: "conv1_new" 73 | top: "conv1_1/in/pw_new" 74 | param { 75 | lr_mult: 1 76 | decay_mult: 1 77 | } 78 | convolution_param { 79 | num_output: 96 80 | bias_term: false 81 | pad: 0 82 | kernel_size: 1 83 | engine: CAFFE 84 | stride: 1 85 | weight_filler { 86 | type: "msra" 87 | } 88 | } 89 | } 90 | layer { 91 | name: "conv1_1/in/pw/bn_new" 92 | type: "BatchNorm" 93 | bottom: "conv1_1/in/pw_new" 94 | top: "conv1_1/in/pw_new" 95 | param { 96 | lr_mult: 0 97 | decay_mult: 0 98 | } 99 | param { 100 | lr_mult: 0 101 | decay_mult: 0 102 | } 103 | param { 104 | lr_mult: 0 105 | decay_mult: 0 106 | } 107 | } 108 
| layer { 109 | name: "conv1_1/in/pw/scale_new" 110 | type: "Scale" 111 | bottom: "conv1_1/in/pw_new" 112 | top: "conv1_1/in/pw_new" 113 | scale_param { 114 | filler { 115 | value: 1 116 | } 117 | bias_term: true 118 | bias_filler { 119 | value: 0 120 | } 121 | } 122 | } 123 | layer { 124 | name: "relu1_1/in/pw_new" 125 | type: "ReLU" 126 | bottom: "conv1_1/in/pw_new" 127 | top: "conv1_1/in/pw_new" 128 | } 129 | 130 | 131 | 132 | # 1_1 dw conv 133 | layer { 134 | name: "conv1_1/dw_new" 135 | type: "ConvolutionDepthwise" 136 | bottom: "conv1_1/in/pw_new" 137 | top: "conv1_1/dw_new" 138 | param { 139 | lr_mult: 1 140 | decay_mult: 0 141 | } 142 | convolution_param { 143 | num_output: 96 144 | bias_term: false 145 | pad: 1 146 | kernel_size: 3 147 | engine: CAFFE 148 | stride: 2 149 | weight_filler { 150 | type: "msra" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "conv1_1/dw/bn_new" 156 | type: "BatchNorm" 157 | bottom: "conv1_1/dw_new" 158 | top: "conv1_1/dw_new" 159 | param { 160 | lr_mult: 0 161 | decay_mult: 0 162 | } 163 | param { 164 | lr_mult: 0 165 | decay_mult: 0 166 | } 167 | param { 168 | lr_mult: 0 169 | decay_mult: 0 170 | } 171 | } 172 | layer { 173 | name: "conv1_1/dw/scale_new" 174 | type: "Scale" 175 | bottom: "conv1_1/dw_new" 176 | top: "conv1_1/dw_new" 177 | scale_param { 178 | filler { 179 | value: 1 180 | } 181 | bias_term: true 182 | bias_filler { 183 | value: 0 184 | } 185 | } 186 | } 187 | layer { 188 | name: "relu1_1/dw_new" 189 | type: "ReLU" 190 | bottom: "conv1_1/dw_new" 191 | top: "conv1_1/dw_new" 192 | } 193 | 194 | # 1_1 out 195 | layer { 196 | name: "conv1_1/out/pw_new" 197 | type: "Convolution" 198 | bottom: "conv1_1/dw_new" 199 | top: "conv1_1/out/pw_new" 200 | param { 201 | lr_mult: 1 202 | decay_mult: 1 203 | } 204 | convolution_param { 205 | num_output: 24 206 | bias_term: false 207 | pad: 0 208 | kernel_size: 1 209 | engine: CAFFE 210 | stride: 1 211 | weight_filler { 212 | type: "msra" 213 | } 214 | } 215 | } 216 | layer { 
217 | name: "conv1_1/out/pw/bn_new" 218 | type: "BatchNorm" 219 | bottom: "conv1_1/out/pw_new" 220 | top: "conv1_1/out/pw_new" 221 | param { 222 | lr_mult: 0 223 | decay_mult: 0 224 | } 225 | param { 226 | lr_mult: 0 227 | decay_mult: 0 228 | } 229 | param { 230 | lr_mult: 0 231 | decay_mult: 0 232 | } 233 | } 234 | layer { 235 | name: "conv1_1/out/pw/scale_new" 236 | type: "Scale" 237 | bottom: "conv1_1/out/pw_new" 238 | top: "conv1_1/out/pw_new" 239 | scale_param { 240 | filler { 241 | value: 1 242 | } 243 | bias_term: true 244 | bias_filler { 245 | value: 0 246 | } 247 | } 248 | } 249 | # 1_2 in 250 | 251 | layer { 252 | name: "conv1_2/in/pw_new" 253 | type: "Convolution" 254 | bottom: "conv1_1/out/pw_new" 255 | top: "conv1_2/in/pw_new" 256 | param { 257 | lr_mult: 1 258 | decay_mult: 1 259 | } 260 | convolution_param { 261 | num_output: 144 262 | bias_term: false 263 | pad: 0 264 | kernel_size: 1 265 | engine: CAFFE 266 | stride: 1 267 | weight_filler { 268 | type: "msra" 269 | } 270 | } 271 | } 272 | layer { 273 | name: "conv1_2/in/pw/bn_new" 274 | type: "BatchNorm" 275 | bottom: "conv1_2/in/pw_new" 276 | top: "conv1_2/in/pw_new" 277 | param { 278 | lr_mult: 0 279 | decay_mult: 0 280 | } 281 | param { 282 | lr_mult: 0 283 | decay_mult: 0 284 | } 285 | param { 286 | lr_mult: 0 287 | decay_mult: 0 288 | } 289 | } 290 | layer { 291 | name: "conv1_2/in/pw/scale_new" 292 | type: "Scale" 293 | bottom: "conv1_2/in/pw_new" 294 | top: "conv1_2/in/pw_new" 295 | scale_param { 296 | filler { 297 | value: 1 298 | } 299 | bias_term: true 300 | bias_filler { 301 | value: 0 302 | } 303 | } 304 | } 305 | layer { 306 | name: "relu1_2/in/pw_new" 307 | type: "ReLU" 308 | bottom: "conv1_2/in/pw_new" 309 | top: "conv1_2/in/pw_new" 310 | } 311 | 312 | # 1_2 dw 313 | 314 | layer { 315 | name: "conv1_2/dw_new" 316 | type: "ConvolutionDepthwise" 317 | bottom: "conv1_2/in/pw_new" 318 | top: "conv1_2/dw_new" 319 | param { 320 | lr_mult: 1 321 | decay_mult: 0 322 | } 323 | 
convolution_param { 324 | num_output: 144 325 | bias_term: false 326 | pad: 1 327 | kernel_size: 3 328 | engine: CAFFE 329 | stride: 1 330 | weight_filler { 331 | type: "msra" 332 | } 333 | } 334 | } 335 | layer { 336 | name: "conv1_2/dw/bn_new" 337 | type: "BatchNorm" 338 | bottom: "conv1_2/dw_new" 339 | top: "conv1_2/dw_new" 340 | param { 341 | lr_mult: 0 342 | decay_mult: 0 343 | } 344 | param { 345 | lr_mult: 0 346 | decay_mult: 0 347 | } 348 | param { 349 | lr_mult: 0 350 | decay_mult: 0 351 | } 352 | } 353 | layer { 354 | name: "conv1_2/dw/scale_new" 355 | type: "Scale" 356 | bottom: "conv1_2/dw_new" 357 | top: "conv1_2/dw_new" 358 | scale_param { 359 | filler { 360 | value: 1 361 | } 362 | bias_term: true 363 | bias_filler { 364 | value: 0 365 | } 366 | } 367 | } 368 | layer { 369 | name: "relu1_2/dw_new" 370 | type: "ReLU" 371 | bottom: "conv1_2/dw_new" 372 | top: "conv1_2/dw_new" 373 | } 374 | 375 | # 1_2 out 12*12*24 376 | layer { 377 | name: "conv1_2/out/pw_new" 378 | type: "Convolution" 379 | bottom: "conv1_2/dw_new" 380 | top: "conv1_2/out/pw_new" 381 | param { 382 | lr_mult: 1 383 | decay_mult: 1 384 | } 385 | convolution_param { 386 | num_output: 24 387 | bias_term: false 388 | pad: 0 389 | kernel_size: 1 390 | engine: CAFFE 391 | stride: 1 392 | weight_filler { 393 | type: "msra" 394 | } 395 | } 396 | } 397 | layer { 398 | name: "conv1_2/out/pw/bn_new" 399 | type: "BatchNorm" 400 | bottom: "conv1_2/out/pw_new" 401 | top: "conv1_2/out/pw_new" 402 | param { 403 | lr_mult: 0 404 | decay_mult: 0 405 | } 406 | param { 407 | lr_mult: 0 408 | decay_mult: 0 409 | } 410 | param { 411 | lr_mult: 0 412 | decay_mult: 0 413 | } 414 | } 415 | layer { 416 | name: "conv1_2/out/pw/scale_new" 417 | type: "Scale" 418 | bottom: "conv1_2/out/pw_new" 419 | top: "conv1_2/out/pw_new" 420 | scale_param { 421 | filler { 422 | value: 1 423 | } 424 | bias_term: true 425 | bias_filler { 426 | value: 0 427 | } 428 | } 429 | } 430 | layer { 431 | name: "fuse_conv1_2" 432 | type: 
"Eltwise" 433 | bottom: "conv1_1/out/pw_new" 434 | bottom: "conv1_2/out/pw_new" 435 | top: "fuse_conv1_2" 436 | eltwise_param { 437 | operation: SUM 438 | } 439 | } 440 | 441 | #--------------------------bottleneck2 2_1(s=2) 2_2(s=1) input: 12*12*24 output: 6*6*32 442 | # 2_1 in 443 | layer { 444 | name: "conv2_1/in/pw_new" 445 | type: "Convolution" 446 | bottom: "fuse_conv1_2" 447 | top: "conv2_1/in/pw_new" 448 | param { 449 | lr_mult: 1 450 | decay_mult: 1 451 | } 452 | convolution_param { 453 | num_output: 144 454 | bias_term: false 455 | pad: 0 456 | kernel_size: 1 457 | engine: CAFFE 458 | stride: 1 459 | weight_filler { 460 | type: "msra" 461 | } 462 | } 463 | } 464 | layer { 465 | name: "conv2_1/in/pw/bn_new" 466 | type: "BatchNorm" 467 | bottom: "conv2_1/in/pw_new" 468 | top: "conv2_1/in/pw_new" 469 | param { 470 | lr_mult: 0 471 | decay_mult: 0 472 | } 473 | param { 474 | lr_mult: 0 475 | decay_mult: 0 476 | } 477 | param { 478 | lr_mult: 0 479 | decay_mult: 0 480 | } 481 | } 482 | layer { 483 | name: "conv2_1/in/pw/scale_new" 484 | type: "Scale" 485 | bottom: "conv2_1/in/pw_new" 486 | top: "conv2_1/in/pw_new" 487 | scale_param { 488 | filler { 489 | value: 1 490 | } 491 | bias_term: true 492 | bias_filler { 493 | value: 0 494 | } 495 | } 496 | } 497 | layer { 498 | name: "relu2_1/in/pw_new" 499 | type: "ReLU" 500 | bottom: "conv2_1/in/pw_new" 501 | top: "conv2_1/in/pw_new" 502 | } 503 | 504 | # 2_1 dw 505 | layer { 506 | name: "conv2_1/dw_new" 507 | type: "ConvolutionDepthwise" 508 | bottom: "conv2_1/in/pw_new" 509 | top: "conv2_1/dw_new" 510 | param { 511 | lr_mult: 1 512 | decay_mult: 0 513 | } 514 | convolution_param { 515 | num_output: 144 516 | bias_term: false 517 | pad: 1 518 | kernel_size: 3 519 | engine: CAFFE 520 | stride: 2 521 | weight_filler { 522 | type: "msra" 523 | } 524 | } 525 | } 526 | layer { 527 | name: "conv2_1/dw/bn_new" 528 | type: "BatchNorm" 529 | bottom: "conv2_1/dw_new" 530 | top: "conv2_1/dw_new" 531 | param { 532 | lr_mult: 0 
533 | decay_mult: 0 534 | } 535 | param { 536 | lr_mult: 0 537 | decay_mult: 0 538 | } 539 | param { 540 | lr_mult: 0 541 | decay_mult: 0 542 | } 543 | } 544 | layer { 545 | name: "conv2_1/dw/scale_new" 546 | type: "Scale" 547 | bottom: "conv2_1/dw_new" 548 | top: "conv2_1/dw_new" 549 | scale_param { 550 | filler { 551 | value: 1 552 | } 553 | bias_term: true 554 | bias_filler { 555 | value: 0 556 | } 557 | } 558 | } 559 | layer { 560 | name: "relu2_1/dw_new" 561 | type: "ReLU" 562 | bottom: "conv2_1/dw_new" 563 | top: "conv2_1/dw_new" 564 | } 565 | 566 | # 2_1 out 567 | layer { 568 | name: "conv2_1/out/pw_new" 569 | type: "Convolution" 570 | bottom: "conv2_1/dw_new" 571 | top: "conv2_1/out/pw_new" 572 | param { 573 | lr_mult: 1 574 | decay_mult: 1 575 | } 576 | convolution_param { 577 | num_output: 32 578 | bias_term: false 579 | pad: 0 580 | kernel_size: 1 581 | engine: CAFFE 582 | stride: 1 583 | weight_filler { 584 | type: "msra" 585 | } 586 | } 587 | } 588 | layer { 589 | name: "conv2_1/out/pw/bn_new" 590 | type: "BatchNorm" 591 | bottom: "conv2_1/out/pw_new" 592 | top: "conv2_1/out/pw_new" 593 | param { 594 | lr_mult: 0 595 | decay_mult: 0 596 | } 597 | param { 598 | lr_mult: 0 599 | decay_mult: 0 600 | } 601 | param { 602 | lr_mult: 0 603 | decay_mult: 0 604 | } 605 | } 606 | layer { 607 | name: "conv2_1/out/pw/scale_new" 608 | type: "Scale" 609 | bottom: "conv2_1/out/pw_new" 610 | top: "conv2_1/out/pw_new" 611 | scale_param { 612 | filler { 613 | value: 1 614 | } 615 | bias_term: true 616 | bias_filler { 617 | value: 0 618 | } 619 | } 620 | } 621 | 622 | # 2_2 in 623 | 624 | layer { 625 | name: "conv2_2/in/pw_new" 626 | type: "Convolution" 627 | bottom: "conv2_1/out/pw_new" 628 | top: "conv2_2/in/pw_new" 629 | param { 630 | lr_mult: 1 631 | decay_mult: 1 632 | } 633 | convolution_param { 634 | num_output: 192 635 | bias_term: false 636 | pad: 0 637 | kernel_size: 1 638 | engine: CAFFE 639 | stride: 1 640 | weight_filler { 641 | type: "msra" 642 | } 643 | } 
644 | } 645 | layer { 646 | name: "conv2_2/in/pw/bn_new" 647 | type: "BatchNorm" 648 | bottom: "conv2_2/in/pw_new" 649 | top: "conv2_2/in/pw_new" 650 | param { 651 | lr_mult: 0 652 | decay_mult: 0 653 | } 654 | param { 655 | lr_mult: 0 656 | decay_mult: 0 657 | } 658 | param { 659 | lr_mult: 0 660 | decay_mult: 0 661 | } 662 | } 663 | layer { 664 | name: "conv2_2/in/pw/scale_new" 665 | type: "Scale" 666 | bottom: "conv2_2/in/pw_new" 667 | top: "conv2_2/in/pw_new" 668 | scale_param { 669 | filler { 670 | value: 1 671 | } 672 | bias_term: true 673 | bias_filler { 674 | value: 0 675 | } 676 | } 677 | } 678 | layer { 679 | name: "relu2_2/in/pw_new" 680 | type: "ReLU" 681 | bottom: "conv2_2/in/pw_new" 682 | top: "conv2_2/in/pw_new" 683 | } 684 | 685 | # 2_2 dw 686 | layer { 687 | name: "conv2_2/dw_new" 688 | type: "ConvolutionDepthwise" 689 | bottom: "conv2_2/in/pw_new" 690 | top: "conv2_2/dw_new" 691 | param { 692 | lr_mult: 1 693 | decay_mult: 0 694 | } 695 | convolution_param { 696 | num_output: 192 697 | bias_term: false 698 | pad: 1 699 | kernel_size: 3 700 | engine: CAFFE 701 | stride: 1 702 | weight_filler { 703 | type: "msra" 704 | } 705 | } 706 | } 707 | layer { 708 | name: "conv2_2/dw/bn_new" 709 | type: "BatchNorm" 710 | bottom: "conv2_2/dw_new" 711 | top: "conv2_2/dw_new" 712 | param { 713 | lr_mult: 0 714 | decay_mult: 0 715 | } 716 | param { 717 | lr_mult: 0 718 | decay_mult: 0 719 | } 720 | param { 721 | lr_mult: 0 722 | decay_mult: 0 723 | } 724 | } 725 | layer { 726 | name: "conv2_2/dw/scale_new" 727 | type: "Scale" 728 | bottom: "conv2_2/dw_new" 729 | top: "conv2_2/dw_new" 730 | scale_param { 731 | filler { 732 | value: 1 733 | } 734 | bias_term: true 735 | bias_filler { 736 | value: 0 737 | } 738 | } 739 | } 740 | layer { 741 | name: "relu2_2/dw_new" 742 | type: "ReLU" 743 | bottom: "conv2_2/dw_new" 744 | top: "conv2_2/dw_new" 745 | } 746 | 747 | 748 | # 2_2 out 749 | 750 | layer { 751 | name: "conv2_2/out/pw_new" 752 | type: "Convolution" 753 | 
bottom: "conv2_2/dw_new" 754 | top: "conv2_2/out/pw_new" 755 | param { 756 | lr_mult: 1 757 | decay_mult: 1 758 | } 759 | convolution_param { 760 | num_output: 32 761 | bias_term: false 762 | pad: 0 763 | kernel_size: 1 764 | engine: CAFFE 765 | stride: 1 766 | weight_filler { 767 | type: "msra" 768 | } 769 | } 770 | } 771 | layer { 772 | name: "conv2_2/out/pw/bn_new" 773 | type: "BatchNorm" 774 | bottom: "conv2_2/out/pw_new" 775 | top: "conv2_2/out/pw_new" 776 | param { 777 | lr_mult: 0 778 | decay_mult: 0 779 | } 780 | param { 781 | lr_mult: 0 782 | decay_mult: 0 783 | } 784 | param { 785 | lr_mult: 0 786 | decay_mult: 0 787 | } 788 | } 789 | layer { 790 | name: "conv2_2/out/pw/scale_new" 791 | type: "Scale" 792 | bottom: "conv2_2/out/pw_new" 793 | top: "conv2_2/out/pw_new" 794 | scale_param { 795 | filler { 796 | value: 1 797 | } 798 | bias_term: true 799 | bias_filler { 800 | value: 0 801 | } 802 | } 803 | } 804 | layer { 805 | name: "fuse_conv2_2" 806 | type: "Eltwise" 807 | bottom: "conv2_1/out/pw_new" 808 | bottom: "conv2_2/out/pw_new" 809 | top: "fuse_conv2_2" 810 | eltwise_param { 811 | operation: SUM 812 | } 813 | } 814 | 815 | #--------------------------bottleneck3 3_1(s=2) 3_2(s=1) input: 6*6*32 output: 3*3*64 816 | # 3_1 in 817 | layer { 818 | name: "conv3_1/in/pw_new" 819 | type: "Convolution" 820 | bottom: "fuse_conv2_2" 821 | top: "conv3_1/in/pw_new" 822 | param { 823 | lr_mult: 1 824 | decay_mult: 1 825 | } 826 | convolution_param { 827 | num_output: 192 828 | bias_term: false 829 | pad: 0 830 | kernel_size: 1 831 | engine: CAFFE 832 | stride: 1 833 | weight_filler { 834 | type: "msra" 835 | } 836 | } 837 | } 838 | layer { 839 | name: "conv3_1/in/pw/bn_new" 840 | type: "BatchNorm" 841 | bottom: "conv3_1/in/pw_new" 842 | top: "conv3_1/in/pw_new" 843 | param { 844 | lr_mult: 0 845 | decay_mult: 0 846 | } 847 | param { 848 | lr_mult: 0 849 | decay_mult: 0 850 | } 851 | param { 852 | lr_mult: 0 853 | decay_mult: 0 854 | } 855 | } 856 | layer { 857 | 
name: "conv3_1/in/pw/scale_new" 858 | type: "Scale" 859 | bottom: "conv3_1/in/pw_new" 860 | top: "conv3_1/in/pw_new" 861 | scale_param { 862 | filler { 863 | value: 1 864 | } 865 | bias_term: true 866 | bias_filler { 867 | value: 0 868 | } 869 | } 870 | } 871 | layer { 872 | name: "relu3_1/in/pw_new" 873 | type: "ReLU" 874 | bottom: "conv3_1/in/pw_new" 875 | top: "conv3_1/in/pw_new" 876 | } 877 | 878 | # 3_1 dw 879 | layer { 880 | name: "conv3_1/dw_new" 881 | type: "ConvolutionDepthwise" 882 | bottom: "conv3_1/in/pw_new" 883 | top: "conv3_1/dw_new" 884 | param { 885 | lr_mult: 1 886 | decay_mult: 0 887 | } 888 | convolution_param { 889 | num_output: 192 890 | bias_term: false 891 | pad: 1 892 | kernel_size: 3 893 | engine: CAFFE 894 | stride: 2 895 | weight_filler { 896 | type: "msra" 897 | } 898 | } 899 | } 900 | layer { 901 | name: "conv3_1/dw/bn_new" 902 | type: "BatchNorm" 903 | bottom: "conv3_1/dw_new" 904 | top: "conv3_1/dw_new" 905 | param { 906 | lr_mult: 0 907 | decay_mult: 0 908 | } 909 | param { 910 | lr_mult: 0 911 | decay_mult: 0 912 | } 913 | param { 914 | lr_mult: 0 915 | decay_mult: 0 916 | } 917 | } 918 | layer { 919 | name: "conv3_1/dw/scale_new" 920 | type: "Scale" 921 | bottom: "conv3_1/dw_new" 922 | top: "conv3_1/dw_new" 923 | scale_param { 924 | filler { 925 | value: 1 926 | } 927 | bias_term: true 928 | bias_filler { 929 | value: 0 930 | } 931 | } 932 | } 933 | layer { 934 | name: "relu3_1/dw_new" 935 | type: "ReLU" 936 | bottom: "conv3_1/dw_new" 937 | top: "conv3_1/dw_new" 938 | } 939 | 940 | # 3_1 out 941 | layer { 942 | name: "conv3_1/out/pw_new" 943 | type: "Convolution" 944 | bottom: "conv3_1/dw_new" 945 | top: "conv3_1/out/pw_new" 946 | param { 947 | lr_mult: 1 948 | decay_mult: 1 949 | } 950 | convolution_param { 951 | num_output: 64 952 | bias_term: false 953 | pad: 0 954 | kernel_size: 1 955 | engine: CAFFE 956 | stride: 1 957 | weight_filler { 958 | type: "msra" 959 | } 960 | } 961 | } 962 | layer { 963 | name: 
"conv3_1/out/pw/bn_new" 964 | type: "BatchNorm" 965 | bottom: "conv3_1/out/pw_new" 966 | top: "conv3_1/out/pw_new" 967 | param { 968 | lr_mult: 0 969 | decay_mult: 0 970 | } 971 | param { 972 | lr_mult: 0 973 | decay_mult: 0 974 | } 975 | param { 976 | lr_mult: 0 977 | decay_mult: 0 978 | } 979 | } 980 | layer { 981 | name: "conv3_1/out/pw/scale_new" 982 | type: "Scale" 983 | bottom: "conv3_1/out/pw_new" 984 | top: "conv3_1/out/pw_new" 985 | scale_param { 986 | filler { 987 | value: 1 988 | } 989 | bias_term: true 990 | bias_filler { 991 | value: 0 992 | } 993 | } 994 | } 995 | 996 | # 3_2 in 997 | 998 | layer { 999 | name: "conv3_2/in/pw_new" 1000 | type: "Convolution" 1001 | bottom: "conv3_1/out/pw_new" 1002 | top: "conv3_2/in/pw_new" 1003 | param { 1004 | lr_mult: 1 1005 | decay_mult: 1 1006 | } 1007 | convolution_param { 1008 | num_output: 192 1009 | bias_term: false 1010 | pad: 0 1011 | kernel_size: 1 1012 | engine: CAFFE 1013 | stride: 1 1014 | weight_filler { 1015 | type: "msra" 1016 | } 1017 | } 1018 | } 1019 | layer { 1020 | name: "conv3_2/in/pw/bn_new" 1021 | type: "BatchNorm" 1022 | bottom: "conv3_2/in/pw_new" 1023 | top: "conv3_2/in/pw_new" 1024 | param { 1025 | lr_mult: 0 1026 | decay_mult: 0 1027 | } 1028 | param { 1029 | lr_mult: 0 1030 | decay_mult: 0 1031 | } 1032 | param { 1033 | lr_mult: 0 1034 | decay_mult: 0 1035 | } 1036 | } 1037 | layer { 1038 | name: "conv3_2/in/pw/scale_new" 1039 | type: "Scale" 1040 | bottom: "conv3_2/in/pw_new" 1041 | top: "conv3_2/in/pw_new" 1042 | scale_param { 1043 | filler { 1044 | value: 1 1045 | } 1046 | bias_term: true 1047 | bias_filler { 1048 | value: 0 1049 | } 1050 | } 1051 | } 1052 | layer { 1053 | name: "relu3_2/in/pw_new" 1054 | type: "ReLU" 1055 | bottom: "conv3_2/in/pw_new" 1056 | top: "conv3_2/in/pw_new" 1057 | } 1058 | 1059 | # 3_2 dw 1060 | layer { 1061 | name: "conv3_2/dw_new" 1062 | type: "ConvolutionDepthwise" 1063 | bottom: "conv3_2/in/pw_new" 1064 | top: "conv3_2/dw_new" 1065 | param { 1066 | 
lr_mult: 1 1067 | decay_mult: 0 1068 | } 1069 | convolution_param { 1070 | num_output: 192 1071 | bias_term: false 1072 | pad: 1 1073 | kernel_size: 3 1074 | engine: CAFFE 1075 | stride: 1 1076 | weight_filler { 1077 | type: "msra" 1078 | } 1079 | } 1080 | } 1081 | layer { 1082 | name: "conv3_2/dw/bn_new" 1083 | type: "BatchNorm" 1084 | bottom: "conv3_2/dw_new" 1085 | top: "conv3_2/dw_new" 1086 | param { 1087 | lr_mult: 0 1088 | decay_mult: 0 1089 | } 1090 | param { 1091 | lr_mult: 0 1092 | decay_mult: 0 1093 | } 1094 | param { 1095 | lr_mult: 0 1096 | decay_mult: 0 1097 | } 1098 | } 1099 | layer { 1100 | name: "conv3_2/dw/scale_new" 1101 | type: "Scale" 1102 | bottom: "conv3_2/dw_new" 1103 | top: "conv3_2/dw_new" 1104 | scale_param { 1105 | filler { 1106 | value: 1 1107 | } 1108 | bias_term: true 1109 | bias_filler { 1110 | value: 0 1111 | } 1112 | } 1113 | } 1114 | layer { 1115 | name: "relu3_2/dw_new" 1116 | type: "ReLU" 1117 | bottom: "conv3_2/dw_new" 1118 | top: "conv3_2/dw_new" 1119 | } 1120 | 1121 | 1122 | # 3_2 out 1123 | 1124 | layer { 1125 | name: "conv3_2/out/pw_new" 1126 | type: "Convolution" 1127 | bottom: "conv3_2/dw_new" 1128 | top: "conv3_2/out/pw_new" 1129 | param { 1130 | lr_mult: 1 1131 | decay_mult: 1 1132 | } 1133 | convolution_param { 1134 | num_output: 64 1135 | bias_term: false 1136 | pad: 0 1137 | kernel_size: 1 1138 | engine: CAFFE 1139 | stride: 1 1140 | weight_filler { 1141 | type: "msra" 1142 | } 1143 | } 1144 | } 1145 | layer { 1146 | name: "conv3_2/out/pw/bn_new" 1147 | type: "BatchNorm" 1148 | bottom: "conv3_2/out/pw_new" 1149 | top: "conv3_2/out/pw_new" 1150 | param { 1151 | lr_mult: 0 1152 | decay_mult: 0 1153 | } 1154 | param { 1155 | lr_mult: 0 1156 | decay_mult: 0 1157 | } 1158 | param { 1159 | lr_mult: 0 1160 | decay_mult: 0 1161 | } 1162 | } 1163 | layer { 1164 | name: "conv3_2/out/pw/scale_new" 1165 | type: "Scale" 1166 | bottom: "conv3_2/out/pw_new" 1167 | top: "conv3_2/out/pw_new" 1168 | scale_param { 1169 | filler { 1170 | 
value: 1 1171 | } 1172 | bias_term: true 1173 | bias_filler { 1174 | value: 0 1175 | } 1176 | } 1177 | } 1178 | layer { 1179 | name: "fuse_conv3_2" 1180 | type: "Eltwise" 1181 | bottom: "conv3_1/out/pw_new" 1182 | bottom: "conv3_2/out/pw_new" 1183 | top: "fuse_conv3_2" 1184 | eltwise_param { 1185 | operation: SUM 1186 | } 1187 | } 1188 | 1189 | 1190 | 1191 | 1192 | 1193 | 1194 | 1195 | #------------------------- fc1 1196 | layer { 1197 | name: "fc1" 1198 | type: "InnerProduct" 1199 | bottom: "fuse_conv3_2" 1200 | top: "fc1" 1201 | param { 1202 | lr_mult: 1 1203 | decay_mult: 1 1204 | } 1205 | param { 1206 | lr_mult: 2 1207 | decay_mult: 1 1208 | } 1209 | inner_product_param { 1210 | num_output: 256 1211 | weight_filler { 1212 | type: "gaussian" 1213 | std: 0.01 1214 | } 1215 | bias_filler { 1216 | type: "constant" 1217 | value: 0 1218 | } 1219 | } 1220 | } 1221 | layer { 1222 | name: "relu_fc1" 1223 | type: "ReLU" 1224 | bottom: "fc1" 1225 | top: "fc1" 1226 | } 1227 | layer { 1228 | name: "drop_fc1" 1229 | type: "Dropout" 1230 | bottom: "fc1" 1231 | top: "fc1" 1232 | dropout_param{ 1233 | dropout_ratio: 0.3 1234 | } 1235 | } 1236 | 1237 | #------------------------- fc2 1238 | layer { 1239 | name: "fc2" 1240 | type: "InnerProduct" 1241 | bottom: "fc1" 1242 | top: "fc2" 1243 | param { 1244 | lr_mult: 1 1245 | decay_mult: 1 1246 | } 1247 | param { 1248 | lr_mult: 2 1249 | decay_mult: 1 1250 | } 1251 | inner_product_param { 1252 | num_output: 10 1253 | weight_filler { 1254 | type: "gaussian" 1255 | std: 0.01 1256 | } 1257 | bias_filler { 1258 | type: "constant" 1259 | value: 0 1260 | } 1261 | } 1262 | } -------------------------------------------------------------------------------- /3_demo/Data/l1_net.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/l1_net.caffemodel 
-------------------------------------------------------------------------------- /3_demo/Data/l2_deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "level_2" 2 | input: "data" 3 | input_shape { dim: 1 dim: 3 dim: 48 dim: 48 } 4 | 5 | 6 | #------------------------------------------- conv1 input: 48*48*3 output: 24*24*16 7 | layer { 8 | name: "conv1_new" 9 | type: "Convolution" 10 | bottom: "data" 11 | top: "conv1_new" 12 | param { 13 | lr_mult: 1 14 | decay_mult: 1 15 | } 16 | convolution_param { 17 | num_output: 16 18 | bias_term: false 19 | pad: 1 20 | kernel_size: 3 21 | stride: 2 22 | weight_filler { 23 | type: "msra" 24 | } 25 | } 26 | } 27 | layer { 28 | name: "conv1/bn_new" 29 | type: "BatchNorm" 30 | bottom: "conv1_new" 31 | top: "conv1_new" 32 | param { 33 | lr_mult: 0 34 | decay_mult: 0 35 | } 36 | param { 37 | lr_mult: 0 38 | decay_mult: 0 39 | } 40 | param { 41 | lr_mult: 0 42 | decay_mult: 0 43 | } 44 | } 45 | layer { 46 | name: "conv1/scale_new" 47 | type: "Scale" 48 | bottom: "conv1_new" 49 | top: "conv1_new" 50 | scale_param { 51 | filler { 52 | value: 1 53 | } 54 | bias_term: true 55 | bias_filler { 56 | value: 0 57 | } 58 | } 59 | } 60 | layer { 61 | name: "relu1_new" 62 | type: "ReLU" 63 | bottom: "conv1_new" 64 | top: "conv1_new" 65 | } 66 | 67 | #----------- bottleneck_1 1_1(s=2) 1_2(s=1) input: 24*24*16 output: 12*12*24 68 | 69 | layer { 70 | name: "conv1_1/in/pw_new" 71 | type: "Convolution" 72 | bottom: "conv1_new" 73 | top: "conv1_1/in/pw_new" 74 | param { 75 | lr_mult: 1 76 | decay_mult: 1 77 | } 78 | convolution_param { 79 | num_output: 96 80 | bias_term: false 81 | pad: 0 82 | kernel_size: 1 83 | engine: CAFFE 84 | stride: 1 85 | weight_filler { 86 | type: "msra" 87 | } 88 | } 89 | } 90 | layer { 91 | name: "conv1_1/in/pw/bn_new" 92 | type: "BatchNorm" 93 | bottom: "conv1_1/in/pw_new" 94 | top: "conv1_1/in/pw_new" 95 | param { 96 | lr_mult: 0 97 | decay_mult: 0 98 | } 99 | param { 
100 | lr_mult: 0 101 | decay_mult: 0 102 | } 103 | param { 104 | lr_mult: 0 105 | decay_mult: 0 106 | } 107 | } 108 | layer { 109 | name: "conv1_1/in/pw/scale_new" 110 | type: "Scale" 111 | bottom: "conv1_1/in/pw_new" 112 | top: "conv1_1/in/pw_new" 113 | scale_param { 114 | filler { 115 | value: 1 116 | } 117 | bias_term: true 118 | bias_filler { 119 | value: 0 120 | } 121 | } 122 | } 123 | layer { 124 | name: "relu1_1/in/pw_new" 125 | type: "ReLU" 126 | bottom: "conv1_1/in/pw_new" 127 | top: "conv1_1/in/pw_new" 128 | } 129 | 130 | 131 | 132 | # 1_1 dw conv 133 | layer { 134 | name: "conv1_1/dw_new" 135 | type: "ConvolutionDepthwise" 136 | bottom: "conv1_1/in/pw_new" 137 | top: "conv1_1/dw_new" 138 | param { 139 | lr_mult: 1 140 | decay_mult: 0 141 | } 142 | convolution_param { 143 | num_output: 96 144 | bias_term: false 145 | pad: 1 146 | kernel_size: 3 147 | engine: CAFFE 148 | stride: 2 149 | weight_filler { 150 | type: "msra" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "conv1_1/dw/bn_new" 156 | type: "BatchNorm" 157 | bottom: "conv1_1/dw_new" 158 | top: "conv1_1/dw_new" 159 | param { 160 | lr_mult: 0 161 | decay_mult: 0 162 | } 163 | param { 164 | lr_mult: 0 165 | decay_mult: 0 166 | } 167 | param { 168 | lr_mult: 0 169 | decay_mult: 0 170 | } 171 | } 172 | layer { 173 | name: "conv1_1/dw/scale_new" 174 | type: "Scale" 175 | bottom: "conv1_1/dw_new" 176 | top: "conv1_1/dw_new" 177 | scale_param { 178 | filler { 179 | value: 1 180 | } 181 | bias_term: true 182 | bias_filler { 183 | value: 0 184 | } 185 | } 186 | } 187 | layer { 188 | name: "relu1_1/dw_new" 189 | type: "ReLU" 190 | bottom: "conv1_1/dw_new" 191 | top: "conv1_1/dw_new" 192 | } 193 | 194 | # 1_1 out 195 | layer { 196 | name: "conv1_1/out/pw_new" 197 | type: "Convolution" 198 | bottom: "conv1_1/dw_new" 199 | top: "conv1_1/out/pw_new" 200 | param { 201 | lr_mult: 1 202 | decay_mult: 1 203 | } 204 | convolution_param { 205 | num_output: 24 206 | bias_term: false 207 | pad: 0 208 | kernel_size: 1 
209 | engine: CAFFE 210 | stride: 1 211 | weight_filler { 212 | type: "msra" 213 | } 214 | } 215 | } 216 | layer { 217 | name: "conv1_1/out/pw/bn_new" 218 | type: "BatchNorm" 219 | bottom: "conv1_1/out/pw_new" 220 | top: "conv1_1/out/pw_new" 221 | param { 222 | lr_mult: 0 223 | decay_mult: 0 224 | } 225 | param { 226 | lr_mult: 0 227 | decay_mult: 0 228 | } 229 | param { 230 | lr_mult: 0 231 | decay_mult: 0 232 | } 233 | } 234 | layer { 235 | name: "conv1_1/out/pw/scale_new" 236 | type: "Scale" 237 | bottom: "conv1_1/out/pw_new" 238 | top: "conv1_1/out/pw_new" 239 | scale_param { 240 | filler { 241 | value: 1 242 | } 243 | bias_term: true 244 | bias_filler { 245 | value: 0 246 | } 247 | } 248 | } 249 | # 1_2 in 250 | 251 | layer { 252 | name: "conv1_2/in/pw_new" 253 | type: "Convolution" 254 | bottom: "conv1_1/out/pw_new" 255 | top: "conv1_2/in/pw_new" 256 | param { 257 | lr_mult: 1 258 | decay_mult: 1 259 | } 260 | convolution_param { 261 | num_output: 144 262 | bias_term: false 263 | pad: 0 264 | kernel_size: 1 265 | engine: CAFFE 266 | stride: 1 267 | weight_filler { 268 | type: "msra" 269 | } 270 | } 271 | } 272 | layer { 273 | name: "conv1_2/in/pw/bn_new" 274 | type: "BatchNorm" 275 | bottom: "conv1_2/in/pw_new" 276 | top: "conv1_2/in/pw_new" 277 | param { 278 | lr_mult: 0 279 | decay_mult: 0 280 | } 281 | param { 282 | lr_mult: 0 283 | decay_mult: 0 284 | } 285 | param { 286 | lr_mult: 0 287 | decay_mult: 0 288 | } 289 | } 290 | layer { 291 | name: "conv1_2/in/pw/scale_new" 292 | type: "Scale" 293 | bottom: "conv1_2/in/pw_new" 294 | top: "conv1_2/in/pw_new" 295 | scale_param { 296 | filler { 297 | value: 1 298 | } 299 | bias_term: true 300 | bias_filler { 301 | value: 0 302 | } 303 | } 304 | } 305 | layer { 306 | name: "relu1_2/in/pw_new" 307 | type: "ReLU" 308 | bottom: "conv1_2/in/pw_new" 309 | top: "conv1_2/in/pw_new" 310 | } 311 | 312 | # 1_2 dw 313 | 314 | layer { 315 | name: "conv1_2/dw_new" 316 | type: "ConvolutionDepthwise" 317 | bottom: 
"conv1_2/in/pw_new" 318 | top: "conv1_2/dw_new" 319 | param { 320 | lr_mult: 1 321 | decay_mult: 0 322 | } 323 | convolution_param { 324 | num_output: 144 325 | bias_term: false 326 | pad: 1 327 | kernel_size: 3 328 | engine: CAFFE 329 | stride: 1 330 | weight_filler { 331 | type: "msra" 332 | } 333 | } 334 | } 335 | layer { 336 | name: "conv1_2/dw/bn_new" 337 | type: "BatchNorm" 338 | bottom: "conv1_2/dw_new" 339 | top: "conv1_2/dw_new" 340 | param { 341 | lr_mult: 0 342 | decay_mult: 0 343 | } 344 | param { 345 | lr_mult: 0 346 | decay_mult: 0 347 | } 348 | param { 349 | lr_mult: 0 350 | decay_mult: 0 351 | } 352 | } 353 | layer { 354 | name: "conv1_2/dw/scale_new" 355 | type: "Scale" 356 | bottom: "conv1_2/dw_new" 357 | top: "conv1_2/dw_new" 358 | scale_param { 359 | filler { 360 | value: 1 361 | } 362 | bias_term: true 363 | bias_filler { 364 | value: 0 365 | } 366 | } 367 | } 368 | layer { 369 | name: "relu1_2/dw_new" 370 | type: "ReLU" 371 | bottom: "conv1_2/dw_new" 372 | top: "conv1_2/dw_new" 373 | } 374 | 375 | # 1_2 out 12*12*24 376 | layer { 377 | name: "conv1_2/out/pw_new" 378 | type: "Convolution" 379 | bottom: "conv1_2/dw_new" 380 | top: "conv1_2/out/pw_new" 381 | param { 382 | lr_mult: 1 383 | decay_mult: 1 384 | } 385 | convolution_param { 386 | num_output: 24 387 | bias_term: false 388 | pad: 0 389 | kernel_size: 1 390 | engine: CAFFE 391 | stride: 1 392 | weight_filler { 393 | type: "msra" 394 | } 395 | } 396 | } 397 | layer { 398 | name: "conv1_2/out/pw/bn_new" 399 | type: "BatchNorm" 400 | bottom: "conv1_2/out/pw_new" 401 | top: "conv1_2/out/pw_new" 402 | param { 403 | lr_mult: 0 404 | decay_mult: 0 405 | } 406 | param { 407 | lr_mult: 0 408 | decay_mult: 0 409 | } 410 | param { 411 | lr_mult: 0 412 | decay_mult: 0 413 | } 414 | } 415 | layer { 416 | name: "conv1_2/out/pw/scale_new" 417 | type: "Scale" 418 | bottom: "conv1_2/out/pw_new" 419 | top: "conv1_2/out/pw_new" 420 | scale_param { 421 | filler { 422 | value: 1 423 | } 424 | bias_term: true 
425 | bias_filler { 426 | value: 0 427 | } 428 | } 429 | } 430 | layer { 431 | name: "fuse_conv1_2" 432 | type: "Eltwise" 433 | bottom: "conv1_1/out/pw_new" 434 | bottom: "conv1_2/out/pw_new" 435 | top: "fuse_conv1_2" 436 | eltwise_param { 437 | operation: SUM 438 | } 439 | } 440 | 441 | #--------------------------bottleneck2 2_1(s=2) 2_2(s=1) input: 12*12*24 output: 6*6*32 442 | # 2_1 in 443 | layer { 444 | name: "conv2_1/in/pw_new" 445 | type: "Convolution" 446 | bottom: "fuse_conv1_2" 447 | top: "conv2_1/in/pw_new" 448 | param { 449 | lr_mult: 1 450 | decay_mult: 1 451 | } 452 | convolution_param { 453 | num_output: 144 454 | bias_term: false 455 | pad: 0 456 | kernel_size: 1 457 | engine: CAFFE 458 | stride: 1 459 | weight_filler { 460 | type: "msra" 461 | } 462 | } 463 | } 464 | layer { 465 | name: "conv2_1/in/pw/bn_new" 466 | type: "BatchNorm" 467 | bottom: "conv2_1/in/pw_new" 468 | top: "conv2_1/in/pw_new" 469 | param { 470 | lr_mult: 0 471 | decay_mult: 0 472 | } 473 | param { 474 | lr_mult: 0 475 | decay_mult: 0 476 | } 477 | param { 478 | lr_mult: 0 479 | decay_mult: 0 480 | } 481 | } 482 | layer { 483 | name: "conv2_1/in/pw/scale_new" 484 | type: "Scale" 485 | bottom: "conv2_1/in/pw_new" 486 | top: "conv2_1/in/pw_new" 487 | scale_param { 488 | filler { 489 | value: 1 490 | } 491 | bias_term: true 492 | bias_filler { 493 | value: 0 494 | } 495 | } 496 | } 497 | layer { 498 | name: "relu2_1/in/pw_new" 499 | type: "ReLU" 500 | bottom: "conv2_1/in/pw_new" 501 | top: "conv2_1/in/pw_new" 502 | } 503 | 504 | # 2_1 dw 505 | layer { 506 | name: "conv2_1/dw_new" 507 | type: "ConvolutionDepthwise" 508 | bottom: "conv2_1/in/pw_new" 509 | top: "conv2_1/dw_new" 510 | param { 511 | lr_mult: 1 512 | decay_mult: 0 513 | } 514 | convolution_param { 515 | num_output: 144 516 | bias_term: false 517 | pad: 1 518 | kernel_size: 3 519 | engine: CAFFE 520 | stride: 2 521 | weight_filler { 522 | type: "msra" 523 | } 524 | } 525 | } 526 | layer { 527 | name: "conv2_1/dw/bn_new" 
528 | type: "BatchNorm" 529 | bottom: "conv2_1/dw_new" 530 | top: "conv2_1/dw_new" 531 | param { 532 | lr_mult: 0 533 | decay_mult: 0 534 | } 535 | param { 536 | lr_mult: 0 537 | decay_mult: 0 538 | } 539 | param { 540 | lr_mult: 0 541 | decay_mult: 0 542 | } 543 | } 544 | layer { 545 | name: "conv2_1/dw/scale_new" 546 | type: "Scale" 547 | bottom: "conv2_1/dw_new" 548 | top: "conv2_1/dw_new" 549 | scale_param { 550 | filler { 551 | value: 1 552 | } 553 | bias_term: true 554 | bias_filler { 555 | value: 0 556 | } 557 | } 558 | } 559 | layer { 560 | name: "relu2_1/dw_new" 561 | type: "ReLU" 562 | bottom: "conv2_1/dw_new" 563 | top: "conv2_1/dw_new" 564 | } 565 | 566 | # 2_1 out 567 | layer { 568 | name: "conv2_1/out/pw_new" 569 | type: "Convolution" 570 | bottom: "conv2_1/dw_new" 571 | top: "conv2_1/out/pw_new" 572 | param { 573 | lr_mult: 1 574 | decay_mult: 1 575 | } 576 | convolution_param { 577 | num_output: 32 578 | bias_term: false 579 | pad: 0 580 | kernel_size: 1 581 | engine: CAFFE 582 | stride: 1 583 | weight_filler { 584 | type: "msra" 585 | } 586 | } 587 | } 588 | layer { 589 | name: "conv2_1/out/pw/bn_new" 590 | type: "BatchNorm" 591 | bottom: "conv2_1/out/pw_new" 592 | top: "conv2_1/out/pw_new" 593 | param { 594 | lr_mult: 0 595 | decay_mult: 0 596 | } 597 | param { 598 | lr_mult: 0 599 | decay_mult: 0 600 | } 601 | param { 602 | lr_mult: 0 603 | decay_mult: 0 604 | } 605 | } 606 | layer { 607 | name: "conv2_1/out/pw/scale_new" 608 | type: "Scale" 609 | bottom: "conv2_1/out/pw_new" 610 | top: "conv2_1/out/pw_new" 611 | scale_param { 612 | filler { 613 | value: 1 614 | } 615 | bias_term: true 616 | bias_filler { 617 | value: 0 618 | } 619 | } 620 | } 621 | 622 | # 2_2 in 623 | 624 | layer { 625 | name: "conv2_2/in/pw_new" 626 | type: "Convolution" 627 | bottom: "conv2_1/out/pw_new" 628 | top: "conv2_2/in/pw_new" 629 | param { 630 | lr_mult: 1 631 | decay_mult: 1 632 | } 633 | convolution_param { 634 | num_output: 192 635 | bias_term: false 636 | pad: 0 
637 | kernel_size: 1 638 | engine: CAFFE 639 | stride: 1 640 | weight_filler { 641 | type: "msra" 642 | } 643 | } 644 | } 645 | layer { 646 | name: "conv2_2/in/pw/bn_new" 647 | type: "BatchNorm" 648 | bottom: "conv2_2/in/pw_new" 649 | top: "conv2_2/in/pw_new" 650 | param { 651 | lr_mult: 0 652 | decay_mult: 0 653 | } 654 | param { 655 | lr_mult: 0 656 | decay_mult: 0 657 | } 658 | param { 659 | lr_mult: 0 660 | decay_mult: 0 661 | } 662 | } 663 | layer { 664 | name: "conv2_2/in/pw/scale_new" 665 | type: "Scale" 666 | bottom: "conv2_2/in/pw_new" 667 | top: "conv2_2/in/pw_new" 668 | scale_param { 669 | filler { 670 | value: 1 671 | } 672 | bias_term: true 673 | bias_filler { 674 | value: 0 675 | } 676 | } 677 | } 678 | layer { 679 | name: "relu2_2/in/pw_new" 680 | type: "ReLU" 681 | bottom: "conv2_2/in/pw_new" 682 | top: "conv2_2/in/pw_new" 683 | } 684 | 685 | # 2_2 dw 686 | layer { 687 | name: "conv2_2/dw_new" 688 | type: "ConvolutionDepthwise" 689 | bottom: "conv2_2/in/pw_new" 690 | top: "conv2_2/dw_new" 691 | param { 692 | lr_mult: 1 693 | decay_mult: 0 694 | } 695 | convolution_param { 696 | num_output: 192 697 | bias_term: false 698 | pad: 1 699 | kernel_size: 3 700 | engine: CAFFE 701 | stride: 1 702 | weight_filler { 703 | type: "msra" 704 | } 705 | } 706 | } 707 | layer { 708 | name: "conv2_2/dw/bn_new" 709 | type: "BatchNorm" 710 | bottom: "conv2_2/dw_new" 711 | top: "conv2_2/dw_new" 712 | param { 713 | lr_mult: 0 714 | decay_mult: 0 715 | } 716 | param { 717 | lr_mult: 0 718 | decay_mult: 0 719 | } 720 | param { 721 | lr_mult: 0 722 | decay_mult: 0 723 | } 724 | } 725 | layer { 726 | name: "conv2_2/dw/scale_new" 727 | type: "Scale" 728 | bottom: "conv2_2/dw_new" 729 | top: "conv2_2/dw_new" 730 | scale_param { 731 | filler { 732 | value: 1 733 | } 734 | bias_term: true 735 | bias_filler { 736 | value: 0 737 | } 738 | } 739 | } 740 | layer { 741 | name: "relu2_2/dw_new" 742 | type: "ReLU" 743 | bottom: "conv2_2/dw_new" 744 | top: "conv2_2/dw_new" 745 | } 746 
| 747 | 748 | # 2_2 out 749 | 750 | layer { 751 | name: "conv2_2/out/pw_new" 752 | type: "Convolution" 753 | bottom: "conv2_2/dw_new" 754 | top: "conv2_2/out/pw_new" 755 | param { 756 | lr_mult: 1 757 | decay_mult: 1 758 | } 759 | convolution_param { 760 | num_output: 32 761 | bias_term: false 762 | pad: 0 763 | kernel_size: 1 764 | engine: CAFFE 765 | stride: 1 766 | weight_filler { 767 | type: "msra" 768 | } 769 | } 770 | } 771 | layer { 772 | name: "conv2_2/out/pw/bn_new" 773 | type: "BatchNorm" 774 | bottom: "conv2_2/out/pw_new" 775 | top: "conv2_2/out/pw_new" 776 | param { 777 | lr_mult: 0 778 | decay_mult: 0 779 | } 780 | param { 781 | lr_mult: 0 782 | decay_mult: 0 783 | } 784 | param { 785 | lr_mult: 0 786 | decay_mult: 0 787 | } 788 | } 789 | layer { 790 | name: "conv2_2/out/pw/scale_new" 791 | type: "Scale" 792 | bottom: "conv2_2/out/pw_new" 793 | top: "conv2_2/out/pw_new" 794 | scale_param { 795 | filler { 796 | value: 1 797 | } 798 | bias_term: true 799 | bias_filler { 800 | value: 0 801 | } 802 | } 803 | } 804 | layer { 805 | name: "fuse_conv2_2" 806 | type: "Eltwise" 807 | bottom: "conv2_1/out/pw_new" 808 | bottom: "conv2_2/out/pw_new" 809 | top: "fuse_conv2_2" 810 | eltwise_param { 811 | operation: SUM 812 | } 813 | } 814 | 815 | #--------------------------bottleneck3 3_1(s=2) 3_2(s=1) input: 6*6*32 output: 3*3*64 816 | # 3_1 in 817 | layer { 818 | name: "conv3_1/in/pw_new" 819 | type: "Convolution" 820 | bottom: "fuse_conv2_2" 821 | top: "conv3_1/in/pw_new" 822 | param { 823 | lr_mult: 1 824 | decay_mult: 1 825 | } 826 | convolution_param { 827 | num_output: 192 828 | bias_term: false 829 | pad: 0 830 | kernel_size: 1 831 | engine: CAFFE 832 | stride: 1 833 | weight_filler { 834 | type: "msra" 835 | } 836 | } 837 | } 838 | layer { 839 | name: "conv3_1/in/pw/bn_new" 840 | type: "BatchNorm" 841 | bottom: "conv3_1/in/pw_new" 842 | top: "conv3_1/in/pw_new" 843 | param { 844 | lr_mult: 0 845 | decay_mult: 0 846 | } 847 | param { 848 | lr_mult: 0 849 | 
decay_mult: 0 850 | } 851 | param { 852 | lr_mult: 0 853 | decay_mult: 0 854 | } 855 | } 856 | layer { 857 | name: "conv3_1/in/pw/scale_new" 858 | type: "Scale" 859 | bottom: "conv3_1/in/pw_new" 860 | top: "conv3_1/in/pw_new" 861 | scale_param { 862 | filler { 863 | value: 1 864 | } 865 | bias_term: true 866 | bias_filler { 867 | value: 0 868 | } 869 | } 870 | } 871 | layer { 872 | name: "relu3_1/in/pw_new" 873 | type: "ReLU" 874 | bottom: "conv3_1/in/pw_new" 875 | top: "conv3_1/in/pw_new" 876 | } 877 | 878 | # 3_1 dw 879 | layer { 880 | name: "conv3_1/dw_new" 881 | type: "ConvolutionDepthwise" 882 | bottom: "conv3_1/in/pw_new" 883 | top: "conv3_1/dw_new" 884 | param { 885 | lr_mult: 1 886 | decay_mult: 0 887 | } 888 | convolution_param { 889 | num_output: 192 890 | bias_term: false 891 | pad: 1 892 | kernel_size: 3 893 | engine: CAFFE 894 | stride: 2 895 | weight_filler { 896 | type: "msra" 897 | } 898 | } 899 | } 900 | layer { 901 | name: "conv3_1/dw/bn_new" 902 | type: "BatchNorm" 903 | bottom: "conv3_1/dw_new" 904 | top: "conv3_1/dw_new" 905 | param { 906 | lr_mult: 0 907 | decay_mult: 0 908 | } 909 | param { 910 | lr_mult: 0 911 | decay_mult: 0 912 | } 913 | param { 914 | lr_mult: 0 915 | decay_mult: 0 916 | } 917 | } 918 | layer { 919 | name: "conv3_1/dw/scale_new" 920 | type: "Scale" 921 | bottom: "conv3_1/dw_new" 922 | top: "conv3_1/dw_new" 923 | scale_param { 924 | filler { 925 | value: 1 926 | } 927 | bias_term: true 928 | bias_filler { 929 | value: 0 930 | } 931 | } 932 | } 933 | layer { 934 | name: "relu3_1/dw_new" 935 | type: "ReLU" 936 | bottom: "conv3_1/dw_new" 937 | top: "conv3_1/dw_new" 938 | } 939 | 940 | # 3_1 out 941 | layer { 942 | name: "conv3_1/out/pw_new" 943 | type: "Convolution" 944 | bottom: "conv3_1/dw_new" 945 | top: "conv3_1/out/pw_new" 946 | param { 947 | lr_mult: 1 948 | decay_mult: 1 949 | } 950 | convolution_param { 951 | num_output: 64 952 | bias_term: false 953 | pad: 0 954 | kernel_size: 1 955 | engine: CAFFE 956 | stride: 1 957 
| weight_filler { 958 | type: "msra" 959 | } 960 | } 961 | } 962 | layer { 963 | name: "conv3_1/out/pw/bn_new" 964 | type: "BatchNorm" 965 | bottom: "conv3_1/out/pw_new" 966 | top: "conv3_1/out/pw_new" 967 | param { 968 | lr_mult: 0 969 | decay_mult: 0 970 | } 971 | param { 972 | lr_mult: 0 973 | decay_mult: 0 974 | } 975 | param { 976 | lr_mult: 0 977 | decay_mult: 0 978 | } 979 | } 980 | layer { 981 | name: "conv3_1/out/pw/scale_new" 982 | type: "Scale" 983 | bottom: "conv3_1/out/pw_new" 984 | top: "conv3_1/out/pw_new" 985 | scale_param { 986 | filler { 987 | value: 1 988 | } 989 | bias_term: true 990 | bias_filler { 991 | value: 0 992 | } 993 | } 994 | } 995 | 996 | # 3_2 in 997 | 998 | layer { 999 | name: "conv3_2/in/pw_new" 1000 | type: "Convolution" 1001 | bottom: "conv3_1/out/pw_new" 1002 | top: "conv3_2/in/pw_new" 1003 | param { 1004 | lr_mult: 1 1005 | decay_mult: 1 1006 | } 1007 | convolution_param { 1008 | num_output: 192 1009 | bias_term: false 1010 | pad: 0 1011 | kernel_size: 1 1012 | engine: CAFFE 1013 | stride: 1 1014 | weight_filler { 1015 | type: "msra" 1016 | } 1017 | } 1018 | } 1019 | layer { 1020 | name: "conv3_2/in/pw/bn_new" 1021 | type: "BatchNorm" 1022 | bottom: "conv3_2/in/pw_new" 1023 | top: "conv3_2/in/pw_new" 1024 | param { 1025 | lr_mult: 0 1026 | decay_mult: 0 1027 | } 1028 | param { 1029 | lr_mult: 0 1030 | decay_mult: 0 1031 | } 1032 | param { 1033 | lr_mult: 0 1034 | decay_mult: 0 1035 | } 1036 | } 1037 | layer { 1038 | name: "conv3_2/in/pw/scale_new" 1039 | type: "Scale" 1040 | bottom: "conv3_2/in/pw_new" 1041 | top: "conv3_2/in/pw_new" 1042 | scale_param { 1043 | filler { 1044 | value: 1 1045 | } 1046 | bias_term: true 1047 | bias_filler { 1048 | value: 0 1049 | } 1050 | } 1051 | } 1052 | layer { 1053 | name: "relu3_2/in/pw_new" 1054 | type: "ReLU" 1055 | bottom: "conv3_2/in/pw_new" 1056 | top: "conv3_2/in/pw_new" 1057 | } 1058 | 1059 | # 3_2 dw 1060 | layer { 1061 | name: "conv3_2/dw_new" 1062 | type: "ConvolutionDepthwise" 1063 
| bottom: "conv3_2/in/pw_new" 1064 | top: "conv3_2/dw_new" 1065 | param { 1066 | lr_mult: 1 1067 | decay_mult: 0 1068 | } 1069 | convolution_param { 1070 | num_output: 192 1071 | bias_term: false 1072 | pad: 1 1073 | kernel_size: 3 1074 | engine: CAFFE 1075 | stride: 1 1076 | weight_filler { 1077 | type: "msra" 1078 | } 1079 | } 1080 | } 1081 | layer { 1082 | name: "conv3_2/dw/bn_new" 1083 | type: "BatchNorm" 1084 | bottom: "conv3_2/dw_new" 1085 | top: "conv3_2/dw_new" 1086 | param { 1087 | lr_mult: 0 1088 | decay_mult: 0 1089 | } 1090 | param { 1091 | lr_mult: 0 1092 | decay_mult: 0 1093 | } 1094 | param { 1095 | lr_mult: 0 1096 | decay_mult: 0 1097 | } 1098 | } 1099 | layer { 1100 | name: "conv3_2/dw/scale_new" 1101 | type: "Scale" 1102 | bottom: "conv3_2/dw_new" 1103 | top: "conv3_2/dw_new" 1104 | scale_param { 1105 | filler { 1106 | value: 1 1107 | } 1108 | bias_term: true 1109 | bias_filler { 1110 | value: 0 1111 | } 1112 | } 1113 | } 1114 | layer { 1115 | name: "relu3_2/dw_new" 1116 | type: "ReLU" 1117 | bottom: "conv3_2/dw_new" 1118 | top: "conv3_2/dw_new" 1119 | } 1120 | 1121 | 1122 | # 3_2 out 1123 | 1124 | layer { 1125 | name: "conv3_2/out/pw_new" 1126 | type: "Convolution" 1127 | bottom: "conv3_2/dw_new" 1128 | top: "conv3_2/out/pw_new" 1129 | param { 1130 | lr_mult: 1 1131 | decay_mult: 1 1132 | } 1133 | convolution_param { 1134 | num_output: 64 1135 | bias_term: false 1136 | pad: 0 1137 | kernel_size: 1 1138 | engine: CAFFE 1139 | stride: 1 1140 | weight_filler { 1141 | type: "msra" 1142 | } 1143 | } 1144 | } 1145 | layer { 1146 | name: "conv3_2/out/pw/bn_new" 1147 | type: "BatchNorm" 1148 | bottom: "conv3_2/out/pw_new" 1149 | top: "conv3_2/out/pw_new" 1150 | param { 1151 | lr_mult: 0 1152 | decay_mult: 0 1153 | } 1154 | param { 1155 | lr_mult: 0 1156 | decay_mult: 0 1157 | } 1158 | param { 1159 | lr_mult: 0 1160 | decay_mult: 0 1161 | } 1162 | } 1163 | layer { 1164 | name: "conv3_2/out/pw/scale_new" 1165 | type: "Scale" 1166 | bottom: 
"conv3_2/out/pw_new" 1167 | top: "conv3_2/out/pw_new" 1168 | scale_param { 1169 | filler { 1170 | value: 1 1171 | } 1172 | bias_term: true 1173 | bias_filler { 1174 | value: 0 1175 | } 1176 | } 1177 | } 1178 | layer { 1179 | name: "fuse_conv3_2" 1180 | type: "Eltwise" 1181 | bottom: "conv3_1/out/pw_new" 1182 | bottom: "conv3_2/out/pw_new" 1183 | top: "fuse_conv3_2" 1184 | eltwise_param { 1185 | operation: SUM 1186 | } 1187 | } 1188 | 1189 | 1190 | 1191 | 1192 | 1193 | 1194 | 1195 | #------------------------- fc1 1196 | layer { 1197 | name: "fc1" 1198 | type: "InnerProduct" 1199 | bottom: "fuse_conv3_2" 1200 | top: "fc1" 1201 | param { 1202 | lr_mult: 1 1203 | decay_mult: 1 1204 | } 1205 | param { 1206 | lr_mult: 2 1207 | decay_mult: 1 1208 | } 1209 | inner_product_param { 1210 | num_output: 256 1211 | weight_filler { 1212 | type: "gaussian" 1213 | std: 0.01 1214 | } 1215 | bias_filler { 1216 | type: "constant" 1217 | value: 0 1218 | } 1219 | } 1220 | } 1221 | layer { 1222 | name: "relu_fc1" 1223 | type: "ReLU" 1224 | bottom: "fc1" 1225 | top: "fc1" 1226 | } 1227 | layer { 1228 | name: "drop_fc1" 1229 | type: "Dropout" 1230 | bottom: "fc1" 1231 | top: "fc1" 1232 | dropout_param{ 1233 | dropout_ratio: 0.3 1234 | } 1235 | } 1236 | 1237 | #------------------------- fc2 1238 | layer { 1239 | name: "fc2" 1240 | type: "InnerProduct" 1241 | bottom: "fc1" 1242 | top: "fc2" 1243 | param { 1244 | lr_mult: 1 1245 | decay_mult: 1 1246 | } 1247 | param { 1248 | lr_mult: 2 1249 | decay_mult: 1 1250 | } 1251 | inner_product_param { 1252 | num_output: 10 1253 | weight_filler { 1254 | type: "gaussian" 1255 | std: 0.01 1256 | } 1257 | bias_filler { 1258 | type: "constant" 1259 | value: 0 1260 | } 1261 | } 1262 | } -------------------------------------------------------------------------------- /3_demo/Data/l2_net.caffemodel: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/3_demo/Data/l2_net.caffemodel -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cascaded_mobilenet-v2 2 | cascaded convolutional neural network for facial point detection 3 | 4 | `详细步骤讲解请参见博客:http://blog.csdn.net/u011995719/article/details/79435615` 5 | 6 | # 1.简介 7 | 本实验在caffe下,采用级联MobileNet-V2进行人脸关键点(5点)检测,单模型仅 956 KB,GTX1080上运行为6ms左右(可在移动端达到实时检测) 8 | 9 | 本实验采用两级MobileNet-V2进行,两级的MobileNet-V2采用相同的网络结构(因为懒),结构如下: 10 | 11 | | Input | Operator | t |c | n | s | 12 | | :--------:| :--------:| :--: |:--------:| :--------:| :--: | 13 | | 48x48x3 | conv2d | - | 16 | 1 | 2 | 14 | | 24x24x16 | bottleneck | 6 | 24 | 2 | 2 | 15 | | 12x12x24 | bottleneck | 6 | 32 | 2 | 2 | 16 | | 6x6x32 | bottleneck | 6 | 64 | 2 | 2 | 17 | | 3x3x64 | fc | - | 256 | - | - | 18 | | 1x1x256 | fc | -   | 10 | - | -   | 19 | 20 | t表示“扩张”倍数,c表示输出通道数,n表示重复次数,s表示步长stride
21 | (MobileNet-v2 原文: https://arxiv.org/abs/1801.04381)
22 | (可参考博客:http://blog.csdn.net/u011995719/article/details/79135818)
23 |
24 | 25 | 基本流程为,level_1负责初步检测,依据level_1得到的关键点,对原始图片进行裁剪,将裁剪后的图片输入到level_2,从而达到从粗到精的定位。
26 | ## level_1 流程为: 27 | ![image](https://github.com/tensor-yu/cascaded_mobilenet-v2/blob/master/readme_img/l1.PNG) 28 | 29 | ## level_2 流程为 30 | ![image](https://github.com/tensor-yu/cascaded_mobilenet-v2/blob/master/readme_img/l2.PNG) 31 | 32 | 面部放大,绿色点为landmark,红色为level_1检测到的点,蓝色为level_2检测到的点,可以看出蓝色点更靠近绿色点 33 | 34 | ![image](https://github.com/tensor-yu/cascaded_mobilenet-v2/blob/master/readme_img/ccnntexie.PNG) 35 | 36 | 37 | 38 | 本实验初步验证MobileNet-V2的有效性以及级联CNN进行人脸关键点检测的有效性 39 | 40 | 数据来源:采用CelebA数据集,共计202599张图片,每张图片含5个关键点 41 | 官网:http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html 42 | 百度网盘下载:https://pan.baidu.com/s/1eSNpdRG#list/path=%2F 43 | 44 | 实验结果:请直接看demo跑出来的图片。由于CelebA的图片较为复杂,并且本实验不需要采用人脸检测,因此无法与之前实验进行比较 45 | 46 | # 2.运行demo 47 | 48 | ## (1) 修改 caffe源码 49 | 本实验基于MobileNet-V2,因此需要给caffe添加新的layer,即depth-wise convolution,并且需要修改image_data_layer,使得其支持多标签输入 50 | (感谢 hpp,cpp,cu,prototxt提供者:suzhenghang
git地址:https://github.com/suzhenghang/MobileNetv2) 51 | 52 | 步骤,进入caffe_need/文件夹下, 53 | 54 | 1. 将image_data_layer.hpp 替换掉 ***caffe_path***/include/caffe/layers 下的 image_data_layer.hpp 55 | 2. 将conv_dw_layer.hpp 复制到 ***caffe_path***/include/caffe/layers 下 56 | 3. 将image_data_layer.cpp 替换掉 ***caffe_path***/src/caffe/layers 下的image_data_layer.cpp 57 | 4. 将conv_dw_layer.cu 58 | conv_dw_layer.cpp 复制到 ***caffe_path***/src/caffe/layers 下 59 | 重新编译,并且配置python接口 60 | 61 | 62 | ## (2) 进入文件夹3_demo 63 | 进入 3_demo/Code/,打开 inferencen.py , 更改你的caffe所在路径
64 | sys.path.append('/home/xxx your caffe xxx/python')
65 | sys.path.append('/home/xxx your caffe xxx/python/caffe')
66 | 然后运行 sudo python inferencen.py, 检测出的图片保存在 3_demo/Result/draw_img/ 下 67 | 68 | # 3.复现训练过程 69 | 简单介绍训练步骤,总共分三阶段,分别是 0_raw_data, 1_level_1, 2_level_2 70 | 71 | ## 第一阶段,数据准备阶段: 0_raw_data 72 | 1. 从百度网盘下载好CelebA数据集,将CelebA\Img\img_celeba 复制到 0_raw_data/Data/ 下面,将CelebA\Anno\list_landmarks_celeba.txt复制到 0_raw_data/Data/ 并且重命名为celeba_label.txt
73 | 2. 进入0_raw_data/Code/, 运行divide_tr_te.py,将会划分好训练集,测试集,并且保存在0_raw_data/Result/ 下面
74 | 3. 运行 draw_point.py,将会在 0_raw_data/Result/draw_img/下获得 打上关键点的图片,用来检查图片以及标签是否正确 75 | 76 | 77 | ## 第二阶段, 训练level_1: 1_level_1 78 | 79 | 进入 1_level_1/Code/,依次执行 0_gen_data, 1_draw_img, 2_train, 3_inference, 4_evaluate, 5_crop_img 80 | 0_gen_data,主要是对图片进行resize,并且转换label,训练时的label是[-1,1]的 81 | 1_draw_img,用来检查图片以及标签是否正确 82 | 2_train,训练的solver等 83 | 3_inference,训练完毕,用训练好的caffemodel进行inference,将inference得到的标签 输出到 1_level_1/Result/下,用于评估和裁剪图片 84 | 4_evaluate,计算误差 85 | 5_crop_img, 采用level_1的输出标签 对原始图片进行裁剪,获得level_2的输入图片,并且制作level_2的标签 86 | 87 | 88 | ## 第三阶段,训练level_2: 2_level_2 89 | 由于 1_level_1/Code/5_crop_img 已经生成了 level_2所需的数据,并且打上关键点,供检查,因此 level_2直接从train开始
90 | 0_train, 同level_1
91 | 1_inference, 同level_1
92 | 2_evaluate,同level_1
93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /caffe_need/conv_dw_layer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "caffe/filler.hpp" 4 | #include "caffe/layers/conv_dw_layer.hpp" 5 | 6 | namespace caffe { 7 | 8 | template 9 | void ConvolutionDepthwiseLayer::LayerSetUp(const vector*>& bottom, 10 | const vector*>& top) { 11 | ConvolutionParameter conv_param = this->layer_param_.convolution_param(); 12 | if (conv_param.has_kernel_h() && conv_param.has_kernel_w()) { 13 | kernel_h_ = conv_param.kernel_h(); 14 | kernel_w_ = conv_param.kernel_w(); 15 | } else { 16 | if (conv_param.kernel_size_size() == 1) 17 | { 18 | kernel_h_ = conv_param.kernel_size(0); 19 | kernel_w_ = conv_param.kernel_size(0); 20 | } 21 | else 22 | { 23 | kernel_h_ = conv_param.kernel_size(0); 24 | kernel_w_ = conv_param.kernel_size(1); 25 | } 26 | } 27 | if (conv_param.has_stride_h() && conv_param.has_stride_w()) { 28 | stride_h_ = conv_param.stride_h(); 29 | stride_w_ = conv_param.stride_w(); 30 | } else { 31 | if (conv_param.stride_size() == 1) 32 | { 33 | stride_h_ = conv_param.stride(0); 34 | stride_w_ = conv_param.stride(0); 35 | } 36 | else 37 | { 38 | stride_h_ = conv_param.stride(0); 39 | stride_w_ = conv_param.stride(1); 40 | } 41 | } 42 | if (conv_param.has_pad_h() && conv_param.has_pad_w()) { 43 | pad_h_ = conv_param.pad_h(); 44 | pad_w_ = conv_param.pad_w(); 45 | } else { 46 | if (conv_param.pad_size() == 1) 47 | { 48 | pad_h_ = conv_param.pad(0); 49 | pad_w_ = conv_param.pad(0); 50 | } 51 | else 52 | { 53 | pad_h_ = conv_param.pad(0); 54 | pad_w_ = conv_param.pad(1); 55 | } 56 | } 57 | if (conv_param.dilation_size() > 0) 58 | { 59 | if (conv_param.dilation_size() == 1) 60 | { 61 | dilation_h_ = conv_param.dilation(0); 62 | dilation_w_ = conv_param.dilation(0); 63 | } 64 | else 65 | { 66 | dilation_h_ = 
conv_param.dilation(0); 67 | dilation_w_ = conv_param.dilation(1); 68 | } 69 | } 70 | else 71 | { 72 | dilation_h_ = 1; 73 | dilation_w_ = 1; 74 | } 75 | vector weight_shape(4); 76 | weight_shape[0] = bottom[0]->channels(); 77 | weight_shape[1] = 1; 78 | weight_shape[2] = kernel_h_; 79 | weight_shape[3] = kernel_w_; 80 | vector bias_shape; 81 | if (conv_param.bias_term()) 82 | { 83 | bias_shape.push_back(bottom[0]->channels()); 84 | } 85 | if (this->blobs_.size() == 0) { 86 | if (conv_param.bias_term()) { 87 | this->blobs_.resize(2); 88 | } else { 89 | this->blobs_.resize(1); 90 | } 91 | this->blobs_[0].reset(new Blob(weight_shape)); 92 | shared_ptr > weight_filler(GetFiller(conv_param.weight_filler())); 93 | weight_filler->Fill(this->blobs_[0].get()); 94 | if (conv_param.bias_term()) { 95 | this->blobs_[1].reset(new Blob(bias_shape)); 96 | shared_ptr > bias_filler(GetFiller(conv_param.bias_filler())); 97 | bias_filler->Fill(this->blobs_[1].get()); 98 | } 99 | } 100 | this->param_propagate_down_.resize(this->blobs_.size(), true); 101 | } 102 | 103 | template 104 | void ConvolutionDepthwiseLayer::Reshape(const vector*>& bottom, 105 | const vector*>& top) { 106 | vector top_shape; 107 | top_shape.push_back(bottom[0]->num()); 108 | top_shape.push_back(bottom[0]->channels()); 109 | top_shape.push_back((bottom[0]->height() + 2 * pad_h_ - (dilation_h_ * (kernel_h_ - 1) + 1)) / stride_h_ + 1); 110 | top_shape.push_back((bottom[0]->width() + 2 * pad_w_ - (dilation_w_ * (kernel_w_ - 1) + 1)) / stride_w_ + 1); 111 | top[0]->Reshape(top_shape); 112 | vector weight_buffer_shape; 113 | weight_buffer_shape.push_back(bottom[0]->channels()); 114 | weight_buffer_shape.push_back(kernel_h_); 115 | weight_buffer_shape.push_back(kernel_w_); 116 | weight_buffer_shape.push_back(bottom[0]->num()); 117 | weight_buffer_shape.push_back(top[0]->height()); 118 | weight_buffer_shape.push_back(top[0]->width()); 119 | weight_buffer_.Reshape(weight_buffer_shape); 120 | vector 
weight_multiplier_shape; 121 | weight_multiplier_shape.push_back(bottom[0]->num()); 122 | weight_multiplier_shape.push_back(top[0]->height()); 123 | weight_multiplier_shape.push_back(top[0]->width()); 124 | weight_multiplier_.Reshape(weight_multiplier_shape); 125 | caffe_gpu_set(weight_multiplier_.count(), Dtype(1), weight_multiplier_.mutable_gpu_data()); 126 | if (this->layer_param_.convolution_param().bias_term()) 127 | { 128 | vector bias_buffer_shape; 129 | bias_buffer_shape.push_back(bottom[0]->channels()); 130 | bias_buffer_shape.push_back(bottom[0]->num()); 131 | bias_buffer_shape.push_back(top[0]->height()); 132 | bias_buffer_shape.push_back(top[0]->width()); 133 | bias_buffer_.Reshape(bias_buffer_shape); 134 | vector bias_multiplier_shape; 135 | bias_multiplier_shape.push_back(bottom[0]->num()); 136 | bias_multiplier_shape.push_back(top[0]->height()); 137 | bias_multiplier_shape.push_back(top[0]->width()); 138 | bias_multiplier_.Reshape(bias_multiplier_shape); 139 | caffe_gpu_set(bias_multiplier_.count(), Dtype(1), bias_multiplier_.mutable_gpu_data()); 140 | } 141 | } 142 | 143 | template 144 | void ConvolutionDepthwiseLayer::Forward_cpu(const vector*>& bottom, 145 | const vector*>& top) 146 | { 147 | const int num = top[0]->num(); 148 | const int channels = top[0]->channels(); 149 | const int top_height = top[0]->height(); 150 | const int top_width = top[0]->width(); 151 | const int bottom_height = bottom[0]->height(); 152 | const int bottom_width = bottom[0]->width(); 153 | const Dtype* bottom_data = bottom[0]->cpu_data(); 154 | const Dtype* weight_data_base = this->blobs_[0]->cpu_data(); 155 | Dtype* top_data = top[0]->mutable_cpu_data(); 156 | for (int n = 0; n < num; ++n) 157 | { 158 | for (int c = 0; c < channels; ++c) 159 | { 160 | for (int h = 0; h < top_height; ++h) 161 | { 162 | for (int w = 0; w < top_width; ++w) 163 | { 164 | const Dtype* weight_data = weight_data_base + c * kernel_h_ * kernel_w_; 165 | Dtype value = 0; 166 | for (int kh = 0; 
kh < kernel_h_; ++kh) 167 | { 168 | for (int kw = 0; kw < kernel_w_; ++kw) 169 | { 170 | int h_in = -pad_h_ + h * stride_h_ + kh * dilation_h_; 171 | int w_in = -pad_w_ + w * stride_w_ + kw * dilation_w_; 172 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width)) 173 | { 174 | int offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in; 175 | value += (*weight_data) * bottom_data[offset]; 176 | } 177 | ++weight_data; 178 | } 179 | } 180 | *top_data++ = value; 181 | } 182 | } 183 | } 184 | } 185 | if (this->layer_param_.convolution_param().bias_term()) 186 | { 187 | top_data = top[0]->mutable_cpu_data(); 188 | for (int n = 0; n < num; ++n) 189 | { 190 | const Dtype* bias_data = this->blobs_[1]->cpu_data(); 191 | for (int c = 0; c < channels; ++c) 192 | { 193 | for (int h = 0; h < top_height; ++h) 194 | { 195 | for (int w = 0; w < top_width; ++w) 196 | { 197 | *top_data += *bias_data; 198 | ++top_data; 199 | } 200 | } 201 | ++bias_data; 202 | } 203 | } 204 | } 205 | } 206 | 207 | template 208 | void ConvolutionDepthwiseLayer::Backward_cpu(const vector*>& top, 209 | const vector& propagate_down, const vector*>& bottom) 210 | { 211 | const int num = top[0]->num(); 212 | const int channels = top[0]->channels(); 213 | const int top_height = top[0]->height(); 214 | const int top_width = top[0]->width(); 215 | const int bottom_height = bottom[0]->height(); 216 | const int bottom_width = bottom[0]->width(); 217 | caffe_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_cpu_diff()); 218 | if (this->layer_param_.convolution_param().bias_term() && this->param_propagate_down_[1]) 219 | { 220 | const Dtype* top_diff = top[0]->cpu_diff(); 221 | for (int n = 0; n < num; ++n) 222 | { 223 | Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); 224 | for (int c = 0; c < channels; ++c) 225 | { 226 | for (int h = 0; h < top_height; ++h) 227 | { 228 | for (int w = 0; w < top_width; ++w) 229 | { 230 | *bias_diff += *top_diff; 231 | 
++top_diff; 232 | } 233 | } 234 | ++bias_diff; 235 | } 236 | } 237 | } 238 | if (this->param_propagate_down_[0]) 239 | { 240 | const Dtype* top_diff = top[0]->cpu_diff(); 241 | const Dtype* bottom_data = bottom[0]->cpu_data(); 242 | Dtype* weight_diff_base = this->blobs_[0]->mutable_cpu_diff(); 243 | for (int n = 0; n < num; ++n) 244 | { 245 | for (int c = 0; c < channels; ++c) 246 | { 247 | for (int h = 0; h < top_height; ++h) 248 | { 249 | for (int w = 0; w < top_width; ++w) 250 | { 251 | Dtype* weight_diff = weight_diff_base + c * kernel_h_ * kernel_w_; 252 | for (int kh = 0; kh < kernel_h_; ++kh) 253 | { 254 | for (int kw = 0; kw < kernel_w_; ++kw) 255 | { 256 | int h_in = -pad_h_ + h * stride_h_ + kh * dilation_h_; 257 | int w_in = -pad_w_ + w * stride_w_ + kw * dilation_w_; 258 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width)) 259 | { 260 | int offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in; 261 | *weight_diff += bottom_data[offset] * (*top_diff); 262 | } 263 | ++weight_diff; 264 | } 265 | } 266 | ++top_diff; 267 | } 268 | } 269 | } 270 | } 271 | } 272 | if (propagate_down[0]) 273 | { 274 | const Dtype* top_diff = top[0]->cpu_diff(); 275 | const Dtype* weight_data_base = this->blobs_[0]->cpu_data(); 276 | Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); 277 | for (int n = 0; n < num; ++n) 278 | { 279 | for (int c = 0; c < channels; ++c) 280 | { 281 | for (int h = 0; h < top_height; ++h) 282 | { 283 | for (int w = 0; w < top_width; ++w) 284 | { 285 | const Dtype* weight_data = weight_data_base + c * kernel_h_ * kernel_w_; 286 | for (int kh = 0; kh < kernel_h_; ++kh) 287 | { 288 | for (int kw = 0; kw < kernel_w_; ++kw) 289 | { 290 | int h_in = -pad_h_ + h * stride_h_ + kh * dilation_h_; 291 | int w_in = -pad_w_ + w * stride_w_ + kw * dilation_w_; 292 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width)) 293 | { 294 | int offset = ((n * channels + c) * bottom_height 
+ h_in) * bottom_width + w_in; 295 | bottom_diff[offset] += (*weight_data) * (*top_diff); 296 | } 297 | ++weight_data; 298 | } 299 | } 300 | ++top_diff; 301 | } 302 | } 303 | } 304 | } 305 | } 306 | } 307 | 308 | #ifdef CPU_ONLY 309 | STUB_GPU(ConvolutionDepthwiseLayer); 310 | #endif 311 | 312 | INSTANTIATE_CLASS(ConvolutionDepthwiseLayer); 313 | REGISTER_LAYER_CLASS(ConvolutionDepthwise); 314 | 315 | } // namespace caffe 316 | -------------------------------------------------------------------------------- /caffe_need/conv_dw_layer.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include "caffe/layers/conv_dw_layer.hpp" 3 | #include "caffe/util/gpu_util.cuh" 4 | 5 | namespace caffe { 6 | 7 | template 8 | __global__ void ConvolutionDepthwiseWeightForward(const int nthreads, 9 | const Dtype* const bottom_data, const Dtype* const weight_data, const int num, const int channels, 10 | const int top_height, const int top_width, const int bottom_height, const int bottom_width, 11 | const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, 12 | const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, 13 | Dtype* const top_data) { 14 | CUDA_KERNEL_LOOP(index, nthreads) { 15 | const int n = index / channels / top_height / top_width; 16 | const int c = (index / top_height / top_width) % channels; 17 | const int h = (index / top_width) % top_height; 18 | const int w = index % top_width; 19 | const Dtype* weight = weight_data + c * kernel_h * kernel_w; 20 | Dtype value = 0; 21 | for (int kh = 0; kh < kernel_h; ++kh) 22 | { 23 | for (int kw = 0; kw < kernel_w; ++kw) 24 | { 25 | const int h_in = -pad_h + h * stride_h + kh * dilation_h; 26 | const int w_in = -pad_w + w * stride_w + kw * dilation_w; 27 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width)) 28 | { 29 | const int offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in; 30 | 
value += (*weight) * bottom_data[offset]; 31 | } 32 | ++weight; 33 | } 34 | } 35 | top_data[index] = value; 36 | } 37 | } 38 | 39 | template 40 | __global__ void ConvolutionDepthwiseBiasForward(const int nthreads, 41 | const Dtype* const bias_data, const int num, const int channels, 42 | const int top_height, const int top_width, Dtype* const top_data) { 43 | CUDA_KERNEL_LOOP(index, nthreads) { 44 | const int c = (index / top_height / top_width) % channels; 45 | top_data[index] += bias_data[c]; 46 | } 47 | } 48 | 49 | template 50 | void ConvolutionDepthwiseLayer::Forward_gpu(const vector*>& bottom, 51 | const vector*>& top) { 52 | const Dtype* bottom_data = bottom[0]->gpu_data(); 53 | Dtype* top_data = top[0]->mutable_gpu_data(); 54 | const Dtype* weight_data = this->blobs_[0]->gpu_data(); 55 | const int count = top[0]->count(); 56 | const int num = top[0]->num(); 57 | const int channels = top[0]->channels(); 58 | const int top_height = top[0]->height(); 59 | const int top_width = top[0]->width(); 60 | const int bottom_height = bottom[0]->height(); 61 | const int bottom_width = bottom[0]->width(); 62 | ConvolutionDepthwiseWeightForward<<>>( 63 | count, bottom_data, weight_data, num, channels, 64 | top_height, top_width, bottom_height, bottom_width, 65 | kernel_h_, kernel_w_, stride_h_, stride_w_, 66 | pad_h_, pad_w_, dilation_h_, dilation_w_, top_data); 67 | if (this->layer_param_.convolution_param().bias_term()) 68 | { 69 | const Dtype* bias_data = this->blobs_[1]->gpu_data(); 70 | ConvolutionDepthwiseBiasForward<<>>( 71 | count, bias_data, num, channels, 72 | top_height, top_width, top_data); 73 | } 74 | } 75 | 76 | template 77 | __global__ void ConvolutionDepthwiseWeightBackward(const int nthreads, 78 | const Dtype* const top_diff, const Dtype* const bottom_data, const int num, const int channels, 79 | const int top_height, const int top_width, const int bottom_height, const int bottom_width, 80 | const int kernel_h, const int kernel_w, const int stride_h, const 
int stride_w, 81 | const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, 82 | Dtype* const buffer_data) { 83 | CUDA_KERNEL_LOOP(index, nthreads) { 84 | const int h = (index / top_width) % top_height; 85 | const int w = index % top_width; 86 | const int kh = (index / kernel_w / num / top_height / top_width) % kernel_h; 87 | const int kw = (index / num / top_height / top_width) % kernel_w; 88 | const int h_in = -pad_h + h * stride_h + kh * dilation_h; 89 | const int w_in = -pad_w + w * stride_w + kw * dilation_w; 90 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width)) 91 | { 92 | const int c = index / kernel_h / kernel_w / num / top_height / top_width; 93 | const int n = (index / top_height / top_width) % num; 94 | const int top_offset = ((n * channels + c) * top_height + h) * top_width + w; 95 | const int bottom_offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in; 96 | buffer_data[index] = top_diff[top_offset] * bottom_data[bottom_offset]; 97 | } 98 | else 99 | { 100 | buffer_data[index] = 0; 101 | } 102 | } 103 | } 104 | 105 | template 106 | __global__ void ConvolutionDepthwiseBottomBackward(const int nthreads, 107 | const Dtype* const top_diff, const Dtype* const weight_data, const int num, const int channels, 108 | const int top_height, const int top_width, const int bottom_height, const int bottom_width, 109 | const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, 110 | const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, 111 | Dtype* const bottom_diff) { 112 | CUDA_KERNEL_LOOP(index, nthreads) { 113 | const int n = index / channels / bottom_height / bottom_width; 114 | const int c = (index / bottom_height / bottom_width) % channels; 115 | const int h = (index / bottom_width) % bottom_height; 116 | const int w = index % bottom_width; 117 | const Dtype* weight = weight_data + c * kernel_h * kernel_w; 118 | Dtype value = 0; 119 | for 
(int kh = 0; kh < kernel_h; ++kh) 120 | { 121 | for (int kw = 0; kw < kernel_w; ++kw) 122 | { 123 | const int h_out_s = h + pad_h - kh * dilation_h; 124 | const int w_out_s = w + pad_w - kw * dilation_w; 125 | if (((h_out_s % stride_h) == 0) && ((w_out_s % stride_w) == 0)) 126 | { 127 | const int h_out = h_out_s / stride_h; 128 | const int w_out = w_out_s / stride_w; 129 | if ((h_out >= 0) && (h_out < top_height) && (w_out >= 0) && (w_out < top_width)) 130 | { 131 | const int offset = ((n * channels + c) * top_height + h_out) * top_width + w_out; 132 | value += (*weight) * top_diff[offset]; 133 | } 134 | } 135 | ++weight; 136 | } 137 | } 138 | bottom_diff[index] += value; 139 | } 140 | } 141 | 142 | template 143 | __global__ void ConvolutionDepthwiseBiasBackward(const int nthreads, 144 | const Dtype* const top_diff, const int num, const int channels, 145 | const int top_height, const int top_width, Dtype* const buffer_data) { 146 | CUDA_KERNEL_LOOP(index, nthreads) { 147 | const int c = index / num / top_height / top_width; 148 | const int n = (index / top_height / top_width) % num; 149 | const int h = (index / top_width) % top_height; 150 | const int w = index % top_width; 151 | const int offset = ((n * channels + c) * top_height + h) * top_width + w; 152 | buffer_data[index] = top_diff[offset]; 153 | } 154 | } 155 | 156 | template 157 | void ConvolutionDepthwiseLayer::Backward_gpu(const vector*>& top, 158 | const vector& propagate_down, const vector*>& bottom) { 159 | const Dtype* top_diff = top[0]->gpu_diff(); 160 | const int bottom_count = bottom[0]->count(); 161 | const int num = top[0]->num(); 162 | const int channels = top[0]->channels(); 163 | const int top_height = top[0]->height(); 164 | const int top_width = top[0]->width(); 165 | const int bottom_height = bottom[0]->height(); 166 | const int bottom_width = bottom[0]->width(); 167 | const int length = num * top_height * top_width; 168 | caffe_gpu_set(bottom_count, Dtype(0), 
bottom[0]->mutable_gpu_diff()); 169 | if (this->layer_param_.convolution_param().bias_term() && this->param_propagate_down_[1]) 170 | { 171 | const int bias_buffer_count = bias_buffer_.count(); 172 | Dtype* bias_buffer_mutable_data = bias_buffer_.mutable_gpu_data(); 173 | ConvolutionDepthwiseBiasBackward<<>>( 174 | bias_buffer_count, top_diff, num, channels, 175 | top_height, top_width, bias_buffer_mutable_data); 176 | const int bias_count = this->blobs_[1]->count(); 177 | const Dtype* bias_buffer_data = bias_buffer_.gpu_data(); 178 | Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); 179 | const Dtype* bias_multiplier_data = bias_multiplier_.gpu_data(); 180 | caffe_gpu_gemv(CblasNoTrans, bias_count, length, Dtype(1), bias_buffer_data, bias_multiplier_data, Dtype(1), bias_diff); 181 | } 182 | if (this->param_propagate_down_[0]) 183 | { 184 | const int weight_buffer_count = weight_buffer_.count(); 185 | const Dtype* bottom_data = bottom[0]->gpu_data(); 186 | Dtype* weight_buffer_mutable_data = weight_buffer_.mutable_gpu_data(); 187 | ConvolutionDepthwiseWeightBackward<<>>( 188 | weight_buffer_count, top_diff, bottom_data, num, channels, 189 | top_height, top_width, bottom_height, bottom_width, 190 | kernel_h_, kernel_w_, stride_h_, stride_w_, 191 | pad_h_, pad_w_, dilation_h_, dilation_w_, weight_buffer_mutable_data); 192 | const int weight_count = this->blobs_[0]->count(); 193 | const Dtype* weight_buffer_data = weight_buffer_.gpu_data(); 194 | Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); 195 | const Dtype* weight_multiplier_data = weight_multiplier_.gpu_data(); 196 | caffe_gpu_gemv(CblasNoTrans, weight_count, length, Dtype(1), weight_buffer_data, weight_multiplier_data, Dtype(1), weight_diff); 197 | } 198 | if (propagate_down[0]) 199 | { 200 | const Dtype* weight_data = this->blobs_[0]->gpu_data(); 201 | Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); 202 | ConvolutionDepthwiseBottomBackward<<>>( 203 | bottom_count, top_diff, weight_data, 
num, channels, 204 | top_height, top_width, bottom_height, bottom_width, 205 | kernel_h_, kernel_w_, stride_h_, stride_w_, 206 | pad_h_, pad_w_, dilation_h_, dilation_w_, bottom_diff); 207 | } 208 | } 209 | 210 | INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionDepthwiseLayer); 211 | 212 | } // namespace caffe 213 | -------------------------------------------------------------------------------- /caffe_need/conv_dw_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE_CONV_DW_LAYER_HPP_ 2 | #define CAFFE_CONV_DW_LAYER_HPP_ 3 | 4 | #include 5 | #include "caffe/blob.hpp" 6 | #include "caffe/layer.hpp" 7 | #include "caffe/proto/caffe.pb.h" 8 | 9 | namespace caffe { 10 | 11 | template 12 | class ConvolutionDepthwiseLayer : public Layer { 13 | public: 14 | explicit ConvolutionDepthwiseLayer(const LayerParameter& param) 15 | : Layer(param) {} 16 | virtual void LayerSetUp(const vector*>& bottom, 17 | const vector*>& top); 18 | virtual void Reshape(const vector*>& bottom, 19 | const vector*>& top); 20 | virtual inline int ExactNumBottomBlobs() const { return 1; } 21 | virtual inline int ExactNumTopBlobs() const { return 1; } 22 | virtual inline const char* type() const { return "ConvolutionDepthwise"; } 23 | protected: 24 | virtual void Forward_cpu(const vector*>& bottom, 25 | const vector*>& top); 26 | virtual void Forward_gpu(const vector*>& bottom, 27 | const vector*>& top); 28 | virtual void Backward_cpu(const vector*>& top, 29 | const vector& propagate_down, const vector*>& bottom); 30 | virtual void Backward_gpu(const vector*>& top, 31 | const vector& propagate_down, const vector*>& bottom); 32 | unsigned int kernel_h_; 33 | unsigned int kernel_w_; 34 | unsigned int stride_h_; 35 | unsigned int stride_w_; 36 | unsigned int pad_h_; 37 | unsigned int pad_w_; 38 | unsigned int dilation_h_; 39 | unsigned int dilation_w_; 40 | Blob weight_buffer_; 41 | Blob weight_multiplier_; 42 | Blob bias_buffer_; 43 | Blob 
bias_multiplier_; 44 | }; 45 | 46 | } // namespace caffe 47 | 48 | #endif // CAFFE_CONV_DW_LAYER_HPP_ 49 | -------------------------------------------------------------------------------- /caffe_need/image_data_layer.cpp: -------------------------------------------------------------------------------- 1 | #ifdef USE_OPENCV 2 | #include 3 | 4 | #include // NOLINT(readability/streams) 5 | #include // NOLINT(readability/streams) 6 | #include 7 | #include 8 | #include 9 | 10 | #include "caffe/data_transformer.hpp" 11 | #include "caffe/layers/base_data_layer.hpp" 12 | #include "caffe/layers/image_data_layer.hpp" 13 | #include "caffe/util/benchmark.hpp" 14 | #include "caffe/util/io.hpp" 15 | #include "caffe/util/math_functions.hpp" 16 | #include "caffe/util/rng.hpp" 17 | 18 | namespace caffe { 19 | 20 | template 21 | ImageDataLayer::~ImageDataLayer() { 22 | this->StopInternalThread(); 23 | } 24 | 25 | template 26 | void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, 27 | const vector*>& top) { 28 | const int new_height = this->layer_param_.image_data_param().new_height(); 29 | const int new_width = this->layer_param_.image_data_param().new_width(); 30 | const bool is_color = this->layer_param_.image_data_param().is_color(); 31 | string root_folder = this->layer_param_.image_data_param().root_folder(); 32 | 33 | CHECK((new_height == 0 && new_width == 0) || 34 | (new_height > 0 && new_width > 0)) << "Current implementation requires " 35 | "new_height and new_width to be set at the same time."; 36 | // Read the file with filenames and labels 37 | const string& source = this->layer_param_.image_data_param().source(); 38 | LOG(INFO) << "Opening file " << source; 39 | std::ifstream infile(source.c_str()); 40 | string line; 41 | int pos; // int pos ; 42 | int label_dim = 0 ; 43 | bool gfirst = true; 44 | while (std::getline(infile, line)) { 45 | if(line.find_last_of(' ')==line.size()-2) line.erase(line.find_last_not_of(' ')-1); 46 | pos = line.find_first_of(' '); 47 | 
string img_path = line.substr(0, pos); 48 | int p0 = pos + 1; 49 | vector label_vec; 50 | while (pos != -1){ 51 | pos = line.find_first_of(' ', p0); 52 | float v = atof(line.substr(p0, pos).c_str()); 53 | label_vec.push_back(v); 54 | p0 = pos + 1; 55 | } 56 | if (gfirst){ 57 | label_dim = label_vec.size(); 58 | gfirst = false; 59 | LOG(INFO) << "label dim: " << label_dim; 60 | } 61 | CHECK_EQ(label_vec.size(), label_dim) << "label dim not match in: " << lines_.size()<<", "<layer_param_.image_data_param().shuffle()) { 68 | // randomly shuffle data 69 | LOG(INFO) << "Shuffling data"; 70 | const unsigned int prefetch_rng_seed = caffe_rng_rand(); 71 | prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed)); 72 | ShuffleImages(); 73 | } else { 74 | if (this->phase_ == TRAIN && Caffe::solver_rank() > 0 && 75 | this->layer_param_.image_data_param().rand_skip() == 0) { 76 | LOG(WARNING) << "Shuffling or skipping recommended for multi-GPU"; 77 | } 78 | } 79 | LOG(INFO) << "A total of " << lines_.size() << " images."; 80 | 81 | lines_id_ = 0; 82 | // Check if we would need to randomly skip a few data points 83 | if (this->layer_param_.image_data_param().rand_skip()) { 84 | unsigned int skip = caffe_rng_rand() % 85 | this->layer_param_.image_data_param().rand_skip(); 86 | LOG(INFO) << "Skipping first " << skip << " data points."; 87 | CHECK_GT(lines_.size(), skip) << "Not enough points to skip"; 88 | lines_id_ = skip; 89 | } 90 | // Read an image, and use it to initialize the top blob. 91 | cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first, 92 | new_height, new_width, is_color); 93 | CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first; 94 | // Use data_transformer to infer the expected blob shape from a cv_image. 95 | vector top_shape = this->data_transformer_->InferBlobShape(cv_img); 96 | this->transformed_data_.Reshape(top_shape); 97 | // Reshape prefetch_data and top[0] according to the batch_size. 
98 | const int batch_size = this->layer_param_.image_data_param().batch_size(); 99 | CHECK_GT(batch_size, 0) << "Positive batch size required"; 100 | top_shape[0] = batch_size; 101 | for (int i = 0; i < this->prefetch_.size(); ++i) { 102 | this->prefetch_[i]->data_.Reshape(top_shape); 103 | } 104 | top[0]->Reshape(top_shape); 105 | 106 | LOG(INFO) << "output data size: " << top[0]->num() << "," 107 | << top[0]->channels() << "," << top[0]->height() << "," 108 | << top[0]->width(); 109 | // label 110 | vector label_shape(2, batch_size); 111 | label_shape[1] = label_dim; 112 | top[1]->Reshape(label_shape); 113 | for (int i = 0; i < this->prefetch_.size(); ++i) { 114 | this->prefetch_[i]->label_.Reshape(label_shape); 115 | } 116 | } 117 | 118 | template 119 | void ImageDataLayer::ShuffleImages() { 120 | caffe::rng_t* prefetch_rng = 121 | static_cast(prefetch_rng_->generator()); 122 | shuffle(lines_.begin(), lines_.end(), prefetch_rng); 123 | } 124 | 125 | // This function is called on prefetch thread 126 | template 127 | void ImageDataLayer::load_batch(Batch* batch) { 128 | CPUTimer batch_timer; 129 | batch_timer.Start(); 130 | double read_time = 0; 131 | double trans_time = 0; 132 | CPUTimer timer; 133 | CHECK(batch->data_.count()); 134 | CHECK(this->transformed_data_.count()); 135 | ImageDataParameter image_data_param = this->layer_param_.image_data_param(); 136 | const int batch_size = image_data_param.batch_size(); 137 | const int new_height = image_data_param.new_height(); 138 | const int new_width = image_data_param.new_width(); 139 | const bool is_color = image_data_param.is_color(); 140 | string root_folder = image_data_param.root_folder(); 141 | 142 | // Reshape according to the first image of each batch 143 | // on single input batches allows for inputs of varying dimension. 
144 | cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first, 145 | new_height, new_width, is_color); 146 | CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first; 147 | // Use data_transformer to infer the expected blob shape from a cv_img. 148 | vector top_shape = this->data_transformer_->InferBlobShape(cv_img); 149 | this->transformed_data_.Reshape(top_shape); 150 | // Reshape batch according to the batch_size. 151 | top_shape[0] = batch_size; 152 | batch->data_.Reshape(top_shape);/////////////////////////////////////////////// 153 | vector top_shape1(4); 154 | top_shape1[0] = batch_size; 155 | top_shape1[1] = lines_[0].second.size(); 156 | top_shape1[2] = 1; 157 | top_shape1[3] = 1; 158 | batch->data_.Reshape(top_shape); 159 | 160 | Dtype* prefetch_data = batch->data_.mutable_cpu_data(); 161 | Dtype* prefetch_label = batch->label_.mutable_cpu_data(); 162 | 163 | // datum scales 164 | const int lines_size = lines_.size(); 165 | for (int item_id = 0; item_id < batch_size; ++item_id) { 166 | // get a blob 167 | timer.Start(); 168 | CHECK_GT(lines_size, lines_id_); 169 | cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first, 170 | new_height, new_width, is_color); 171 | CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first; 172 | read_time += timer.MicroSeconds(); 173 | timer.Start(); 174 | // Apply transformations (mirror, crop...) to the image 175 | int offset = batch->data_.offset(item_id); 176 | this->transformed_data_.set_cpu_data(prefetch_data + offset); 177 | this->data_transformer_->Transform(cv_img, &(this->transformed_data_)); 178 | trans_time += timer.MicroSeconds(); 179 | 180 | for (int i = 0; i < top_shape1[1]; i++) { 181 | prefetch_label[item_id*top_shape1[1] + i] = lines_[lines_id_].second[i]; 182 | } 183 | // go to the next iter 184 | lines_id_++; 185 | if (lines_id_ >= lines_size) { 186 | // We have reached the end. Restart from the first. 
187 | DLOG(INFO) << "Restarting data prefetching from start."; 188 | lines_id_ = 0; 189 | if (this->layer_param_.image_data_param().shuffle()) { 190 | ShuffleImages(); 191 | } 192 | } 193 | } 194 | batch_timer.Stop(); 195 | DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; 196 | DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; 197 | DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; 198 | } 199 | 200 | INSTANTIATE_CLASS(ImageDataLayer); 201 | REGISTER_LAYER_CLASS(ImageData); 202 | 203 | } // namespace caffe 204 | #endif // USE_OPENCV 205 | -------------------------------------------------------------------------------- /caffe_need/image_data_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE_IMAGE_DATA_LAYER_HPP_ 2 | #define CAFFE_IMAGE_DATA_LAYER_HPP_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "caffe/blob.hpp" 9 | #include "caffe/data_transformer.hpp" 10 | #include "caffe/internal_thread.hpp" 11 | #include "caffe/layer.hpp" 12 | #include "caffe/layers/base_data_layer.hpp" 13 | #include "caffe/proto/caffe.pb.h" 14 | 15 | namespace caffe { 16 | 17 | /** 18 | * @brief Provides data to the Net from image files. 19 | * 20 | * TODO(dox): thorough documentation for Forward and proto params. 
21 | */ 22 | template 23 | class ImageDataLayer : public BasePrefetchingDataLayer { 24 | public: 25 | explicit ImageDataLayer(const LayerParameter& param) 26 | : BasePrefetchingDataLayer(param) {} 27 | virtual ~ImageDataLayer(); 28 | virtual void DataLayerSetUp(const vector*>& bottom, 29 | const vector*>& top); 30 | 31 | virtual inline const char* type() const { return "ImageData"; } 32 | virtual inline int ExactNumBottomBlobs() const { return 0; } 33 | virtual inline int ExactNumTopBlobs() const { return 2; } 34 | 35 | protected: 36 | shared_ptr prefetch_rng_; 37 | virtual void ShuffleImages(); 38 | virtual void load_batch(Batch* batch); 39 | 40 | // edited by tingsong 41 | // vector > lines_; 42 | vector > > lines_; 43 | int lines_id_; 44 | }; 45 | 46 | 47 | } // namespace caffe 48 | 49 | #endif // CAFFE_IMAGE_DATA_LAYER_HPP_ 50 | -------------------------------------------------------------------------------- /caffe_need/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/caffe_need/readme.txt -------------------------------------------------------------------------------- /readme_img/ccnntexie.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/readme_img/ccnntexie.PNG -------------------------------------------------------------------------------- /readme_img/l1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/readme_img/l1.PNG -------------------------------------------------------------------------------- /readme_img/l2.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TingsongYu/cascaded_mobilenet-v2/3b9f292f13852e6696fed19d7d34a5ff39f8c61e/readme_img/l2.PNG -------------------------------------------------------------------------------- /util/tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | import numpy as np 4 | import os 5 | import cv2 6 | 7 | # range of [-1,1] to [0,w] or [0,h] 8 | def convert_point(nor_p,factor): 9 | return int(round( float(nor_p)*factor + factor)) 10 | 11 | # draw points for level_1 12 | # landmark range of [-1,1] x1,y1,x2,y2... 13 | def drawpoints_1(img_,landmark): 14 | h, w, c = img_.shape 15 | w1 = (w-1)/2 # range of [-1, 1] 16 | h1 = (h-1)/2 17 | draw_img = img_.copy() 18 | num_points = len(landmark) / 2 19 | for i in range(num_points): 20 | x_ = convert_point(landmark[2*i+0],w1) 21 | y_ = convert_point(landmark[2*i+1],h1) 22 | cv2.circle(draw_img,(x_,y_),3,(0,0,255)) 23 | return draw_img 24 | 25 | # draw points for level_2 26 | # landmark range of [0,h] or [0,w] x1,y1,x2,y2... 27 | def drawpoints_2(img_,landmark): 28 | draw_img = img_.copy() 29 | num_points = len(landmark) / 2 30 | for i in range(num_points): 31 | x_ = landmark[2*i+0] 32 | y_ = landmark[2*i+1] 33 | cv2.circle(draw_img,(x_,y_),3,(255,0,0)) 34 | return draw_img 35 | 36 | # draw points for level_2 37 | # landmark range of [0,h] or [0,w] x1,y1,x2,y2... 
def drawpoints_0(img_, landmark):
    draw_img = img_.copy()
    # BUG FIX: floor division keeps num_points an int under Python 3
    # (plain / made it a float there and range() raised TypeError);
    # Python 2 behavior is unchanged.
    num_points = len(landmark) // 2
    for i in range(num_points):
        x_ = myint(landmark[2 * i + 0])
        y_ = myint(landmark[2 * i + 1])
        cv2.circle(draw_img, (x_, y_), 4, (0, 255, 0))  # green
    return draw_img

# Round a number (or numeric string) to the nearest int.
def myint(numb):
    return int(round(float(numb)))

# Euclidean distance between two equal-length numpy vectors.
def cal_eucldist(v1, v2):
    return np.sqrt(np.sum((v1 - v2) ** 2))

# Create the directory (and parents) if it does not already exist.
def makedir(path):
    if not os.path.exists(path): os.makedirs(path)

# Convert normalized labels in [-1, 1] to rounded pixel coordinates.
# `l` is a numpy array ordered x1,y1,x2,y2,... (the original says len == 10).
def label2points(l, w, h):
    landmark = l.copy()
    w1 = (w - 1) // 2  # half-span of the [-1, 1] normalization; // for Py3
    h1 = (h - 1) // 2
    landmark[0::2] = landmark[0::2] * w1 + w1  # x
    landmark[1::2] = landmark[1::2] * h1 + h1  # y
    landmark = np.round(landmark)
    return landmark

# Inter-ocular distance; assumes layout eye_l, eye_r, ... — TODO confirm.
def cal_dist_norm_eye(landmark):
    left_eye = landmark[0:2]
    right_eye = landmark[2:4]
    return cal_eucldist(left_eye, right_eye)

# Per-image landmark error normalized by the image diagonal.
# r_l / o_l are whitespace-separated label lines: "<name> x1 y1 ... x5 y5"
# with coords in [-1, 1].  Returns (mean error, list of 5 per-point errors).
def cal_error_nor_diag(img, r_l, o_l):
    h, w, c = img.shape
    n_p = 5
    # BUG FIX: wrap map() in list() — under Python 3 map is lazy and
    # np.array(map(...)) produced a useless 0-d object array.
    r_landmark = np.array(list(map(float, r_l.split()[1:2 * n_p + 1])))
    o_landmark = np.array(list(map(float, o_l.split()[1:2 * n_p + 1])))
    r_pix_landmark = label2points(r_landmark, w, h)
    o_pix_landmark = label2points(o_landmark, w, h)

    d_diag = np.sqrt(w * w + h * h)
    err_all = 0
    err_5 = []
    for i in range(n_p):
        raw_point = r_pix_landmark[2 * i + 0:2 * i + 2]
        out_point = o_pix_landmark[2 * i + 0:2 * i + 2]
        err_ecul = cal_eucldist(raw_point, out_point) / d_diag
        err_all = err_all + err_ecul
        err_5.append(err_ecul)
    err_1 = round(err_all / n_p, 4)  # mean
    return err_1, err_5


# Crop a square around the nose for level_2, half-size = 2 * the
# nose-to-left-eye distance, clipped to the image borders.
# `in_land` is a numpy array of pixel coords: left eye at [0:2], nose at [4:6].
# Returns (crop, w_start, h_start) so points can be re-mapped afterwards.
def crop_img(in_img, in_land):
    p_nose = in_land[4:6]
    p_lefteye = in_land[0:2]
    d_nose_lefteye = cal_eucldist(p_nose, p_lefteye)

    w_start = np.round(p_nose[0] - 2 * d_nose_lefteye).astype(int)
    w_end = np.round(p_nose[0] + 2 * d_nose_lefteye).astype(int)
    h_start = np.round(p_nose[1] - 2 * d_nose_lefteye).astype(int)
    h_end = np.round(p_nose[1] + 2 * d_nose_lefteye).astype(int)

    h_img, w_img, c = in_img.shape

    # clip to the valid pixel range
    if w_start < 0: w_start = 0
    if h_start < 0: h_start = 0
    if w_end > w_img: w_end = w_img
    if h_end > h_img: h_end = h_img

    crop_img = in_img.copy()
    crop_img = crop_img[h_start:h_end + 1, w_start:w_end + 1, :]
    return crop_img, w_start, h_start

# For AFLW labels: reorder x1..xN y1..yN  --->  x1 y1 x2 y2 ...
# Generalized: the y-block offset is n_p (was hard-coded 5, i.e. only the
# 10-value case); unchanged result for len(in_land) == 10.
def change_order(in_land):
    n_p = len(in_land) // 2  # // keeps this an int under Python 3
    out_land = in_land[:]
    for i in range(n_p):
        out_land[2 * i + 0] = in_land[i]
        out_land[2 * i + 1] = in_land[i + n_p]
    return out_land