├── res.jpg ├── mafa.pdf ├── README.md ├── solver_pmask.prototxt ├── train_final_pmask.prototxt └── layer.py /res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IssacCyj/Adversarial-Occlussion-aware-Face-Detection/HEAD/res.jpg -------------------------------------------------------------------------------- /mafa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IssacCyj/Adversarial-Occlussion-aware-Face-Detection/HEAD/mafa.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Adversarial-Occlussion-aware-Face-Detection 2 | Implementation of the BTAS 2018 oral paper Adversarial Occlusion-aware Face Detection (https://arxiv.org/abs/1709.05188). 3 | Code is written in Caffe with the layer interface in Python. 4 | 5 | ![res](https://github.com/IssacCyj/Adversarial-Occlussion-aware-Face-Detection/blob/master/res.jpg) 6 | -------------------------------------------------------------------------------- /solver_pmask.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/face/VGG16/faster_rcnn_end2end/train_final_pmask.prototxt" 2 | base_lr: 0.0005 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | snapshot: 0 12 | snapshot_prefix: "_output_v1" 13 | iter_size: 2 14 | 15 | -------------------------------------------------------------------------------- /train_final_pmask.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | layer { 3 | name: 'input-data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'im_info' 7 | top: 'gt_boxes' 8 | top: 'gt_mask' 9 | python_param { 10 | module: 'roi_data_layer.layer' 11 | layer: 'RoIDataLayer' 12 | param_str: "'num_classes': 2" 13 | } 14 | } 15 | 16 | layer { 17 | name: "conv1_1" 18 | type: "Convolution" 19 | bottom: "data" 20 | top: "conv1_1" 21 | param { 22 | lr_mult: 0 23 | decay_mult: 0 24 | } 25 | param { 26 | lr_mult: 0 27 | decay_mult: 0 28 | } 29 | convolution_param { 30 | num_output: 64 31 | pad: 1 32 | kernel_size: 3 33 | } 34 | } 35 | layer { 36 | name: "relu1_1" 37 | type: "ReLU" 38 | bottom: "conv1_1" 39 | top: "conv1_1" 40 | } 41 | layer { 42 | name: "conv1_2" 43 | type: "Convolution" 44 | bottom: "conv1_1" 45 | top: "conv1_2" 46 | param { 47 | lr_mult: 0 48 | decay_mult: 0 49 | } 50 | param { 51 | lr_mult: 0 52 | decay_mult: 0 53 | } 54 | convolution_param { 55 | num_output: 64 56 | pad: 1 57 | kernel_size: 3 58 | } 59 | } 60 | layer { 61 | name: "relu1_2" 62 | type: "ReLU" 63 | bottom: "conv1_2" 64 | top: "conv1_2" 65 | } 66 | layer { 67 | name: "pool1" 68 | type: "Pooling" 69 | bottom: "conv1_2" 70 | top: "pool1" 71 | pooling_param { 72 | pool: MAX 73 | kernel_size: 2 74 | stride: 2 75 | } 76 | } 77 | layer { 78 | name: "conv2_1" 79 | type: "Convolution" 80 | bottom: "pool1" 81 | top: "conv2_1" 82 | param { 83 | lr_mult: 0 84 | decay_mult: 0 85 | } 86 | param { 87 | lr_mult: 0 88 | decay_mult: 0 89 | } 90 | convolution_param { 91 | num_output: 128 92 | pad: 1 93 | kernel_size: 3 94 | } 95 | } 96 | layer { 97 | name: "relu2_1" 98 | type: "ReLU" 99 | bottom: "conv2_1" 100 | top: "conv2_1" 101 | } 102 | layer { 103 | name: "conv2_2" 104 | type: 
"Convolution" 105 | bottom: "conv2_1" 106 | top: "conv2_2" 107 | param { 108 | lr_mult: 0 109 | decay_mult: 0 110 | } 111 | param { 112 | lr_mult: 0 113 | decay_mult: 0 114 | } 115 | convolution_param { 116 | num_output: 128 117 | pad: 1 118 | kernel_size: 3 119 | } 120 | } 121 | layer { 122 | name: "relu2_2" 123 | type: "ReLU" 124 | bottom: "conv2_2" 125 | top: "conv2_2" 126 | } 127 | layer { 128 | name: "pool2" 129 | type: "Pooling" 130 | bottom: "conv2_2" 131 | top: "pool2" 132 | pooling_param { 133 | pool: MAX 134 | kernel_size: 2 135 | stride: 2 136 | } 137 | } 138 | layer { 139 | name: "conv3_1" 140 | type: "Convolution" 141 | bottom: "pool2" 142 | top: "conv3_1" 143 | param { 144 | lr_mult: 1 145 | } 146 | param { 147 | lr_mult: 2 148 | } 149 | convolution_param { 150 | num_output: 256 151 | pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 | layer { 156 | name: "relu3_1" 157 | type: "ReLU" 158 | bottom: "conv3_1" 159 | top: "conv3_1" 160 | } 161 | layer { 162 | name: "conv3_2" 163 | type: "Convolution" 164 | bottom: "conv3_1" 165 | top: "conv3_2" 166 | param { 167 | lr_mult: 1 168 | } 169 | param { 170 | lr_mult: 2 171 | } 172 | convolution_param { 173 | num_output: 256 174 | pad: 1 175 | kernel_size: 3 176 | } 177 | } 178 | layer { 179 | name: "relu3_2" 180 | type: "ReLU" 181 | bottom: "conv3_2" 182 | top: "conv3_2" 183 | } 184 | layer { 185 | name: "conv3_3" 186 | type: "Convolution" 187 | bottom: "conv3_2" 188 | top: "conv3_3" 189 | param { 190 | lr_mult: 1 191 | } 192 | param { 193 | lr_mult: 2 194 | } 195 | convolution_param { 196 | num_output: 256 197 | pad: 1 198 | kernel_size: 3 199 | } 200 | } 201 | layer { 202 | name: "relu3_3" 203 | type: "ReLU" 204 | bottom: "conv3_3" 205 | top: "conv3_3" 206 | } 207 | layer { 208 | name: "pool3" 209 | type: "Pooling" 210 | bottom: "conv3_3" 211 | top: "pool3" 212 | pooling_param { 213 | pool: MAX 214 | kernel_size: 2 215 | stride: 2 216 | } 217 | } 218 | layer { 219 | name: "conv4_1" 220 | type: "Convolution" 221 | bottom: "pool3" 222 | top: "conv4_1" 223 | param { 224 | lr_mult: 1 225 | } 226 | param { 227 | lr_mult: 2 228 | } 229 | convolution_param { 230 | num_output: 512 231 | pad: 1 232 | kernel_size: 3 233 | } 234 | } 235 | layer { 236 | name: "relu4_1" 237 | type: "ReLU" 238 | bottom: "conv4_1" 239 | top: "conv4_1" 240 | } 241 | layer { 242 | name: "conv4_2" 243 | type: "Convolution" 244 | bottom: "conv4_1" 245 | top: "conv4_2" 246 | param { 247 | lr_mult: 1 248 | } 249 | param { 250 | lr_mult: 2 251 | } 252 | convolution_param { 253 | num_output: 512 254 | pad: 1 255 | kernel_size: 3 256 | } 257 | } 258 | layer { 259 | name: "relu4_2" 260 | type: "ReLU" 261 | bottom: "conv4_2" 262 | top: "conv4_2" 263 | } 264 | layer { 265 | name: "conv4_3" 266 | type: "Convolution" 267 | bottom: "conv4_2" 268 | top: "conv4_3" 269 | param { 270 | lr_mult: 1 271 | } 272 | param { 273 | lr_mult: 2 274 | } 275 | convolution_param { 276 | num_output: 512 277 | pad: 1 278 | kernel_size: 3 279 | } 280 | } 281 | layer { 282 | name: "relu4_3" 283 | type: "ReLU" 284 | bottom: "conv4_3" 285 | top: "conv4_3" 286 | } 287 | layer { 288 | name: "pool4" 289 | type: "Pooling" 290 | bottom: "conv4_3" 291 | top: "pool4" 292 | pooling_param { 293 | pool: MAX 294 | kernel_size: 2 295 | stride: 2 296 | } 297 | } 298 | layer { 299 | name: "conv5_1" 300 | type: "Convolution" 301 | bottom: "pool4" 302 | top: "conv5_1" 303 | param { 304 | lr_mult: 1 305 | } 306 | param { 307 | lr_mult: 2 308 | } 309 | convolution_param { 310 | num_output: 512 311 | pad: 1 312 | 
kernel_size: 3 313 | } 314 | } 315 | layer { 316 | name: "relu5_1" 317 | type: "ReLU" 318 | bottom: "conv5_1" 319 | top: "conv5_1" 320 | } 321 | layer { 322 | name: "conv5_2" 323 | type: "Convolution" 324 | bottom: "conv5_1" 325 | top: "conv5_2" 326 | param { 327 | lr_mult: 1 328 | } 329 | param { 330 | lr_mult: 2 331 | } 332 | convolution_param { 333 | num_output: 512 334 | pad: 1 335 | kernel_size: 3 336 | } 337 | } 338 | layer { 339 | name: "relu5_2" 340 | type: "ReLU" 341 | bottom: "conv5_2" 342 | top: "conv5_2" 343 | } 344 | layer { 345 | name: "conv5_3" 346 | type: "Convolution" 347 | bottom: "conv5_2" 348 | top: "conv5_3" 349 | param { 350 | lr_mult: 1 351 | } 352 | param { 353 | lr_mult: 2 354 | } 355 | convolution_param { 356 | num_output: 512 357 | pad: 1 358 | kernel_size: 3 359 | } 360 | } 361 | layer { 362 | name: "relu5_3" 363 | type: "ReLU" 364 | bottom: "conv5_3" 365 | top: "conv5_3" 366 | } 367 | 368 | #========= RPN ============ 369 | 370 | layer { 371 | name: "rpn_conv/3x3" 372 | type: "Convolution" 373 | bottom: "conv5_3" 374 | top: "rpn/output" 375 | param { lr_mult: 0 376 | decay_mult: 0 } 377 | param { lr_mult: 0 378 | decay_mult: 0 } 379 | convolution_param { 380 | num_output: 512 381 | kernel_size: 3 pad: 1 stride: 1 382 | weight_filler { type: "gaussian" std: 0.01 } 383 | bias_filler { type: "constant" value: 0 } 384 | } 385 | } 386 | layer { 387 | name: "rpn_relu/3x3" 388 | type: "ReLU" 389 | bottom: "rpn/output" 390 | top: "rpn/output" 391 | } 392 | 393 | layer { 394 | name: "rpn_cls_score" 395 | type: "Convolution" 396 | bottom: "rpn/output" 397 | top: "rpn_cls_score" 398 | param { lr_mult: 0 399 | decay_mult: 0 } 400 | param { lr_mult: 0 401 | decay_mult: 0 } 402 | convolution_param { 403 | num_output: 36 # 2(bg/fg) * 12(anchors) 404 | kernel_size: 1 pad: 0 stride: 1 405 | weight_filler { type: "gaussian" std: 0.01 } 406 | bias_filler { type: "constant" value: 0 } 407 | } 408 | } 409 | 410 | layer { 411 | name: "rpn_bbox_pred" 412 | type: "Convolution" 413 | bottom: "rpn/output" 414 | top: "rpn_bbox_pred" 415 | param { lr_mult: 0 416 | decay_mult: 0 } 417 | param { lr_mult: 0 418 | decay_mult: 0 } 419 | convolution_param { 420 | num_output: 72 # 4 * 12(anchors) 421 | kernel_size: 1 pad: 0 stride: 1 422 | weight_filler { type: "gaussian" std: 0.01 } 423 | bias_filler { type: "constant" value: 0 } 424 | } 425 | } 426 | 427 | layer { 428 | bottom: "rpn_cls_score" 429 | top: "rpn_cls_score_reshape" 430 | name: "rpn_cls_score_reshape" 431 | type: "Reshape" 432 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 433 | } 434 | 435 | #layer { 436 | # name: 'rpn-data' 437 | # type: 'Python' 438 | # bottom: 'rpn_cls_score' 439 | # bottom: 'gt_boxes' 440 | # bottom: 'im_info' 441 | # bottom: 'data' 442 | # top: 'rpn_labels' 443 | # top: 'rpn_bbox_targets' 444 | # top: 'rpn_bbox_inside_weights' 445 | # top: 'rpn_bbox_outside_weights' 446 | # python_param { 447 | # module: 'rpn.anchor_target_layer' 448 | # layer: 'AnchorTargetLayer' 449 | # param_str: "'feat_stride': 16" 450 | # } 451 | #} 452 | 453 | #layer { 454 | # name: "rpn_loss_cls" 455 | # type: "SoftmaxWithLoss" 456 | # bottom: "rpn_cls_score_reshape" 457 | # bottom: "rpn_labels" 458 | # propagate_down: 1 459 | # propagate_down: 0 460 | # top: "rpn_cls_loss" 461 | # loss_weight: 0 462 | # loss_param { 463 | # ignore_label: -1 464 | # normalize: true 465 | # } 466 | #} 467 | 468 | #layer { 469 | # name: "rpn_loss_bbox" 470 | # type: "SmoothL1Loss" 471 | # bottom: "rpn_bbox_pred" 472 | # bottom: 
"rpn_bbox_targets" 473 | # bottom: 'rpn_bbox_inside_weights' 474 | # bottom: 'rpn_bbox_outside_weights' 475 | # top: "rpn_loss_bbox" 476 | # loss_weight: 0 477 | # smooth_l1_loss_param { sigma: 3.0 } 478 | #} 479 | 480 | #========= RoI Proposal ============ 481 | 482 | layer { 483 | name: "rpn_cls_prob" 484 | type: "Softmax" 485 | bottom: "rpn_cls_score_reshape" 486 | top: "rpn_cls_prob" 487 | } 488 | 489 | layer { 490 | name: 'rpn_cls_prob_reshape' 491 | type: 'Reshape' 492 | bottom: 'rpn_cls_prob' 493 | top: 'rpn_cls_prob_reshape' 494 | reshape_param { shape { dim: 0 dim: 36 dim: -1 dim: 0 } } 495 | } 496 | 497 | layer { 498 | name: 'proposal' 499 | type: 'Python' 500 | bottom: 'rpn_cls_prob_reshape' 501 | bottom: 'rpn_bbox_pred' 502 | bottom: 'im_info' 503 | top: 'rpn_rois' 504 | # top: 'rpn_scores' 505 | propagate_down: false 506 | propagate_down: false 507 | propagate_down: false 508 | python_param { 509 | module: 'rpn.proposal_layer' 510 | layer: 'ProposalLayer' 511 | param_str: "'feat_stride': 16" 512 | } 513 | } 514 | 515 | #layer { 516 | # name: 'debug-data' 517 | # type: 'Python' 518 | # bottom: 'data' 519 | # bottom: 'rpn_rois' 520 | # bottom: 'rpn_scores' 521 | # python_param { 522 | # module: 'rpn.debug_layer' 523 | # layer: 'RPNDebugLayer' 524 | # } 525 | #} 526 | 527 | layer { 528 | name: 'roi-data' 529 | type: 'Python' 530 | bottom: 'rpn_rois' 531 | bottom: 'gt_boxes' 532 | top: 'rois' 533 | top: 'labels' 534 | top: 'bbox_targets' 535 | top: 'bbox_inside_weights' 536 | top: 'bbox_outside_weights' 537 | python_param { 538 | module: 'rpn.proposal_target_layer' 539 | layer: 'ProposalTargetLayer' 540 | param_str: "'num_classes': 2" 541 | } 542 | } 543 | 544 | 545 | 546 | ########################## 547 | ## Readonly RoI Network ## 548 | ######### Start ########## 549 | layer { 550 | name: "roi_pool5_readonly" 551 | type: "ROIPooling" 552 | bottom: "conv5_3" 553 | bottom: "rois" 554 | top: "pool5_readonly" 555 | propagate_down: false 556 | propagate_down: false 557 | roi_pooling_param { 558 | pooled_w: 7 559 | pooled_h: 7 560 | spatial_scale: 0.0625 # 1/16 561 | } 562 | } 563 | layer { 564 | name: "roi_pool5" 565 | type: "ROIPooling" 566 | bottom: "conv5_3" 567 | bottom: "rois" 568 | top: "roi_pool5" 569 | propagate_down: true 570 | propagate_down: false 571 | roi_pooling_param { 572 | pooled_w: 7 573 | pooled_h: 7 574 | spatial_scale: 0.0625 # 1/16 575 | } 576 | } 577 | 578 | #### mask branch 579 | 580 | layer { 581 | name: "conv6_mask" 582 | type: "Convolution" 583 | bottom: "pool5_readonly" 584 | top: "conv6_mask" 585 | param { 586 | lr_mult: 0 587 | decay_mult: 0 588 | } 589 | param { 590 | lr_mult: 0 591 | decay_mult: 0 592 | } 593 | convolution_param { 594 | num_output: 256 595 | pad: 1 596 | kernel_size: 3 597 | 598 | weight_filler { 599 | type: "msra" 600 | } 601 | bias_filler { 602 | type: "constant" 603 | value: 0 604 | } 605 | 606 | } 607 | } 608 | layer { 609 | name: "relu6_mask" 610 | type: "ReLU" 611 | bottom: "conv6_mask" 612 | top: "conv6_mask" 613 | } 614 | layer { 615 | name: "conv7_mask" 616 | type: "Convolution" 617 | bottom: "conv6_mask" 618 | top: "conv7_mask" 619 | param { 620 | lr_mult: 0 621 | decay_mult: 0 622 | } 623 | param { 624 | lr_mult: 0 625 | decay_mult: 0 626 | } 627 | convolution_param { 628 | num_output: 256 629 | pad: 1 630 | kernel_size: 3 631 | 632 | weight_filler { 633 | type: "msra" 634 | } 635 | bias_filler { 636 | type: "constant" 637 | value: 0 638 | } 639 | 640 | } 641 | } 642 | layer { 643 | name: "relu7_mask" 644 | type: "ReLU" 645 
| bottom: "conv7_mask" 646 | top: "conv7_mask" 647 | } 648 | 649 | layer { 650 | name: "conv8_mask" 651 | type: "Convolution" 652 | bottom: "conv7_mask" 653 | top: "conv8_mask" 654 | param { 655 | lr_mult: 0 656 | decay_mult: 0 657 | } 658 | param { 659 | lr_mult: 0 660 | decay_mult: 0 661 | } 662 | convolution_param { 663 | num_output: 256 664 | pad: 1 665 | kernel_size: 3 666 | 667 | weight_filler { 668 | type: "msra" 669 | } 670 | bias_filler { 671 | type: "constant" 672 | value: 0 673 | } 674 | 675 | } 676 | } 677 | layer { 678 | name: "relu8_mask" 679 | type: "ReLU" 680 | bottom: "conv8_mask" 681 | top: "conv8_mask" 682 | } 683 | 684 | layer 685 | { 686 | name: "eltwise_layer" 687 | type: "Eltwise" 688 | bottom: "conv6_mask" 689 | bottom: "conv8_mask" 690 | top: "eltwise" 691 | eltwise_param { 692 | operation: SUM 693 | } 694 | } 695 | 696 | layer { 697 | name: "conv9_mask" 698 | type: "Convolution" 699 | bottom: "eltwise" 700 | top: "conv9_mask" 701 | param { 702 | lr_mult: 0 703 | decay_mult: 0 704 | } 705 | param { 706 | lr_mult: 0 707 | decay_mult: 0 708 | } 709 | convolution_param { 710 | num_output: 256 711 | pad: 1 712 | kernel_size: 3 713 | 714 | weight_filler { 715 | type: "msra" 716 | } 717 | bias_filler { 718 | type: "constant" 719 | value: 0 720 | } 721 | 722 | } 723 | } 724 | layer { 725 | name: "relu9_mask" 726 | type: "ReLU" 727 | bottom: "conv9_mask" 728 | top: "conv9_mask" 729 | } 730 | 731 | layer { 732 | name: "conv10_mask_gen" 733 | type: "Convolution" 734 | bottom: "conv9_mask" 735 | top: "conv10_mask_gen" 736 | param { 737 | lr_mult: 0 738 | decay_mult: 0 739 | } 740 | param { 741 | lr_mult: 0 742 | decay_mult: 0 743 | } 744 | convolution_param { 745 | num_output: 1 746 | pad: 1 747 | kernel_size: 3 748 | 749 | weight_filler { 750 | type: "msra" 751 | } 752 | bias_filler { 753 | type: "constant" 754 | value: 0 755 | } 756 | } 757 | } 758 | #layer { 759 | # name: "sigmoid_gen" 760 | # type: "Sigmoid" 761 | # bottom: "conv10_mask_gen" 762 | # top: "mask_gen" 763 | #} 764 | layer { 765 | name: "gen_layer" 766 | type: "Python" 767 | bottom: "conv10_mask_gen" 768 | bottom: "gt_mask" 769 | top: "mask_gen_tile" 770 | top: "mask_gen_thres" 771 | top: "mask_for_loss" 772 | propagate_down: false 773 | propagate_down: false 774 | python_param { 775 | module: "roi_data_layer.layer" 776 | layer: "TileLayer2" 777 | param_str: "{\'channels\': 512,\'permute_count\': 20,\'count_drop\': 15,\'iter_size\': 5,'maintain_before\': 1}" 778 | } 779 | } 780 | 781 | layer{ 782 | name:"mask_prod" 783 | type:"Eltwise" 784 | bottom:"mask_gen_tile" 785 | bottom:"roi_pool5" 786 | top:"cls_branch" 787 | eltwise_param { 788 | operation:PROD 789 | } 790 | } 791 | 792 | 793 | #### classification branch 794 | 795 | layer { 796 | name: "fc6" 797 | type: "InnerProduct" 798 | bottom: "cls_branch" 799 | top: "fc6" 800 | param { 801 | lr_mult: 1 802 | } 803 | param { 804 | lr_mult: 2 805 | } 806 | propagate_down: true 807 | inner_product_param { 808 | num_output: 4096 809 | } 810 | } 811 | layer { 812 | name: "relu6" 813 | type: "ReLU" 814 | bottom: "fc6" 815 | top: "fc6" 816 | } 817 | layer { 818 | name: "drop6" 819 | type: "Dropout" 820 | bottom: "fc6" 821 | top: "fc6" 822 | dropout_param { 823 | dropout_ratio: 0.5 824 | } 825 | } 826 | layer { 827 | name: "fc7" 828 | type: "InnerProduct" 829 | bottom: "fc6" 830 | top: "fc7" 831 | param { 832 | lr_mult: 1 833 | } 834 | param { 835 | lr_mult: 2 836 | } 837 | propagate_down: true 838 | inner_product_param { 839 | num_output: 4096 840 | } 841 | } 842 | 
layer { 843 | name: "relu7" 844 | type: "ReLU" 845 | bottom: "fc7" 846 | top: "fc7" 847 | } 848 | layer { 849 | name: "drop7" 850 | type: "Dropout" 851 | bottom: "fc7" 852 | top: "fc7" 853 | dropout_param { 854 | dropout_ratio: 0.5 855 | } 856 | } 857 | layer { 858 | name: "cls_score" 859 | type: "InnerProduct" 860 | bottom: "fc7" 861 | top: "cls_score" 862 | param { 863 | lr_mult: 1 864 | } 865 | param { 866 | lr_mult: 2 867 | } 868 | propagate_down: true 869 | inner_product_param { 870 | num_output: 2 871 | weight_filler { 872 | type: "gaussian" 873 | std: 0.01 874 | } 875 | bias_filler { 876 | type: "constant" 877 | value: 0 878 | } 879 | } 880 | } 881 | 882 | layer { 883 | name: "loss_cls" 884 | type: "SoftmaxWithLoss" 885 | bottom: "cls_score" 886 | bottom: "labels" 887 | top: "loss_cls" 888 | propagate_down: true 889 | propagate_down: false 890 | loss_weight: 1 891 | } 892 | 893 | layer { 894 | name: "bbox_pred" 895 | type: "InnerProduct" 896 | bottom: "fc7" 897 | top: "bbox_pred" 898 | param { 899 | name: "bbox_pred_w" 900 | lr_mult: 1 901 | decay_mult: 1 902 | } 903 | param { 904 | name: "bbox_pred_b" 905 | lr_mult: 2 906 | decay_mult: 0 907 | } 908 | inner_product_param { 909 | num_output: 8 910 | weight_filler { 911 | type: "gaussian" 912 | std: 0.001 913 | } 914 | bias_filler { 915 | type: "constant" 916 | value: 0 917 | } 918 | } 919 | } 920 | layer { 921 | name: "loss_bbox" 922 | type: "SmoothL1Loss" 923 | bottom: "bbox_pred" 924 | bottom: "bbox_targets" 925 | bottom: "bbox_inside_weights" 926 | bottom: "bbox_outside_weights" 927 | top: "loss_bbox" 928 | propagate_down: true 929 | propagate_down: false 930 | propagate_down: false 931 | propagate_down: false 932 | loss_weight: 1 933 | } 934 | #layer { 935 | # name: "center_loss" 936 | # type: "CenterLoss" 937 | # bottom: "fc6" 938 | # bottom: "labels" 939 | # top: "center_loss" 940 | # propagate_down:true 941 | # propagate_down:false 942 | # param { 943 | # lr_mult: 1 944 | # decay_mult: 2 945 | # } 946 | # center_loss_param { 947 | # num_output: 4096 948 | # center_filler { 949 | # type: "xavier" 950 | # } 951 | # } 952 | # loss_weight: 0.005 953 | #} 954 | layer { 955 | name: "cls_prob" 956 | type: "Softmax" 957 | bottom: "cls_score" 958 | top: "cls_prob" 959 | } 960 | layer { 961 | name: "SiftFaceLayer" 962 | type: "Python" 963 | bottom: "pool4" 964 | bottom: "bbox_pred" 965 | bottom: "cls_prob" 966 | bottom: "gt_mask" 967 | bottom: "rois" 968 | bottom: "im_info" 969 | top: "onlyface" 970 | top: "gt_mask_fg" 971 | propagate_down: true 972 | propagate_down: false 973 | propagate_down: false 974 | propagate_down: false 975 | propagate_down: false 976 | propagate_down: false 977 | python_param { 978 | module: "roi_data_layer.layer2" 979 | layer: "SiftFaceLayer" 980 | } 981 | } 982 | layer { 983 | name: "fc6_pmask" 984 | type: "Convolution" 985 | bottom: "onlyface" 986 | top: "fc6_pmask" 987 | param { 988 | lr_mult: 1 989 | decay_mult: 1 990 | } 991 | param { 992 | lr_mult: 2 993 | decay_mult: 0 994 | } 995 | convolution_param { 996 | num_output: 4096 997 | pad: 2 998 | kernel_size: 3 999 | stride: 1 1000 | weight_filler { 1001 | type: "msra" 1002 | } 1003 | bias_filler { 1004 | type: "constant" 1005 | value: 0 1006 | } 1007 | } 1008 | } 1009 | layer { 1010 | name: "relu6_pmask" 1011 | type: "ReLU" 1012 | bottom: "fc6_pmask" 1013 | top: "fc6_pmask" 1014 | } 1015 | layer { 1016 | name: "drop6_pmask" 1017 | type: "Dropout" 1018 | bottom: "fc6_pmask" 1019 | top: "fc6_pmask" 1020 | dropout_param { 1021 | dropout_ratio: 0.5 1022 | } 
1023 | } 1024 | layer { 1025 | name: "fc7_pmask" 1026 | type: "Convolution" 1027 | bottom: "fc6_pmask" 1028 | top: "fc7_pmask" 1029 | param { 1030 | lr_mult: 1 1031 | decay_mult: 1 1032 | } 1033 | param { 1034 | lr_mult: 2 1035 | decay_mult: 0 1036 | } 1037 | convolution_param { 1038 | num_output: 4096 1039 | pad: 0 1040 | kernel_size: 1 1041 | stride: 1 1042 | weight_filler { 1043 | type: "msra" 1044 | } 1045 | bias_filler { 1046 | type: "constant" 1047 | value: 0 1048 | } 1049 | } 1050 | } 1051 | layer { 1052 | name: "relu7_pmask" 1053 | type: "ReLU" 1054 | bottom: "fc7_pmask" 1055 | top: "fc7_pmask" 1056 | } 1057 | layer { 1058 | name: "drop7_pmask" 1059 | type: "Dropout" 1060 | bottom: "fc7_pmask" 1061 | top: "fc7_pmask" 1062 | dropout_param { 1063 | dropout_ratio: 0.5 1064 | } 1065 | } 1066 | layer { 1067 | name: "score_fr_fg" 1068 | type: "Convolution" 1069 | bottom: "fc7_pmask" 1070 | top: "score_fr_fg" 1071 | param { 1072 | lr_mult: 1 1073 | decay_mult: 1 1074 | } 1075 | param { 1076 | lr_mult: 2 1077 | decay_mult: 0 1078 | } 1079 | convolution_param { 1080 | num_output: 2 1081 | pad: 0 1082 | kernel_size: 1 1083 | weight_filler { 1084 | type: "msra" 1085 | } 1086 | bias_filler { 1087 | type: "constant" 1088 | value: 0 1089 | } 1090 | } 1091 | } 1092 | # layer { 1093 | # name: "upscore2_fg" 1094 | # type: "Deconvolution" 1095 | # bottom: "score_fr_fg" 1096 | # top: "upscore2_fg" 1097 | # param { 1098 | # lr_mult: 0 1099 | # } 1100 | # convolution_param { 1101 | # num_output: 33 1102 | # bias_term: false 1103 | # kernel_size: 4 1104 | # stride: 2 1105 | # } 1106 | # } 1107 | # layer { 1108 | # name: "score_pool4_fg" 1109 | # type: "Convolution" 1110 | # bottom: "pool4" 1111 | # top: "score_pool4_fg" 1112 | # param { 1113 | # lr_mult: 1 1114 | # decay_mult: 1 1115 | # } 1116 | # param { 1117 | # lr_mult: 2 1118 | # decay_mult: 0 1119 | # } 1120 | # convolution_param { 1121 | # num_output: 33 1122 | # pad: 0 1123 | # kernel_size: 1 1124 | # } 1125 | # } 1126 | # layer { 1127 | # name: "score_pool4_fgc" 1128 | # type: "Crop" 1129 | # bottom: "score_pool4_fg" 1130 | # bottom: "upscore2_fg" 1131 | # top: "score_pool4_fgc" 1132 | # crop_param { 1133 | # axis: 2 1134 | # offset: 0 1135 | # } 1136 | # } 1137 | # layer { 1138 | # name: "fuse_pool4_fg" 1139 | # type: "Eltwise" 1140 | # bottom: "upscore2_fg" 1141 | # bottom: "score_pool4_fgc" 1142 | # top: "fuse_pool4_fg" 1143 | # eltwise_param { 1144 | # operation: SUM 1145 | # } 1146 | # } 1147 | # layer { 1148 | # name: "upscore_pool4_fg" 1149 | # type: "Deconvolution" 1150 | # bottom: "fuse_pool4_fg" 1151 | # top: "upscore_pool4_fg" 1152 | # param { 1153 | # lr_mult: 0 1154 | # } 1155 | # convolution_param { 1156 | # num_output: 33 1157 | # bias_term: false 1158 | # kernel_size: 4 1159 | # stride: 2 1160 | # } 1161 | # } 1162 | # layer { 1163 | # name: "score_pool3_fg" 1164 | # type: "Convolution" 1165 | # bottom: "pool3" 1166 | # top: "score_pool3_fg" 1167 | # param { 1168 | # lr_mult: 1 1169 | # decay_mult: 1 1170 | # } 1171 | # param { 1172 | # lr_mult: 2 1173 | # decay_mult: 0 1174 | # } 1175 | # convolution_param { 1176 | # num_output: 33 1177 | # pad: 0 1178 | # kernel_size: 1 1179 | # } 1180 | # } 1181 | # layer { 1182 | # name: "score_pool3_fgc" 1183 | # type: "Crop" 1184 | # bottom: "score_pool3_fg" 1185 | # bottom: "upscore_pool4_fg" 1186 | # top: "score_pool3_fgc" 1187 | # crop_param { 1188 | # axis: 2 1189 | # offset: 0 1190 | # } 1191 | # } 1192 | # layer { 1193 | # name: "fuse_pool3_fg" 1194 | # type: "Eltwise" 1195 | # 
bottom: "upscore_pool4_fg" 1196 | # bottom: "score_pool3_fgc" 1197 | # top: "fuse_pool3_fg" 1198 | # eltwise_param { 1199 | # operation: SUM 1200 | # } 1201 | # } 1202 | layer { 1203 | name: "upscore8_fg" 1204 | type: "Deconvolution" 1205 | bottom: "score_fr_fg" 1206 | top: "upscore8_fg" 1207 | param { 1208 | lr_mult: 1 1209 | decay_mult: 1 1210 | } 1211 | convolution_param { 1212 | num_output: 2 1213 | bias_term: false 1214 | kernel_size: 32 1215 | stride: 16 1216 | weight_filler { 1217 | type: "xavier" 1218 | } 1219 | bias_filler { 1220 | type: "constant" 1221 | value: 0 1222 | } 1223 | } 1224 | } 1225 | #layer { 1226 | # name: "score_fg_vis" 1227 | # type: "Crop" 1228 | # bottom: "upscore8_fg" 1229 | # bottom: "data" 1230 | # top: "score_fg_vis" 1231 | # crop_param { 1232 | # axis: 2 1233 | # offset: 5 1234 | # } 1235 | #} 1236 | 1237 | layer { 1238 | name: "score_fg" 1239 | type: "Crop" 1240 | bottom: "upscore8_fg" 1241 | bottom: "data" 1242 | top: "score_fg" 1243 | crop_param { 1244 | axis: 2 1245 | offset: 12 1246 | } 1247 | } 1248 | layer { 1249 | name: "loss" 1250 | type: "SoftmaxWithLoss" 1251 | bottom: "score_fg" 1252 | bottom: "gt_mask_fg" 1253 | top: "loss" 1254 | propagate_down: true 1255 | propagate_down: false 1256 | loss_param { 1257 | ignore_label: 255 1258 | normalize: false 1259 | } 1260 | loss_weight:0 1261 | } 1262 | layer{ 1263 | name:"silence1" 1264 | type:"Silence" 1265 | bottom:"mask_for_loss" 1266 | } 1267 | layer{ 1268 | name:"silence1" 1269 | type:"Silence" 1270 | bottom:"mask_gen_thres" 1271 | } 1272 | #layer{ 1273 | # name:"silence1" 1274 | # type:"Silence" 1275 | # bottom:"fuse_pool3_fg" 1276 | #} 1277 | -------------------------------------------------------------------------------- /layer.py: -------------------------------------------------------------------------------- 1 | #only modify siftfacelayer 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | """The data layer used during training to train a Fast R-CNN network. 10 | 11 | RoIDataLayer implements a Caffe Python layer. 
12 | """ 13 | import pickle 14 | import caffe 15 | from fast_rcnn.config import cfg 16 | from roi_data_layer.minibatch import get_minibatch 17 | import numpy as np 18 | import yaml 19 | from multiprocessing import Process, Queue 20 | from fast_rcnn.nms_wrapper import nms 21 | from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv 22 | 23 | 24 | class RoIDataLayer(caffe.Layer): 25 | def _shuffle_roidb_inds(self): 26 | """Randomly permute the training roidb.""" 27 | if cfg.TRAIN.ASPECT_GROUPING: 28 | widths = np.array([r['width'] for r in self._roidb]) 29 | heights = np.array([r['height'] for r in self._roidb]) 30 | horz = (widths >= heights) 31 | vert = np.logical_not(horz) 32 | horz_inds = np.where(horz)[0] 33 | vert_inds = np.where(vert)[0] 34 | inds = np.hstack(( 35 | np.random.permutation(horz_inds), 36 | np.random.permutation(vert_inds))) 37 | inds = np.reshape(inds, (-1, 2)) 38 | row_perm = np.random.permutation(np.arange(inds.shape[0])) 39 | inds = np.reshape(inds[row_perm, :], (-1,)) 40 | self._perm = inds 41 | else: 42 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 43 | self._cur = 0 44 | 45 | def _get_next_minibatch_inds(self): 46 | """Return the roidb indices for the next minibatch.""" 47 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 48 | self._shuffle_roidb_inds() 49 | 50 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 51 | self._cur += cfg.TRAIN.IMS_PER_BATCH 52 | return db_inds 53 | 54 | def _get_next_minibatch(self): 55 | """Return the blobs to be used for the next minibatch. 56 | 57 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 58 | separate process and made available through self._blob_queue. 59 | """ 60 | if cfg.TRAIN.USE_PREFETCH: 61 | return self._blob_queue.get() 62 | else: 63 | db_inds = self._get_next_minibatch_inds() 64 | minibatch_db = [self._roidb[i] for i in db_inds] 65 | return get_minibatch(minibatch_db, self._num_classes) 66 | 67 | def set_roidb(self, roidb): 68 | """Set the roidb to be used by this layer during training.""" 69 | self._roidb = roidb 70 | self._shuffle_roidb_inds() 71 | if cfg.TRAIN.USE_PREFETCH: 72 | self._blob_queue = Queue(10) 73 | self._prefetch_process = BlobFetcher(self._blob_queue, 74 | self._roidb, 75 | self._num_classes) 76 | self._prefetch_process.start() 77 | # Terminate the child process when the parent exists 78 | def cleanup(): 79 | print 'Terminating BlobFetcher' 80 | self._prefetch_process.terminate() 81 | self._prefetch_process.join() 82 | import atexit 83 | atexit.register(cleanup) 84 | 85 | def setup(self, bottom, top): 86 | """Setup the RoIDataLayer.""" 87 | 88 | # parse the layer parameter string, which must be valid YAML 89 | layer_params = yaml.load(self.param_str_) 90 | 91 | self._num_classes = layer_params['num_classes'] 92 | 93 | self._name_to_top_map = {} 94 | 95 | # data blob: holds a batch of N images, each with 3 channels 96 | idx = 0 97 | top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 3, 98 | max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE) 99 | self._name_to_top_map['data'] = idx 100 | idx += 1 101 | 102 | if cfg.TRAIN.HAS_RPN: 103 | top[idx].reshape(1, 3) 104 | self._name_to_top_map['im_info'] = idx 105 | idx += 1 106 | 107 | top[idx].reshape(1, 4) 108 | self._name_to_top_map['gt_boxes'] = idx 109 | idx += 1 110 | 111 | top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 1, 112 | max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE) 113 | self._name_to_top_map['gt_mask'] = idx 114 | idx += 1 115 | else: # not using RPN 116 | # rois blob: holds R 
regions of interest, each is a 5-tuple 117 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 118 | # rectangle (x1, y1, x2, y2) 119 | top[idx].reshape(1, 5) 120 | self._name_to_top_map['rois'] = idx 121 | idx += 1 122 | 123 | # labels blob: R categorical labels in [0, ..., K] for K foreground 124 | # classes plus background 125 | top[idx].reshape(1) 126 | self._name_to_top_map['labels'] = idx 127 | idx += 1 128 | 129 | if cfg.TRAIN.BBOX_REG: 130 | # bbox_targets blob: R bounding-box regression targets with 4 131 | # targets per class 132 | top[idx].reshape(1, self._num_classes * 4) 133 | self._name_to_top_map['bbox_targets'] = idx 134 | idx += 1 135 | 136 | # bbox_inside_weights blob: At most 4 targets per roi are active; 137 | # this binary vector specifies the subset of active targets 138 | top[idx].reshape(1, self._num_classes * 4) 139 | self._name_to_top_map['bbox_inside_weights'] = idx 140 | idx += 1 141 | 142 | top[idx].reshape(1, self._num_classes * 4) 143 | self._name_to_top_map['bbox_outside_weights'] = idx 144 | idx += 1 145 | 146 | print 'RoiDataLayer: name_to_top:', self._name_to_top_map 147 | assert len(top) == len(self._name_to_top_map) 148 | 149 | def forward(self, bottom, top): 150 | """Get blobs and copy them into this layer's top blob vector.""" 151 | blobs = self._get_next_minibatch() 152 | 153 | for blob_name, blob in blobs.iteritems(): 154 | top_ind = self._name_to_top_map[blob_name] 155 | # Reshape net's input blobs 156 | top[top_ind].reshape(*(blob.shape)) 157 | # Copy data into net's input blobs 158 | top[top_ind].data[...] = blob.astype(np.float32, copy=False) 159 | 160 | #print('data blob shape: ', top[0].data.shape) 161 | #print('mask blob shape: ', top[3].data.shape) 162 | 163 | def backward(self, top, propagate_down, bottom): 164 | """This layer does not propagate gradients.""" 165 | pass 166 | 167 | def reshape(self, bottom, top): 168 | """Reshaping happens during the call to forward.""" 169 | pass 170 | 171 | class BlobFetcher(Process): 172 | """Experimental class for prefetching blobs in a separate process.""" 173 | def __init__(self, queue, roidb, num_classes): 174 | super(BlobFetcher, self).__init__() 175 | self._queue = queue 176 | self._roidb = roidb 177 | self._num_classes = num_classes 178 | self._perm = None 179 | self._cur = 0 180 | self._shuffle_roidb_inds() 181 | # fix the random seed for reproducibility 182 | np.random.seed(cfg.RNG_SEED) 183 | 184 | def _shuffle_roidb_inds(self): 185 | """Randomly permute the training roidb.""" 186 | # TODO(rbg): remove duplicated code 187 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 188 | self._cur = 0 189 | 190 | def _get_next_minibatch_inds(self): 191 | """Return the roidb indices for the next minibatch.""" 192 | # TODO(rbg): remove duplicated code 193 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 194 | self._shuffle_roidb_inds() 195 | 196 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 197 | self._cur += cfg.TRAIN.IMS_PER_BATCH 198 | return db_inds 199 | 200 | def run(self): 201 | print 'BlobFetcher started' 202 | while True: 203 | db_inds = self._get_next_minibatch_inds() 204 | minibatch_db = [self._roidb[i] for i in db_inds] 205 | blobs = get_minibatch(minibatch_db, self._num_classes) 206 | self._queue.put(blobs) 207 | 208 | 209 | 210 | 211 | 212 | 213 | class TileLayer(caffe.Layer): 214 | def setup(self, bottom, top): 215 | """Setup the TileLayer.""" 216 | 217 | # parse the layer parameter string, which must be valid YAML 218 | 
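        # expected keys, matching the param_str set in the prototxt:
        #   channels        - feature-map channels the 1-channel mask is tiled to
        #   permute_count   - size of the candidate pool (lowest-scored mask pixels)
        #   count_drop      - how many candidates are randomly zeroed per RoI
        #   iter_size       - length of the masking schedule, in iterations
        #   maintain_before - leading iterations of each cycle left unmasked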
layer_params = yaml.load(self.param_str_) 219 | 220 | self._channels = layer_params['channels'] 221 | self._count_drop = layer_params['count_drop'] 222 | self._permute_count = layer_params['permute_count'] 223 | 224 | self._iter_size = layer_params['iter_size'] 225 | self._maintain_before = layer_params['maintain_before'] # maintain the first image unchanged 226 | 227 | self._count_iter = 0 228 | self.cnt = 0 229 | self._name_to_bottom_map = { 230 | 'mask_pred': 0 } 231 | 232 | # 0 means block, 1 means maintain 233 | 234 | self._name_to_top_map = { 235 | 'mask_pred_tile': 0 , 236 | 'mask_pred_thres':1, 237 | 'mask_inv':2} 238 | 239 | 240 | # top[0].reshape(*(bottom[0].data.shape)) 241 | top[0].reshape(bottom[0].data.shape[0], self._channels, bottom[0].data.shape[2], bottom[0].data.shape[3]) 242 | top[1].reshape(bottom[0].data.shape[0], 1, bottom[0].data.shape[2], bottom[0].data.shape[3]) 243 | top[2].reshape(bottom[0].data.shape[0], 1, bottom[0].data.shape[2], bottom[0].data.shape[3]) 244 | 245 | print 'TileLayer: name_to_top:', self._name_to_top_map 246 | assert len(top) == len(self._name_to_top_map) 247 | 248 | def select_mask(self, mask_pred): 249 | #1 means block in the input 250 | self.cnt = 0 251 | pool_len = mask_pred.shape[2] 252 | sample_num = mask_pred.shape[0] 253 | 254 | mask_pixels = pool_len * pool_len 255 | 256 | count_drop = self._count_drop #15 257 | permute_count = self._permute_count #20 258 | 259 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 260 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 261 | for i in range(sample_num): 262 | 263 | #not exactly as mentioned in the paper 264 | #15/49 ~= 1/3 are selected as 0 265 | #first choose the 20 lowest-predicted pixels in the mask (trained in stage 2) 266 | #randomly choose 15 of the 20 pixels to set to zero 267 | rp = np.random.permutation(np.arange(permute_count)) 268 | rp = rp[0: count_drop] 269 | 270 | final_mask = np.ones(mask_pixels) 271 | 272 | now_mask_pred = mask_pred[i] 273 | now_mask_pred_array = np.reshape(now_mask_pred, mask_pixels) 274 | #convert the mask to an array and sort it by ascending pixel value 275 | sorted_ids = np.argsort(now_mask_pred_array) 276 | now_ids = sorted_ids[rp] 277 | 278 | sel = np.zeros(mask_pixels) 279 | sel[now_ids] = 1 280 | _final_mask = sel * now_mask_pred_array 281 | if i==10000: 282 | #use this method later 283 | final_mask[np.where(_final_mask==0)] = 1 284 | final_mask[np.where(_final_mask!=0)] = 0 285 | #have to try this first, finding the most important part to mask. 286 | #final_mask[np.where(final_mask!=0)] = 1 287 | 288 | now_mask = np.reshape(final_mask, (pool_len, pool_len)) 289 | _final_mask[np.where(_final_mask==0)] = 1 290 | _now_mask = np.reshape(_final_mask, (pool_len, pool_len)) 291 | if self.cnt==0: 292 | #print(sel) 293 | self.cnt += 1 294 | print(now_mask) 295 | 296 | mask_sel[i,0,:,:] = np.copy(now_mask) 297 | mask_for_loss[i,0,:,:] = np.copy(_now_mask) 298 | return mask_sel,mask_for_loss 299 | 300 | def forward(self, bottom, top): 301 | 302 | #1 means block!! 
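        # select_mask picks the permute_count lowest-valued pixels of the
        # predicted mask, randomly chooses count_drop of them, and zeroes those
        # positions (with the defaults noted above: 15 of 20, i.e. roughly a
        # third of a 7x7 grid), leaving the rest of the mask at 1.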
303 | mask_pred = np.copy(bottom[0].data) 304 | sample_num = mask_pred.shape[0] 305 | pool_len = mask_pred.shape[2] 306 | 307 | 308 | self._count_iter = (self._count_iter + 1) % self._iter_size 309 | if self._count_iter >= self._maintain_before: 310 | mask_sel,mask_for_loss = self.select_mask(mask_pred) 311 | else: 312 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 313 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 314 | 315 | 316 | #print(mask_sel[0,0,:,:]) 317 | mask_pred_tile = np.tile(mask_sel, [1, self._channels, 1, 1]) 318 | 319 | mask_inv = np.abs(1-mask_sel) 320 | #print(mask_sel[0,0,:,:]) 321 | #print(mask_inv[0,0,:,:]) 322 | top_ind = self._name_to_top_map['mask_pred_tile'] 323 | top[top_ind].reshape(*(mask_pred_tile.shape)) 324 | top[top_ind].data[...] = mask_pred_tile.astype(np.float32, copy=False) 325 | 326 | top_ind = self._name_to_top_map['mask_pred_thres'] 327 | top[top_ind].reshape(*(mask_sel.shape)) 328 | top[top_ind].data[...] = mask_sel.astype(np.float32, copy=False) 329 | 330 | top_ind = self._name_to_top_map['mask_inv'] 331 | top[top_ind].reshape(*(mask_inv.shape)) 332 | top[top_ind].data[...] = mask_inv.astype(np.float32, copy=False) 333 | 334 | 335 | def backward(self, top, propagate_down, bottom): 336 | top_0_diff = np.zeros(np.shape(top[2].diff)) 337 | top_0_diff[:,0,:,:] = np.mean(top[0].diff, axis=1) 338 | #bottom[0].diff[...] *= top[1].diff 339 | bottom[0].diff[...] = (top[2].diff + top_0_diff) 340 | #print("\n\n\n\nbottom[0].diff:") 341 | # print('inv_diff: \n',top[2].diff[0,0,:,:]) 342 | # print('cls_diff: \n',top_0_diff[0,0,:,:]) 343 | #print(top_0_diff.shape) 344 | #tile_diff = np.tile(top[0].diff, [1, 1, 1, 1]) 345 | #bottom[0].diff[...] *= (tile_diff + top[1].diff) 346 | 347 | def reshape(self, bottom, top): 348 | """Reshaping happens during the call to forward.""" 349 | pass 350 | 351 | 352 | 353 | 354 | 355 | class TileLayer2(caffe.Layer): 356 | def setup(self, bottom, top): 357 | """Setup the TileLayer.""" 358 | 359 | # parse the layer parameter string, which must be valid YAML 360 | layer_params = yaml.load(self.param_str_) 361 | 362 | self._channels = layer_params['channels'] 363 | self._count_drop = layer_params['count_drop'] 364 | self._permute_count = layer_params['permute_count'] 365 | 366 | self._iter_size = layer_params['iter_size'] 367 | self._maintain_before = layer_params['maintain_before'] # maintain the first image unchanged 368 | 369 | self._count_iter = 0 370 | 371 | self._name_to_bottom_map = { 372 | 'mask_pred': 0, 373 | 'gt_mask_fg': 1} 374 | # 0 means block, 1 means maintain 375 | 376 | self._name_to_top_map = { 377 | 'mask_pred_tile': 0 , 378 | 'mask_pred_thres':1, 379 | 'mask_inv':2} 380 | 381 | 382 | # top[0].reshape(*(bottom[0].data.shape)) 383 | top[0].reshape(bottom[0].data.shape[0], self._channels, bottom[0].data.shape[2], bottom[0].data.shape[3]) 384 | top[1].reshape(bottom[0].data.shape[0], 1, bottom[0].data.shape[2], bottom[0].data.shape[3]) 385 | top[2].reshape(bottom[0].data.shape[0], 1, bottom[0].data.shape[2], bottom[0].data.shape[3]) 386 | 387 | assert len(top) == len(self._name_to_top_map) 388 | 389 | def select_mask(self, mask_pred): 390 | #1 means block in the input 391 | cnt = 0 392 | pool_len = mask_pred.shape[2] 393 | sample_num = mask_pred.shape[0] 394 | 395 | mask_pixels = pool_len * pool_len 396 | 397 | count_drop = self._count_drop #15 398 | permute_count = self._permute_count #20 399 | 400 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 401 | mask_for_loss = 
np.ones((sample_num, 1, pool_len, pool_len)) 402 | for i in range(sample_num): 403 | 404 | #not exactly as mentioned in the paper 405 | #15/49 ~= 1/3 are selected as 0 406 | #first choose the 20 lowest-predicted pixels in the mask (trained in stage 2) 407 | #randomly choose 15 of the 20 pixels to set to zero 408 | rp = np.random.permutation(np.arange(permute_count)) 409 | rp = rp[0: count_drop] 410 | 411 | final_mask = np.ones(mask_pixels) 412 | 413 | now_mask_pred = mask_pred[i] 414 | now_mask_pred_array = np.reshape(now_mask_pred, mask_pixels) 415 | #convert the mask to an array and sort it by ascending pixel value 416 | sorted_ids = np.argsort(now_mask_pred_array) 417 | now_ids = sorted_ids[rp] 418 | 419 | sel = np.zeros(mask_pixels) 420 | sel[now_ids] = 1 421 | _final_mask = sel * now_mask_pred_array 422 | if i==100000: 423 | print(mask_pred[i,0,:,:]) 424 | #use this method later 425 | final_mask[np.where(_final_mask==0)] = 1 426 | final_mask[np.where(_final_mask!=0)] = 0 427 | #have to try this first, finding the most important part to mask. 428 | #final_mask[np.where(final_mask!=0)] = 1 429 | 430 | now_mask = np.reshape(final_mask, (pool_len, pool_len)) 431 | _final_mask[np.where(_final_mask==0)] = 1 432 | _now_mask = np.reshape(_final_mask, (pool_len, pool_len)) 433 | if cnt==10000000: 434 | print('GT: ') 435 | print(now_mask) 436 | cnt += 1 437 | mask_sel[i,0,:,:] = np.copy(now_mask) 438 | mask_for_loss[i,0,:,:] = np.copy(_now_mask) 439 | return mask_sel,mask_for_loss 440 | 441 | def forward(self, bottom, top): 442 | gt_mask_fg = np.copy(bottom[1].data) 443 | mask_pred = np.copy(bottom[0].data) 444 | sample_num = mask_pred.shape[0] 445 | pool_len = mask_pred.shape[2] 446 | #print("\n\nN:",sample_num) 447 | self._count_iter = (self._count_iter + 1) % self._iter_size#itersize = 5 448 | if self._count_iter == 0: 449 | mask_sel,mask_for_loss = self.select_mask(mask_pred) 450 | elif self._count_iter == 1: 451 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 452 | for i in range(sample_num): 453 | mask_sel_pre = np.ones(pool_len*pool_len) 454 | arg_array = np.random.permutation(range(pool_len*pool_len))[:20] 455 | mask_sel_pre[arg_array] = 0 456 | mask_rand = np.reshape(mask_sel_pre,(pool_len,pool_len)) 457 | mask_sel[i,0,:,:] = np.copy(mask_rand) 458 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 459 | elif self._count_iter == 2: 460 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 461 | for i in range(sample_num): 462 | mask_sel_pre = np.ones((pool_len,pool_len)) 463 | rnd = np.random.randint(0,4) 464 | drop = pool_len/2 + 1 465 | if rnd == 0 : 466 | mask_sel_pre[:,:drop] = 0 467 | elif rnd == 1 : 468 | mask_sel_pre[:,drop-1:] = 0 469 | elif rnd == 2 : 470 | mask_sel_pre[:drop,:] = 0 471 | else: 472 | mask_sel_pre[drop-1:,:] = 0 473 | mask_sel[i,0,:,:] = np.copy(mask_sel_pre) 474 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 475 | else: 476 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 477 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 478 | 479 | if not np.all(np.unique(gt_mask_fg) == 1 ): 480 | # print(np.unique(gt_mask_fg)) 481 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 482 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 483 | # else: 484 | # print(np.unique(gt_mask_fg)) 485 | 486 | mask_inv = np.abs(1-mask_sel) 487 | 488 | #print(mask_sel[0,0,:,:]) 489 | mask_pred_tile = np.tile(mask_sel, [1, self._channels, 1, 1]) 490 | #print(mask_pred_tile[0,0,:,:]) 491 | top_ind = 
self._name_to_top_map['mask_pred_tile'] 492 | top[top_ind].reshape(*(mask_pred_tile.shape)) 493 | top[top_ind].data[...] = mask_pred_tile.astype(np.float32, copy=False) 494 | 495 | top_ind = self._name_to_top_map['mask_pred_thres'] 496 | top[top_ind].reshape(*(mask_sel.shape)) 497 | top[top_ind].data[...] = mask_sel.astype(np.float32, copy=False) 498 | 499 | top_ind = self._name_to_top_map['mask_inv'] 500 | top[top_ind].reshape(*(mask_inv.shape)) 501 | top[top_ind].data[...] = mask_inv.astype(np.float32, copy=False) 502 | 503 | 504 | def backward(self, top, propagate_down, bottom): 505 | top_0_diff = np.zeros(np.shape(top[2].diff)) 506 | top_0_diff[:,0,:,:] = np.mean(top[0].diff, axis=1) 507 | #bottom[0].diff[...] *= top[1].diff 508 | bottom[0].diff[...] = (top[2].diff + top_0_diff) 509 | #print("\n\n\n\nbottom[0].diff:") 510 | #print(top_0_diff[:,0,:,:]) 511 | #print(top_0_diff.shape) 512 | #tile_diff = np.tile(top[0].diff, [1, 1, 1, 1]) 513 | #bottom[0].diff[...] *= (tile_diff + top[1].diff) 514 | 515 | def reshape(self, bottom, top): 516 | """Reshaping happens during the call to forward.""" 517 | pass 518 | 519 | 520 | 521 | class SiftFaceLayer(caffe.Layer): 522 | def setup(self, bottom, top): 523 | 524 | # parse the layer parameter string, which must be valid YAML 525 | layer_params = yaml.load(self.param_str_) 526 | self.onlyface_mask = np.ones(bottom[0].data.shape) 527 | 528 | self._name_to_bottom_map = { 529 | 'conv5_3': 0, 530 | 'bbox_pred': 1, 531 | 'cls_score': 2, 532 | 'gt_mask': 3, 533 | 'rois': 4, 534 | 'im_info': 5} 535 | 536 | self._name_to_top_map = { 537 | 'onlyface': 0, 538 | 'gt_mask_fg': 1} 539 | 540 | top[0].reshape(*(bottom[0].data.shape)) 541 | top[1].reshape(*(bottom[3].data.shape)) 542 | 543 | print 'SiftFaceLayer: name_to_top:', self._name_to_top_map 544 | assert len(top) == len(self._name_to_top_map) 545 | 546 | def forward(self, bottom, top): 547 | 548 | #conv5_3 = np.copy(bottom[0].data) 549 | assert(bottom[0].data.shape[0] == 1) 550 | box_deltas = np.copy(bottom[1].data) 551 | scores = np.copy(bottom[2].data) 552 | gt_mask_fg = np.copy(bottom[3].data) 553 | #print(np.mean(gt_mask_fg)) 554 | onlyface = np.copy(bottom[0].data) 555 | rois = np.copy(bottom[4].data) 556 | im_info = np.copy(bottom[5].data) 557 | 558 | boxes = rois[:, 1:5] 559 | pred_boxes = bbox_transform_inv(boxes, box_deltas) 560 | # boxes = clip_boxes(pred_boxes, gt_mask_fg[0,0,:,:].shape[::-1]) 561 | boxes = clip_boxes(pred_boxes, (int(im_info[0][0]),int(im_info[0][1]))) 562 | 563 | 564 | if np.all(np.unique(gt_mask_fg) == 1): 565 | ## masks for images without occlusion are set to all ones 566 | onlyface = np.zeros(onlyface.shape) 567 | gt_mask_fg = np.zeros(gt_mask_fg.shape) 568 | #print(np.sum(gt_mask_fg)) 569 | else: 570 | #print('nonzero input !!!') 571 | CONF_THRESH = 0.6 572 | NMS_THRESH = 0.25 573 | zoom = 16 574 | 575 | #find face areas 576 | cls_ind = 1 577 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 578 | cls_scores = scores[:, cls_ind] 579 | dets = np.hstack((cls_boxes, 580 | cls_scores[:, np.newaxis])).astype(np.float32) 581 | keep = nms(dets, NMS_THRESH) 582 | dets = dets[keep, :] 583 | 584 | keep = np.where(dets[:, 4] > CONF_THRESH) 585 | dets = dets[keep] #shape (n,5): n predicted boxes, each row holds the top-left and bottom-right coords plus a score 586 | #enlarge boxes 587 | # dets[:,:4] *= 1.1 588 | # print(dets) 589 | # print(dets.shape) 590 | # print(bottom[3].data.shape) 591 | # print(bottom[0].data.shape) 592 | #generate a mask for gt mask 593 | mask4gt = 
np.zeros(bottom[3].data.shape) 594 | for each in dets: 595 | mask4gt[:,:,each[1]:each[3]+1,each[0]:each[2]+1] = 1 596 | 597 | # gt_mask_fg *= mask4gt 598 | 599 | # map to conv5_3 600 | dets[:,:4] //= zoom 601 | 602 | #generate a mask for conv5_3 603 | mask4conv = np.zeros(bottom[0].data.shape) 604 | for each in dets: 605 | mask4conv[:,:,each[1]:each[3]+1,each[0]:each[2]+1] = 1 606 | 607 | # onlyface *= mask4conv 608 | self.onlyface_mask = mask4conv 609 | 610 | # print(np.sum(onlyface)) 611 | 612 | top_ind = self._name_to_top_map['onlyface'] 613 | top[top_ind].reshape(*(onlyface.shape)) 614 | top[top_ind].data[...] = onlyface.astype(np.float32, copy=False) 615 | 616 | top_ind = self._name_to_top_map['gt_mask_fg'] 617 | top[top_ind].reshape(*(gt_mask_fg.shape)) 618 | top[top_ind].data[...] = gt_mask_fg.astype(np.float32, copy=False) 619 | 620 | 621 | def backward(self, top, propagate_down, bottom): 622 | gt_mask_fg = np.copy(bottom[3].data) 623 | # masks for images without occlusion are set to all ones 624 | if np.all(np.unique(gt_mask_fg) == 1): 625 | #print('back 0') 626 | bottom[0].diff[...] = 0 627 | else: 628 | #print("back") 629 | for i in range(4): 630 | if not propagate_down[i]: 631 | continue 632 | else: 633 | # bottom[0].diff[...] = top[0].diff * self.onlyface_mask 634 | bottom[0].diff[...] = top[0].diff 635 | 636 | def reshape(self, bottom, top): 637 | 638 | """Reshaping happens during the call to forward.""" 639 | pass 640 | 641 | 642 | 643 | class SiftFace4TestLayer(caffe.Layer): 644 | def setup(self, bottom, top): 645 | 646 | # parse the layer parameter string, which must be valid YAML 647 | layer_params = yaml.load(self.param_str_) 648 | self.onlyface_mask = np.ones(bottom[0].data.shape) 649 | 650 | self._name_to_bottom_map = { 651 | 'conv5_3': 0, 652 | 'bbox_pred': 1, 653 | 'cls_score': 2, 654 | 'rois': 3, 655 | 'im_info': 4} 656 | 657 | self._name_to_top_map = { 658 | 'onlyface': 0} 659 | 660 | top[0].reshape(*(bottom[0].data.shape)) 661 | 662 | 663 | print 'SiftFace4TestLayer: name_to_top:', self._name_to_top_map 664 | assert len(top) == len(self._name_to_top_map) 665 | 666 | def forward(self, bottom, top): 667 | 668 | #conv5_3 = np.copy(bottom[0].data) 669 | assert(bottom[0].data.shape[0] == 1) 670 | box_deltas = np.copy(bottom[1].data) 671 | scores = np.copy(bottom[2].data) 672 | onlyface = np.copy(bottom[0].data) 673 | rois = np.copy(bottom[3].data) 674 | im_info = np.copy(bottom[4].data) 675 | #print('layer rois: ',rois) 676 | boxes = rois[:, 1:5] 677 | pred_boxes = bbox_transform_inv(boxes, box_deltas) 678 | # boxes = clip_boxes(pred_boxes, gt_mask_fg[0,0,:,:].shape[::-1]) 679 | boxes = clip_boxes(pred_boxes, (int(im_info[0][0]),int(im_info[0][1]))) 680 | #print('im_info',(int(im_info[0][0]),int(im_info[0][1]),int(im_info[0][2]))) 681 | 682 | 683 | CONF_THRESH = 0.65 684 | NMS_THRESH = 0.15 685 | zoom = 16 686 | 687 | 688 | # print('layerbox:', boxes) 689 | #find face areas 690 | cls_ind = 1 691 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 692 | cls_scores = scores[:, cls_ind] 693 | dets = np.hstack((cls_boxes, 694 | cls_scores[:, np.newaxis])).astype(np.float32) 695 | keep = nms(dets, NMS_THRESH) 696 | dets = dets[keep, :] 697 | 698 | keep = np.where(dets[:, 4] > CONF_THRESH) 699 | dets = dets[keep] #shape (n,5): n predicted boxes, each row holds the top-left and bottom-right coords plus a score 700 | #enlarge boxes 701 | #print('dddets: ',dets) 702 | # dets[:,:4] *= 1 703 | # print(dets) 704 | # print(dets.shape) 705 | # print(bottom[3].data.shape) 706 | # 
print(bottom[0].data.shape) 707 | #generate a mask for gt mask 708 | # mask4gt = np.zeros(bottom[3].data.shape) 709 | # for each in dets: 710 | # mask4gt[:,:,each[0]:each[2]+1,each[1]:each[3]+1] = 1 711 | 712 | # gt_mask_fg *= mask4gt 713 | 714 | 715 | # map to conv5_3 716 | dets[:,:4] //= zoom 717 | #print('conv53:', bottom[0].data.shape) 718 | #print('premask: ',dets.shape) 719 | #generate a mask for conv5_3 720 | mask4conv = np.zeros(bottom[0].data.shape) 721 | for each in dets: 722 | mask4conv[:,:,each[1]:each[3]+1,each[0]:each[2]+1] = 1 723 | 724 | # pickle.dump(mask4conv, open("vis.txt", "w")) 725 | onlyface *= mask4conv 726 | self.onlyface_mask = mask4conv 727 | 728 | # print(np.sum(onlyface)) 729 | 730 | top_ind = self._name_to_top_map['onlyface'] 731 | top[top_ind].reshape(*(onlyface.shape)) 732 | top[top_ind].data[...] = onlyface.astype(np.float32, copy=False) 733 | 734 | 735 | 736 | def backward(self, top, propagate_down, bottom): 737 | pass 738 | def reshape(self, bottom, top): 739 | 740 | """Reshaping happens during the call to forward.""" 741 | pass 742 | 743 | 744 | # class SiftFace4TestLayer(caffe.Layer): 745 | # def setup(self, bottom, top): 746 | 747 | # # parse the layer parameter string, which must be valid YAML 748 | # layer_params = yaml.load(self.param_str_) 749 | # self.onlyface_mask = np.ones(bottom[0].data.shape) 750 | 751 | # self._name_to_bottom_map = { 752 | # 'conv5_3': 0, 753 | # 'bbox_pred': 1, 754 | # 'cls_score':2 } 755 | 756 | # self._name_to_top_map = { 757 | # 'onlyface': 0} 758 | 759 | # top[0].reshape(*(bottom[0].data.shape)) 760 | 761 | # print 'SiftFaceLayer: name_to_top:', self._name_to_top_map 762 | # assert len(top) == len(self._name_to_top_map) 763 | 764 | # def forward(self, bottom, top): 765 | 766 | # #conv5_3 = np.copy(bottom[0].data) 767 | # assert(bottom[0].data.shape[0] == 1) 768 | # boxes = np.copy(bottom[1].data) 769 | # scores = np.copy(bottom[2].data) 770 | 771 | 772 | # onlyface = np.copy(bottom[0].data) 773 | 774 | # CONF_THRESH = 0.6 775 | # NMS_THRESH = 0.3 776 | # zoom = 16 777 | 778 | # #find face areas 779 | # cls_ind = 1 780 | # cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 781 | # cls_scores = scores[:, cls_ind] 782 | # dets = np.hstack((cls_boxes, 783 | # cls_scores[:, np.newaxis])).astype(np.float32) 784 | # keep = nms(dets, NMS_THRESH) 785 | # dets = dets[keep, :] 786 | 787 | # keep = np.where(dets[:, 4] > CONF_THRESH) 788 | # dets = dets[keep] #shape(n,5) n means n predictes boxes, 5 includes top left and bottom right coords and a score 789 | # #enlarge boxes 790 | # dets[:,:4] *= 1.3 791 | 792 | # # map to conv5_3 793 | # dets[:,:4] //= zoom 794 | 795 | # #generate a mask for conv5_3 796 | # mask4conv = np.zeros(bottom[0].data.shape) 797 | # for each in dets: 798 | # mask4conv[:,:,each[0]:each[2]+1,each[1]:each[3]+1] = 1 799 | 800 | # # onlyface *= mask4conv 801 | # self.onlyface_mask = mask4conv 802 | 803 | # # print(np.sum(onlyface)) 804 | 805 | # top_ind = self._name_to_top_map['onlyface'] 806 | # top[top_ind].reshape(*(onlyface.shape)) 807 | # top[top_ind].data[...] 
= onlyface.astype(np.float32, copy=False) 808 | 809 | 810 | 811 | # def backward(self, top, propagate_down, bottom): 812 | # pass 813 | 814 | # def reshape(self, bottom, top): 815 | 816 | # """Reshaping happens during the call to forward.""" 817 | # pass 818 | 819 | 820 | 821 | 822 | 823 | 824 | 825 | 826 | 827 | 828 | class ShuffleMaskLayer(caffe.Layer): 829 | def setup(self, bottom, top): 830 | # parse the layer parameter string, which must be valid YAML 831 | layer_params = yaml.load(self.param_str_) 832 | 833 | self._channels = layer_params['channels'] 834 | self._name_to_bottom_map = { 835 | 'mask_pred_thres': 0 } 836 | self._name_to_top_map = { 837 | 'mask_pred_tile_shuffle': 0} 838 | 839 | top[0].reshape(bottom[0].data.shape[0], self._channels, bottom[0].data.shape[2], bottom[0].data.shape[3]) 840 | print 'ShuffleMaskLayer: name_to_top:', self._name_to_top_map 841 | assert len(top) == len(self._name_to_top_map) 842 | 843 | def forward(self, bottom, top): 844 | mask_pred_thres = np.copy(bottom[0].data) 845 | sample_num = mask_pred_thres.shape[0] 846 | pool_len = mask_pred_thres.shape[2] 847 | mask_pixels = pool_len * pool_len 848 | 849 | mask_pred_tile_shuffle = np.ones((sample_num, self._channels, pool_len, pool_len)) 850 | 851 | for i in range(sample_num): 852 | drop_cnt = len(np.where(mask_pred_thres[i,0,:,:]==0)[0]) 853 | mask_thres_array = np.reshape(mask_pred_thres[i,0,:,:], mask_pixels) 854 | drop_ind = np.where(mask_thres_array==0)[0] 855 | for j in range(self._channels): 856 | rnd = np.random.rand(drop_cnt) 857 | shuffle_mask = np.ones(mask_pixels) 858 | shuffle_mask[drop_ind] = rnd 859 | _shuffle_mask = np.reshape(shuffle_mask, (pool_len, pool_len)) 860 | mask_pred_tile_shuffle[i,j,:,:] = np.copy(_shuffle_mask) 861 | 862 | top_ind = self._name_to_top_map['mask_pred_tile_shuffle'] 863 | top[top_ind].reshape(*(mask_pred_tile_shuffle.shape)) 864 | top[top_ind].data[...] = mask_pred_tile_shuffle.astype(np.float32, copy=False) 865 | 866 | def backward(self, top, propagate_down, bottom): 867 | pass 868 | 869 | 870 | def reshape(self, bottom, top): 871 | """Reshaping happens during the call to forward.""" 872 | pass 873 | 874 | 875 | 876 | 877 | 878 | 879 | class MaskPredLossLayer(caffe.Layer): 880 | def setup(self, bottom, top): 881 | layer_params = yaml.load(self.param_str_) 882 | self._name_to_bottom_map = { 883 | 'mask_pred': 0, 884 | 'mask_gt': 1} 885 | self._name_to_top_map = { 886 | 'loss': 0} 887 | self.ignore_label = None 888 | top[0].reshape(1) 889 | print 'MaskPredLossLayer: name_to_top:', self._name_to_top_map 890 | assert len(top) == len(self._name_to_top_map) 891 | 892 | def forward(self, bottom, top): 893 | N = bottom[0].shape[0] 894 | mask_pred = bottom[0].data 895 | mask_label = bottom[1].data 896 | 897 | ary = np.reshape(mask_pred[0,0,:,:],49) 898 | ids = np.argsort(ary) 899 | ary[ids[:15]]=0 900 | ary[np.where(ary!=0)]=1 901 | msk = np.reshape(ary,(7,7)) 902 | #print("mask_pred: ") 903 | #print(msk) 904 | 905 | count_bit = 1 906 | for i in range(len(bottom[0].shape)): 907 | count_bit = count_bit * bottom[0].shape[i] 908 | 909 | # copy from: https://github.com/philkr/voc-classification/blob/master/src/python_layers.py#L52 910 | f, df, t = bottom[0].data, bottom[0].diff, bottom[1].data 911 | mask = (self.ignore_label is None or t != self.ignore_label) 912 | lZ = np.log(1+np.exp(-np.abs(f))) * mask 913 | dlZ = np.exp(np.minimum(f,0))/(np.exp(np.minimum(f,0))+np.exp(-np.maximum(f,0))) * mask 914 | 915 | 916 | # top[0].data[...] 


class MaskPredLossLayer(caffe.Layer):
    def setup(self, bottom, top):
        layer_params = yaml.load(self.param_str_)
        self._name_to_bottom_map = {
            'mask_pred': 0,
            'mask_gt': 1}
        self._name_to_top_map = {
            'loss': 0}
        self.ignore_label = None
        top[0].reshape(1)
        print('MaskPredLossLayer: name_to_top:', self._name_to_top_map)
        assert len(top) == len(self._name_to_top_map)

    def forward(self, bottom, top):
        N = bottom[0].shape[0]
        mask_pred = bottom[0].data
        mask_label = bottom[1].data

        # debug view of the first sample's binarized prediction; work on a
        # copy, otherwise the reshaped view would overwrite bottom[0].data
        # before the loss below reads it
        ary = np.reshape(mask_pred[0,0,:,:].copy(), 49)
        ids = np.argsort(ary)
        ary[ids[:15]] = 0
        ary[np.where(ary != 0)] = 1
        msk = np.reshape(ary, (7,7))
        #print("mask_pred: ")
        #print(msk)

        # total number of elements in the prediction blob
        count_bit = 1
        for i in range(len(bottom[0].shape)):
            count_bit = count_bit * bottom[0].shape[i]

        # copied from: https://github.com/philkr/voc-classification/blob/master/src/python_layers.py#L52
        f, df, t = bottom[0].data, bottom[0].diff, bottom[1].data
        mask = (self.ignore_label is None or t != self.ignore_label)
        lZ = np.log(1 + np.exp(-np.abs(f))) * mask
        dlZ = np.exp(np.minimum(f,0)) / (np.exp(np.minimum(f,0)) + np.exp(-np.maximum(f,0))) * mask

        # top[0].data[...] = np.sum(lZ + ((f>0)-t)*f * mask) / N
        # df[...] = (dlZ - t*mask) / N

        lZ = lZ + ((f>0)-t)*f * mask
        df[...] = (dlZ - t*mask) / count_bit

        # zero the loss and gradient for samples whose ground-truth mask sums
        # to 49 (all ones) or to 20
        for i in range(N):
            if (np.sum(mask_label[i,0,:,:]) == 49 or np.sum(mask_label[i,0,:,:]) == 20):
                lZ[i] = lZ[i] * 0.0
                df[i] = df[i] * 0.0

        # for i in range(N):
        #     lbl = labels[i]
        #     prop_before_select = prop_before[i][lbl]
        #     prop_after_select = prop_after[i][lbl]
        #
        #     if (lbl > 0 and prop_after_select + self._score_thres < prop_before_select) == False:
        #         lZ[i] = lZ[i] * 0.0
        #         df[i] = df[i] * 0.0

        top[0].data[...] = np.sum(lZ) / count_bit

    def backward(self, top, prop, bottom):
        bottom[0].diff[...] *= top[0].diff

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass


class MaskGenLayer(caffe.Layer):
    def setup(self, bottom, top):
        # parse the layer parameter string, which must be valid YAML
        layer_params = yaml.load(self.param_str_)

        self._channels = layer_params['channels']
        self._means = layer_params['means']
        #self._count_drop = layer_params['count_drop']
        #self._permute_count = layer_params['permute_count']

        # self._iter_size = layer_params['iter_size']
        # self._maintain_before = layer_params['maintain_before']  # maintain the first image unchanged
        #
        # self._count_iter = 0

        self._name_to_bottom_map = {
            'mask_pred': 0}

        # 0 means block, 1 means maintain

        self._name_to_top_map = {
            'mask_pred_tile': 0,
            'mask_pred_thres': 1}

        # top[0].reshape(*(bottom[0].data.shape))
        top[0].reshape(bottom[0].data.shape[0], self._channels, 7, 7)
        top[1].reshape(bottom[0].data.shape[0], self._channels, 7, 7)

        assert len(top) == len(self._name_to_top_map)

    def generate_mask(self, mask_pred):
        # 0 means block in the input
        pool_len = 7
        k = mask_pred.shape[2]
        stride = pool_len // k                       # integer division (works under Python 2 and 3)
        stride_up = pool_len // k + int(pool_len % k > 0)
        sample_num = mask_pred.shape[0]

        mask_pixels = k * k

        # count_drop = self._count_drop      # 15
        # permute_count = self._permute_count  # 20

        mask_gen = np.ones((sample_num, 1, pool_len, pool_len))
        # 2x2 block mask (returned but currently unused by forward)
        mask_2_2 = np.ones((sample_num, 1, stride, stride))

        for i in range(sample_num):
            now_mask_pred = mask_pred[i]
            now_mask_pred_array = np.reshape(now_mask_pred, mask_pixels)
            # convert the mask to an array and sort it ascending by pixel
            # value; the two lowest-scoring cells get blocked
            sorted_ids = np.argsort(now_mask_pred_array)
            now_ids = sorted_ids[:2]
            for ii in now_ids:
                if ii // 2 == 0:                     # top row of the k x k grid
                    mask_gen[i,0,:stride_up,ii*stride:ii*stride+stride_up] = 0
                    mask_2_2[i,0,0,ii%2] = 0
                else:                                # bottom row
                    j = ii % 2
                    mask_gen[i,0,stride:stride+stride,j*stride:j*stride+stride] = 0
                    mask_2_2[i,0,1,ii%2] = 0

        #if ii == 0:
        #print(now_ids)
        #print(mask_pred[0])
        # print("mask:")
        #print(mask_gen[0])

        return mask_gen, mask_2_2

    def forward(self, bottom, top):
        # 0 means block!!
        mask_pred = np.copy(bottom[0].data)

        # self._count_iter = (self._count_iter + 1) % self._iter_size
        # if self._count_iter >= self._maintain_before:
        mask_gen, mask_2_2 = self.generate_mask(mask_pred)
        # else:
        #     mask_gen = np.ones((sample_num, 1, pool_len, pool_len))

        mask_pred_tile = np.tile(mask_gen, [1, self._channels, 1, 1])

        top_ind = self._name_to_top_map['mask_pred_tile']
        top[top_ind].reshape(*(mask_pred_tile.shape))
        top[top_ind].data[...] = mask_pred_tile.astype(np.float32, copy=False)

        top_ind = self._name_to_top_map['mask_pred_thres']
        top[top_ind].reshape(*(mask_gen.shape))
        top[top_ind].data[...] = mask_gen.astype(np.float32, copy=False)

        #print("\n\nind_shape: ", mask_gen.shape)

    def backward(self, top, propagate_down, bottom):
        # pool the 7x7 top gradient back onto the 2x2 prediction grid by
        # averaging each (overlapping) 4x4 quadrant
        top_diff = np.zeros((top[1].diff.shape[0], top[1].diff.shape[1],
                             top[1].diff.shape[2] // 3, top[1].diff.shape[3] // 3))
        for i in range(top[1].diff.shape[0]):
            top_diff[i,0,0,0] = np.mean(top[1].diff[i,0,:4,:4])
            top_diff[i,0,0,1] = np.mean(top[1].diff[i,0,:4,3:])
            top_diff[i,0,1,0] = np.mean(top[1].diff[i,0,3:,:4])
            top_diff[i,0,1,1] = np.mean(top[1].diff[i,0,3:,3:])
        bottom[0].diff[...] = top_diff

        #bottom[0].diff[...] = top[1].diff
        #print("ind_loss:")
        #print(bottom[0].diff[0,:,:,:])

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
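

# ---------------------------------------------------------------------------
# Illustrative sketch (added for documentation; never called by the network):
# what MaskGenLayer.generate_mask does for one sample -- block the two
# lowest-scoring cells of a 2x2 prediction grid, upsampled onto the 7x7
# RoI-pooled mask. The score values are made up.
def _demo_generate_mask():
    import numpy as np
    stride, stride_up = 3, 4                     # 7 // 2 and ceil(7 / 2)
    pred = np.array([0.9, 0.1, 0.2, 0.8])        # hypothetical 2x2 scores, row-major
    mask = np.ones((7, 7))
    for ii in np.argsort(pred)[:2]:              # the two lowest-scoring cells
        if ii // 2 == 0:                         # top row: a 4x4 block is zeroed
            mask[:stride_up, ii*stride:ii*stride+stride_up] = 0
        else:                                    # bottom row: a 3x3 block is zeroed
            j = ii % 2
            mask[stride:2*stride, j*stride:j*stride+stride] = 0
    return mask
# ---------------------------------------------------------------------------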


class SumLossLayer(caffe.Layer):
    def setup(self, bottom, top):
        layer_params = yaml.load(self.param_str_)
        self._name_to_bottom_map = {
            'mask_pred': 0}
        self._name_to_top_map = {
            'loss': 0}
        top[0].reshape(1)
        print('SumLossLayer: name_to_top:', self._name_to_top_map)
        assert len(top) == len(self._name_to_top_map)

    def forward(self, bottom, top):
        mask_pred = np.copy(bottom[0].data)
        #print('\nmask_pred: ', mask_pred[0,0,:,:])
        batchSz = bottom[0].data.shape[0]
        mask_pred[np.where(mask_pred < 0)] = 0    # rectify: only positive predictions contribute
        #print('\n\nmask_pred:')
        #print(mask_pred[0,0,:,:])
        top[0].data[...] = np.sum(mask_pred) / batchSz

    def backward(self, top, propagate_down, bottom):
        mask_pred = np.copy(bottom[0].data)
        mask_pred[np.where(mask_pred < 0)] = 0
        back = 1e-5 * np.ones(bottom[0].data.shape)
        back *= mask_pred
        batchSz = bottom[0].data.shape[0]
        #print('\ndiff: ', back[0,0,:,:])
        bottom[0].diff[...] = back

    def reshape(self, bottom, top):
        pass
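

# ---------------------------------------------------------------------------
# Reference sketch (added for documentation; never called by the network) of
# the L1 loss and its gradients as implemented by L1LossLayer below:
# d|a-b|/da = sign(a-b) and d|a-b|/db = -sign(a-b). The arrays are whatever
# the caller passes in; 24 mirrors the layer's normalization constant.
def _demo_l1_loss(a, b, batch_sz):
    import numpy as np
    norm = float(batch_sz * 24)
    loss = np.sum(np.abs(a - b)) / norm
    da = np.sign(a - b) / norm                   # gradient w.r.t. the prediction
    db = -np.sign(a - b) / norm                  # gradient w.r.t. the target branch
    return loss, da, db
# ---------------------------------------------------------------------------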


# Simple L1 loss layer
class L1LossLayer(caffe.Layer):
    def setup(self, bottom, top):
        # parse the layer parameter string, which must be valid YAML
        layer_params = yaml.load(self.param_str_)

        self.loss_weight = layer_params['loss_weight']

        self._name_to_bottom_map = {
            'mask_gen_thres': 0,
            'mask_ind_thres': 1}

        # 0 means block, 1 means maintain

        self._name_to_top_map = {
            'loss': 0}

        assert len(bottom) == 2, 'There should be two bottom blobs'
        predShape = bottom[0].data.shape
        gtShape = bottom[1].data.shape
        for i in range(len(predShape)):
            assert predShape[i] == gtShape[i], 'Mismatch: %d, %d' % (predShape[i], gtShape[i])
        assert bottom[0].data.squeeze().ndim == bottom[1].data.squeeze().ndim, 'Shape Mismatch'

        print("bottom[0].shape", bottom[0].shape)
        print("bottom[0].data.shape", bottom[0].data.shape)

        # Get the batchSz
        self.batchSz_ = gtShape[0]
        # Form the top
        assert len(top) == 1, 'There should be only one output blob'
        top[0].reshape(1,1,1,1)

    def forward(self, bottom, top):
        #print("lossbottomshape:", bottom[0].data.shape, bottom[1].data.shape)
        batchSz = bottom[0].data.shape[0]
        top[0].data[...] = np.sum(np.abs(bottom[0].data[...].squeeze()
                                         - bottom[1].data[...].squeeze())) / float(batchSz*24)
        #print("loss weight: ", self.loss_weight)
        #print('Loss is %f' % top[0].data[0])
        #print(bottom[0].data[...].squeeze()[0])
        #print(bottom[1].data[...].squeeze()[0])
        #print(np.sum(np.abs(bottom[0].data[...].squeeze() - bottom[1].data[...].squeeze())) / float(batchSz))
        #print("batch_Sz:")
        #print(float(self.batchSz_))

    def backward(self, top, propagate_down, bottom):
        batchSz = bottom[0].data.shape[0]
        # d|a-b|/da = sign(a-b); keep the full blob shape so the assignment
        # matches the diff blobs, and flip the sign for the second (target)
        # bottom, since d|a-b|/db = -sign(a-b)
        diff = np.sign(bottom[0].data - bottom[1].data) / float(batchSz*24)
        bottom[0].diff[...] = diff
        bottom[1].diff[...] = -diff
        #print("\n\n\n\nloss.diff:")
        #print(bottom[0].diff)

    def reshape(self, bottom, top):
        top[0].reshape(1,1,1,1)

--------------------------------------------------------------------------------