├── res.jpg ├── mafa.pdf ├── README.md ├── solver_pmask.prototxt ├── train_final_pmask.prototxt └── layer.py /res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IssacCyj/Adversarial-Occlussion-aware-Face-Detection/HEAD/res.jpg -------------------------------------------------------------------------------- /mafa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IssacCyj/Adversarial-Occlussion-aware-Face-Detection/HEAD/mafa.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Adversarial-Occlussion-aware-Face-Detection 2 | Implementation of the BTAS 2018 oral paper Adversarial Occlusion-aware Face Detection (https://arxiv.org/abs/1709.05188). 3 | Code is written in Caffe with the layer interface in Python. 4 | 5 | ![res](https://github.com/IssacCyj/Adversarial-Occlussion-aware-Face-Detection/blob/master/res.jpg) 6 | -------------------------------------------------------------------------------- /solver_pmask.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/face/VGG16/faster_rcnn_end2end/train_final_pmask.prototxt" 2 | base_lr: 0.0005 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | snapshot: 0 12 | snapshot_prefix: "_output_v1" 13 | iter_size: 2 14 | 15 | -------------------------------------------------------------------------------- /train_final_pmask.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | layer { 3 | name: 'input-data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'im_info' 7 | top: 'gt_boxes' 8 | top: 'gt_mask' 9 | python_param { 10 | module: 'roi_data_layer.layer' 11 | layer: 'RoIDataLayer' 12 | param_str: "'num_classes': 2" 13 | } 14 | } 15 | 16 | layer { 17 | name: "conv1_1" 18 | type: "Convolution" 19 | bottom: "data" 20 | top: "conv1_1" 21 | param { 22 | lr_mult: 0 23 | decay_mult: 0 24 | } 25 | param { 26 | lr_mult: 0 27 | decay_mult: 0 28 | } 29 | convolution_param { 30 | num_output: 64 31 | pad: 1 32 | kernel_size: 3 33 | } 34 | } 35 | layer { 36 | name: "relu1_1" 37 | type: "ReLU" 38 | bottom: "conv1_1" 39 | top: "conv1_1" 40 | } 41 | layer { 42 | name: "conv1_2" 43 | type: "Convolution" 44 | bottom: "conv1_1" 45 | top: "conv1_2" 46 | param { 47 | lr_mult: 0 48 | decay_mult: 0 49 | } 50 | param { 51 | lr_mult: 0 52 | decay_mult: 0 53 | } 54 | convolution_param { 55 | num_output: 64 56 | pad: 1 57 | kernel_size: 3 58 | } 59 | } 60 | layer { 61 | name: "relu1_2" 62 | type: "ReLU" 63 | bottom: "conv1_2" 64 | top: "conv1_2" 65 | } 66 | layer { 67 | name: "pool1" 68 | type: "Pooling" 69 | bottom: "conv1_2" 70 | top: "pool1" 71 | pooling_param { 72 | pool: MAX 73 | kernel_size: 2 74 | stride: 2 75 | } 76 | } 77 | layer { 78 | name: "conv2_1" 79 | type: "Convolution" 80 | bottom: "pool1" 81 | top: "conv2_1" 82 | param { 83 | lr_mult: 0 84 | decay_mult: 0 85 | } 86 | param { 87 | lr_mult: 0 88 | decay_mult: 0 89 | } 90 | convolution_param { 91 | num_output: 128 92 | pad: 1 93 | kernel_size: 3 94 | } 95 | } 96 | layer { 97 | name: "relu2_1" 98 | type: "ReLU" 99 | bottom: "conv2_1" 100 | top: "conv2_1" 101 | } 102 | layer { 103 | name: "conv2_2" 104 | type: 
"Convolution" 105 | bottom: "conv2_1" 106 | top: "conv2_2" 107 | param { 108 | lr_mult: 0 109 | decay_mult: 0 110 | } 111 | param { 112 | lr_mult: 0 113 | decay_mult: 0 114 | } 115 | convolution_param { 116 | num_output: 128 117 | pad: 1 118 | kernel_size: 3 119 | } 120 | } 121 | layer { 122 | name: "relu2_2" 123 | type: "ReLU" 124 | bottom: "conv2_2" 125 | top: "conv2_2" 126 | } 127 | layer { 128 | name: "pool2" 129 | type: "Pooling" 130 | bottom: "conv2_2" 131 | top: "pool2" 132 | pooling_param { 133 | pool: MAX 134 | kernel_size: 2 135 | stride: 2 136 | } 137 | } 138 | layer { 139 | name: "conv3_1" 140 | type: "Convolution" 141 | bottom: "pool2" 142 | top: "conv3_1" 143 | param { 144 | lr_mult: 1 145 | } 146 | param { 147 | lr_mult: 2 148 | } 149 | convolution_param { 150 | num_output: 256 151 | pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 | layer { 156 | name: "relu3_1" 157 | type: "ReLU" 158 | bottom: "conv3_1" 159 | top: "conv3_1" 160 | } 161 | layer { 162 | name: "conv3_2" 163 | type: "Convolution" 164 | bottom: "conv3_1" 165 | top: "conv3_2" 166 | param { 167 | lr_mult: 1 168 | } 169 | param { 170 | lr_mult: 2 171 | } 172 | convolution_param { 173 | num_output: 256 174 | pad: 1 175 | kernel_size: 3 176 | } 177 | } 178 | layer { 179 | name: "relu3_2" 180 | type: "ReLU" 181 | bottom: "conv3_2" 182 | top: "conv3_2" 183 | } 184 | layer { 185 | name: "conv3_3" 186 | type: "Convolution" 187 | bottom: "conv3_2" 188 | top: "conv3_3" 189 | param { 190 | lr_mult: 1 191 | } 192 | param { 193 | lr_mult: 2 194 | } 195 | convolution_param { 196 | num_output: 256 197 | pad: 1 198 | kernel_size: 3 199 | } 200 | } 201 | layer { 202 | name: "relu3_3" 203 | type: "ReLU" 204 | bottom: "conv3_3" 205 | top: "conv3_3" 206 | } 207 | layer { 208 | name: "pool3" 209 | type: "Pooling" 210 | bottom: "conv3_3" 211 | top: "pool3" 212 | pooling_param { 213 | pool: MAX 214 | kernel_size: 2 215 | stride: 2 216 | } 217 | } 218 | layer { 219 | name: "conv4_1" 220 | type: "Convolution" 221 | bottom: "pool3" 222 | top: "conv4_1" 223 | param { 224 | lr_mult: 1 225 | } 226 | param { 227 | lr_mult: 2 228 | } 229 | convolution_param { 230 | num_output: 512 231 | pad: 1 232 | kernel_size: 3 233 | } 234 | } 235 | layer { 236 | name: "relu4_1" 237 | type: "ReLU" 238 | bottom: "conv4_1" 239 | top: "conv4_1" 240 | } 241 | layer { 242 | name: "conv4_2" 243 | type: "Convolution" 244 | bottom: "conv4_1" 245 | top: "conv4_2" 246 | param { 247 | lr_mult: 1 248 | } 249 | param { 250 | lr_mult: 2 251 | } 252 | convolution_param { 253 | num_output: 512 254 | pad: 1 255 | kernel_size: 3 256 | } 257 | } 258 | layer { 259 | name: "relu4_2" 260 | type: "ReLU" 261 | bottom: "conv4_2" 262 | top: "conv4_2" 263 | } 264 | layer { 265 | name: "conv4_3" 266 | type: "Convolution" 267 | bottom: "conv4_2" 268 | top: "conv4_3" 269 | param { 270 | lr_mult: 1 271 | } 272 | param { 273 | lr_mult: 2 274 | } 275 | convolution_param { 276 | num_output: 512 277 | pad: 1 278 | kernel_size: 3 279 | } 280 | } 281 | layer { 282 | name: "relu4_3" 283 | type: "ReLU" 284 | bottom: "conv4_3" 285 | top: "conv4_3" 286 | } 287 | layer { 288 | name: "pool4" 289 | type: "Pooling" 290 | bottom: "conv4_3" 291 | top: "pool4" 292 | pooling_param { 293 | pool: MAX 294 | kernel_size: 2 295 | stride: 2 296 | } 297 | } 298 | layer { 299 | name: "conv5_1" 300 | type: "Convolution" 301 | bottom: "pool4" 302 | top: "conv5_1" 303 | param { 304 | lr_mult: 1 305 | } 306 | param { 307 | lr_mult: 2 308 | } 309 | convolution_param { 310 | num_output: 512 311 | pad: 1 312 | 
kernel_size: 3 313 | } 314 | } 315 | layer { 316 | name: "relu5_1" 317 | type: "ReLU" 318 | bottom: "conv5_1" 319 | top: "conv5_1" 320 | } 321 | layer { 322 | name: "conv5_2" 323 | type: "Convolution" 324 | bottom: "conv5_1" 325 | top: "conv5_2" 326 | param { 327 | lr_mult: 1 328 | } 329 | param { 330 | lr_mult: 2 331 | } 332 | convolution_param { 333 | num_output: 512 334 | pad: 1 335 | kernel_size: 3 336 | } 337 | } 338 | layer { 339 | name: "relu5_2" 340 | type: "ReLU" 341 | bottom: "conv5_2" 342 | top: "conv5_2" 343 | } 344 | layer { 345 | name: "conv5_3" 346 | type: "Convolution" 347 | bottom: "conv5_2" 348 | top: "conv5_3" 349 | param { 350 | lr_mult: 1 351 | } 352 | param { 353 | lr_mult: 2 354 | } 355 | convolution_param { 356 | num_output: 512 357 | pad: 1 358 | kernel_size: 3 359 | } 360 | } 361 | layer { 362 | name: "relu5_3" 363 | type: "ReLU" 364 | bottom: "conv5_3" 365 | top: "conv5_3" 366 | } 367 | 368 | #========= RPN ============ 369 | 370 | layer { 371 | name: "rpn_conv/3x3" 372 | type: "Convolution" 373 | bottom: "conv5_3" 374 | top: "rpn/output" 375 | param { lr_mult: 0 376 | decay_mult: 0 } 377 | param { lr_mult: 0 378 | decay_mult: 0 } 379 | convolution_param { 380 | num_output: 512 381 | kernel_size: 3 pad: 1 stride: 1 382 | weight_filler { type: "gaussian" std: 0.01 } 383 | bias_filler { type: "constant" value: 0 } 384 | } 385 | } 386 | layer { 387 | name: "rpn_relu/3x3" 388 | type: "ReLU" 389 | bottom: "rpn/output" 390 | top: "rpn/output" 391 | } 392 | 393 | layer { 394 | name: "rpn_cls_score" 395 | type: "Convolution" 396 | bottom: "rpn/output" 397 | top: "rpn_cls_score" 398 | param { lr_mult: 0 399 | decay_mult: 0 } 400 | param { lr_mult: 0 401 | decay_mult: 0 } 402 | convolution_param { 403 | num_output: 36 # 2(bg/fg) * 12(anchors) 404 | kernel_size: 1 pad: 0 stride: 1 405 | weight_filler { type: "gaussian" std: 0.01 } 406 | bias_filler { type: "constant" value: 0 } 407 | } 408 | } 409 | 410 | layer { 411 | name: "rpn_bbox_pred" 412 | type: "Convolution" 413 | bottom: "rpn/output" 414 | top: "rpn_bbox_pred" 415 | param { lr_mult: 0 416 | decay_mult: 0 } 417 | param { lr_mult: 0 418 | decay_mult: 0 } 419 | convolution_param { 420 | num_output: 72 # 4 * 12(anchors) 421 | kernel_size: 1 pad: 0 stride: 1 422 | weight_filler { type: "gaussian" std: 0.01 } 423 | bias_filler { type: "constant" value: 0 } 424 | } 425 | } 426 | 427 | layer { 428 | bottom: "rpn_cls_score" 429 | top: "rpn_cls_score_reshape" 430 | name: "rpn_cls_score_reshape" 431 | type: "Reshape" 432 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 433 | } 434 | 435 | #layer { 436 | # name: 'rpn-data' 437 | # type: 'Python' 438 | # bottom: 'rpn_cls_score' 439 | # bottom: 'gt_boxes' 440 | # bottom: 'im_info' 441 | # bottom: 'data' 442 | # top: 'rpn_labels' 443 | # top: 'rpn_bbox_targets' 444 | # top: 'rpn_bbox_inside_weights' 445 | # top: 'rpn_bbox_outside_weights' 446 | # python_param { 447 | # module: 'rpn.anchor_target_layer' 448 | # layer: 'AnchorTargetLayer' 449 | # param_str: "'feat_stride': 16" 450 | # } 451 | #} 452 | 453 | #layer { 454 | # name: "rpn_loss_cls" 455 | # type: "SoftmaxWithLoss" 456 | # bottom: "rpn_cls_score_reshape" 457 | # bottom: "rpn_labels" 458 | # propagate_down: 1 459 | # propagate_down: 0 460 | # top: "rpn_cls_loss" 461 | # loss_weight: 0 462 | # loss_param { 463 | # ignore_label: -1 464 | # normalize: true 465 | # } 466 | #} 467 | 468 | #layer { 469 | # name: "rpn_loss_bbox" 470 | # type: "SmoothL1Loss" 471 | # bottom: "rpn_bbox_pred" 472 | # bottom: 
"rpn_bbox_targets" 473 | # bottom: 'rpn_bbox_inside_weights' 474 | # bottom: 'rpn_bbox_outside_weights' 475 | # top: "rpn_loss_bbox" 476 | # loss_weight: 0 477 | # smooth_l1_loss_param { sigma: 3.0 } 478 | #} 479 | 480 | #========= RoI Proposal ============ 481 | 482 | layer { 483 | name: "rpn_cls_prob" 484 | type: "Softmax" 485 | bottom: "rpn_cls_score_reshape" 486 | top: "rpn_cls_prob" 487 | } 488 | 489 | layer { 490 | name: 'rpn_cls_prob_reshape' 491 | type: 'Reshape' 492 | bottom: 'rpn_cls_prob' 493 | top: 'rpn_cls_prob_reshape' 494 | reshape_param { shape { dim: 0 dim: 36 dim: -1 dim: 0 } } 495 | } 496 | 497 | layer { 498 | name: 'proposal' 499 | type: 'Python' 500 | bottom: 'rpn_cls_prob_reshape' 501 | bottom: 'rpn_bbox_pred' 502 | bottom: 'im_info' 503 | top: 'rpn_rois' 504 | # top: 'rpn_scores' 505 | propagate_down: false 506 | propagate_down: false 507 | propagate_down: false 508 | python_param { 509 | module: 'rpn.proposal_layer' 510 | layer: 'ProposalLayer' 511 | param_str: "'feat_stride': 16" 512 | } 513 | } 514 | 515 | #layer { 516 | # name: 'debug-data' 517 | # type: 'Python' 518 | # bottom: 'data' 519 | # bottom: 'rpn_rois' 520 | # bottom: 'rpn_scores' 521 | # python_param { 522 | # module: 'rpn.debug_layer' 523 | # layer: 'RPNDebugLayer' 524 | # } 525 | #} 526 | 527 | layer { 528 | name: 'roi-data' 529 | type: 'Python' 530 | bottom: 'rpn_rois' 531 | bottom: 'gt_boxes' 532 | top: 'rois' 533 | top: 'labels' 534 | top: 'bbox_targets' 535 | top: 'bbox_inside_weights' 536 | top: 'bbox_outside_weights' 537 | python_param { 538 | module: 'rpn.proposal_target_layer' 539 | layer: 'ProposalTargetLayer' 540 | param_str: "'num_classes': 2" 541 | } 542 | } 543 | 544 | 545 | 546 | ########################## 547 | ## Readonly RoI Network ## 548 | ######### Start ########## 549 | layer { 550 | name: "roi_pool5_readonly" 551 | type: "ROIPooling" 552 | bottom: "conv5_3" 553 | bottom: "rois" 554 | top: "pool5_readonly" 555 | propagate_down: false 556 | propagate_down: false 557 | roi_pooling_param { 558 | pooled_w: 7 559 | pooled_h: 7 560 | spatial_scale: 0.0625 # 1/16 561 | } 562 | } 563 | layer { 564 | name: "roi_pool5" 565 | type: "ROIPooling" 566 | bottom: "conv5_3" 567 | bottom: "rois" 568 | top: "roi_pool5" 569 | propagate_down: true 570 | propagate_down: false 571 | roi_pooling_param { 572 | pooled_w: 7 573 | pooled_h: 7 574 | spatial_scale: 0.0625 # 1/16 575 | } 576 | } 577 | 578 | #### mask branch 579 | 580 | layer { 581 | name: "conv6_mask" 582 | type: "Convolution" 583 | bottom: "pool5_readonly" 584 | top: "conv6_mask" 585 | param { 586 | lr_mult: 0 587 | decay_mult: 0 588 | } 589 | param { 590 | lr_mult: 0 591 | decay_mult: 0 592 | } 593 | convolution_param { 594 | num_output: 256 595 | pad: 1 596 | kernel_size: 3 597 | 598 | weight_filler { 599 | type: "msra" 600 | } 601 | bias_filler { 602 | type: "constant" 603 | value: 0 604 | } 605 | 606 | } 607 | } 608 | layer { 609 | name: "relu6_mask" 610 | type: "ReLU" 611 | bottom: "conv6_mask" 612 | top: "conv6_mask" 613 | } 614 | layer { 615 | name: "conv7_mask" 616 | type: "Convolution" 617 | bottom: "conv6_mask" 618 | top: "conv7_mask" 619 | param { 620 | lr_mult: 0 621 | decay_mult: 0 622 | } 623 | param { 624 | lr_mult: 0 625 | decay_mult: 0 626 | } 627 | convolution_param { 628 | num_output: 256 629 | pad: 1 630 | kernel_size: 3 631 | 632 | weight_filler { 633 | type: "msra" 634 | } 635 | bias_filler { 636 | type: "constant" 637 | value: 0 638 | } 639 | 640 | } 641 | } 642 | layer { 643 | name: "relu7_mask" 644 | type: "ReLU" 645 
| bottom: "conv7_mask" 646 | top: "conv7_mask" 647 | } 648 | 649 | layer { 650 | name: "conv8_mask" 651 | type: "Convolution" 652 | bottom: "conv7_mask" 653 | top: "conv8_mask" 654 | param { 655 | lr_mult: 0 656 | decay_mult: 0 657 | } 658 | param { 659 | lr_mult: 0 660 | decay_mult: 0 661 | } 662 | convolution_param { 663 | num_output: 256 664 | pad: 1 665 | kernel_size: 3 666 | 667 | weight_filler { 668 | type: "msra" 669 | } 670 | bias_filler { 671 | type: "constant" 672 | value: 0 673 | } 674 | 675 | } 676 | } 677 | layer { 678 | name: "relu8_mask" 679 | type: "ReLU" 680 | bottom: "conv8_mask" 681 | top: "conv8_mask" 682 | } 683 | 684 | layer 685 | { 686 | name: "eltwise_layer" 687 | type: "Eltwise" 688 | bottom: "conv6_mask" 689 | bottom: "conv8_mask" 690 | top: "eltwise" 691 | eltwise_param { 692 | operation: SUM 693 | } 694 | } 695 | 696 | layer { 697 | name: "conv9_mask" 698 | type: "Convolution" 699 | bottom: "eltwise" 700 | top: "conv9_mask" 701 | param { 702 | lr_mult: 0 703 | decay_mult: 0 704 | } 705 | param { 706 | lr_mult: 0 707 | decay_mult: 0 708 | } 709 | convolution_param { 710 | num_output: 256 711 | pad: 1 712 | kernel_size: 3 713 | 714 | weight_filler { 715 | type: "msra" 716 | } 717 | bias_filler { 718 | type: "constant" 719 | value: 0 720 | } 721 | 722 | } 723 | } 724 | layer { 725 | name: "relu9_mask" 726 | type: "ReLU" 727 | bottom: "conv9_mask" 728 | top: "conv9_mask" 729 | } 730 | 731 | layer { 732 | name: "conv10_mask_gen" 733 | type: "Convolution" 734 | bottom: "conv9_mask" 735 | top: "conv10_mask_gen" 736 | param { 737 | lr_mult: 0 738 | decay_mult: 0 739 | } 740 | param { 741 | lr_mult: 0 742 | decay_mult: 0 743 | } 744 | convolution_param { 745 | num_output: 1 746 | pad: 1 747 | kernel_size: 3 748 | 749 | weight_filler { 750 | type: "msra" 751 | } 752 | bias_filler { 753 | type: "constant" 754 | value: 0 755 | } 756 | } 757 | } 758 | #layer { 759 | # name: "sigmoid_gen" 760 | # type: "Sigmoid" 761 | # bottom: "conv10_mask_gen" 762 | # top: "mask_gen" 763 | #} 764 | layer { 765 | name: "gen_layer" 766 | type: "Python" 767 | bottom: "conv10_mask_gen" 768 | bottom: "gt_mask" 769 | top: "mask_gen_tile" 770 | top: "mask_gen_thres" 771 | top: "mask_for_loss" 772 | propagate_down: false 773 | propagate_down: false 774 | python_param { 775 | module: "roi_data_layer.layer" 776 | layer: "TileLayer2" 777 | param_str: "{\'channels\': 512,\'permute_count\': 20,\'count_drop\': 15,\'iter_size\': 5,'maintain_before\': 1}" 778 | } 779 | } 780 | 781 | layer{ 782 | name:"mask_prod" 783 | type:"Eltwise" 784 | bottom:"mask_gen_tile" 785 | bottom:"roi_pool5" 786 | top:"cls_branch" 787 | eltwise_param { 788 | operation:PROD 789 | } 790 | } 791 | 792 | 793 | #### classification branch 794 | 795 | layer { 796 | name: "fc6" 797 | type: "InnerProduct" 798 | bottom: "cls_branch" 799 | top: "fc6" 800 | param { 801 | lr_mult: 1 802 | } 803 | param { 804 | lr_mult: 2 805 | } 806 | propagate_down: true 807 | inner_product_param { 808 | num_output: 4096 809 | } 810 | } 811 | layer { 812 | name: "relu6" 813 | type: "ReLU" 814 | bottom: "fc6" 815 | top: "fc6" 816 | } 817 | layer { 818 | name: "drop6" 819 | type: "Dropout" 820 | bottom: "fc6" 821 | top: "fc6" 822 | dropout_param { 823 | dropout_ratio: 0.5 824 | } 825 | } 826 | layer { 827 | name: "fc7" 828 | type: "InnerProduct" 829 | bottom: "fc6" 830 | top: "fc7" 831 | param { 832 | lr_mult: 1 833 | } 834 | param { 835 | lr_mult: 2 836 | } 837 | propagate_down: true 838 | inner_product_param { 839 | num_output: 4096 840 | } 841 | } 842 | 
layer { 843 | name: "relu7" 844 | type: "ReLU" 845 | bottom: "fc7" 846 | top: "fc7" 847 | } 848 | layer { 849 | name: "drop7" 850 | type: "Dropout" 851 | bottom: "fc7" 852 | top: "fc7" 853 | dropout_param { 854 | dropout_ratio: 0.5 855 | } 856 | } 857 | layer { 858 | name: "cls_score" 859 | type: "InnerProduct" 860 | bottom: "fc7" 861 | top: "cls_score" 862 | param { 863 | lr_mult: 1 864 | } 865 | param { 866 | lr_mult: 2 867 | } 868 | propagate_down: true 869 | inner_product_param { 870 | num_output: 2 871 | weight_filler { 872 | type: "gaussian" 873 | std: 0.01 874 | } 875 | bias_filler { 876 | type: "constant" 877 | value: 0 878 | } 879 | } 880 | } 881 | 882 | layer { 883 | name: "loss_cls" 884 | type: "SoftmaxWithLoss" 885 | bottom: "cls_score" 886 | bottom: "labels" 887 | top: "loss_cls" 888 | propagate_down: true 889 | propagate_down: false 890 | loss_weight: 1 891 | } 892 | 893 | layer { 894 | name: "bbox_pred" 895 | type: "InnerProduct" 896 | bottom: "fc7" 897 | top: "bbox_pred" 898 | param { 899 | name: "bbox_pred_w" 900 | lr_mult: 1 901 | decay_mult: 1 902 | } 903 | param { 904 | name: "bbox_pred_b" 905 | lr_mult: 2 906 | decay_mult: 0 907 | } 908 | inner_product_param { 909 | num_output: 8 910 | weight_filler { 911 | type: "gaussian" 912 | std: 0.001 913 | } 914 | bias_filler { 915 | type: "constant" 916 | value: 0 917 | } 918 | } 919 | } 920 | layer { 921 | name: "loss_bbox" 922 | type: "SmoothL1Loss" 923 | bottom: "bbox_pred" 924 | bottom: "bbox_targets" 925 | bottom: "bbox_inside_weights" 926 | bottom: "bbox_outside_weights" 927 | top: "loss_bbox" 928 | propagate_down: true 929 | propagate_down: false 930 | propagate_down: false 931 | propagate_down: false 932 | loss_weight: 1 933 | } 934 | #layer { 935 | # name: "center_loss" 936 | # type: "CenterLoss" 937 | # bottom: "fc6" 938 | # bottom: "labels" 939 | # top: "center_loss" 940 | # propagate_down:true 941 | # propagate_down:false 942 | # param { 943 | # lr_mult: 1 944 | # decay_mult: 2 945 | # } 946 | # center_loss_param { 947 | # num_output: 4096 948 | # center_filler { 949 | # type: "xavier" 950 | # } 951 | # } 952 | # loss_weight: 0.005 953 | #} 954 | layer { 955 | name: "cls_prob" 956 | type: "Softmax" 957 | bottom: "cls_score" 958 | top: "cls_prob" 959 | } 960 | layer { 961 | name: "SiftFaceLayer" 962 | type: "Python" 963 | bottom: "pool4" 964 | bottom: "bbox_pred" 965 | bottom: "cls_prob" 966 | bottom: "gt_mask" 967 | bottom: "rois" 968 | bottom: "im_info" 969 | top: "onlyface" 970 | top: "gt_mask_fg" 971 | propagate_down: true 972 | propagate_down: false 973 | propagate_down: false 974 | propagate_down: false 975 | propagate_down: false 976 | propagate_down: false 977 | python_param { 978 | module: "roi_data_layer.layer2" 979 | layer: "SiftFaceLayer" 980 | } 981 | } 982 | layer { 983 | name: "fc6_pmask" 984 | type: "Convolution" 985 | bottom: "onlyface" 986 | top: "fc6_pmask" 987 | param { 988 | lr_mult: 1 989 | decay_mult: 1 990 | } 991 | param { 992 | lr_mult: 2 993 | decay_mult: 0 994 | } 995 | convolution_param { 996 | num_output: 4096 997 | pad: 2 998 | kernel_size: 3 999 | stride: 1 1000 | weight_filler { 1001 | type: "msra" 1002 | } 1003 | bias_filler { 1004 | type: "constant" 1005 | value: 0 1006 | } 1007 | } 1008 | } 1009 | layer { 1010 | name: "relu6_pmask" 1011 | type: "ReLU" 1012 | bottom: "fc6_pmask" 1013 | top: "fc6_pmask" 1014 | } 1015 | layer { 1016 | name: "drop6_pmask" 1017 | type: "Dropout" 1018 | bottom: "fc6_pmask" 1019 | top: "fc6_pmask" 1020 | dropout_param { 1021 | dropout_ratio: 0.5 1022 | } 
1023 | } 1024 | layer { 1025 | name: "fc7_pmask" 1026 | type: "Convolution" 1027 | bottom: "fc6_pmask" 1028 | top: "fc7_pmask" 1029 | param { 1030 | lr_mult: 1 1031 | decay_mult: 1 1032 | } 1033 | param { 1034 | lr_mult: 2 1035 | decay_mult: 0 1036 | } 1037 | convolution_param { 1038 | num_output: 4096 1039 | pad: 0 1040 | kernel_size: 1 1041 | stride: 1 1042 | weight_filler { 1043 | type: "msra" 1044 | } 1045 | bias_filler { 1046 | type: "constant" 1047 | value: 0 1048 | } 1049 | } 1050 | } 1051 | layer { 1052 | name: "relu7_pmask" 1053 | type: "ReLU" 1054 | bottom: "fc7_pmask" 1055 | top: "fc7_pmask" 1056 | } 1057 | layer { 1058 | name: "drop7_pmask" 1059 | type: "Dropout" 1060 | bottom: "fc7_pmask" 1061 | top: "fc7_pmask" 1062 | dropout_param { 1063 | dropout_ratio: 0.5 1064 | } 1065 | } 1066 | layer { 1067 | name: "score_fr_fg" 1068 | type: "Convolution" 1069 | bottom: "fc7_pmask" 1070 | top: "score_fr_fg" 1071 | param { 1072 | lr_mult: 1 1073 | decay_mult: 1 1074 | } 1075 | param { 1076 | lr_mult: 2 1077 | decay_mult: 0 1078 | } 1079 | convolution_param { 1080 | num_output: 2 1081 | pad: 0 1082 | kernel_size: 1 1083 | weight_filler { 1084 | type: "msra" 1085 | } 1086 | bias_filler { 1087 | type: "constant" 1088 | value: 0 1089 | } 1090 | } 1091 | } 1092 | # layer { 1093 | # name: "upscore2_fg" 1094 | # type: "Deconvolution" 1095 | # bottom: "score_fr_fg" 1096 | # top: "upscore2_fg" 1097 | # param { 1098 | # lr_mult: 0 1099 | # } 1100 | # convolution_param { 1101 | # num_output: 33 1102 | # bias_term: false 1103 | # kernel_size: 4 1104 | # stride: 2 1105 | # } 1106 | # } 1107 | # layer { 1108 | # name: "score_pool4_fg" 1109 | # type: "Convolution" 1110 | # bottom: "pool4" 1111 | # top: "score_pool4_fg" 1112 | # param { 1113 | # lr_mult: 1 1114 | # decay_mult: 1 1115 | # } 1116 | # param { 1117 | # lr_mult: 2 1118 | # decay_mult: 0 1119 | # } 1120 | # convolution_param { 1121 | # num_output: 33 1122 | # pad: 0 1123 | # kernel_size: 1 1124 | # } 1125 | # } 1126 | # layer { 1127 | # name: "score_pool4_fgc" 1128 | # type: "Crop" 1129 | # bottom: "score_pool4_fg" 1130 | # bottom: "upscore2_fg" 1131 | # top: "score_pool4_fgc" 1132 | # crop_param { 1133 | # axis: 2 1134 | # offset: 0 1135 | # } 1136 | # } 1137 | # layer { 1138 | # name: "fuse_pool4_fg" 1139 | # type: "Eltwise" 1140 | # bottom: "upscore2_fg" 1141 | # bottom: "score_pool4_fgc" 1142 | # top: "fuse_pool4_fg" 1143 | # eltwise_param { 1144 | # operation: SUM 1145 | # } 1146 | # } 1147 | # layer { 1148 | # name: "upscore_pool4_fg" 1149 | # type: "Deconvolution" 1150 | # bottom: "fuse_pool4_fg" 1151 | # top: "upscore_pool4_fg" 1152 | # param { 1153 | # lr_mult: 0 1154 | # } 1155 | # convolution_param { 1156 | # num_output: 33 1157 | # bias_term: false 1158 | # kernel_size: 4 1159 | # stride: 2 1160 | # } 1161 | # } 1162 | # layer { 1163 | # name: "score_pool3_fg" 1164 | # type: "Convolution" 1165 | # bottom: "pool3" 1166 | # top: "score_pool3_fg" 1167 | # param { 1168 | # lr_mult: 1 1169 | # decay_mult: 1 1170 | # } 1171 | # param { 1172 | # lr_mult: 2 1173 | # decay_mult: 0 1174 | # } 1175 | # convolution_param { 1176 | # num_output: 33 1177 | # pad: 0 1178 | # kernel_size: 1 1179 | # } 1180 | # } 1181 | # layer { 1182 | # name: "score_pool3_fgc" 1183 | # type: "Crop" 1184 | # bottom: "score_pool3_fg" 1185 | # bottom: "upscore_pool4_fg" 1186 | # top: "score_pool3_fgc" 1187 | # crop_param { 1188 | # axis: 2 1189 | # offset: 0 1190 | # } 1191 | # } 1192 | # layer { 1193 | # name: "fuse_pool3_fg" 1194 | # type: "Eltwise" 1195 | # 
bottom: "upscore_pool4_fg" 1196 | # bottom: "score_pool3_fgc" 1197 | # top: "fuse_pool3_fg" 1198 | # eltwise_param { 1199 | # operation: SUM 1200 | # } 1201 | # } 1202 | layer { 1203 | name: "upscore8_fg" 1204 | type: "Deconvolution" 1205 | bottom: "score_fr_fg" 1206 | top: "upscore8_fg" 1207 | param { 1208 | lr_mult: 1 1209 | decay_mult: 1 1210 | } 1211 | convolution_param { 1212 | num_output: 2 1213 | bias_term: false 1214 | kernel_size: 32 1215 | stride: 16 1216 | weight_filler { 1217 | type: "xavier" 1218 | } 1219 | bias_filler { 1220 | type: "constant" 1221 | value: 0 1222 | } 1223 | } 1224 | } 1225 | #layer { 1226 | # name: "score_fg_vis" 1227 | # type: "Crop" 1228 | # bottom: "upscore8_fg" 1229 | # bottom: "data" 1230 | # top: "score_fg_vis" 1231 | # crop_param { 1232 | # axis: 2 1233 | # offset: 5 1234 | # } 1235 | #} 1236 | 1237 | layer { 1238 | name: "score_fg" 1239 | type: "Crop" 1240 | bottom: "upscore8_fg" 1241 | bottom: "data" 1242 | top: "score_fg" 1243 | crop_param { 1244 | axis: 2 1245 | offset: 12 1246 | } 1247 | } 1248 | layer { 1249 | name: "loss" 1250 | type: "SoftmaxWithLoss" 1251 | bottom: "score_fg" 1252 | bottom: "gt_mask_fg" 1253 | top: "loss" 1254 | propagate_down: true 1255 | propagate_down: false 1256 | loss_param { 1257 | ignore_label: 255 1258 | normalize: false 1259 | } 1260 | loss_weight:0 1261 | } 1262 | layer{ 1263 | name:"silence1" 1264 | type:"Silence" 1265 | bottom:"mask_for_loss" 1266 | } 1267 | layer{ 1268 | name:"silence1" 1269 | type:"Silence" 1270 | bottom:"mask_gen_thres" 1271 | } 1272 | #layer{ 1273 | # name:"silence1" 1274 | # type:"Silence" 1275 | # bottom:"fuse_pool3_fg" 1276 | #} 1277 | -------------------------------------------------------------------------------- /layer.py: -------------------------------------------------------------------------------- 1 | #only modify siftfacelayer 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | """The data layer used during training to train a Fast R-CNN network. 10 | 11 | RoIDataLayer implements a Caffe Python layer. 
12 | """ 13 | import pickle 14 | import caffe 15 | from fast_rcnn.config import cfg 16 | from roi_data_layer.minibatch import get_minibatch 17 | import numpy as np 18 | import yaml 19 | from multiprocessing import Process, Queue 20 | from fast_rcnn.nms_wrapper import nms 21 | from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv 22 | 23 | 24 | class RoIDataLayer(caffe.Layer): 25 | def _shuffle_roidb_inds(self): 26 | """Randomly permute the training roidb.""" 27 | if cfg.TRAIN.ASPECT_GROUPING: 28 | widths = np.array([r['width'] for r in self._roidb]) 29 | heights = np.array([r['height'] for r in self._roidb]) 30 | horz = (widths >= heights) 31 | vert = np.logical_not(horz) 32 | horz_inds = np.where(horz)[0] 33 | vert_inds = np.where(vert)[0] 34 | inds = np.hstack(( 35 | np.random.permutation(horz_inds), 36 | np.random.permutation(vert_inds))) 37 | inds = np.reshape(inds, (-1, 2)) 38 | row_perm = np.random.permutation(np.arange(inds.shape[0])) 39 | inds = np.reshape(inds[row_perm, :], (-1,)) 40 | self._perm = inds 41 | else: 42 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 43 | self._cur = 0 44 | 45 | def _get_next_minibatch_inds(self): 46 | """Return the roidb indices for the next minibatch.""" 47 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 48 | self._shuffle_roidb_inds() 49 | 50 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 51 | self._cur += cfg.TRAIN.IMS_PER_BATCH 52 | return db_inds 53 | 54 | def _get_next_minibatch(self): 55 | """Return the blobs to be used for the next minibatch. 56 | 57 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 58 | separate process and made available through self._blob_queue. 59 | """ 60 | if cfg.TRAIN.USE_PREFETCH: 61 | return self._blob_queue.get() 62 | else: 63 | db_inds = self._get_next_minibatch_inds() 64 | minibatch_db = [self._roidb[i] for i in db_inds] 65 | return get_minibatch(minibatch_db, self._num_classes) 66 | 67 | def set_roidb(self, roidb): 68 | """Set the roidb to be used by this layer during training.""" 69 | self._roidb = roidb 70 | self._shuffle_roidb_inds() 71 | if cfg.TRAIN.USE_PREFETCH: 72 | self._blob_queue = Queue(10) 73 | self._prefetch_process = BlobFetcher(self._blob_queue, 74 | self._roidb, 75 | self._num_classes) 76 | self._prefetch_process.start() 77 | # Terminate the child process when the parent exists 78 | def cleanup(): 79 | print 'Terminating BlobFetcher' 80 | self._prefetch_process.terminate() 81 | self._prefetch_process.join() 82 | import atexit 83 | atexit.register(cleanup) 84 | 85 | def setup(self, bottom, top): 86 | """Setup the RoIDataLayer.""" 87 | 88 | # parse the layer parameter string, which must be valid YAML 89 | layer_params = yaml.load(self.param_str_) 90 | 91 | self._num_classes = layer_params['num_classes'] 92 | 93 | self._name_to_top_map = {} 94 | 95 | # data blob: holds a batch of N images, each with 3 channels 96 | idx = 0 97 | top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 3, 98 | max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE) 99 | self._name_to_top_map['data'] = idx 100 | idx += 1 101 | 102 | if cfg.TRAIN.HAS_RPN: 103 | top[idx].reshape(1, 3) 104 | self._name_to_top_map['im_info'] = idx 105 | idx += 1 106 | 107 | top[idx].reshape(1, 4) 108 | self._name_to_top_map['gt_boxes'] = idx 109 | idx += 1 110 | 111 | top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 1, 112 | max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE) 113 | self._name_to_top_map['gt_mask'] = idx 114 | idx += 1 115 | else: # not using RPN 116 | # rois blob: holds R 
regions of interest, each is a 5-tuple 117 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 118 | # rectangle (x1, y1, x2, y2) 119 | top[idx].reshape(1, 5) 120 | self._name_to_top_map['rois'] = idx 121 | idx += 1 122 | 123 | # labels blob: R categorical labels in [0, ..., K] for K foreground 124 | # classes plus background 125 | top[idx].reshape(1) 126 | self._name_to_top_map['labels'] = idx 127 | idx += 1 128 | 129 | if cfg.TRAIN.BBOX_REG: 130 | # bbox_targets blob: R bounding-box regression targets with 4 131 | # targets per class 132 | top[idx].reshape(1, self._num_classes * 4) 133 | self._name_to_top_map['bbox_targets'] = idx 134 | idx += 1 135 | 136 | # bbox_inside_weights blob: At most 4 targets per roi are active; 137 | # this binary vector specifies the subset of active targets 138 | top[idx].reshape(1, self._num_classes * 4) 139 | self._name_to_top_map['bbox_inside_weights'] = idx 140 | idx += 1 141 | 142 | top[idx].reshape(1, self._num_classes * 4) 143 | self._name_to_top_map['bbox_outside_weights'] = idx 144 | idx += 1 145 | 146 | print 'RoiDataLayer: name_to_top:', self._name_to_top_map 147 | assert len(top) == len(self._name_to_top_map) 148 | 149 | def forward(self, bottom, top): 150 | """Get blobs and copy them into this layer's top blob vector.""" 151 | blobs = self._get_next_minibatch() 152 | 153 | for blob_name, blob in blobs.iteritems(): 154 | top_ind = self._name_to_top_map[blob_name] 155 | # Reshape net's input blobs 156 | top[top_ind].reshape(*(blob.shape)) 157 | # Copy data into net's input blobs 158 | top[top_ind].data[...] = blob.astype(np.float32, copy=False) 159 | 160 | #print('data blob shape: ', top[0].data.shape) 161 | #print('mask blob shape: ', top[3].data.shape) 162 | 163 | def backward(self, top, propagate_down, bottom): 164 | """This layer does not propagate gradients.""" 165 | pass 166 | 167 | def reshape(self, bottom, top): 168 | """Reshaping happens during the call to forward.""" 169 | pass 170 | 171 | class BlobFetcher(Process): 172 | """Experimental class for prefetching blobs in a separate process.""" 173 | def __init__(self, queue, roidb, num_classes): 174 | super(BlobFetcher, self).__init__() 175 | self._queue = queue 176 | self._roidb = roidb 177 | self._num_classes = num_classes 178 | self._perm = None 179 | self._cur = 0 180 | self._shuffle_roidb_inds() 181 | # fix the random seed for reproducibility 182 | np.random.seed(cfg.RNG_SEED) 183 | 184 | def _shuffle_roidb_inds(self): 185 | """Randomly permute the training roidb.""" 186 | # TODO(rbg): remove duplicated code 187 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 188 | self._cur = 0 189 | 190 | def _get_next_minibatch_inds(self): 191 | """Return the roidb indices for the next minibatch.""" 192 | # TODO(rbg): remove duplicated code 193 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 194 | self._shuffle_roidb_inds() 195 | 196 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 197 | self._cur += cfg.TRAIN.IMS_PER_BATCH 198 | return db_inds 199 | 200 | def run(self): 201 | print 'BlobFetcher started' 202 | while True: 203 | db_inds = self._get_next_minibatch_inds() 204 | minibatch_db = [self._roidb[i] for i in db_inds] 205 | blobs = get_minibatch(minibatch_db, self._num_classes) 206 | self._queue.put(blobs) 207 | 208 | 209 | 210 | 211 | 212 | 213 | class TileLayer(caffe.Layer): 214 | def setup(self, bottom, top): 215 | """Setup the TileLayer.""" 216 | 217 | # parse the layer parameter string, which must be valid YAML 218 | 
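        # expected keys, matching the param_str set in the prototxt:
        #   channels        - feature-map channels the 1-channel mask is tiled to
        #   permute_count   - size of the candidate pool (lowest-scored mask pixels)
        #   count_drop      - how many candidates are randomly zeroed per RoI
        #   iter_size       - length of the masking schedule, in iterations
        #   maintain_before - leading iterations of each cycle left unmasked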
layer_params = yaml.load(self.param_str_) 219 | 220 | self._channels = layer_params['channels'] 221 | self._count_drop = layer_params['count_drop'] 222 | self._permute_count = layer_params['permute_count'] 223 | 224 | self._iter_size = layer_params['iter_size'] 225 | self._maintain_before = layer_params['maintain_before'] # maintain the first image unchanged 226 | 227 | self._count_iter = 0 228 | self.cnt = 0 229 | self._name_to_bottom_map = { 230 | 'mask_pred': 0 } 231 | 232 | # 0 means block, 1 means maintain 233 | 234 | self._name_to_top_map = { 235 | 'mask_pred_tile': 0 , 236 | 'mask_pred_thres':1, 237 | 'mask_inv':2} 238 | 239 | 240 | # top[0].reshape(*(bottom[0].data.shape)) 241 | top[0].reshape(bottom[0].data.shape[0], self._channels, bottom[0].data.shape[2], bottom[0].data.shape[3]) 242 | top[1].reshape(bottom[0].data.shape[0], 1, bottom[0].data.shape[2], bottom[0].data.shape[3]) 243 | top[2].reshape(bottom[0].data.shape[0], 1, bottom[0].data.shape[2], bottom[0].data.shape[3]) 244 | 245 | print 'TileLayer: name_to_top:', self._name_to_top_map 246 | assert len(top) == len(self._name_to_top_map) 247 | 248 | def select_mask(self, mask_pred): 249 | #1 means block in the input 250 | self.cnt = 0 251 | pool_len = mask_pred.shape[2] 252 | sample_num = mask_pred.shape[0] 253 | 254 | mask_pixels = pool_len * pool_len 255 | 256 | count_drop = self._count_drop #15 257 | permute_count = self._permute_count #20 258 | 259 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 260 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 261 | for i in range(sample_num): 262 | 263 | #not exactly as mentioned in the paper 264 | #15/49 ~= 1/3 are selected as 0 265 | #first choose the 20 lowest-predicted pixels in the mask (trained in stage 2) 266 | #randomly choose 15 of the 20 pixels to set to zero 267 | rp = np.random.permutation(np.arange(permute_count)) 268 | rp = rp[0: count_drop] 269 | 270 | final_mask = np.ones(mask_pixels) 271 | 272 | now_mask_pred = mask_pred[i] 273 | now_mask_pred_array = np.reshape(now_mask_pred, mask_pixels) 274 | #convert the mask to an array and sort it by ascending pixel value 275 | sorted_ids = np.argsort(now_mask_pred_array) 276 | now_ids = sorted_ids[rp] 277 | 278 | sel = np.zeros(mask_pixels) 279 | sel[now_ids] = 1 280 | _final_mask = sel * now_mask_pred_array 281 | if i==10000: 282 | #use this method later 283 | final_mask[np.where(_final_mask==0)] = 1 284 | final_mask[np.where(_final_mask!=0)] = 0 285 | #have to try this first, finding the most important part to mask. 286 | #final_mask[np.where(final_mask!=0)] = 1 287 | 288 | now_mask = np.reshape(final_mask, (pool_len, pool_len)) 289 | _final_mask[np.where(_final_mask==0)] = 1 290 | _now_mask = np.reshape(_final_mask, (pool_len, pool_len)) 291 | if self.cnt==0: 292 | #print(sel) 293 | self.cnt += 1 294 | print(now_mask) 295 | 296 | mask_sel[i,0,:,:] = np.copy(now_mask) 297 | mask_for_loss[i,0,:,:] = np.copy(_now_mask) 298 | return mask_sel,mask_for_loss 299 | 300 | def forward(self, bottom, top): 301 | 302 | #1 means block!! 
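        # select_mask picks the permute_count lowest-valued pixels of the
        # predicted mask, randomly chooses count_drop of them, and zeroes those
        # positions (with the defaults noted above: 15 of 20, i.e. roughly a
        # third of a 7x7 grid), leaving the rest of the mask at 1.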
303 | mask_pred = np.copy(bottom[0].data) 304 | sample_num = mask_pred.shape[0] 305 | pool_len = mask_pred.shape[2] 306 | 307 | 308 | self._count_iter = (self._count_iter + 1) % self._iter_size 309 | if self._count_iter >= self._maintain_before: 310 | mask_sel,mask_for_loss = self.select_mask(mask_pred) 311 | else: 312 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 313 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 314 | 315 | 316 | #print(mask_sel[0,0,:,:]) 317 | mask_pred_tile = np.tile(mask_sel, [1, self._channels, 1, 1]) 318 | 319 | mask_inv = np.abs(1-mask_sel) 320 | #print(mask_sel[0,0,:,:]) 321 | #print(mask_inv[0,0,:,:]) 322 | top_ind = self._name_to_top_map['mask_pred_tile'] 323 | top[top_ind].reshape(*(mask_pred_tile.shape)) 324 | top[top_ind].data[...] = mask_pred_tile.astype(np.float32, copy=False) 325 | 326 | top_ind = self._name_to_top_map['mask_pred_thres'] 327 | top[top_ind].reshape(*(mask_sel.shape)) 328 | top[top_ind].data[...] = mask_sel.astype(np.float32, copy=False) 329 | 330 | top_ind = self._name_to_top_map['mask_inv'] 331 | top[top_ind].reshape(*(mask_inv.shape)) 332 | top[top_ind].data[...] = mask_inv.astype(np.float32, copy=False) 333 | 334 | 335 | def backward(self, top, propagate_down, bottom): 336 | top_0_diff = np.zeros(np.shape(top[2].diff)) 337 | top_0_diff[:,0,:,:] = np.mean(top[0].diff, axis=1) 338 | #bottom[0].diff[...] *= top[1].diff 339 | bottom[0].diff[...] = (top[2].diff + top_0_diff) 340 | #print("\n\n\n\nbottom[0].diff:") 341 | # print('inv_diff: \n',top[2].diff[0,0,:,:]) 342 | # print('cls_diff: \n',top_0_diff[0,0,:,:]) 343 | #print(top_0_diff.shape) 344 | #tile_diff = np.tile(top[0].diff, [1, 1, 1, 1]) 345 | #bottom[0].diff[...] *= (tile_diff + top[1].diff) 346 | 347 | def reshape(self, bottom, top): 348 | """Reshaping happens during the call to forward.""" 349 | pass 350 | 351 | 352 | 353 | 354 | 355 | class TileLayer2(caffe.Layer): 356 | def setup(self, bottom, top): 357 | """Setup the TileLayer.""" 358 | 359 | # parse the layer parameter string, which must be valid YAML 360 | layer_params = yaml.load(self.param_str_) 361 | 362 | self._channels = layer_params['channels'] 363 | self._count_drop = layer_params['count_drop'] 364 | self._permute_count = layer_params['permute_count'] 365 | 366 | self._iter_size = layer_params['iter_size'] 367 | self._maintain_before = layer_params['maintain_before'] # maintain the first image unchanged 368 | 369 | self._count_iter = 0 370 | 371 | self._name_to_bottom_map = { 372 | 'mask_pred': 0, 373 | 'gt_mask_fg': 1} 374 | # 0 means block, 1 means maintain 375 | 376 | self._name_to_top_map = { 377 | 'mask_pred_tile': 0 , 378 | 'mask_pred_thres':1, 379 | 'mask_inv':2} 380 | 381 | 382 | # top[0].reshape(*(bottom[0].data.shape)) 383 | top[0].reshape(bottom[0].data.shape[0], self._channels, bottom[0].data.shape[2], bottom[0].data.shape[3]) 384 | top[1].reshape(bottom[0].data.shape[0], 1, bottom[0].data.shape[2], bottom[0].data.shape[3]) 385 | top[2].reshape(bottom[0].data.shape[0], 1, bottom[0].data.shape[2], bottom[0].data.shape[3]) 386 | 387 | assert len(top) == len(self._name_to_top_map) 388 | 389 | def select_mask(self, mask_pred): 390 | #1 means block in the input 391 | cnt = 0 392 | pool_len = mask_pred.shape[2] 393 | sample_num = mask_pred.shape[0] 394 | 395 | mask_pixels = pool_len * pool_len 396 | 397 | count_drop = self._count_drop #15 398 | permute_count = self._permute_count #20 399 | 400 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 401 | mask_for_loss = 
np.ones((sample_num, 1, pool_len, pool_len)) 402 | for i in range(sample_num): 403 | 404 | #not exactly as mentioned in the paper 405 | #15/49 ~= 1/3 are selected as 0 406 | #first choose the 20 lowest-predicted pixels in the mask (trained in stage 2) 407 | #randomly choose 15 of the 20 pixels to set to zero 408 | rp = np.random.permutation(np.arange(permute_count)) 409 | rp = rp[0: count_drop] 410 | 411 | final_mask = np.ones(mask_pixels) 412 | 413 | now_mask_pred = mask_pred[i] 414 | now_mask_pred_array = np.reshape(now_mask_pred, mask_pixels) 415 | #convert the mask to an array and sort it by ascending pixel value 416 | sorted_ids = np.argsort(now_mask_pred_array) 417 | now_ids = sorted_ids[rp] 418 | 419 | sel = np.zeros(mask_pixels) 420 | sel[now_ids] = 1 421 | _final_mask = sel * now_mask_pred_array 422 | if i==100000: 423 | print(mask_pred[i,0,:,:]) 424 | #use this method later 425 | final_mask[np.where(_final_mask==0)] = 1 426 | final_mask[np.where(_final_mask!=0)] = 0 427 | #have to try this first, finding the most important part to mask. 428 | #final_mask[np.where(final_mask!=0)] = 1 429 | 430 | now_mask = np.reshape(final_mask, (pool_len, pool_len)) 431 | _final_mask[np.where(_final_mask==0)] = 1 432 | _now_mask = np.reshape(_final_mask, (pool_len, pool_len)) 433 | if cnt==10000000: 434 | print('GT: ') 435 | print(now_mask) 436 | cnt += 1 437 | mask_sel[i,0,:,:] = np.copy(now_mask) 438 | mask_for_loss[i,0,:,:] = np.copy(_now_mask) 439 | return mask_sel,mask_for_loss 440 | 441 | def forward(self, bottom, top): 442 | gt_mask_fg = np.copy(bottom[1].data) 443 | mask_pred = np.copy(bottom[0].data) 444 | sample_num = mask_pred.shape[0] 445 | pool_len = mask_pred.shape[2] 446 | #print("\n\nN:",sample_num) 447 | self._count_iter = (self._count_iter + 1) % self._iter_size#itersize = 5 448 | if self._count_iter == 0: 449 | mask_sel,mask_for_loss = self.select_mask(mask_pred) 450 | elif self._count_iter == 1: 451 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 452 | for i in range(sample_num): 453 | mask_sel_pre = np.ones(pool_len*pool_len) 454 | arg_array = np.random.permutation(range(pool_len*pool_len))[:20] 455 | mask_sel_pre[arg_array] = 0 456 | mask_rand = np.reshape(mask_sel_pre,(pool_len,pool_len)) 457 | mask_sel[i,0,:,:] = np.copy(mask_rand) 458 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 459 | elif self._count_iter == 2: 460 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 461 | for i in range(sample_num): 462 | mask_sel_pre = np.ones((pool_len,pool_len)) 463 | rnd = np.random.randint(0,4) 464 | drop = pool_len/2 + 1 465 | if rnd == 0 : 466 | mask_sel_pre[:,:drop] = 0 467 | elif rnd == 1 : 468 | mask_sel_pre[:,drop-1:] = 0 469 | elif rnd == 2 : 470 | mask_sel_pre[:drop,:] = 0 471 | else: 472 | mask_sel_pre[drop-1:,:] = 0 473 | mask_sel[i,0,:,:] = np.copy(mask_sel_pre) 474 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 475 | else: 476 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 477 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 478 | 479 | if not np.all(np.unique(gt_mask_fg) == 1 ): 480 | # print(np.unique(gt_mask_fg)) 481 | mask_sel = np.ones((sample_num, 1, pool_len, pool_len)) 482 | mask_for_loss = np.ones((sample_num, 1, pool_len, pool_len)) 483 | # else: 484 | # print(np.unique(gt_mask_fg)) 485 | 486 | mask_inv = np.abs(1-mask_sel) 487 | 488 | #print(mask_sel[0,0,:,:]) 489 | mask_pred_tile = np.tile(mask_sel, [1, self._channels, 1, 1]) 490 | #print(mask_pred_tile[0,0,:,:]) 491 | top_ind = 
self._name_to_top_map['mask_pred_tile'] 492 | top[top_ind].reshape(*(mask_pred_tile.shape)) 493 | top[top_ind].data[...] = mask_pred_tile.astype(np.float32, copy=False) 494 | 495 | top_ind = self._name_to_top_map['mask_pred_thres'] 496 | top[top_ind].reshape(*(mask_sel.shape)) 497 | top[top_ind].data[...] = mask_sel.astype(np.float32, copy=False) 498 | 499 | top_ind = self._name_to_top_map['mask_inv'] 500 | top[top_ind].reshape(*(mask_inv.shape)) 501 | top[top_ind].data[...] = mask_inv.astype(np.float32, copy=False) 502 | 503 | 504 | def backward(self, top, propagate_down, bottom): 505 | top_0_diff = np.zeros(np.shape(top[2].diff)) 506 | top_0_diff[:,0,:,:] = np.mean(top[0].diff, axis=1) 507 | #bottom[0].diff[...] *= top[1].diff 508 | bottom[0].diff[...] = (top[2].diff + top_0_diff) 509 | #print("\n\n\n\nbottom[0].diff:") 510 | #print(top_0_diff[:,0,:,:]) 511 | #print(top_0_diff.shape) 512 | #tile_diff = np.tile(top[0].diff, [1, 1, 1, 1]) 513 | #bottom[0].diff[...] *= (tile_diff + top[1].diff) 514 | 515 | def reshape(self, bottom, top): 516 | """Reshaping happens during the call to forward.""" 517 | pass 518 | 519 | 520 | 521 | class SiftFaceLayer(caffe.Layer): 522 | def setup(self, bottom, top): 523 | 524 | # parse the layer parameter string, which must be valid YAML 525 | layer_params = yaml.load(self.param_str_) 526 | self.onlyface_mask = np.ones(bottom[0].data.shape) 527 | 528 | self._name_to_bottom_map = { 529 | 'conv5_3': 0, 530 | 'bbox_pred': 1, 531 | 'cls_score': 2, 532 | 'gt_mask': 3, 533 | 'rois': 4, 534 | 'im_info': 5} 535 | 536 | self._name_to_top_map = { 537 | 'onlyface': 0, 538 | 'gt_mask_fg': 1} 539 | 540 | top[0].reshape(*(bottom[0].data.shape)) 541 | top[1].reshape(*(bottom[3].data.shape)) 542 | 543 | print 'SiftFaceLayer: name_to_top:', self._name_to_top_map 544 | assert len(top) == len(self._name_to_top_map) 545 | 546 | def forward(self, bottom, top): 547 | 548 | #conv5_3 = np.copy(bottom[0].data) 549 | assert(bottom[0].data.shape[0] == 1) 550 | box_deltas = np.copy(bottom[1].data) 551 | scores = np.copy(bottom[2].data) 552 | gt_mask_fg = np.copy(bottom[3].data) 553 | #print(np.mean(gt_mask_fg)) 554 | onlyface = np.copy(bottom[0].data) 555 | rois = np.copy(bottom[4].data) 556 | im_info = np.copy(bottom[5].data) 557 | 558 | boxes = rois[:, 1:5] 559 | pred_boxes = bbox_transform_inv(boxes, box_deltas) 560 | # boxes = clip_boxes(pred_boxes, gt_mask_fg[0,0,:,:].shape[::-1]) 561 | boxes = clip_boxes(pred_boxes, (int(im_info[0][0]),int(im_info[0][1]))) 562 | 563 | 564 | if np.all(np.unique(gt_mask_fg) == 1): 565 | ## masks for images without occlusion are set to all ones 566 | onlyface = np.zeros(onlyface.shape) 567 | gt_mask_fg = np.zeros(gt_mask_fg.shape) 568 | #print(np.sum(gt_mask_fg)) 569 | else: 570 | #print('nonzero input !!!') 571 | CONF_THRESH = 0.6 572 | NMS_THRESH = 0.25 573 | zoom = 16 574 | 575 | #find face areas 576 | cls_ind = 1 577 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 578 | cls_scores = scores[:, cls_ind] 579 | dets = np.hstack((cls_boxes, 580 | cls_scores[:, np.newaxis])).astype(np.float32) 581 | keep = nms(dets, NMS_THRESH) 582 | dets = dets[keep, :] 583 | 584 | keep = np.where(dets[:, 4] > CONF_THRESH) 585 | dets = dets[keep] #shape (n,5): n predicted boxes, each row holds the top-left and bottom-right coords plus a score 586 | #enlarge boxes 587 | # dets[:,:4] *= 1.1 588 | # print(dets) 589 | # print(dets.shape) 590 | # print(bottom[3].data.shape) 591 | # print(bottom[0].data.shape) 592 | #generate a mask for gt mask 593 | mask4gt = 
np.zeros(bottom[3].data.shape) 594 | for each in dets: 595 | mask4gt[:,:,each[1]:each[3]+1,each[0]:each[2]+1] = 1 596 | 597 | # gt_mask_fg *= mask4gt 598 | 599 | # map to conv5_3 600 | dets[:,:4] //= zoom 601 | 602 | #generate a mask for conv5_3 603 | mask4conv = np.zeros(bottom[0].data.shape) 604 | for each in dets: 605 | mask4conv[:,:,each[1]:each[3]+1,each[0]:each[2]+1] = 1 606 | 607 | # onlyface *= mask4conv 608 | self.onlyface_mask = mask4conv 609 | 610 | # print(np.sum(onlyface)) 611 | 612 | top_ind = self._name_to_top_map['onlyface'] 613 | top[top_ind].reshape(*(onlyface.shape)) 614 | top[top_ind].data[...] = onlyface.astype(np.float32, copy=False) 615 | 616 | top_ind = self._name_to_top_map['gt_mask_fg'] 617 | top[top_ind].reshape(*(gt_mask_fg.shape)) 618 | top[top_ind].data[...] = gt_mask_fg.astype(np.float32, copy=False) 619 | 620 | 621 | def backward(self, top, propagate_down, bottom): 622 | gt_mask_fg = np.copy(bottom[3].data) 623 | # masks for images without occlusion are set to all ones 624 | if np.all(np.unique(gt_mask_fg) == 1): 625 | #print('back 0') 626 | bottom[0].diff[...] = 0 627 | else: 628 | #print("back") 629 | for i in range(4): 630 | if not propagate_down[i]: 631 | continue 632 | else: 633 | # bottom[0].diff[...] = top[0].diff * self.onlyface_mask 634 | bottom[0].diff[...] = top[0].diff 635 | 636 | def reshape(self, bottom, top): 637 | 638 | """Reshaping happens during the call to forward.""" 639 | pass 640 | 641 | 642 | 643 | class SiftFace4TestLayer(caffe.Layer): 644 | def setup(self, bottom, top): 645 | 646 | # parse the layer parameter string, which must be valid YAML 647 | layer_params = yaml.load(self.param_str_) 648 | self.onlyface_mask = np.ones(bottom[0].data.shape) 649 | 650 | self._name_to_bottom_map = { 651 | 'conv5_3': 0, 652 | 'bbox_pred': 1, 653 | 'cls_score': 2, 654 | 'rois': 3, 655 | 'im_info': 4} 656 | 657 | self._name_to_top_map = { 658 | 'onlyface': 0} 659 | 660 | top[0].reshape(*(bottom[0].data.shape)) 661 | 662 | 663 | print 'SiftFace4TestLayer: name_to_top:', self._name_to_top_map 664 | assert len(top) == len(self._name_to_top_map) 665 | 666 | def forward(self, bottom, top): 667 | 668 | #conv5_3 = np.copy(bottom[0].data) 669 | assert(bottom[0].data.shape[0] == 1) 670 | box_deltas = np.copy(bottom[1].data) 671 | scores = np.copy(bottom[2].data) 672 | onlyface = np.copy(bottom[0].data) 673 | rois = np.copy(bottom[3].data) 674 | im_info = np.copy(bottom[4].data) 675 | #print('layer rois: ',rois) 676 | boxes = rois[:, 1:5] 677 | pred_boxes = bbox_transform_inv(boxes, box_deltas) 678 | # boxes = clip_boxes(pred_boxes, gt_mask_fg[0,0,:,:].shape[::-1]) 679 | boxes = clip_boxes(pred_boxes, (int(im_info[0][0]),int(im_info[0][1]))) 680 | #print('im_info',(int(im_info[0][0]),int(im_info[0][1]),int(im_info[0][2]))) 681 | 682 | 683 | CONF_THRESH = 0.65 684 | NMS_THRESH = 0.15 685 | zoom = 16 686 | 687 | 688 | # print('layerbox:', boxes) 689 | #find face areas 690 | cls_ind = 1 691 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 692 | cls_scores = scores[:, cls_ind] 693 | dets = np.hstack((cls_boxes, 694 | cls_scores[:, np.newaxis])).astype(np.float32) 695 | keep = nms(dets, NMS_THRESH) 696 | dets = dets[keep, :] 697 | 698 | keep = np.where(dets[:, 4] > CONF_THRESH) 699 | dets = dets[keep] #shape (n,5): n predicted boxes, each row holds the top-left and bottom-right coords plus a score 700 | #enlarge boxes 701 | #print('dddets: ',dets) 702 | # dets[:,:4] *= 1 703 | # print(dets) 704 | # print(dets.shape) 705 | # print(bottom[3].data.shape) 706 | # 
print(bottom[0].data.shape) 707 | #generate a mask for gt mask 708 | # mask4gt = np.zeros(bottom[3].data.shape) 709 | # for each in dets: 710 | # mask4gt[:,:,each[0]:each[2]+1,each[1]:each[3]+1] = 1 711 | 712 | # gt_mask_fg *= mask4gt 713 | 714 | 715 | # map to conv5_3 716 | dets[:,:4] //= zoom 717 | #print('conv53:', bottom[0].data.shape) 718 | #print('premask: ',dets.shape) 719 | #generate a mask for conv5_3 720 | mask4conv = np.zeros(bottom[0].data.shape) 721 | for each in dets: 722 | mask4conv[:,:,each[1]:each[3]+1,each[0]:each[2]+1] = 1 723 | 724 | # pickle.dump(mask4conv, open("vis.txt", "w")) 725 | onlyface *= mask4conv 726 | self.onlyface_mask = mask4conv 727 | 728 | # print(np.sum(onlyface)) 729 | 730 | top_ind = self._name_to_top_map['onlyface'] 731 | top[top_ind].reshape(*(onlyface.shape)) 732 | top[top_ind].data[...] = onlyface.astype(np.float32, copy=False) 733 | 734 | 735 | 736 | def backward(self, top, propagate_down, bottom): 737 | pass 738 | def reshape(self, bottom, top): 739 | 740 | """Reshaping happens during the call to forward.""" 741 | pass 742 | 743 | 744 | # class SiftFace4TestLayer(caffe.Layer): 745 | # def setup(self, bottom, top): 746 | 747 | # # parse the layer parameter string, which must be valid YAML 748 | # layer_params = yaml.load(self.param_str_) 749 | # self.onlyface_mask = np.ones(bottom[0].data.shape) 750 | 751 | # self._name_to_bottom_map = { 752 | # 'conv5_3': 0, 753 | # 'bbox_pred': 1, 754 | # 'cls_score':2 } 755 | 756 | # self._name_to_top_map = { 757 | # 'onlyface': 0} 758 | 759 | # top[0].reshape(*(bottom[0].data.shape)) 760 | 761 | # print 'SiftFaceLayer: name_to_top:', self._name_to_top_map 762 | # assert len(top) == len(self._name_to_top_map) 763 | 764 | # def forward(self, bottom, top): 765 | 766 | # #conv5_3 = np.copy(bottom[0].data) 767 | # assert(bottom[0].data.shape[0] == 1) 768 | # boxes = np.copy(bottom[1].data) 769 | # scores = np.copy(bottom[2].data) 770 | 771 | 772 | # onlyface = np.copy(bottom[0].data) 773 | 774 | # CONF_THRESH = 0.6 775 | # NMS_THRESH = 0.3 776 | # zoom = 16 777 | 778 | # #find face areas 779 | # cls_ind = 1 780 | # cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 781 | # cls_scores = scores[:, cls_ind] 782 | # dets = np.hstack((cls_boxes, 783 | # cls_scores[:, np.newaxis])).astype(np.float32) 784 | # keep = nms(dets, NMS_THRESH) 785 | # dets = dets[keep, :] 786 | 787 | # keep = np.where(dets[:, 4] > CONF_THRESH) 788 | # dets = dets[keep] #shape(n,5) n means n predictes boxes, 5 includes top left and bottom right coords and a score 789 | # #enlarge boxes 790 | # dets[:,:4] *= 1.3 791 | 792 | # # map to conv5_3 793 | # dets[:,:4] //= zoom 794 | 795 | # #generate a mask for conv5_3 796 | # mask4conv = np.zeros(bottom[0].data.shape) 797 | # for each in dets: 798 | # mask4conv[:,:,each[0]:each[2]+1,each[1]:each[3]+1] = 1 799 | 800 | # # onlyface *= mask4conv 801 | # self.onlyface_mask = mask4conv 802 | 803 | # # print(np.sum(onlyface)) 804 | 805 | # top_ind = self._name_to_top_map['onlyface'] 806 | # top[top_ind].reshape(*(onlyface.shape)) 807 | # top[top_ind].data[...] 
= onlyface.astype(np.float32, copy=False) 808 | 809 | 810 | 811 | # def backward(self, top, propagate_down, bottom): 812 | # pass 813 | 814 | # def reshape(self, bottom, top): 815 | 816 | # """Reshaping happens during the call to forward.""" 817 | # pass 818 | 819 | 820 | 821 | 822 | 823 | 824 | 825 | 826 | 827 | 828 | class ShuffleMaskLayer(caffe.Layer): 829 | def setup(self, bottom, top): 830 | # parse the layer parameter string, which must be valid YAML 831 | layer_params = yaml.load(self.param_str_) 832 | 833 | self._channels = layer_params['channels'] 834 | self._name_to_bottom_map = { 835 | 'mask_pred_thres': 0 } 836 | self._name_to_top_map = { 837 | 'mask_pred_tile_shuffle': 0} 838 | 839 | top[0].reshape(bottom[0].data.shape[0], self._channels, bottom[0].data.shape[2], bottom[0].data.shape[3]) 840 | print 'ShuffleMaskLayer: name_to_top:', self._name_to_top_map 841 | assert len(top) == len(self._name_to_top_map) 842 | 843 | def forward(self, bottom, top): 844 | mask_pred_thres = np.copy(bottom[0].data) 845 | sample_num = mask_pred_thres.shape[0] 846 | pool_len = mask_pred_thres.shape[2] 847 | mask_pixels = pool_len * pool_len 848 | 849 | mask_pred_tile_shuffle = np.ones((sample_num, self._channels, pool_len, pool_len)) 850 | 851 | for i in range(sample_num): 852 | drop_cnt = len(np.where(mask_pred_thres[i,0,:,:]==0)[0]) 853 | mask_thres_array = np.reshape(mask_pred_thres[i,0,:,:], mask_pixels) 854 | drop_ind = np.where(mask_thres_array==0)[0] 855 | for j in range(self._channels): 856 | rnd = np.random.rand(drop_cnt) 857 | shuffle_mask = np.ones(mask_pixels) 858 | shuffle_mask[drop_ind] = rnd 859 | _shuffle_mask = np.reshape(shuffle_mask, (pool_len, pool_len)) 860 | mask_pred_tile_shuffle[i,j,:,:] = np.copy(_shuffle_mask) 861 | 862 | top_ind = self._name_to_top_map['mask_pred_tile_shuffle'] 863 | top[top_ind].reshape(*(mask_pred_tile_shuffle.shape)) 864 | top[top_ind].data[...] = mask_pred_tile_shuffle.astype(np.float32, copy=False) 865 | 866 | def backward(self, top, propagate_down, bottom): 867 | pass 868 | 869 | 870 | def reshape(self, bottom, top): 871 | """Reshaping happens during the call to forward.""" 872 | pass 873 | 874 | 875 | 876 | 877 | 878 | 879 | class MaskPredLossLayer(caffe.Layer): 880 | def setup(self, bottom, top): 881 | layer_params = yaml.load(self.param_str_) 882 | self._name_to_bottom_map = { 883 | 'mask_pred': 0, 884 | 'mask_gt': 1} 885 | self._name_to_top_map = { 886 | 'loss': 0} 887 | self.ignore_label = None 888 | top[0].reshape(1) 889 | print 'MaskPredLossLayer: name_to_top:', self._name_to_top_map 890 | assert len(top) == len(self._name_to_top_map) 891 | 892 | def forward(self, bottom, top): 893 | N = bottom[0].shape[0] 894 | mask_pred = bottom[0].data 895 | mask_label = bottom[1].data 896 | 897 | ary = np.reshape(mask_pred[0,0,:,:],49) 898 | ids = np.argsort(ary) 899 | ary[ids[:15]]=0 900 | ary[np.where(ary!=0)]=1 901 | msk = np.reshape(ary,(7,7)) 902 | #print("mask_pred: ") 903 | #print(msk) 904 | 905 | count_bit = 1 906 | for i in range(len(bottom[0].shape)): 907 | count_bit = count_bit * bottom[0].shape[i] 908 | 909 | # copy from: https://github.com/philkr/voc-classification/blob/master/src/python_layers.py#L52 910 | f, df, t = bottom[0].data, bottom[0].diff, bottom[1].data 911 | mask = (self.ignore_label is None or t != self.ignore_label) 912 | lZ = np.log(1+np.exp(-np.abs(f))) * mask 913 | dlZ = np.exp(np.minimum(f,0))/(np.exp(np.minimum(f,0))+np.exp(-np.maximum(f,0))) * mask 914 | 915 | 916 | # top[0].data[...] 


class MaskPredLossLayer(caffe.Layer):
    def setup(self, bottom, top):
        layer_params = yaml.load(self.param_str_)
        self._name_to_bottom_map = {
            'mask_pred': 0,
            'mask_gt': 1}
        self._name_to_top_map = {
            'loss': 0}
        self.ignore_label = None
        top[0].reshape(1)
        print('MaskPredLossLayer: name_to_top:', self._name_to_top_map)
        assert len(top) == len(self._name_to_top_map)

    def forward(self, bottom, top):
        N = bottom[0].shape[0]
        mask_pred = bottom[0].data
        mask_label = bottom[1].data

        # debug view of the first sample's binarized prediction; work on a
        # copy, otherwise the reshaped view would overwrite bottom[0].data
        # before the loss below reads it
        ary = np.reshape(mask_pred[0,0,:,:].copy(), 49)
        ids = np.argsort(ary)
        ary[ids[:15]] = 0
        ary[np.where(ary != 0)] = 1
        msk = np.reshape(ary, (7,7))
        #print("mask_pred: ")
        #print(msk)

        # total number of elements in the prediction blob
        count_bit = 1
        for i in range(len(bottom[0].shape)):
            count_bit = count_bit * bottom[0].shape[i]

        # copied from: https://github.com/philkr/voc-classification/blob/master/src/python_layers.py#L52
        f, df, t = bottom[0].data, bottom[0].diff, bottom[1].data
        mask = (self.ignore_label is None or t != self.ignore_label)
        lZ = np.log(1 + np.exp(-np.abs(f))) * mask
        dlZ = np.exp(np.minimum(f,0)) / (np.exp(np.minimum(f,0)) + np.exp(-np.maximum(f,0))) * mask

        # top[0].data[...] = np.sum(lZ + ((f>0)-t)*f * mask) / N
        # df[...] = (dlZ - t*mask) / N

        lZ = lZ + ((f>0)-t)*f * mask
        df[...] = (dlZ - t*mask) / count_bit

        # zero the loss and gradient for samples whose ground-truth mask sums
        # to 49 (all ones) or to 20
        for i in range(N):
            if (np.sum(mask_label[i,0,:,:]) == 49 or np.sum(mask_label[i,0,:,:]) == 20):
                lZ[i] = lZ[i] * 0.0
                df[i] = df[i] * 0.0

        # for i in range(N):
        #     lbl = labels[i]
        #     prop_before_select = prop_before[i][lbl]
        #     prop_after_select = prop_after[i][lbl]
        #
        #     if (lbl > 0 and prop_after_select + self._score_thres < prop_before_select) == False:
        #         lZ[i] = lZ[i] * 0.0
        #         df[i] = df[i] * 0.0

        top[0].data[...] = np.sum(lZ) / count_bit

    def backward(self, top, prop, bottom):
        bottom[0].diff[...] *= top[0].diff

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass


class MaskGenLayer(caffe.Layer):
    def setup(self, bottom, top):
        # parse the layer parameter string, which must be valid YAML
        layer_params = yaml.load(self.param_str_)

        self._channels = layer_params['channels']
        self._means = layer_params['means']
        #self._count_drop = layer_params['count_drop']
        #self._permute_count = layer_params['permute_count']

        # self._iter_size = layer_params['iter_size']
        # self._maintain_before = layer_params['maintain_before']  # maintain the first image unchanged
        #
        # self._count_iter = 0

        self._name_to_bottom_map = {
            'mask_pred': 0}

        # 0 means block, 1 means maintain

        self._name_to_top_map = {
            'mask_pred_tile': 0,
            'mask_pred_thres': 1}

        # top[0].reshape(*(bottom[0].data.shape))
        top[0].reshape(bottom[0].data.shape[0], self._channels, 7, 7)
        top[1].reshape(bottom[0].data.shape[0], self._channels, 7, 7)

        assert len(top) == len(self._name_to_top_map)

    def generate_mask(self, mask_pred):
        # 0 means block in the input
        pool_len = 7
        k = mask_pred.shape[2]
        stride = pool_len // k                       # integer division (works under Python 2 and 3)
        stride_up = pool_len // k + int(pool_len % k > 0)
        sample_num = mask_pred.shape[0]

        mask_pixels = k * k

        # count_drop = self._count_drop      # 15
        # permute_count = self._permute_count  # 20

        mask_gen = np.ones((sample_num, 1, pool_len, pool_len))
        # 2x2 block mask (returned but currently unused by forward)
        mask_2_2 = np.ones((sample_num, 1, stride, stride))

        for i in range(sample_num):
            now_mask_pred = mask_pred[i]
            now_mask_pred_array = np.reshape(now_mask_pred, mask_pixels)
            # convert the mask to an array and sort it ascending by pixel
            # value; the two lowest-scoring cells get blocked
            sorted_ids = np.argsort(now_mask_pred_array)
            now_ids = sorted_ids[:2]
            for ii in now_ids:
                if ii // 2 == 0:                     # top row of the k x k grid
                    mask_gen[i,0,:stride_up,ii*stride:ii*stride+stride_up] = 0
                    mask_2_2[i,0,0,ii%2] = 0
                else:                                # bottom row
                    j = ii % 2
                    mask_gen[i,0,stride:stride+stride,j*stride:j*stride+stride] = 0
                    mask_2_2[i,0,1,ii%2] = 0

        #if ii == 0:
        #print(now_ids)
        #print(mask_pred[0])
        # print("mask:")
        #print(mask_gen[0])

        return mask_gen, mask_2_2

    def forward(self, bottom, top):
        # 0 means block!!
        mask_pred = np.copy(bottom[0].data)

        # self._count_iter = (self._count_iter + 1) % self._iter_size
        # if self._count_iter >= self._maintain_before:
        mask_gen, mask_2_2 = self.generate_mask(mask_pred)
        # else:
        #     mask_gen = np.ones((sample_num, 1, pool_len, pool_len))

        mask_pred_tile = np.tile(mask_gen, [1, self._channels, 1, 1])

        top_ind = self._name_to_top_map['mask_pred_tile']
        top[top_ind].reshape(*(mask_pred_tile.shape))
        top[top_ind].data[...] = mask_pred_tile.astype(np.float32, copy=False)

        top_ind = self._name_to_top_map['mask_pred_thres']
        top[top_ind].reshape(*(mask_gen.shape))
        top[top_ind].data[...] = mask_gen.astype(np.float32, copy=False)

        #print("\n\nind_shape: ", mask_gen.shape)

    def backward(self, top, propagate_down, bottom):
        # pool the 7x7 top gradient back onto the 2x2 prediction grid by
        # averaging each (overlapping) 4x4 quadrant
        top_diff = np.zeros((top[1].diff.shape[0], top[1].diff.shape[1],
                             top[1].diff.shape[2] // 3, top[1].diff.shape[3] // 3))
        for i in range(top[1].diff.shape[0]):
            top_diff[i,0,0,0] = np.mean(top[1].diff[i,0,:4,:4])
            top_diff[i,0,0,1] = np.mean(top[1].diff[i,0,:4,3:])
            top_diff[i,0,1,0] = np.mean(top[1].diff[i,0,3:,:4])
            top_diff[i,0,1,1] = np.mean(top[1].diff[i,0,3:,3:])
        bottom[0].diff[...] = top_diff

        #bottom[0].diff[...] = top[1].diff
        #print("ind_loss:")
        #print(bottom[0].diff[0,:,:,:])

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
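

# ---------------------------------------------------------------------------
# Illustrative sketch (added for documentation; never called by the network):
# what MaskGenLayer.generate_mask does for one sample -- block the two
# lowest-scoring cells of a 2x2 prediction grid, upsampled onto the 7x7
# RoI-pooled mask. The score values are made up.
def _demo_generate_mask():
    import numpy as np
    stride, stride_up = 3, 4                     # 7 // 2 and ceil(7 / 2)
    pred = np.array([0.9, 0.1, 0.2, 0.8])        # hypothetical 2x2 scores, row-major
    mask = np.ones((7, 7))
    for ii in np.argsort(pred)[:2]:              # the two lowest-scoring cells
        if ii // 2 == 0:                         # top row: a 4x4 block is zeroed
            mask[:stride_up, ii*stride:ii*stride+stride_up] = 0
        else:                                    # bottom row: a 3x3 block is zeroed
            j = ii % 2
            mask[stride:2*stride, j*stride:j*stride+stride] = 0
    return mask
# ---------------------------------------------------------------------------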


class SumLossLayer(caffe.Layer):
    def setup(self, bottom, top):
        layer_params = yaml.load(self.param_str_)
        self._name_to_bottom_map = {
            'mask_pred': 0}
        self._name_to_top_map = {
            'loss': 0}
        top[0].reshape(1)
        print('SumLossLayer: name_to_top:', self._name_to_top_map)
        assert len(top) == len(self._name_to_top_map)

    def forward(self, bottom, top):
        mask_pred = np.copy(bottom[0].data)
        #print('\nmask_pred: ', mask_pred[0,0,:,:])
        batchSz = bottom[0].data.shape[0]
        mask_pred[np.where(mask_pred < 0)] = 0    # rectify: only positive predictions contribute
        #print('\n\nmask_pred:')
        #print(mask_pred[0,0,:,:])
        top[0].data[...] = np.sum(mask_pred) / batchSz

    def backward(self, top, propagate_down, bottom):
        mask_pred = np.copy(bottom[0].data)
        mask_pred[np.where(mask_pred < 0)] = 0
        back = 1e-5 * np.ones(bottom[0].data.shape)
        back *= mask_pred
        batchSz = bottom[0].data.shape[0]
        #print('\ndiff: ', back[0,0,:,:])
        bottom[0].diff[...] = back

    def reshape(self, bottom, top):
        pass
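

# ---------------------------------------------------------------------------
# Reference sketch (added for documentation; never called by the network) of
# the L1 loss and its gradients as implemented by L1LossLayer below:
# d|a-b|/da = sign(a-b) and d|a-b|/db = -sign(a-b). The arrays are whatever
# the caller passes in; 24 mirrors the layer's normalization constant.
def _demo_l1_loss(a, b, batch_sz):
    import numpy as np
    norm = float(batch_sz * 24)
    loss = np.sum(np.abs(a - b)) / norm
    da = np.sign(a - b) / norm                   # gradient w.r.t. the prediction
    db = -np.sign(a - b) / norm                  # gradient w.r.t. the target branch
    return loss, da, db
# ---------------------------------------------------------------------------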


# Simple L1 loss layer
class L1LossLayer(caffe.Layer):
    def setup(self, bottom, top):
        # parse the layer parameter string, which must be valid YAML
        layer_params = yaml.load(self.param_str_)

        self.loss_weight = layer_params['loss_weight']

        self._name_to_bottom_map = {
            'mask_gen_thres': 0,
            'mask_ind_thres': 1}

        # 0 means block, 1 means maintain

        self._name_to_top_map = {
            'loss': 0}

        assert len(bottom) == 2, 'There should be two bottom blobs'
        predShape = bottom[0].data.shape
        gtShape = bottom[1].data.shape
        for i in range(len(predShape)):
            assert predShape[i] == gtShape[i], 'Mismatch: %d, %d' % (predShape[i], gtShape[i])
        assert bottom[0].data.squeeze().ndim == bottom[1].data.squeeze().ndim, 'Shape Mismatch'

        print("bottom[0].shape", bottom[0].shape)
        print("bottom[0].data.shape", bottom[0].data.shape)

        # Get the batchSz
        self.batchSz_ = gtShape[0]
        # Form the top
        assert len(top) == 1, 'There should be only one output blob'
        top[0].reshape(1,1,1,1)

    def forward(self, bottom, top):
        #print("lossbottomshape:", bottom[0].data.shape, bottom[1].data.shape)
        batchSz = bottom[0].data.shape[0]
        top[0].data[...] = np.sum(np.abs(bottom[0].data[...].squeeze()
                                         - bottom[1].data[...].squeeze())) / float(batchSz*24)
        #print("loss weight: ", self.loss_weight)
        #print('Loss is %f' % top[0].data[0])
        #print(bottom[0].data[...].squeeze()[0])
        #print(bottom[1].data[...].squeeze()[0])
        #print(np.sum(np.abs(bottom[0].data[...].squeeze() - bottom[1].data[...].squeeze())) / float(batchSz))
        #print("batch_Sz:")
        #print(float(self.batchSz_))

    def backward(self, top, propagate_down, bottom):
        batchSz = bottom[0].data.shape[0]
        # d|a-b|/da = sign(a-b); keep the full blob shape so the assignment
        # matches the diff blobs, and flip the sign for the second (target)
        # bottom, since d|a-b|/db = -sign(a-b)
        diff = np.sign(bottom[0].data - bottom[1].data) / float(batchSz*24)
        bottom[0].diff[...] = diff
        bottom[1].diff[...] = -diff
        #print("\n\n\n\nloss.diff:")
        #print(bottom[0].diff)

    def reshape(self, bottom, top):
        top[0].reshape(1,1,1,1)

--------------------------------------------------------------------------------