├── .gitignore ├── Images ├── 1.png_face.png ├── 11.png_face.png ├── 13.png_face.png ├── 4.png_face.png ├── 6.png_face.png └── 9.png_face.png ├── README.md ├── SfSNet-Caffe ├── SfSNet.caffemodel.h5 ├── SfSNet_deploy.prototxt ├── __init__.py ├── convert_to_pkl.py ├── read_h5.py └── weights.pkl ├── SfSNet_test.py ├── config.py ├── data └── SfSNet.pth ├── requirements.txt └── src ├── __init__.py ├── functions.py ├── mask.py ├── model.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.pyc 3 | result -------------------------------------------------------------------------------- /Images/1.png_face.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mannix1994/SfSNet-Pytorch/c2c1ed96b20dab66c5f84fe41ccb5d08aaa2291a/Images/1.png_face.png -------------------------------------------------------------------------------- /Images/11.png_face.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mannix1994/SfSNet-Pytorch/c2c1ed96b20dab66c5f84fe41ccb5d08aaa2291a/Images/11.png_face.png -------------------------------------------------------------------------------- /Images/13.png_face.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mannix1994/SfSNet-Pytorch/c2c1ed96b20dab66c5f84fe41ccb5d08aaa2291a/Images/13.png_face.png -------------------------------------------------------------------------------- /Images/4.png_face.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mannix1994/SfSNet-Pytorch/c2c1ed96b20dab66c5f84fe41ccb5d08aaa2291a/Images/4.png_face.png -------------------------------------------------------------------------------- /Images/6.png_face.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mannix1994/SfSNet-Pytorch/c2c1ed96b20dab66c5f84fe41ccb5d08aaa2291a/Images/6.png_face.png -------------------------------------------------------------------------------- /Images/9.png_face.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mannix1994/SfSNet-Pytorch/c2c1ed96b20dab66c5f84fe41ccb5d08aaa2291a/Images/9.png_face.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This project partially implements the SfSNet project. 3 | I will implement all the code from the SfSNet project in the future, 4 | if I have enough time. 5 | 6 | Currently implemented: 7 | * Implement test_SfSNet.m as SfSNet_test.py 8 | * Implement functions/*.m in src/functions.py 9 | * Move some constant variables to config.py 10 | 11 | If there are bugs in SfSNet_test.py, please open an issue. 12 | 13 | # Dependencies 14 | * Python libs in requirements.txt 15 | 16 | # Run SfSNet_test.py 17 | * Download shape_predictor_68_face_landmarks.dat from: 18 | http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 , 19 | and uncompress it to directory `data`.
20 | 21 | * Create and activate a virtual environment 22 | * for python 2.* 23 | ```bash 24 | pip install virtualenv 25 | virtualenv -p python2 venv2 26 | source venv2/bin/activate 27 | ``` 28 | * for python 3.* 29 | ```bash 30 | pip install virtualenv 31 | virtualenv -p python3 venv3 32 | source venv3/bin/activate 33 | ``` 34 | * Install python dependencies using command: 35 | ```bash 36 | pip install -r requirements.txt 37 | ``` 38 | * Put your test images in 'Images', and 39 | ```bash 40 | python SfSNet_test.py 41 | ``` 42 | -------------------------------------------------------------------------------- /SfSNet-Caffe/SfSNet.caffemodel.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mannix1994/SfSNet-Pytorch/c2c1ed96b20dab66c5f84fe41ccb5d08aaa2291a/SfSNet-Caffe/SfSNet.caffemodel.h5 -------------------------------------------------------------------------------- /SfSNet-Caffe/SfSNet_deploy.prototxt: -------------------------------------------------------------------------------- 1 | name : "PS-Net" 2 | 3 | #data 4 | layer { 5 | name: "data" 6 | type: "Input" 7 | top: "data" 8 | input_param { shape: { dim: 1 dim: 3 dim: 128 dim: 128 } } 9 | } 10 | 11 | ############################ Initial 12 | #C64 13 | layer { 14 | name: "conv1" 15 | type: "Convolution" 16 | bottom: "data" 17 | top: "conv1" 18 | param { 19 | name : "c1_w" 20 | lr_mult: 1 21 | decay_mult: 1 22 | } 23 | param { 24 | name : "c1_b" 25 | lr_mult: 2 26 | decay_mult: 0 27 | } 28 | convolution_param { 29 | num_output: 64 30 | kernel_size: 7 31 | stride: 1 32 | pad: 3 33 | weight_filler { 34 | type: "xavier" 35 | } 36 | } 37 | } 38 | 39 | layer { 40 | name: "bn1" 41 | type: "BatchNorm" 42 | bottom: "conv1" 43 | top: "conv1" 44 | batch_norm_param { 45 | use_global_stats: false 46 | } 47 | param { 48 | name : "b1_a" 49 | lr_mult: 0 50 | } 51 | param { 52 | name: "b1_b" 53 | lr_mult: 0 54 | } 55 | param { 56 | name: "b1_c" 57 | lr_mult: 0 
58 | } 59 | include { 60 | phase: TRAIN 61 | } 62 | } 63 | layer { 64 | name: "bn1" 65 | type: "BatchNorm" 66 | bottom: "conv1" 67 | top: "conv1" 68 | batch_norm_param { 69 | use_global_stats: true 70 | } 71 | param { 72 | name : "b1_a" 73 | lr_mult: 0 74 | } 75 | param { 76 | name : "b1_b" 77 | lr_mult: 0 78 | } 79 | param { 80 | name: "b1_c" 81 | lr_mult: 0 82 | } 83 | include { 84 | phase: TEST 85 | } 86 | } 87 | 88 | layer { 89 | name: "relu1" 90 | type: "ReLU" 91 | bottom: "conv1" 92 | top: "conv1" 93 | } 94 | 95 | #C128 96 | layer { 97 | name: "conv2" 98 | type: "Convolution" 99 | bottom: "conv1" 100 | top: "conv2" 101 | param { 102 | name: "c2_w" 103 | lr_mult: 1 104 | decay_mult: 1 105 | } 106 | param { 107 | name: "c2_b" 108 | lr_mult: 2 109 | decay_mult: 0 110 | } 111 | convolution_param { 112 | num_output: 128 113 | kernel_size: 3 114 | stride: 1 115 | pad: 1 116 | weight_filler { 117 | type: "xavier" 118 | } 119 | } 120 | } 121 | 122 | layer { 123 | name: "bn2" 124 | type: "BatchNorm" 125 | bottom: "conv2" 126 | top: "conv2" 127 | batch_norm_param { 128 | use_global_stats: false 129 | } 130 | param { 131 | name : "b2_a" 132 | lr_mult: 0 133 | } 134 | param { 135 | name : "b2_b" 136 | lr_mult: 0 137 | } 138 | param { 139 | name: "b2_c" 140 | lr_mult: 0 141 | } 142 | include { 143 | phase: TRAIN 144 | } 145 | } 146 | layer { 147 | name: "bn2" 148 | type: "BatchNorm" 149 | bottom: "conv2" 150 | top: "conv2" 151 | batch_norm_param { 152 | use_global_stats: true 153 | } 154 | param { 155 | name : "b2_a" 156 | lr_mult: 0 157 | } 158 | param { 159 | name : "b2_b" 160 | lr_mult: 0 161 | } 162 | param { 163 | name: "b2_c" 164 | lr_mult: 0 165 | } 166 | include { 167 | phase: TEST 168 | } 169 | } 170 | 171 | layer { 172 | name: "relu2" 173 | type: "ReLU" 174 | bottom: "conv2" 175 | top: "conv2" 176 | } 177 | 178 | #C128 S2 179 | layer { 180 | name: "conv3" 181 | type: "Convolution" 182 | bottom: "conv2" 183 | top: "conv3" 184 | param { 185 | name : "c3_w" 186 | 
lr_mult: 1 187 | decay_mult: 1 188 | } 189 | param { 190 | name : "c3_b" 191 | lr_mult: 2 192 | decay_mult: 0 193 | } 194 | convolution_param { 195 | num_output: 128 196 | kernel_size: 3 197 | stride: 2 198 | pad: 1 199 | weight_filler { 200 | type: "xavier" 201 | } 202 | } 203 | } 204 | 205 | ###################################################### RESNET for normals 206 | 207 | 208 | 209 | ####### RES1 210 | 211 | 212 | layer { 213 | name: "nbn1" 214 | type: "BatchNorm" 215 | bottom: "conv3" 216 | top: "nbn1" 217 | batch_norm_param { 218 | use_global_stats: false 219 | } 220 | param { 221 | name : "nb1_a" 222 | lr_mult: 0 223 | } 224 | param { 225 | name: "nb1_b" 226 | lr_mult: 0 227 | } 228 | param { 229 | name: "nb1_c" 230 | lr_mult: 0 231 | } 232 | include { 233 | phase: TRAIN 234 | } 235 | } 236 | layer { 237 | name: "nbn1" 238 | type: "BatchNorm" 239 | bottom: "conv3" 240 | top: "nbn1" 241 | batch_norm_param { 242 | use_global_stats: true 243 | } 244 | param { 245 | name : "nb1_a" 246 | lr_mult: 0 247 | } 248 | param { 249 | name: "nb1_b" 250 | lr_mult: 0 251 | } 252 | param { 253 | name: "nb1_c" 254 | lr_mult: 0 255 | } 256 | include { 257 | phase: TEST 258 | } 259 | } 260 | 261 | layer { 262 | name: "nrelu1" 263 | type: "ReLU" 264 | bottom: "nbn1" 265 | top: "nbn1" 266 | } 267 | 268 | 269 | 270 | layer { 271 | name: "nconv1" 272 | type: "Convolution" 273 | bottom: "nbn1" 274 | top: "nconv1" 275 | param { 276 | name : "nc1_w" 277 | lr_mult: 1 278 | decay_mult: 1 279 | } 280 | param { 281 | name: "nc1_b" 282 | lr_mult: 2 283 | decay_mult: 0 284 | } 285 | convolution_param { 286 | num_output: 128 287 | kernel_size: 3 288 | stride: 1 289 | pad: 1 290 | weight_filler { 291 | type: "xavier" 292 | } 293 | } 294 | } 295 | 296 | 297 | 298 | layer { 299 | name: "nbn1r" 300 | type: "BatchNorm" 301 | bottom: "nconv1" 302 | top: "nconv1" 303 | batch_norm_param { 304 | use_global_stats: false 305 | } 306 | param { 307 | name : "nb1r_a" 308 | lr_mult: 0 309 | } 310 | param 
{ 311 | name: "nb1r_b" 312 | lr_mult: 0 313 | } 314 | param { 315 | name: "nb1r_c" 316 | lr_mult: 0 317 | } 318 | include { 319 | phase: TRAIN 320 | } 321 | } 322 | layer { 323 | name: "nbn1r" 324 | type: "BatchNorm" 325 | bottom: "nconv1" 326 | top: "nconv1" 327 | batch_norm_param { 328 | use_global_stats: true 329 | } 330 | param { 331 | name : "nb1r_a" 332 | lr_mult: 0 333 | } 334 | param { 335 | name: "nb1r_b" 336 | lr_mult: 0 337 | } 338 | param { 339 | name: "nb1r_c" 340 | lr_mult: 0 341 | } 342 | include { 343 | phase: TEST 344 | } 345 | } 346 | 347 | layer { 348 | name: "nrelu1r" 349 | type: "ReLU" 350 | bottom: "nconv1" 351 | top: "nconv1" 352 | } 353 | 354 | layer { 355 | name: "nconv1r" 356 | type: "Convolution" 357 | bottom: "nconv1" 358 | top: "nconv1r" 359 | param { 360 | name: "nc1r_w" 361 | lr_mult: 1 362 | decay_mult: 1 363 | } 364 | param { 365 | name: "nc1r_b" 366 | lr_mult: 2 367 | decay_mult: 0 368 | } 369 | convolution_param { 370 | num_output: 128 371 | kernel_size: 3 372 | stride: 1 373 | pad: 1 374 | weight_filler { 375 | type: "xavier" 376 | } 377 | } 378 | } 379 | 380 | layer { 381 | name: "nsum1" 382 | type: "Eltwise" 383 | bottom: "nconv1r" 384 | bottom: "conv3" 385 | top: "nsum1" 386 | eltwise_param { 387 | operation: SUM 388 | } 389 | } 390 | 391 | 392 | ####### RES2 393 | 394 | 395 | 396 | layer { 397 | name: "nbn2" 398 | type: "BatchNorm" 399 | bottom: "nsum1" 400 | top: "nbn2" 401 | batch_norm_param { 402 | use_global_stats: false 403 | } 404 | param { 405 | name : "nb2_a" 406 | lr_mult: 0 407 | } 408 | param { 409 | name: "nb2_b" 410 | lr_mult: 0 411 | } 412 | param { 413 | name: "nb2_c" 414 | lr_mult: 0 415 | } 416 | include { 417 | phase: TRAIN 418 | } 419 | } 420 | layer { 421 | name: "nbn2" 422 | type: "BatchNorm" 423 | bottom: "nsum1" 424 | top: "nbn2" 425 | batch_norm_param { 426 | use_global_stats: true 427 | } 428 | param { 429 | name : "nb2_a" 430 | lr_mult: 0 431 | } 432 | param { 433 | name: "nb2_b" 434 | lr_mult: 0 435 
| } 436 | param { 437 | name: "nb2_c" 438 | lr_mult: 0 439 | } 440 | include { 441 | phase: TEST 442 | } 443 | } 444 | 445 | layer { 446 | name: "nrelu2" 447 | type: "ReLU" 448 | bottom: "nbn2" 449 | top: "nbn2" 450 | } 451 | 452 | 453 | layer { 454 | name: "nconv2" 455 | type: "Convolution" 456 | bottom: "nbn2" 457 | top: "nconv2" 458 | param { 459 | name : "nc2_w" 460 | lr_mult: 1 461 | decay_mult: 1 462 | } 463 | param { 464 | name: "nc2_b" 465 | lr_mult: 2 466 | decay_mult: 0 467 | } 468 | convolution_param { 469 | num_output: 128 470 | kernel_size: 3 471 | stride: 1 472 | pad: 1 473 | weight_filler { 474 | type: "xavier" 475 | } 476 | } 477 | } 478 | 479 | 480 | 481 | layer { 482 | name: "nbn2r" 483 | type: "BatchNorm" 484 | bottom: "nconv2" 485 | top: "nconv2" 486 | batch_norm_param { 487 | use_global_stats: false 488 | } 489 | param { 490 | name : "nb2r_a" 491 | lr_mult: 0 492 | } 493 | param { 494 | name: "nb2r_b" 495 | lr_mult: 0 496 | } 497 | param { 498 | name: "nb2r_c" 499 | lr_mult: 0 500 | } 501 | include { 502 | phase: TRAIN 503 | } 504 | } 505 | layer { 506 | name: "nbn2r" 507 | type: "BatchNorm" 508 | bottom: "nconv2" 509 | top: "nconv2" 510 | batch_norm_param { 511 | use_global_stats: true 512 | } 513 | param { 514 | name : "nb2r_a" 515 | lr_mult: 0 516 | } 517 | param { 518 | name: "nb2r_b" 519 | lr_mult: 0 520 | } 521 | param { 522 | name: "nb2r_c" 523 | lr_mult: 0 524 | } 525 | include { 526 | phase: TEST 527 | } 528 | } 529 | 530 | layer { 531 | name: "nrelu2r" 532 | type: "ReLU" 533 | bottom: "nconv2" 534 | top: "nconv2" 535 | } 536 | 537 | layer { 538 | name: "nconv2r" 539 | type: "Convolution" 540 | bottom: "nconv2" 541 | top: "nconv2r" 542 | param { 543 | name : "nc2r_w" 544 | lr_mult: 1 545 | decay_mult: 1 546 | } 547 | param { 548 | name: "nc2r_b" 549 | lr_mult: 2 550 | decay_mult: 0 551 | } 552 | convolution_param { 553 | num_output: 128 554 | kernel_size: 3 555 | stride: 1 556 | pad: 1 557 | weight_filler { 558 | type: "xavier" 559 | } 
560 | } 561 | } 562 | 563 | layer { 564 | name: "nsum2" 565 | type: "Eltwise" 566 | bottom: "nconv2r" 567 | bottom: "nsum1" 568 | top: "nsum2" 569 | eltwise_param { operation: SUM } 570 | } 571 | 572 | ####### RES3 573 | 574 | 575 | 576 | layer { 577 | name: "nbn3" 578 | type: "BatchNorm" 579 | bottom: "nsum2" 580 | top: "nbn3" 581 | batch_norm_param { 582 | use_global_stats: false 583 | } 584 | param { 585 | name : "nb3_a" 586 | lr_mult: 0 587 | } 588 | param { 589 | name: "nb3_b" 590 | lr_mult: 0 591 | } 592 | param { 593 | name: "nb3_c" 594 | lr_mult: 0 595 | } 596 | include { 597 | phase: TRAIN 598 | } 599 | } 600 | layer { 601 | name: "nbn3" 602 | type: "BatchNorm" 603 | bottom: "nsum2" 604 | top: "nbn3" 605 | batch_norm_param { 606 | use_global_stats: true 607 | } 608 | param { 609 | name : "nb3_a" 610 | lr_mult: 0 611 | } 612 | param { 613 | name: "nb3_b" 614 | lr_mult: 0 615 | } 616 | param { 617 | name: "nb3_c" 618 | lr_mult: 0 619 | } 620 | include { 621 | phase: TEST 622 | } 623 | } 624 | 625 | layer { 626 | name: "nrelu3" 627 | type: "ReLU" 628 | bottom: "nbn3" 629 | top: "nbn3" 630 | } 631 | 632 | 633 | layer { 634 | name: "nconv3" 635 | type: "Convolution" 636 | bottom: "nbn3" 637 | top: "nconv3" 638 | param { 639 | name : "nc3_w" 640 | lr_mult: 1 641 | decay_mult: 1 642 | } 643 | param { 644 | name: "nc3_b" 645 | lr_mult: 2 646 | decay_mult: 0 647 | } 648 | convolution_param { 649 | num_output: 128 650 | kernel_size: 3 651 | stride: 1 652 | pad: 1 653 | weight_filler { 654 | type: "xavier" 655 | } 656 | } 657 | } 658 | 659 | 660 | 661 | layer { 662 | name: "nbn3r" 663 | type: "BatchNorm" 664 | bottom: "nconv3" 665 | top: "nconv3" 666 | batch_norm_param { 667 | use_global_stats: false 668 | } 669 | param { 670 | name : "nb3r_a" 671 | lr_mult: 0 672 | } 673 | param { 674 | name: "nb3r_b" 675 | lr_mult: 0 676 | } 677 | param { 678 | name: "nb3r_c" 679 | lr_mult: 0 680 | } 681 | include { 682 | phase: TRAIN 683 | } 684 | } 685 | layer { 686 | name: 
"nbn3r" 687 | type: "BatchNorm" 688 | bottom: "nconv3" 689 | top: "nconv3" 690 | batch_norm_param { 691 | use_global_stats: true 692 | } 693 | param { 694 | name : "nb3r_a" 695 | lr_mult: 0 696 | } 697 | param { 698 | name: "nb3r_b" 699 | lr_mult: 0 700 | } 701 | param { 702 | name: "nb3r_c" 703 | lr_mult: 0 704 | } 705 | include { 706 | phase: TEST 707 | } 708 | } 709 | 710 | layer { 711 | name: "nrelu3r" 712 | type: "ReLU" 713 | bottom: "nconv3" 714 | top: "nconv3" 715 | } 716 | 717 | layer { 718 | name: "nconv3r" 719 | type: "Convolution" 720 | bottom: "nconv3" 721 | top: "nconv3r" 722 | param { 723 | name : "nc3r_w" 724 | lr_mult: 1 725 | decay_mult: 1 726 | } 727 | param { 728 | name: "nc3r_b" 729 | lr_mult: 2 730 | decay_mult: 0 731 | } 732 | convolution_param { 733 | num_output: 128 734 | kernel_size: 3 735 | stride: 1 736 | pad: 1 737 | weight_filler { 738 | type: "xavier" 739 | } 740 | } 741 | } 742 | 743 | layer { 744 | name: "nsum3" 745 | type: "Eltwise" 746 | bottom: "nconv3r" 747 | bottom: "nsum2" 748 | top: "nsum3" 749 | eltwise_param { operation: SUM } 750 | } 751 | 752 | 753 | ####### RES4 754 | 755 | 756 | 757 | layer { 758 | name: "nbn4" 759 | type: "BatchNorm" 760 | bottom: "nsum3" 761 | top: "nbn4" 762 | batch_norm_param { 763 | use_global_stats: false 764 | } 765 | param { 766 | name : "nb4_a" 767 | lr_mult: 0 768 | } 769 | param { 770 | name: "nb4_b" 771 | lr_mult: 0 772 | } 773 | param { 774 | name: "nb4_c" 775 | lr_mult: 0 776 | } 777 | include { 778 | phase: TRAIN 779 | } 780 | } 781 | layer { 782 | name: "nbn4" 783 | type: "BatchNorm" 784 | bottom: "nsum3" 785 | top: "nbn4" 786 | batch_norm_param { 787 | use_global_stats: true 788 | } 789 | param { 790 | name : "nb4_a" 791 | lr_mult: 0 792 | } 793 | param { 794 | name: "nb4_b" 795 | lr_mult: 0 796 | } 797 | param { 798 | name: "nb4_c" 799 | lr_mult: 0 800 | } 801 | include { 802 | phase: TEST 803 | } 804 | } 805 | 806 | layer { 807 | name: "nrelu4" 808 | type: "ReLU" 809 | bottom: "nbn4" 
810 | top: "nbn4" 811 | } 812 | 813 | 814 | layer { 815 | name: "nconv4" 816 | type: "Convolution" 817 | bottom: "nbn4" 818 | top: "nconv4" 819 | param { 820 | name : "nc4_w" 821 | lr_mult: 1 822 | decay_mult: 1 823 | } 824 | param { 825 | name: "nc4_b" 826 | lr_mult: 2 827 | decay_mult: 0 828 | } 829 | convolution_param { 830 | num_output: 128 831 | kernel_size: 3 832 | stride: 1 833 | pad: 1 834 | weight_filler { 835 | type: "xavier" 836 | } 837 | } 838 | } 839 | 840 | 841 | 842 | layer { 843 | name: "nbn4r" 844 | type: "BatchNorm" 845 | bottom: "nconv4" 846 | top: "nconv4" 847 | batch_norm_param { 848 | use_global_stats: false 849 | } 850 | param { 851 | name : "nb4r_a" 852 | lr_mult: 0 853 | } 854 | param { 855 | name: "nb4r_b" 856 | lr_mult: 0 857 | } 858 | param { 859 | name: "nb4r_c" 860 | lr_mult: 0 861 | } 862 | include { 863 | phase: TRAIN 864 | } 865 | } 866 | layer { 867 | name: "nbn4r" 868 | type: "BatchNorm" 869 | bottom: "nconv4" 870 | top: "nconv4" 871 | batch_norm_param { 872 | use_global_stats: true 873 | } 874 | param { 875 | name : "nb4r_a" 876 | lr_mult: 0 877 | } 878 | param { 879 | name: "nb4r_b" 880 | lr_mult: 0 881 | } 882 | param { 883 | name: "nb4r_c" 884 | lr_mult: 0 885 | } 886 | include { 887 | phase: TEST 888 | } 889 | } 890 | 891 | layer { 892 | name: "nrelu4r" 893 | type: "ReLU" 894 | bottom: "nconv4" 895 | top: "nconv4" 896 | } 897 | 898 | layer { 899 | name: "nconv4r" 900 | type: "Convolution" 901 | bottom: "nconv4" 902 | top: "nconv4r" 903 | param { 904 | name : "nc4r_w" 905 | lr_mult: 1 906 | decay_mult: 1 907 | } 908 | param { 909 | name: "nc4r_b" 910 | lr_mult: 2 911 | decay_mult: 0 912 | } 913 | convolution_param { 914 | num_output: 128 915 | kernel_size: 3 916 | stride: 1 917 | pad: 1 918 | weight_filler { 919 | type: "xavier" 920 | } 921 | } 922 | } 923 | 924 | layer { 925 | name: "nsum4" 926 | type: "Eltwise" 927 | bottom: "nconv4r" 928 | bottom: "nsum3" 929 | top: "nsum4" 930 | eltwise_param { operation: SUM } 931 | } 932 
| 933 | ####### RES5 934 | 935 | 936 | 937 | layer { 938 | name: "nbn5" 939 | type: "BatchNorm" 940 | bottom: "nsum4" 941 | top: "nbn5" 942 | batch_norm_param { 943 | use_global_stats: false 944 | } 945 | param { 946 | name : "nb5_a" 947 | lr_mult: 0 948 | } 949 | param { 950 | name: "nb5_b" 951 | lr_mult: 0 952 | } 953 | param { 954 | name: "nb5_c" 955 | lr_mult: 0 956 | } 957 | include { 958 | phase: TRAIN 959 | } 960 | } 961 | layer { 962 | name: "nbn5" 963 | type: "BatchNorm" 964 | bottom: "nsum4" 965 | top: "nbn5" 966 | batch_norm_param { 967 | use_global_stats: true 968 | } 969 | param { 970 | name : "nb5_a" 971 | lr_mult: 0 972 | } 973 | param { 974 | name: "nb5_b" 975 | lr_mult: 0 976 | } 977 | param { 978 | name: "nb5_c" 979 | lr_mult: 0 980 | } 981 | include { 982 | phase: TEST 983 | } 984 | } 985 | 986 | layer { 987 | name: "nrelu5" 988 | type: "ReLU" 989 | bottom: "nbn5" 990 | top: "nbn5" 991 | } 992 | 993 | 994 | layer { 995 | name: "nconv5" 996 | type: "Convolution" 997 | bottom: "nbn5" 998 | top: "nconv5" 999 | param { 1000 | name : "nc5_w" 1001 | lr_mult: 1 1002 | decay_mult: 1 1003 | } 1004 | param { 1005 | name: "nc5_b" 1006 | lr_mult: 2 1007 | decay_mult: 0 1008 | } 1009 | convolution_param { 1010 | num_output: 128 1011 | kernel_size: 3 1012 | stride: 1 1013 | pad: 1 1014 | weight_filler { 1015 | type: "xavier" 1016 | } 1017 | } 1018 | } 1019 | 1020 | 1021 | 1022 | layer { 1023 | name: "nbn5r" 1024 | type: "BatchNorm" 1025 | bottom: "nconv5" 1026 | top: "nconv5" 1027 | batch_norm_param { 1028 | use_global_stats: false 1029 | } 1030 | param { 1031 | name : "nb5r_a" 1032 | lr_mult: 0 1033 | } 1034 | param { 1035 | name: "nb5r_b" 1036 | lr_mult: 0 1037 | } 1038 | param { 1039 | name: "nb5r_c" 1040 | lr_mult: 0 1041 | } 1042 | include { 1043 | phase: TRAIN 1044 | } 1045 | } 1046 | layer { 1047 | name: "nbn5r" 1048 | type: "BatchNorm" 1049 | bottom: "nconv5" 1050 | top: "nconv5" 1051 | batch_norm_param { 1052 | use_global_stats: true 1053 | } 1054 | 
param { 1055 | name : "nb5r_a" 1056 | lr_mult: 0 1057 | } 1058 | param { 1059 | name: "nb5r_b" 1060 | lr_mult: 0 1061 | } 1062 | param { 1063 | name: "nb5r_c" 1064 | lr_mult: 0 1065 | } 1066 | include { 1067 | phase: TEST 1068 | } 1069 | } 1070 | 1071 | layer { 1072 | name: "nrelu5r" 1073 | type: "ReLU" 1074 | bottom: "nconv5" 1075 | top: "nconv5" 1076 | } 1077 | 1078 | layer { 1079 | name: "nconv5r" 1080 | type: "Convolution" 1081 | bottom: "nconv5" 1082 | top: "nconv5r" 1083 | param { 1084 | name : "nc5r_w" 1085 | lr_mult: 1 1086 | decay_mult: 1 1087 | } 1088 | param { 1089 | name: "nc5r_b" 1090 | lr_mult: 2 1091 | decay_mult: 0 1092 | } 1093 | convolution_param { 1094 | num_output: 128 1095 | kernel_size: 3 1096 | stride: 1 1097 | pad: 1 1098 | weight_filler { 1099 | type: "xavier" 1100 | } 1101 | } 1102 | } 1103 | 1104 | layer { 1105 | name: "nsum5" 1106 | type: "Eltwise" 1107 | bottom: "nconv5r" 1108 | bottom: "nsum4" 1109 | top: "nsum5" 1110 | eltwise_param { operation: SUM } 1111 | } 1112 | 1113 | layer { 1114 | name: "nbn6r" 1115 | type: "BatchNorm" 1116 | bottom: "nsum5" 1117 | top: "nsum5" 1118 | batch_norm_param { 1119 | use_global_stats: false 1120 | } 1121 | param { 1122 | name : "nb6r_a" 1123 | lr_mult: 0 1124 | } 1125 | param { 1126 | name: "nb6r_b" 1127 | lr_mult: 0 1128 | } 1129 | param { 1130 | name: "nb6r_c" 1131 | lr_mult: 0 1132 | } 1133 | include { 1134 | phase: TRAIN 1135 | } 1136 | } 1137 | layer { 1138 | name: "nbn6r" 1139 | type: "BatchNorm" 1140 | bottom: "nsum5" 1141 | top: "nsum5" 1142 | batch_norm_param { 1143 | use_global_stats: true 1144 | } 1145 | param { 1146 | name : "nb6r_a" 1147 | lr_mult: 0 1148 | } 1149 | param { 1150 | name: "nb6r_b" 1151 | lr_mult: 0 1152 | } 1153 | param { 1154 | name: "nb6r_c" 1155 | lr_mult: 0 1156 | } 1157 | include { 1158 | phase: TEST 1159 | } 1160 | } 1161 | 1162 | layer { 1163 | name: "nrelu6r" 1164 | type: "ReLU" 1165 | bottom: "nsum5" 1166 | top: "nsum5" 1167 | } 1168 | 1169 | #CD128 1170 | layer { 
1171 | name: "nup6" 1172 | type: "Deconvolution" 1173 | bottom: "nsum5" 1174 | top: "nup6" 1175 | convolution_param { 1176 | kernel_size: 4 1177 | stride: 2 1178 | num_output: 128 1179 | group: 128 1180 | pad: 1 1181 | weight_filler { 1182 | type: "bilinear" 1183 | } 1184 | bias_term: false 1185 | } 1186 | param { 1187 | lr_mult: 0 1188 | decay_mult: 0 1189 | } 1190 | } 1191 | 1192 | layer { 1193 | name: "nconv6" 1194 | type: "Convolution" 1195 | bottom: "nup6" 1196 | top: "nconv6" 1197 | param { 1198 | name : "nc6_w" 1199 | lr_mult: 1 1200 | decay_mult: 1 1201 | } 1202 | param { 1203 | name: "nc6_b" 1204 | lr_mult: 2 1205 | decay_mult: 0 1206 | } 1207 | convolution_param { 1208 | num_output: 128 1209 | kernel_size: 1 1210 | stride: 1 1211 | pad: 0 1212 | weight_filler { 1213 | type: "xavier" 1214 | } 1215 | } 1216 | } 1217 | 1218 | 1219 | layer { 1220 | name: "nbn6" 1221 | type: "BatchNorm" 1222 | bottom: "nconv6" 1223 | top: "nconv6" 1224 | batch_norm_param { 1225 | use_global_stats: false 1226 | } 1227 | param { 1228 | name: "nb6_a" 1229 | lr_mult: 0 1230 | } 1231 | param { 1232 | name: "nb6_b" 1233 | lr_mult: 0 1234 | } 1235 | param { 1236 | name: "nb6_c" 1237 | lr_mult: 0 1238 | } 1239 | include { 1240 | phase: TRAIN 1241 | } 1242 | } 1243 | layer { 1244 | name: "nbn6" 1245 | type: "BatchNorm" 1246 | bottom: "nconv6" 1247 | top: "nconv6" 1248 | batch_norm_param { 1249 | use_global_stats: true 1250 | } 1251 | param { 1252 | name: "nb6_a" 1253 | lr_mult: 0 1254 | } 1255 | param { 1256 | name: "nb6_b" 1257 | lr_mult: 0 1258 | } 1259 | param { 1260 | name: "nb6_c" 1261 | lr_mult: 0 1262 | } 1263 | include { 1264 | phase: TEST 1265 | } 1266 | } 1267 | 1268 | layer { 1269 | name: "nrelu6" 1270 | type: "ReLU" 1271 | bottom: "nconv6" 1272 | top: "nconv6" 1273 | } 1274 | 1275 | #CD 64 1276 | layer { 1277 | name: "nconv7" 1278 | type: "Convolution" 1279 | bottom: "nconv6" 1280 | top: "nconv7" 1281 | param { 1282 | name: "nc7_w" 1283 | lr_mult: 1 1284 | decay_mult: 1 
1285 | } 1286 | param { 1287 | name: "nc7_b" 1288 | lr_mult: 2 1289 | decay_mult: 0 1290 | } 1291 | convolution_param { 1292 | num_output: 64 1293 | kernel_size: 3 1294 | stride: 1 1295 | pad: 1 1296 | weight_filler { 1297 | type: "xavier" 1298 | } 1299 | } 1300 | } 1301 | 1302 | layer { 1303 | name: "nbn7" 1304 | type: "BatchNorm" 1305 | bottom: "nconv7" 1306 | top: "nconv7" 1307 | batch_norm_param { 1308 | use_global_stats: false 1309 | } 1310 | param { 1311 | name: "nb7_a" 1312 | lr_mult: 0 1313 | } 1314 | param { 1315 | name: "nb7_b" 1316 | lr_mult: 0 1317 | } 1318 | param { 1319 | name: "nb7_c" 1320 | lr_mult: 0 1321 | } 1322 | include { 1323 | phase: TRAIN 1324 | } 1325 | } 1326 | layer { 1327 | name: "nbn7" 1328 | type: "BatchNorm" 1329 | bottom: "nconv7" 1330 | top: "nconv7" 1331 | batch_norm_param { 1332 | use_global_stats: true 1333 | } 1334 | param { 1335 | name: "nb7_a" 1336 | lr_mult: 0 1337 | } 1338 | param { 1339 | name: "nb7_b" 1340 | lr_mult: 0 1341 | } 1342 | param { 1343 | name: "nb7_c" 1344 | lr_mult: 0 1345 | } 1346 | include { 1347 | phase: TEST 1348 | } 1349 | } 1350 | 1351 | layer { 1352 | name: "nrelu7" 1353 | type: "ReLU" 1354 | bottom: "nconv7" 1355 | top: "nconv7" 1356 | } 1357 | 1358 | #C*3 1359 | layer { 1360 | name: "Nconv0" 1361 | type: "Convolution" 1362 | bottom: "nconv7" 1363 | top: "Nconv0" 1364 | param { 1365 | lr_mult: 1 1366 | decay_mult: 1 1367 | } 1368 | param { 1369 | lr_mult: 2 1370 | decay_mult: 0 1371 | } 1372 | convolution_param { 1373 | num_output: 3 1374 | kernel_size: 1 1375 | stride: 1 1376 | pad: 0 1377 | weight_filler { 1378 | type: "xavier" 1379 | } 1380 | } 1381 | } 1382 | 1383 | 1384 | #################################################### Albedo ##################################################### 1385 | 1386 | 1387 | 1388 | 1389 | 1390 | ####### RES1 1391 | 1392 | 1393 | layer { 1394 | name: "abn1" 1395 | type: "BatchNorm" 1396 | bottom: "conv3" 1397 | top: "abn1" 1398 | batch_norm_param { 1399 | 
use_global_stats: false 1400 | } 1401 | param { 1402 | name : "ab1_a" 1403 | lr_mult: 0 1404 | } 1405 | param { 1406 | name: "ab1_b" 1407 | lr_mult: 0 1408 | } 1409 | param { 1410 | name: "ab1_c" 1411 | lr_mult: 0 1412 | } 1413 | include { 1414 | phase: TRAIN 1415 | } 1416 | } 1417 | layer { 1418 | name: "abn1" 1419 | type: "BatchNorm" 1420 | bottom: "conv3" 1421 | top: "abn1" 1422 | batch_norm_param { 1423 | use_global_stats: true 1424 | } 1425 | param { 1426 | name : "ab1_a" 1427 | lr_mult: 0 1428 | } 1429 | param { 1430 | name: "ab1_b" 1431 | lr_mult: 0 1432 | } 1433 | param { 1434 | name: "ab1_c" 1435 | lr_mult: 0 1436 | } 1437 | include { 1438 | phase: TEST 1439 | } 1440 | } 1441 | 1442 | layer { 1443 | name: "arelu1" 1444 | type: "ReLU" 1445 | bottom: "abn1" 1446 | top: "abn1" 1447 | } 1448 | 1449 | 1450 | 1451 | layer { 1452 | name: "aconv1" 1453 | type: "Convolution" 1454 | bottom: "abn1" 1455 | top: "aconv1" 1456 | param { 1457 | name : "ac1_w" 1458 | lr_mult: 1 1459 | decay_mult: 1 1460 | } 1461 | param { 1462 | name: "ac1_b" 1463 | lr_mult: 2 1464 | decay_mult: 0 1465 | } 1466 | convolution_param { 1467 | num_output: 128 1468 | kernel_size: 3 1469 | stride: 1 1470 | pad: 1 1471 | weight_filler { 1472 | type: "xavier" 1473 | } 1474 | } 1475 | } 1476 | 1477 | 1478 | 1479 | layer { 1480 | name: "abn1r" 1481 | type: "BatchNorm" 1482 | bottom: "aconv1" 1483 | top: "aconv1" 1484 | batch_norm_param { 1485 | use_global_stats: false 1486 | } 1487 | param { 1488 | name : "ab1r_a" 1489 | lr_mult: 0 1490 | } 1491 | param { 1492 | name: "ab1r_b" 1493 | lr_mult: 0 1494 | } 1495 | param { 1496 | name: "ab1r_c" 1497 | lr_mult: 0 1498 | } 1499 | include { 1500 | phase: TRAIN 1501 | } 1502 | } 1503 | layer { 1504 | name: "abn1r" 1505 | type: "BatchNorm" 1506 | bottom: "aconv1" 1507 | top: "aconv1" 1508 | batch_norm_param { 1509 | use_global_stats: true 1510 | } 1511 | param { 1512 | name : "ab1r_a" 1513 | lr_mult: 0 1514 | } 1515 | param { 1516 | name: "ab1r_b" 1517 | 
lr_mult: 0 1518 | } 1519 | param { 1520 | name: "ab1r_c" 1521 | lr_mult: 0 1522 | } 1523 | include { 1524 | phase: TEST 1525 | } 1526 | } 1527 | 1528 | layer { 1529 | name: "arelu1r" 1530 | type: "ReLU" 1531 | bottom: "aconv1" 1532 | top: "aconv1" 1533 | } 1534 | 1535 | layer { 1536 | name: "aconv1r" 1537 | type: "Convolution" 1538 | bottom: "aconv1" 1539 | top: "aconv1r" 1540 | param { 1541 | name: "ac1r_w" 1542 | lr_mult: 1 1543 | decay_mult: 1 1544 | } 1545 | param { 1546 | name: "ac1r_b" 1547 | lr_mult: 2 1548 | decay_mult: 0 1549 | } 1550 | convolution_param { 1551 | num_output: 128 1552 | kernel_size: 3 1553 | stride: 1 1554 | pad: 1 1555 | weight_filler { 1556 | type: "xavier" 1557 | } 1558 | } 1559 | } 1560 | 1561 | layer { 1562 | name: "asum1" 1563 | type: "Eltwise" 1564 | bottom: "aconv1r" 1565 | bottom: "conv3" 1566 | top: "asum1" 1567 | eltwise_param { operation: SUM } 1568 | } 1569 | 1570 | 1571 | ####### RES2 1572 | 1573 | 1574 | 1575 | layer { 1576 | name: "abn2" 1577 | type: "BatchNorm" 1578 | bottom: "asum1" 1579 | top: "abn2" 1580 | batch_norm_param { 1581 | use_global_stats: false 1582 | } 1583 | param { 1584 | name : "ab2_a" 1585 | lr_mult: 0 1586 | } 1587 | param { 1588 | name: "ab2_b" 1589 | lr_mult: 0 1590 | } 1591 | param { 1592 | name: "ab2_c" 1593 | lr_mult: 0 1594 | } 1595 | include { 1596 | phase: TRAIN 1597 | } 1598 | } 1599 | layer { 1600 | name: "abn2" 1601 | type: "BatchNorm" 1602 | bottom: "asum1" 1603 | top: "abn2" 1604 | batch_norm_param { 1605 | use_global_stats: true 1606 | } 1607 | param { 1608 | name : "ab2_a" 1609 | lr_mult: 0 1610 | } 1611 | param { 1612 | name: "ab2_b" 1613 | lr_mult: 0 1614 | } 1615 | param { 1616 | name: "ab2_c" 1617 | lr_mult: 0 1618 | } 1619 | include { 1620 | phase: TEST 1621 | } 1622 | } 1623 | 1624 | layer { 1625 | name: "arelu2" 1626 | type: "ReLU" 1627 | bottom: "abn2" 1628 | top: "abn2" 1629 | } 1630 | 1631 | 1632 | layer { 1633 | name: "aconv2" 1634 | type: "Convolution" 1635 | bottom: "abn2" 
1636 | top: "aconv2" 1637 | param { 1638 | name : "ac2_w" 1639 | lr_mult: 1 1640 | decay_mult: 1 1641 | } 1642 | param { 1643 | name: "ac2_b" 1644 | lr_mult: 2 1645 | decay_mult: 0 1646 | } 1647 | convolution_param { 1648 | num_output: 128 1649 | kernel_size: 3 1650 | stride: 1 1651 | pad: 1 1652 | weight_filler { 1653 | type: "xavier" 1654 | } 1655 | } 1656 | } 1657 | 1658 | 1659 | 1660 | layer { 1661 | name: "abn2r" 1662 | type: "BatchNorm" 1663 | bottom: "aconv2" 1664 | top: "aconv2" 1665 | batch_norm_param { 1666 | use_global_stats: false 1667 | } 1668 | param { 1669 | name : "ab2r_a" 1670 | lr_mult: 0 1671 | } 1672 | param { 1673 | name: "ab2r_b" 1674 | lr_mult: 0 1675 | } 1676 | param { 1677 | name: "ab2r_c" 1678 | lr_mult: 0 1679 | } 1680 | include { 1681 | phase: TRAIN 1682 | } 1683 | } 1684 | layer { 1685 | name: "abn2r" 1686 | type: "BatchNorm" 1687 | bottom: "aconv2" 1688 | top: "aconv2" 1689 | batch_norm_param { 1690 | use_global_stats: true 1691 | } 1692 | param { 1693 | name : "ab2r_a" 1694 | lr_mult: 0 1695 | } 1696 | param { 1697 | name: "ab2r_b" 1698 | lr_mult: 0 1699 | } 1700 | param { 1701 | name: "ab2r_c" 1702 | lr_mult: 0 1703 | } 1704 | include { 1705 | phase: TEST 1706 | } 1707 | } 1708 | 1709 | layer { 1710 | name: "arelu2r" 1711 | type: "ReLU" 1712 | bottom: "aconv2" 1713 | top: "aconv2" 1714 | } 1715 | 1716 | layer { 1717 | name: "aconv2r" 1718 | type: "Convolution" 1719 | bottom: "aconv2" 1720 | top: "aconv2r" 1721 | param { 1722 | name : "ac2r_w" 1723 | lr_mult: 1 1724 | decay_mult: 1 1725 | } 1726 | param { 1727 | name: "ac2r_b" 1728 | lr_mult: 2 1729 | decay_mult: 0 1730 | } 1731 | convolution_param { 1732 | num_output: 128 1733 | kernel_size: 3 1734 | stride: 1 1735 | pad: 1 1736 | weight_filler { 1737 | type: "xavier" 1738 | } 1739 | } 1740 | } 1741 | 1742 | layer { 1743 | name: "asum2" 1744 | type: "Eltwise" 1745 | bottom: "aconv2r" 1746 | bottom: "asum1" 1747 | top: "asum2" 1748 | eltwise_param { operation: SUM } 1749 | } 1750 | 
1751 | ####### RES3 1752 | 1753 | 1754 | 1755 | layer { 1756 | name: "abn3" 1757 | type: "BatchNorm" 1758 | bottom: "asum2" 1759 | top: "abn3" 1760 | batch_norm_param { 1761 | use_global_stats: false 1762 | } 1763 | param { 1764 | name : "ab3_a" 1765 | lr_mult: 0 1766 | } 1767 | param { 1768 | name: "ab3_b" 1769 | lr_mult: 0 1770 | } 1771 | param { 1772 | name: "ab3_c" 1773 | lr_mult: 0 1774 | } 1775 | include { 1776 | phase: TRAIN 1777 | } 1778 | } 1779 | layer { 1780 | name: "abn3" 1781 | type: "BatchNorm" 1782 | bottom: "asum2" 1783 | top: "abn3" 1784 | batch_norm_param { 1785 | use_global_stats: true 1786 | } 1787 | param { 1788 | name : "ab3_a" 1789 | lr_mult: 0 1790 | } 1791 | param { 1792 | name: "ab3_b" 1793 | lr_mult: 0 1794 | } 1795 | param { 1796 | name: "ab3_c" 1797 | lr_mult: 0 1798 | } 1799 | include { 1800 | phase: TEST 1801 | } 1802 | } 1803 | 1804 | layer { 1805 | name: "arelu3" 1806 | type: "ReLU" 1807 | bottom: "abn3" 1808 | top: "abn3" 1809 | } 1810 | 1811 | 1812 | layer { 1813 | name: "aconv3" 1814 | type: "Convolution" 1815 | bottom: "abn3" 1816 | top: "aconv3" 1817 | param { 1818 | name : "ac3_w" 1819 | lr_mult: 1 1820 | decay_mult: 1 1821 | } 1822 | param { 1823 | name: "ac3_b" 1824 | lr_mult: 2 1825 | decay_mult: 0 1826 | } 1827 | convolution_param { 1828 | num_output: 128 1829 | kernel_size: 3 1830 | stride: 1 1831 | pad: 1 1832 | weight_filler { 1833 | type: "xavier" 1834 | } 1835 | } 1836 | } 1837 | 1838 | 1839 | 1840 | layer { 1841 | name: "abn3r" 1842 | type: "BatchNorm" 1843 | bottom: "aconv3" 1844 | top: "aconv3" 1845 | batch_norm_param { 1846 | use_global_stats: false 1847 | } 1848 | param { 1849 | name : "ab3r_a" 1850 | lr_mult: 0 1851 | } 1852 | param { 1853 | name: "ab3r_b" 1854 | lr_mult: 0 1855 | } 1856 | param { 1857 | name: "ab3r_c" 1858 | lr_mult: 0 1859 | } 1860 | include { 1861 | phase: TRAIN 1862 | } 1863 | } 1864 | layer { 1865 | name: "abn3r" 1866 | type: "BatchNorm" 1867 | bottom: "aconv3" 1868 | top: "aconv3" 1869 | 
batch_norm_param { 1870 | use_global_stats: true 1871 | } 1872 | param { 1873 | name : "ab3r_a" 1874 | lr_mult: 0 1875 | } 1876 | param { 1877 | name: "ab3r_b" 1878 | lr_mult: 0 1879 | } 1880 | param { 1881 | name: "ab3r_c" 1882 | lr_mult: 0 1883 | } 1884 | include { 1885 | phase: TEST 1886 | } 1887 | } 1888 | 1889 | layer { 1890 | name: "arelu3r" 1891 | type: "ReLU" 1892 | bottom: "aconv3" 1893 | top: "aconv3" 1894 | } 1895 | 1896 | layer { 1897 | name: "aconv3r" 1898 | type: "Convolution" 1899 | bottom: "aconv3" 1900 | top: "aconv3r" 1901 | param { 1902 | name : "ac3r_w" 1903 | lr_mult: 1 1904 | decay_mult: 1 1905 | } 1906 | param { 1907 | name: "ac3r_b" 1908 | lr_mult: 2 1909 | decay_mult: 0 1910 | } 1911 | convolution_param { 1912 | num_output: 128 1913 | kernel_size: 3 1914 | stride: 1 1915 | pad: 1 1916 | weight_filler { 1917 | type: "xavier" 1918 | } 1919 | } 1920 | } 1921 | 1922 | layer { 1923 | name: "asum3" 1924 | type: "Eltwise" 1925 | bottom: "aconv3r" 1926 | bottom: "asum2" 1927 | top: "asum3" 1928 | eltwise_param { operation: SUM } 1929 | } 1930 | 1931 | 1932 | ####### RES4 1933 | 1934 | 1935 | 1936 | layer { 1937 | name: "abn4" 1938 | type: "BatchNorm" 1939 | bottom: "asum3" 1940 | top: "abn4" 1941 | batch_norm_param { 1942 | use_global_stats: false 1943 | } 1944 | param { 1945 | name : "ab4_a" 1946 | lr_mult: 0 1947 | } 1948 | param { 1949 | name: "ab4_b" 1950 | lr_mult: 0 1951 | } 1952 | param { 1953 | name: "ab4_c" 1954 | lr_mult: 0 1955 | } 1956 | include { 1957 | phase: TRAIN 1958 | } 1959 | } 1960 | layer { 1961 | name: "abn4" 1962 | type: "BatchNorm" 1963 | bottom: "asum3" 1964 | top: "abn4" 1965 | batch_norm_param { 1966 | use_global_stats: true 1967 | } 1968 | param { 1969 | name : "ab4_a" 1970 | lr_mult: 0 1971 | } 1972 | param { 1973 | name: "ab4_b" 1974 | lr_mult: 0 1975 | } 1976 | param { 1977 | name: "ab4_c" 1978 | lr_mult: 0 1979 | } 1980 | include { 1981 | phase: TEST 1982 | } 1983 | } 1984 | 1985 | layer { 1986 | name: "arelu4" 1987 
| type: "ReLU" 1988 | bottom: "abn4" 1989 | top: "abn4" 1990 | } 1991 | 1992 | 1993 | layer { 1994 | name: "aconv4" 1995 | type: "Convolution" 1996 | bottom: "abn4" 1997 | top: "aconv4" 1998 | param { 1999 | name : "ac4_w" 2000 | lr_mult: 1 2001 | decay_mult: 1 2002 | } 2003 | param { 2004 | name: "ac4_b" 2005 | lr_mult: 2 2006 | decay_mult: 0 2007 | } 2008 | convolution_param { 2009 | num_output: 128 2010 | kernel_size: 3 2011 | stride: 1 2012 | pad: 1 2013 | weight_filler { 2014 | type: "xavier" 2015 | } 2016 | } 2017 | } 2018 | 2019 | 2020 | 2021 | layer { 2022 | name: "abn4r" 2023 | type: "BatchNorm" 2024 | bottom: "aconv4" 2025 | top: "aconv4" 2026 | batch_norm_param { 2027 | use_global_stats: false 2028 | } 2029 | param { 2030 | name : "ab4r_a" 2031 | lr_mult: 0 2032 | } 2033 | param { 2034 | name: "ab4r_b" 2035 | lr_mult: 0 2036 | } 2037 | param { 2038 | name: "ab4r_c" 2039 | lr_mult: 0 2040 | } 2041 | include { 2042 | phase: TRAIN 2043 | } 2044 | } 2045 | layer { 2046 | name: "abn4r" 2047 | type: "BatchNorm" 2048 | bottom: "aconv4" 2049 | top: "aconv4" 2050 | batch_norm_param { 2051 | use_global_stats: true 2052 | } 2053 | param { 2054 | name : "ab4r_a" 2055 | lr_mult: 0 2056 | } 2057 | param { 2058 | name: "ab4r_b" 2059 | lr_mult: 0 2060 | } 2061 | param { 2062 | name: "ab4r_c" 2063 | lr_mult: 0 2064 | } 2065 | include { 2066 | phase: TEST 2067 | } 2068 | } 2069 | 2070 | layer { 2071 | name: "arelu4r" 2072 | type: "ReLU" 2073 | bottom: "aconv4" 2074 | top: "aconv4" 2075 | } 2076 | 2077 | layer { 2078 | name: "aconv4r" 2079 | type: "Convolution" 2080 | bottom: "aconv4" 2081 | top: "aconv4r" 2082 | param { 2083 | name : "ac4r_w" 2084 | lr_mult: 1 2085 | decay_mult: 1 2086 | } 2087 | param { 2088 | name: "ac4r_b" 2089 | lr_mult: 2 2090 | decay_mult: 0 2091 | } 2092 | convolution_param { 2093 | num_output: 128 2094 | kernel_size: 3 2095 | stride: 1 2096 | pad: 1 2097 | weight_filler { 2098 | type: "xavier" 2099 | } 2100 | } 2101 | } 2102 | 2103 | layer { 2104 
| name: "asum4" 2105 | type: "Eltwise" 2106 | bottom: "aconv4r" 2107 | bottom: "asum3" 2108 | top: "asum4" 2109 | eltwise_param { operation: SUM } 2110 | } 2111 | 2112 | ####### RES5 2113 | 2114 | 2115 | 2116 | layer { 2117 | name: "abn5" 2118 | type: "BatchNorm" 2119 | bottom: "asum4" 2120 | top: "abn5" 2121 | batch_norm_param { 2122 | use_global_stats: false 2123 | } 2124 | param { 2125 | name : "ab5_a" 2126 | lr_mult: 0 2127 | } 2128 | param { 2129 | name: "ab5_b" 2130 | lr_mult: 0 2131 | } 2132 | param { 2133 | name: "ab5_c" 2134 | lr_mult: 0 2135 | } 2136 | include { 2137 | phase: TRAIN 2138 | } 2139 | } 2140 | layer { 2141 | name: "abn5" 2142 | type: "BatchNorm" 2143 | bottom: "asum4" 2144 | top: "abn5" 2145 | batch_norm_param { 2146 | use_global_stats: true 2147 | } 2148 | param { 2149 | name : "ab5_a" 2150 | lr_mult: 0 2151 | } 2152 | param { 2153 | name: "ab5_b" 2154 | lr_mult: 0 2155 | } 2156 | param { 2157 | name: "ab5_c" 2158 | lr_mult: 0 2159 | } 2160 | include { 2161 | phase: TEST 2162 | } 2163 | } 2164 | 2165 | layer { 2166 | name: "arelu5" 2167 | type: "ReLU" 2168 | bottom: "abn5" 2169 | top: "abn5" 2170 | } 2171 | 2172 | 2173 | layer { 2174 | name: "aconv5" 2175 | type: "Convolution" 2176 | bottom: "abn5" 2177 | top: "aconv5" 2178 | param { 2179 | name : "ac5_w" 2180 | lr_mult: 1 2181 | decay_mult: 1 2182 | } 2183 | param { 2184 | name: "ac5_b" 2185 | lr_mult: 2 2186 | decay_mult: 0 2187 | } 2188 | convolution_param { 2189 | num_output: 128 2190 | kernel_size: 3 2191 | stride: 1 2192 | pad: 1 2193 | weight_filler { 2194 | type: "xavier" 2195 | } 2196 | } 2197 | } 2198 | 2199 | 2200 | 2201 | layer { 2202 | name: "abn5r" 2203 | type: "BatchNorm" 2204 | bottom: "aconv5" 2205 | top: "aconv5" 2206 | batch_norm_param { 2207 | use_global_stats: false 2208 | } 2209 | param { 2210 | name : "ab5r_a" 2211 | lr_mult: 0 2212 | } 2213 | param { 2214 | name: "ab5r_b" 2215 | lr_mult: 0 2216 | } 2217 | param { 2218 | name: "ab5r_c" 2219 | lr_mult: 0 2220 | } 2221 | 
include { 2222 | phase: TRAIN 2223 | } 2224 | } 2225 | layer { 2226 | name: "abn5r" 2227 | type: "BatchNorm" 2228 | bottom: "aconv5" 2229 | top: "aconv5" 2230 | batch_norm_param { 2231 | use_global_stats: true 2232 | } 2233 | param { 2234 | name : "ab5r_a" 2235 | lr_mult: 0 2236 | } 2237 | param { 2238 | name: "ab5r_b" 2239 | lr_mult: 0 2240 | } 2241 | param { 2242 | name: "ab5r_c" 2243 | lr_mult: 0 2244 | } 2245 | include { 2246 | phase: TEST 2247 | } 2248 | } 2249 | 2250 | layer { 2251 | name: "arelu5r" 2252 | type: "ReLU" 2253 | bottom: "aconv5" 2254 | top: "aconv5" 2255 | } 2256 | 2257 | layer { 2258 | name: "aconv5r" 2259 | type: "Convolution" 2260 | bottom: "aconv5" 2261 | top: "aconv5r" 2262 | param { 2263 | name : "ac5r_w" 2264 | lr_mult: 1 2265 | decay_mult: 1 2266 | } 2267 | param { 2268 | name: "ac5r_b" 2269 | lr_mult: 2 2270 | decay_mult: 0 2271 | } 2272 | convolution_param { 2273 | num_output: 128 2274 | kernel_size: 3 2275 | stride: 1 2276 | pad: 1 2277 | weight_filler { 2278 | type: "xavier" 2279 | } 2280 | } 2281 | } 2282 | 2283 | layer { 2284 | name: "asum5" 2285 | type: "Eltwise" 2286 | bottom: "aconv5r" 2287 | bottom: "asum4" 2288 | top: "asum5" 2289 | eltwise_param { operation: SUM } 2290 | } 2291 | layer { 2292 | name: "abn6r" 2293 | type: "BatchNorm" 2294 | bottom: "asum5" 2295 | top: "asum5" 2296 | batch_norm_param { 2297 | use_global_stats: false 2298 | } 2299 | param { 2300 | name : "ab6r_a" 2301 | lr_mult: 0 2302 | } 2303 | param { 2304 | name: "ab6r_b" 2305 | lr_mult: 0 2306 | } 2307 | param { 2308 | name: "ab6r_c" 2309 | lr_mult: 0 2310 | } 2311 | include { 2312 | phase: TRAIN 2313 | } 2314 | } 2315 | layer { 2316 | name: "abn6r" 2317 | type: "BatchNorm" 2318 | bottom: "asum5" 2319 | top: "asum5" 2320 | batch_norm_param { 2321 | use_global_stats: true 2322 | } 2323 | param { 2324 | name : "ab6r_a" 2325 | lr_mult: 0 2326 | } 2327 | param { 2328 | name: "ab6r_b" 2329 | lr_mult: 0 2330 | } 2331 | param { 2332 | name: "ab6r_c" 2333 | 
lr_mult: 0 2334 | } 2335 | include { 2336 | phase: TEST 2337 | } 2338 | } 2339 | layer { 2340 | name: "arelu6r" 2341 | type: "ReLU" 2342 | bottom: "asum5" 2343 | top: "asum5" 2344 | } 2345 | 2346 | #CD128 2347 | layer { 2348 | name: "aup6" 2349 | type: "Deconvolution" 2350 | bottom: "asum5" 2351 | top: "aup6" 2352 | convolution_param { 2353 | kernel_size: 4 2354 | stride: 2 2355 | num_output: 128 2356 | group: 128 2357 | pad: 1 2358 | weight_filler { 2359 | type: "bilinear" 2360 | } 2361 | bias_term: false 2362 | } 2363 | param { 2364 | lr_mult: 0 2365 | decay_mult: 0 2366 | } 2367 | } 2368 | 2369 | layer { 2370 | name: "aconv6" 2371 | type: "Convolution" 2372 | bottom: "aup6" 2373 | top: "aconv6" 2374 | param { 2375 | name : "ac6_w" 2376 | lr_mult: 1 2377 | decay_mult: 1 2378 | } 2379 | param { 2380 | name: "ac6_b" 2381 | lr_mult: 2 2382 | decay_mult: 0 2383 | } 2384 | convolution_param { 2385 | num_output: 128 2386 | kernel_size: 1 2387 | stride: 1 2388 | pad: 0 2389 | weight_filler { 2390 | type: "xavier" 2391 | } 2392 | } 2393 | } 2394 | 2395 | 2396 | layer { 2397 | name: "abn6" 2398 | type: "BatchNorm" 2399 | bottom: "aconv6" 2400 | top: "aconv6" 2401 | batch_norm_param { 2402 | use_global_stats: false 2403 | } 2404 | param { 2405 | name: "ab6_a" 2406 | lr_mult: 0 2407 | } 2408 | param { 2409 | name: "ab6_b" 2410 | lr_mult: 0 2411 | } 2412 | param { 2413 | name: "ab6_c" 2414 | lr_mult: 0 2415 | } 2416 | include { 2417 | phase: TRAIN 2418 | } 2419 | } 2420 | layer { 2421 | name: "abn6" 2422 | type: "BatchNorm" 2423 | bottom: "aconv6" 2424 | top: "aconv6" 2425 | batch_norm_param { 2426 | use_global_stats: true 2427 | } 2428 | param { 2429 | name: "ab6_a" 2430 | lr_mult: 0 2431 | } 2432 | param { 2433 | name: "ab6_b" 2434 | lr_mult: 0 2435 | } 2436 | param { 2437 | name: "ab6_c" 2438 | lr_mult: 0 2439 | } 2440 | include { 2441 | phase: TEST 2442 | } 2443 | } 2444 | 2445 | layer { 2446 | name: "arelu6" 2447 | type: "ReLU" 2448 | bottom: "aconv6" 2449 | top: 
"aconv6" 2450 | } 2451 | 2452 | #CD 64 2453 | layer { 2454 | name: "aconv7" 2455 | type: "Convolution" 2456 | bottom: "aconv6" 2457 | top: "aconv7" 2458 | param { 2459 | name: "ac7_w" 2460 | lr_mult: 1 2461 | decay_mult: 1 2462 | } 2463 | param { 2464 | name: "ac7_b" 2465 | lr_mult: 2 2466 | decay_mult: 0 2467 | } 2468 | convolution_param { 2469 | num_output: 64 2470 | kernel_size: 3 2471 | stride: 1 2472 | pad: 1 2473 | weight_filler { 2474 | type: "xavier" 2475 | } 2476 | } 2477 | } 2478 | 2479 | layer { 2480 | name: "abn7" 2481 | type: "BatchNorm" 2482 | bottom: "aconv7" 2483 | top: "aconv7" 2484 | batch_norm_param { 2485 | use_global_stats: false 2486 | } 2487 | param { 2488 | name: "ab7_a" 2489 | lr_mult: 0 2490 | } 2491 | param { 2492 | name: "ab7_b" 2493 | lr_mult: 0 2494 | } 2495 | param { 2496 | name: "ab7_c" 2497 | lr_mult: 0 2498 | } 2499 | include { 2500 | phase: TRAIN 2501 | } 2502 | } 2503 | layer { 2504 | name: "abn7" 2505 | type: "BatchNorm" 2506 | bottom: "aconv7" 2507 | top: "aconv7" 2508 | batch_norm_param { 2509 | use_global_stats: true 2510 | } 2511 | param { 2512 | name: "ab7_a" 2513 | lr_mult: 0 2514 | } 2515 | param { 2516 | name: "ab7_b" 2517 | lr_mult: 0 2518 | } 2519 | param { 2520 | name: "ab7_c" 2521 | lr_mult: 0 2522 | } 2523 | include { 2524 | phase: TEST 2525 | } 2526 | } 2527 | 2528 | layer { 2529 | name: "arelu7" 2530 | type: "ReLU" 2531 | bottom: "aconv7" 2532 | top: "aconv7" 2533 | } 2534 | 2535 | #C*3 2536 | layer { 2537 | name: "Aconv0" 2538 | type: "Convolution" 2539 | bottom: "aconv7" 2540 | top: "Aconv0" 2541 | param { 2542 | lr_mult: 1 2543 | decay_mult: 1 2544 | } 2545 | param { 2546 | lr_mult: 2 2547 | decay_mult: 0 2548 | } 2549 | convolution_param { 2550 | num_output: 3 2551 | kernel_size: 1 2552 | stride: 1 2553 | pad: 0 2554 | weight_filler { 2555 | type: "xavier" 2556 | } 2557 | } 2558 | } 2559 | 2560 | 2561 | ################################## Light############################################# 2562 | 2563 | #concat 
2564 | layer { 2565 | name: "lconcat1" 2566 | bottom: "nsum5" 2567 | bottom: "asum5" 2568 | top: "lconcat1" 2569 | type: "Concat" 2570 | concat_param { 2571 | axis: 1 2572 | } 2573 | } 2574 | 2575 | layer { 2576 | name: "lconcat2" 2577 | bottom: "lconcat1" 2578 | bottom: "conv3" 2579 | top: "lconcat2" 2580 | type: "Concat" 2581 | concat_param { 2582 | axis: 1 2583 | } 2584 | } 2585 | 2586 | #128x1x1 conv 2587 | layer { 2588 | name: "lconv1" 2589 | type: "Convolution" 2590 | bottom: "lconcat2" 2591 | top: "lconv1" 2592 | param { 2593 | name : "lc1_w" 2594 | lr_mult: 1 2595 | decay_mult: 1 2596 | } 2597 | param { 2598 | name: "lc1_b" 2599 | lr_mult: 2 2600 | decay_mult: 0 2601 | } 2602 | convolution_param { 2603 | num_output: 128 2604 | kernel_size: 1 2605 | stride: 1 2606 | pad: 0 2607 | weight_filler { 2608 | type: "xavier" 2609 | } 2610 | } 2611 | } 2612 | 2613 | layer { 2614 | name: "lbn1" 2615 | type: "BatchNorm" 2616 | bottom: "lconv1" 2617 | top: "lconv1" 2618 | batch_norm_param { 2619 | use_global_stats: false 2620 | } 2621 | param { 2622 | name: "lb1_a" 2623 | lr_mult: 0 2624 | } 2625 | param { 2626 | name: "lb1_b" 2627 | lr_mult: 0 2628 | } 2629 | param { 2630 | name: "lb1_c" 2631 | lr_mult: 0 2632 | } 2633 | include { 2634 | phase: TRAIN 2635 | } 2636 | } 2637 | layer { 2638 | name: "lbn1" 2639 | type: "BatchNorm" 2640 | bottom: "lconv1" 2641 | top: "lconv1" 2642 | batch_norm_param { 2643 | use_global_stats: true 2644 | } 2645 | param { 2646 | name: "lb1_a" 2647 | lr_mult: 0 2648 | } 2649 | param { 2650 | name: "lb1_b" 2651 | lr_mult: 0 2652 | } 2653 | param { 2654 | name: "lb1_c" 2655 | lr_mult: 0 2656 | } 2657 | include { 2658 | phase: TEST 2659 | } 2660 | } 2661 | 2662 | layer { 2663 | name: "lrelu1" 2664 | type: "ReLU" 2665 | bottom: "lconv1" 2666 | top: "lconv1" 2667 | } 2668 | 2669 | layer { 2670 | name: "lpool2r" 2671 | type: "Pooling" 2672 | bottom: "lconv1" 2673 | top: "lpool2r" 2674 | pooling_param { 2675 | pool: AVE 2676 | kernel_size: 64 2677 | 
} 2678 | } 2679 | 2680 | layer { 2681 | name: "fc_light" 2682 | type: "InnerProduct" 2683 | bottom: "lpool2r" 2684 | top: "fc_light" 2685 | param { 2686 | lr_mult: 1 2687 | decay_mult: 1 2688 | } 2689 | param { 2690 | lr_mult: 2 2691 | decay_mult: 0 2692 | } 2693 | inner_product_param { 2694 | num_output: 27 2695 | weight_filler { 2696 | type: "gaussian" 2697 | std: 0.005 2698 | } 2699 | bias_filler { 2700 | type: "constant" 2701 | value: 1 2702 | } 2703 | } 2704 | } 2705 | 2706 | 2707 | 2708 | 2709 | 2710 | -------------------------------------------------------------------------------- /SfSNet-Caffe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mannix1994/SfSNet-Pytorch/c2c1ed96b20dab66c5f84fe41ccb5d08aaa2291a/SfSNet-Caffe/__init__.py -------------------------------------------------------------------------------- /SfSNet-Caffe/convert_to_pkl.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | from __future__ import absolute_import, division, print_function 3 | import pickle as pkl 4 | import caffe 5 | 6 | # prototxt文件 7 | MODEL_FILE = 'SfSNet_deploy.prototxt' 8 | # 预先训练好的caffe模型 9 | PRETRAIN_FILE = 'SfSNet.caffemodel.h5' 10 | 11 | 12 | if __name__ == '__main__': 13 | # 导入网络 14 | net = caffe.Net(MODEL_FILE, PRETRAIN_FILE, caffe.TEST) 15 | print('*' * 80) 16 | # 名字和权重词典 17 | name_weights = {} 18 | # 保存每层的参数信息 19 | keys = open('keys.txt', 'w') 20 | keys.write('generated by SfSNet-Caffe/convert_to_pkl.py\n\n') 21 | # 遍历每一网络层 22 | for param_name in net.params.keys(): 23 | name_weights[param_name] = {} 24 | # 得到此层的参数 25 | layer_params = net.params[param_name] 26 | if len(layer_params) == 1: 27 | # 如果参数只有一个,则说明是反卷积层, 28 | # SfSNet整个模型里就只有反卷积层只有一组weight参数 29 | weight = layer_params[0].data 30 | name_weights[param_name]['weight'] = weight 31 | 32 | print('%s:\n\t%s (weight)' % (param_name, weight.shape)) 33 | keys.write('%s:\n\t%s 
(weight)\n' % (param_name, weight.shape)) 34 | elif len(layer_params) == 2: 35 | # 如果参数有两个,则说明是卷积层或者全连接层。 36 | # 卷积层或者全连接层都有两组参数:weight和bias 37 | # 权重参数 38 | weight = layer_params[0].data 39 | name_weights[param_name]['weight'] = weight 40 | # 偏置参数 41 | bias = layer_params[1].data 42 | name_weights[param_name]['bias'] = bias 43 | 44 | print('%s:\n\t%s (weight)' % (param_name, weight.shape)) 45 | print('\t%s (bias)' % str(bias.shape)) 46 | keys.write('%s:\n\t%s (weight)\n' % (param_name, weight.shape)) 47 | keys.write('\t%s (bias)\n' % str(bias.shape)) 48 | elif len(layer_params) == 3: 49 | # 如果有三个,则说明是BatchNorm层。 50 | # BN层共有三个参数,分别是:running_mean、running_var和一个缩放参数。 51 | running_mean = layer_params[0].data # running_mean 52 | name_weights[param_name]['running_mean'] = running_mean / layer_params[2].data 53 | running_var = layer_params[1].data # running_var 54 | name_weights[param_name]['running_var'] = running_var/layer_params[2].data 55 | 56 | print('%s:\n\t%s (running_var)' % (param_name, running_var.shape),) 57 | print('\t%s (running_mean)' % str(running_mean.shape)) 58 | keys.write('%s:\n\t%s (running_var)\n' % (param_name, running_var.shape)) 59 | keys.write('\t%s (running_mean)\n' % str(running_mean.shape)) 60 | else: 61 | # 如果报错,大家要检查自己模型哈 62 | raise RuntimeError("还有参数个数超过3个的层,别漏了兄dei!!!\n") 63 | keys.close() 64 | # 保存name_weights 65 | with open('weights.pkl', 'wb') as f: 66 | pkl.dump(name_weights, f, protocol=2) 67 | -------------------------------------------------------------------------------- /SfSNet-Caffe/read_h5.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | from __future__ import absolute_import, division, print_function 3 | import h5py 4 | import pickle as pkl 5 | 6 | 7 | if __name__ == '__main__': 8 | f = h5py.File('SfSNet.caffemodel.h5', 'r') 9 | for group_name in f.keys(): 10 | # print(group_name) 11 | # 根据一级组名获得其下面的组 12 | name_weights = {} 13 | group = f[group_name] 14 | for 
sub_group_name in group.keys(): 15 | # print('----'+sub_group_name) 16 | if sub_group_name not in name_weights.keys(): 17 | name_weights[sub_group_name] = {} 18 | # 根据一级组和二级组名获取其下面的dataset 19 | # 经过实验,一个dataset对应一层的参数 20 | dataset = f[group_name + '/' + sub_group_name] 21 | # 遍历该子组下所有的dataset。 22 | # print(dataset.keys()) 23 | if len(dataset.keys()) == 1: 24 | # 如果参数只有一个,则说明是反卷积层, 25 | # SfSNet整个模型里就只有反卷积层只有一组weight参数 26 | weight = dataset['0'][()] 27 | name_weights[sub_group_name]['weight'] = weight 28 | 29 | print('%s:\n\t%s (weight)' % (sub_group_name, weight.shape)) 30 | elif len(dataset.keys()) == 2: 31 | # 如果参数有两个,则说明是卷积层或者全连接层。 32 | # 卷积层或者全连接层都有两组参数:weight和bias 33 | # 权重参数 34 | weight = dataset['0'][()] 35 | # print(type(weight)) 36 | # print(weight.shape) 37 | name_weights[sub_group_name]['weight'] = weight 38 | # 偏置参数 39 | bias = dataset['1'][()] 40 | name_weights[sub_group_name]['bias'] = bias 41 | 42 | print('%s:\n\t%s (weight)' % (sub_group_name, weight.shape)) 43 | print('\t%s (bias)' % str(bias.shape)) 44 | elif len(dataset.keys()) == 3: 45 | # 如果有三个,则说明是BatchNorm层。 46 | # BN层共有三个参数,分别是:running_mean、running_var和一个缩放参数。 47 | running_mean = dataset['0'][()] # running_mean 48 | name_weights[sub_group_name]['running_mean'] = running_mean / dataset['2'][()] 49 | running_var = dataset['1'][()] # running_var 50 | name_weights[sub_group_name]['running_var'] = running_var / dataset['2'][()] 51 | 52 | print('%s:\n\t%s (running_var)' % (sub_group_name, running_var.shape), ) 53 | print('\t%s (running_mean)' % str(running_mean.shape)) 54 | elif len(dataset.keys()) == 0: 55 | # 没有参数 56 | continue 57 | else: 58 | # 如果报错,大家要检查自己模型哈 59 | raise RuntimeError("还有参数个数超过3个的层,别漏了兄dei!!!\n") 60 | 61 | with open('weights1.pkl', 'wb') as f: 62 | pkl.dump(name_weights, f, protocol=2) 63 | -------------------------------------------------------------------------------- /SfSNet-Caffe/weights.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mannix1994/SfSNet-Pytorch/c2c1ed96b20dab66c5f84fe41ccb5d08aaa2291a/SfSNet-Caffe/weights.pkl -------------------------------------------------------------------------------- /SfSNet_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import absolute_import, division, print_function 3 | import glob 4 | import os 5 | import numpy as np 6 | import cv2 7 | import torch 8 | from config import M, LANDMARK_PATH, PROJECT_DIR 9 | from src.functions import create_shading_recon 10 | from src.mask import MaskGenerator 11 | from src.model import SfSNet 12 | from src.utils import convert 13 | 14 | 15 | if __name__ == '__main__': 16 | pass 17 | 18 | 19 | def _test(): 20 | # define a SfSNet 21 | net = SfSNet() 22 | # set to eval mode 23 | net.eval() 24 | # load weights 25 | # net.load_weights_from_pkl('SfSNet-Caffe/weights.pkl') 26 | net.load_state_dict(torch.load('data/SfSNet.pth')) 27 | # define a mask generator 28 | mg = MaskGenerator(LANDMARK_PATH) 29 | 30 | # get image list 31 | image_list = glob.glob(os.path.join(PROJECT_DIR, 'Images/*.*')) 32 | 33 | for image_name in image_list: 34 | # read image 35 | image = cv2.imread(image_name) 36 | # crop face and generate mask of face 37 | aligned, mask, im, landmark = mg.align(image, size=(M, M))[0] 38 | # resize 39 | im = cv2.resize(im, (M, M)) 40 | # normalize to (0, 1.0) 41 | im = np.float32(im) / 255.0 42 | # from (128, 128, 3) to (1, 3, 128, 128) 43 | im = np.transpose(im, [2, 0, 1]) 44 | im = np.expand_dims(im, 0) 45 | 46 | # get the normal, albedo and light parameter 47 | normal, albedo, light = net(torch.from_numpy(im)) 48 | 49 | # get numpy array 50 | n_out = normal.detach().numpy() 51 | al_out = albedo.detach().numpy() 52 | light_out = light.detach().numpy() 53 | 54 | # -----------add by wang------------- 55 | # from [1, 3, 128, 128] to [128, 128, 3] 56 | n_out = np.squeeze(n_out, 0) 57 | n_out = np.transpose(n_out, 
[1, 2, 0]) 58 | # from [1, 3, 128, 128] to [128, 128, 3] 59 | al_out = np.squeeze(al_out, 0) 60 | al_out = np.transpose(al_out, [1, 2, 0]) 61 | # from [1, 27] to [27, 1] 62 | light_out = np.transpose(light_out, [1, 0]) 63 | # print n_out.shape, al_out.shape, light_out.shape 64 | # -----------end--------------------- 65 | 66 | """ 67 | light_out is a 27 dimensional vector. 9 dimension for each channel of 68 | RGB. For every 9 dimensional, 1st dimension is ambient illumination 69 | (0th order), next 3 dimension is directional (1st order), next 5 70 | dimension is 2nd order approximation. You can simply use 27 71 | dimensional feature vector as lighting representation. 72 | """ 73 | 74 | # transform 75 | n_out2 = n_out[:, :, (2, 1, 0)] 76 | # print 'n_out2 shape', n_out2.shape 77 | n_out2 = 2 * n_out2 - 1 # [-1 1] 78 | nr = np.sqrt(np.sum(n_out2 ** 2, axis=2)) # nr=sqrt(sum(n_out2.^2,3)) 79 | nr = np.expand_dims(nr, axis=2) 80 | n_out2 = n_out2 / np.repeat(nr, 3, axis=2) 81 | # print 'nr shape', nr.shape 82 | 83 | al_out2 = al_out[:, :, (2, 1, 0)] 84 | 85 | # Note: n_out2, al_out2, light_out is the actual output 86 | Irec, Ishd = create_shading_recon(n_out2, al_out2, light_out) 87 | 88 | diff = (mask // 255) 89 | n_out2 = n_out2 * diff 90 | al_out2 = al_out2 * diff 91 | Ishd = Ishd * diff 92 | Irec = Irec * diff 93 | 94 | # -----------add by wang------------ 95 | Ishd = cv2.cvtColor(Ishd, cv2.COLOR_RGB2GRAY) 96 | 97 | # al_out2 = (al_out2 / np.max(al_out2) * 255).astype(dtype=np.uint8) 98 | # Irec = (Irec / np.max(Irec) * 255).astype(dtype=np.uint8) 99 | # Ishd = (Ishd / np.max(Ishd) * 255).astype(dtype=np.uint8) 100 | 101 | al_out2 = cv2.cvtColor(al_out2, cv2.COLOR_RGB2BGR) 102 | n_out2 = cv2.cvtColor(n_out2, cv2.COLOR_RGB2BGR) 103 | Irec = cv2.cvtColor(Irec, cv2.COLOR_RGB2BGR) 104 | # -------------end--------------------- 105 | 106 | cv2.imshow("Normal", n_out2) 107 | cv2.imshow("Albedo", al_out2) 108 | cv2.imshow("Recon", Irec) 109 | cv2.imshow("Shading", Ishd) 110 
| 111 | # save result 112 | cv2.imwrite('data/shading.png', convert(Ishd)) 113 | cv2.imwrite('data/Albedo.png', convert(al_out2)) 114 | if cv2.waitKey(0) == 27: 115 | exit() 116 | 117 | 118 | if __name__ == '__main__': 119 | d_path = os.path.join(PROJECT_DIR, 'data') 120 | if not os.path.exists(d_path): 121 | os.mkdir(d_path) 122 | _test() 123 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import os 3 | 4 | PROJECT_DIR = os.path.dirname(os.path.realpath(__file__)) 5 | 6 | # image's size, DO NOT CHANGE! 7 | M = 128 # size of input for SfSNet 8 | 9 | # landmarks's path 10 | LANDMARK_PATH = os.path.join(PROJECT_DIR, 'data/shape_predictor_68_face_landmarks.dat') 11 | -------------------------------------------------------------------------------- /data/SfSNet.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mannix1994/SfSNet-Pytorch/c2c1ed96b20dab66c5f84fe41ccb5d08aaa2291a/data/SfSNet.pth -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | h5py==2.9.0 2 | numpy==1.15.4 3 | matplotlib==2.2.3 4 | opencv-python==3.4.5.20 5 | dlib==19.16.0 6 | torch==1.0.0 7 | torchvision==0.2.0 8 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mannix1994/SfSNet-Pytorch/c2c1ed96b20dab66c5f84fe41ccb5d08aaa2291a/src/__init__.py -------------------------------------------------------------------------------- /src/functions.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | import numpy as np 3 | import sys 4 | from 
matplotlib.path import Path 5 | 6 | 7 | def create_shading_recon(n_out2, al_out2, light_out): 8 | """ 9 | :type n_out2: np.ndarray 10 | :type al_out2: np.ndarray 11 | :type light_out: np.ndarray 12 | :return: 13 | """ 14 | M = n_out2.shape[0] 15 | No1 = np.reshape(n_out2, (M * M, 3)) 16 | tex1 = np.reshape(al_out2, (M * M, 3)) 17 | 18 | la = lambertian_attenuation(3) 19 | HN1 = normal_harmonics(No1.T, la) 20 | 21 | HS1r = np.matmul(HN1, light_out[0:9]) 22 | HS1g = np.matmul(HN1, light_out[9:18]) 23 | HS1b = np.matmul(HN1, light_out[18:27]) 24 | 25 | HS1 = np.zeros(shape=(M, M, 3), dtype=np.float32) 26 | HS1[:, :, 0] = np.reshape(HS1r, (M, M)) 27 | HS1[:, :, 1] = np.reshape(HS1g, (M, M)) 28 | HS1[:, :, 2] = np.reshape(HS1b, (M, M)) 29 | Tex1 = np.reshape(tex1, (M, M, 3)) * HS1 30 | 31 | IRen0 = Tex1 32 | Shd = (200 / 255.0) * HS1 # 200 is added instead of 255 so that not to scale the shading to all white 33 | Ishd0 = Shd 34 | return [IRen0, Ishd0] 35 | 36 | 37 | def lambertian_attenuation(n): 38 | # a = [.8862; 1.0233; .4954]; 39 | a = [np.pi * i for i in [1.0, 2 / 3.0, .25]] 40 | if n > 3: 41 | sys.stderr.write('don\'t record more than 3 attenuation') 42 | exit(-1) 43 | o = a[0:n] 44 | return o 45 | 46 | 47 | def normal_harmonics(N, att): 48 | """ 49 | Return the harmonics evaluated at surface normals N, attenuated by att. 50 | :param N: 51 | :param att: 52 | :return: 53 | 54 | Normals can be scaled surface normals, in which case value of each 55 | harmonic at each point is scaled by albedo. 
56 | Harmonics written as polynomials 57 | 0,0 1/sqrt(4*pi) 58 | 1,0 z*sqrt(3/(4*pi)) 59 | 1,1e x*sqrt(3/(4*pi)) 60 | 1,1o y*sqrt(3/(4*pi)) 61 | 2,0 (2*z.^2 - x.^2 - y.^2)/2 * sqrt(5/(4*pi)) 62 | 2,1e x*z * 3*sqrt(5/(12*pi)) 63 | 2,1o y*z * 3*sqrt(5/(12*pi)) 64 | 2,2e (x.^2-y.^2) * 3*sqrt(5/(48*pi)) 65 | 2,2o x*y * 3*sqrt(5/(12*pi)) 66 | """ 67 | xs = N[0, :].T 68 | ys = N[1, :].T 69 | zs = N[2, :].T 70 | a = np.sqrt(xs ** 2 + ys ** 2 + zs ** 2) 71 | denom = (a == 0) + a 72 | # %x = xs./a; y = ys./a; z = zs./a; 73 | x = xs / denom 74 | y = ys / denom 75 | z = zs / denom 76 | 77 | x2 = x * x 78 | y2 = y * y 79 | z2 = z * z 80 | xy = x * y 81 | xz = x * z 82 | yz = y * z 83 | 84 | H1 = att[0] * (1 / np.sqrt(4 * np.pi)) * a 85 | H2 = att[1] * (np.sqrt(3 / (4 * np.pi))) * zs 86 | H3 = att[1] * (np.sqrt(3 / (4 * np.pi))) * xs 87 | H4 = att[1] * (np.sqrt(3 / (4 * np.pi))) * ys 88 | H5 = att[2] * (1 / 2.0) * (np.sqrt(5 / (4 * np.pi))) * ((2 * z2 - x2 - y2) * a) 89 | H6 = att[2] * (3 * np.sqrt(5 / (12 * np.pi))) * (xz * a) 90 | H7 = att[2] * (3 * np.sqrt(5 / (12 * np.pi))) * (yz * a) 91 | H8 = att[2] * (3 * np.sqrt(5 / (48 * np.pi))) * ((x2 - y2) * a) 92 | H9 = att[2] * (3 * np.sqrt(5 / (12 * np.pi))) * (xy * a) 93 | H = [H1, H2, H3, H4, H5, H6, H7, H8, H9] 94 | 95 | # --------add by wang ----------- 96 | H = [np.expand_dims(h, axis=1) for h in H] 97 | H = np.concatenate(H, -1) 98 | # -------------end--------------- 99 | return H 100 | 101 | 102 | def create_mask_fiducial(fiducials, Image): 103 | """ 104 | create mask use fiducials of Image 105 | :param fiducials: the 68 landmarks detected using dlib 106 | :type fiducials np.ndarray 107 | :param Image: a 3-channel image 108 | :type Image np.ndarray 109 | :return: 110 | """ 111 | # fiducals is 2x68 112 | fiducials = np.float32(fiducials) 113 | border_fid = fiducials[:, 0:17] 114 | face_fid = fiducials[:, 17:] 115 | 116 | c1 = np.array([border_fid[0, 0], face_fid[1, 2]]) # left 117 | c2 = np.array([border_fid[0, 16], 
face_fid[1, 7]]) # right 118 | eye = np.linalg.norm(face_fid[:, 22] - face_fid[:, 25]) 119 | c3 = face_fid[:, 2] 120 | c3[1] = c3[1] - 0.3 * eye 121 | c4 = face_fid[:, 7] 122 | c4[1] = c4[1] - 0.3 * eye 123 | 124 | border = [c1, border_fid, c2, c4, c3] 125 | border = [item.reshape(2, -1) for item in border] 126 | border = np.hstack(border) 127 | 128 | M = Image.shape[0] # row -> y 129 | N = Image.shape[1] # col -> x 130 | 131 | y = np.arange(0, M, step=1, dtype=np.float32) 132 | x = np.arange(0, N, step=1, dtype=np.float32) 133 | X, Y = np.meshgrid(x, y) 134 | 135 | _in, _on = inpolygon(X, Y, border[0, :].T, border[1, :].T) 136 | 137 | mask = np.round(np.reshape(_in | _on, [M, N])) 138 | mask = 255 * np.uint8(mask) 139 | mask = np.repeat(np.expand_dims(mask, -1), 3, axis=-1) 140 | return mask 141 | 142 | 143 | def inpolygon(xq, yq, xv, yv): 144 | """ 145 | reimplement inpolygon in matlab 146 | :type xq: np.ndarray 147 | :type yq: np.ndarray 148 | :type xv: np.ndarray 149 | :type yv: np.ndarray 150 | """ 151 | # http://blog.sina.com.cn/s/blog_70012f010102xnel.html 152 | # merge xy and yv into vertices 153 | vertices = np.vstack((xv, yv)).T 154 | # define a Path object 155 | path = Path(vertices) 156 | # merge X and Y into test_points 157 | test_points = np.hstack([xq.reshape(xq.size, -1), yq.reshape(yq.size, -1)]) 158 | # get mask of test_points in path 159 | _in = path.contains_points(test_points) 160 | # get mask of test_points in path(include the points on path) 161 | _in_on = path.contains_points(test_points, radius=-1e-10) 162 | # get the points on path 163 | _on = _in ^ _in_on 164 | return _in_on, _on 165 | 166 | -------------------------------------------------------------------------------- /src/mask.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | from __future__ import absolute_import, division, print_function 3 | import dlib 4 | import cv2 5 | import numpy as np 6 | import os 7 | import sys 8 | 
from matplotlib.path import Path 9 | 10 | 11 | class MaskGenerator: 12 | def __init__(self, landmarks_path): 13 | """ 14 | :param landmarks_path: the path of pretrained key points weight, 15 | it could be download from: 16 | http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 17 | """ 18 | if not os.path.exists(landmarks_path): 19 | raise RuntimeError('face landmark file is not exist. please download if from: \n' 20 | 'http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 ' 21 | 'and uncompress it.') 22 | self._detector = dlib.get_frontal_face_detector() 23 | self._predictor = dlib.shape_predictor(landmarks_path) 24 | 25 | def bounding_boxes(self, image): 26 | # convert to gray image 27 | gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 28 | # get rect contains face 29 | face_rects = self._detector(gray_image, 0) 30 | return face_rects 31 | 32 | def align(self, image, size=(240, 240), scale=1.8, warp=True, crop=True, resize=True, 33 | crop_function_version=0, align_multi=False, draw_landmarks=False): 34 | """ 35 | warp and crop image 36 | https://blog.csdn.net/qq_39438636/article/details/79304130 37 | 38 | :param image: a BGR format face image 39 | :type image: np.ndarray 40 | :param size: target size 41 | :param scale: 42 | :param warp: warp or not 43 | :param crop: crop or not 44 | :param resize: resize od not 45 | :param crop_function_version: crop function version 46 | :param align_multi: whther to detect multi face 47 | :param draw_landmarks: whether draw face landmarks 48 | :return: mask, image and whether successfully crop image 49 | """ 50 | # check option 51 | if crop_function_version == 1 and align_multi: 52 | raise RuntimeError("When align_multi is true, crop_function_version must be 0") 53 | # if image is too big, resize to a smaller image 54 | if np.min(image.shape[0:2]) > 1000: 55 | ratio = 1000 / np.min(image.shape[0:2]) 56 | image = cv2.resize(image, dsize=(0, 0), fx=ratio, fy=ratio) 57 | # make border for image 58 | border 
= int(np.min(image.shape[0:2]) * 0.3) 59 | image = cv2.copyMakeBorder(image, border, border, border, border, cv2.BORDER_CONSTANT) 60 | # backup image 61 | original_image = image.copy() 62 | # get rectangles which contains face 63 | face_rects = self.bounding_boxes(image) 64 | results = [] 65 | if len(face_rects) > 0: 66 | for i in range(len(face_rects)): 67 | # get 68 landmarks of face 68 | landmarks = np.array([[p.x, p.y] for p in self._predictor(original_image, face_rects[i]).parts()]) 69 | # draw landmarks 70 | if draw_landmarks: 71 | landmark_image = self.draw_landmarks(original_image, landmarks) 72 | # remove border 73 | _row, _col, _ = landmark_image.shape 74 | landmark_image = landmark_image[border:_row-border, border:_col-border, :] 75 | else: 76 | landmark_image = None 77 | # create mask using landmarks 78 | mask = create_mask_by_landmarks(landmarks.T, original_image) 79 | if warp: 80 | image, mask, r_mat = self._warp(original_image, mask, landmarks) 81 | landmarks = self._get_rotated_points(landmarks, r_mat) 82 | if crop: 83 | if crop_function_version == 0: 84 | image = self._crop_v0(image, landmarks, scale) 85 | mask = self._crop_v0(mask, landmarks, scale) 86 | elif crop_function_version == 1: 87 | image, mask, suc_ = self._crop_v1(image, mask, scale) 88 | if not suc_: 89 | sys.stderr.write('%s: Failed to crop image and mask\n' % __file__) 90 | else: 91 | raise RuntimeError("crop_function_version must be 0 or 1") 92 | 93 | if resize: 94 | results.append((True, cv2.resize(mask, size), cv2.resize(image, size), landmark_image)) 95 | else: 96 | results.append((True, mask, image, landmark_image)) 97 | 98 | if not align_multi: 99 | return results 100 | return results 101 | else: 102 | sys.stderr.write("%s: Can't detect face in image\n" % __file__) 103 | image = cv2.resize(image, size) 104 | return [(False, np.ones(image.shape, dtype=image.dtype) * 255, image, None)] 105 | 106 | @staticmethod 107 | def _get_rotated_points(points, rotate_mat): 108 | # Blog; 
https://www.cnblogs.com/zhoug2020/p/7842808.html 109 | # add 1 to every point 110 | __padding = np.ones((points.shape[0], 1), dtype=points.dtype) 111 | points = np.concatenate([points, __padding], axis=1) 112 | # add [0, 0, 1] to rotate matrix 113 | __padding = np.array([0, 0, 1], dtype=points.dtype).reshape(1, 3) 114 | rotate_mat = np.concatenate([rotate_mat, __padding], axis=0) 115 | # compute rotated landmarks 116 | rotate_landmarks = np.matmul(rotate_mat, points.T) 117 | # remove the padding and transpose landmarks 118 | rotate_landmarks = rotate_landmarks[0:2, :].T 119 | # return landmark as integer numpy array 120 | return rotate_landmarks.astype(points.dtype) 121 | 122 | @staticmethod 123 | def _warp(image, mask, landmarks): 124 | """ 125 | warp image and mask by landmarks 126 | :param image: 127 | :type image np.ndarray 128 | :param landmarks: 129 | :type landmarks np.ndarray 130 | :return: warped face and mask 131 | """ 132 | # landmarks.shape = (68, 2) 133 | landmarks = np.array(landmarks) 134 | # compute rotate angle, r_angle=arctan((y1-y2)/(x1-x2)) 135 | # landmarks[36]: corner of left eye 136 | # landmarks[42]: corner of right eye 137 | r_angle = np.arctan((landmarks[36][1] - landmarks[42][1]) / 138 | (landmarks[36][0] - landmarks[42][0])) 139 | r_angle = 180 * r_angle / np.pi 140 | # get rotation matrix 141 | rot_mat = cv2.getRotationMatrix2D(tuple(landmarks[2]), r_angle, scale=1) 142 | 143 | # rotate image and mask 144 | rotated_image = cv2.warpAffine(image, rot_mat, dsize=image.shape[0:2]) 145 | rotated_mask = cv2.warpAffine(mask, rot_mat, dsize=image.shape[0:2]) 146 | 147 | return rotated_image, rotated_mask, rot_mat 148 | 149 | def _crop_v0(self, image, landmarks, scale): 150 | """ 151 | crop image by face landmarks 152 | :param image: 153 | :param landmarks: 154 | :param scale: 155 | :return: 156 | """ 157 | # left eye: landmarks[36] 158 | # left mouth: landmarks[48] 159 | # nose: landmarks[29] 160 | # find the most left point and most right 
point 161 | landmarks_x = landmarks[:, 0] 162 | most_left_x = np.min(landmarks_x) 163 | most_right_x = np.max(landmarks_x) 164 | mid_x = (most_left_x + most_right_x) // 2 165 | # print(most_left_x, most_right_x, mid_x) 166 | # define new center point use mid_x and y from nose point 167 | center_point = [mid_x, landmarks[29][1]] 168 | # compute the distance between left eye(landmarks[36]) 169 | distance = most_right_x - mid_x 170 | size = distance * scale 171 | # print(center_point) 172 | # compute row_start, row_end, col_start, col_end 173 | row_start = int(center_point[1] - size) 174 | row_end = int(center_point[1] + size) 175 | col_start = int(center_point[0] - size) 176 | col_end = int(center_point[0] + size) 177 | # print('*' * 10) 178 | # print(row_start, row_end, col_start, col_end) 179 | # make range valid and compute padding 180 | if row_start < 0: 181 | padding_up = abs(row_start) 182 | row_start = 0 183 | else: 184 | padding_up = 0 185 | if col_start < 0: 186 | padding_left = abs(col_start) 187 | col_start = 0 188 | else: 189 | padding_left = 0 190 | if row_end > (image.shape[0] - 1): 191 | padding_down = row_end - (image.shape[0] - 1) 192 | row_end = image.shape[0] - 1 193 | else: 194 | padding_down = 0 195 | if col_end > (image.shape[1] - 1): 196 | padding_right = col_end - (image.shape[1] - 1) 197 | col_end = image.shape[1] - 1 198 | else: 199 | padding_right = 0 200 | # print(row_start, row_end, col_start, col_end) 201 | # print('*' * 10) 202 | # crop image 203 | cropped_image = self._crop_helper(image, row_start, row_end, col_start, col_end, 204 | padding_up, padding_down, padding_left, padding_right) 205 | return cropped_image 206 | 207 | def _crop_v1(self, image, mask, scale): 208 | face_rects = self.bounding_boxes(image) 209 | if len(face_rects) == 0: 210 | return image, mask, False 211 | # define crop size 212 | size = (face_rects[0].right() - face_rects[0].left()) / 2 213 | size *= scale 214 | # define new center point use mid_x and y from nose 
point 215 | _x = (face_rects[0].left() + face_rects[0].right()) // 2 216 | _y = (face_rects[0].top() + face_rects[0].bottom()) // 2 217 | center_point = [_x, _y] 218 | # compute the distance between left eye(landmarks[36]) 219 | # print(center_point) 220 | # compute row_start, row_end, col_start, col_end 221 | row_start = int(center_point[1] - size) 222 | row_end = int(center_point[1] + size) 223 | col_start = int(center_point[0] - size) 224 | col_end = int(center_point[0] + size) 225 | # print('*' * 10) 226 | # print(row_start, row_end, col_start, col_end) 227 | # make range valid and compute padding 228 | if row_start < 0: 229 | padding_up = abs(row_start) 230 | row_start = 0 231 | else: 232 | padding_up = 0 233 | if col_start < 0: 234 | padding_left = abs(col_start) 235 | col_start = 0 236 | else: 237 | padding_left = 0 238 | if row_end > (image.shape[0] - 1): 239 | padding_down = row_end - (image.shape[0] - 1) 240 | row_end = image.shape[0] - 1 241 | else: 242 | padding_down = 0 243 | if col_end > (image.shape[1] - 1): 244 | padding_right = col_end - (image.shape[1] - 1) 245 | col_end = image.shape[1] - 1 246 | else: 247 | padding_right = 0 248 | # print(row_start, row_end, col_start, col_end) 249 | # print('*' * 10) 250 | # crop image 251 | image = self._crop_helper(image, row_start, row_end, col_start, col_end, 252 | padding_up, padding_down, padding_left, padding_right) 253 | mask = self._crop_helper(mask, row_start, row_end, col_start, col_end, 254 | padding_up, padding_down, padding_left, padding_right) 255 | return image, mask, True 256 | 257 | @staticmethod 258 | def _crop_helper(image, row_start, row_end, col_start, col_end, 259 | padding_up, padding_down, padding_left, padding_right): 260 | cropped_image = image[row_start:row_end, col_start:col_end] 261 | 262 | # add padding to image 263 | rows, cols, _ = cropped_image.shape 264 | if padding_up > 0: 265 | padding = np.zeros(shape=(padding_up, cols, 3), dtype=cropped_image.dtype) 266 | cropped_image = 
np.vstack((padding, cropped_image)) 267 | if padding_down > 0: 268 | padding = np.zeros(shape=(padding_down, cols, 3), dtype=cropped_image.dtype) 269 | cropped_image = np.vstack((cropped_image, padding)) 270 | rows, cols, _ = cropped_image.shape 271 | if padding_left > 0: 272 | padding = np.zeros(shape=(rows, padding_left, 3), dtype=cropped_image.dtype) 273 | cropped_image = np.hstack((padding, cropped_image)) 274 | if padding_right > 0: 275 | padding = np.zeros(shape=(rows, padding_right, 3), dtype=cropped_image.dtype) 276 | cropped_image = np.hstack((cropped_image, padding)) 277 | return cropped_image 278 | 279 | @staticmethod 280 | def draw_landmarks(image, landmarks): 281 | landmark_im = image.copy() 282 | for i, landmark in enumerate(landmarks): 283 | cv2.circle(landmark_im, tuple(landmark), 3, (0, 0, 255)) 284 | cv2.putText(landmark_im, str(i), tuple(landmark), cv2.FONT_HERSHEY_SIMPLEX, 285 | 0.3, (0, 255, 0)) 286 | return landmark_im 287 | 288 | 289 | def create_mask_by_landmarks(landmarks, Image): 290 | """ 291 | create mask use fiducials of Image 292 | :param landmarks: the 68 landmarks detected using dlib 293 | :type landmarks np.ndarray 294 | :param Image: a 3-channel image 295 | :type Image np.ndarray 296 | :return: 297 | """ 298 | # fiducals is 2x68 299 | landmarks = np.float32(landmarks) 300 | border_fid = landmarks[:, 0:17] 301 | face_fid = landmarks[:, 17:] 302 | 303 | c1 = np.array([border_fid[0, 0], face_fid[1, 2]]) # left 304 | c2 = np.array([border_fid[0, 16], face_fid[1, 7]]) # right 305 | eye = np.linalg.norm(face_fid[:, 22] - face_fid[:, 25]) 306 | c3 = face_fid[:, 2] 307 | c3[1] = c3[1] - 0.3 * eye 308 | c4 = face_fid[:, 7] 309 | c4[1] = c4[1] - 0.3 * eye 310 | 311 | border = [c1, border_fid, c2, c4, c3] 312 | border = [item.reshape(2, -1) for item in border] 313 | border = np.hstack(border) 314 | 315 | M = Image.shape[0] # row -> y 316 | N = Image.shape[1] # col -> x 317 | 318 | y = np.arange(0, M, step=1, dtype=np.float32) 319 | x = 
np.arange(0, N, step=1, dtype=np.float32) 320 | X, Y = np.meshgrid(x, y) 321 | 322 | _in, _on = inpolygon(X, Y, border[0, :].T, border[1, :].T) 323 | 324 | mask = np.round(np.reshape(_in | _on, [M, N])) 325 | mask = 255 * np.uint8(mask) 326 | mask = np.repeat(np.expand_dims(mask, -1), 3, axis=-1) 327 | return mask 328 | 329 | 330 | def inpolygon(xq, yq, xv, yv): 331 | """ 332 | reimplement inpolygon in matlab 333 | :type xq: np.ndarray 334 | :type yq: np.ndarray 335 | :type xv: np.ndarray 336 | :type yv: np.ndarray 337 | """ 338 | # http://blog.sina.com.cn/s/blog_70012f010102xnel.html 339 | # merge xy and yv into vertices 340 | vertices = np.vstack((xv, yv)).T 341 | # define a Path object 342 | path = Path(vertices) 343 | # merge X and Y into test_points 344 | test_points = np.hstack([xq.reshape(xq.size, -1), yq.reshape(yq.size, -1)]) 345 | # get mask of test_points in path 346 | _in = path.contains_points(test_points) 347 | # get mask of test_points in path(include the points on path) 348 | _in_on = path.contains_points(test_points, radius=-1e-10) 349 | # get the points on path 350 | _on = _in ^ _in_on 351 | return _in_on, _on 352 | -------------------------------------------------------------------------------- /src/model.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import absolute_import, division, print_function 3 | import torch 4 | import torchvision 5 | import pickle as pkl 6 | from torch import nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class ResidualBlock(nn.Module): 11 | def __init__(self, in_channel, out_channel): 12 | super(ResidualBlock, self).__init__() 13 | # nbn1/nbn2/.../nbn5 abn1/abn2/.../abn5 14 | self.bn = nn.BatchNorm2d(in_channel) 15 | # nconv1/nconv2/.../nconv5 aconv1/aconv2/.../aconv5 16 | self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=1, padding=1) 17 | # nbn1r/nbn2r/.../nbn5r abn1r/abn2r/.../abn5r 18 | self.bnr = 
nn.BatchNorm2d(out_channel) 19 | # nconv1r/nconv2r/.../nconv5r aconv1r/aconv2r/.../anconv5r 20 | self.convr = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1) 21 | 22 | def forward(self, x): 23 | out = self.conv(F.relu(self.bn(x))) 24 | out = self.convr(F.relu(self.bnr(out))) 25 | out += x 26 | return out 27 | 28 | 29 | class SfSNet(nn.Module): # SfSNet = PS-Net in SfSNet_deploy.prototxt 30 | def __init__(self): 31 | # C64 32 | super(SfSNet, self).__init__() 33 | # TODO 初始化器 xavier 34 | self.conv1 = nn.Conv2d(3, 64, 7, 1, 3) 35 | self.bn1 = nn.BatchNorm2d(64) 36 | # C128 37 | self.conv2 = nn.Conv2d(64, 128, 3, 1, 1) 38 | self.bn2 = nn.BatchNorm2d(128) 39 | # C128 S2 40 | self.conv3 = nn.Conv2d(128, 128, 3, 2, 1) 41 | # ------------RESNET for normals------------ 42 | # RES1 43 | self.n_res1 = ResidualBlock(128, 128) 44 | # RES2 45 | self.n_res2 = ResidualBlock(128, 128) 46 | # RES3 47 | self.n_res3 = ResidualBlock(128, 128) 48 | # RES4 49 | self.n_res4 = ResidualBlock(128, 128) 50 | # RES5 51 | self.n_res5 = ResidualBlock(128, 128) 52 | # nbn6r 53 | self.nbn6r = nn.BatchNorm2d(128) 54 | # CD128 55 | # TODO 初始化器 bilinear 56 | self.nup6 = nn.ConvTranspose2d(128, 128, 4, 2, 1, groups=128, bias=False) 57 | # nconv6 58 | self.nconv6 = nn.Conv2d(128, 128, 1, 1, 0) 59 | # nbn6 60 | self.nbn6 = nn.BatchNorm2d(128) 61 | # CD 64 62 | self.nconv7 = nn.Conv2d(128, 64, 3, 1, 1) 63 | # nbn7 64 | self.nbn7 = nn.BatchNorm2d(64) 65 | # C*3 66 | self.Nconv0 = nn.Conv2d(64, 3, 1, 1, 0) 67 | 68 | # --------------------Albedo--------------- 69 | # RES1 70 | self.a_res1 = ResidualBlock(128, 128) 71 | # RES2 72 | self.a_res2 = ResidualBlock(128, 128) 73 | # RES3 74 | self.a_res3 = ResidualBlock(128, 128) 75 | # RES4 76 | self.a_res4 = ResidualBlock(128, 128) 77 | # RES5 78 | self.a_res5 = ResidualBlock(128, 128) 79 | # abn6r 80 | self.abn6r = nn.BatchNorm2d(128) 81 | # CD128 82 | self.aup6 = nn.ConvTranspose2d(128, 128, 4, 2, 1, groups=128, bias=False) 83 | # nconv6 
84 | self.aconv6 = nn.Conv2d(128, 128, 1, 1, 0) 85 | # nbn6 86 | self.abn6 = nn.BatchNorm2d(128) 87 | # CD 64 88 | self.aconv7 = nn.Conv2d(128, 64, 3, 1, 1) 89 | # nbn7 90 | self.abn7 = nn.BatchNorm2d(64) 91 | # C*3 92 | self.Aconv0 = nn.Conv2d(64, 3, 1, 1, 0) 93 | 94 | # ---------------Light------------------ 95 | # lconv1 96 | self.lconv1 = nn.Conv2d(384, 128, 1, 1, 0) 97 | # lbn1 98 | self.lbn1 = nn.BatchNorm2d(128) 99 | # lpool2r 100 | self.lpool2r = nn.AvgPool2d(64) 101 | # fc_light 102 | self.fc_light = nn.Linear(128, 27) 103 | 104 | def forward(self, inputs): 105 | # C64 106 | x = F.relu(self.bn1(self.conv1(inputs))) 107 | # C128 108 | x = F.relu(self.bn2(self.conv2(x))) 109 | # C128 S2 110 | conv3 = self.conv3(x) 111 | # ------------RESNET for normals------------ 112 | # RES1 113 | x = self.n_res1(conv3) 114 | # RES2 115 | x = self.n_res2(x) 116 | # RES3 117 | x = self.n_res3(x) 118 | # RES4 119 | x = self.n_res4(x) 120 | # RES5 121 | nsum5 = self.n_res5(x) 122 | # nbn6r 123 | nrelu6r = F.relu(self.nbn6r(nsum5)) 124 | # CD128 125 | x = self.nup6(nrelu6r) 126 | # nconv6/nbn6/nrelu6 127 | x = F.relu(self.nbn6(self.nconv6(x))) 128 | # nconv7/nbn7/nrelu7 129 | x = F.relu(self.nbn7(self.nconv7(x))) 130 | # nconv0 131 | normal = self.Nconv0(x) 132 | # --------------------Albedo--------------- 133 | # RES1 134 | x = self.a_res1(conv3) 135 | # RES2 136 | x = self.a_res2(x) 137 | # RES3 138 | x = self.a_res3(x) 139 | # RES4 140 | x = self.a_res4(x) 141 | # RES5 142 | asum5 = self.a_res5(x) 143 | # nbn6r 144 | arelu6r = F.relu(self.abn6r(asum5)) 145 | # CD128 146 | x = self.aup6(arelu6r) 147 | # nconv6/nbn6/nrelu6 148 | x = F.relu(self.abn6(self.aconv6(x))) 149 | # nconv7/nbn7/nrelu7 150 | x = F.relu(self.abn7(self.aconv7(x))) 151 | # nconv0 152 | albedo = self.Aconv0(x) 153 | # ---------------Light------------------ 154 | # lconcat1, shape(1 256 64 64) 155 | x = torch.cat((nrelu6r, arelu6r), 1) 156 | # lconcat2, shape(1 384 64 64) 157 | x = torch.cat([x, conv3], 1) 
158 | # lconv1/lbn1/lrelu1 shape(1 128 64 64) 159 | x = F.relu(self.lbn1(self.lconv1(x))) 160 | # lpool2r, shape(1 128 1 1) 161 | x = self.lpool2r(x) 162 | x = x.view(-1, 128) 163 | # fc_light 164 | light = self.fc_light(x) 165 | 166 | return normal, albedo, light 167 | 168 | def load_weights_from_pkl(self, weights_pkl): 169 | from torch import from_numpy 170 | with open(weights_pkl, 'rb') as wp: 171 | try: 172 | # for python3 173 | name_weights = pkl.load(wp, encoding='latin1') 174 | except TypeError as e: 175 | # for python2 176 | name_weights = pkl.load(wp) 177 | state_dict = {} 178 | 179 | def _set_deconv(layer, key): 180 | state_dict[layer+'.weight'] = from_numpy(name_weights[key]['weight']) 181 | 182 | def _set(layer, key): 183 | state_dict[layer + '.weight'] = from_numpy(name_weights[key]['weight']) 184 | state_dict[layer + '.bias'] = from_numpy(name_weights[key]['bias']) 185 | 186 | def _set_bn(layer, key): 187 | state_dict[layer + '.running_var'] = from_numpy(name_weights[key]['running_var']) 188 | state_dict[layer + '.running_mean'] = from_numpy(name_weights[key]['running_mean']) 189 | state_dict[layer + '.weight'] = torch.ones_like(state_dict[layer + '.running_var']) 190 | state_dict[layer + '.bias'] = torch.zeros_like(state_dict[layer + '.running_var']) 191 | 192 | def _set_res(layer, n_or_a, index): 193 | _set_bn(layer+'.bn', n_or_a + 'bn' + str(index)) 194 | _set(layer+'.conv', n_or_a + 'conv' + str(index)) 195 | _set_bn(layer+'.bnr', n_or_a + 'bn' + str(index) + 'r') 196 | _set(layer+'.convr', n_or_a + 'conv' + str(index) + 'r') 197 | 198 | _set('conv1', 'conv1') 199 | _set_bn('bn1', 'bn1') 200 | _set('conv2', 'conv2') 201 | _set_bn('bn2', 'bn2') 202 | _set('conv3', 'conv3') 203 | _set_res('n_res1', 'n', 1) 204 | _set_res('n_res2', 'n', 2) 205 | _set_res('n_res3', 'n', 3) 206 | _set_res('n_res4', 'n', 4) 207 | _set_res('n_res5', 'n', 5) 208 | _set_bn('nbn6r', 'nbn6r') 209 | _set_deconv('nup6', 'nup6') 210 | _set('nconv6', 'nconv6') 211 | 
_set_bn('nbn6', 'nbn6') 212 | _set('nconv7', 'nconv7') 213 | _set_bn('nbn7', 'nbn7') 214 | _set('Nconv0', 'Nconv0') 215 | _set_res('a_res1', 'a', 1) 216 | _set_res('a_res2', 'a', 2) 217 | _set_res('a_res3', 'a', 3) 218 | _set_res('a_res4', 'a', 4) 219 | _set_res('a_res5', 'a', 5) 220 | _set_bn('abn6r', 'abn6r') 221 | _set_deconv('aup6', 'aup6') 222 | _set('aconv6', 'aconv6') 223 | _set_bn('abn6', 'abn6') 224 | _set('aconv7', 'aconv7') 225 | _set_bn('abn7', 'abn7') 226 | _set('Aconv0', 'Aconv0') 227 | _set('lconv1', 'lconv1') 228 | _set_bn('lbn1', 'lbn1') 229 | _set('fc_light', 'fc_light') 230 | self.load_state_dict(state_dict) 231 | 232 | 233 | if __name__ == '__main__': 234 | net = SfSNet() 235 | net.eval() 236 | 237 | print(len(list(net.named_parameters()))) 238 | for name, param in list(net.named_parameters()): 239 | print(name, param.size()) 240 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | from __future__ import absolute_import, division, print_function 3 | import cv2 4 | import numpy as np 5 | 6 | 7 | def _convert(src, max_value): 8 | # find min and max 9 | _min = np.min(src) 10 | _max = np.max(src) 11 | # scale to (0, max_value) 12 | dst = (src - _min) / (_max - _min + 1e-10) 13 | dst *= max_value 14 | return dst 15 | 16 | 17 | def convert(src, dtype=np.uint8, max_value=255.0): 18 | # type: (np.ndarray, object, float) -> np.ndarray 19 | # copy src 20 | dst = src.copy() 21 | if src.ndim == 2: 22 | dst = _convert(dst, max_value) 23 | elif src.ndim == 3: 24 | dst = cv2.cvtColor(dst, cv2.COLOR_BGR2LAB) 25 | light_channel = _convert(dst[0], max_value) 26 | dst[0, ...] 
= light_channel 27 | dst = cv2.cvtColor(dst, cv2.COLOR_LAB2BGR)*255 28 | else: 29 | raise RuntimeError("src/utils.py(30): src.ndim should be 2 or 3") 30 | return dst.astype(dtype) 31 | --------------------------------------------------------------------------------