├── MeanShift ├── MeanShift.exe ├── MeanShift.exp ├── MeanShift.ilk ├── MeanShift.lib ├── MeanShift.pdb ├── gdal111.dll ├── geos_c.dll ├── msvcp100d.dll ├── msvcr100d.dll ├── opencv_core2411d.dll ├── opencv_highgui2411d.dll ├── opencv_imgproc2411d.dll ├── opencv_objdetect2411d.dll └── proj.dll ├── README.md ├── data-url ├── fcn-improve ├── deploy.prototxt ├── solve.py ├── solver.prototxt ├── train.prototxt └── val.prototxt ├── infer-4s.py ├── mean.py ├── readme.txt ├── result.py ├── score.py └── voc_layers.py /MeanShift/MeanShift.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/MeanShift.exe -------------------------------------------------------------------------------- /MeanShift/MeanShift.exp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/MeanShift.exp -------------------------------------------------------------------------------- /MeanShift/MeanShift.ilk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/MeanShift.ilk -------------------------------------------------------------------------------- /MeanShift/MeanShift.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/MeanShift.lib -------------------------------------------------------------------------------- /MeanShift/MeanShift.pdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/MeanShift.pdb -------------------------------------------------------------------------------- /MeanShift/gdal111.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/gdal111.dll -------------------------------------------------------------------------------- /MeanShift/geos_c.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/geos_c.dll -------------------------------------------------------------------------------- /MeanShift/msvcp100d.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/msvcp100d.dll -------------------------------------------------------------------------------- /MeanShift/msvcr100d.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/msvcr100d.dll -------------------------------------------------------------------------------- /MeanShift/opencv_core2411d.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/opencv_core2411d.dll -------------------------------------------------------------------------------- /MeanShift/opencv_highgui2411d.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/opencv_highgui2411d.dll -------------------------------------------------------------------------------- /MeanShift/opencv_imgproc2411d.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/opencv_imgproc2411d.dll -------------------------------------------------------------------------------- /MeanShift/opencv_objdetect2411d.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/opencv_objdetect2411d.dll -------------------------------------------------------------------------------- /MeanShift/proj.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/proj.dll -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 基于全卷积神经网络的遥感影像分类试验 3 | 4 | 未来的工作将会继续沿着基于深度写的遥感影像分类工作继续下去,同时考虑转用Tensorflow/keras 机器学习框 5 | -------------------------------------------------------------------------------- /data-url: -------------------------------------------------------------------------------- 1 | 本文模型 :链接:http://pan.baidu.com/s/1kV7lkrl 密码:bt8f 2 | 3 | 本文数据集:链接:http://pan.baidu.com/s/1mhHIifQ 密码:yxzv 4 | -------------------------------------------------------------------------------- /fcn-improve/deploy.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "input" 3 | type: "Input" 4 | top: "data" 5 | input_param { 6 | # These dimensions are purely for sake of example; 7 | # see infer.py for how to reshape the net to the given input size. 8 | shape { dim: 1 dim: 3 dim: 512 dim: 512 } 9 | } 10 | } 11 | 12 | layer { 13 | name: "conv1_1" 14 | type: "Convolution" 15 | bottom: "data" 16 | top: "conv1_1" 17 | param { 18 | lr_mult: 1 19 | decay_mult: 1 20 | } 21 | param { 22 | lr_mult: 2 23 | decay_mult: 0 24 | } 25 | convolution_param { 26 | num_output: 64 27 | pad: 100 28 | kernel_size: 3 29 | stride: 1 30 | } 31 | } 32 | layer { 33 | name: "relu1_1" 34 | type: "ReLU" 35 | bottom: "conv1_1" 36 | top: "conv1_1" 37 | } 38 | layer { 39 | name: "conv1_2" 40 | type: "Convolution" 41 | bottom: "conv1_1" 42 | top: "conv1_2" 43 | param { 44 | lr_mult: 1 45 | decay_mult: 1 46 | } 47 | param { 48 | lr_mult: 2 49 | decay_mult: 0 50 | } 51 | convolution_param { 52 | num_output: 64 53 | pad: 1 54 | kernel_size: 3 55 | stride: 1 56 | } 57 | } 58 | layer { 59 | name: "relu1_2" 60 | type: "ReLU" 61 | bottom: "conv1_2" 62 | top: "conv1_2" 63 | } 64 | layer { 65 | name: "pool1" 66 | type: "Pooling" 67 | bottom: "conv1_2" 68 | top: "pool1" 69 | pooling_param { 70 | pool: MAX 71 | kernel_size: 2 72 | stride: 2 73 | } 74 | } 75 | layer { 76 | name: "conv2_1" 77 | type: "Convolution" 78 | bottom: "pool1" 79 | top: "conv2_1" 80 | param { 81 | lr_mult: 1 82 | decay_mult: 1 83 | } 84 | param { 85 | lr_mult: 2 86 | decay_mult: 0 87 | } 88 | convolution_param { 89 | num_output: 128 90 | pad: 1 91 | kernel_size: 3 92 | stride: 1 93 | } 94 | } 95 | layer { 96 | name: "relu2_1" 97 | type: "ReLU" 98 | bottom: "conv2_1" 99 | top: "conv2_1" 100 | } 101 | layer { 102 | name: "conv2_2" 103 | type: "Convolution" 104 | bottom: "conv2_1" 105 | top: "conv2_2" 106 | param { 107 | lr_mult: 1 108 | decay_mult: 1 109 | } 110 | param { 111 | lr_mult: 2 112 | decay_mult: 0 113 | } 114 | convolution_param { 115 | num_output: 128 116 | pad: 1 117 | kernel_size: 3 118 | stride: 1 119 | } 120 | } 121 | layer { 122 | name: "relu2_2" 123 | type: "ReLU" 124 | bottom: "conv2_2" 125 | top: "conv2_2" 126 | } 127 | layer { 128 | name: "pool2" 129 | type: "Pooling" 130 | bottom: "conv2_2" 131 | top: "pool2" 132 | pooling_param { 133 | pool: MAX 134 | kernel_size: 2 135 | stride: 2 136 | } 137 | } 138 | layer { 139 | name: "conv3_1" 140 | type: "Convolution" 141 | bottom: "pool2" 142 | top: "conv3_1" 143 | param { 144 | lr_mult: 1 145 | decay_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | decay_mult: 0 150 | } 151 | convolution_param { 152 | num_output: 256 153 | pad: 1 154 | kernel_size: 3 155 | stride: 1 156 | } 157 | } 158 | layer { 159 | name: "relu3_1" 160 | type: "ReLU" 161 | bottom: "conv3_1" 162 | top: "conv3_1" 163 | } 164 | layer { 165 | name: "conv3_2" 166 | type: "Convolution" 167 | bottom: "conv3_1" 168 | top: "conv3_2" 169 | param { 170 | lr_mult: 1 171 | decay_mult: 1 172 | } 173 | param { 174 | lr_mult: 2 175 | decay_mult: 0 176 | } 177 | convolution_param { 178 | num_output: 256 179 | pad: 1 180 | kernel_size: 3 181 | stride: 1 182 | } 183 | } 184 | layer { 185 | name: "relu3_2" 186 | type: "ReLU" 187 | bottom: "conv3_2" 188 | top: "conv3_2" 189 | } 190 | layer { 191 | name: "conv3_3" 192 | type: "Convolution" 193 | bottom: "conv3_2" 194 | top: "conv3_3" 195 | param { 196 | lr_mult: 1 197 | decay_mult: 1 198 | } 199 | param { 200 | lr_mult: 2 201 | decay_mult: 0 202 | } 203 | convolution_param { 204 | num_output: 256 205 | pad: 1 206 | kernel_size: 3 207 | stride: 1 208 | } 209 | } 210 | layer { 211 | name: "relu3_3" 212 | type: "ReLU" 213 | bottom: "conv3_3" 214 | top: "conv3_3" 215 | } 216 | layer { 217 | name: "pool3" 218 | type: "Pooling" 219 | bottom: "conv3_3" 220 | top: "pool3" 221 | pooling_param { 222 | pool: MAX 223 | kernel_size: 2 224 | stride: 2 225 | } 226 | } 227 | layer { 228 | name: "conv4_1" 229 | type: "Convolution" 230 | bottom: "pool3" 231 | top: "conv4_1" 232 | param { 233 | lr_mult: 1 234 | decay_mult: 1 235 | } 236 | param { 237 | lr_mult: 2 238 | decay_mult: 0 239 | } 240 | convolution_param { 241 | num_output: 512 242 | pad: 1 243 | kernel_size: 3 244 | stride: 1 245 | } 246 | } 247 | layer { 248 | name: "relu4_1" 249 | type: "ReLU" 250 | bottom: "conv4_1" 251 | top: "conv4_1" 252 | } 253 | layer { 254 | name: "conv4_2" 255 | type: "Convolution" 256 | bottom: "conv4_1" 257 | top: "conv4_2" 258 | param { 259 | lr_mult: 1 260 | decay_mult: 1 261 | } 262 | param { 263 | lr_mult: 2 264 | decay_mult: 0 265 | } 266 | convolution_param { 267 | num_output: 512 268 | pad: 1 269 | kernel_size: 3 270 | stride: 1 271 | } 272 | } 273 | layer { 274 | name: "relu4_2" 275 | type: "ReLU" 276 | bottom: "conv4_2" 277 | top: "conv4_2" 278 | } 279 | layer { 280 | name: "conv4_3" 281 | type: "Convolution" 282 | bottom: "conv4_2" 283 | top: "conv4_3" 284 | param { 285 | lr_mult: 1 286 | decay_mult: 1 287 | } 288 | param { 289 | lr_mult: 2 290 | decay_mult: 0 291 | } 292 | convolution_param { 293 | num_output: 512 294 | pad: 1 295 | kernel_size: 3 296 | stride: 1 297 | } 298 | } 299 | layer { 300 | name: "relu4_3" 301 | type: "ReLU" 302 | bottom: "conv4_3" 303 | top: "conv4_3" 304 | } 305 | layer { 306 | name: "pool4" 307 | type: "Pooling" 308 | bottom: "conv4_3" 309 | top: "pool4" 310 | pooling_param { 311 | pool: MAX 312 | kernel_size: 2 313 | stride: 2 314 | } 315 | } 316 | layer { 317 | name: "conv5_1" 318 | type: "Convolution" 319 | bottom: "pool4" 320 | top: "conv5_1" 321 | param { 322 | lr_mult: 1 323 | decay_mult: 1 324 | } 325 | param { 326 | lr_mult: 2 327 | decay_mult: 0 328 | } 329 | convolution_param { 330 | num_output: 512 331 | pad: 1 332 | kernel_size: 3 333 | stride: 1 334 | } 335 | } 336 | layer { 337 | name: "relu5_1" 338 | type: "ReLU" 339 | bottom: "conv5_1" 340 | top: "conv5_1" 341 | } 342 | layer { 343 | name: "conv5_2" 344 | type: "Convolution" 345 | bottom: "conv5_1" 346 | top: "conv5_2" 347 | param { 348 | lr_mult: 1 349 | decay_mult: 1 350 | } 351 | param { 352 | lr_mult: 2 353 | decay_mult: 0 354 | } 355 | convolution_param { 356 | num_output: 512 357 | pad: 1 358 | kernel_size: 3 359 | stride: 1 360 | } 361 | } 362 | layer { 363 | name: "relu5_2" 364 | type: "ReLU" 365 | bottom: "conv5_2" 366 | top: "conv5_2" 367 | } 368 | layer { 369 | name: "conv5_3" 370 | type: "Convolution" 371 | bottom: "conv5_2" 372 | top: "conv5_3" 373 | param { 374 | lr_mult: 1 375 | decay_mult: 1 376 | } 377 | param { 378 | lr_mult: 2 379 | decay_mult: 0 380 | } 381 | convolution_param { 382 | num_output: 512 383 | pad: 1 384 | kernel_size: 3 385 | stride: 1 386 | } 387 | } 388 | layer { 389 | name: "relu5_3" 390 | type: "ReLU" 391 | bottom: "conv5_3" 392 | top: "conv5_3" 393 | } 394 | layer { 395 | name: "pool5" 396 | type: "Pooling" 397 | bottom: "conv5_3" 398 | top: "pool5" 399 | pooling_param { 400 | pool: MAX 401 | kernel_size: 2 402 | stride: 2 403 | } 404 | } 405 | layer { 406 | name: "fc6" 407 | type: "Convolution" 408 | bottom: "pool5" 409 | top: "fc6" 410 | param { 411 | lr_mult: 1 412 | decay_mult: 1 413 | } 414 | param { 415 | lr_mult: 2 416 | decay_mult: 0 417 | } 418 | convolution_param { 419 | num_output: 4096 420 | pad: 0 421 | kernel_size: 7 422 | stride: 1 423 | } 424 | } 425 | layer { 426 | name: "relu6" 427 | type: "ReLU" 428 | bottom: "fc6" 429 | top: "fc6" 430 | } 431 | layer { 432 | name: "drop6" 433 | type: "Dropout" 434 | bottom: "fc6" 435 | top: "fc6" 436 | dropout_param { 437 | dropout_ratio: 0.5 438 | } 439 | } 440 | layer { 441 | name: "fc7" 442 | type: "Convolution" 443 | bottom: "fc6" 444 | top: "fc7" 445 | param { 446 | lr_mult: 1 447 | decay_mult: 1 448 | } 449 | param { 450 | lr_mult: 2 451 | decay_mult: 0 452 | } 453 | convolution_param { 454 | num_output: 4096 455 | pad: 0 456 | kernel_size: 1 457 | stride: 1 458 | } 459 | } 460 | layer { 461 | name: "relu7" 462 | type: "ReLU" 463 | bottom: "fc7" 464 | top: "fc7" 465 | } 466 | layer { 467 | name: "drop7" 468 | type: "Dropout" 469 | bottom: "fc7" 470 | top: "fc7" 471 | dropout_param { 472 | dropout_ratio: 0.5 473 | } 474 | } 475 | layer { 476 | name: "score_fr" 477 | type: "Convolution" 478 | bottom: "fc7" 479 | top: "score_fr" 480 | param { 481 | lr_mult: 1 482 | decay_mult: 1 483 | } 484 | param { 485 | lr_mult: 2 486 | decay_mult: 0 487 | } 488 | convolution_param { 489 | num_output: 11 490 | pad: 0 491 | kernel_size: 1 492 | } 493 | } 494 | layer { 495 | name: "upscore2" 496 | type: "Deconvolution" 497 | bottom: "score_fr" 498 | top: "upscore2" 499 | param { 500 | lr_mult: 0 501 | } 502 | convolution_param { 503 | num_output: 11 504 | bias_term: false 505 | kernel_size: 4 506 | stride: 2 507 | } 508 | } 509 | layer { 510 | name: "scale_pool4" 511 | type: "Scale" 512 | bottom: "pool4" 513 | top: "scale_pool4" 514 | param { 515 | lr_mult: 0 516 | } 517 | scale_param { 518 | filler { 519 | type: "constant" 520 | value: 0.01 521 | } 522 | } 523 | } 524 | layer { 525 | name: "score_pool4" 526 | type: "Convolution" 527 | bottom: "scale_pool4" 528 | top: "score_pool4" 529 | param { 530 | lr_mult: 1 531 | decay_mult: 1 532 | } 533 | param { 534 | lr_mult: 2 535 | decay_mult: 0 536 | } 537 | convolution_param { 538 | num_output: 11 539 | pad: 0 540 | kernel_size: 1 541 | } 542 | } 543 | layer { 544 | name: "score_pool4c" 545 | type: "Crop" 546 | bottom: "score_pool4" 547 | bottom: "upscore2" 548 | top: "score_pool4c" 549 | crop_param { 550 | axis: 2 551 | offset: 5 552 | } 553 | } 554 | layer { 555 | name: "fuse_pool4" 556 | type: "Eltwise" 557 | bottom: "upscore2" 558 | bottom: "score_pool4c" 559 | top: "fuse_pool4" 560 | eltwise_param { 561 | operation: SUM 562 | } 563 | } 564 | layer { 565 | name: "upscore_pool4" 566 | type: "Deconvolution" 567 | bottom: "fuse_pool4" 568 | top: "upscore_pool4" 569 | param { 570 | lr_mult: 0 571 | } 572 | convolution_param { 573 | num_output: 11 574 | bias_term: false 575 | kernel_size: 4 576 | stride: 2 577 | } 578 | } 579 | layer { 580 | name: "scale_pool3" 581 | type: "Scale" 582 | bottom: "pool3" 583 | top: "scale_pool3" 584 | param { 585 | lr_mult: 0 586 | } 587 | scale_param { 588 | filler { 589 | type: "constant" 590 | value: 0.0001 591 | } 592 | } 593 | } 594 | layer { 595 | name: "score_pool3" 596 | type: "Convolution" 597 | bottom: "scale_pool3" 598 | top: "score_pool3" 599 | param { 600 | lr_mult: 1 601 | decay_mult: 1 602 | } 603 | param { 604 | lr_mult: 2 605 | decay_mult: 0 606 | } 607 | convolution_param { 608 | num_output: 11 609 | pad: 0 610 | kernel_size: 1 611 | } 612 | } 613 | layer { 614 | name: "score_pool3c" 615 | type: "Crop" 616 | bottom: "score_pool3" 617 | bottom: "upscore_pool4" 618 | top: "score_pool3c" 619 | crop_param { 620 | axis: 2 621 | offset: 9 622 | } 623 | } 624 | layer { 625 | name: "fuse_pool3" 626 | type: "Eltwise" 627 | bottom: "upscore_pool4" 628 | bottom: "score_pool3c" 629 | top: "fuse_pool3" 630 | eltwise_param { 631 | operation: SUM 632 | } 633 | } 634 | layer { 635 | name: "upscore_pool3" 636 | type: "Deconvolution" 637 | bottom: "fuse_pool3" 638 | top: "upscore_pool3" 639 | param { 640 | lr_mult: 0 641 | } 642 | convolution_param { 643 | num_output: 11 644 | bias_term: false 645 | kernel_size: 4 646 | stride: 2 647 | } 648 | } 649 | 650 | layer { 651 | name: "scale_pool2" 652 | type: "Scale" 653 | bottom: "pool2" 654 | top: "scale_pool2" 655 | param { 656 | lr_mult: 0 657 | } 658 | scale_param { 659 | filler { 660 | type: "constant" 661 | value: 0.0001 662 | } 663 | } 664 | } 665 | layer { 666 | name: "score_pool2" 667 | type: "Convolution" 668 | bottom: "scale_pool2" 669 | top: "score_pool2" 670 | param { 671 | lr_mult: 1 672 | decay_mult: 1 673 | } 674 | param { 675 | lr_mult: 2 676 | decay_mult: 0 677 | } 678 | convolution_param { 679 | num_output: 11 680 | pad: 0 681 | kernel_size: 1 682 | } 683 | } 684 | layer { 685 | name: "score_pool2c" 686 | type: "Crop" 687 | bottom: "score_pool2" 688 | bottom: "upscore_pool3" 689 | top: "score_pool2c" 690 | crop_param { 691 | axis: 2 692 | offset: 9 693 | } 694 | } 695 | layer { 696 | name: "fuse_pool2" 697 | type: "Eltwise" 698 | bottom: "upscore_pool3" 699 | bottom: "score_pool2c" 700 | top: "fuse_pool2" 701 | eltwise_param { 702 | operation: SUM 703 | } 704 | } 705 | layer { 706 | name: "upscore4" 707 | type: "Deconvolution" 708 | bottom: "fuse_pool2" 709 | top: "upscore4" 710 | param { 711 | lr_mult: 0 712 | } 713 | convolution_param { 714 | num_output: 11 715 | bias_term: false 716 | kernel_size: 8 717 | stride: 4 718 | } 719 | } 720 | 721 | layer { 722 | name: "score" 723 | type: "Crop" 724 | bottom: "upscore4" 725 | bottom: "data" 726 | top: "score" 727 | crop_param { 728 | axis: 2 729 | offset: 31 730 | } 731 | } 732 | 733 | layer { 734 | name: "softmax_score" 735 | type: "Softmax" 736 | bottom: "score" 737 | top: "softmax_score" 738 | } -------------------------------------------------------------------------------- /fcn-improve/solve.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('./python') 4 | sys.path.append('./python/caffe') 5 | 6 | sys.path.append('/home/sensetime/DeepLearning/test/fcn-pool') 7 | 8 | 9 | import caffe 10 | import surgery, score 11 | 12 | import numpy as np 13 | import os 14 | 15 | try: 16 | import setproctitle 17 | setproctitle.setproctitle(os.path.basename(os.getcwd())) 18 | except: 19 | pass 20 | 21 | weights = '../fcn-pool/snapshot/fcn8s-heavy-pascal.caffemodel' 22 | 23 | # init 24 | caffe.set_device(0) 25 | caffe.set_mode_gpu() 26 | 27 | solver = caffe.SGDSolver('../fcn-pool/fcn-improve/solver.prototxt') 28 | solver.net.copy_from(weights) 29 | 30 | # surgeries 31 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 32 | surgery.interp(solver.net, interp_layers) 33 | 34 | # scoring 35 | val = np.loadtxt('../fcn-pool/data/fcn-gf2-512-1024/valdata/val.txt', dtype=str) 36 | 37 | for _ in range(10): 38 | solver.step(2000) 39 | score.seg_tests(solver, False, val, layer='score') 40 | -------------------------------------------------------------------------------- /fcn-improve/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "../fcn-pool/fcn-improve/train.prototxt" 2 | test_net: "../fcn-pool/fcn-improve/val.prototxt" 3 | test_iter: 847 4 | # make test net, but don't invoke it from the solver itself 5 | test_interval: 999999999 6 | display: 20 7 | average_loss: 20 8 | lr_policy: "fixed" 9 | # lr for unnormalized softmax 10 | base_lr: 1e-13 11 | # high momentum 12 | momentum: 0.99 13 | # no gradient accumulation 14 | iter_size: 1 15 | max_iter: 20000 16 | weight_decay: 0.0005 17 | snapshot: 5000 18 | snapshot_prefix: "../fcn-pool/fcn-improve/snapshot/train" 19 | test_initialization: false 20 | -------------------------------------------------------------------------------- /fcn-improve/train.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "data" 3 | type: "Python" 4 | top: "data" 5 | top: "label" 6 | python_param { 7 | module: "voc_layers" 8 | layer: "SBDDSegDataLayer" 9 | param_str: "{\'sbdd_dir\': \'../fcn-pool/data/fcn-gf2-512-1024/traindata\', \'seed\': 1337, \'split\': \'train\', \'mean\': (109.39446,105.91758,89.61112)}" 10 | } 11 | } 12 | 13 | layer { 14 | name: "conv1_1" 15 | type: "Convolution" 16 | bottom: "data" 17 | top: "conv1_1" 18 | param { 19 | lr_mult: 1 20 | decay_mult: 1 21 | } 22 | param { 23 | lr_mult: 2 24 | decay_mult: 0 25 | } 26 | convolution_param { 27 | num_output: 64 28 | pad: 100 29 | kernel_size: 3 30 | stride: 1 31 | } 32 | } 33 | layer { 34 | name: "relu1_1" 35 | type: "ReLU" 36 | bottom: "conv1_1" 37 | top: "conv1_1" 38 | } 39 | layer { 40 | name: "conv1_2" 41 | type: "Convolution" 42 | bottom: "conv1_1" 43 | top: "conv1_2" 44 | param { 45 | lr_mult: 1 46 | decay_mult: 1 47 | } 48 | param { 49 | lr_mult: 2 50 | decay_mult: 0 51 | } 52 | convolution_param { 53 | num_output: 64 54 | pad: 1 55 | kernel_size: 3 56 | stride: 1 57 | } 58 | } 59 | layer { 60 | name: "relu1_2" 61 | type: "ReLU" 62 | bottom: "conv1_2" 63 | top: "conv1_2" 64 | } 65 | layer { 66 | name: "pool1" 67 | type: "Pooling" 68 | bottom: "conv1_2" 69 | top: "pool1" 70 | pooling_param { 71 | pool: MAX 72 | kernel_size: 2 73 | stride: 2 74 | } 75 | } 76 | layer { 77 | name: "conv2_1" 78 | type: "Convolution" 79 | bottom: "pool1" 80 | top: "conv2_1" 81 | param { 82 | lr_mult: 1 83 | decay_mult: 1 84 | } 85 | param { 86 | lr_mult: 2 87 | decay_mult: 0 88 | } 89 | convolution_param { 90 | num_output: 128 91 | pad: 1 92 | kernel_size: 3 93 | stride: 1 94 | } 95 | } 96 | layer { 97 | name: "relu2_1" 98 | type: "ReLU" 99 | bottom: "conv2_1" 100 | top: "conv2_1" 101 | } 102 | layer { 103 | name: "conv2_2" 104 | type: "Convolution" 105 | bottom: "conv2_1" 106 | top: "conv2_2" 107 | param { 108 | lr_mult: 1 109 | decay_mult: 1 110 | } 111 | param { 112 | lr_mult: 2 113 | decay_mult: 0 114 | } 115 | convolution_param { 116 | num_output: 128 117 | pad: 1 118 | kernel_size: 3 119 | stride: 1 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | decay_mult: 1 147 | } 148 | param { 149 | lr_mult: 2 150 | decay_mult: 0 151 | } 152 | convolution_param { 153 | num_output: 256 154 | pad: 1 155 | kernel_size: 3 156 | stride: 1 157 | } 158 | } 159 | layer { 160 | name: "relu3_1" 161 | type: "ReLU" 162 | bottom: "conv3_1" 163 | top: "conv3_1" 164 | } 165 | layer { 166 | name: "conv3_2" 167 | type: "Convolution" 168 | bottom: "conv3_1" 169 | top: "conv3_2" 170 | param { 171 | lr_mult: 1 172 | decay_mult: 1 173 | } 174 | param { 175 | lr_mult: 2 176 | decay_mult: 0 177 | } 178 | convolution_param { 179 | num_output: 256 180 | pad: 1 181 | kernel_size: 3 182 | stride: 1 183 | } 184 | } 185 | layer { 186 | name: "relu3_2" 187 | type: "ReLU" 188 | bottom: "conv3_2" 189 | top: "conv3_2" 190 | } 191 | layer { 192 | name: "conv3_3" 193 | type: "Convolution" 194 | bottom: "conv3_2" 195 | top: "conv3_3" 196 | param { 197 | lr_mult: 1 198 | decay_mult: 1 199 | } 200 | param { 201 | lr_mult: 2 202 | decay_mult: 0 203 | } 204 | convolution_param { 205 | num_output: 256 206 | pad: 1 207 | kernel_size: 3 208 | stride: 1 209 | } 210 | } 211 | layer { 212 | name: "relu3_3" 213 | type: "ReLU" 214 | bottom: "conv3_3" 215 | top: "conv3_3" 216 | } 217 | layer { 218 | name: "pool3" 219 | type: "Pooling" 220 | bottom: "conv3_3" 221 | top: "pool3" 222 | pooling_param { 223 | pool: MAX 224 | kernel_size: 2 225 | stride: 2 226 | } 227 | } 228 | layer { 229 | name: "conv4_1" 230 | type: "Convolution" 231 | bottom: "pool3" 232 | top: "conv4_1" 233 | param { 234 | lr_mult: 1 235 | decay_mult: 1 236 | } 237 | param { 238 | lr_mult: 2 239 | decay_mult: 0 240 | } 241 | convolution_param { 242 | num_output: 512 243 | pad: 1 244 | kernel_size: 3 245 | stride: 1 246 | } 247 | } 248 | layer { 249 | name: "relu4_1" 250 | type: "ReLU" 251 | bottom: "conv4_1" 252 | top: "conv4_1" 253 | } 254 | layer { 255 | name: "conv4_2" 256 | type: "Convolution" 257 | bottom: "conv4_1" 258 | top: "conv4_2" 259 | param { 260 | lr_mult: 1 261 | decay_mult: 1 262 | } 263 | param { 264 | lr_mult: 2 265 | decay_mult: 0 266 | } 267 | convolution_param { 268 | num_output: 512 269 | pad: 1 270 | kernel_size: 3 271 | stride: 1 272 | } 273 | } 274 | layer { 275 | name: "relu4_2" 276 | type: "ReLU" 277 | bottom: "conv4_2" 278 | top: "conv4_2" 279 | } 280 | layer { 281 | name: "conv4_3" 282 | type: "Convolution" 283 | bottom: "conv4_2" 284 | top: "conv4_3" 285 | param { 286 | lr_mult: 1 287 | decay_mult: 1 288 | } 289 | param { 290 | lr_mult: 2 291 | decay_mult: 0 292 | } 293 | convolution_param { 294 | num_output: 512 295 | pad: 1 296 | kernel_size: 3 297 | stride: 1 298 | } 299 | } 300 | layer { 301 | name: "relu4_3" 302 | type: "ReLU" 303 | bottom: "conv4_3" 304 | top: "conv4_3" 305 | } 306 | layer { 307 | name: "pool4" 308 | type: "Pooling" 309 | bottom: "conv4_3" 310 | top: "pool4" 311 | pooling_param { 312 | pool: MAX 313 | kernel_size: 2 314 | stride: 2 315 | } 316 | } 317 | layer { 318 | name: "conv5_1" 319 | type: "Convolution" 320 | bottom: "pool4" 321 | top: "conv5_1" 322 | param { 323 | lr_mult: 1 324 | decay_mult: 1 325 | } 326 | param { 327 | lr_mult: 2 328 | decay_mult: 0 329 | } 330 | convolution_param { 331 | num_output: 512 332 | pad: 1 333 | kernel_size: 3 334 | stride: 1 335 | } 336 | } 337 | layer { 338 | name: "relu5_1" 339 | type: "ReLU" 340 | bottom: "conv5_1" 341 | top: "conv5_1" 342 | } 343 | layer { 344 | name: "conv5_2" 345 | type: "Convolution" 346 | bottom: "conv5_1" 347 | top: "conv5_2" 348 | param { 349 | lr_mult: 1 350 | decay_mult: 1 351 | } 352 | param { 353 | lr_mult: 2 354 | decay_mult: 0 355 | } 356 | convolution_param { 357 | num_output: 512 358 | pad: 1 359 | kernel_size: 3 360 | stride: 1 361 | } 362 | } 363 | layer { 364 | name: "relu5_2" 365 | type: "ReLU" 366 | bottom: "conv5_2" 367 | top: "conv5_2" 368 | } 369 | layer { 370 | name: "conv5_3" 371 | type: "Convolution" 372 | bottom: "conv5_2" 373 | top: "conv5_3" 374 | param { 375 | lr_mult: 1 376 | decay_mult: 1 377 | } 378 | param { 379 | lr_mult: 2 380 | decay_mult: 0 381 | } 382 | convolution_param { 383 | num_output: 512 384 | pad: 1 385 | kernel_size: 3 386 | stride: 1 387 | } 388 | } 389 | layer { 390 | name: "relu5_3" 391 | type: "ReLU" 392 | bottom: "conv5_3" 393 | top: "conv5_3" 394 | } 395 | layer { 396 | name: "pool5" 397 | type: "Pooling" 398 | bottom: "conv5_3" 399 | top: "pool5" 400 | pooling_param { 401 | pool: MAX 402 | kernel_size: 2 403 | stride: 2 404 | } 405 | } 406 | layer { 407 | name: "fc6" 408 | type: "Convolution" 409 | bottom: "pool5" 410 | top: "fc6" 411 | param { 412 | lr_mult: 1 413 | decay_mult: 1 414 | } 415 | param { 416 | lr_mult: 2 417 | decay_mult: 0 418 | } 419 | convolution_param { 420 | num_output: 4096 421 | pad: 0 422 | kernel_size: 7 423 | stride: 1 424 | } 425 | } 426 | layer { 427 | name: "relu6" 428 | type: "ReLU" 429 | bottom: "fc6" 430 | top: "fc6" 431 | } 432 | layer { 433 | name: "drop6" 434 | type: "Dropout" 435 | bottom: "fc6" 436 | top: "fc6" 437 | dropout_param { 438 | dropout_ratio: 0.5 439 | } 440 | } 441 | layer { 442 | name: "fc7" 443 | type: "Convolution" 444 | bottom: "fc6" 445 | top: "fc7" 446 | param { 447 | lr_mult: 1 448 | decay_mult: 1 449 | } 450 | param { 451 | lr_mult: 2 452 | decay_mult: 0 453 | } 454 | convolution_param { 455 | num_output: 4096 456 | pad: 0 457 | kernel_size: 1 458 | stride: 1 459 | } 460 | } 461 | layer { 462 | name: "relu7" 463 | type: "ReLU" 464 | bottom: "fc7" 465 | top: "fc7" 466 | } 467 | layer { 468 | name: "drop7" 469 | type: "Dropout" 470 | bottom: "fc7" 471 | top: "fc7" 472 | dropout_param { 473 | dropout_ratio: 0.5 474 | } 475 | } 476 | layer { 477 | name: "score_fr" 478 | type: "Convolution" 479 | bottom: "fc7" 480 | top: "score_fr" 481 | param { 482 | lr_mult: 1 483 | decay_mult: 1 484 | } 485 | param { 486 | lr_mult: 2 487 | decay_mult: 0 488 | } 489 | convolution_param { 490 | num_output: 11 491 | pad: 0 492 | kernel_size: 1 493 | } 494 | } 495 | layer { 496 | name: "upscore2" 497 | type: "Deconvolution" 498 | bottom: "score_fr" 499 | top: "upscore2" 500 | param { 501 | lr_mult: 0 502 | } 503 | convolution_param { 504 | num_output: 11 505 | bias_term: false 506 | kernel_size: 4 507 | stride: 2 508 | } 509 | } 510 | layer { 511 | name: "scale_pool4" 512 | type: "Scale" 513 | bottom: "pool4" 514 | top: "scale_pool4" 515 | param { 516 | lr_mult: 0 517 | } 518 | scale_param { 519 | filler { 520 | type: "constant" 521 | value: 0.01 522 | } 523 | } 524 | } 525 | layer { 526 | name: "score_pool4" 527 | type: "Convolution" 528 | bottom: "scale_pool4" 529 | top: "score_pool4" 530 | param { 531 | lr_mult: 1 532 | decay_mult: 1 533 | } 534 | param { 535 | lr_mult: 2 536 | decay_mult: 0 537 | } 538 | convolution_param { 539 | num_output: 11 540 | pad: 0 541 | kernel_size: 1 542 | } 543 | } 544 | layer { 545 | name: "score_pool4c" 546 | type: "Crop" 547 | bottom: "score_pool4" 548 | bottom: "upscore2" 549 | top: "score_pool4c" 550 | crop_param { 551 | axis: 2 552 | offset: 5 553 | } 554 | } 555 | layer { 556 | name: "fuse_pool4" 557 | type: "Eltwise" 558 | bottom: "upscore2" 559 | bottom: "score_pool4c" 560 | top: "fuse_pool4" 561 | eltwise_param { 562 | operation: SUM 563 | } 564 | } 565 | layer { 566 | name: "upscore_pool4" 567 | type: "Deconvolution" 568 | bottom: "fuse_pool4" 569 | top: "upscore_pool4" 570 | param { 571 | lr_mult: 0 572 | } 573 | convolution_param { 574 | num_output: 11 575 | bias_term: false 576 | kernel_size: 4 577 | stride: 2 578 | } 579 | } 580 | layer { 581 | name: "scale_pool3" 582 | type: "Scale" 583 | bottom: "pool3" 584 | top: "scale_pool3" 585 | param { 586 | lr_mult: 0 587 | } 588 | scale_param { 589 | filler { 590 | type: "constant" 591 | value: 0.0001 592 | } 593 | } 594 | } 595 | layer { 596 | name: "score_pool3" 597 | type: "Convolution" 598 | bottom: "scale_pool3" 599 | top: "score_pool3" 600 | param { 601 | lr_mult: 1 602 | decay_mult: 1 603 | } 604 | param { 605 | lr_mult: 2 606 | decay_mult: 0 607 | } 608 | convolution_param { 609 | num_output: 11 610 | pad: 0 611 | kernel_size: 1 612 | } 613 | } 614 | layer { 615 | name: "score_pool3c" 616 | type: "Crop" 617 | bottom: "score_pool3" 618 | bottom: "upscore_pool4" 619 | top: "score_pool3c" 620 | crop_param { 621 | axis: 2 622 | offset: 9 623 | } 624 | } 625 | layer { 626 | name: "fuse_pool3" 627 | type: "Eltwise" 628 | bottom: "upscore_pool4" 629 | bottom: "score_pool3c" 630 | top: "fuse_pool3" 631 | eltwise_param { 632 | operation: SUM 633 | } 634 | } 635 | layer { 636 | name: "upscore_pool3" 637 | type: "Deconvolution" 638 | bottom: "fuse_pool3" 639 | top: "upscore_pool3" 640 | param { 641 | lr_mult: 0 642 | } 643 | convolution_param { 644 | num_output: 11 645 | bias_term: false 646 | kernel_size: 4 647 | stride: 2 648 | } 649 | } 650 | 651 | layer { 652 | name: "scale_pool2" 653 | type: "Scale" 654 | bottom: "pool2" 655 | top: "scale_pool2" 656 | param { 657 | lr_mult: 0 658 | } 659 | scale_param { 660 | filler { 661 | type: "constant" 662 | value: 0.0001 663 | } 664 | } 665 | } 666 | layer { 667 | name: "score_pool2" 668 | type: "Convolution" 669 | bottom: "scale_pool2" 670 | top: "score_pool2" 671 | param { 672 | lr_mult: 1 673 | decay_mult: 1 674 | } 675 | param { 676 | lr_mult: 2 677 | decay_mult: 0 678 | } 679 | convolution_param { 680 | num_output: 11 681 | pad: 0 682 | kernel_size: 1 683 | } 684 | } 685 | layer { 686 | name: "score_pool2c" 687 | type: "Crop" 688 | bottom: "score_pool2" 689 | bottom: "upscore_pool3" 690 | top: "score_pool2c" 691 | crop_param { 692 | axis: 2 693 | offset: 15 694 | } 695 | } 696 | layer { 697 | name: "fuse_pool2" 698 | type: "Eltwise" 699 | bottom: "upscore_pool3" 700 | bottom: "score_pool2c" 701 | top: "fuse_pool2" 702 | eltwise_param { 703 | operation: SUM 704 | } 705 | } 706 | layer { 707 | name: "upscore4" 708 | type: "Deconvolution" 709 | bottom: "fuse_pool2" 710 | top: "upscore4" 711 | param { 712 | lr_mult: 0 713 | } 714 | convolution_param { 715 | num_output: 11 716 | bias_term: false 717 | kernel_size: 8 718 | stride: 4 719 | } 720 | } 721 | 722 | 723 | layer { 724 | name: "score" 725 | type: "Crop" 726 | bottom: "upscore4" 727 | bottom: "data" 728 | top: "score" 729 | crop_param { 730 | axis: 2 731 | offset: 31 732 | } 733 | } 734 | 735 | layer { 736 | name: "loss" 737 | type: "SoftmaxWithLoss" 738 | bottom: "score" 739 | bottom: "label" 740 | top: "loss" 741 | loss_param { 742 | ignore_label: 255 743 | normalize: false 744 | } 745 | } 746 | -------------------------------------------------------------------------------- /fcn-improve/val.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "data" 3 | type: "Python" 4 | top: "data" 5 | top: "label" 6 | python_param { 7 | module: "voc_layers" 8 | layer: "VOCSegDataLayer" 9 | param_str: "{\'voc_dir\': \'../fcn-pool/data/fcn-gf2-512-1024/valdata\', \'seed\': 1337, \'split\': \'val\', \'mean\': (109.39446,105.91758,89.61112)}" 10 | } 11 | } 12 | 13 | layer { 14 | name: "conv1_1" 15 | type: "Convolution" 16 | bottom: "data" 17 | top: "conv1_1" 18 | param { 19 | lr_mult: 1 20 | decay_mult: 1 21 | } 22 | param { 23 | lr_mult: 2 24 | decay_mult: 0 25 | } 26 | convolution_param { 27 | num_output: 64 28 | pad: 100 29 | kernel_size: 3 30 | stride: 1 31 | } 32 | } 33 | layer { 34 | name: "relu1_1" 35 | type: "ReLU" 36 | bottom: "conv1_1" 37 | top: "conv1_1" 38 | } 39 | layer { 40 | name: "conv1_2" 41 | type: "Convolution" 42 | bottom: "conv1_1" 43 | top: "conv1_2" 44 | param { 45 | lr_mult: 1 46 | decay_mult: 1 47 | } 48 | param { 49 | lr_mult: 2 50 | decay_mult: 0 51 | } 52 | convolution_param { 53 | num_output: 64 54 | pad: 1 55 | kernel_size: 3 56 | stride: 1 57 | } 58 | } 59 | layer { 60 | name: "relu1_2" 61 | type: "ReLU" 62 | bottom: "conv1_2" 63 | top: "conv1_2" 64 | } 65 | layer { 66 | name: "pool1" 67 | type: "Pooling" 68 | bottom: "conv1_2" 69 | top: "pool1" 70 | pooling_param { 71 | pool: MAX 72 | kernel_size: 2 73 | stride: 2 74 | } 75 | } 76 | layer { 77 | name: "conv2_1" 78 | type: "Convolution" 79 | bottom: "pool1" 80 | top: "conv2_1" 81 | param { 82 | lr_mult: 1 83 | decay_mult: 1 84 | } 85 | param { 86 | lr_mult: 2 87 | decay_mult: 0 88 | } 89 | convolution_param { 90 | num_output: 128 91 | pad: 1 92 | kernel_size: 3 93 | stride: 1 94 | } 95 | } 96 | layer { 97 | name: "relu2_1" 98 | type: "ReLU" 99 | bottom: "conv2_1" 100 | top: "conv2_1" 101 | } 102 | layer { 103 | name: "conv2_2" 104 | type: "Convolution" 105 | bottom: "conv2_1" 106 | top: "conv2_2" 107 | param { 108 | lr_mult: 1 109 | decay_mult: 1 110 | } 111 | param { 112 | lr_mult: 2 113 | decay_mult: 0 114 | } 115 | convolution_param { 116 | num_output: 128 117 | pad: 1 118 | kernel_size: 3 119 | stride: 1 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | decay_mult: 1 147 | } 148 | param { 149 | lr_mult: 2 150 | decay_mult: 0 151 | } 152 | convolution_param { 153 | num_output: 256 154 | pad: 1 155 | kernel_size: 3 156 | stride: 1 157 | } 158 | } 159 | layer { 160 | name: "relu3_1" 161 | type: "ReLU" 162 | bottom: "conv3_1" 163 | top: "conv3_1" 164 | } 165 | layer { 166 | name: "conv3_2" 167 | type: "Convolution" 168 | bottom: "conv3_1" 169 | top: "conv3_2" 170 | param { 171 | lr_mult: 1 172 | decay_mult: 1 173 | } 174 | param { 175 | lr_mult: 2 176 | decay_mult: 0 177 | } 178 | convolution_param { 179 | num_output: 256 180 | pad: 1 181 | kernel_size: 3 182 | stride: 1 183 | } 184 | } 185 | layer { 186 | name: "relu3_2" 187 | type: "ReLU" 188 | bottom: "conv3_2" 189 | top: "conv3_2" 190 | } 191 | layer { 192 | name: "conv3_3" 193 | type: "Convolution" 194 | bottom: "conv3_2" 195 | top: "conv3_3" 196 | param { 197 | lr_mult: 1 198 | decay_mult: 1 199 | } 200 | param { 201 | lr_mult: 2 202 | decay_mult: 0 203 | } 204 | convolution_param { 205 | num_output: 256 206 | pad: 1 207 | kernel_size: 3 208 | stride: 1 209 | } 210 | } 211 | layer { 212 | name: "relu3_3" 213 | type: "ReLU" 214 | bottom: "conv3_3" 215 | top: "conv3_3" 216 | } 217 | layer { 218 | name: "pool3" 219 | type: "Pooling" 220 | bottom: "conv3_3" 221 | top: "pool3" 222 | pooling_param { 223 | pool: MAX 224 | kernel_size: 2 225 | stride: 2 226 | } 227 | } 228 | layer { 229 | name: "conv4_1" 230 | type: "Convolution" 231 | bottom: "pool3" 232 | top: "conv4_1" 233 | param { 234 | lr_mult: 1 235 | decay_mult: 1 236 | } 237 | param { 238 | lr_mult: 2 239 | decay_mult: 0 240 | } 241 | convolution_param { 242 | num_output: 512 243 | pad: 1 244 | kernel_size: 3 245 | stride: 1 246 | } 247 | } 248 | layer { 249 | name: "relu4_1" 250 | type: "ReLU" 251 | bottom: "conv4_1" 252 | top: "conv4_1" 253 | } 254 | layer { 255 | name: "conv4_2" 256 | type: "Convolution" 257 | bottom: "conv4_1" 258 | top: "conv4_2" 259 | param { 260 | lr_mult: 1 261 | decay_mult: 1 262 | } 263 | param { 264 | lr_mult: 2 265 | decay_mult: 0 266 | } 267 | convolution_param { 268 | num_output: 512 269 | pad: 1 270 | kernel_size: 3 271 | stride: 1 272 | } 273 | } 274 | layer { 275 | name: "relu4_2" 276 | type: "ReLU" 277 | bottom: "conv4_2" 278 | top: "conv4_2" 279 | } 280 | layer { 281 | name: "conv4_3" 282 | type: "Convolution" 283 | bottom: "conv4_2" 284 | top: "conv4_3" 285 | param { 286 | lr_mult: 1 287 | decay_mult: 1 288 | } 289 | param { 290 | lr_mult: 2 291 | decay_mult: 0 292 | } 293 | convolution_param { 294 | num_output: 512 295 | pad: 1 296 | kernel_size: 3 297 | stride: 1 298 | } 299 | } 300 | layer { 301 | name: "relu4_3" 302 | type: "ReLU" 303 | bottom: "conv4_3" 304 | top: "conv4_3" 305 | } 306 | layer { 307 | name: "pool4" 308 | type: "Pooling" 309 | bottom: "conv4_3" 310 | top: "pool4" 311 | pooling_param { 312 | pool: MAX 313 | kernel_size: 2 314 | stride: 2 315 | } 316 | } 317 | layer { 318 | name: "conv5_1" 319 | type: "Convolution" 320 | bottom: "pool4" 321 | top: "conv5_1" 322 | param { 323 | lr_mult: 1 324 | decay_mult: 1 325 | } 326 | param { 327 | lr_mult: 2 328 | decay_mult: 0 329 | } 330 | convolution_param { 331 | num_output: 512 332 | pad: 1 333 | kernel_size: 3 334 | stride: 1 335 | } 336 | } 337 | layer { 338 | name: "relu5_1" 339 | type: "ReLU" 340 | bottom: "conv5_1" 341 | top: "conv5_1" 342 | } 343 | layer { 344 | name: "conv5_2" 345 | type: "Convolution" 346 | bottom: "conv5_1" 347 | top: "conv5_2" 348 | param { 349 | lr_mult: 1 350 | decay_mult: 1 351 | } 352 | param { 353 | lr_mult: 2 354 | decay_mult: 0 355 | } 356 | convolution_param { 357 | num_output: 512 358 | pad: 1 359 | kernel_size: 3 360 | stride: 1 361 | } 362 | } 363 | layer { 364 | name: "relu5_2" 365 | type: "ReLU" 366 | bottom: "conv5_2" 367 | top: "conv5_2" 368 | } 369 | layer { 370 | name: "conv5_3" 371 | type: "Convolution" 372 | bottom: "conv5_2" 373 | top: "conv5_3" 374 | param { 375 | lr_mult: 1 376 | decay_mult: 1 377 | } 378 | param { 379 | lr_mult: 2 380 | decay_mult: 0 381 | } 382 | convolution_param { 383 | num_output: 512 384 | pad: 1 385 | kernel_size: 3 386 | stride: 1 387 | } 388 | } 389 | layer { 390 | name: "relu5_3" 391 | type: "ReLU" 392 | bottom: "conv5_3" 393 | top: "conv5_3" 394 | } 395 | layer { 396 | name: "pool5" 397 | type: "Pooling" 398 | bottom: "conv5_3" 399 | top: "pool5" 400 | pooling_param { 401 | pool: MAX 402 | kernel_size: 2 403 | stride: 2 404 | } 405 | } 406 | layer { 407 | name: "fc6" 408 | type: "Convolution" 409 | bottom: "pool5" 410 | top: "fc6" 411 | param { 412 | lr_mult: 1 413 | decay_mult: 1 414 | } 415 | param { 416 | lr_mult: 2 417 | decay_mult: 0 418 | } 419 | convolution_param { 420 | num_output: 4096 421 | pad: 0 422 | kernel_size: 7 423 | stride: 1 424 | } 425 | } 426 | layer { 427 | name: "relu6" 428 | type: "ReLU" 429 | bottom: "fc6" 430 | top: "fc6" 431 | } 432 | layer { 433 | name: "drop6" 434 | type: "Dropout" 435 | bottom: "fc6" 436 | top: "fc6" 437 | dropout_param { 438 | dropout_ratio: 0.5 439 | } 440 | } 441 | layer { 442 | name: "fc7" 443 | type: "Convolution" 444 | bottom: "fc6" 445 | top: "fc7" 446 | param { 447 | lr_mult: 1 448 | decay_mult: 1 449 | } 450 | param { 451 | lr_mult: 2 452 | decay_mult: 0 453 | } 454 | convolution_param { 455 | num_output: 4096 456 | pad: 0 457 | kernel_size: 1 458 | stride: 1 459 | } 460 | } 461 | layer { 462 | name: "relu7" 463 | type: "ReLU" 464 | bottom: "fc7" 465 | top: "fc7" 466 | } 467 | layer { 468 | name: "drop7" 469 | type: "Dropout" 470 | bottom: "fc7" 471 | top: "fc7" 472 | dropout_param { 473 | dropout_ratio: 0.5 474 | } 475 | } 476 | layer { 477 | name: "score_fr" 478 | type: "Convolution" 479 | bottom: "fc7" 480 | top: "score_fr" 481 | param { 482 | lr_mult: 1 483 | decay_mult: 1 484 | } 485 | param { 486 | lr_mult: 2 487 | decay_mult: 0 488 | } 489 | convolution_param { 490 | num_output: 11 491 | pad: 0 492 | kernel_size: 1 493 | } 494 | } 495 | layer { 496 | name: "upscore2" 497 | type: "Deconvolution" 498 | bottom: "score_fr" 499 | top: "upscore2" 500 | param { 501 | lr_mult: 0 502 | } 503 | convolution_param { 504 | num_output: 11 505 | bias_term: false 506 | kernel_size: 4 507 | stride: 2 508 | } 509 | } 510 | layer { 511 | name: "scale_pool4" 512 | type: "Scale" 513 | bottom: "pool4" 514 | top: "scale_pool4" 515 | param { 516 | lr_mult: 0 517 | } 518 | scale_param { 519 | filler { 520 | type: "constant" 521 | value: 0.01 522 | } 523 | } 524 | } 525 | layer { 526 | name: "score_pool4" 527 | type: "Convolution" 528 | bottom: "scale_pool4" 529 | top: "score_pool4" 530 | param { 531 | lr_mult: 1 532 | decay_mult: 1 533 | } 534 | param { 535 | lr_mult: 2 536 | decay_mult: 0 537 | } 538 | convolution_param { 539 | num_output: 11 540 | pad: 0 541 | kernel_size: 1 542 | } 543 | } 544 | layer { 545 | name: "score_pool4c" 546 | type: "Crop" 547 | bottom: "score_pool4" 548 | bottom: "upscore2" 549 | top: "score_pool4c" 550 | crop_param { 551 | axis: 2 552 | offset: 5 553 | } 554 | } 555 | layer { 556 | name: "fuse_pool4" 557 | type: "Eltwise" 558 | bottom: "upscore2" 559 | bottom: "score_pool4c" 560 | top: "fuse_pool4" 561 | eltwise_param { 562 | operation: SUM 563 | } 564 | } 565 | layer { 566 | name: "upscore_pool4" 567 | type: "Deconvolution" 568 | bottom: "fuse_pool4" 569 | top: "upscore_pool4" 570 | param { 571 | lr_mult: 0 572 | } 573 | convolution_param { 574 | num_output: 11 575 | bias_term: false 576 | kernel_size: 4 577 | stride: 2 578 | } 579 | } 580 | layer { 581 | name: "scale_pool3" 582 | type: "Scale" 583 | bottom: "pool3" 584 | top: "scale_pool3" 585 | param { 586 | lr_mult: 0 587 | } 588 | scale_param { 589 | filler { 590 | type: "constant" 591 | value: 0.0001 592 | } 593 | } 594 | } 595 | layer { 596 | name: "score_pool3" 597 | type: "Convolution" 598 | bottom: "scale_pool3" 599 | top: "score_pool3" 600 | param { 601 | lr_mult: 1 602 | decay_mult: 1 603 | } 604 | param { 605 | lr_mult: 2 606 | decay_mult: 0 607 | } 608 | convolution_param { 609 | num_output: 11 610 | pad: 0 611 | kernel_size: 1 612 | } 613 | } 614 | layer { 615 | name: "score_pool3c" 616 | type: "Crop" 617 | bottom: "score_pool3" 618 | bottom: "upscore_pool4" 619 | top: "score_pool3c" 620 | crop_param { 621 | axis: 2 622 | offset: 9 623 | } 624 | } 625 | layer { 626 | name: "fuse_pool3" 627 | type: "Eltwise" 628 | bottom: "upscore_pool4" 629 | bottom: "score_pool3c" 630 | top: "fuse_pool3" 631 | eltwise_param { 632 | operation: SUM 633 | } 634 | } 635 | layer { 636 | name: "upscore_pool3" 637 | type: "Deconvolution" 638 | bottom: "fuse_pool3" 639 | top: "upscore_pool3" 640 | param { 641 | lr_mult: 0 642 | } 643 | convolution_param { 644 | num_output: 11 645 | bias_term: false 646 | kernel_size: 4 647 | stride: 2 648 | } 649 | } 650 | 651 | layer { 652 | name: "scale_pool2" 653 | type: "Scale" 654 | bottom: "pool2" 655 | top: "scale_pool2" 656 | param { 657 | lr_mult: 0 658 | } 659 | scale_param { 660 | filler { 661 | type: "constant" 662 | value: 0.0001 663 | } 664 | } 665 | } 666 | layer { 667 | name: "score_pool2" 668 | type: "Convolution" 669 | bottom: "scale_pool2" 670 | top: "score_pool2" 671 | param { 672 | lr_mult: 1 673 | decay_mult: 1 674 | } 675 | param { 676 | lr_mult: 2 677 | decay_mult: 0 678 | } 679 | convolution_param { 680 | num_output: 11 681 | pad: 0 682 | kernel_size: 1 683 | } 684 | } 685 | layer { 686 | name: "score_pool2c" 687 | type: "Crop" 688 | bottom: "score_pool2" 689 | bottom: "upscore_pool3" 690 | top: "score_pool2c" 691 | crop_param { 692 | axis: 2 693 | offset: 15 694 | } 695 | } 696 | layer { 697 | name: "fuse_pool2" 698 | type: "Eltwise" 699 | bottom: "upscore_pool3" 700 | bottom: "score_pool2c" 701 | top: "fuse_pool2" 702 | eltwise_param { 703 | operation: SUM 704 | } 705 | } 706 | layer { 707 | name: "upscore4" 708 | type: "Deconvolution" 709 | bottom: "fuse_pool2" 710 | top: "upscore4" 711 | param { 712 | lr_mult: 0 713 | } 714 | convolution_param { 715 | num_output: 11 716 | bias_term: false 717 | kernel_size: 8 718 | stride: 4 719 | } 720 | } 721 | 722 | 723 | layer { 724 | name: "score" 725 | type: "Crop" 726 | bottom: "upscore4" 727 | bottom: "data" 728 | top: "score" 729 | crop_param { 730 | axis: 2 731 | offset: 31 732 | } 733 | } 734 | 735 | layer { 736 | name: "loss" 737 | type: "SoftmaxWithLoss" 738 | bottom: "score" 739 | bottom: "label" 740 | top: "loss" 741 | loss_param { 742 | ignore_label: 255 743 | normalize: false 744 | } 745 | } 746 | -------------------------------------------------------------------------------- /infer-4s.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import sys 4 | from skimage.io import imread ,imshow ,imsave 5 | from skimage import img_as_ubyte 6 | from skimage import util 7 | 8 | from copy import deepcopy 9 | 10 | 11 | sys.path.append('/home/sensetime/DeepLearning/test/caffe/python') 12 | sys.path.append('/home/sensetime/DeepLearning/test/caffe/python/caffe') 13 | 14 | import caffe 15 | import matplotlib.pyplot as plt 16 | 17 | # load image, switch to BGR, subtract mean, and make dims C x H x W for Caffe 18 | im = Image.open('../imgtest/460.jpg') 19 | #im = imread('/home/sensetime/DeepLearning/fxwaterextract/data/water/1024/121.tif') 20 | in_ = np.array(im, dtype=np.float32) 21 | in_ = in_[:,:,::-1] 22 | in_ -= np.array((109.39446,105.91758,89.61112)) 23 | in_ = in_.transpose((2,0,1)) 24 | 25 | # load net 26 | net = caffe.Net('../fcn-pool/fcn-improve/deploy.prototxt', '../fcn-pool/fcn-improve/snapshot/train_iter_15000.caffemodel', caffe.TEST) 27 | # shape for input (data blob is N x C x H x W), set data 28 | net.blobs['data'].reshape(1, *in_.shape) 29 | net.blobs['data'].data[...] = in_ 30 | # run net and take argmax for prediction 31 | net.forward() 32 | out = net.blobs['score'].data[0].argmax(axis=0) 33 | print type(out),out.dtype,out.shape,out.size 34 | 35 | imsave('../fcn-pool/data/fcn-gf2-512-1024/fcn-improve/fcn4s-460-1.5w.png',out) 36 | 37 | out1 = net.blobs['softmax_score'].data[0] 38 | print type(out1),out1.dtype,out1.shape,out1.size 39 | #dimage=img_as_ubyte(out) 40 | 41 | #img=Image.open(out); 42 | #imshow(out) 43 | #show() 44 | for x in range(1,11,1): 45 | im_1 = img_as_ubyte(out1[x]) 46 | imsave("../fcn-pool/data/fcn-gf2-512-1024/fcn4s-improve/"+str(x)+"-460-1.5w-fcn.png",im_1) 47 | 48 | print "success" 49 | # img.save('test3.png','png') 50 | # plt.imshow(out); 51 | # plt.axis('off') 52 | # plt.savefig('test2.png') 53 | # plt.show() 54 | -------------------------------------------------------------------------------- /mean.py: -------------------------------------------------------------------------------- 1 | from skimage import io 2 | import os 3 | import numpy as np 4 | 5 | 6 | path1='/home/sensetime/DeepLearning/test/fcn.berkeleyvision.org/data/fcn-gf2-512-1024/traindata/img' 7 | str=os.listdir(path1) 8 | l=len(str) 9 | 10 | d1=0; 11 | d2=0; 12 | d3=0; 13 | i=0 14 | for x in str: 15 | i=i+1 16 | im=io.imread(path1+"/"+x) 17 | if i%1000==0: 18 | print i 19 | d1=d1+np.mean(im[:,:,0]) 20 | d2=d2+np.mean(im[:,:,1]) 21 | d3=d3+np.mean(im[:,:,2]) 22 | 23 | print [d1/l,d2/l,d3/l] 24 | print l 25 | 26 | # fcn_zy3 train RGB [89.611119269769276, 105.91758338559417, 109.3944624054897] 27 | 28 | # g=fcn zy3 512-1024 rgb [89.279196311370441, 105.57667462140543, 109.08637249794963] -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/readme.txt -------------------------------------------------------------------------------- /result.py: -------------------------------------------------------------------------------- 1 | from skimage import io 2 | 3 | path0='/home/sensetime/DeepLearning/test/imgtest/val/277.jpg' 4 | im0=io.imread(path0) 5 | 6 | io.imshow(im0) -------------------------------------------------------------------------------- /score.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import caffe 3 | import numpy as np 4 | import os 5 | import sys 6 | from datetime import datetime 7 | from PIL import Image 8 | 9 | def fast_hist(a, b, n): 10 | k = (a >= 0) & (a < n) 11 | return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n) 12 | 13 | def compute_hist(net, save_dir, dataset, layer='score', gt='label'): 14 | n_cl = net.blobs[layer].channels 15 | if save_dir: 16 | os.mkdir(save_dir) 17 | hist = np.zeros((n_cl, n_cl)) 18 | loss = 0 19 | for idx in dataset: 20 | net.forward() 21 | hist += fast_hist(net.blobs[gt].data[0, 0].flatten(), 22 | net.blobs[layer].data[0].argmax(0).flatten(), 23 | n_cl) 24 | 25 | if save_dir: 26 | im = Image.fromarray(net.blobs[layer].data[0].argmax(0).astype(np.uint8), mode='P') 27 | im.save(os.path.join(save_dir, idx + '.png')) 28 | # compute the loss as well 29 | loss += net.blobs['loss'].data.flat[0] 30 | return hist, loss / len(dataset) 31 | 32 | def seg_tests(solver, save_format, dataset, layer='score', gt='label'): 33 | print '>>>', datetime.now(), 'Begin seg tests' 34 | solver.test_nets[0].share_with(solver.net) 35 | do_seg_tests(solver.test_nets[0], solver.iter, save_format, dataset, layer, gt) 36 | 37 | def do_seg_tests(net, iter, save_format, dataset, layer='score', gt='label'): 38 | n_cl = net.blobs[layer].channels 39 | if save_format: 40 | save_format = save_format.format(iter) 41 | hist, loss = compute_hist(net, save_format, dataset, layer, gt) 42 | # mean loss 43 | print '>>>', datetime.now(), 'Iteration', iter, 'loss', loss 44 | # overall accuracy 45 | acc = np.diag(hist).sum() / hist.sum() 46 | print '>>>', datetime.now(), 'Iteration', iter, 'overall accuracy', acc 47 | # per-class accuracy 48 | acc = np.diag(hist) / hist.sum(1) 49 | print '>>>', datetime.now(), 'Iteration', iter, 'mean accuracy', np.nanmean(acc) 50 | # per-class IU 51 | iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) 52 | print '>>>', datetime.now(), 'Iteration', iter, 'mean IU', np.nanmean(iu) 53 | freq = hist.sum(1) / hist.sum() 54 | print '>>>', datetime.now(), 'Iteration', iter, 'fwavacc', \ 55 | (freq[freq > 0] * iu[freq > 0]).sum() 56 | return hist 57 | -------------------------------------------------------------------------------- /voc_layers.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | 3 | import numpy as np 4 | from PIL import Image 5 | 6 | import random 7 | 8 | class VOCSegDataLayer(caffe.Layer): 9 | """ 10 | Load (input image, label image) pairs from PASCAL VOC 11 | one-at-a-time while reshaping the net to preserve dimensions. 12 | 13 | Use this to feed data to a fully convolutional network. 14 | """ 15 | 16 | def setup(self, bottom, top): 17 | """ 18 | Setup data layer according to parameters: 19 | 20 | - voc_dir: path to PASCAL VOC year dir 21 | - split: train / val / test 22 | - mean: tuple of mean values to subtract 23 | - randomize: load in random order (default: True) 24 | - seed: seed for randomization (default: None / current time) 25 | 26 | for PASCAL VOC semantic segmentation. 27 | 28 | example 29 | 30 | params = dict(voc_dir="/path/to/PASCAL/VOC2011", 31 | mean=(104.00698793, 116.66876762, 122.67891434), 32 | split="val") 33 | """ 34 | # config 35 | params = eval(self.param_str) 36 | self.voc_dir = params['voc_dir'] 37 | self.split = params['split'] 38 | self.mean = np.array(params['mean']) 39 | self.random = params.get('randomize', True) 40 | self.seed = params.get('seed', None) 41 | 42 | # two tops: data and label 43 | if len(top) != 2: 44 | raise Exception("Need to define two tops: data and label.") 45 | # data layers have no bottoms 46 | if len(bottom) != 0: 47 | raise Exception("Do not define a bottom.") 48 | 49 | # load indices for images and labels 50 | split_f = '{}/{}.txt'.format(self.voc_dir, 51 | self.split) 52 | self.indices = open(split_f, 'r').read().splitlines() 53 | self.idx = 0 54 | 55 | # make eval deterministic 56 | if 'train' not in self.split: 57 | self.random = False 58 | 59 | # randomization: seed and pick 60 | if self.random: 61 | random.seed(self.seed) 62 | self.idx = random.randint(0, len(self.indices)-1) 63 | 64 | 65 | def reshape(self, bottom, top): 66 | # load image + label image pair 67 | self.data = self.load_image(self.indices[self.idx]) 68 | self.label = self.load_label(self.indices[self.idx]) 69 | # reshape tops to fit (leading 1 is for batch dimension) 70 | top[0].reshape(1, *self.data.shape) 71 | top[1].reshape(1, *self.label.shape) 72 | 73 | 74 | def forward(self, bottom, top): 75 | # assign output 76 | top[0].data[...] = self.data 77 | top[1].data[...] = self.label 78 | 79 | # pick next input 80 | if self.random: 81 | self.idx = random.randint(0, len(self.indices)-1) 82 | else: 83 | self.idx += 1 84 | if self.idx == len(self.indices): 85 | self.idx = 0 86 | 87 | 88 | def backward(self, top, propagate_down, bottom): 89 | pass 90 | 91 | 92 | def load_image(self, idx): 93 | """ 94 | Load input image and preprocess for Caffe: 95 | - cast to float 96 | - switch channels RGB -> BGR 97 | - subtract mean 98 | - transpose to channel x height x width order 99 | """ 100 | im = Image.open('{}/img/{}.jpg'.format(self.voc_dir, idx)) 101 | in_ = np.array(im, dtype=np.float32) 102 | in_ = in_[:,:,::-1] 103 | in_ -= self.mean 104 | in_ = in_.transpose((2,0,1)) 105 | return in_ 106 | 107 | 108 | def load_label(self, idx): 109 | """ 110 | Load label image as 1 x height x width integer array of label indices. 111 | The leading singleton dimension is required by the loss. 112 | """ 113 | im = Image.open('{}/label/{}.PNG'.format(self.voc_dir, idx)) 114 | label = np.array(im, dtype=np.uint8) 115 | label = label[np.newaxis, ...] 116 | return label 117 | 118 | 119 | class SBDDSegDataLayer(caffe.Layer): 120 | """ 121 | Load (input image, label image) pairs from the SBDD extended labeling 122 | of PASCAL VOC for semantic segmentation 123 | one-at-a-time while reshaping the net to preserve dimensions. 124 | 125 | Use this to feed data to a fully convolutional network. 126 | """ 127 | 128 | def setup(self, bottom, top): 129 | """ 130 | Setup data layer according to parameters: 131 | 132 | - sbdd_dir: path to SBDD `dataset` dir 133 | - split: train / seg11valid 134 | - mean: tuple of mean values to subtract 135 | - randomize: load in random order (default: True) 136 | - seed: seed for randomization (default: None / current time) 137 | 138 | for SBDD semantic segmentation. 139 | 140 | N.B.segv11alid is the set of segval11 that does not intersect with SBDD. 141 | Find it here: https://gist.github.com/shelhamer/edb330760338892d511e. 142 | 143 | example 144 | 145 | params = dict(sbdd_dir="/path/to/SBDD/dataset", 146 | mean=(104.00698793, 116.66876762, 122.67891434), 147 | split="valid") 148 | """ 149 | # config 150 | params = eval(self.param_str) 151 | self.sbdd_dir = params['sbdd_dir'] 152 | self.split = params['split'] 153 | self.mean = np.array(params['mean']) 154 | self.random = params.get('randomize', True) 155 | self.seed = params.get('seed', None) 156 | 157 | # two tops: data and label 158 | if len(top) != 2: 159 | raise Exception("Need to define two tops: data and label.") 160 | # data layers have no bottoms 161 | if len(bottom) != 0: 162 | raise Exception("Do not define a bottom.") 163 | 164 | # load indices for images and labels 165 | split_f = '{}/{}.txt'.format(self.sbdd_dir, 166 | self.split) 167 | self.indices = open(split_f, 'r').read().splitlines() 168 | self.idx = 0 169 | 170 | # make eval deterministic 171 | if 'train' not in self.split: 172 | self.random = False 173 | 174 | # randomization: seed and pick 175 | if self.random: 176 | random.seed(self.seed) 177 | self.idx = random.randint(0, len(self.indices)-1) 178 | 179 | 180 | def reshape(self, bottom, top): 181 | # load image + label image pair 182 | self.data = self.load_image(self.indices[self.idx]) 183 | self.label = self.load_label(self.indices[self.idx]) 184 | # reshape tops to fit (leading 1 is for batch dimension) 185 | top[0].reshape(1, *self.data.shape) 186 | top[1].reshape(1, *self.label.shape) 187 | 188 | 189 | def forward(self, bottom, top): 190 | # assign output 191 | top[0].data[...] = self.data 192 | top[1].data[...] = self.label 193 | 194 | # pick next input 195 | if self.random: 196 | self.idx = random.randint(0, len(self.indices)-1) 197 | else: 198 | self.idx += 1 199 | if self.idx == len(self.indices): 200 | self.idx = 0 201 | 202 | 203 | def backward(self, top, propagate_down, bottom): 204 | pass 205 | 206 | 207 | def load_image(self, idx): 208 | """ 209 | Load input image and preprocess for Caffe: 210 | - cast to float 211 | - switch channels RGB -> BGR 212 | - subtract mean 213 | - transpose to channel x height x width order 214 | """ 215 | im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx)) 216 | in_ = np.array(im, dtype=np.float32) 217 | in_ = in_[:,:,::-1] 218 | in_ -= self.mean 219 | in_ = in_.transpose((2,0,1)) 220 | #print idx 221 | return in_ 222 | 223 | def load_label(self, idx): 224 | """ 225 | Load label image as 1 x height x width integer array of label indices. 226 | The leading singleton dimension is required by the loss. 227 | """ 228 | im = Image.open('{}/label/{}.PNG'.format(self.sbdd_dir, idx)) 229 | label = np.array(im, dtype=np.uint8) 230 | label = label[np.newaxis, ...] 231 | return label 232 | 233 | def load_label_two(self, idx): 234 | """ 235 | Load label image as 1 x height x width integer array of label indices. 236 | The leading singleton dimension is required by the loss. 237 | """ 238 | import scipy.io 239 | mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx)) 240 | label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8) 241 | label = label[np.newaxis, ...] 242 | return label 243 | --------------------------------------------------------------------------------