├── MeanShift
    ├── MeanShift.exe
    ├── MeanShift.exp
    ├── MeanShift.ilk
    ├── MeanShift.lib
    ├── MeanShift.pdb
    ├── gdal111.dll
    ├── geos_c.dll
    ├── msvcp100d.dll
    ├── msvcr100d.dll
    ├── opencv_core2411d.dll
    ├── opencv_highgui2411d.dll
    ├── opencv_imgproc2411d.dll
    ├── opencv_objdetect2411d.dll
    └── proj.dll
├── README.md
├── data-url
├── fcn-improve
    ├── deploy.prototxt
    ├── solve.py
    ├── solver.prototxt
    ├── train.prototxt
    └── val.prototxt
├── infer-4s.py
├── mean.py
├── readme.txt
├── result.py
├── score.py
└── voc_layers.py


/MeanShift/MeanShift.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/MeanShift.exe


--------------------------------------------------------------------------------
/MeanShift/MeanShift.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/MeanShift.exp


--------------------------------------------------------------------------------
/MeanShift/MeanShift.ilk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/MeanShift.ilk


--------------------------------------------------------------------------------
/MeanShift/MeanShift.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/MeanShift.lib


--------------------------------------------------------------------------------
/MeanShift/MeanShift.pdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/MeanShift.pdb


--------------------------------------------------------------------------------
/MeanShift/gdal111.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/gdal111.dll


--------------------------------------------------------------------------------
/MeanShift/geos_c.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/geos_c.dll


--------------------------------------------------------------------------------
/MeanShift/msvcp100d.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/msvcp100d.dll


--------------------------------------------------------------------------------
/MeanShift/msvcr100d.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/msvcr100d.dll


--------------------------------------------------------------------------------
/MeanShift/opencv_core2411d.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/opencv_core2411d.dll


--------------------------------------------------------------------------------
/MeanShift/opencv_highgui2411d.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/opencv_highgui2411d.dll


--------------------------------------------------------------------------------
/MeanShift/opencv_imgproc2411d.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/opencv_imgproc2411d.dll


--------------------------------------------------------------------------------
/MeanShift/opencv_objdetect2411d.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/opencv_objdetect2411d.dll


--------------------------------------------------------------------------------
/MeanShift/proj.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/MeanShift/proj.dll


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 基于全卷积神经网络的遥感影像分类试验
3 | 
4 | 未来的工作将继续沿着基于深度学习的遥感影像分类方向进行，同时考虑转用 TensorFlow/Keras 机器学习框架。
5 | 


--------------------------------------------------------------------------------
/data-url:
--------------------------------------------------------------------------------
1 | 本文模型 ：链接：http://pan.baidu.com/s/1kV7lkrl 密码：bt8f
2 | 
3 | 本文数据集：链接：http://pan.baidu.com/s/1mhHIifQ 密码：yxzv
4 | 


--------------------------------------------------------------------------------
/fcn-improve/deploy.prototxt:
--------------------------------------------------------------------------------
  1 | layer {
  2 |   name: "input"
  3 |   type: "Input"
  4 |   top: "data"
  5 |   input_param {
  6 |     # These dimensions are purely for the sake of example;
  7 |     # see infer-4s.py for how to reshape the net to the given input size.
  8 |     shape { dim: 1 dim: 3 dim: 512 dim: 512 }
  9 |   }
 10 | }
 11 | 
 12 | layer {
 13 |   name: "conv1_1"
 14 |   type: "Convolution"
 15 |   bottom: "data"
 16 |   top: "conv1_1"
 17 |   param {
 18 |     lr_mult: 1
 19 |     decay_mult: 1
 20 |   }
 21 |   param {
 22 |     lr_mult: 2
 23 |     decay_mult: 0
 24 |   }
 25 |   convolution_param {
 26 |     num_output: 64
 27 |     pad: 100
 28 |     kernel_size: 3
 29 |     stride: 1
 30 |   }
 31 | }
 32 | layer {
 33 |   name: "relu1_1"
 34 |   type: "ReLU"
 35 |   bottom: "conv1_1"
 36 |   top: "conv1_1"
 37 | }
 38 | layer {
 39 |   name: "conv1_2"
 40 |   type: "Convolution"
 41 |   bottom: "conv1_1"
 42 |   top: "conv1_2"
 43 |   param {
 44 |     lr_mult: 1
 45 |     decay_mult: 1
 46 |   }
 47 |   param {
 48 |     lr_mult: 2
 49 |     decay_mult: 0
 50 |   }
 51 |   convolution_param {
 52 |     num_output: 64
 53 |     pad: 1
 54 |     kernel_size: 3
 55 |     stride: 1
 56 |   }
 57 | }
 58 | layer {
 59 |   name: "relu1_2"
 60 |   type: "ReLU"
 61 |   bottom: "conv1_2"
 62 |   top: "conv1_2"
 63 | }
 64 | layer {
 65 |   name: "pool1"
 66 |   type: "Pooling"
 67 |   bottom: "conv1_2"
 68 |   top: "pool1"
 69 |   pooling_param {
 70 |     pool: MAX
 71 |     kernel_size: 2
 72 |     stride: 2
 73 |   }
 74 | }
 75 | layer {
 76 |   name: "conv2_1"
 77 |   type: "Convolution"
 78 |   bottom: "pool1"
 79 |   top: "conv2_1"
 80 |   param {
 81 |     lr_mult: 1
 82 |     decay_mult: 1
 83 |   }
 84 |   param {
 85 |     lr_mult: 2
 86 |     decay_mult: 0
 87 |   }
 88 |   convolution_param {
 89 |     num_output: 128
 90 |     pad: 1
 91 |     kernel_size: 3
 92 |     stride: 1
 93 |   }
 94 | }
 95 | layer {
 96 |   name: "relu2_1"
 97 |   type: "ReLU"
 98 |   bottom: "conv2_1"
 99 |   top: "conv2_1"
100 | }
101 | layer {
102 |   name: "conv2_2"
103 |   type: "Convolution"
104 |   bottom: "conv2_1"
105 |   top: "conv2_2"
106 |   param {
107 |     lr_mult: 1
108 |     decay_mult: 1
109 |   }
110 |   param {
111 |     lr_mult: 2
112 |     decay_mult: 0
113 |   }
114 |   convolution_param {
115 |     num_output: 128
116 |     pad: 1
117 |     kernel_size: 3
118 |     stride: 1
119 |   }
120 | }
121 | layer {
122 |   name: "relu2_2"
123 |   type: "ReLU"
124 |   bottom: "conv2_2"
125 |   top: "conv2_2"
126 | }
127 | layer {
128 |   name: "pool2"
129 |   type: "Pooling"
130 |   bottom: "conv2_2"
131 |   top: "pool2"
132 |   pooling_param {
133 |     pool: MAX
134 |     kernel_size: 2
135 |     stride: 2
136 |   }
137 | }
138 | layer {
139 |   name: "conv3_1"
140 |   type: "Convolution"
141 |   bottom: "pool2"
142 |   top: "conv3_1"
143 |   param {
144 |     lr_mult: 1
145 |     decay_mult: 1
146 |   }
147 |   param {
148 |     lr_mult: 2
149 |     decay_mult: 0
150 |   }
151 |   convolution_param {
152 |     num_output: 256
153 |     pad: 1
154 |     kernel_size: 3
155 |     stride: 1
156 |   }
157 | }
158 | layer {
159 |   name: "relu3_1"
160 |   type: "ReLU"
161 |   bottom: "conv3_1"
162 |   top: "conv3_1"
163 | }
164 | layer {
165 |   name: "conv3_2"
166 |   type: "Convolution"
167 |   bottom: "conv3_1"
168 |   top: "conv3_2"
169 |   param {
170 |     lr_mult: 1
171 |     decay_mult: 1
172 |   }
173 |   param {
174 |     lr_mult: 2
175 |     decay_mult: 0
176 |   }
177 |   convolution_param {
178 |     num_output: 256
179 |     pad: 1
180 |     kernel_size: 3
181 |     stride: 1
182 |   }
183 | }
184 | layer {
185 |   name: "relu3_2"
186 |   type: "ReLU"
187 |   bottom: "conv3_2"
188 |   top: "conv3_2"
189 | }
190 | layer {
191 |   name: "conv3_3"
192 |   type: "Convolution"
193 |   bottom: "conv3_2"
194 |   top: "conv3_3"
195 |   param {
196 |     lr_mult: 1
197 |     decay_mult: 1
198 |   }
199 |   param {
200 |     lr_mult: 2
201 |     decay_mult: 0
202 |   }
203 |   convolution_param {
204 |     num_output: 256
205 |     pad: 1
206 |     kernel_size: 3
207 |     stride: 1
208 |   }
209 | }
210 | layer {
211 |   name: "relu3_3"
212 |   type: "ReLU"
213 |   bottom: "conv3_3"
214 |   top: "conv3_3"
215 | }
216 | layer {
217 |   name: "pool3"
218 |   type: "Pooling"
219 |   bottom: "conv3_3"
220 |   top: "pool3"
221 |   pooling_param {
222 |     pool: MAX
223 |     kernel_size: 2
224 |     stride: 2
225 |   }
226 | }
227 | layer {
228 |   name: "conv4_1"
229 |   type: "Convolution"
230 |   bottom: "pool3"
231 |   top: "conv4_1"
232 |   param {
233 |     lr_mult: 1
234 |     decay_mult: 1
235 |   }
236 |   param {
237 |     lr_mult: 2
238 |     decay_mult: 0
239 |   }
240 |   convolution_param {
241 |     num_output: 512
242 |     pad: 1
243 |     kernel_size: 3
244 |     stride: 1
245 |   }
246 | }
247 | layer {
248 |   name: "relu4_1"
249 |   type: "ReLU"
250 |   bottom: "conv4_1"
251 |   top: "conv4_1"
252 | }
253 | layer {
254 |   name: "conv4_2"
255 |   type: "Convolution"
256 |   bottom: "conv4_1"
257 |   top: "conv4_2"
258 |   param {
259 |     lr_mult: 1
260 |     decay_mult: 1
261 |   }
262 |   param {
263 |     lr_mult: 2
264 |     decay_mult: 0
265 |   }
266 |   convolution_param {
267 |     num_output: 512
268 |     pad: 1
269 |     kernel_size: 3
270 |     stride: 1
271 |   }
272 | }
273 | layer {
274 |   name: "relu4_2"
275 |   type: "ReLU"
276 |   bottom: "conv4_2"
277 |   top: "conv4_2"
278 | }
279 | layer {
280 |   name: "conv4_3"
281 |   type: "Convolution"
282 |   bottom: "conv4_2"
283 |   top: "conv4_3"
284 |   param {
285 |     lr_mult: 1
286 |     decay_mult: 1
287 |   }
288 |   param {
289 |     lr_mult: 2
290 |     decay_mult: 0
291 |   }
292 |   convolution_param {
293 |     num_output: 512
294 |     pad: 1
295 |     kernel_size: 3
296 |     stride: 1
297 |   }
298 | }
299 | layer {
300 |   name: "relu4_3"
301 |   type: "ReLU"
302 |   bottom: "conv4_3"
303 |   top: "conv4_3"
304 | }
305 | layer {
306 |   name: "pool4"
307 |   type: "Pooling"
308 |   bottom: "conv4_3"
309 |   top: "pool4"
310 |   pooling_param {
311 |     pool: MAX
312 |     kernel_size: 2
313 |     stride: 2
314 |   }
315 | }
316 | layer {
317 |   name: "conv5_1"
318 |   type: "Convolution"
319 |   bottom: "pool4"
320 |   top: "conv5_1"
321 |   param {
322 |     lr_mult: 1
323 |     decay_mult: 1
324 |   }
325 |   param {
326 |     lr_mult: 2
327 |     decay_mult: 0
328 |   }
329 |   convolution_param {
330 |     num_output: 512
331 |     pad: 1
332 |     kernel_size: 3
333 |     stride: 1
334 |   }
335 | }
336 | layer {
337 |   name: "relu5_1"
338 |   type: "ReLU"
339 |   bottom: "conv5_1"
340 |   top: "conv5_1"
341 | }
342 | layer {
343 |   name: "conv5_2"
344 |   type: "Convolution"
345 |   bottom: "conv5_1"
346 |   top: "conv5_2"
347 |   param {
348 |     lr_mult: 1
349 |     decay_mult: 1
350 |   }
351 |   param {
352 |     lr_mult: 2
353 |     decay_mult: 0
354 |   }
355 |   convolution_param {
356 |     num_output: 512
357 |     pad: 1
358 |     kernel_size: 3
359 |     stride: 1
360 |   }
361 | }
362 | layer {
363 |   name: "relu5_2"
364 |   type: "ReLU"
365 |   bottom: "conv5_2"
366 |   top: "conv5_2"
367 | }
368 | layer {
369 |   name: "conv5_3"
370 |   type: "Convolution"
371 |   bottom: "conv5_2"
372 |   top: "conv5_3"
373 |   param {
374 |     lr_mult: 1
375 |     decay_mult: 1
376 |   }
377 |   param {
378 |     lr_mult: 2
379 |     decay_mult: 0
380 |   }
381 |   convolution_param {
382 |     num_output: 512
383 |     pad: 1
384 |     kernel_size: 3
385 |     stride: 1
386 |   }
387 | }
388 | layer {
389 |   name: "relu5_3"
390 |   type: "ReLU"
391 |   bottom: "conv5_3"
392 |   top: "conv5_3"
393 | }
394 | layer {
395 |   name: "pool5"
396 |   type: "Pooling"
397 |   bottom: "conv5_3"
398 |   top: "pool5"
399 |   pooling_param {
400 |     pool: MAX
401 |     kernel_size: 2
402 |     stride: 2
403 |   }
404 | }
405 | layer {
406 |   name: "fc6"
407 |   type: "Convolution"
408 |   bottom: "pool5"
409 |   top: "fc6"
410 |   param {
411 |     lr_mult: 1
412 |     decay_mult: 1
413 |   }
414 |   param {
415 |     lr_mult: 2
416 |     decay_mult: 0
417 |   }
418 |   convolution_param {
419 |     num_output: 4096
420 |     pad: 0
421 |     kernel_size: 7
422 |     stride: 1
423 |   }
424 | }
425 | layer {
426 |   name: "relu6"
427 |   type: "ReLU"
428 |   bottom: "fc6"
429 |   top: "fc6"
430 | }
431 | layer {
432 |   name: "drop6"
433 |   type: "Dropout"
434 |   bottom: "fc6"
435 |   top: "fc6"
436 |   dropout_param {
437 |     dropout_ratio: 0.5
438 |   }
439 | }
440 | layer {
441 |   name: "fc7"
442 |   type: "Convolution"
443 |   bottom: "fc6"
444 |   top: "fc7"
445 |   param {
446 |     lr_mult: 1
447 |     decay_mult: 1
448 |   }
449 |   param {
450 |     lr_mult: 2
451 |     decay_mult: 0
452 |   }
453 |   convolution_param {
454 |     num_output: 4096
455 |     pad: 0
456 |     kernel_size: 1
457 |     stride: 1
458 |   }
459 | }
460 | layer {
461 |   name: "relu7"
462 |   type: "ReLU"
463 |   bottom: "fc7"
464 |   top: "fc7"
465 | }
466 | layer {
467 |   name: "drop7"
468 |   type: "Dropout"
469 |   bottom: "fc7"
470 |   top: "fc7"
471 |   dropout_param {
472 |     dropout_ratio: 0.5
473 |   }
474 | }
475 | layer {
476 |   name: "score_fr"
477 |   type: "Convolution"
478 |   bottom: "fc7"
479 |   top: "score_fr"
480 |   param {
481 |     lr_mult: 1
482 |     decay_mult: 1
483 |   }
484 |   param {
485 |     lr_mult: 2
486 |     decay_mult: 0
487 |   }
488 |   convolution_param {
489 |     num_output: 11
490 |     pad: 0
491 |     kernel_size: 1
492 |   }
493 | }
494 | layer {
495 |   name: "upscore2"
496 |   type: "Deconvolution"
497 |   bottom: "score_fr"
498 |   top: "upscore2"
499 |   param {
500 |     lr_mult: 0
501 |   }
502 |   convolution_param {
503 |     num_output: 11
504 |     bias_term: false
505 |     kernel_size: 4
506 |     stride: 2
507 |   }
508 | }
509 | layer {
510 |   name: "scale_pool4"
511 |   type: "Scale"
512 |   bottom: "pool4"
513 |   top: "scale_pool4"
514 |   param {
515 |     lr_mult: 0
516 |   }
517 |   scale_param {
518 |     filler {
519 |       type: "constant"
520 |       value: 0.01
521 |     }
522 |   }
523 | }
524 | layer {
525 |   name: "score_pool4"
526 |   type: "Convolution"
527 |   bottom: "scale_pool4"
528 |   top: "score_pool4"
529 |   param {
530 |     lr_mult: 1
531 |     decay_mult: 1
532 |   }
533 |   param {
534 |     lr_mult: 2
535 |     decay_mult: 0
536 |   }
537 |   convolution_param {
538 |     num_output: 11
539 |     pad: 0
540 |     kernel_size: 1
541 |   }
542 | }
543 | layer {
544 |   name: "score_pool4c"
545 |   type: "Crop"
546 |   bottom: "score_pool4"
547 |   bottom: "upscore2"
548 |   top: "score_pool4c"
549 |   crop_param {
550 |     axis: 2
551 |     offset: 5
552 |   }
553 | }
554 | layer {
555 |   name: "fuse_pool4"
556 |   type: "Eltwise"
557 |   bottom: "upscore2"
558 |   bottom: "score_pool4c"
559 |   top: "fuse_pool4"
560 |   eltwise_param {
561 |     operation: SUM
562 |   }
563 | }
564 | layer {
565 |   name: "upscore_pool4"
566 |   type: "Deconvolution"
567 |   bottom: "fuse_pool4"
568 |   top: "upscore_pool4"
569 |   param {
570 |     lr_mult: 0
571 |   }
572 |   convolution_param {
573 |     num_output: 11
574 |     bias_term: false
575 |     kernel_size: 4
576 |     stride: 2
577 |   }
578 | }
579 | layer {
580 |   name: "scale_pool3"
581 |   type: "Scale"
582 |   bottom: "pool3"
583 |   top: "scale_pool3"
584 |   param {
585 |     lr_mult: 0
586 |   }
587 |   scale_param {
588 |     filler {
589 |       type: "constant"
590 |       value: 0.0001
591 |     }
592 |   }
593 | }
594 | layer {
595 |   name: "score_pool3"
596 |   type: "Convolution"
597 |   bottom: "scale_pool3"
598 |   top: "score_pool3"
599 |   param {
600 |     lr_mult: 1
601 |     decay_mult: 1
602 |   }
603 |   param {
604 |     lr_mult: 2
605 |     decay_mult: 0
606 |   }
607 |   convolution_param {
608 |     num_output: 11
609 |     pad: 0
610 |     kernel_size: 1
611 |   }
612 | }
613 | layer {
614 |   name: "score_pool3c"
615 |   type: "Crop"
616 |   bottom: "score_pool3"
617 |   bottom: "upscore_pool4"
618 |   top: "score_pool3c"
619 |   crop_param {
620 |     axis: 2
621 |     offset: 9
622 |   }
623 | }
624 | layer {
625 |   name: "fuse_pool3"
626 |   type: "Eltwise"
627 |   bottom: "upscore_pool4"
628 |   bottom: "score_pool3c"
629 |   top: "fuse_pool3"
630 |   eltwise_param {
631 |     operation: SUM
632 |   }
633 | }
634 | layer {
635 |   name: "upscore_pool3"
636 |   type: "Deconvolution"
637 |   bottom: "fuse_pool3"
638 |   top: "upscore_pool3"
639 |   param {
640 |     lr_mult: 0
641 |   }
642 |   convolution_param {
643 |     num_output: 11
644 |     bias_term: false
645 |     kernel_size: 4
646 |     stride: 2
647 |   }
648 | }
649 | 
650 | layer {
651 |   name: "scale_pool2"
652 |   type: "Scale"
653 |   bottom: "pool2"
654 |   top: "scale_pool2"
655 |   param {
656 |     lr_mult: 0
657 |   }
658 |   scale_param {
659 |     filler {
660 |       type: "constant"
661 |       value: 0.0001
662 |     }
663 |   }
664 | }
665 | layer {
666 |   name: "score_pool2"
667 |   type: "Convolution"
668 |   bottom: "scale_pool2"
669 |   top: "score_pool2"
670 |   param {
671 |     lr_mult: 1
672 |     decay_mult: 1
673 |   }
674 |   param {
675 |     lr_mult: 2
676 |     decay_mult: 0
677 |   }
678 |   convolution_param {
679 |     num_output: 11
680 |     pad: 0
681 |     kernel_size: 1
682 |   }
683 | }
684 | layer {
685 |   name: "score_pool2c"
686 |   type: "Crop"
687 |   bottom: "score_pool2"
688 |   bottom: "upscore_pool3"
689 |   top: "score_pool2c"
690 |   crop_param {
691 |     axis: 2
692 |     offset: 9
693 |   }
694 | }
695 | layer {
696 |   name: "fuse_pool2"
697 |   type: "Eltwise"
698 |   bottom: "upscore_pool3"
699 |   bottom: "score_pool2c"
700 |   top: "fuse_pool2"
701 |   eltwise_param {
702 |     operation: SUM
703 |   }
704 | }
705 | layer {
706 |   name: "upscore4"
707 |   type: "Deconvolution"
708 |   bottom: "fuse_pool2"
709 |   top: "upscore4"
710 |   param {
711 |     lr_mult: 0
712 |   }
713 |   convolution_param {
714 |     num_output: 11
715 |     bias_term: false
716 |     kernel_size: 8
717 |     stride: 4
718 |   }
719 | }
720 | 
721 | layer {
722 |   name: "score"
723 |   type: "Crop"
724 |   bottom: "upscore4"
725 |   bottom: "data"
726 |   top: "score"
727 |   crop_param {
728 |     axis: 2
729 |     offset: 31
730 |   }
731 | }
732 | 
733 | layer {
734 |   name: "softmax_score"
735 |   type: "Softmax"
736 |   bottom: "score"
737 |   top: "softmax_score"
738 | }


--------------------------------------------------------------------------------
/fcn-improve/solve.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | sys.path.append('./python')
 4 | sys.path.append('./python/caffe')
 5 | 
 6 | sys.path.append('/home/sensetime/DeepLearning/test/fcn-pool')
 7 | 
 8 | 
 9 | import caffe
10 | import surgery, score
11 | 
12 | import numpy as np
13 | import os
14 | 
15 | try:
16 |     import setproctitle
17 |     setproctitle.setproctitle(os.path.basename(os.getcwd()))
18 | except:
19 |     pass
20 | 
21 | weights = '../fcn-pool/snapshot/fcn8s-heavy-pascal.caffemodel'
22 | 
23 | # init
24 | caffe.set_device(0)
25 | caffe.set_mode_gpu()
26 | 
27 | solver = caffe.SGDSolver('../fcn-pool/fcn-improve/solver.prototxt')
28 | solver.net.copy_from(weights)
29 | 
30 | # surgeries
31 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
32 | surgery.interp(solver.net, interp_layers)
33 | 
34 | # scoring
35 | val = np.loadtxt('../fcn-pool/data/fcn-gf2-512-1024/valdata/val.txt', dtype=str)
36 | 
37 | for _ in range(10):
38 |     solver.step(2000)
39 |     score.seg_tests(solver, False, val, layer='score')
40 | 


--------------------------------------------------------------------------------
/fcn-improve/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "../fcn-pool/fcn-improve/train.prototxt"
 2 | test_net: "../fcn-pool/fcn-improve/val.prototxt"
 3 | test_iter: 847
 4 | # make test net, but don't invoke it from the solver itself
 5 | test_interval: 999999999
 6 | display: 20
 7 | average_loss: 20
 8 | lr_policy: "fixed"
 9 | # lr for unnormalized softmax
10 | base_lr: 1e-13
11 | # high momentum
12 | momentum: 0.99
13 | # no gradient accumulation
14 | iter_size: 1
15 | max_iter: 20000
16 | weight_decay: 0.0005
17 | snapshot: 5000
18 | snapshot_prefix: "../fcn-pool/fcn-improve/snapshot/train"
19 | test_initialization: false
20 | 


--------------------------------------------------------------------------------
/fcn-improve/train.prototxt:
--------------------------------------------------------------------------------
  1 | layer {
  2 |   name: "data"
  3 |   type: "Python"
  4 |   top: "data"
  5 |   top: "label"
  6 |   python_param {
  7 |     module: "voc_layers"
  8 |     layer: "SBDDSegDataLayer"
  9 |     param_str: "{\'sbdd_dir\': \'../fcn-pool/data/fcn-gf2-512-1024/traindata\', \'seed\': 1337, \'split\': \'train\', \'mean\': (109.39446,105.91758,89.61112)}"
 10 |   }
 11 | }
 12 | 
 13 | layer {
 14 |   name: "conv1_1"
 15 |   type: "Convolution"
 16 |   bottom: "data"
 17 |   top: "conv1_1"
 18 |   param {
 19 |     lr_mult: 1
 20 |     decay_mult: 1
 21 |   }
 22 |   param {
 23 |     lr_mult: 2
 24 |     decay_mult: 0
 25 |   }
 26 |   convolution_param {
 27 |     num_output: 64
 28 |     pad: 100
 29 |     kernel_size: 3
 30 |     stride: 1
 31 |   }
 32 | }
 33 | layer {
 34 |   name: "relu1_1"
 35 |   type: "ReLU"
 36 |   bottom: "conv1_1"
 37 |   top: "conv1_1"
 38 | }
 39 | layer {
 40 |   name: "conv1_2"
 41 |   type: "Convolution"
 42 |   bottom: "conv1_1"
 43 |   top: "conv1_2"
 44 |   param {
 45 |     lr_mult: 1
 46 |     decay_mult: 1
 47 |   }
 48 |   param {
 49 |     lr_mult: 2
 50 |     decay_mult: 0
 51 |   }
 52 |   convolution_param {
 53 |     num_output: 64
 54 |     pad: 1
 55 |     kernel_size: 3
 56 |     stride: 1
 57 |   }
 58 | }
 59 | layer {
 60 |   name: "relu1_2"
 61 |   type: "ReLU"
 62 |   bottom: "conv1_2"
 63 |   top: "conv1_2"
 64 | }
 65 | layer {
 66 |   name: "pool1"
 67 |   type: "Pooling"
 68 |   bottom: "conv1_2"
 69 |   top: "pool1"
 70 |   pooling_param {
 71 |     pool: MAX
 72 |     kernel_size: 2
 73 |     stride: 2
 74 |   }
 75 | }
 76 | layer {
 77 |   name: "conv2_1"
 78 |   type: "Convolution"
 79 |   bottom: "pool1"
 80 |   top: "conv2_1"
 81 |   param {
 82 |     lr_mult: 1
 83 |     decay_mult: 1
 84 |   }
 85 |   param {
 86 |     lr_mult: 2
 87 |     decay_mult: 0
 88 |   }
 89 |   convolution_param {
 90 |     num_output: 128
 91 |     pad: 1
 92 |     kernel_size: 3
 93 |     stride: 1
 94 |   }
 95 | }
 96 | layer {
 97 |   name: "relu2_1"
 98 |   type: "ReLU"
 99 |   bottom: "conv2_1"
100 |   top: "conv2_1"
101 | }
102 | layer {
103 |   name: "conv2_2"
104 |   type: "Convolution"
105 |   bottom: "conv2_1"
106 |   top: "conv2_2"
107 |   param {
108 |     lr_mult: 1
109 |     decay_mult: 1
110 |   }
111 |   param {
112 |     lr_mult: 2
113 |     decay_mult: 0
114 |   }
115 |   convolution_param {
116 |     num_output: 128
117 |     pad: 1
118 |     kernel_size: 3
119 |     stride: 1
120 |   }
121 | }
122 | layer {
123 |   name: "relu2_2"
124 |   type: "ReLU"
125 |   bottom: "conv2_2"
126 |   top: "conv2_2"
127 | }
128 | layer {
129 |   name: "pool2"
130 |   type: "Pooling"
131 |   bottom: "conv2_2"
132 |   top: "pool2"
133 |   pooling_param {
134 |     pool: MAX
135 |     kernel_size: 2
136 |     stride: 2
137 |   }
138 | }
139 | layer {
140 |   name: "conv3_1"
141 |   type: "Convolution"
142 |   bottom: "pool2"
143 |   top: "conv3_1"
144 |   param {
145 |     lr_mult: 1
146 |     decay_mult: 1
147 |   }
148 |   param {
149 |     lr_mult: 2
150 |     decay_mult: 0
151 |   }
152 |   convolution_param {
153 |     num_output: 256
154 |     pad: 1
155 |     kernel_size: 3
156 |     stride: 1
157 |   }
158 | }
159 | layer {
160 |   name: "relu3_1"
161 |   type: "ReLU"
162 |   bottom: "conv3_1"
163 |   top: "conv3_1"
164 | }
165 | layer {
166 |   name: "conv3_2"
167 |   type: "Convolution"
168 |   bottom: "conv3_1"
169 |   top: "conv3_2"
170 |   param {
171 |     lr_mult: 1
172 |     decay_mult: 1
173 |   }
174 |   param {
175 |     lr_mult: 2
176 |     decay_mult: 0
177 |   }
178 |   convolution_param {
179 |     num_output: 256
180 |     pad: 1
181 |     kernel_size: 3
182 |     stride: 1
183 |   }
184 | }
185 | layer {
186 |   name: "relu3_2"
187 |   type: "ReLU"
188 |   bottom: "conv3_2"
189 |   top: "conv3_2"
190 | }
191 | layer {
192 |   name: "conv3_3"
193 |   type: "Convolution"
194 |   bottom: "conv3_2"
195 |   top: "conv3_3"
196 |   param {
197 |     lr_mult: 1
198 |     decay_mult: 1
199 |   }
200 |   param {
201 |     lr_mult: 2
202 |     decay_mult: 0
203 |   }
204 |   convolution_param {
205 |     num_output: 256
206 |     pad: 1
207 |     kernel_size: 3
208 |     stride: 1
209 |   }
210 | }
211 | layer {
212 |   name: "relu3_3"
213 |   type: "ReLU"
214 |   bottom: "conv3_3"
215 |   top: "conv3_3"
216 | }
217 | layer {
218 |   name: "pool3"
219 |   type: "Pooling"
220 |   bottom: "conv3_3"
221 |   top: "pool3"
222 |   pooling_param {
223 |     pool: MAX
224 |     kernel_size: 2
225 |     stride: 2
226 |   }
227 | }
228 | layer {
229 |   name: "conv4_1"
230 |   type: "Convolution"
231 |   bottom: "pool3"
232 |   top: "conv4_1"
233 |   param {
234 |     lr_mult: 1
235 |     decay_mult: 1
236 |   }
237 |   param {
238 |     lr_mult: 2
239 |     decay_mult: 0
240 |   }
241 |   convolution_param {
242 |     num_output: 512
243 |     pad: 1
244 |     kernel_size: 3
245 |     stride: 1
246 |   }
247 | }
248 | layer {
249 |   name: "relu4_1"
250 |   type: "ReLU"
251 |   bottom: "conv4_1"
252 |   top: "conv4_1"
253 | }
254 | layer {
255 |   name: "conv4_2"
256 |   type: "Convolution"
257 |   bottom: "conv4_1"
258 |   top: "conv4_2"
259 |   param {
260 |     lr_mult: 1
261 |     decay_mult: 1
262 |   }
263 |   param {
264 |     lr_mult: 2
265 |     decay_mult: 0
266 |   }
267 |   convolution_param {
268 |     num_output: 512
269 |     pad: 1
270 |     kernel_size: 3
271 |     stride: 1
272 |   }
273 | }
274 | layer {
275 |   name: "relu4_2"
276 |   type: "ReLU"
277 |   bottom: "conv4_2"
278 |   top: "conv4_2"
279 | }
280 | layer {
281 |   name: "conv4_3"
282 |   type: "Convolution"
283 |   bottom: "conv4_2"
284 |   top: "conv4_3"
285 |   param {
286 |     lr_mult: 1
287 |     decay_mult: 1
288 |   }
289 |   param {
290 |     lr_mult: 2
291 |     decay_mult: 0
292 |   }
293 |   convolution_param {
294 |     num_output: 512
295 |     pad: 1
296 |     kernel_size: 3
297 |     stride: 1
298 |   }
299 | }
300 | layer {
301 |   name: "relu4_3"
302 |   type: "ReLU"
303 |   bottom: "conv4_3"
304 |   top: "conv4_3"
305 | }
306 | layer {
307 |   name: "pool4"
308 |   type: "Pooling"
309 |   bottom: "conv4_3"
310 |   top: "pool4"
311 |   pooling_param {
312 |     pool: MAX
313 |     kernel_size: 2
314 |     stride: 2
315 |   }
316 | }
317 | layer {
318 |   name: "conv5_1"
319 |   type: "Convolution"
320 |   bottom: "pool4"
321 |   top: "conv5_1"
322 |   param {
323 |     lr_mult: 1
324 |     decay_mult: 1
325 |   }
326 |   param {
327 |     lr_mult: 2
328 |     decay_mult: 0
329 |   }
330 |   convolution_param {
331 |     num_output: 512
332 |     pad: 1
333 |     kernel_size: 3
334 |     stride: 1
335 |   }
336 | }
337 | layer {
338 |   name: "relu5_1"
339 |   type: "ReLU"
340 |   bottom: "conv5_1"
341 |   top: "conv5_1"
342 | }
343 | layer {
344 |   name: "conv5_2"
345 |   type: "Convolution"
346 |   bottom: "conv5_1"
347 |   top: "conv5_2"
348 |   param {
349 |     lr_mult: 1
350 |     decay_mult: 1
351 |   }
352 |   param {
353 |     lr_mult: 2
354 |     decay_mult: 0
355 |   }
356 |   convolution_param {
357 |     num_output: 512
358 |     pad: 1
359 |     kernel_size: 3
360 |     stride: 1
361 |   }
362 | }
363 | layer {
364 |   name: "relu5_2"
365 |   type: "ReLU"
366 |   bottom: "conv5_2"
367 |   top: "conv5_2"
368 | }
369 | layer {
370 |   name: "conv5_3"
371 |   type: "Convolution"
372 |   bottom: "conv5_2"
373 |   top: "conv5_3"
374 |   param {
375 |     lr_mult: 1
376 |     decay_mult: 1
377 |   }
378 |   param {
379 |     lr_mult: 2
380 |     decay_mult: 0
381 |   }
382 |   convolution_param {
383 |     num_output: 512
384 |     pad: 1
385 |     kernel_size: 3
386 |     stride: 1
387 |   }
388 | }
389 | layer {
390 |   name: "relu5_3"
391 |   type: "ReLU"
392 |   bottom: "conv5_3"
393 |   top: "conv5_3"
394 | }
395 | layer {
396 |   name: "pool5"
397 |   type: "Pooling"
398 |   bottom: "conv5_3"
399 |   top: "pool5"
400 |   pooling_param {
401 |     pool: MAX
402 |     kernel_size: 2
403 |     stride: 2
404 |   }
405 | }
406 | layer {
407 |   name: "fc6"
408 |   type: "Convolution"
409 |   bottom: "pool5"
410 |   top: "fc6"
411 |   param {
412 |     lr_mult: 1
413 |     decay_mult: 1
414 |   }
415 |   param {
416 |     lr_mult: 2
417 |     decay_mult: 0
418 |   }
419 |   convolution_param {
420 |     num_output: 4096
421 |     pad: 0
422 |     kernel_size: 7
423 |     stride: 1
424 |   }
425 | }
426 | layer {
427 |   name: "relu6"
428 |   type: "ReLU"
429 |   bottom: "fc6"
430 |   top: "fc6"
431 | }
432 | layer {
433 |   name: "drop6"
434 |   type: "Dropout"
435 |   bottom: "fc6"
436 |   top: "fc6"
437 |   dropout_param {
438 |     dropout_ratio: 0.5
439 |   }
440 | }
441 | layer {
442 |   name: "fc7"
443 |   type: "Convolution"
444 |   bottom: "fc6"
445 |   top: "fc7"
446 |   param {
447 |     lr_mult: 1
448 |     decay_mult: 1
449 |   }
450 |   param {
451 |     lr_mult: 2
452 |     decay_mult: 0
453 |   }
454 |   convolution_param {
455 |     num_output: 4096
456 |     pad: 0
457 |     kernel_size: 1
458 |     stride: 1
459 |   }
460 | }
461 | layer {
462 |   name: "relu7"
463 |   type: "ReLU"
464 |   bottom: "fc7"
465 |   top: "fc7"
466 | }
467 | layer {
468 |   name: "drop7"
469 |   type: "Dropout"
470 |   bottom: "fc7"
471 |   top: "fc7"
472 |   dropout_param {
473 |     dropout_ratio: 0.5
474 |   }
475 | }
476 | layer {
477 |   name: "score_fr"
478 |   type: "Convolution"
479 |   bottom: "fc7"
480 |   top: "score_fr"
481 |   param {
482 |     lr_mult: 1
483 |     decay_mult: 1
484 |   }
485 |   param {
486 |     lr_mult: 2
487 |     decay_mult: 0
488 |   }
489 |   convolution_param {
490 |     num_output: 11
491 |     pad: 0
492 |     kernel_size: 1
493 |   }
494 | }
495 | layer {
496 |   name: "upscore2"
497 |   type: "Deconvolution"
498 |   bottom: "score_fr"
499 |   top: "upscore2"
500 |   param {
501 |     lr_mult: 0
502 |   }
503 |   convolution_param {
504 |     num_output: 11
505 |     bias_term: false
506 |     kernel_size: 4
507 |     stride: 2
508 |   }
509 | }
510 | layer {
511 |   name: "scale_pool4"
512 |   type: "Scale"
513 |   bottom: "pool4"
514 |   top: "scale_pool4"
515 |   param {
516 |     lr_mult: 0
517 |   }
518 |   scale_param {
519 |     filler {
520 |       type: "constant"
521 |       value: 0.01
522 |     }
523 |   }
524 | }
525 | layer {
526 |   name: "score_pool4"
527 |   type: "Convolution"
528 |   bottom: "scale_pool4"
529 |   top: "score_pool4"
530 |   param {
531 |     lr_mult: 1
532 |     decay_mult: 1
533 |   }
534 |   param {
535 |     lr_mult: 2
536 |     decay_mult: 0
537 |   }
538 |   convolution_param {
539 |     num_output: 11
540 |     pad: 0
541 |     kernel_size: 1
542 |   }
543 | }
544 | layer {
545 |   name: "score_pool4c"
546 |   type: "Crop"
547 |   bottom: "score_pool4"
548 |   bottom: "upscore2"
549 |   top: "score_pool4c"
550 |   crop_param {
551 |     axis: 2
552 |     offset: 5
553 |   }
554 | }
555 | layer {
556 |   name: "fuse_pool4"
557 |   type: "Eltwise"
558 |   bottom: "upscore2"
559 |   bottom: "score_pool4c"
560 |   top: "fuse_pool4"
561 |   eltwise_param {
562 |     operation: SUM
563 |   }
564 | }
565 | layer {
566 |   name: "upscore_pool4"
567 |   type: "Deconvolution"
568 |   bottom: "fuse_pool4"
569 |   top: "upscore_pool4"
570 |   param {
571 |     lr_mult: 0
572 |   }
573 |   convolution_param {
574 |     num_output: 11
575 |     bias_term: false
576 |     kernel_size: 4
577 |     stride: 2
578 |   }
579 | }
580 | layer {
581 |   name: "scale_pool3"
582 |   type: "Scale"
583 |   bottom: "pool3"
584 |   top: "scale_pool3"
585 |   param {
586 |     lr_mult: 0
587 |   }
588 |   scale_param {
589 |     filler {
590 |       type: "constant"
591 |       value: 0.0001
592 |     }
593 |   }
594 | }
595 | layer {
596 |   name: "score_pool3"
597 |   type: "Convolution"
598 |   bottom: "scale_pool3"
599 |   top: "score_pool3"
600 |   param {
601 |     lr_mult: 1
602 |     decay_mult: 1
603 |   }
604 |   param {
605 |     lr_mult: 2
606 |     decay_mult: 0
607 |   }
608 |   convolution_param {
609 |     num_output: 11
610 |     pad: 0
611 |     kernel_size: 1
612 |   }
613 | }
614 | layer {
615 |   name: "score_pool3c"
616 |   type: "Crop"
617 |   bottom: "score_pool3"
618 |   bottom: "upscore_pool4"
619 |   top: "score_pool3c"
620 |   crop_param {
621 |     axis: 2
622 |     offset: 9
623 |   }
624 | }
625 | layer {
626 |   name: "fuse_pool3"
627 |   type: "Eltwise"
628 |   bottom: "upscore_pool4"
629 |   bottom: "score_pool3c"
630 |   top: "fuse_pool3"
631 |   eltwise_param {
632 |     operation: SUM
633 |   }
634 | }
635 | layer {
636 |   name: "upscore_pool3"
637 |   type: "Deconvolution"
638 |   bottom: "fuse_pool3"
639 |   top: "upscore_pool3"
640 |   param {
641 |     lr_mult: 0
642 |   }
643 |   convolution_param {
644 |     num_output: 11
645 |     bias_term: false
646 |     kernel_size: 4
647 |     stride: 2
648 |   }
649 | }
650 | 
651 | layer {
652 |   name: "scale_pool2"
653 |   type: "Scale"
654 |   bottom: "pool2"
655 |   top: "scale_pool2"
656 |   param {
657 |     lr_mult: 0
658 |   }
659 |   scale_param {
660 |     filler {
661 |       type: "constant"
662 |       value: 0.0001
663 |     }
664 |   }
665 | }
666 | layer {
667 |   name: "score_pool2"
668 |   type: "Convolution"
669 |   bottom: "scale_pool2"
670 |   top: "score_pool2"
671 |   param {
672 |     lr_mult: 1
673 |     decay_mult: 1
674 |   }
675 |   param {
676 |     lr_mult: 2
677 |     decay_mult: 0
678 |   }
679 |   convolution_param {
680 |     num_output: 11
681 |     pad: 0
682 |     kernel_size: 1
683 |   }
684 | }
# Crop the pool2 skip scores to the spatial extent of upscore_pool3 so that
# fuse_pool2 can sum the two blobs element-wise.
layer {
  name: "score_pool2c"
  type: "Crop"
  bottom: "score_pool2"
  bottom: "upscore_pool3"
  top: "score_pool2c"
  crop_param {
    axis: 2
    # NOTE(review): assuming the same backbone as val.prototxt (pad 100 on
    # conv1_1, 2x2/stride-2 pools, 7x7 fc6, 4x4/stride-2 deconvs), the
    # coordinate mapping that yields offsets 5 and 9 for score_pool4c and
    # score_pool3c gives 17 here, not 15 -- confirm the intended alignment and
    # keep train/val/deploy in sync if this is changed.
    offset: 15
  }
}
696 | layer {
697 |   name: "fuse_pool2"
698 |   type: "Eltwise"
699 |   bottom: "upscore_pool3"
700 |   bottom: "score_pool2c"
701 |   top: "fuse_pool2"
702 |   eltwise_param {
703 |     operation: SUM
704 |   }
705 | }
706 | layer {
707 |   name: "upscore4"
708 |   type: "Deconvolution"
709 |   bottom: "fuse_pool2"
710 |   top: "upscore4"
711 |   param {
712 |     lr_mult: 0
713 |   }
714 |   convolution_param {
715 |     num_output: 11
716 |     bias_term: false
717 |     kernel_size: 8
718 |     stride: 4
719 |   }
720 | }
721 | 
722 | 
# Crop the 4x-upsampled scores back to the spatial size of the input "data"
# blob; the result is what the loss layer compares against "label".
layer {
  name: "score"
  type: "Crop"
  bottom: "upscore4"
  bottom: "data"
  top: "score"
  crop_param {
    axis: 2
    # NOTE(review): 31 is the offset of the stock FCN-8s head (16x16/stride-8
    # deconv on fuse_pool3). With the extra pool2 stage and the 8x8/stride-4
    # upscore4 used here, the same coordinate mapping gives 33 -- confirm, and
    # keep train/val/deploy in sync if this is changed.
    offset: 31
  }
}
734 | 
735 | layer {
736 |   name: "loss"
737 |   type: "SoftmaxWithLoss"
738 |   bottom: "score"
739 |   bottom: "label"
740 |   top: "loss"
741 |   loss_param {
742 |     ignore_label: 255
743 |     normalize: false
744 |   }
745 | }
746 | 


--------------------------------------------------------------------------------
/fcn-improve/val.prototxt:
--------------------------------------------------------------------------------
# Validation input: Python data layer (voc_layers.VOCSegDataLayer) that emits
# one (image, label) pair per forward pass on the tops "data" and "label".
layer {
  name: "data"
  type: "Python"
  top: "data"
  top: "label"
  python_param {
    module: "voc_layers"
    layer: "VOCSegDataLayer"
    # param_str is a Python dict expression evaluated by the layer:
    #   voc_dir - dataset root holding <split>.txt, img/<name>.jpg and
    #             label/<name>.PNG
    #   split   - name of the list file; the layer disables random ordering
    #             for any split whose name does not contain 'train'
    #   seed    - only used when random ordering is active, so it has no
    #             effect for this 'val' split
    #   mean    - per-channel mean subtracted after the layer flips the image
    #             to BGR, hence given in B, G, R order
    param_str: "{\'voc_dir\': \'../fcn-pool/data/fcn-gf2-512-1024/valdata\', \'seed\': 1337, \'split\': \'val\', \'mean\': (109.39446,105.91758,89.61112)}"
  }
}
 12 | 
 13 | layer {
 14 |   name: "conv1_1"
 15 |   type: "Convolution"
 16 |   bottom: "data"
 17 |   top: "conv1_1"
 18 |   param {
 19 |     lr_mult: 1
 20 |     decay_mult: 1
 21 |   }
 22 |   param {
 23 |     lr_mult: 2
 24 |     decay_mult: 0
 25 |   }
 26 |   convolution_param {
 27 |     num_output: 64
 28 |     pad: 100
 29 |     kernel_size: 3
 30 |     stride: 1
 31 |   }
 32 | }
 33 | layer {
 34 |   name: "relu1_1"
 35 |   type: "ReLU"
 36 |   bottom: "conv1_1"
 37 |   top: "conv1_1"
 38 | }
 39 | layer {
 40 |   name: "conv1_2"
 41 |   type: "Convolution"
 42 |   bottom: "conv1_1"
 43 |   top: "conv1_2"
 44 |   param {
 45 |     lr_mult: 1
 46 |     decay_mult: 1
 47 |   }
 48 |   param {
 49 |     lr_mult: 2
 50 |     decay_mult: 0
 51 |   }
 52 |   convolution_param {
 53 |     num_output: 64
 54 |     pad: 1
 55 |     kernel_size: 3
 56 |     stride: 1
 57 |   }
 58 | }
 59 | layer {
 60 |   name: "relu1_2"
 61 |   type: "ReLU"
 62 |   bottom: "conv1_2"
 63 |   top: "conv1_2"
 64 | }
 65 | layer {
 66 |   name: "pool1"
 67 |   type: "Pooling"
 68 |   bottom: "conv1_2"
 69 |   top: "pool1"
 70 |   pooling_param {
 71 |     pool: MAX
 72 |     kernel_size: 2
 73 |     stride: 2
 74 |   }
 75 | }
 76 | layer {
 77 |   name: "conv2_1"
 78 |   type: "Convolution"
 79 |   bottom: "pool1"
 80 |   top: "conv2_1"
 81 |   param {
 82 |     lr_mult: 1
 83 |     decay_mult: 1
 84 |   }
 85 |   param {
 86 |     lr_mult: 2
 87 |     decay_mult: 0
 88 |   }
 89 |   convolution_param {
 90 |     num_output: 128
 91 |     pad: 1
 92 |     kernel_size: 3
 93 |     stride: 1
 94 |   }
 95 | }
 96 | layer {
 97 |   name: "relu2_1"
 98 |   type: "ReLU"
 99 |   bottom: "conv2_1"
100 |   top: "conv2_1"
101 | }
102 | layer {
103 |   name: "conv2_2"
104 |   type: "Convolution"
105 |   bottom: "conv2_1"
106 |   top: "conv2_2"
107 |   param {
108 |     lr_mult: 1
109 |     decay_mult: 1
110 |   }
111 |   param {
112 |     lr_mult: 2
113 |     decay_mult: 0
114 |   }
115 |   convolution_param {
116 |     num_output: 128
117 |     pad: 1
118 |     kernel_size: 3
119 |     stride: 1
120 |   }
121 | }
122 | layer {
123 |   name: "relu2_2"
124 |   type: "ReLU"
125 |   bottom: "conv2_2"
126 |   top: "conv2_2"
127 | }
128 | layer {
129 |   name: "pool2"
130 |   type: "Pooling"
131 |   bottom: "conv2_2"
132 |   top: "pool2"
133 |   pooling_param {
134 |     pool: MAX
135 |     kernel_size: 2
136 |     stride: 2
137 |   }
138 | }
139 | layer {
140 |   name: "conv3_1"
141 |   type: "Convolution"
142 |   bottom: "pool2"
143 |   top: "conv3_1"
144 |   param {
145 |     lr_mult: 1
146 |     decay_mult: 1
147 |   }
148 |   param {
149 |     lr_mult: 2
150 |     decay_mult: 0
151 |   }
152 |   convolution_param {
153 |     num_output: 256
154 |     pad: 1
155 |     kernel_size: 3
156 |     stride: 1
157 |   }
158 | }
159 | layer {
160 |   name: "relu3_1"
161 |   type: "ReLU"
162 |   bottom: "conv3_1"
163 |   top: "conv3_1"
164 | }
165 | layer {
166 |   name: "conv3_2"
167 |   type: "Convolution"
168 |   bottom: "conv3_1"
169 |   top: "conv3_2"
170 |   param {
171 |     lr_mult: 1
172 |     decay_mult: 1
173 |   }
174 |   param {
175 |     lr_mult: 2
176 |     decay_mult: 0
177 |   }
178 |   convolution_param {
179 |     num_output: 256
180 |     pad: 1
181 |     kernel_size: 3
182 |     stride: 1
183 |   }
184 | }
185 | layer {
186 |   name: "relu3_2"
187 |   type: "ReLU"
188 |   bottom: "conv3_2"
189 |   top: "conv3_2"
190 | }
191 | layer {
192 |   name: "conv3_3"
193 |   type: "Convolution"
194 |   bottom: "conv3_2"
195 |   top: "conv3_3"
196 |   param {
197 |     lr_mult: 1
198 |     decay_mult: 1
199 |   }
200 |   param {
201 |     lr_mult: 2
202 |     decay_mult: 0
203 |   }
204 |   convolution_param {
205 |     num_output: 256
206 |     pad: 1
207 |     kernel_size: 3
208 |     stride: 1
209 |   }
210 | }
211 | layer {
212 |   name: "relu3_3"
213 |   type: "ReLU"
214 |   bottom: "conv3_3"
215 |   top: "conv3_3"
216 | }
217 | layer {
218 |   name: "pool3"
219 |   type: "Pooling"
220 |   bottom: "conv3_3"
221 |   top: "pool3"
222 |   pooling_param {
223 |     pool: MAX
224 |     kernel_size: 2
225 |     stride: 2
226 |   }
227 | }
228 | layer {
229 |   name: "conv4_1"
230 |   type: "Convolution"
231 |   bottom: "pool3"
232 |   top: "conv4_1"
233 |   param {
234 |     lr_mult: 1
235 |     decay_mult: 1
236 |   }
237 |   param {
238 |     lr_mult: 2
239 |     decay_mult: 0
240 |   }
241 |   convolution_param {
242 |     num_output: 512
243 |     pad: 1
244 |     kernel_size: 3
245 |     stride: 1
246 |   }
247 | }
248 | layer {
249 |   name: "relu4_1"
250 |   type: "ReLU"
251 |   bottom: "conv4_1"
252 |   top: "conv4_1"
253 | }
254 | layer {
255 |   name: "conv4_2"
256 |   type: "Convolution"
257 |   bottom: "conv4_1"
258 |   top: "conv4_2"
259 |   param {
260 |     lr_mult: 1
261 |     decay_mult: 1
262 |   }
263 |   param {
264 |     lr_mult: 2
265 |     decay_mult: 0
266 |   }
267 |   convolution_param {
268 |     num_output: 512
269 |     pad: 1
270 |     kernel_size: 3
271 |     stride: 1
272 |   }
273 | }
274 | layer {
275 |   name: "relu4_2"
276 |   type: "ReLU"
277 |   bottom: "conv4_2"
278 |   top: "conv4_2"
279 | }
280 | layer {
281 |   name: "conv4_3"
282 |   type: "Convolution"
283 |   bottom: "conv4_2"
284 |   top: "conv4_3"
285 |   param {
286 |     lr_mult: 1
287 |     decay_mult: 1
288 |   }
289 |   param {
290 |     lr_mult: 2
291 |     decay_mult: 0
292 |   }
293 |   convolution_param {
294 |     num_output: 512
295 |     pad: 1
296 |     kernel_size: 3
297 |     stride: 1
298 |   }
299 | }
300 | layer {
301 |   name: "relu4_3"
302 |   type: "ReLU"
303 |   bottom: "conv4_3"
304 |   top: "conv4_3"
305 | }
306 | layer {
307 |   name: "pool4"
308 |   type: "Pooling"
309 |   bottom: "conv4_3"
310 |   top: "pool4"
311 |   pooling_param {
312 |     pool: MAX
313 |     kernel_size: 2
314 |     stride: 2
315 |   }
316 | }
317 | layer {
318 |   name: "conv5_1"
319 |   type: "Convolution"
320 |   bottom: "pool4"
321 |   top: "conv5_1"
322 |   param {
323 |     lr_mult: 1
324 |     decay_mult: 1
325 |   }
326 |   param {
327 |     lr_mult: 2
328 |     decay_mult: 0
329 |   }
330 |   convolution_param {
331 |     num_output: 512
332 |     pad: 1
333 |     kernel_size: 3
334 |     stride: 1
335 |   }
336 | }
337 | layer {
338 |   name: "relu5_1"
339 |   type: "ReLU"
340 |   bottom: "conv5_1"
341 |   top: "conv5_1"
342 | }
343 | layer {
344 |   name: "conv5_2"
345 |   type: "Convolution"
346 |   bottom: "conv5_1"
347 |   top: "conv5_2"
348 |   param {
349 |     lr_mult: 1
350 |     decay_mult: 1
351 |   }
352 |   param {
353 |     lr_mult: 2
354 |     decay_mult: 0
355 |   }
356 |   convolution_param {
357 |     num_output: 512
358 |     pad: 1
359 |     kernel_size: 3
360 |     stride: 1
361 |   }
362 | }
363 | layer {
364 |   name: "relu5_2"
365 |   type: "ReLU"
366 |   bottom: "conv5_2"
367 |   top: "conv5_2"
368 | }
369 | layer {
370 |   name: "conv5_3"
371 |   type: "Convolution"
372 |   bottom: "conv5_2"
373 |   top: "conv5_3"
374 |   param {
375 |     lr_mult: 1
376 |     decay_mult: 1
377 |   }
378 |   param {
379 |     lr_mult: 2
380 |     decay_mult: 0
381 |   }
382 |   convolution_param {
383 |     num_output: 512
384 |     pad: 1
385 |     kernel_size: 3
386 |     stride: 1
387 |   }
388 | }
389 | layer {
390 |   name: "relu5_3"
391 |   type: "ReLU"
392 |   bottom: "conv5_3"
393 |   top: "conv5_3"
394 | }
395 | layer {
396 |   name: "pool5"
397 |   type: "Pooling"
398 |   bottom: "conv5_3"
399 |   top: "pool5"
400 |   pooling_param {
401 |     pool: MAX
402 |     kernel_size: 2
403 |     stride: 2
404 |   }
405 | }
406 | layer {
407 |   name: "fc6"
408 |   type: "Convolution"
409 |   bottom: "pool5"
410 |   top: "fc6"
411 |   param {
412 |     lr_mult: 1
413 |     decay_mult: 1
414 |   }
415 |   param {
416 |     lr_mult: 2
417 |     decay_mult: 0
418 |   }
419 |   convolution_param {
420 |     num_output: 4096
421 |     pad: 0
422 |     kernel_size: 7
423 |     stride: 1
424 |   }
425 | }
426 | layer {
427 |   name: "relu6"
428 |   type: "ReLU"
429 |   bottom: "fc6"
430 |   top: "fc6"
431 | }
432 | layer {
433 |   name: "drop6"
434 |   type: "Dropout"
435 |   bottom: "fc6"
436 |   top: "fc6"
437 |   dropout_param {
438 |     dropout_ratio: 0.5
439 |   }
440 | }
441 | layer {
442 |   name: "fc7"
443 |   type: "Convolution"
444 |   bottom: "fc6"
445 |   top: "fc7"
446 |   param {
447 |     lr_mult: 1
448 |     decay_mult: 1
449 |   }
450 |   param {
451 |     lr_mult: 2
452 |     decay_mult: 0
453 |   }
454 |   convolution_param {
455 |     num_output: 4096
456 |     pad: 0
457 |     kernel_size: 1
458 |     stride: 1
459 |   }
460 | }
461 | layer {
462 |   name: "relu7"
463 |   type: "ReLU"
464 |   bottom: "fc7"
465 |   top: "fc7"
466 | }
467 | layer {
468 |   name: "drop7"
469 |   type: "Dropout"
470 |   bottom: "fc7"
471 |   top: "fc7"
472 |   dropout_param {
473 |     dropout_ratio: 0.5
474 |   }
475 | }
476 | layer {
477 |   name: "score_fr"
478 |   type: "Convolution"
479 |   bottom: "fc7"
480 |   top: "score_fr"
481 |   param {
482 |     lr_mult: 1
483 |     decay_mult: 1
484 |   }
485 |   param {
486 |     lr_mult: 2
487 |     decay_mult: 0
488 |   }
489 |   convolution_param {
490 |     num_output: 11
491 |     pad: 0
492 |     kernel_size: 1
493 |   }
494 | }
495 | layer {
496 |   name: "upscore2"
497 |   type: "Deconvolution"
498 |   bottom: "score_fr"
499 |   top: "upscore2"
500 |   param {
501 |     lr_mult: 0
502 |   }
503 |   convolution_param {
504 |     num_output: 11
505 |     bias_term: false
506 |     kernel_size: 4
507 |     stride: 2
508 |   }
509 | }
510 | layer {
511 |   name: "scale_pool4"
512 |   type: "Scale"
513 |   bottom: "pool4"
514 |   top: "scale_pool4"
515 |   param {
516 |     lr_mult: 0
517 |   }
518 |   scale_param {
519 |     filler {
520 |       type: "constant"
521 |       value: 0.01
522 |     }
523 |   }
524 | }
525 | layer {
526 |   name: "score_pool4"
527 |   type: "Convolution"
528 |   bottom: "scale_pool4"
529 |   top: "score_pool4"
530 |   param {
531 |     lr_mult: 1
532 |     decay_mult: 1
533 |   }
534 |   param {
535 |     lr_mult: 2
536 |     decay_mult: 0
537 |   }
538 |   convolution_param {
539 |     num_output: 11
540 |     pad: 0
541 |     kernel_size: 1
542 |   }
543 | }
544 | layer {
545 |   name: "score_pool4c"
546 |   type: "Crop"
547 |   bottom: "score_pool4"
548 |   bottom: "upscore2"
549 |   top: "score_pool4c"
550 |   crop_param {
551 |     axis: 2
552 |     offset: 5
553 |   }
554 | }
555 | layer {
556 |   name: "fuse_pool4"
557 |   type: "Eltwise"
558 |   bottom: "upscore2"
559 |   bottom: "score_pool4c"
560 |   top: "fuse_pool4"
561 |   eltwise_param {
562 |     operation: SUM
563 |   }
564 | }
565 | layer {
566 |   name: "upscore_pool4"
567 |   type: "Deconvolution"
568 |   bottom: "fuse_pool4"
569 |   top: "upscore_pool4"
570 |   param {
571 |     lr_mult: 0
572 |   }
573 |   convolution_param {
574 |     num_output: 11
575 |     bias_term: false
576 |     kernel_size: 4
577 |     stride: 2
578 |   }
579 | }
580 | layer {
581 |   name: "scale_pool3"
582 |   type: "Scale"
583 |   bottom: "pool3"
584 |   top: "scale_pool3"
585 |   param {
586 |     lr_mult: 0
587 |   }
588 |   scale_param {
589 |     filler {
590 |       type: "constant"
591 |       value: 0.0001
592 |     }
593 |   }
594 | }
595 | layer {
596 |   name: "score_pool3"
597 |   type: "Convolution"
598 |   bottom: "scale_pool3"
599 |   top: "score_pool3"
600 |   param {
601 |     lr_mult: 1
602 |     decay_mult: 1
603 |   }
604 |   param {
605 |     lr_mult: 2
606 |     decay_mult: 0
607 |   }
608 |   convolution_param {
609 |     num_output: 11
610 |     pad: 0
611 |     kernel_size: 1
612 |   }
613 | }
614 | layer {
615 |   name: "score_pool3c"
616 |   type: "Crop"
617 |   bottom: "score_pool3"
618 |   bottom: "upscore_pool4"
619 |   top: "score_pool3c"
620 |   crop_param {
621 |     axis: 2
622 |     offset: 9
623 |   }
624 | }
625 | layer {
626 |   name: "fuse_pool3"
627 |   type: "Eltwise"
628 |   bottom: "upscore_pool4"
629 |   bottom: "score_pool3c"
630 |   top: "fuse_pool3"
631 |   eltwise_param {
632 |     operation: SUM
633 |   }
634 | }
635 | layer {
636 |   name: "upscore_pool3"
637 |   type: "Deconvolution"
638 |   bottom: "fuse_pool3"
639 |   top: "upscore_pool3"
640 |   param {
641 |     lr_mult: 0
642 |   }
643 |   convolution_param {
644 |     num_output: 11
645 |     bias_term: false
646 |     kernel_size: 4
647 |     stride: 2
648 |   }
649 | }
650 | 
651 | layer {
652 |   name: "scale_pool2"
653 |   type: "Scale"
654 |   bottom: "pool2"
655 |   top: "scale_pool2"
656 |   param {
657 |     lr_mult: 0
658 |   }
659 |   scale_param {
660 |     filler {
661 |       type: "constant"
662 |       value: 0.0001
663 |     }
664 |   }
665 | }
666 | layer {
667 |   name: "score_pool2"
668 |   type: "Convolution"
669 |   bottom: "scale_pool2"
670 |   top: "score_pool2"
671 |   param {
672 |     lr_mult: 1
673 |     decay_mult: 1
674 |   }
675 |   param {
676 |     lr_mult: 2
677 |     decay_mult: 0
678 |   }
679 |   convolution_param {
680 |     num_output: 11
681 |     pad: 0
682 |     kernel_size: 1
683 |   }
684 | }
# Crop the pool2 skip scores to the spatial extent of upscore_pool3 so that
# fuse_pool2 can sum the two blobs element-wise.
layer {
  name: "score_pool2c"
  type: "Crop"
  bottom: "score_pool2"
  bottom: "upscore_pool3"
  top: "score_pool2c"
  crop_param {
    axis: 2
    # NOTE(review): with pad 100 on conv1_1, 2x2/stride-2 pools, the 7x7 fc6
    # and the 4x4/stride-2 deconvs of this net, the coordinate mapping that
    # yields offsets 5 and 9 for score_pool4c and score_pool3c gives 17 here,
    # not 15 -- confirm the intended alignment and keep train/val/deploy in
    # sync if this is changed.
    offset: 15
  }
}
696 | layer {
697 |   name: "fuse_pool2"
698 |   type: "Eltwise"
699 |   bottom: "upscore_pool3"
700 |   bottom: "score_pool2c"
701 |   top: "fuse_pool2"
702 |   eltwise_param {
703 |     operation: SUM
704 |   }
705 | }
706 | layer {
707 |   name: "upscore4"
708 |   type: "Deconvolution"
709 |   bottom: "fuse_pool2"
710 |   top: "upscore4"
711 |   param {
712 |     lr_mult: 0
713 |   }
714 |   convolution_param {
715 |     num_output: 11
716 |     bias_term: false
717 |     kernel_size: 8
718 |     stride: 4
719 |   }
720 | }
721 | 
722 | 
# Crop the 4x-upsampled scores back to the spatial size of the input "data"
# blob; the result is what the loss layer compares against "label".
layer {
  name: "score"
  type: "Crop"
  bottom: "upscore4"
  bottom: "data"
  top: "score"
  crop_param {
    axis: 2
    # NOTE(review): 31 is the offset of the stock FCN-8s head (16x16/stride-8
    # deconv on fuse_pool3). With the extra pool2 stage and the 8x8/stride-4
    # upscore4 used here, the same coordinate mapping gives 33 -- confirm, and
    # keep train/val/deploy in sync if this is changed.
    offset: 31
  }
}
734 | 
735 | layer {
736 |   name: "loss"
737 |   type: "SoftmaxWithLoss"
738 |   bottom: "score"
739 |   bottom: "label"
740 |   top: "loss"
741 |   loss_param {
742 |     ignore_label: 255
743 |     normalize: false
744 |   }
745 | }
746 | 


--------------------------------------------------------------------------------
/infer-4s.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from PIL import Image
 3 | import sys
 4 | from skimage.io import imread ,imshow ,imsave
 5 | from skimage import img_as_ubyte
 6 | from skimage import util
 7 | 
 8 | from copy import deepcopy
 9 | 
10 | 
11 | sys.path.append('/home/sensetime/DeepLearning/test/caffe/python')
12 | sys.path.append('/home/sensetime/DeepLearning/test/caffe/python/caffe')
13 | 
14 | import caffe
15 | import matplotlib.pyplot as plt
16 | 
# load image, switch to BGR, subtract mean, and make dims C x H x W for Caffe
im = Image.open('../imgtest/460.jpg')
#im = imread('/home/sensetime/DeepLearning/fxwaterextract/data/water/1024/121.tif')
in_ = np.array(im, dtype=np.float32)
in_ = in_[:,:,::-1]
# per-channel training mean, given in B, G, R order to match the flip above
in_ -= np.array((109.39446,105.91758,89.61112))
in_ = in_.transpose((2,0,1))

# load net
net = caffe.Net('../fcn-pool/fcn-improve/deploy.prototxt', '../fcn-pool/fcn-improve/snapshot/train_iter_15000.caffemodel', caffe.TEST)
# shape for input (data blob is N x C x H x W), set data
net.blobs['data'].reshape(1, *in_.shape)
net.blobs['data'].data[...] = in_
# run net and take argmax for prediction
net.forward()
out = net.blobs['score'].data[0].argmax(axis=0)
# A single pre-joined string prints identically under the Python 2 print
# statement and the Python 3 print function.
print(' '.join(str(v) for v in (type(out), out.dtype, out.shape, out.size)))

# NOTE(review): the label map is written under .../fcn-improve/ while the
# per-class maps below go to .../fcn4s-improve/ -- confirm both directories
# are intended; neither is created by this script.
imsave('../fcn-pool/data/fcn-gf2-512-1024/fcn-improve/fcn4s-460-1.5w.png',out)

# per-class probability maps; requires a 'softmax_score' blob in the deploy net
out1 = net.blobs['softmax_score'].data[0]
print(' '.join(str(v) for v in (type(out1), out1.dtype, out1.shape, out1.size)))

# Export channels 1..10 as 8-bit images; channel 0 is not exported
# (presumably the background class -- confirm).
for x in range(1, 11):
    im_1 = img_as_ubyte(out1[x])
    imsave("../fcn-pool/data/fcn-gf2-512-1024/fcn4s-improve/"+str(x)+"-460-1.5w-fcn.png",im_1)

print("success")
49 | # img.save('test3.png','png')
50 | # plt.imshow(out);
51 | # plt.axis('off')
52 | # plt.savefig('test2.png')
53 | # plt.show()
54 | 


--------------------------------------------------------------------------------
/mean.py:
--------------------------------------------------------------------------------
 1 | from skimage import io
 2 | import os
 3 | import numpy as np
 4 | 
 5 | 
 6 | path1='/home/sensetime/DeepLearning/test/fcn.berkeleyvision.org/data/fcn-gf2-512-1024/traindata/img'
 7 | str=os.listdir(path1)
 8 | l=len(str)
 9 | 
10 | d1=0;
11 | d2=0;
12 | d3=0;
13 | i=0
14 | for x in str:
15 |     i=i+1
16 |     im=io.imread(path1+"/"+x)
17 |     if i%1000==0:
18 |         print i
19 |     d1=d1+np.mean(im[:,:,0])
20 |     d2=d2+np.mean(im[:,:,1])
21 |     d3=d3+np.mean(im[:,:,2])
22 | 
23 | print  [d1/l,d2/l,d3/l]
24 | print l
25 | 
26 | # fcn_zy3 train RGB [89.611119269769276, 105.91758338559417, 109.3944624054897]
27 | 
28 | # g=fcn zy3 512-1024  rgb [89.279196311370441, 105.57667462140543, 109.08637249794963]


--------------------------------------------------------------------------------
/readme.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fangxu622/FCN-RS-Imagery-Class/98e797ad9797d049365c315d7ad3f449a6fe05f5/readme.txt


--------------------------------------------------------------------------------
/result.py:
--------------------------------------------------------------------------------
1 | from skimage import io
2 | 
# Quick visual check of a single validation image.
path0='/home/sensetime/DeepLearning/test/imgtest/val/277.jpg'
im0=io.imread(path0)

io.imshow(im0)
# imshow only queues the image; show() is required to actually open the
# window when this file is run as a non-interactive script.
io.show()


--------------------------------------------------------------------------------
/score.py:
--------------------------------------------------------------------------------
 1 | from __future__ import division
 2 | import caffe
 3 | import numpy as np
 4 | import os
 5 | import sys
 6 | from datetime import datetime
 7 | from PIL import Image
 8 | 
 9 | def fast_hist(a, b, n):
10 |     k = (a >= 0) & (a < n)
11 |     return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n)
12 | 
def compute_hist(net, save_dir, dataset, layer='score', gt='label'):
    """Run `net` once per dataset entry and accumulate a confusion matrix.

    net: network whose data layer supplies the next sample on every
        forward(); entries of `dataset` are only used to name saved files,
        so the data layer is presumably expected to iterate in the same
        order -- verify against the caller.
    save_dir: directory for the predicted label images, or a falsy value to
        skip saving. It is created (including parents) when missing and
        reused when it already exists.
    dataset: sequence of sample names.
    layer: name of the score blob; its channel count is the class count.
    gt: name of the ground-truth label blob.

    Returns (hist, mean_loss): the confusion matrix and the 'loss' blob
    value averaged over len(dataset).
    """
    n_cl = net.blobs[layer].channels
    if save_dir and not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    hist = np.zeros((n_cl, n_cl))
    loss = 0
    for idx in dataset:
        net.forward()
        # per-pixel predicted class, reused for the histogram and the saved image
        pred = net.blobs[layer].data[0].argmax(0)
        hist += fast_hist(net.blobs[gt].data[0, 0].flatten(),
                          pred.flatten(),
                          n_cl)

        if save_dir:
            im = Image.fromarray(pred.astype(np.uint8), mode='P')
            im.save(os.path.join(save_dir, idx + '.png'))
        # compute the loss as well
        loss += net.blobs['loss'].data.flat[0]
    return hist, loss / len(dataset)
31 | 
def seg_tests(solver, save_format, dataset, layer='score', gt='label'):
    """Evaluate the solver's first test net using the current training weights.

    solver: solver exposing `net`, `test_nets` and `iter`.
    save_format: format string for the output directory (formatted with the
        iteration number by do_seg_tests), or a falsy value to skip saving.
    dataset: sequence of sample names to evaluate.
    layer, gt: names of the score and ground-truth blobs.
    """
    # A single pre-joined string prints identically under the Python 2 print
    # statement and the Python 3 print function.
    print(' '.join(str(item) for item in ('>>>', datetime.now(), 'Begin seg tests')))
    test_net = solver.test_nets[0]
    # share the training weights with the test net before evaluating
    test_net.share_with(solver.net)
    do_seg_tests(test_net, solver.iter, save_format, dataset, layer, gt)
36 | 
def do_seg_tests(net, iter, save_format, dataset, layer='score', gt='label'):
    """Evaluate `net` on `dataset` and print segmentation metrics.

    Prints the mean loss, overall accuracy, mean per-class accuracy, mean IU
    and frequency-weighted IU, each prefixed with a timestamp and the
    iteration number.

    net: network to evaluate.
    iter: iteration number, used in the log lines and to format `save_format`.
    save_format: format string for the prediction output directory, or a
        falsy value to skip saving predictions.
    dataset: sequence of sample names.
    layer, gt: names of the score and ground-truth blobs.

    Returns the confusion matrix produced by compute_hist.
    """
    def report(*fields):
        # One pre-joined string prints identically under the Python 2 print
        # statement and the Python 3 print function.
        line = ('>>>', datetime.now(), 'Iteration', iter) + fields
        print(' '.join(str(item) for item in line))

    if save_format:
        save_format = save_format.format(iter)
    hist, loss = compute_hist(net, save_format, dataset, layer, gt)
    # mean loss
    report('loss', loss)
    # overall accuracy
    acc = np.diag(hist).sum() / hist.sum()
    report('overall accuracy', acc)
    # per-class accuracy; classes absent from the ground truth give nan and
    # are skipped by nanmean
    acc = np.diag(hist) / hist.sum(1)
    report('mean accuracy', np.nanmean(acc))
    # per-class IU
    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    report('mean IU', np.nanmean(iu))
    # IU weighted by how often each class occurs in the ground truth
    freq = hist.sum(1) / hist.sum()
    report('fwavacc', (freq[freq > 0] * iu[freq > 0]).sum())
    return hist
57 | 


--------------------------------------------------------------------------------
/voc_layers.py:
--------------------------------------------------------------------------------
  1 | import caffe
  2 | 
  3 | import numpy as np
  4 | from PIL import Image
  5 | 
  6 | import random
  7 | 
  8 | class VOCSegDataLayer(caffe.Layer):
  9 |     """
 10 |     Load (input image, label image) pairs from PASCAL VOC
 11 |     one-at-a-time while reshaping the net to preserve dimensions.
 12 | 
 13 |     Use this to feed data to a fully convolutional network.
 14 |     """
 15 | 
 16 |     def setup(self, bottom, top):
 17 |         """
 18 |         Setup data layer according to parameters:
 19 | 
 20 |         - voc_dir: path to PASCAL VOC year dir
 21 |         - split: train / val / test
 22 |         - mean: tuple of mean values to subtract
 23 |         - randomize: load in random order (default: True)
 24 |         - seed: seed for randomization (default: None / current time)
 25 | 
 26 |         for PASCAL VOC semantic segmentation.
 27 | 
 28 |         example
 29 | 
 30 |         params = dict(voc_dir="/path/to/PASCAL/VOC2011",
 31 |             mean=(104.00698793, 116.66876762, 122.67891434),
 32 |             split="val")
 33 |         """
 34 |         # config
 35 |         params = eval(self.param_str)
 36 |         self.voc_dir = params['voc_dir']
 37 |         self.split = params['split']
 38 |         self.mean = np.array(params['mean'])
 39 |         self.random = params.get('randomize', True)
 40 |         self.seed = params.get('seed', None)
 41 | 
 42 |         # two tops: data and label
 43 |         if len(top) != 2:
 44 |             raise Exception("Need to define two tops: data and label.")
 45 |         # data layers have no bottoms
 46 |         if len(bottom) != 0:
 47 |             raise Exception("Do not define a bottom.")
 48 | 
 49 |         # load indices for images and labels
 50 |         split_f  = '{}/{}.txt'.format(self.voc_dir,
 51 |                 self.split)
 52 |         self.indices = open(split_f, 'r').read().splitlines()
 53 |         self.idx = 0
 54 | 
 55 |         # make eval deterministic
 56 |         if 'train' not in self.split:
 57 |             self.random = False
 58 | 
 59 |         # randomization: seed and pick
 60 |         if self.random:
 61 |             random.seed(self.seed)
 62 |             self.idx = random.randint(0, len(self.indices)-1)
 63 | 
 64 | 
 65 |     def reshape(self, bottom, top):
 66 |         # load image + label image pair
 67 |         self.data = self.load_image(self.indices[self.idx])
 68 |         self.label = self.load_label(self.indices[self.idx])
 69 |         # reshape tops to fit (leading 1 is for batch dimension)
 70 |         top[0].reshape(1, *self.data.shape)
 71 |         top[1].reshape(1, *self.label.shape)
 72 | 
 73 | 
 74 |     def forward(self, bottom, top):
 75 |         # assign output
 76 |         top[0].data[...] = self.data
 77 |         top[1].data[...] = self.label
 78 | 
 79 |         # pick next input
 80 |         if self.random:
 81 |             self.idx = random.randint(0, len(self.indices)-1)
 82 |         else:
 83 |             self.idx += 1
 84 |             if self.idx == len(self.indices):
 85 |                 self.idx = 0
 86 | 
 87 | 
 88 |     def backward(self, top, propagate_down, bottom):
 89 |         pass
 90 | 
 91 | 
 92 |     def load_image(self, idx):
 93 |         """
 94 |         Load input image and preprocess for Caffe:
 95 |         - cast to float
 96 |         - switch channels RGB -> BGR
 97 |         - subtract mean
 98 |         - transpose to channel x height x width order
 99 |         """
100 |         im = Image.open('{}/img/{}.jpg'.format(self.voc_dir, idx))
101 |         in_ = np.array(im, dtype=np.float32)
102 |         in_ = in_[:,:,::-1]
103 |         in_ -= self.mean
104 |         in_ = in_.transpose((2,0,1))
105 |         return in_
106 | 
107 | 
108 |     def load_label(self, idx):
109 |         """
110 |         Load label image as 1 x height x width integer array of label indices.
111 |         The leading singleton dimension is required by the loss.
112 |         """
113 |         im = Image.open('{}/label/{}.PNG'.format(self.voc_dir, idx))
114 |         label = np.array(im, dtype=np.uint8)
115 |         label = label[np.newaxis, ...]
116 |         return label
117 | 
118 | 
class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. seg11valid is the set of segval11 that does not intersect with SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")

        Raises Exception if the layer is not wired with exactly two tops
        and zero bottoms.
        """
        # config
        # NOTE(review): eval() executes whatever expression param_str holds;
        # only load net definitions from trusted sources.
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels; the context manager makes sure
        # the split file is closed once its lines have been read
        split_f = '{}/{}.txt'.format(self.sbdd_dir, self.split)
        with open(split_f, 'r') as split_file:
            self.indices = split_file.read().splitlines()
        self.idx = 0

        # make eval deterministic: any split without 'train' in its name
        # is walked sequentially
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)

    def reshape(self, bottom, top):
        """Load the current image/label pair and resize both tops to fit."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops, then advance to the next index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            # sequential order, wrapping around at the end of the list
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """Do nothing: this layer performs no backward computation."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:,:,::-1]
        # in-place subtraction keeps the array float32
        in_ -= self.mean
        in_ = in_.transpose((2,0,1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        im = Image.open('{}/label/{}.PNG'.format(self.sbdd_dir, idx))
        label = np.array(im, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label

    def load_label_two(self, idx):
        """
        Alternative label loader that reads `<sbdd_dir>/cls/<idx>.mat`
        instead of a PNG.

        Takes the `Segmentation` field of the `GTcls` entry, casts it to
        uint8 and prepends a singleton axis, mirroring `load_label`.
        Not called by `reshape`, which uses `load_label`.
        """
        # imported locally so scipy is only needed when this loader is used
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label
243 | 


--------------------------------------------------------------------------------