├── .gitignore
├── Makefile
├── README.md
├── docker
    └── Dockerfile.template
├── fcn-8s
    ├── deploy.prototxt
    ├── legend.txt
    ├── readme.md
    ├── solve.py
    ├── solver.prototxt
    └── train_val.prototxt
├── images
    └── cat.jpg
├── notebook.sh
├── package.json
└── src
    ├── classify.py
    └── fcn-fwd.ipynb


/.gitignore:
--------------------------------------------------------------------------------
1 | data/
2 | *.gz
3 | *.caffemodel
4 | 
5 | node_modules
6 | .ipynb_checkpoints


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Remove a target whose recipe failed, so a truncated download or a
 2 | # half-written Dockerfile is never mistaken for an up-to-date file.
 3 | .DELETE_ON_ERROR:
 4 | 
 5 | # Pretrained FCN-8s weights, fetched by Caffe's model-zoo download script.
 6 | # CAFFE_ROOT must point at a checkout of the Caffe repository.
 7 | fcn-8s/fcn-8s-pascalcontext.caffemodel:
 8 | 	$(if $(CAFFE_ROOT),,$(error CAFFE_ROOT is not set))
 9 | 	${CAFFE_ROOT}/scripts/download_model_binary.py fcn-8s
10 | 
11 | # PASCAL VOC 2010 trainval archive. Declared intermediate so make deletes
12 | # the tarball once data/pascal-voc2010 has been built from it.
13 | .INTERMEDIATE: data/pascal-voc2010-trainval.tar
14 | data/pascal-voc2010-trainval.tar:
15 | 	mkdir -p $(dir $@)
16 | 	curl -fL http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar > $@
17 | 
18 | # Unpack the archive and move VOC2010 out of the VOCdevkit wrapper directory.
19 | data/pascal-voc2010: data/pascal-voc2010-trainval.tar
20 | 	tar -C data/ -xvf $<
21 | 	mv data/VOCdevkit/VOC2010 $@
22 | 	rmdir data/VOCdevkit
23 | 
24 | # Generated Dockerfiles: a FROM line selecting the cpu or gpu base image
25 | # (the pattern stem $*), followed by the shared template.
26 | .INTERMEDIATE: docker/gpu/Dockerfile docker/cpu/Dockerfile
27 | docker/gpu/Dockerfile docker/cpu/Dockerfile: docker/%/Dockerfile: docker/Dockerfile.template
28 | 	mkdir -p $(dir $@)
29 | 	echo "FROM developmentseed/caffe-$*:master" > $@
30 | 	cat $< >> $@
31 | 
32 | # Build the caffe-fcn:cpu and caffe-fcn:gpu images.
33 | .PHONY: build-docker
34 | build-docker: docker/gpu/Dockerfile docker/cpu/Dockerfile
35 | 	docker build -t caffe-fcn:cpu docker/cpu
36 | 	docker build -t caffe-fcn:gpu docker/gpu
37 | 
38 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # FCN
 2 | 
 3 | This is a simple, working example of "image segmentation" using a neural net
 4 | trained by Jonathan Long and Evan Shelhamer, as described in
 5 | [Fully Convolutional Networks for Semantic Segmentation](http://www.cs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf).
 6 | 
 7 | Trained model weights are from the [Caffe Model Zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo).
 8 | 
 9 | ## Setup
10 | 
11 | Clone this repo, then:
12 | 
13 | ```
14 | # download weights from model zoo
15 | CAFFE_ROOT=/path/to/caffe/repo make fcn-8s/fcn-8s-pascalcontext.caffemodel
16 | # build docker container
17 | make build-docker
18 | ```
19 | 
20 | ## Usage (iPython/Jupyter Notebook)
21 | 
22 | If you're running the Docker host in a VM (e.g., on a Mac), make sure to forward
23 | port 8888.
24 | 
25 | Then do:
26 | ```
27 | docker run -it --rm -v $(pwd):/workspace -p 8888:8888 caffe-fcn:cpu ./notebook.sh
28 | ```
29 | 
30 | Then go to http://localhost:8888
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.template:
--------------------------------------------------------------------------------
 1 | # Shared tail of the generated Dockerfiles: the Makefile writes a
 2 | # "FROM developmentseed/caffe-{cpu,gpu}:master" line and appends this file.
 3 | 
 4 | # Create the unprivileged "caffe" user that the image switches to below.
 5 | RUN adduser --disabled-password caffe
 6 | 
 7 | # Install Jupyter and write a notebook config for the caffe user that sets
 8 | # the server's listen address to '*' (all interfaces).
 9 | RUN pip install jupyter && \
10 |   mkdir -p -m 700 ~caffe/.jupyter/ && \
11 |   echo "c.NotebookApp.ip = '*'" >> ~caffe/.jupyter/jupyter_notebook_config.py && \
12 |   chown -R caffe:caffe ~caffe/.jupyter
13 | 
14 | # Port published as 8888 in the README's `docker run` example.
15 | EXPOSE 8888
16 | 
17 | # Hand ownership of /opt/caffe to the caffe user.
18 | RUN chown -R caffe:caffe /opt/caffe
19 | 
20 | USER caffe
21 | 


--------------------------------------------------------------------------------
/fcn-8s/deploy.prototxt:
--------------------------------------------------------------------------------
  1 | name: "FCN"
  2 | 
  3 | input: "data"
  4 | input_dim: 1    # batch size
  5 | input_dim: 3    # channels (BGR order, per fcn-8s/readme.md)
  6 | input_dim: 500  # height
  7 | input_dim: 500  # width
  8 | 
  9 | layer {
 10 |   name: "conv1_1"
 11 |   type: "Convolution"
 12 |   bottom: "data"
 13 |   top: "conv1_1"
 14 |   param {
 15 |     lr_mult: 1
 16 |     decay_mult: 1
 17 |   }
 18 |   param {
 19 |     lr_mult: 2
 20 |     decay_mult: 0
 21 |   }
 22 |   convolution_param {
 23 |     num_output: 64
 24 |     pad: 100
 25 |     kernel_size: 3
 26 |     engine: CAFFE
 27 |   }
 28 | }
 29 | layer {
 30 |   name: "relu1_1"
 31 |   type: "ReLU"
 32 |   bottom: "conv1_1"
 33 |   top: "conv1_1"
 34 | }
 35 | layer {
 36 |   name: "conv1_2"
 37 |   type: "Convolution"
 38 |   bottom: "conv1_1"
 39 |   top: "conv1_2"
 40 |   param {
 41 |     lr_mult: 1
 42 |     decay_mult: 1
 43 |   }
 44 |   param {
 45 |     lr_mult: 2
 46 |     decay_mult: 0
 47 |   }
 48 |   convolution_param {
 49 |     num_output: 64
 50 |     pad: 1
 51 |     kernel_size: 3
 52 |     engine: CAFFE
 53 |   }
 54 | }
 55 | layer {
 56 |   name: "relu1_2"
 57 |   type: "ReLU"
 58 |   bottom: "conv1_2"
 59 |   top: "conv1_2"
 60 | }
 61 | layer {
 62 |   name: "pool1"
 63 |   type: "Pooling"
 64 |   bottom: "conv1_2"
 65 |   top: "pool1"
 66 |   pooling_param {
 67 |     pool: MAX
 68 |     kernel_size: 2
 69 |     stride: 2
 70 |   }
 71 | }
 72 | layer {
 73 |   name: "conv2_1"
 74 |   type: "Convolution"
 75 |   bottom: "pool1"
 76 |   top: "conv2_1"
 77 |   param {
 78 |     lr_mult: 1
 79 |     decay_mult: 1
 80 |   }
 81 |   param {
 82 |     lr_mult: 2
 83 |     decay_mult: 0
 84 |   }
 85 |   convolution_param {
 86 |     num_output: 128
 87 |     pad: 1
 88 |     kernel_size: 3
 89 |     engine: CAFFE
 90 |   }
 91 | }
 92 | layer {
 93 |   name: "relu2_1"
 94 |   type: "ReLU"
 95 |   bottom: "conv2_1"
 96 |   top: "conv2_1"
 97 | }
 98 | layer {
 99 |   name: "conv2_2"
100 |   type: "Convolution"
101 |   bottom: "conv2_1"
102 |   top: "conv2_2"
103 |   param {
104 |     lr_mult: 1
105 |     decay_mult: 1
106 |   }
107 |   param {
108 |     lr_mult: 2
109 |     decay_mult: 0
110 |   }
111 |   convolution_param {
112 |     num_output: 128
113 |     pad: 1
114 |     kernel_size: 3
115 |     engine: CAFFE
116 |   }
117 | }
118 | layer {
119 |   name: "relu2_2"
120 |   type: "ReLU"
121 |   bottom: "conv2_2"
122 |   top: "conv2_2"
123 | }
124 | layer {
125 |   name: "pool2"
126 |   type: "Pooling"
127 |   bottom: "conv2_2"
128 |   top: "pool2"
129 |   pooling_param {
130 |     pool: MAX
131 |     kernel_size: 2
132 |     stride: 2
133 |   }
134 | }
135 | layer {
136 |   name: "conv3_1"
137 |   type: "Convolution"
138 |   bottom: "pool2"
139 |   top: "conv3_1"
140 |   param {
141 |     lr_mult: 1
142 |     decay_mult: 1
143 |   }
144 |   param {
145 |     lr_mult: 2
146 |     decay_mult: 0
147 |   }
148 |   convolution_param {
149 |     num_output: 256
150 |     pad: 1
151 |     kernel_size: 3
152 |     engine: CAFFE
153 |   }
154 | }
155 | layer {
156 |   name: "relu3_1"
157 |   type: "ReLU"
158 |   bottom: "conv3_1"
159 |   top: "conv3_1"
160 | }
161 | layer {
162 |   name: "conv3_2"
163 |   type: "Convolution"
164 |   bottom: "conv3_1"
165 |   top: "conv3_2"
166 |   param {
167 |     lr_mult: 1
168 |     decay_mult: 1
169 |   }
170 |   param {
171 |     lr_mult: 2
172 |     decay_mult: 0
173 |   }
174 |   convolution_param {
175 |     num_output: 256
176 |     pad: 1
177 |     kernel_size: 3
178 |     engine: CAFFE
179 |   }
180 | }
181 | layer {
182 |   name: "relu3_2"
183 |   type: "ReLU"
184 |   bottom: "conv3_2"
185 |   top: "conv3_2"
186 | }
187 | layer {
188 |   name: "conv3_3"
189 |   type: "Convolution"
190 |   bottom: "conv3_2"
191 |   top: "conv3_3"
192 |   param {
193 |     lr_mult: 1
194 |     decay_mult: 1
195 |   }
196 |   param {
197 |     lr_mult: 2
198 |     decay_mult: 0
199 |   }
200 |   convolution_param {
201 |     num_output: 256
202 |     pad: 1
203 |     kernel_size: 3
204 |     engine: CAFFE
205 |   }
206 | }
207 | layer {
208 |   name: "relu3_3"
209 |   type: "ReLU"
210 |   bottom: "conv3_3"
211 |   top: "conv3_3"
212 | }
213 | layer {
214 |   name: "pool3"
215 |   type: "Pooling"
216 |   bottom: "conv3_3"
217 |   top: "pool3"
218 |   pooling_param {
219 |     pool: MAX
220 |     kernel_size: 2
221 |     stride: 2
222 |   }
223 | }
224 | layer {
225 |   name: "conv4_1"
226 |   type: "Convolution"
227 |   bottom: "pool3"
228 |   top: "conv4_1"
229 |   param {
230 |     lr_mult: 1
231 |     decay_mult: 1
232 |   }
233 |   param {
234 |     lr_mult: 2
235 |     decay_mult: 0
236 |   }
237 |   convolution_param {
238 |     num_output: 512
239 |     pad: 1
240 |     kernel_size: 3
241 |     engine: CAFFE
242 |   }
243 | }
244 | layer {
245 |   name: "relu4_1"
246 |   type: "ReLU"
247 |   bottom: "conv4_1"
248 |   top: "conv4_1"
249 | }
250 | layer {
251 |   name: "conv4_2"
252 |   type: "Convolution"
253 |   bottom: "conv4_1"
254 |   top: "conv4_2"
255 |   param {
256 |     lr_mult: 1
257 |     decay_mult: 1
258 |   }
259 |   param {
260 |     lr_mult: 2
261 |     decay_mult: 0
262 |   }
263 |   convolution_param {
264 |     num_output: 512
265 |     pad: 1
266 |     kernel_size: 3
267 |     engine: CAFFE
268 |   }
269 | }
270 | layer {
271 |   name: "relu4_2"
272 |   type: "ReLU"
273 |   bottom: "conv4_2"
274 |   top: "conv4_2"
275 | }
276 | layer {
277 |   name: "conv4_3"
278 |   type: "Convolution"
279 |   bottom: "conv4_2"
280 |   top: "conv4_3"
281 |   param {
282 |     lr_mult: 1
283 |     decay_mult: 1
284 |   }
285 |   param {
286 |     lr_mult: 2
287 |     decay_mult: 0
288 |   }
289 |   convolution_param {
290 |     num_output: 512
291 |     pad: 1
292 |     kernel_size: 3
293 |     engine: CAFFE
294 |   }
295 | }
296 | layer {
297 |   name: "relu4_3"
298 |   type: "ReLU"
299 |   bottom: "conv4_3"
300 |   top: "conv4_3"
301 | }
302 | layer {
303 |   name: "pool4"
304 |   type: "Pooling"
305 |   bottom: "conv4_3"
306 |   top: "pool4"
307 |   pooling_param {
308 |     pool: MAX
309 |     kernel_size: 2
310 |     stride: 2
311 |   }
312 | }
313 | layer {
314 |   name: "conv5_1"
315 |   type: "Convolution"
316 |   bottom: "pool4"
317 |   top: "conv5_1"
318 |   param {
319 |     lr_mult: 1
320 |     decay_mult: 1
321 |   }
322 |   param {
323 |     lr_mult: 2
324 |     decay_mult: 0
325 |   }
326 |   convolution_param {
327 |     num_output: 512
328 |     pad: 1
329 |     kernel_size: 3
330 |     engine: CAFFE
331 |   }
332 | }
333 | layer {
334 |   name: "relu5_1"
335 |   type: "ReLU"
336 |   bottom: "conv5_1"
337 |   top: "conv5_1"
338 | }
339 | layer {
340 |   name: "conv5_2"
341 |   type: "Convolution"
342 |   bottom: "conv5_1"
343 |   top: "conv5_2"
344 |   param {
345 |     lr_mult: 1
346 |     decay_mult: 1
347 |   }
348 |   param {
349 |     lr_mult: 2
350 |     decay_mult: 0
351 |   }
352 |   convolution_param {
353 |     num_output: 512
354 |     pad: 1
355 |     kernel_size: 3
356 |     engine: CAFFE
357 |   }
358 | }
359 | layer {
360 |   name: "relu5_2"
361 |   type: "ReLU"
362 |   bottom: "conv5_2"
363 |   top: "conv5_2"
364 | }
365 | layer {
366 |   name: "conv5_3"
367 |   type: "Convolution"
368 |   bottom: "conv5_2"
369 |   top: "conv5_3"
370 |   param {
371 |     lr_mult: 1
372 |     decay_mult: 1
373 |   }
374 |   param {
375 |     lr_mult: 2
376 |     decay_mult: 0
377 |   }
378 |   convolution_param {
379 |     num_output: 512
380 |     pad: 1
381 |     kernel_size: 3
382 |     engine: CAFFE
383 |   }
384 | }
385 | layer {
386 |   name: "relu5_3"
387 |   type: "ReLU"
388 |   bottom: "conv5_3"
389 |   top: "conv5_3"
390 | }
391 | layer {
392 |   name: "pool5"
393 |   type: "Pooling"
394 |   bottom: "conv5_3"
395 |   top: "pool5"
396 |   pooling_param {
397 |     pool: MAX
398 |     kernel_size: 2
399 |     stride: 2
400 |   }
401 | }
402 | layer {
403 |   name: "fc6"
404 |   type: "Convolution"
405 |   bottom: "pool5"
406 |   top: "fc6"
407 |   param {
408 |     lr_mult: 1
409 |     decay_mult: 1
410 |   }
411 |   param {
412 |     lr_mult: 2
413 |     decay_mult: 0
414 |   }
415 |   convolution_param {
416 |     num_output: 4096
417 |     kernel_size: 7
418 |     engine: CAFFE
419 |   }
420 | }
421 | layer {
422 |   name: "relu6"
423 |   type: "ReLU"
424 |   bottom: "fc6"
425 |   top: "fc6"
426 | }
427 | layer {
428 |   name: "drop6"
429 |   type: "Dropout"
430 |   bottom: "fc6"
431 |   top: "fc6"
432 |   dropout_param {
433 |     dropout_ratio: 0.5
434 |   }
435 | }
436 | layer {
437 |   name: "fc7"
438 |   type: "Convolution"
439 |   bottom: "fc6"
440 |   top: "fc7"
441 |   param {
442 |     lr_mult: 1
443 |     decay_mult: 1
444 |   }
445 |   param {
446 |     lr_mult: 2
447 |     decay_mult: 0
448 |   }
449 |   convolution_param {
450 |     num_output: 4096
451 |     kernel_size: 1
452 |     engine: CAFFE
453 |   }
454 | }
455 | layer {
456 |   name: "relu7"
457 |   type: "ReLU"
458 |   bottom: "fc7"
459 |   top: "fc7"
460 | }
461 | layer {
462 |   name: "drop7"
463 |   type: "Dropout"
464 |   bottom: "fc7"
465 |   top: "fc7"
466 |   dropout_param {
467 |     dropout_ratio: 0.5
468 |   }
469 | }
470 | layer {
471 |   name: "score59"
472 |   type: "Convolution"
473 |   bottom: "fc7"
474 |   top: "score59"
475 |   param {
476 |     lr_mult: 1
477 |     decay_mult: 1
478 |   }
479 |   param {
480 |     lr_mult: 2
481 |     decay_mult: 0
482 |   }
483 |   convolution_param {
484 |     num_output: 60
485 |     kernel_size: 1
486 |     engine: CAFFE
487 |   }
488 | }
489 | layer {
490 |   name: "upscore2"
491 |   type: "Deconvolution"
492 |   bottom: "score59"
493 |   top: "upscore2"
494 |   param {
495 |     lr_mult: 1
496 |     decay_mult: 1
497 |   }
498 |   convolution_param {
499 |     num_output: 60
500 |     bias_term: false
501 |     kernel_size: 4
502 |     stride: 2
503 |   }
504 | }
505 | layer {
506 |   name: "score-pool4"
507 |   type: "Convolution"
508 |   bottom: "pool4"
509 |   top: "score-pool4"
510 |   param {
511 |     lr_mult: 1
512 |     decay_mult: 1
513 |   }
514 |   param {
515 |     lr_mult: 2
516 |     decay_mult: 0
517 |   }
518 |   convolution_param {
519 |     num_output: 60
520 |     kernel_size: 1
521 |     engine: CAFFE
522 |   }
523 | }
524 | layer { type: 'Crop' name: 'crop' bottom: 'score-pool4' bottom: 'upscore2'
525 |   top: 'score-pool4c' }
526 | layer {
527 |   name: "fuse"
528 |   type: "Eltwise"
529 |   bottom: "upscore2"
530 |   bottom: "score-pool4c"
531 |   top: "score-fused"
532 |   eltwise_param {
533 |     operation: SUM
534 |   }
535 | }
536 | layer {
537 |   name: "upsample-fused-16"
538 |   type: "Deconvolution"
539 |   bottom: "score-fused"
540 |   top: "score4"
541 |   param {
542 |     lr_mult: 1
543 |     decay_mult: 1
544 |   }
545 |   convolution_param {
546 |     num_output: 60
547 |     bias_term: false
548 |     kernel_size: 4
549 |     stride: 2
550 |   }
551 | }
552 | layer {
553 |   name: "score-pool3"
554 |   type: "Convolution"
555 |   bottom: "pool3"
556 |   top: "score-pool3"
557 |   param {
558 |     lr_mult: 1
559 |     decay_mult: 1
560 |   }
561 |   param {
562 |     lr_mult: 2
563 |     decay_mult: 0
564 |   }
565 |   convolution_param {
566 |     num_output: 60
567 |     kernel_size: 1
568 |     engine: CAFFE
569 |   }
570 | }
571 | layer { type: 'Crop' name: 'crop' bottom: 'score-pool3' bottom: 'score4'
572 |   top: 'score-pool3c' }
573 | layer {
574 |   name: "fuse"
575 |   type: "Eltwise"
576 |   bottom: "score4"
577 |   bottom: "score-pool3c"
578 |   top: "score-final"
579 |   eltwise_param {
580 |     operation: SUM
581 |   }
582 | }
583 | layer {
584 |   name: "upsample"
585 |   type: "Deconvolution"
586 |   bottom: "score-final"
587 |   top: "bigscore"
588 |   param {
589 |     lr_mult: 0
590 |   }
591 |   convolution_param {
592 |     num_output: 60
593 |     bias_term: false
594 |     kernel_size: 16
595 |     stride: 8
596 |   }
597 | }
598 | layer { type: 'Crop' name: 'crop' bottom: 'bigscore' bottom: 'data' top: 'score' }


--------------------------------------------------------------------------------
/fcn-8s/legend.txt:
--------------------------------------------------------------------------------
 1 | 1: aeroplane
 2 | 2: bicycle
 3 | 3: bird
 4 | 4: boat
 5 | 5: bottle
 6 | 6: bus
 7 | 7: car
 8 | 8: cat
 9 | 9: chair
10 | 10: cow
11 | 11: table
12 | 12: dog
13 | 13: horse
14 | 14: motorbike
15 | 15: person
16 | 16: pottedplant
17 | 17: sheep
18 | 18: sofa
19 | 19: train
20 | 20: tvmonitor
21 | 21: bag
22 | 22: bed
23 | 23: bench
24 | 24: book
25 | 25: building
26 | 26: cabinet
27 | 27: ceiling
28 | 28: cloth
29 | 29: computer
30 | 30: cup
31 | 31: door
32 | 32: fence
33 | 33: floor
34 | 34: flower
35 | 35: food
36 | 36: grass
37 | 37: ground
38 | 38: keyboard
39 | 39: light
40 | 40: mountain
41 | 41: mouse
42 | 42: curtain
43 | 43: platform
44 | 44: sign
45 | 45: plate
46 | 46: road
47 | 47: rock
48 | 48: shelves
49 | 49: sidewalk
50 | 50: sky
51 | 51: snow
52 | 52: bedclothes
53 | 53: track
54 | 54: tree
55 | 55: truck
56 | 56: wall
57 | 57: water
58 | 58: window
59 | 59: wood


--------------------------------------------------------------------------------
/fcn-8s/readme.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: FCN-8s Fully Convolutional Semantic Segmentation on PASCAL-Context
 3 | caffemodel: fcn-8s-pascalcontext.caffemodel
 4 | caffemodel_url: http://dl.caffe.berkeleyvision.org/fcn-8s-pascalcontext.caffemodel
 5 | sha1: 591e7d8bbc1c55ff151b6984bde85ff5160aee31
 6 | gist_id: 91eece041c19ff8968ee
 7 | ---
 8 | 
 9 | This is a model from the [paper](http://cs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf):
10 | 
11 |     Fully Convolutional Networks for Semantic Segmentation
12 |     Jonathan Long, Evan Shelhamer, Trevor Darrell
13 |     arXiv:1411.4038
14 | 
15 | This is the three stream, 8 pixel prediction stride version.
16 | 
17 | This model was trained for the PASCAL-context 59-class (60 including background) task. The final layer outputs scores for each class, which may be normalized via softmax or argmaxed to obtain per-pixel labels. The first label (index zero) is background, with the rest following the order given by the dataset authors.
18 | 
19 | The input is expected in BGR channel order, with the following per-channel mean subtracted:
20 | 
21 |     B 104.00698793 G 116.66876762 R 122.67891434
22 | 
23 | This is a pre-release: it requires unmerged PRs to run. It should be usable with the branch available at https://github.com/longjon/caffe/tree/future. Training ought to be possible with that code, but the original training scripts have not yet been ported.
24 | 
25 | This model obtains 37.8 mean I/U (intersection over union) on PASCAL-Context val.


--------------------------------------------------------------------------------
/fcn-8s/solve.py:
--------------------------------------------------------------------------------
 1 | from __future__ import division
 2 | import caffe
 3 | import numpy as np
 4 | 
 5 | # make a bilinear interpolation kernel
 6 | # credit @longjon
 7 | def upsample_filt(size):
 8 |     factor = (size + 1) // 2
 9 |     if size % 2 == 1:
10 |         center = factor - 1
11 |     else:
12 |         center = factor - 0.5
13 |     og = np.ogrid[:size, :size]
14 |     return (1 - abs(og[0] - center) / factor) * \
15 |            (1 - abs(og[1] - center) / factor)
16 | 
17 | # set parameters s.t. deconvolutional layers compute bilinear interpolation
18 | # N.B. this is for deconvolution without groups
19 | def interp_surgery(net, layers):
20 |     for l in layers:
21 |         m, k, h, w = net.params[l][0].data.shape
22 |         if m != k:
23 |             print 'input + output channels need to be the same'
24 |             raise
25 |         if h != w:
26 |             print 'filters need to be square'
27 |             raise
28 |         filt = upsample_filt(h)
29 |         net.params[l][0].data[range(m), range(k), :, :] = filt
30 | 
31 | # base net -- the learned coarser model
32 | base_weights = 'fcn-16s-pascalcontext.caffemodel'
33 | 
34 | # init
35 | caffe.set_mode_gpu()
36 | caffe.set_device(0)
37 | 
38 | solver = caffe.SGDSolver('solver.prototxt')
39 | 
40 | # do net surgery to set the deconvolution weights for bilinear interpolation
41 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
42 | interp_surgery(solver.net, interp_layers)
43 | 
44 | # copy base weights for fine-tuning
45 | solver.net.copy_from(base_weights)
46 | 
47 | # solve straight through -- a better approach is to define a solving loop to
48 | # 1. take SGD steps
49 | # 2. score the model by the test net `solver.test_nets[0]`
50 | # 3. repeat until satisfied
51 | solver.step(80000)


--------------------------------------------------------------------------------
/fcn-8s/solver.prototxt:
--------------------------------------------------------------------------------
 1 | net: "train_val.prototxt"
 2 | test_iter: 5105
 3 | # make test net, but don't invoke it from the solver itself
 4 | test_interval: 1000000
 5 | display: 20
 6 | average_loss: 20
 7 | lr_policy: "fixed"
 8 | # lr for unnormalized softmax -- see train_val definition
 9 | base_lr: 1e-14
10 | # high momentum
11 | momentum: 0.99
12 | # no gradient accumulation
13 | iter_size: 1
14 | max_iter: 80000
15 | weight_decay: 0.0005
16 | snapshot: 10000
17 | snapshot_prefix: "train"
18 | test_initialization: false


--------------------------------------------------------------------------------
/fcn-8s/train_val.prototxt:
--------------------------------------------------------------------------------
  1 | name: "FCN"
  2 | layer {
  3 |   name: "data"
  4 |   type: "Data"
  5 |   top: "data"
  6 |   include {
  7 |     phase: TRAIN
  8 |   }
  9 |   transform_param {
 10 |     mean_value: 104.00699
 11 |     mean_value: 116.66877
 12 |     mean_value: 122.67892
 13 |   }
 14 |   data_param {
 15 |     source: "../../data/pascal-context/pascal-context-train-lmdb"
 16 |     batch_size: 1
 17 |     backend: LMDB
 18 |   }
 19 | }
 20 | layer {
 21 |   name: "label"
 22 |   type: "Data"
 23 |   top: "label"
 24 |   include {
 25 |     phase: TRAIN
 26 |   }
 27 |   data_param {
 28 |     source: "../../data/pascal-context/pascal-context-train-gt59-lmdb"
 29 |     batch_size: 1
 30 |     backend: LMDB
 31 |   }
 32 | }
 33 | layer {
 34 |   name: "data"
 35 |   type: "Data"
 36 |   top: "data"
 37 |   include {
 38 |     phase: TEST
 39 |   }
 40 |   transform_param {
 41 |     mean_value: 104.00699
 42 |     mean_value: 116.66877
 43 |     mean_value: 122.67892
 44 |   }
 45 |   data_param {
 46 |     source: "../../data/pascal-context/pascal-context-val-lmdb"
 47 |     batch_size: 1
 48 |     backend: LMDB
 49 |   }
 50 | }
 51 | layer {
 52 |   name: "label"
 53 |   type: "Data"
 54 |   top: "label"
 55 |   include {
 56 |     phase: TEST
 57 |   }
 58 |   data_param {
 59 |     source: "../../data/pascal-context/pascal-context-val-gt59-lmdb"
 60 |     batch_size: 1
 61 |     backend: LMDB
 62 |   }
 63 | }
 64 | layer {
 65 |   name: "conv1_1"
 66 |   type: "Convolution"
 67 |   bottom: "data"
 68 |   top: "conv1_1"
 69 |   param {
 70 |     lr_mult: 1
 71 |     decay_mult: 1
 72 |   }
 73 |   param {
 74 |     lr_mult: 2
 75 |     decay_mult: 0
 76 |   }
 77 |   convolution_param {
 78 |     num_output: 64
 79 |     pad: 100
 80 |     kernel_size: 3
 81 |     engine: CAFFE
 82 |   }
 83 | }
 84 | layer {
 85 |   name: "relu1_1"
 86 |   type: "ReLU"
 87 |   bottom: "conv1_1"
 88 |   top: "conv1_1"
 89 | }
 90 | layer {
 91 |   name: "conv1_2"
 92 |   type: "Convolution"
 93 |   bottom: "conv1_1"
 94 |   top: "conv1_2"
 95 |   param {
 96 |     lr_mult: 1
 97 |     decay_mult: 1
 98 |   }
 99 |   param {
100 |     lr_mult: 2
101 |     decay_mult: 0
102 |   }
103 |   convolution_param {
104 |     num_output: 64
105 |     pad: 1
106 |     kernel_size: 3
107 |     engine: CAFFE
108 |   }
109 | }
110 | layer {
111 |   name: "relu1_2"
112 |   type: "ReLU"
113 |   bottom: "conv1_2"
114 |   top: "conv1_2"
115 | }
116 | layer {
117 |   name: "pool1"
118 |   type: "Pooling"
119 |   bottom: "conv1_2"
120 |   top: "pool1"
121 |   pooling_param {
122 |     pool: MAX
123 |     kernel_size: 2
124 |     stride: 2
125 |   }
126 | }
127 | layer {
128 |   name: "conv2_1"
129 |   type: "Convolution"
130 |   bottom: "pool1"
131 |   top: "conv2_1"
132 |   param {
133 |     lr_mult: 1
134 |     decay_mult: 1
135 |   }
136 |   param {
137 |     lr_mult: 2
138 |     decay_mult: 0
139 |   }
140 |   convolution_param {
141 |     num_output: 128
142 |     pad: 1
143 |     kernel_size: 3
144 |     engine: CAFFE
145 |   }
146 | }
147 | layer {
148 |   name: "relu2_1"
149 |   type: "ReLU"
150 |   bottom: "conv2_1"
151 |   top: "conv2_1"
152 | }
153 | layer {
154 |   name: "conv2_2"
155 |   type: "Convolution"
156 |   bottom: "conv2_1"
157 |   top: "conv2_2"
158 |   param {
159 |     lr_mult: 1
160 |     decay_mult: 1
161 |   }
162 |   param {
163 |     lr_mult: 2
164 |     decay_mult: 0
165 |   }
166 |   convolution_param {
167 |     num_output: 128
168 |     pad: 1
169 |     kernel_size: 3
170 |     engine: CAFFE
171 |   }
172 | }
173 | layer {
174 |   name: "relu2_2"
175 |   type: "ReLU"
176 |   bottom: "conv2_2"
177 |   top: "conv2_2"
178 | }
179 | layer {
180 |   name: "pool2"
181 |   type: "Pooling"
182 |   bottom: "conv2_2"
183 |   top: "pool2"
184 |   pooling_param {
185 |     pool: MAX
186 |     kernel_size: 2
187 |     stride: 2
188 |   }
189 | }
190 | layer {
191 |   name: "conv3_1"
192 |   type: "Convolution"
193 |   bottom: "pool2"
194 |   top: "conv3_1"
195 |   param {
196 |     lr_mult: 1
197 |     decay_mult: 1
198 |   }
199 |   param {
200 |     lr_mult: 2
201 |     decay_mult: 0
202 |   }
203 |   convolution_param {
204 |     num_output: 256
205 |     pad: 1
206 |     kernel_size: 3
207 |     engine: CAFFE
208 |   }
209 | }
210 | layer {
211 |   name: "relu3_1"
212 |   type: "ReLU"
213 |   bottom: "conv3_1"
214 |   top: "conv3_1"
215 | }
216 | layer {
217 |   name: "conv3_2"
218 |   type: "Convolution"
219 |   bottom: "conv3_1"
220 |   top: "conv3_2"
221 |   param {
222 |     lr_mult: 1
223 |     decay_mult: 1
224 |   }
225 |   param {
226 |     lr_mult: 2
227 |     decay_mult: 0
228 |   }
229 |   convolution_param {
230 |     num_output: 256
231 |     pad: 1
232 |     kernel_size: 3
233 |     engine: CAFFE
234 |   }
235 | }
236 | layer {
237 |   name: "relu3_2"
238 |   type: "ReLU"
239 |   bottom: "conv3_2"
240 |   top: "conv3_2"
241 | }
242 | layer {
243 |   name: "conv3_3"
244 |   type: "Convolution"
245 |   bottom: "conv3_2"
246 |   top: "conv3_3"
247 |   param {
248 |     lr_mult: 1
249 |     decay_mult: 1
250 |   }
251 |   param {
252 |     lr_mult: 2
253 |     decay_mult: 0
254 |   }
255 |   convolution_param {
256 |     num_output: 256
257 |     pad: 1
258 |     kernel_size: 3
259 |     engine: CAFFE
260 |   }
261 | }
262 | layer {
263 |   name: "relu3_3"
264 |   type: "ReLU"
265 |   bottom: "conv3_3"
266 |   top: "conv3_3"
267 | }
268 | layer {
269 |   name: "pool3"
270 |   type: "Pooling"
271 |   bottom: "conv3_3"
272 |   top: "pool3"
273 |   pooling_param {
274 |     pool: MAX
275 |     kernel_size: 2
276 |     stride: 2
277 |   }
278 | }
279 | layer {
280 |   name: "conv4_1"
281 |   type: "Convolution"
282 |   bottom: "pool3"
283 |   top: "conv4_1"
284 |   param {
285 |     lr_mult: 1
286 |     decay_mult: 1
287 |   }
288 |   param {
289 |     lr_mult: 2
290 |     decay_mult: 0
291 |   }
292 |   convolution_param {
293 |     num_output: 512
294 |     pad: 1
295 |     kernel_size: 3
296 |     engine: CAFFE
297 |   }
298 | }
299 | layer {
300 |   name: "relu4_1"
301 |   type: "ReLU"
302 |   bottom: "conv4_1"
303 |   top: "conv4_1"
304 | }
305 | layer {
306 |   name: "conv4_2"
307 |   type: "Convolution"
308 |   bottom: "conv4_1"
309 |   top: "conv4_2"
310 |   param {
311 |     lr_mult: 1
312 |     decay_mult: 1
313 |   }
314 |   param {
315 |     lr_mult: 2
316 |     decay_mult: 0
317 |   }
318 |   convolution_param {
319 |     num_output: 512
320 |     pad: 1
321 |     kernel_size: 3
322 |     engine: CAFFE
323 |   }
324 | }
325 | layer {
326 |   name: "relu4_2"
327 |   type: "ReLU"
328 |   bottom: "conv4_2"
329 |   top: "conv4_2"
330 | }
331 | layer {
332 |   name: "conv4_3"
333 |   type: "Convolution"
334 |   bottom: "conv4_2"
335 |   top: "conv4_3"
336 |   param {
337 |     lr_mult: 1
338 |     decay_mult: 1
339 |   }
340 |   param {
341 |     lr_mult: 2
342 |     decay_mult: 0
343 |   }
344 |   convolution_param {
345 |     num_output: 512
346 |     pad: 1
347 |     kernel_size: 3
348 |     engine: CAFFE
349 |   }
350 | }
351 | layer {
352 |   name: "relu4_3"
353 |   type: "ReLU"
354 |   bottom: "conv4_3"
355 |   top: "conv4_3"
356 | }
357 | layer {
358 |   name: "pool4"
359 |   type: "Pooling"
360 |   bottom: "conv4_3"
361 |   top: "pool4"
362 |   pooling_param {
363 |     pool: MAX
364 |     kernel_size: 2
365 |     stride: 2
366 |   }
367 | }
368 | layer {
369 |   name: "conv5_1"
370 |   type: "Convolution"
371 |   bottom: "pool4"
372 |   top: "conv5_1"
373 |   param {
374 |     lr_mult: 1
375 |     decay_mult: 1
376 |   }
377 |   param {
378 |     lr_mult: 2
379 |     decay_mult: 0
380 |   }
381 |   convolution_param {
382 |     num_output: 512
383 |     pad: 1
384 |     kernel_size: 3
385 |     engine: CAFFE
386 |   }
387 | }
388 | layer {
389 |   name: "relu5_1"
390 |   type: "ReLU"
391 |   bottom: "conv5_1"
392 |   top: "conv5_1"
393 | }
394 | layer {
395 |   name: "conv5_2"
396 |   type: "Convolution"
397 |   bottom: "conv5_1"
398 |   top: "conv5_2"
399 |   param {
400 |     lr_mult: 1
401 |     decay_mult: 1
402 |   }
403 |   param {
404 |     lr_mult: 2
405 |     decay_mult: 0
406 |   }
407 |   convolution_param {
408 |     num_output: 512
409 |     pad: 1
410 |     kernel_size: 3
411 |     engine: CAFFE
412 |   }
413 | }
414 | layer {
415 |   name: "relu5_2"
416 |   type: "ReLU"
417 |   bottom: "conv5_2"
418 |   top: "conv5_2"
419 | }
420 | layer {
421 |   name: "conv5_3"
422 |   type: "Convolution"
423 |   bottom: "conv5_2"
424 |   top: "conv5_3"
425 |   param {
426 |     lr_mult: 1
427 |     decay_mult: 1
428 |   }
429 |   param {
430 |     lr_mult: 2
431 |     decay_mult: 0
432 |   }
433 |   convolution_param {
434 |     num_output: 512
435 |     pad: 1
436 |     kernel_size: 3
437 |     engine: CAFFE
438 |   }
439 | }
440 | layer {
441 |   name: "relu5_3"
442 |   type: "ReLU"
443 |   bottom: "conv5_3"
444 |   top: "conv5_3"
445 | }
446 | layer {
447 |   name: "pool5"
448 |   type: "Pooling"
449 |   bottom: "conv5_3"
450 |   top: "pool5"
451 |   pooling_param {
452 |     pool: MAX
453 |     kernel_size: 2
454 |     stride: 2
455 |   }
456 | }
457 | layer {
458 |   name: "fc6"
459 |   type: "Convolution"
460 |   bottom: "pool5"
461 |   top: "fc6"
462 |   param {
463 |     lr_mult: 1
464 |     decay_mult: 1
465 |   }
466 |   param {
467 |     lr_mult: 2
468 |     decay_mult: 0
469 |   }
470 |   convolution_param {
471 |     num_output: 4096
472 |     kernel_size: 7
473 |     engine: CAFFE
474 |   }
475 | }
476 | layer {
477 |   name: "relu6"
478 |   type: "ReLU"
479 |   bottom: "fc6"
480 |   top: "fc6"
481 | }
482 | layer {
483 |   name: "drop6"
484 |   type: "Dropout"
485 |   bottom: "fc6"
486 |   top: "fc6"
487 |   dropout_param {
488 |     dropout_ratio: 0.5
489 |   }
490 | }
491 | layer {
492 |   name: "fc7"
493 |   type: "Convolution"
494 |   bottom: "fc6"
495 |   top: "fc7"
496 |   param {
497 |     lr_mult: 1
498 |     decay_mult: 1
499 |   }
500 |   param {
501 |     lr_mult: 2
502 |     decay_mult: 0
503 |   }
504 |   convolution_param {
505 |     num_output: 4096
506 |     kernel_size: 1
507 |     engine: CAFFE
508 |   }
509 | }
510 | layer {
511 |   name: "relu7"
512 |   type: "ReLU"
513 |   bottom: "fc7"
514 |   top: "fc7"
515 | }
516 | layer {
517 |   name: "drop7"
518 |   type: "Dropout"
519 |   bottom: "fc7"
520 |   top: "fc7"
521 |   dropout_param {
522 |     dropout_ratio: 0.5
523 |   }
524 | }
525 | layer {  # 1x1 convolution turning "fc7" into 60 score channels
526 |   name: "score59"
527 |   type: "Convolution"
528 |   bottom: "fc7"
529 |   top: "score59"
530 |   param {  # first param spec (the weights): base learning rate and weight decay
531 |     lr_mult: 1
532 |     decay_mult: 1
533 |   }
534 |   param {  # second param spec (the bias): twice the base learning rate, no weight decay
535 |     lr_mult: 2
536 |     decay_mult: 0
537 |   }
538 |   convolution_param {
539 |     num_output: 60  # same channel count as every other score/upsampling layer below
540 |     kernel_size: 1
541 |     engine: CAFFE  # request Caffe's own convolution implementation
542 |   }
543 | }
544 | layer {  # Deconvolution (transposed convolution) of "score59" into "upscore2"
545 |   name: "upscore2"
546 |   type: "Deconvolution"
547 |   bottom: "score59"
548 |   top: "upscore2"
549 |   param {  # only one param spec: the layer has weights but no bias (bias_term: false)
550 |     lr_mult: 1
551 |     decay_mult: 1
552 |   }
553 |   convolution_param {
554 |     num_output: 60
555 |     bias_term: false
556 |     kernel_size: 4  # 4x4 kernel
557 |     stride: 2
558 |   }
559 | }
560 | layer {  # 1x1 convolution turning "pool4" into 60 score channels
561 |   name: "score-pool4"
562 |   type: "Convolution"
563 |   bottom: "pool4"
564 |   top: "score-pool4"
565 |   param {  # first param spec (the weights): base learning rate and weight decay
566 |     lr_mult: 1
567 |     decay_mult: 1
568 |   }
569 |   param {  # second param spec (the bias): twice the base learning rate, no weight decay
570 |     lr_mult: 2
571 |     decay_mult: 0
572 |   }
573 |   convolution_param {
574 |     num_output: 60
575 |     kernel_size: 1
576 |     engine: CAFFE  # request Caffe's own convolution implementation
577 |   }
578 | }
579 | layer { type: 'Crop' name: 'crop' bottom: 'score-pool4' bottom: 'upscore2'  # NOTE(review): presumably crops the first bottom to the size of the second - confirm against the Caffe build in use
580 |   top: 'score-pool4c' }  # NOTE(review): the layer name 'crop' is reused by two later Crop layers in this file
581 | layer {  # element-wise sum of "upscore2" and "score-pool4c" into "score-fused"
582 |   name: "fuse"  # NOTE(review): this name is reused by the later Eltwise layer that produces "score-final"
583 |   type: "Eltwise"
584 |   bottom: "upscore2"
585 |   bottom: "score-pool4c"
586 |   top: "score-fused"
587 |   eltwise_param {
588 |     operation: SUM
589 |   }
590 | }
591 | layer {  # Deconvolution (transposed convolution) of "score-fused" into "score4"
592 |   name: "upsample-fused-16"
593 |   type: "Deconvolution"
594 |   bottom: "score-fused"
595 |   top: "score4"
596 |   param {  # only one param spec: the layer has weights but no bias (bias_term: false)
597 |     lr_mult: 1
598 |     decay_mult: 1
599 |   }
600 |   convolution_param {
601 |     num_output: 60
602 |     bias_term: false
603 |     kernel_size: 4  # 4x4 kernel
604 |     stride: 2
605 |   }
606 | }
607 | layer {  # 1x1 convolution turning "pool3" into 60 score channels
608 |   name: "score-pool3"
609 |   type: "Convolution"
610 |   bottom: "pool3"
611 |   top: "score-pool3"
612 |   param {  # first param spec (the weights): base learning rate and weight decay
613 |     lr_mult: 1
614 |     decay_mult: 1
615 |   }
616 |   param {  # second param spec (the bias): twice the base learning rate, no weight decay
617 |     lr_mult: 2
618 |     decay_mult: 0
619 |   }
620 |   convolution_param {
621 |     num_output: 60
622 |     kernel_size: 1
623 |     engine: CAFFE  # request Caffe's own convolution implementation
624 |   }
625 | }
626 | layer { type: 'Crop' name: 'crop' bottom: 'score-pool3' bottom: 'score4'  # NOTE(review): presumably crops the first bottom to the size of the second - confirm against the Caffe build in use
627 |   top: 'score-pool3c' }  # NOTE(review): the layer name 'crop' is also used by two other Crop layers in this file
628 | layer {  # element-wise sum of "score4" and "score-pool3c" into "score-final"
629 |   name: "fuse"  # NOTE(review): this name is also used by the earlier Eltwise layer that produces "score-fused"
630 |   type: "Eltwise"
631 |   bottom: "score4"
632 |   bottom: "score-pool3c"
633 |   top: "score-final"
634 |   eltwise_param {
635 |     operation: SUM
636 |   }
637 | }
638 | layer {  # Deconvolution (transposed convolution) of "score-final" into "bigscore"
639 |   name: "upsample"
640 |   type: "Deconvolution"
641 |   bottom: "score-final"
642 |   top: "bigscore"
643 |   param {
644 |     lr_mult: 0  # learning-rate multiplier 0: the solver leaves these weights unchanged
645 |   }
646 |   convolution_param {
647 |     num_output: 60
648 |     bias_term: false
649 |     kernel_size: 16  # 16x16 kernel
650 |     stride: 8
651 |   }
652 | }
653 | layer { type: 'Crop' name: 'crop' bottom: 'bigscore' bottom: 'data' top: 'score' }  # NOTE(review): presumably crops "bigscore" to the size of "data"; the name 'crop' is also used by two earlier Crop layers
654 | layer {  # softmax loss between the "score" blob and the "label" blob
655 |   name: "loss"
656 |   type: "SoftmaxWithLoss"
657 |   bottom: "score"
658 |   bottom: "label"
659 |   top: "loss"
660 |   loss_param {
661 |     normalize: false  # NOTE(review): turns off Caffe's per-label loss normalization - confirm the exact scaling for the Caffe version in use
662 |   }
663 | }


--------------------------------------------------------------------------------
/images/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/developmentseed/caffe-fcn/f990a58930fc274271fa53693c5a350e2f33cfca/images/cat.jpg


--------------------------------------------------------------------------------
/notebook.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Start the Jupyter notebook server without opening a web browser (package.json's "notebook" script runs this inside a Docker container).
3 | jupyter notebook --no-browser
4 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "caffe-fcn",
 3 |   "version": "1.0.0",
 4 |   "description": "Simple working example of image segmentation with a fully convolutional network (FCN) in Caffe",
 5 |   "main": "index.js",
 6 |   "scripts": {
 7 |     "notebook": "docker run -it --rm -v $(pwd):/workspace -p 8888:8888 caffe-fcn ./notebook.sh",
 8 |     "docker": "docker run -it --rm -v $(pwd):/workspace caffe-fcn"
 9 |   },
10 |   "keywords": [],
11 |   "author": "Anand Thakker <vestibule@anandthakker.net> (http://anandthakker.net/)",
12 |   "license": "ISC"
13 | }
14 | 


--------------------------------------------------------------------------------
/src/classify.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | import matplotlib.pyplot as plt
 5 | import os
 6 | import sys
 7 | caffe_root = os.environ['CAFFE_ROOT']
 8 | sys.path.insert(0, os.path.join(caffe_root, 'python'))
 9 | import caffe
10 | 
11 | plt.rcParams['image.interpolation'] = 'nearest'  # don't interpolate
12 | 
13 | if (os.environ.get('CAFFE_CPU_MODE')):
14 |     caffe.set_mode_cpu()
15 | else:
16 |     caffe.set_mode_gpu()
17 | 
18 | net_root = 'fcn-8s'
19 | model_def = net_root + '/deploy.prototxt'
20 | model_weights = net_root + '/fcn-8s-pascalcontext.caffemodel'
21 | net = caffe.Net(model_def, model_weights, caffe.TEST)
22 | 
23 | mu = np.array([104.00698793, 116.66876762, 122.67891434])
24 | # create transformer for the input called 'data'
25 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
26 | # move image channels to outermost dimension
27 | transformer.set_transpose('data', (2, 0, 1))
28 | # subtract the dataset-mean value in each channel
29 | transformer.set_mean('data', mu)
30 | # rescale from [0, 1] to [0, 255]
31 | transformer.set_raw_scale('data', 255)
32 | # swap channels from RGB to BGR
33 | transformer.set_channel_swap('data', (2, 1, 0))
34 | 
35 | image = caffe.io.load_image(sys.argv[1])
36 | transformed_image = transformer.preprocess('data', image)
37 | # copy the image data into the memory allocated for the net
38 | net.blobs['data'].data[...] = transformed_image
39 | 
40 | 
41 | print('Running image through net.')
42 | output = net.forward()
43 | print('Done.')
44 | 
45 | score = output['score'][0]
46 | classed = np.argmax(score, axis=0)
47 | names = dict()
48 | all_labels = ["0: Background"] + open(net_root + '/legend.txt').readlines()
49 | scores = np.unique(classed)
50 | labels = [all_labels[s] for s in scores]
51 | num_scores = len(scores)
52 | 
53 | 
54 | def rescore(c):
55 |     """ rescore values from original score values (0-59) to values ranging from
56 |     0 to num_scores-1 """
57 |     return np.where(scores == c)[0][0]
58 | 
59 | rescore = np.vectorize(rescore)
60 | painted = rescore(classed)
61 | 
62 | plt.figure(figsize=(10, 10))
63 | plt.imshow(painted)
64 | formatter = plt.FuncFormatter(lambda val, loc: labels[val])
65 | plt.colorbar(ticks=range(0, num_scores), format=formatter)
66 | plt.clim(-0.5, num_scores - 0.5)
67 | 
68 | plt.savefig(sys.argv[2])
69 | 


--------------------------------------------------------------------------------