├── .gitignore ├── Makefile ├── README.md ├── docker └── Dockerfile.template ├── fcn-8s ├── deploy.prototxt ├── legend.txt ├── readme.md ├── solve.py ├── solver.prototxt └── train_val.prototxt ├── images └── cat.jpg ├── notebook.sh ├── package.json └── src ├── classify.py └── fcn-fwd.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | *.gz 3 | *.caffemodel 4 | 5 | node_modules 6 | .ipynb_checkpoints -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | fcn-8s/fcn-8s-pascalcontext.caffemodel: 2 | ${CAFFE_ROOT}/scripts/download_model_binary.py fcn-8s 3 | 4 | .INTERMEDIATE: data/pascal-voc2010-trainval.tar 5 | data/pascal-voc2010-trainval.tar: 6 | curl http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar > $@ 7 | 8 | data/pascal-voc2010: data/pascal-voc2010-trainval.tar 9 | tar -C data/ -xvf $^ 10 | mv data/VOCdevkit/VOC2010 $@ 11 | rmdir data/VOCdevkit 12 | 13 | .INTERMEDIATE: docker/gpu/Dockerfile docker/cpu/Dockerfile 14 | docker/gpu/Dockerfile: 15 | mkdir -p $(dir $@) 16 | echo "FROM developmentseed/caffe-gpu:master" > docker/gpu/Dockerfile 17 | cat docker/Dockerfile.template >> docker/gpu/Dockerfile 18 | 19 | docker/cpu/Dockerfile: 20 | mkdir -p $(dir $@) 21 | echo "FROM developmentseed/caffe-cpu:master" > docker/cpu/Dockerfile 22 | cat docker/Dockerfile.template >> docker/cpu/Dockerfile 23 | 24 | .PHONY: build-docker 25 | build-docker: docker/gpu/Dockerfile docker/cpu/Dockerfile 26 | docker build -t caffe-fcn:cpu docker/cpu 27 | docker build -t caffe-fcn:gpu docker/gpu 28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FCN 2 | 3 | This is a simple, working example of "image segmentation" using a neural net 4 | trained by Jonathan Long and Evan Shelhamer, as described in 5 | [Fully Convolutional Networks for Semantic Segmentation](http://www.cs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf). 6 | 7 | Trained model weights are from the [Caffe Model Zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo). 8 | 9 | ## Setup 10 | 11 | Clone this repo, then: 12 | 13 | ``` 14 | # download weights from model zoo 15 | CAFFE_ROOT=/path/to/caffe/repo make fcn-8s/fcn-8s-pascalcontext.caffemodel 16 | # build docker container 17 | make build-docker 18 | ``` 19 | 20 | ## Usage (iPython/Jupyter Notebook) 21 | 22 | If you're running docker host in a VM (i.e., on a Mac), make sure to forward 23 | port 8888. 24 | 25 | Then do: 26 | ``` 27 | docker run -it --rm -v $(pwd):/workspace -p 8888:8888 caffe-fcn ./notebook.sh 28 | ``` 29 | 30 | Then go to http://localhost:8888 31 | 32 | 33 | -------------------------------------------------------------------------------- /docker/Dockerfile.template: -------------------------------------------------------------------------------- 1 | RUN adduser --disabled-password caffe 2 | 3 | RUN pip install jupyter && \ 4 | mkdir -p -m 700 ~caffe/.jupyter/ && \ 5 | echo "c.NotebookApp.ip = '*'" >> ~caffe/.jupyter/jupyter_notebook_config.py && \ 6 | chown -R caffe:caffe ~caffe/.jupyter 7 | 8 | EXPOSE 8888 9 | 10 | RUN chown -R caffe:caffe /opt/caffe 11 | 12 | USER caffe 13 | -------------------------------------------------------------------------------- /fcn-8s/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "FCN" 2 | 3 | input: "data" 4 | input_dim: 1 5 | input_dim: 3 6 | input_dim: 500 7 | input_dim: 500 8 | 9 | layer { 10 | name: "conv1_1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1_1" 14 | param { 15 | lr_mult: 1 16 | decay_mult: 1 17 | } 18 | param { 19 | lr_mult: 2 20 | decay_mult: 0 21 | } 22 | convolution_param { 23 | num_output: 64 24 | pad: 100 25 | kernel_size: 3 26 | engine: CAFFE 27 | } 28 | } 29 | layer { 30 | name: "relu1_1" 31 | type: "ReLU" 32 | bottom: "conv1_1" 33 | top: "conv1_1" 34 | } 35 | layer { 36 | name: "conv1_2" 37 | type: "Convolution" 38 | bottom: "conv1_1" 39 | top: "conv1_2" 40 | param { 41 | lr_mult: 1 42 | decay_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | decay_mult: 0 47 | } 48 | convolution_param { 49 | num_output: 64 50 | pad: 1 51 | kernel_size: 3 52 | engine: CAFFE 53 | } 54 | } 55 | layer { 56 | name: "relu1_2" 57 | type: "ReLU" 58 | bottom: "conv1_2" 59 | top: "conv1_2" 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1_2" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 2 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "conv2_1" 74 | type: "Convolution" 75 | bottom: "pool1" 76 | top: "conv2_1" 77 | param { 78 | lr_mult: 1 79 | decay_mult: 1 80 | } 81 | param { 82 | lr_mult: 2 83 | decay_mult: 0 84 | } 85 | convolution_param { 86 | num_output: 128 87 | pad: 1 88 | kernel_size: 3 89 | engine: CAFFE 90 | } 91 | } 92 | layer { 93 | name: "relu2_1" 94 | type: "ReLU" 95 | bottom: "conv2_1" 96 | top: "conv2_1" 97 | } 98 | layer { 99 | name: "conv2_2" 100 | type: "Convolution" 101 | bottom: "conv2_1" 102 | top: "conv2_2" 103 | param { 104 | lr_mult: 1 105 | decay_mult: 1 106 | } 107 | param { 108 | lr_mult: 2 109 | decay_mult: 0 110 | } 111 | convolution_param { 112 | num_output: 128 113 | pad: 1 114 | kernel_size: 3 115 | engine: CAFFE 116 | } 117 | } 118 | layer { 119 | name: "relu2_2" 120 | type: "ReLU" 121 | bottom: "conv2_2" 122 | top: "conv2_2" 123 | } 124 | layer { 125 | name: "pool2" 126 | type: "Pooling" 127 | bottom: "conv2_2" 128 | top: "pool2" 129 | pooling_param { 130 | pool: MAX 131 | kernel_size: 2 132 | stride: 2 133 | } 134 | } 135 | layer { 136 | name: "conv3_1" 137 | type: "Convolution" 138 | bottom: "pool2" 139 | top: "conv3_1" 140 | param { 141 | lr_mult: 1 142 | decay_mult: 1 143 | } 144 | param { 145 | lr_mult: 2 146 | decay_mult: 0 147 | } 148 | convolution_param { 149 | num_output: 256 150 | pad: 1 151 | kernel_size: 3 152 | engine: CAFFE 153 | } 154 | } 155 | layer { 156 | name: "relu3_1" 157 | type: "ReLU" 158 | bottom: "conv3_1" 159 | top: "conv3_1" 160 | } 161 | layer { 162 | name: "conv3_2" 163 | type: "Convolution" 164 | bottom: "conv3_1" 165 | top: "conv3_2" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 256 176 | pad: 1 177 | kernel_size: 3 178 | engine: CAFFE 179 | } 180 | } 181 | layer { 182 | name: "relu3_2" 183 | type: "ReLU" 184 | bottom: "conv3_2" 185 | top: "conv3_2" 186 | } 187 | layer { 188 | name: "conv3_3" 189 | type: "Convolution" 190 | bottom: "conv3_2" 191 | top: "conv3_3" 192 | param { 193 | lr_mult: 1 194 | decay_mult: 1 195 | } 196 | param { 197 | lr_mult: 2 198 | decay_mult: 0 199 | } 200 | convolution_param { 201 | num_output: 256 202 | pad: 1 203 | kernel_size: 3 204 | engine: CAFFE 205 | } 206 | } 207 | layer { 208 | name: "relu3_3" 209 | type: "ReLU" 210 | bottom: "conv3_3" 211 | top: "conv3_3" 212 | } 213 | layer { 214 | name: "pool3" 215 | type: "Pooling" 216 | bottom: "conv3_3" 217 | top: "pool3" 218 | pooling_param { 219 | pool: MAX 220 | kernel_size: 2 221 | stride: 2 222 | } 223 | } 224 | layer { 225 | name: "conv4_1" 226 | type: "Convolution" 227 | bottom: "pool3" 228 | top: "conv4_1" 229 | param { 230 | lr_mult: 1 231 | decay_mult: 1 232 | } 233 | param { 234 | lr_mult: 2 235 | decay_mult: 0 236 | } 237 | convolution_param { 238 | num_output: 512 239 | pad: 1 240 | kernel_size: 3 241 | engine: CAFFE 242 | } 243 | } 244 | layer { 245 | name: "relu4_1" 246 | type: "ReLU" 247 | bottom: "conv4_1" 248 | top: "conv4_1" 249 | } 250 | layer { 251 | name: "conv4_2" 252 | type: "Convolution" 253 | bottom: "conv4_1" 254 | top: "conv4_2" 255 | param { 256 | lr_mult: 1 257 | decay_mult: 1 258 | } 259 | param { 260 | lr_mult: 2 261 | decay_mult: 0 262 | } 263 | convolution_param { 264 | num_output: 512 265 | pad: 1 266 | kernel_size: 3 267 | engine: CAFFE 268 | } 269 | } 270 | layer { 271 | name: "relu4_2" 272 | type: "ReLU" 273 | bottom: "conv4_2" 274 | top: "conv4_2" 275 | } 276 | layer { 277 | name: "conv4_3" 278 | type: "Convolution" 279 | bottom: "conv4_2" 280 | top: "conv4_3" 281 | param { 282 | lr_mult: 1 283 | decay_mult: 1 284 | } 285 | param { 286 | lr_mult: 2 287 | decay_mult: 0 288 | } 289 | convolution_param { 290 | num_output: 512 291 | pad: 1 292 | kernel_size: 3 293 | engine: CAFFE 294 | } 295 | } 296 | layer { 297 | name: "relu4_3" 298 | type: "ReLU" 299 | bottom: "conv4_3" 300 | top: "conv4_3" 301 | } 302 | layer { 303 | name: "pool4" 304 | type: "Pooling" 305 | bottom: "conv4_3" 306 | top: "pool4" 307 | pooling_param { 308 | pool: MAX 309 | kernel_size: 2 310 | stride: 2 311 | } 312 | } 313 | layer { 314 | name: "conv5_1" 315 | type: "Convolution" 316 | bottom: "pool4" 317 | top: "conv5_1" 318 | param { 319 | lr_mult: 1 320 | decay_mult: 1 321 | } 322 | param { 323 | lr_mult: 2 324 | decay_mult: 0 325 | } 326 | convolution_param { 327 | num_output: 512 328 | pad: 1 329 | kernel_size: 3 330 | engine: CAFFE 331 | } 332 | } 333 | layer { 334 | name: "relu5_1" 335 | type: "ReLU" 336 | bottom: "conv5_1" 337 | top: "conv5_1" 338 | } 339 | layer { 340 | name: "conv5_2" 341 | type: "Convolution" 342 | bottom: "conv5_1" 343 | top: "conv5_2" 344 | param { 345 | lr_mult: 1 346 | decay_mult: 1 347 | } 348 | param { 349 | lr_mult: 2 350 | decay_mult: 0 351 | } 352 | convolution_param { 353 | num_output: 512 354 | pad: 1 355 | kernel_size: 3 356 | engine: CAFFE 357 | } 358 | } 359 | layer { 360 | name: "relu5_2" 361 | type: "ReLU" 362 | bottom: "conv5_2" 363 | top: "conv5_2" 364 | } 365 | layer { 366 | name: "conv5_3" 367 | type: "Convolution" 368 | bottom: "conv5_2" 369 | top: "conv5_3" 370 | param { 371 | lr_mult: 1 372 | decay_mult: 1 373 | } 374 | param { 375 | lr_mult: 2 376 | decay_mult: 0 377 | } 378 | convolution_param { 379 | num_output: 512 380 | pad: 1 381 | kernel_size: 3 382 | engine: CAFFE 383 | } 384 | } 385 | layer { 386 | name: "relu5_3" 387 | type: "ReLU" 388 | bottom: "conv5_3" 389 | top: "conv5_3" 390 | } 391 | layer { 392 | name: "pool5" 393 | type: "Pooling" 394 | bottom: "conv5_3" 395 | top: "pool5" 396 | pooling_param { 397 | pool: MAX 398 | kernel_size: 2 399 | stride: 2 400 | } 401 | } 402 | layer { 403 | name: "fc6" 404 | type: "Convolution" 405 | bottom: "pool5" 406 | top: "fc6" 407 | param { 408 | lr_mult: 1 409 | decay_mult: 1 410 | } 411 | param { 412 | lr_mult: 2 413 | decay_mult: 0 414 | } 415 | convolution_param { 416 | num_output: 4096 417 | kernel_size: 7 418 | engine: CAFFE 419 | } 420 | } 421 | layer { 422 | name: "relu6" 423 | type: "ReLU" 424 | bottom: "fc6" 425 | top: "fc6" 426 | } 427 | layer { 428 | name: "drop6" 429 | type: "Dropout" 430 | bottom: "fc6" 431 | top: "fc6" 432 | dropout_param { 433 | dropout_ratio: 0.5 434 | } 435 | } 436 | layer { 437 | name: "fc7" 438 | type: "Convolution" 439 | bottom: "fc6" 440 | top: "fc7" 441 | param { 442 | lr_mult: 1 443 | decay_mult: 1 444 | } 445 | param { 446 | lr_mult: 2 447 | decay_mult: 0 448 | } 449 | convolution_param { 450 | num_output: 4096 451 | kernel_size: 1 452 | engine: CAFFE 453 | } 454 | } 455 | layer { 456 | name: "relu7" 457 | type: "ReLU" 458 | bottom: "fc7" 459 | top: "fc7" 460 | } 461 | layer { 462 | name: "drop7" 463 | type: "Dropout" 464 | bottom: "fc7" 465 | top: "fc7" 466 | dropout_param { 467 | dropout_ratio: 0.5 468 | } 469 | } 470 | layer { 471 | name: "score59" 472 | type: "Convolution" 473 | bottom: "fc7" 474 | top: "score59" 475 | param { 476 | lr_mult: 1 477 | decay_mult: 1 478 | } 479 | param { 480 | lr_mult: 2 481 | decay_mult: 0 482 | } 483 | convolution_param { 484 | num_output: 60 485 | kernel_size: 1 486 | engine: CAFFE 487 | } 488 | } 489 | layer { 490 | name: "upscore2" 491 | type: "Deconvolution" 492 | bottom: "score59" 493 | top: "upscore2" 494 | param { 495 | lr_mult: 1 496 | decay_mult: 1 497 | } 498 | convolution_param { 499 | num_output: 60 500 | bias_term: false 501 | kernel_size: 4 502 | stride: 2 503 | } 504 | } 505 | layer { 506 | name: "score-pool4" 507 | type: "Convolution" 508 | bottom: "pool4" 509 | top: "score-pool4" 510 | param { 511 | lr_mult: 1 512 | decay_mult: 1 513 | } 514 | param { 515 | lr_mult: 2 516 | decay_mult: 0 517 | } 518 | convolution_param { 519 | num_output: 60 520 | kernel_size: 1 521 | engine: CAFFE 522 | } 523 | } 524 | layer { type: 'Crop' name: 'crop' bottom: 'score-pool4' bottom: 'upscore2' 525 | top: 'score-pool4c' } 526 | layer { 527 | name: "fuse" 528 | type: "Eltwise" 529 | bottom: "upscore2" 530 | bottom: "score-pool4c" 531 | top: "score-fused" 532 | eltwise_param { 533 | operation: SUM 534 | } 535 | } 536 | layer { 537 | name: "upsample-fused-16" 538 | type: "Deconvolution" 539 | bottom: "score-fused" 540 | top: "score4" 541 | param { 542 | lr_mult: 1 543 | decay_mult: 1 544 | } 545 | convolution_param { 546 | num_output: 60 547 | bias_term: false 548 | kernel_size: 4 549 | stride: 2 550 | } 551 | } 552 | layer { 553 | name: "score-pool3" 554 | type: "Convolution" 555 | bottom: "pool3" 556 | top: "score-pool3" 557 | param { 558 | lr_mult: 1 559 | decay_mult: 1 560 | } 561 | param { 562 | lr_mult: 2 563 | decay_mult: 0 564 | } 565 | convolution_param { 566 | num_output: 60 567 | kernel_size: 1 568 | engine: CAFFE 569 | } 570 | } 571 | layer { type: 'Crop' name: 'crop' bottom: 'score-pool3' bottom: 'score4' 572 | top: 'score-pool3c' } 573 | layer { 574 | name: "fuse" 575 | type: "Eltwise" 576 | bottom: "score4" 577 | bottom: "score-pool3c" 578 | top: "score-final" 579 | eltwise_param { 580 | operation: SUM 581 | } 582 | } 583 | layer { 584 | name: "upsample" 585 | type: "Deconvolution" 586 | bottom: "score-final" 587 | top: "bigscore" 588 | param { 589 | lr_mult: 0 590 | } 591 | convolution_param { 592 | num_output: 60 593 | bias_term: false 594 | kernel_size: 16 595 | stride: 8 596 | } 597 | } 598 | layer { type: 'Crop' name: 'crop' bottom: 'bigscore' bottom: 'data' top: 'score' } -------------------------------------------------------------------------------- /fcn-8s/legend.txt: -------------------------------------------------------------------------------- 1 | 1: aeroplane 2 | 2: bicycle 3 | 3: bird 4 | 4: boat 5 | 5: bottle 6 | 6: bus 7 | 7: car 8 | 8: cat 9 | 9: chair 10 | 10: cow 11 | 11: table 12 | 12: dog 13 | 13: horse 14 | 14: motorbike 15 | 15: person 16 | 16: pottedplant 17 | 17: sheep 18 | 18: sofa 19 | 19: train 20 | 20: tvmonitor 21 | 21: bag 22 | 22: bed 23 | 23: bench 24 | 24: book 25 | 25: building 26 | 26: cabinet 27 | 27: ceiling 28 | 28: cloth 29 | 29: computer 30 | 30: cup 31 | 31: door 32 | 32: fence 33 | 33: floor 34 | 34: flower 35 | 35: food 36 | 36: grass 37 | 37: ground 38 | 38: keyboard 39 | 39: light 40 | 40: mountain 41 | 41: mouse 42 | 42: curtain 43 | 43: platform 44 | 44: sign 45 | 45: plate 46 | 46: road 47 | 47: rock 48 | 48: shelves 49 | 49: sidewalk 50 | 50: sky 51 | 51: snow 52 | 52: bedclothes 53 | 53: track 54 | 54: tree 55 | 55: truck 56 | 56: wall 57 | 57: water 58 | 58: window 59 | 59: wood -------------------------------------------------------------------------------- /fcn-8s/readme.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: FCN-8s Fully Convolutional Semantic Segmentation on PASCAL-Context 3 | caffemodel: fcn-8s-pascalcontext.caffemodel 4 | caffemodel_url: http://dl.caffe.berkeleyvision.org/fcn-8s-pascalcontext.caffemodel 5 | sha1: 591e7d8bbc1c55ff151b6984bde85ff5160aee31 6 | gist_id: 91eece041c19ff8968ee 7 | --- 8 | 9 | This is a model from the [paper](http://cs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf): 10 | 11 | Fully Convolutional Networks for Semantic Segmentation 12 | Jonathan Long, Evan Shelhamer, Trevor Darrell 13 | arXiv:1411.4038 14 | 15 | This is the three stream, 8 pixel prediction stride version. 16 | 17 | This model was trained for the PASCAL-context 59-class (60 including background) task. The final layer outputs scores for each class, which may be normalized via softmax or argmaxed to obtain per-pixel labels. The first label (index zero) is background, with the rest following the order given by the dataset authors. 18 | 19 | The input is expected in BGR channel order, with the following per-channel mean subtracted: 20 | 21 | B 104.00698793 G 116.66876762 R 122.67891434 22 | 23 | This is a pre-release: it requires unmerged PRs to run. It should be usable with the branch available at https://github.com/longjon/caffe/tree/future. Training ought to be possible with that code, but the original training scripts have not yet been ported. 24 | 25 | This model obtains 37.8 mean I/U on PASCAL-Context val. -------------------------------------------------------------------------------- /fcn-8s/solve.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import caffe 3 | import numpy as np 4 | 5 | # make a bilinear interpolation kernel 6 | # credit @longjon 7 | def upsample_filt(size): 8 | factor = (size + 1) // 2 9 | if size % 2 == 1: 10 | center = factor - 1 11 | else: 12 | center = factor - 0.5 13 | og = np.ogrid[:size, :size] 14 | return (1 - abs(og[0] - center) / factor) * \ 15 | (1 - abs(og[1] - center) / factor) 16 | 17 | # set parameters s.t. deconvolutional layers compute bilinear interpolation 18 | # N.B. this is for deconvolution without groups 19 | def interp_surgery(net, layers): 20 | for l in layers: 21 | m, k, h, w = net.params[l][0].data.shape 22 | if m != k: 23 | print 'input + output channels need to be the same' 24 | raise 25 | if h != w: 26 | print 'filters need to be square' 27 | raise 28 | filt = upsample_filt(h) 29 | net.params[l][0].data[range(m), range(k), :, :] = filt 30 | 31 | # base net -- the learned coarser model 32 | base_weights = 'fcn-16s-pascalcontext.caffemodel' 33 | 34 | # init 35 | caffe.set_mode_gpu() 36 | caffe.set_device(0) 37 | 38 | solver = caffe.SGDSolver('solver.prototxt') 39 | 40 | # do net surgery to set the deconvolution weights for bilinear interpolation 41 | interp_layers = [k for k in solver.net.params.keys() if 'up' in k] 42 | interp_surgery(solver.net, interp_layers) 43 | 44 | # copy base weights for fine-tuning 45 | solver.net.copy_from(base_weights) 46 | 47 | # solve straight through -- a better approach is to define a solving loop to 48 | # 1. take SGD steps 49 | # 2. score the model by the test net `solver.test_nets[0]` 50 | # 3. repeat until satisfied 51 | solver.step(80000) -------------------------------------------------------------------------------- /fcn-8s/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "train_val.prototxt" 2 | test_iter: 5105 3 | # make test net, but don't invoke it from the solver itself 4 | test_interval: 1000000 5 | display: 20 6 | average_loss: 20 7 | lr_policy: "fixed" 8 | # lr for unnormalized softmax -- see train_val definition 9 | base_lr: 1e-14 10 | # high momentum 11 | momentum: 0.99 12 | # no gradient accumulation 13 | iter_size: 1 14 | max_iter: 80000 15 | weight_decay: 0.0005 16 | snapshot: 10000 17 | snapshot_prefix: "train" 18 | test_initialization: false -------------------------------------------------------------------------------- /fcn-8s/train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "FCN" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | include { 7 | phase: TRAIN 8 | } 9 | transform_param { 10 | mean_value: 104.00699 11 | mean_value: 116.66877 12 | mean_value: 122.67892 13 | } 14 | data_param { 15 | source: "../../data/pascal-context/pascal-context-train-lmdb" 16 | batch_size: 1 17 | backend: LMDB 18 | } 19 | } 20 | layer { 21 | name: "label" 22 | type: "Data" 23 | top: "label" 24 | include { 25 | phase: TRAIN 26 | } 27 | data_param { 28 | source: "../../data/pascal-context/pascal-context-train-gt59-lmdb" 29 | batch_size: 1 30 | backend: LMDB 31 | } 32 | } 33 | layer { 34 | name: "data" 35 | type: "Data" 36 | top: "data" 37 | include { 38 | phase: TEST 39 | } 40 | transform_param { 41 | mean_value: 104.00699 42 | mean_value: 116.66877 43 | mean_value: 122.67892 44 | } 45 | data_param { 46 | source: "../../data/pascal-context/pascal-context-val-lmdb" 47 | batch_size: 1 48 | backend: LMDB 49 | } 50 | } 51 | layer { 52 | name: "label" 53 | type: "Data" 54 | top: "label" 55 | include { 56 | phase: TEST 57 | } 58 | data_param { 59 | source: "../../data/pascal-context/pascal-context-val-gt59-lmdb" 60 | batch_size: 1 61 | backend: LMDB 62 | } 63 | } 64 | layer { 65 | name: "conv1_1" 66 | type: "Convolution" 67 | bottom: "data" 68 | top: "conv1_1" 69 | param { 70 | lr_mult: 1 71 | decay_mult: 1 72 | } 73 | param { 74 | lr_mult: 2 75 | decay_mult: 0 76 | } 77 | convolution_param { 78 | num_output: 64 79 | pad: 100 80 | kernel_size: 3 81 | engine: CAFFE 82 | } 83 | } 84 | layer { 85 | name: "relu1_1" 86 | type: "ReLU" 87 | bottom: "conv1_1" 88 | top: "conv1_1" 89 | } 90 | layer { 91 | name: "conv1_2" 92 | type: "Convolution" 93 | bottom: "conv1_1" 94 | top: "conv1_2" 95 | param { 96 | lr_mult: 1 97 | decay_mult: 1 98 | } 99 | param { 100 | lr_mult: 2 101 | decay_mult: 0 102 | } 103 | convolution_param { 104 | num_output: 64 105 | pad: 1 106 | kernel_size: 3 107 | engine: CAFFE 108 | } 109 | } 110 | layer { 111 | name: "relu1_2" 112 | type: "ReLU" 113 | bottom: "conv1_2" 114 | top: "conv1_2" 115 | } 116 | layer { 117 | name: "pool1" 118 | type: "Pooling" 119 | bottom: "conv1_2" 120 | top: "pool1" 121 | pooling_param { 122 | pool: MAX 123 | kernel_size: 2 124 | stride: 2 125 | } 126 | } 127 | layer { 128 | name: "conv2_1" 129 | type: "Convolution" 130 | bottom: "pool1" 131 | top: "conv2_1" 132 | param { 133 | lr_mult: 1 134 | decay_mult: 1 135 | } 136 | param { 137 | lr_mult: 2 138 | decay_mult: 0 139 | } 140 | convolution_param { 141 | num_output: 128 142 | pad: 1 143 | kernel_size: 3 144 | engine: CAFFE 145 | } 146 | } 147 | layer { 148 | name: "relu2_1" 149 | type: "ReLU" 150 | bottom: "conv2_1" 151 | top: "conv2_1" 152 | } 153 | layer { 154 | name: "conv2_2" 155 | type: "Convolution" 156 | bottom: "conv2_1" 157 | top: "conv2_2" 158 | param { 159 | lr_mult: 1 160 | decay_mult: 1 161 | } 162 | param { 163 | lr_mult: 2 164 | decay_mult: 0 165 | } 166 | convolution_param { 167 | num_output: 128 168 | pad: 1 169 | kernel_size: 3 170 | engine: CAFFE 171 | } 172 | } 173 | layer { 174 | name: "relu2_2" 175 | type: "ReLU" 176 | bottom: "conv2_2" 177 | top: "conv2_2" 178 | } 179 | layer { 180 | name: "pool2" 181 | type: "Pooling" 182 | bottom: "conv2_2" 183 | top: "pool2" 184 | pooling_param { 185 | pool: MAX 186 | kernel_size: 2 187 | stride: 2 188 | } 189 | } 190 | layer { 191 | name: "conv3_1" 192 | type: "Convolution" 193 | bottom: "pool2" 194 | top: "conv3_1" 195 | param { 196 | lr_mult: 1 197 | decay_mult: 1 198 | } 199 | param { 200 | lr_mult: 2 201 | decay_mult: 0 202 | } 203 | convolution_param { 204 | num_output: 256 205 | pad: 1 206 | kernel_size: 3 207 | engine: CAFFE 208 | } 209 | } 210 | layer { 211 | name: "relu3_1" 212 | type: "ReLU" 213 | bottom: "conv3_1" 214 | top: "conv3_1" 215 | } 216 | layer { 217 | name: "conv3_2" 218 | type: "Convolution" 219 | bottom: "conv3_1" 220 | top: "conv3_2" 221 | param { 222 | lr_mult: 1 223 | decay_mult: 1 224 | } 225 | param { 226 | lr_mult: 2 227 | decay_mult: 0 228 | } 229 | convolution_param { 230 | num_output: 256 231 | pad: 1 232 | kernel_size: 3 233 | engine: CAFFE 234 | } 235 | } 236 | layer { 237 | name: "relu3_2" 238 | type: "ReLU" 239 | bottom: "conv3_2" 240 | top: "conv3_2" 241 | } 242 | layer { 243 | name: "conv3_3" 244 | type: "Convolution" 245 | bottom: "conv3_2" 246 | top: "conv3_3" 247 | param { 248 | lr_mult: 1 249 | decay_mult: 1 250 | } 251 | param { 252 | lr_mult: 2 253 | decay_mult: 0 254 | } 255 | convolution_param { 256 | num_output: 256 257 | pad: 1 258 | kernel_size: 3 259 | engine: CAFFE 260 | } 261 | } 262 | layer { 263 | name: "relu3_3" 264 | type: "ReLU" 265 | bottom: "conv3_3" 266 | top: "conv3_3" 267 | } 268 | layer { 269 | name: "pool3" 270 | type: "Pooling" 271 | bottom: "conv3_3" 272 | top: "pool3" 273 | pooling_param { 274 | pool: MAX 275 | kernel_size: 2 276 | stride: 2 277 | } 278 | } 279 | layer { 280 | name: "conv4_1" 281 | type: "Convolution" 282 | bottom: "pool3" 283 | top: "conv4_1" 284 | param { 285 | lr_mult: 1 286 | decay_mult: 1 287 | } 288 | param { 289 | lr_mult: 2 290 | decay_mult: 0 291 | } 292 | convolution_param { 293 | num_output: 512 294 | pad: 1 295 | kernel_size: 3 296 | engine: CAFFE 297 | } 298 | } 299 | layer { 300 | name: "relu4_1" 301 | type: "ReLU" 302 | bottom: "conv4_1" 303 | top: "conv4_1" 304 | } 305 | layer { 306 | name: "conv4_2" 307 | type: "Convolution" 308 | bottom: "conv4_1" 309 | top: "conv4_2" 310 | param { 311 | lr_mult: 1 312 | decay_mult: 1 313 | } 314 | param { 315 | lr_mult: 2 316 | decay_mult: 0 317 | } 318 | convolution_param { 319 | num_output: 512 320 | pad: 1 321 | kernel_size: 3 322 | engine: CAFFE 323 | } 324 | } 325 | layer { 326 | name: "relu4_2" 327 | type: "ReLU" 328 | bottom: "conv4_2" 329 | top: "conv4_2" 330 | } 331 | layer { 332 | name: "conv4_3" 333 | type: "Convolution" 334 | bottom: "conv4_2" 335 | top: "conv4_3" 336 | param { 337 | lr_mult: 1 338 | decay_mult: 1 339 | } 340 | param { 341 | lr_mult: 2 342 | decay_mult: 0 343 | } 344 | convolution_param { 345 | num_output: 512 346 | pad: 1 347 | kernel_size: 3 348 | engine: CAFFE 349 | } 350 | } 351 | layer { 352 | name: "relu4_3" 353 | type: "ReLU" 354 | bottom: "conv4_3" 355 | top: "conv4_3" 356 | } 357 | layer { 358 | name: "pool4" 359 | type: "Pooling" 360 | bottom: "conv4_3" 361 | top: "pool4" 362 | pooling_param { 363 | pool: MAX 364 | kernel_size: 2 365 | stride: 2 366 | } 367 | } 368 | layer { 369 | name: "conv5_1" 370 | type: "Convolution" 371 | bottom: "pool4" 372 | top: "conv5_1" 373 | param { 374 | lr_mult: 1 375 | decay_mult: 1 376 | } 377 | param { 378 | lr_mult: 2 379 | decay_mult: 0 380 | } 381 | convolution_param { 382 | num_output: 512 383 | pad: 1 384 | kernel_size: 3 385 | engine: CAFFE 386 | } 387 | } 388 | layer { 389 | name: "relu5_1" 390 | type: "ReLU" 391 | bottom: "conv5_1" 392 | top: "conv5_1" 393 | } 394 | layer { 395 | name: "conv5_2" 396 | type: "Convolution" 397 | bottom: "conv5_1" 398 | top: "conv5_2" 399 | param { 400 | lr_mult: 1 401 | decay_mult: 1 402 | } 403 | param { 404 | lr_mult: 2 405 | decay_mult: 0 406 | } 407 | convolution_param { 408 | num_output: 512 409 | pad: 1 410 | kernel_size: 3 411 | engine: CAFFE 412 | } 413 | } 414 | layer { 415 | name: "relu5_2" 416 | type: "ReLU" 417 | bottom: "conv5_2" 418 | top: "conv5_2" 419 | } 420 | layer { 421 | name: "conv5_3" 422 | type: "Convolution" 423 | bottom: "conv5_2" 424 | top: "conv5_3" 425 | param { 426 | lr_mult: 1 427 | decay_mult: 1 428 | } 429 | param { 430 | lr_mult: 2 431 | decay_mult: 0 432 | } 433 | convolution_param { 434 | num_output: 512 435 | pad: 1 436 | kernel_size: 3 437 | engine: CAFFE 438 | } 439 | } 440 | layer { 441 | name: "relu5_3" 442 | type: "ReLU" 443 | bottom: "conv5_3" 444 | top: "conv5_3" 445 | } 446 | layer { 447 | name: "pool5" 448 | type: "Pooling" 449 | bottom: "conv5_3" 450 | top: "pool5" 451 | pooling_param { 452 | pool: MAX 453 | kernel_size: 2 454 | stride: 2 455 | } 456 | } 457 | layer { 458 | name: "fc6" 459 | type: "Convolution" 460 | bottom: "pool5" 461 | top: "fc6" 462 | param { 463 | lr_mult: 1 464 | decay_mult: 1 465 | } 466 | param { 467 | lr_mult: 2 468 | decay_mult: 0 469 | } 470 | convolution_param { 471 | num_output: 4096 472 | kernel_size: 7 473 | engine: CAFFE 474 | } 475 | } 476 | layer { 477 | name: "relu6" 478 | type: "ReLU" 479 | bottom: "fc6" 480 | top: "fc6" 481 | } 482 | layer { 483 | name: "drop6" 484 | type: "Dropout" 485 | bottom: "fc6" 486 | top: "fc6" 487 | dropout_param { 488 | dropout_ratio: 0.5 489 | } 490 | } 491 | layer { 492 | name: "fc7" 493 | type: "Convolution" 494 | bottom: "fc6" 495 | top: "fc7" 496 | param { 497 | lr_mult: 1 498 | decay_mult: 1 499 | } 500 | param { 501 | lr_mult: 2 502 | decay_mult: 0 503 | } 504 | convolution_param { 505 | num_output: 4096 506 | kernel_size: 1 507 | engine: CAFFE 508 | } 509 | } 510 | layer { 511 | name: "relu7" 512 | type: "ReLU" 513 | bottom: "fc7" 514 | top: "fc7" 515 | } 516 | layer { 517 | name: "drop7" 518 | type: "Dropout" 519 | bottom: "fc7" 520 | top: "fc7" 521 | dropout_param { 522 | dropout_ratio: 0.5 523 | } 524 | } 525 | layer { 526 | name: "score59" 527 | type: "Convolution" 528 | bottom: "fc7" 529 | top: "score59" 530 | param { 531 | lr_mult: 1 532 | decay_mult: 1 533 | } 534 | param { 535 | lr_mult: 2 536 | decay_mult: 0 537 | } 538 | convolution_param { 539 | num_output: 60 540 | kernel_size: 1 541 | engine: CAFFE 542 | } 543 | } 544 | layer { 545 | name: "upscore2" 546 | type: "Deconvolution" 547 | bottom: "score59" 548 | top: "upscore2" 549 | param { 550 | lr_mult: 1 551 | decay_mult: 1 552 | } 553 | convolution_param { 554 | num_output: 60 555 | bias_term: false 556 | kernel_size: 4 557 | stride: 2 558 | } 559 | } 560 | layer { 561 | name: "score-pool4" 562 | type: "Convolution" 563 | bottom: "pool4" 564 | top: "score-pool4" 565 | param { 566 | lr_mult: 1 567 | decay_mult: 1 568 | } 569 | param { 570 | lr_mult: 2 571 | decay_mult: 0 572 | } 573 | convolution_param { 574 | num_output: 60 575 | kernel_size: 1 576 | engine: CAFFE 577 | } 578 | } 579 | layer { type: 'Crop' name: 'crop' bottom: 'score-pool4' bottom: 'upscore2' 580 | top: 'score-pool4c' } 581 | layer { 582 | name: "fuse" 583 | type: "Eltwise" 584 | bottom: "upscore2" 585 | bottom: "score-pool4c" 586 | top: "score-fused" 587 | eltwise_param { 588 | operation: SUM 589 | } 590 | } 591 | layer { 592 | name: "upsample-fused-16" 593 | type: "Deconvolution" 594 | bottom: "score-fused" 595 | top: "score4" 596 | param { 597 | lr_mult: 1 598 | decay_mult: 1 599 | } 600 | convolution_param { 601 | num_output: 60 602 | bias_term: false 603 | kernel_size: 4 604 | stride: 2 605 | } 606 | } 607 | layer { 608 | name: "score-pool3" 609 | type: "Convolution" 610 | bottom: "pool3" 611 | top: "score-pool3" 612 | param { 613 | lr_mult: 1 614 | decay_mult: 1 615 | } 616 | param { 617 | lr_mult: 2 618 | decay_mult: 0 619 | } 620 | convolution_param { 621 | num_output: 60 622 | kernel_size: 1 623 | engine: CAFFE 624 | } 625 | } 626 | layer { type: 'Crop' name: 'crop' bottom: 'score-pool3' bottom: 'score4' 627 | top: 'score-pool3c' } 628 | layer { 629 | name: "fuse" 630 | type: "Eltwise" 631 | bottom: "score4" 632 | bottom: "score-pool3c" 633 | top: "score-final" 634 | eltwise_param { 635 | operation: SUM 636 | } 637 | } 638 | layer { 639 | name: "upsample" 640 | type: "Deconvolution" 641 | bottom: "score-final" 642 | top: "bigscore" 643 | param { 644 | lr_mult: 0 645 | } 646 | convolution_param { 647 | num_output: 60 648 | bias_term: false 649 | kernel_size: 16 650 | stride: 8 651 | } 652 | } 653 | layer { type: 'Crop' name: 'crop' bottom: 'bigscore' bottom: 'data' top: 'score' } 654 | layer { 655 | name: "loss" 656 | type: "SoftmaxWithLoss" 657 | bottom: "score" 658 | bottom: "label" 659 | top: "loss" 660 | loss_param { 661 | normalize: false 662 | } 663 | } -------------------------------------------------------------------------------- /images/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/developmentseed/caffe-fcn/f990a58930fc274271fa53693c5a350e2f33cfca/images/cat.jpg -------------------------------------------------------------------------------- /notebook.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | jupyter notebook --no-browser 4 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "caffe-fcn", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "notebook": "docker run -it --rm -v $(pwd):/workspace -p 8888:8888 caffe-fcn ./notebook.sh", 8 | "docker": "docker run -it --rm -v $(pwd):/workspace caffe-fcn" 9 | }, 10 | "keywords": [], 11 | "author": "Anand Thakker (http://anandthakker.net/)", 12 | "license": "ISC" 13 | } 14 | -------------------------------------------------------------------------------- /src/classify.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import os 6 | import sys 7 | caffe_root = os.environ['CAFFE_ROOT'] 8 | sys.path.insert(0, os.path.join(caffe_root, 'python')) 9 | import caffe 10 | 11 | plt.rcParams['image.interpolation'] = 'nearest' # don't interpolate 12 | 13 | if (os.environ.get('CAFFE_CPU_MODE')): 14 | caffe.set_mode_cpu() 15 | else: 16 | caffe.set_mode_gpu() 17 | 18 | net_root = 'fcn-8s' 19 | model_def = net_root + '/deploy.prototxt' 20 | model_weights = net_root + '/fcn-8s-pascalcontext.caffemodel' 21 | net = caffe.Net(model_def, model_weights, caffe.TEST) 22 | 23 | mu = np.array([104.00698793, 116.66876762, 122.67891434]) 24 | # create transformer for the input called 'data' 25 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) 26 | # move image channels to outermost dimension 27 | transformer.set_transpose('data', (2, 0, 1)) 28 | # subtract the dataset-mean value in each channel 29 | transformer.set_mean('data', mu) 30 | # rescale from [0, 1] to [0, 255] 31 | transformer.set_raw_scale('data', 255) 32 | # swap channels from RGB to BGR 33 | transformer.set_channel_swap('data', (2, 1, 0)) 34 | 35 | image = caffe.io.load_image(sys.argv[1]) 36 | transformed_image = transformer.preprocess('data', image) 37 | # copy the image data into the memory allocated for the net 38 | net.blobs['data'].data[...] = transformed_image 39 | 40 | 41 | print('Running image through net.') 42 | output = net.forward() 43 | print('Done.') 44 | 45 | score = output['score'][0] 46 | classed = np.argmax(score, axis=0) 47 | names = dict() 48 | all_labels = ["0: Background"] + open(net_root + '/legend.txt').readlines() 49 | scores = np.unique(classed) 50 | labels = [all_labels[s] for s in scores] 51 | num_scores = len(scores) 52 | 53 | 54 | def rescore(c): 55 | """ rescore values from original score values (0-59) to values ranging from 56 | 0 to num_scores-1 """ 57 | return np.where(scores == c)[0][0] 58 | 59 | rescore = np.vectorize(rescore) 60 | painted = rescore(classed) 61 | 62 | plt.figure(figsize=(10, 10)) 63 | plt.imshow(painted) 64 | formatter = plt.FuncFormatter(lambda val, loc: labels[val]) 65 | plt.colorbar(ticks=range(0, num_scores), format=formatter) 66 | plt.clim(-0.5, num_scores - 0.5) 67 | 68 | plt.savefig(sys.argv[2]) 69 | --------------------------------------------------------------------------------