├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── data ├── .gitignore ├── README.md ├── demo │ ├── 000004.jpg │ ├── 000004_boxes.mat │ ├── 001551.jpg │ └── 001551_boxes.mat ├── pylintrc └── scripts │ ├── fetch_fast_rcnn_models.sh │ ├── fetch_imagenet_models.sh │ └── fetch_selective_search_data.sh ├── experiments ├── README.md ├── cfgs │ ├── fc_only.yml │ ├── multiscale.yml │ ├── no_bbox_reg.yml │ ├── piecewise.yml │ └── svm.yml ├── logs │ └── .gitignore └── scripts │ ├── all_caffenet.sh │ ├── all_vgg16.sh │ ├── all_vgg_cnn_m_1024.sh │ ├── default_caffenet.sh │ ├── default_vgg16.sh │ ├── default_vgg_cnn_m_1024.sh │ ├── fc_only_vgg16.sh │ ├── multiscale_caffenet.sh │ ├── multiscale_vgg_cnn_m_1024.sh │ ├── multitask_no_bbox_reg_caffenet.sh │ ├── multitask_no_bbox_reg_vgg16.sh │ ├── multitask_no_bbox_reg_vgg_cnn_m_1024.sh │ ├── no_bbox_reg_caffenet.sh │ ├── no_bbox_reg_vgg16.sh │ ├── no_bbox_reg_vgg_cnn_m_1024.sh │ ├── piecewise_caffenet.sh │ ├── piecewise_vgg16.sh │ ├── piecewise_vgg_cnn_m_1024.sh │ ├── svd_caffenet.sh │ ├── svd_vgg16.sh │ ├── svd_vgg_cnn_m_1024.sh │ ├── svm_caffenet.sh │ ├── svm_vgg16.sh │ └── svm_vgg_cnn_m_1024.sh ├── lib ├── Makefile ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── factory.py │ ├── imdb.py │ └── pascal_voc.py ├── fast_rcnn │ ├── __init__.py │ ├── config.py │ ├── test.py │ └── train.py ├── roi_data_layer │ ├── __init__.py │ ├── layer.py │ ├── minibatch.py │ └── roidb.py ├── setup.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── bbox.pyx │ ├── blob.py │ ├── nms.py │ ├── nms.pyx │ └── timer.py ├── matlab ├── README.md ├── fast_rcnn_demo.m ├── fast_rcnn_im_detect.m ├── fast_rcnn_load_net.m ├── nms.m └── showboxes.m ├── models ├── CaffeNet │ ├── compressed │ │ └── test.prototxt │ ├── no_bbox_reg │ │ ├── solver.prototxt │ │ ├── test.prototxt │ │ └── train.prototxt │ ├── piecewise │ │ ├── solver.prototxt │ │ └── train.prototxt │ ├── 
solver.prototxt │ ├── test.prototxt │ └── train.prototxt ├── README.md ├── VGG16 │ ├── compressed │ │ └── test.prototxt │ ├── fc_only │ │ ├── solver.prototxt │ │ └── train.prototxt │ ├── no_bbox_reg │ │ ├── solver.prototxt │ │ ├── test.prototxt │ │ └── train.prototxt │ ├── piecewise │ │ ├── solver.prototxt │ │ └── train.prototxt │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt └── VGG_CNN_M_1024 │ ├── compressed │ └── test.prototxt │ ├── no_bbox_reg │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt │ ├── piecewise │ ├── solver.prototxt │ └── train.prototxt │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt ├── output ├── .gitignore └── README.md ├── todo.txt └── tools ├── README.md ├── _init_paths.py ├── compress_net.py ├── demo.py ├── reval.py ├── test_net.py ├── train_net.py └── train_svms.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .ipynb_checkpoints 3 | utils/*.c 4 | utils/*.so 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "caffe-fast-rcnn"] 2 | path = caffe-fast-rcnn 3 | url = https://github.com/rbgirshick/caffe-fast-rcnn.git 4 | branch = fast-rcnn 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Fast R-CNN 2 | 3 | Copyright (c) Microsoft Corporation 4 | 5 | All rights reserved. 
6 | 7 | MIT License 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a 10 | copy of this software and associated documentation files (the "Software"), 11 | to deal in the Software without restriction, including without limitation 12 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | and/or sell copies of the Software, and to permit persons to whom the 14 | Software is furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included 17 | in all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 23 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 24 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 | OTHER DEALINGS IN THE SOFTWARE. 
26 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | selective_search* 2 | imagenet_models* 3 | fast_rcnn_models* 4 | VOCdevkit* 5 | cache 6 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | This directory holds (*after you download them*): 2 | - Pre-computed object proposals 3 | - Caffe models pre-trained on ImageNet 4 | - Fast R-CNN models 5 | - Symlinks to datasets 6 | 7 | To download precomputed Selective Search proposals for PASCAL VOC 2007 and 2012, run: 8 | 9 | ``` 10 | ./data/scripts/fetch_selective_search_data.sh 11 | ``` 12 | 13 | This script will populate `data/selective_search_data`. 14 | 15 | To download Caffe models (CaffeNet, VGG_CNN_M_1024, VGG16) pre-trained on ImageNet, run: 16 | 17 | ``` 18 | ./data/scripts/fetch_imagenet_models.sh 19 | ``` 20 | 21 | This script will populate `data/imagenet_models`. 22 | 23 | To download Fast R-CNN models trained on VOC 2007, run: 24 | 25 | ``` 26 | ./data/scripts/fetch_fast_rcnn_models.sh 27 | ``` 28 | 29 | This script will populate `data/fast_rcnn_models`. 30 | 31 | In order to train and test with PASCAL VOC, you will need to establish symlinks. 32 | From the `data` directory (`cd data`): 33 | 34 | ``` 35 | # For VOC 2007 36 | ln -s /your/path/to/VOC2007/VOCdevkit VOCdevkit2007 37 | 38 | # For VOC 2012 39 | ln -s /your/path/to/VOC2012/VOCdevkit VOCdevkit2012 40 | ``` 41 | 42 | Since you'll likely be experimenting with multiple installs of Fast R-CNN in 43 | parallel, you'll probably want to keep all of this data in a shared place and 44 | use symlinks. 
On my system I create the following symlinks inside `data`: 45 | 46 | ``` 47 | # data/cache holds various outputs created by the datasets package 48 | ln -s /data/fast_rcnn_shared/cache 49 | 50 | # move the imagenet_models to shared location and symlink to them 51 | ln -s /data/fast_rcnn_shared/imagenet_models 52 | 53 | # move the selective search data to a shared location and symlink to them 54 | ln -s /data/fast_rcnn_shared/selective_search_data 55 | 56 | ln -s /data/VOC2007/VOCdevkit VOCdevkit2007 57 | ln -s /data/VOC2012/VOCdevkit VOCdevkit2012 58 | ``` 59 | -------------------------------------------------------------------------------- /data/demo/000004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbgirshick/fast-rcnn/b612190f279da3c11dd8b1396dd5e72779f8e463/data/demo/000004.jpg -------------------------------------------------------------------------------- /data/demo/000004_boxes.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbgirshick/fast-rcnn/b612190f279da3c11dd8b1396dd5e72779f8e463/data/demo/000004_boxes.mat -------------------------------------------------------------------------------- /data/demo/001551.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbgirshick/fast-rcnn/b612190f279da3c11dd8b1396dd5e72779f8e463/data/demo/001551.jpg -------------------------------------------------------------------------------- /data/demo/001551_boxes.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbgirshick/fast-rcnn/b612190f279da3c11dd8b1396dd5e72779f8e463/data/demo/001551_boxes.mat -------------------------------------------------------------------------------- /data/pylintrc: -------------------------------------------------------------------------------- 1 | [TYPECHECK] 2 | 
3 | ignored-modules = numpy, numpy.random, cv2 4 | -------------------------------------------------------------------------------- /data/scripts/fetch_fast_rcnn_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=fast_rcnn_models.tgz 7 | URL=https://dl.dropboxusercontent.com/s/e3ugqq3lca4z8q6/fast_rcnn_models.tgz 8 | CHECKSUM=5f7dde9f5376e18c8e065338cc5df3f7 9 | 10 | if [ -f $FILE ]; then 11 | echo "File already exists. Checking md5..." 12 | os=`uname -s` 13 | if [ "$os" = "Linux" ]; then 14 | checksum=`md5sum $FILE | awk '{ print $1 }'` 15 | elif [ "$os" = "Darwin" ]; then 16 | checksum=`cat $FILE | md5` 17 | fi 18 | if [ "$checksum" = "$CHECKSUM" ]; then 19 | echo "Checksum is correct. No need to download." 20 | exit 0 21 | else 22 | echo "Checksum is incorrect. Need to download again." 23 | fi 24 | fi 25 | 26 | echo "Downloading Fast R-CNN demo models (0.96G)..." 27 | 28 | wget $URL -O $FILE 29 | 30 | echo "Unzipping..." 31 | 32 | tar zxvf $FILE 33 | 34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM." 35 | -------------------------------------------------------------------------------- /data/scripts/fetch_imagenet_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=imagenet_models.tgz 7 | URL=https://dl.dropboxusercontent.com/s/riazjuizq0w7dqm/imagenet_models.tgz 8 | CHECKSUM=8b1d4b9da0593fc70ef403284f810adc 9 | 10 | if [ -f $FILE ]; then 11 | echo "File already exists. Checking md5..." 
12 | os=`uname -s` 13 | if [ "$os" = "Linux" ]; then 14 | checksum=`md5sum $FILE | awk '{ print $1 }'` 15 | elif [ "$os" = "Darwin" ]; then 16 | checksum=`cat $FILE | md5` 17 | fi 18 | if [ "$checksum" = "$CHECKSUM" ]; then 19 | echo "Checksum is correct. No need to download." 20 | exit 0 21 | else 22 | echo "Checksum is incorrect. Need to download again." 23 | fi 24 | fi 25 | 26 | echo "Downloading pretrained ImageNet models (1G)..." 27 | 28 | wget $URL -O $FILE 29 | 30 | echo "Unzipping..." 31 | 32 | tar zxvf $FILE 33 | 34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM." 35 | -------------------------------------------------------------------------------- /data/scripts/fetch_selective_search_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=selective_search_data.tgz 7 | URL=https://dl.dropboxusercontent.com/s/orrt7o6bp6ae0tc/selective_search_data.tgz 8 | CHECKSUM=7078c1db87a7851b31966b96774cd9b9 9 | 10 | if [ -f $FILE ]; then 11 | echo "File already exists. Checking md5..." 12 | os=`uname -s` 13 | if [ "$os" = "Linux" ]; then 14 | checksum=`md5sum $FILE | awk '{ print $1 }'` 15 | elif [ "$os" = "Darwin" ]; then 16 | checksum=`cat $FILE | md5` 17 | fi 18 | if [ "$checksum" = "$CHECKSUM" ]; then 19 | echo "Checksum is correct. No need to download." 20 | exit 0 21 | else 22 | echo "Checksum is incorrect. Need to download again." 23 | fi 24 | fi 25 | 26 | echo "Downloading precomputed selective search boxes (0.5G)..." 27 | 28 | wget $URL -O $FILE 29 | 30 | echo "Unzipping..." 31 | 32 | tar zxvf $FILE 33 | 34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM." 
35 | -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- 1 | Scripts to reproduce (most) of the experiments in the paper. 2 | 3 | Scripts are under `experiments/scripts`. 4 | 5 | Each script saves a log file under `experiments/logs`. 6 | 7 | Configuration override files used in the experiments are stored in `experiments/cfgs`. 8 | -------------------------------------------------------------------------------- /experiments/cfgs/fc_only.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: fc_only 2 | TRAIN: 3 | SNAPSHOT_INFIX: fc_only 4 | -------------------------------------------------------------------------------- /experiments/cfgs/multiscale.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: multiscale 2 | TRAIN: 3 | SCALES: !!python/tuple [480, 576, 688, 864, 1200] 4 | MAX_SIZE: 2000 5 | SNAPSHOT_INFIX: multiscale 6 | TEST: 7 | SCALES: !!python/tuple [480, 576, 688, 864, 1200] 8 | MAX_SIZE: 2000 9 | -------------------------------------------------------------------------------- /experiments/cfgs/no_bbox_reg.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: "no_bbox_reg" 2 | TRAIN: 3 | BBOX_REG: False 4 | SNAPSHOT_INFIX: no_bbox_reg 5 | TEST: 6 | BBOX_REG: False 7 | -------------------------------------------------------------------------------- /experiments/cfgs/piecewise.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: piecewise 2 | TRAIN: 3 | SNAPSHOT_INFIX: piecewise 4 | -------------------------------------------------------------------------------- /experiments/cfgs/svm.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: svm 2 | TRAIN: 3 | # don't use flipped examples when training SVMs 
for two reasons: 4 | # 1) R-CNN didn't 5 | # 2) I've tried and it doesn't help, yet makes SVM training take 2x longer 6 | USE_FLIPPED: False 7 | TEST: 8 | SVM: True 9 | -------------------------------------------------------------------------------- /experiments/logs/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt* 2 | -------------------------------------------------------------------------------- /experiments/scripts/all_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | GPU=$1 5 | NET=caffenet 6 | ./experiments/scripts/default_${NET}.sh $GPU 7 | ./experiments/scripts/multiscale_${NET}.sh $GPU 8 | ./experiments/scripts/multitask_no_bbox_reg_${NET}.sh $GPU 9 | ./experiments/scripts/no_bbox_reg_${NET}.sh $GPU 10 | ./experiments/scripts/piecewise_${NET}.sh $GPU 11 | ./experiments/scripts/svd_${NET}.sh $GPU 12 | ./experiments/scripts/svm_${NET}.sh $GPU 13 | -------------------------------------------------------------------------------- /experiments/scripts/all_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | GPU=$1 5 | NET=vgg16 6 | ./experiments/scripts/default_${NET}.sh $GPU 7 | ./experiments/scripts/fc_only_${NET}.sh $GPU 8 | ./experiments/scripts/multitask_no_bbox_reg_${NET}.sh $GPU 9 | ./experiments/scripts/no_bbox_reg_${NET}.sh $GPU 10 | ./experiments/scripts/piecewise_${NET}.sh $GPU 11 | ./experiments/scripts/svd_${NET}.sh $GPU 12 | ./experiments/scripts/svm_${NET}.sh $GPU 13 | -------------------------------------------------------------------------------- /experiments/scripts/all_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | GPU=$1 5 | NET=vgg_cnn_m_1024 6 | ./experiments/scripts/default_${NET}.sh $GPU 7 | ./experiments/scripts/multiscale_${NET}.sh $GPU 8 | 
./experiments/scripts/multitask_no_bbox_reg_${NET}.sh $GPU 9 | ./experiments/scripts/no_bbox_reg_${NET}.sh $GPU 10 | ./experiments/scripts/piecewise_${NET}.sh $GPU 11 | ./experiments/scripts/svd_${NET}.sh $GPU 12 | ./experiments/scripts/svm_${NET}.sh $GPU 13 | -------------------------------------------------------------------------------- /experiments/scripts/default_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/default_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/CaffeNet/solver.prototxt \ 14 | --weights data/imagenet_models/CaffeNet.v2.caffemodel \ 15 | --imdb voc_2007_trainval 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/CaffeNet/test.prototxt \ 19 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- /experiments/scripts/default_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/default_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG16/solver.prototxt \ 14 | --weights data/imagenet_models/VGG16.v2.caffemodel \ 15 | --imdb voc_2007_trainval 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/VGG16/test.prototxt \ 19 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- 
/experiments/scripts/default_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/default_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG_CNN_M_1024/solver.prototxt \ 14 | --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel \ 15 | --imdb voc_2007_trainval 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/VGG_CNN_M_1024/test.prototxt \ 19 | --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- /experiments/scripts/fc_only_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/fc_only_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG16/fc_only/solver.prototxt \ 14 | --weights data/imagenet_models/VGG16.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/fc_only.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG16/test.prototxt \ 20 | --net output/fc_only/voc_2007_trainval/vgg16_fast_rcnn_fc_only_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/fc_only.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/multiscale_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | 
LOG="experiments/logs/multiscale_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/CaffeNet/solver.prototxt \ 14 | --weights data/imagenet_models/CaffeNet.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/multiscale.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/CaffeNet/test.prototxt \ 20 | --net output/multiscale/voc_2007_trainval/caffenet_fast_rcnn_multiscale_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/multiscale.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/multiscale_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/multiscale_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG_CNN_M_1024/solver.prototxt \ 14 | --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/multiscale.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG_CNN_M_1024/test.prototxt \ 20 | --net output/multiscale/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_multiscale_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/multiscale.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/multitask_no_bbox_reg_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/multitask_no_bbox_reg_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 
| exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/test_net.py --gpu $1 \ 13 | --def models/CaffeNet/test.prototxt \ 14 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_test \ 16 | --cfg experiments/cfgs/no_bbox_reg.yml 17 | -------------------------------------------------------------------------------- /experiments/scripts/multitask_no_bbox_reg_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/multitask_no_bbox_reg_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/test_net.py --gpu $1 \ 13 | --def models/VGG16/test.prototxt \ 14 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_test \ 16 | --cfg experiments/cfgs/no_bbox_reg.yml 17 | -------------------------------------------------------------------------------- /experiments/scripts/multitask_no_bbox_reg_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/multitask_no_bbox_reg_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/test_net.py --gpu $1 \ 13 | --def models/VGG_CNN_M_1024/test.prototxt \ 14 | --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_test \ 16 | --cfg experiments/cfgs/no_bbox_reg.yml 17 | -------------------------------------------------------------------------------- /experiments/scripts/no_bbox_reg_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 
| 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/no_bbox_reg_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/CaffeNet/no_bbox_reg/solver.prototxt \ 14 | --weights data/imagenet_models/CaffeNet.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/no_bbox_reg.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/CaffeNet/no_bbox_reg/test.prototxt \ 20 | --net output/no_bbox_reg/voc_2007_trainval/caffenet_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/no_bbox_reg.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/no_bbox_reg_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/no_bbox_reg_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG16/no_bbox_reg/solver.prototxt \ 14 | --weights data/imagenet_models/VGG16.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/no_bbox_reg.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG16/no_bbox_reg/test.prototxt \ 20 | --net output/no_bbox_reg/voc_2007_trainval/vgg16_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/no_bbox_reg.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/no_bbox_reg_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | 
LOG="experiments/logs/no_bbox_reg_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG_CNN_M_1024/no_bbox_reg/solver.prototxt \ 14 | --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/no_bbox_reg.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG_CNN_M_1024/no_bbox_reg/test.prototxt \ 20 | --net output/no_bbox_reg/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/no_bbox_reg.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/piecewise_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/piecewise_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/CaffeNet/piecewise/solver.prototxt \ 14 | --weights output/no_bbox_reg/voc_2007_trainval/caffenet_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/piecewise.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/CaffeNet/test.prototxt \ 20 | --net output/piecewise/voc_2007_trainval/caffenet_fast_rcnn_piecewise_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/piecewise.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/piecewise_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | 
LOG="experiments/logs/piecewise_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG16/piecewise/solver.prototxt \ 14 | --weights output/no_bbox_reg/voc_2007_trainval/vgg16_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/piecewise.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG16/test.prototxt \ 20 | --net output/piecewise/voc_2007_trainval/vgg16_fast_rcnn_piecewise_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/piecewise.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/piecewise_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/piecewise_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG_CNN_M_1024/piecewise/solver.prototxt \ 14 | --weights output/no_bbox_reg/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/piecewise.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG_CNN_M_1024/test.prototxt \ 20 | --net output/piecewise/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_piecewise_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/piecewise.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/svd_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | 
LOG="experiments/logs/svd_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/compress_net.py \ 13 | --def models/CaffeNet/test.prototxt \ 14 | --def-svd models/CaffeNet/compressed/test.prototxt \ 15 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/CaffeNet/compressed/test.prototxt \ 19 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000_svd_fc6_1024_fc7_256.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- /experiments/scripts/svd_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/svd_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/compress_net.py \ 13 | --def models/VGG16/test.prototxt \ 14 | --def-svd models/VGG16/compressed/test.prototxt \ 15 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/VGG16/compressed/test.prototxt \ 19 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000_svd_fc6_1024_fc7_256.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- /experiments/scripts/svd_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/svd_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/compress_net.py \ 13 | --def models/VGG_CNN_M_1024/test.prototxt \ 14 | --def-svd models/VGG_CNN_M_1024/compressed/test.prototxt \ 15 | --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/VGG_CNN_M_1024/compressed/test.prototxt \ 19 | --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000_svd_fc6_1024_fc7_256.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- /experiments/scripts/svm_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/svm_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_svms.py --gpu $1 \ 13 | --def models/CaffeNet/test.prototxt \ 14 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/svm.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/CaffeNet/test.prototxt \ 20 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000_svm.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/svm.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/svm_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/svm_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> 
>(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_svms.py --gpu $1 \ 13 | --def models/VGG16/test.prototxt \ 14 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/svm.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG16/test.prototxt \ 20 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000_svm.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/svm.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/svm_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/svm_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_svms.py --gpu $1 \ 13 | --def models/VGG_CNN_M_1024/test.prototxt \ 14 | --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/svm.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG_CNN_M_1024/test.prototxt \ 20 | --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000_svm.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/svm.yml 23 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 
| tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir, rm_res) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | VOCopts.detrespath=[VOCopts.resdir 'Main/%s_det_' VOCopts.testset '_%s.txt']; 6 | 7 | for i = 1:length(VOCopts.classes) 8 | cls = VOCopts.classes{i}; 9 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir, rm_res); 10 | end 11 | 12 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 13 | fprintf('Results:\n'); 14 | aps = [res(:).ap]'; 15 | fprintf('%.1f\n', aps * 100); 16 | fprintf('%.1f\n', mean(aps) * 100); 17 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 18 | 19 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir, rm_res) 20 | 21 | test_set = VOCopts.testset; 22 | year = VOCopts.dataset(4:end); 23 | 24 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 25 | 26 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 27 | 28 | recall = []; 29 | prec = []; 30 | ap = 0; 31 | ap_auc = 0; 32 | 33 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 34 | if do_eval 35 | % Bug in VOCevaldet requires that tic has been called first 36 | tic; 37 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 38 | ap_auc = xVOCap(recall, prec); 39 | 40 | % force plot limits 41 | ylim([0 1]); 42 | xlim([0 1]); 43 | 44 | print(gcf, '-djpeg', '-r0', ... 45 | [output_dir '/' cls '_pr.jpg']); 46 | end 47 | fprintf('!!! 
%s : %.4f %.4f\n', cls, ap, ap_auc); 48 | 49 | res.recall = recall; 50 | res.prec = prec; 51 | res.ap = ap; 52 | res.ap_auc = ap_auc; 53 | 54 | save([output_dir '/' cls '_pr.mat'], ... 55 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 56 | 57 | if rm_res 58 | delete(res_fn); 59 | end 60 | 61 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 62 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from .imdb import imdb 9 | from .pascal_voc import pascal_voc 10 | from . import factory 11 | 12 | import os.path as osp 13 | ROOT_DIR = osp.join(osp.dirname(__file__), '..', '..') 14 | 15 | # We assume your matlab binary is in your path and called `matlab'. 16 | # If either is not true, just add it to your path and alias it as matlab, or 17 | # you could change this file. 
18 | MATLAB = 'matlab' 19 | 20 | # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python 21 | def _which(program): 22 | import os 23 | def is_exe(fpath): 24 | return os.path.isfile(fpath) and os.access(fpath, os.X_OK) 25 | 26 | fpath, fname = os.path.split(program) 27 | if fpath: 28 | if is_exe(program): 29 | return program 30 | else: 31 | for path in os.environ["PATH"].split(os.pathsep): 32 | path = path.strip('"') 33 | exe_file = os.path.join(path, program) 34 | if is_exe(exe_file): 35 | return exe_file 36 | 37 | return None 38 | 39 | if _which(MATLAB) is None: 40 | msg = ("MATLAB command '{}' not found. " 41 | "Please add '{}' to your PATH.").format(MATLAB, MATLAB) 42 | raise EnvironmentError(msg) 43 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | import datasets.pascal_voc 13 | import numpy as np 14 | 15 | def _selective_search_IJCV_top_k(split, year, top_k): 16 | """Return an imdb that uses the top k proposals from the selective search 17 | IJCV code. 
18 | """ 19 | imdb = datasets.pascal_voc(split, year) 20 | imdb.roidb_handler = imdb.selective_search_IJCV_roidb 21 | imdb.config['top_k'] = top_k 22 | return imdb 23 | 24 | # Set up voc__ using selective search "fast" mode 25 | for year in ['2007', '2012']: 26 | for split in ['train', 'val', 'trainval', 'test']: 27 | name = 'voc_{}_{}'.format(year, split) 28 | __sets[name] = (lambda split=split, year=year: 29 | datasets.pascal_voc(split, year)) 30 | 31 | # Set up voc___top_ using selective search "quality" mode 32 | # but only returning the first k boxes 33 | for top_k in np.arange(1000, 11000, 1000): 34 | for year in ['2007', '2012']: 35 | for split in ['train', 'val', 'trainval', 'test']: 36 | name = 'voc_{}_{}_top_{:d}'.format(year, split, top_k) 37 | __sets[name] = (lambda split=split, year=year, top_k=top_k: 38 | _selective_search_IJCV_top_k(split, year, top_k)) 39 | 40 | def get_imdb(name): 41 | """Get an imdb (image database) by name.""" 42 | if not __sets.has_key(name): 43 | raise KeyError('Unknown dataset: {}'.format(name)) 44 | return __sets[name]() 45 | 46 | def list_imdbs(): 47 | """List all registered imdbs.""" 48 | return __sets.keys() 49 | -------------------------------------------------------------------------------- /lib/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import os.path as osp 10 | import PIL 11 | from utils.cython_bbox import bbox_overlaps 12 | import numpy as np 13 | import scipy.sparse 14 | import datasets 15 | 16 | class imdb(object): 17 | """Image database.""" 18 | 19 | def __init__(self, name): 20 | self._name = name 21 | self._num_classes = 0 22 | self._classes = [] 23 | self._image_index 
= [] 24 | self._obj_proposer = 'selective_search' 25 | self._roidb = None 26 | self._roidb_handler = self.default_roidb 27 | # Use this dict for storing dataset specific config options 28 | self.config = {} 29 | 30 | @property 31 | def name(self): 32 | return self._name 33 | 34 | @property 35 | def num_classes(self): 36 | return len(self._classes) 37 | 38 | @property 39 | def classes(self): 40 | return self._classes 41 | 42 | @property 43 | def image_index(self): 44 | return self._image_index 45 | 46 | @property 47 | def roidb_handler(self): 48 | return self._roidb_handler 49 | 50 | @roidb_handler.setter 51 | def roidb_handler(self, val): 52 | self._roidb_handler = val 53 | 54 | @property 55 | def roidb(self): 56 | # A roidb is a list of dictionaries, each with the following keys: 57 | # boxes 58 | # gt_overlaps 59 | # gt_classes 60 | # flipped 61 | if self._roidb is not None: 62 | return self._roidb 63 | self._roidb = self.roidb_handler() 64 | return self._roidb 65 | 66 | @property 67 | def cache_path(self): 68 | cache_path = osp.abspath(osp.join(datasets.ROOT_DIR, 'data', 'cache')) 69 | if not os.path.exists(cache_path): 70 | os.makedirs(cache_path) 71 | return cache_path 72 | 73 | @property 74 | def num_images(self): 75 | return len(self.image_index) 76 | 77 | def image_path_at(self, i): 78 | raise NotImplementedError 79 | 80 | def default_roidb(self): 81 | raise NotImplementedError 82 | 83 | def evaluate_detections(self, all_boxes, output_dir=None): 84 | """ 85 | all_boxes is a list of length number-of-classes. 86 | Each list element is a list of length number-of-images. 87 | Each of those list elements is either an empty list [] 88 | or a numpy array of detection. 
89 | 90 | all_boxes[class][image] = [] or np.array of shape #dets x 5 91 | """ 92 | raise NotImplementedError 93 | 94 | def append_flipped_images(self): 95 | num_images = self.num_images 96 | widths = [PIL.Image.open(self.image_path_at(i)).size[0] 97 | for i in xrange(num_images)] 98 | for i in xrange(num_images): 99 | boxes = self.roidb[i]['boxes'].copy() 100 | oldx1 = boxes[:, 0].copy() 101 | oldx2 = boxes[:, 2].copy() 102 | boxes[:, 0] = widths[i] - oldx2 - 1 103 | boxes[:, 2] = widths[i] - oldx1 - 1 104 | assert (boxes[:, 2] >= boxes[:, 0]).all() 105 | entry = {'boxes' : boxes, 106 | 'gt_overlaps' : self.roidb[i]['gt_overlaps'], 107 | 'gt_classes' : self.roidb[i]['gt_classes'], 108 | 'flipped' : True} 109 | self.roidb.append(entry) 110 | self._image_index = self._image_index * 2 111 | 112 | def evaluate_recall(self, candidate_boxes, ar_thresh=0.5): 113 | # Record max overlap value for each gt box 114 | # Return vector of overlap values 115 | gt_overlaps = np.zeros(0) 116 | for i in xrange(self.num_images): 117 | gt_inds = np.where(self.roidb[i]['gt_classes'] > 0)[0] 118 | gt_boxes = self.roidb[i]['boxes'][gt_inds, :] 119 | 120 | boxes = candidate_boxes[i] 121 | if boxes.shape[0] == 0: 122 | continue 123 | overlaps = bbox_overlaps(boxes.astype(np.float), 124 | gt_boxes.astype(np.float)) 125 | 126 | # gt_overlaps = np.hstack((gt_overlaps, overlaps.max(axis=0))) 127 | _gt_overlaps = np.zeros((gt_boxes.shape[0])) 128 | for j in xrange(gt_boxes.shape[0]): 129 | argmax_overlaps = overlaps.argmax(axis=0) 130 | max_overlaps = overlaps.max(axis=0) 131 | gt_ind = max_overlaps.argmax() 132 | gt_ovr = max_overlaps.max() 133 | assert(gt_ovr >= 0) 134 | box_ind = argmax_overlaps[gt_ind] 135 | _gt_overlaps[j] = overlaps[box_ind, gt_ind] 136 | assert(_gt_overlaps[j] == gt_ovr) 137 | overlaps[box_ind, :] = -1 138 | overlaps[:, gt_ind] = -1 139 | 140 | gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) 141 | 142 | num_pos = gt_overlaps.size 143 | gt_overlaps = 
np.sort(gt_overlaps) 144 | step = 0.001 145 | thresholds = np.minimum(np.arange(0.5, 1.0 + step, step), 1.0) 146 | recalls = np.zeros_like(thresholds) 147 | for i, t in enumerate(thresholds): 148 | recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) 149 | ar = 2 * np.trapz(recalls, thresholds) 150 | 151 | return ar, gt_overlaps, recalls, thresholds 152 | 153 | def create_roidb_from_box_list(self, box_list, gt_roidb): 154 | assert len(box_list) == self.num_images, \ 155 | 'Number of boxes must match number of ground-truth images' 156 | roidb = [] 157 | for i in xrange(self.num_images): 158 | boxes = box_list[i] 159 | num_boxes = boxes.shape[0] 160 | overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32) 161 | 162 | if gt_roidb is not None: 163 | gt_boxes = gt_roidb[i]['boxes'] 164 | gt_classes = gt_roidb[i]['gt_classes'] 165 | gt_overlaps = bbox_overlaps(boxes.astype(np.float), 166 | gt_boxes.astype(np.float)) 167 | argmaxes = gt_overlaps.argmax(axis=1) 168 | maxes = gt_overlaps.max(axis=1) 169 | I = np.where(maxes > 0)[0] 170 | overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] 171 | 172 | overlaps = scipy.sparse.csr_matrix(overlaps) 173 | roidb.append({'boxes' : boxes, 174 | 'gt_classes' : np.zeros((num_boxes,), 175 | dtype=np.int32), 176 | 'gt_overlaps' : overlaps, 177 | 'flipped' : False}) 178 | return roidb 179 | 180 | @staticmethod 181 | def merge_roidbs(a, b): 182 | assert len(a) == len(b) 183 | for i in xrange(len(a)): 184 | a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) 185 | a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], 186 | b[i]['gt_classes'])) 187 | a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'], 188 | b[i]['gt_overlaps']]) 189 | return a 190 | 191 | def competition_mode(self, on): 192 | """Turn competition mode on or off.""" 193 | pass 194 | -------------------------------------------------------------------------------- /lib/fast_rcnn/__init__.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from . import config 9 | from . import train 10 | from . import test 11 | -------------------------------------------------------------------------------- /lib/fast_rcnn/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Fast R-CNN config system. 9 | 10 | This file specifies default config options for Fast R-CNN. You should not 11 | change values in this file. Instead, you should write a config file (in yaml) 12 | and use cfg_from_file(yaml_file) to load it and override the default options. 13 | 14 | Most tools in $ROOT/tools take a --cfg option to specify an override file. 
15 | - See tools/{train,test}_net.py for example code that uses cfg_from_file() 16 | - See experiments/cfgs/*.yml for example YAML config override files 17 | """ 18 | 19 | import os 20 | import os.path as osp 21 | import numpy as np 22 | # `pip install easydict` if you don't have it 23 | from easydict import EasyDict as edict 24 | 25 | __C = edict() 26 | # Consumers can get config by: 27 | # from fast_rcnn_config import cfg 28 | cfg = __C 29 | 30 | # 31 | # Training options 32 | # 33 | 34 | __C.TRAIN = edict() 35 | 36 | # Scales to use during training (can list multiple scales) 37 | # Each scale is the pixel size of an image's shortest side 38 | __C.TRAIN.SCALES = (600,) 39 | 40 | # Max pixel size of the longest side of a scaled input image 41 | __C.TRAIN.MAX_SIZE = 1000 42 | 43 | # Images to use per minibatch 44 | __C.TRAIN.IMS_PER_BATCH = 2 45 | 46 | # Minibatch size (number of regions of interest [ROIs]) 47 | __C.TRAIN.BATCH_SIZE = 128 48 | 49 | # Fraction of minibatch that is labeled foreground (i.e. class > 0) 50 | __C.TRAIN.FG_FRACTION = 0.25 51 | 52 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 53 | __C.TRAIN.FG_THRESH = 0.5 54 | 55 | # Overlap threshold for a ROI to be considered background (class = 0 if 56 | # overlap in [LO, HI)) 57 | __C.TRAIN.BG_THRESH_HI = 0.5 58 | __C.TRAIN.BG_THRESH_LO = 0.1 59 | 60 | # Use horizontally-flipped images during training? 
61 | __C.TRAIN.USE_FLIPPED = True 62 | 63 | # Train bounding-box regressors 64 | __C.TRAIN.BBOX_REG = True 65 | 66 | # Overlap required between a ROI and ground-truth box in order for that ROI to 67 | # be used as a bounding-box regression training example 68 | __C.TRAIN.BBOX_THRESH = 0.5 69 | 70 | # Iterations between snapshots 71 | __C.TRAIN.SNAPSHOT_ITERS = 10000 72 | 73 | # solver.prototxt specifies the snapshot path prefix, this adds an optional 74 | # infix to yield the path: [_]_iters_XYZ.caffemodel 75 | __C.TRAIN.SNAPSHOT_INFIX = '' 76 | 77 | # Use a prefetch thread in roi_data_layer.layer 78 | # So far I haven't found this useful; likely more engineering work is required 79 | __C.TRAIN.USE_PREFETCH = False 80 | 81 | # 82 | # Testing options 83 | # 84 | 85 | __C.TEST = edict() 86 | 87 | # Scales to use during testing (can list multiple scales) 88 | # Each scale is the pixel size of an image's shortest side 89 | __C.TEST.SCALES = (600,) 90 | 91 | # Max pixel size of the longest side of a scaled input image 92 | __C.TEST.MAX_SIZE = 1000 93 | 94 | # Overlap threshold used for non-maximum suppression (suppress boxes with 95 | # IoU >= this threshold) 96 | __C.TEST.NMS = 0.3 97 | 98 | # Experimental: treat the (K+1) units in the cls_score layer as linear 99 | # predictors (trained, eg, with one-vs-rest SVMs). 100 | __C.TEST.SVM = False 101 | 102 | # Test using bounding-box regressors 103 | __C.TEST.BBOX_REG = True 104 | 105 | # 106 | # MISC 107 | # 108 | 109 | # The mapping from image coordinates to feature map coordinates might cause 110 | # some boxes that are distinct in image space to become identical in feature 111 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor 112 | # for identifying duplicate boxes. 113 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16 114 | __C.DEDUP_BOXES = 1./16. 
115 | 116 | # Pixel mean values (BGR order) as a (1, 1, 3) array 117 | # We use the same pixel mean for all networks even though it's not exactly what 118 | # they were trained with 119 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) 120 | 121 | # For reproducibility 122 | __C.RNG_SEED = 3 123 | 124 | # A small number that's used many times 125 | __C.EPS = 1e-14 126 | 127 | # Root directory of project 128 | __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) 129 | 130 | # Place outputs under an experiments directory 131 | __C.EXP_DIR = 'default' 132 | 133 | def get_output_dir(imdb, net): 134 | """Return the directory where experimental artifacts are placed. 135 | 136 | A canonical path is built using the name from an imdb and a network 137 | (if not None). 138 | """ 139 | path = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name)) 140 | if net is None: 141 | return path 142 | else: 143 | return osp.join(path, net.name) 144 | 145 | def _merge_a_into_b(a, b): 146 | """Merge config dictionary a into config dictionary b, clobbering the 147 | options in b whenever they are also specified in a. 148 | """ 149 | if type(a) is not edict: 150 | return 151 | 152 | for k, v in a.iteritems(): 153 | # a must specify keys that are in b 154 | if not b.has_key(k): 155 | raise KeyError('{} is not a valid config key'.format(k)) 156 | 157 | # the types must match, too 158 | if type(b[k]) is not type(v): 159 | raise ValueError(('Type mismatch ({} vs. 
{}) ' 160 | 'for config key: {}').format(type(b[k]), 161 | type(v), k)) 162 | 163 | # recursively merge dicts 164 | if type(v) is edict: 165 | try: 166 | _merge_a_into_b(a[k], b[k]) 167 | except: 168 | print('Error under config key: {}'.format(k)) 169 | raise 170 | else: 171 | b[k] = v 172 | 173 | def cfg_from_file(filename): 174 | """Load a config file and merge it into the default options.""" 175 | import yaml 176 | with open(filename, 'r') as f: 177 | yaml_cfg = edict(yaml.load(f)) 178 | 179 | _merge_a_into_b(yaml_cfg, __C) 180 | 181 | def cfg_from_list(cfg_list): 182 | """Set config keys via list (e.g., from command line).""" 183 | from ast import literal_eval 184 | assert len(cfg_list) % 2 == 0 185 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 186 | key_list = k.split('.') 187 | d = __C 188 | for subkey in key_list[:-1]: 189 | assert d.has_key(subkey) 190 | d = d[subkey] 191 | subkey = key_list[-1] 192 | assert d.has_key(subkey) 193 | try: 194 | value = literal_eval(v) 195 | except: 196 | # handle the case when v is a string literal 197 | value = v 198 | assert type(value) == type(d[subkey]), \ 199 | 'type {} does not match original type {}'.format( 200 | type(value), type(d[subkey])) 201 | d[subkey] = value 202 | -------------------------------------------------------------------------------- /lib/fast_rcnn/train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Train a Fast R-CNN network.""" 9 | 10 | import caffe 11 | from fast_rcnn.config import cfg 12 | import roi_data_layer.roidb as rdl_roidb 13 | from utils.timer import Timer 14 | import numpy as np 15 | import os 16 | 17 | from caffe.proto import caffe_pb2 18 | import google.protobuf as 
pb2 19 | 20 | class SolverWrapper(object): 21 | """A simple wrapper around Caffe's solver. 22 | This wrapper gives us control over he snapshotting process, which we 23 | use to unnormalize the learned bounding-box regression weights. 24 | """ 25 | 26 | def __init__(self, solver_prototxt, roidb, output_dir, 27 | pretrained_model=None): 28 | """Initialize the SolverWrapper.""" 29 | self.output_dir = output_dir 30 | 31 | print 'Computing bounding-box regression targets...' 32 | self.bbox_means, self.bbox_stds = \ 33 | rdl_roidb.add_bbox_regression_targets(roidb) 34 | print 'done' 35 | 36 | self.solver = caffe.SGDSolver(solver_prototxt) 37 | if pretrained_model is not None: 38 | print ('Loading pretrained model ' 39 | 'weights from {:s}').format(pretrained_model) 40 | self.solver.net.copy_from(pretrained_model) 41 | 42 | self.solver_param = caffe_pb2.SolverParameter() 43 | with open(solver_prototxt, 'rt') as f: 44 | pb2.text_format.Merge(f.read(), self.solver_param) 45 | 46 | self.solver.net.layers[0].set_roidb(roidb) 47 | 48 | def snapshot(self): 49 | """Take a snapshot of the network after unnormalizing the learned 50 | bounding-box regression weights. This enables easy use at test-time. 51 | """ 52 | net = self.solver.net 53 | 54 | if cfg.TRAIN.BBOX_REG: 55 | # save original values 56 | orig_0 = net.params['bbox_pred'][0].data.copy() 57 | orig_1 = net.params['bbox_pred'][1].data.copy() 58 | 59 | # scale and shift with bbox reg unnormalization; then save snapshot 60 | net.params['bbox_pred'][0].data[...] = \ 61 | (net.params['bbox_pred'][0].data * 62 | self.bbox_stds[:, np.newaxis]) 63 | net.params['bbox_pred'][1].data[...] 
= \ 64 | (net.params['bbox_pred'][1].data * 65 | self.bbox_stds + self.bbox_means) 66 | 67 | if not os.path.exists(self.output_dir): 68 | os.makedirs(self.output_dir) 69 | 70 | infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX 71 | if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') 72 | filename = (self.solver_param.snapshot_prefix + infix + 73 | '_iter_{:d}'.format(self.solver.iter) + '.caffemodel') 74 | filename = os.path.join(self.output_dir, filename) 75 | 76 | net.save(str(filename)) 77 | print 'Wrote snapshot to: {:s}'.format(filename) 78 | 79 | if cfg.TRAIN.BBOX_REG: 80 | # restore net to original state 81 | net.params['bbox_pred'][0].data[...] = orig_0 82 | net.params['bbox_pred'][1].data[...] = orig_1 83 | 84 | def train_model(self, max_iters): 85 | """Network training loop.""" 86 | last_snapshot_iter = -1 87 | timer = Timer() 88 | while self.solver.iter < max_iters: 89 | # Make one SGD update 90 | timer.tic() 91 | self.solver.step(1) 92 | timer.toc() 93 | if self.solver.iter % (10 * self.solver_param.display) == 0: 94 | print 'speed: {:.3f}s / iter'.format(timer.average_time) 95 | 96 | if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0: 97 | last_snapshot_iter = self.solver.iter 98 | self.snapshot() 99 | 100 | if last_snapshot_iter != self.solver.iter: 101 | self.snapshot() 102 | 103 | def get_training_roidb(imdb): 104 | """Returns a roidb (Region of Interest database) for use in training.""" 105 | if cfg.TRAIN.USE_FLIPPED: 106 | print 'Appending horizontally-flipped training examples...' 107 | imdb.append_flipped_images() 108 | print 'done' 109 | 110 | print 'Preparing training data...' 111 | rdl_roidb.prepare_roidb(imdb) 112 | print 'done' 113 | 114 | return imdb.roidb 115 | 116 | def train_net(solver_prototxt, roidb, output_dir, 117 | pretrained_model=None, max_iters=40000): 118 | """Train a Fast R-CNN network.""" 119 | sw = SolverWrapper(solver_prototxt, roidb, output_dir, 120 | pretrained_model=pretrained_model) 121 | 122 | print 'Solving...' 
123 | sw.train_model(max_iters) 124 | print 'done solving' 125 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | RoIDataLayer implements a Caffe Python layer. 
11 | """ 12 | 13 | import caffe 14 | from fast_rcnn.config import cfg 15 | from roi_data_layer.minibatch import get_minibatch 16 | import numpy as np 17 | import yaml 18 | from multiprocessing import Process, Queue 19 | 20 | class RoIDataLayer(caffe.Layer): 21 | """Fast R-CNN data layer used for training.""" 22 | 23 | def _shuffle_roidb_inds(self): 24 | """Randomly permute the training roidb.""" 25 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 26 | self._cur = 0 27 | 28 | def _get_next_minibatch_inds(self): 29 | """Return the roidb indices for the next minibatch.""" 30 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 31 | self._shuffle_roidb_inds() 32 | 33 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 34 | self._cur += cfg.TRAIN.IMS_PER_BATCH 35 | return db_inds 36 | 37 | def _get_next_minibatch(self): 38 | """Return the blobs to be used for the next minibatch. 39 | 40 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 41 | separate process and made available through self._blob_queue. 
42 | """ 43 | if cfg.TRAIN.USE_PREFETCH: 44 | return self._blob_queue.get() 45 | else: 46 | db_inds = self._get_next_minibatch_inds() 47 | minibatch_db = [self._roidb[i] for i in db_inds] 48 | return get_minibatch(minibatch_db, self._num_classes) 49 | 50 | def set_roidb(self, roidb): 51 | """Set the roidb to be used by this layer during training.""" 52 | self._roidb = roidb 53 | self._shuffle_roidb_inds() 54 | if cfg.TRAIN.USE_PREFETCH: 55 | self._blob_queue = Queue(10) 56 | self._prefetch_process = BlobFetcher(self._blob_queue, 57 | self._roidb, 58 | self._num_classes) 59 | self._prefetch_process.start() 60 | # Terminate the child process when the parent exists 61 | def cleanup(): 62 | print 'Terminating BlobFetcher' 63 | self._prefetch_process.terminate() 64 | self._prefetch_process.join() 65 | import atexit 66 | atexit.register(cleanup) 67 | 68 | def setup(self, bottom, top): 69 | """Setup the RoIDataLayer.""" 70 | 71 | # parse the layer parameter string, which must be valid YAML 72 | layer_params = yaml.load(self.param_str_) 73 | 74 | self._num_classes = layer_params['num_classes'] 75 | 76 | self._name_to_top_map = { 77 | 'data': 0, 78 | 'rois': 1, 79 | 'labels': 2} 80 | 81 | # data blob: holds a batch of N images, each with 3 channels 82 | # The height and width (100 x 100) are dummy values 83 | top[0].reshape(1, 3, 100, 100) 84 | 85 | # rois blob: holds R regions of interest, each is a 5-tuple 86 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 87 | # rectangle (x1, y1, x2, y2) 88 | top[1].reshape(1, 5) 89 | 90 | # labels blob: R categorical labels in [0, ..., K] for K foreground 91 | # classes plus background 92 | top[2].reshape(1) 93 | 94 | if cfg.TRAIN.BBOX_REG: 95 | self._name_to_top_map['bbox_targets'] = 3 96 | self._name_to_top_map['bbox_loss_weights'] = 4 97 | 98 | # bbox_targets blob: R bounding-box regression targets with 4 99 | # targets per class 100 | top[3].reshape(1, self._num_classes * 4) 101 | 102 | # bbox_loss_weights blob: At 
class BlobFetcher(Process):
    """Experimental class for prefetching blobs in a separate process."""
    def __init__(self, queue, roidb, num_classes):
        """
        Args:
            queue: multiprocessing queue that receives prefetched blob dicts.
            roidb: list of roidb entries to sample minibatches from.
            num_classes: number of object classes (including background).
        """
        super(BlobFetcher, self).__init__()
        self._queue = queue
        self._roidb = roidb
        self._num_classes = num_classes
        self._perm = None
        self._cur = 0
        # Fix the random seed for reproducibility BEFORE the first shuffle.
        # Previously the seed was set after _shuffle_roidb_inds(), leaving
        # the initial permutation unseeded and runs non-reproducible.
        # NOTE(review): on fork-based platforms the child process inherits
        # this state; consider reseeding in run() — confirm desired semantics.
        np.random.seed(cfg.RNG_SEED)
        self._shuffle_roidb_inds()

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb."""
        # TODO(rbg): remove duplicated code
        self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch."""
        # TODO(rbg): remove duplicated code
        # Reshuffle once the epoch is (nearly) exhausted.
        if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
            self._shuffle_roidb_inds()

        db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
        self._cur += cfg.TRAIN.IMS_PER_BATCH
        return db_inds

    def run(self):
        """Produce minibatch blobs forever, pushing each onto the queue."""
        # print() call form so the module also parses under Python 3.
        print('BlobFetcher started')
        while True:
            db_inds = self._get_next_minibatch_inds()
            minibatch_db = [self._roidb[i] for i in db_inds]
            blobs = get_minibatch(minibatch_db, self._num_classes)
            self._queue.put(blobs)
def get_minibatch(roidb, num_classes):
    """Given a roidb, construct a minibatch sampled from it.

    Args:
        roidb: list of roidb entries (one per image), each holding the image
            path, boxes, and precomputed overlap/target metadata.
        num_classes: number of object classes (including background).

    Returns:
        dict of network input blobs: 'data', 'rois', 'labels', and — when
        cfg.TRAIN.BBOX_REG — 'bbox_targets' / 'bbox_loss_weights'.
    """
    num_images = len(roidb)
    # Sample random scales to use for each image in this batch
    random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
                                    size=num_images)
    assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
        'num_images ({}) must divide BATCH_SIZE ({})'. \
        format(num_images, cfg.TRAIN.BATCH_SIZE)
    # Floor division keeps the per-image RoI count an integer under Python 3
    # (plain '/' would yield a float and break downstream indexing).
    rois_per_image = cfg.TRAIN.BATCH_SIZE // num_images
    # np.round returns a float; cast so sampling sizes stay integral.
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))

    # Get the input image blob, formatted for caffe
    im_blob, im_scales = _get_image_blob(roidb, random_scale_inds)

    # Now, build the region of interest and label blobs
    rois_blob = np.zeros((0, 5), dtype=np.float32)
    labels_blob = np.zeros((0), dtype=np.float32)
    bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32)
    bbox_loss_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32)
    # all_overlaps = []
    for im_i in range(num_images):
        labels, overlaps, im_rois, bbox_targets, bbox_loss \
            = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image,
                           num_classes)

        # Add to RoIs blob; column 0 is the image's index within the batch.
        rois = _project_im_rois(im_rois, im_scales[im_i])
        batch_ind = im_i * np.ones((rois.shape[0], 1))
        rois_blob_this_image = np.hstack((batch_ind, rois))
        rois_blob = np.vstack((rois_blob, rois_blob_this_image))

        # Add to labels, bbox targets, and bbox loss blobs
        labels_blob = np.hstack((labels_blob, labels))
        bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets))
        bbox_loss_blob = np.vstack((bbox_loss_blob, bbox_loss))
        # all_overlaps = np.hstack((all_overlaps, overlaps))

    # For debug visualizations
    # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps)

    blobs = {'data': im_blob,
             'rois': rois_blob,
             'labels': labels_blob}

    if cfg.TRAIN.BBOX_REG:
        blobs['bbox_targets'] = bbox_targets_blob
        blobs['bbox_loss_weights'] = bbox_loss_blob

    return blobs
70 | """ 71 | # label = class RoI has max overlap with 72 | labels = roidb['max_classes'] 73 | overlaps = roidb['max_overlaps'] 74 | rois = roidb['boxes'] 75 | 76 | # Select foreground RoIs as those with >= FG_THRESH overlap 77 | fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] 78 | # Guard against the case when an image has fewer than fg_rois_per_image 79 | # foreground RoIs 80 | fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) 81 | # Sample foreground regions without replacement 82 | if fg_inds.size > 0: 83 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, 84 | replace=False) 85 | 86 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 87 | bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & 88 | (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 89 | # Compute number of background RoIs to take from this image (guarding 90 | # against there being fewer than desired) 91 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 92 | bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, 93 | bg_inds.size) 94 | # Sample foreground regions without replacement 95 | if bg_inds.size > 0: 96 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, 97 | replace=False) 98 | 99 | # The indices that we're selecting (both fg and bg) 100 | keep_inds = np.append(fg_inds, bg_inds) 101 | # Select sampled values from various arrays: 102 | labels = labels[keep_inds] 103 | # Clamp labels for the background RoIs to 0 104 | labels[fg_rois_per_this_image:] = 0 105 | overlaps = overlaps[keep_inds] 106 | rois = rois[keep_inds] 107 | 108 | bbox_targets, bbox_loss_weights = \ 109 | _get_bbox_regression_labels(roidb['bbox_targets'][keep_inds, :], 110 | num_classes) 111 | 112 | return labels, overlaps, rois, bbox_targets, bbox_loss_weights 113 | 114 | def _get_image_blob(roidb, scale_inds): 115 | """Builds an input blob from the images in the roidb at the specified 116 | scales. 
117 | """ 118 | num_images = len(roidb) 119 | processed_ims = [] 120 | im_scales = [] 121 | for i in xrange(num_images): 122 | im = cv2.imread(roidb[i]['image']) 123 | if roidb[i]['flipped']: 124 | im = im[:, ::-1, :] 125 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 126 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 127 | cfg.TRAIN.MAX_SIZE) 128 | im_scales.append(im_scale) 129 | processed_ims.append(im) 130 | 131 | # Create a blob to hold the input images 132 | blob = im_list_to_blob(processed_ims) 133 | 134 | return blob, im_scales 135 | 136 | def _project_im_rois(im_rois, im_scale_factor): 137 | """Project image RoIs into the rescaled training image.""" 138 | rois = im_rois * im_scale_factor 139 | return rois 140 | 141 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 142 | """Bounding-box regression targets are stored in a compact form in the 143 | roidb. 144 | 145 | This function expands those targets into the 4-of-4*K representation used 146 | by the network (i.e. only one class has non-zero targets). The loss weights 147 | are similarly expanded. 148 | 149 | Returns: 150 | bbox_target_data (ndarray): N x 4K blob of regression targets 151 | bbox_loss_weights (ndarray): N x 4K blob of loss weights 152 | """ 153 | clss = bbox_target_data[:, 0] 154 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 155 | bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 156 | inds = np.where(clss > 0)[0] 157 | for ind in inds: 158 | cls = clss[ind] 159 | start = 4 * cls 160 | end = start + 4 161 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 162 | bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.] 
def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps):
    """Visualize a mini-batch for debugging.

    Shows each RoI drawn on its (de-meaned, BGR->RGB) source image and prints
    its class label and overlap. Debug-only; blocks on plt.show().
    """
    import matplotlib.pyplot as plt
    for i in range(rois_blob.shape[0]):
        rois = rois_blob[i, :]
        # Column 0 of the RoI blob is the (float) batch image index.
        im_ind = int(rois[0])
        roi = rois[1:]
        im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy()
        im += cfg.PIXEL_MEANS
        im = im[:, :, (2, 1, 0)]
        im = im.astype(np.uint8)
        cls = labels_blob[i]
        plt.imshow(im)
        # print() call form so the module also parses under Python 3.
        print('class: ', cls, ' overlap: ', overlaps[i])
        plt.gca().add_patch(
            plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0],
                          roi[3] - roi[1], fill=False,
                          edgecolor='r', linewidth=3)
            )
        plt.show()
20 | """ 21 | roidb = imdb.roidb 22 | for i in xrange(len(imdb.image_index)): 23 | roidb[i]['image'] = imdb.image_path_at(i) 24 | # need gt_overlaps as a dense array for argmax 25 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 26 | # max overlap with gt over classes (columns) 27 | max_overlaps = gt_overlaps.max(axis=1) 28 | # gt class that had the max overlap 29 | max_classes = gt_overlaps.argmax(axis=1) 30 | roidb[i]['max_classes'] = max_classes 31 | roidb[i]['max_overlaps'] = max_overlaps 32 | # sanity checks 33 | # max overlap of 0 => class should be zero (background) 34 | zero_inds = np.where(max_overlaps == 0)[0] 35 | assert all(max_classes[zero_inds] == 0) 36 | # max overlap > 0 => class should not be zero (must be a fg class) 37 | nonzero_inds = np.where(max_overlaps > 0)[0] 38 | assert all(max_classes[nonzero_inds] != 0) 39 | 40 | def add_bbox_regression_targets(roidb): 41 | """Add information needed to train bounding-box regressors.""" 42 | assert len(roidb) > 0 43 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 
def add_bbox_regression_targets(roidb):
    """Add information needed to train bounding-box regressors.

    Computes per-RoI regression targets for each image (stored under
    'bbox_targets'), then normalizes them per class to zero mean and unit
    variance.

    Returns:
        (means, stds): flattened per-class target means and standard
        deviations, needed later to un-normalize network predictions.
    """
    assert len(roidb) > 0
    assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'

    num_images = len(roidb)
    # Infer number of classes from the number of columns in gt_overlaps
    num_classes = roidb[0]['gt_overlaps'].shape[1]
    for im_i in range(num_images):
        rois = roidb[im_i]['boxes']
        max_overlaps = roidb[im_i]['max_overlaps']
        max_classes = roidb[im_i]['max_classes']
        roidb[im_i]['bbox_targets'] = \
            _compute_targets(rois, max_overlaps, max_classes)

    # Compute values needed for means and stds
    # var(x) = E(x^2) - E(x)^2
    class_counts = np.zeros((num_classes, 1)) + cfg.EPS
    sums = np.zeros((num_classes, 4))
    squared_sums = np.zeros((num_classes, 4))
    for im_i in range(num_images):
        targets = roidb[im_i]['bbox_targets']
        # Class 0 is background and carries no regression targets.
        for cls in range(1, num_classes):
            cls_inds = np.where(targets[:, 0] == cls)[0]
            if cls_inds.size > 0:
                class_counts[cls] += cls_inds.size
                sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
                squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0)

    means = sums / class_counts
    stds = np.sqrt(squared_sums / class_counts - means ** 2)

    # Normalize targets
    for im_i in range(num_images):
        targets = roidb[im_i]['bbox_targets']
        for cls in range(1, num_classes):
            cls_inds = np.where(targets[:, 0] == cls)[0]
            roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
            roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]

    # These values will be needed for making predictions
    # (the predicts will need to be unnormalized and uncentered)
    return means.ravel(), stds.ravel()
def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image.

    Returns:
        N x 5 float32 array; column 0 is the class label (0 for RoIs below
        BBOX_THRESH), columns 1:5 are (dx, dy, dw, dh) toward the assigned
        ground-truth box.
    """
    # Ensure ROIs are floats. np.float64 replaces the deprecated np.float
    # alias (removed in NumPy 1.24); both mean the builtin float / float64.
    rois = rois.astype(np.float64, copy=False)

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = utils.cython_bbox.bbox_overlaps(rois[ex_inds, :],
                                                     rois[gt_inds, :])

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    # cfg.EPS keeps widths/heights strictly positive for the divisions/logs.
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1] = targets_dx
    targets[ex_inds, 2] = targets_dy
    targets[ex_inds, 3] = targets_dw
    targets[ex_inds, 4] = targets_dh
    return targets
# Build the Cython extension modules (bbox overlaps and NMS).
cmdclass = {'build_ext': build_ext}

def _cython_ext(name, sources):
    # Helper: all our extensions share the same warning-suppression flags.
    return Extension(name, sources,
                     extra_compile_args=["-Wno-cpp", "-Wno-unused-function"])

ext_modules = [
    _cython_ext("utils.cython_bbox", ["utils/bbox.pyx"]),
    _cython_ext("utils.cython_nms", ["utils/nms.pyx"]),
]

setup(
    name='fast_rcnn',
    cmdclass=cmdclass,
    ext_modules=ext_modules,
    include_dirs=[np.get_include()],
)
def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # The +1 terms treat box coordinates as inclusive pixel indices.
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # Intersection width; if <= 0 the boxes cannot overlap, so the
            # entry stays at its zero initialization.
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # ua = union area = area(box n) + area(query k) - inter.
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
17 | """ 18 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 19 | num_images = len(ims) 20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 21 | dtype=np.float32) 22 | for i in xrange(num_images): 23 | im = ims[i] 24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 25 | # Move channels (axis 3) to axis 1 26 | # Axis order will become: (batch elem, channel, height, width) 27 | channel_swap = (0, 3, 1, 2) 28 | blob = blob.transpose(channel_swap) 29 | return blob 30 | 31 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 32 | """Mean subtract and scale an image for use in a blob.""" 33 | im = im.astype(np.float32, copy=False) 34 | im -= pixel_means 35 | im_shape = im.shape 36 | im_size_min = np.min(im_shape[0:2]) 37 | im_size_max = np.max(im_shape[0:2]) 38 | im_scale = float(target_size) / float(im_size_min) 39 | # Prevent the biggest axis from being more than MAX_SIZE 40 | if np.round(im_scale * im_size_max) > max_size: 41 | im_scale = float(max_size) / float(im_size_max) 42 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 43 | interpolation=cv2.INTER_LINEAR) 44 | 45 | return im, im_scale 46 | -------------------------------------------------------------------------------- /lib/utils/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def nms(dets, thresh): 11 | x1 = dets[:, 0] 12 | y1 = dets[:, 1] 13 | x2 = dets[:, 2] 14 | y2 = dets[:, 3] 15 | scores = dets[:, 4] 16 | 17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 18 | order = scores.argsort()[::-1] 19 | 20 | keep = [] 21 | while order.size > 0: 22 | i = order[0] 23 | keep.append(i) 24 | xx1 = np.maximum(x1[i], x1[order[1:]]) 25 | 
def nms(dets, thresh):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    remaining box whose IoU with it exceeds `thresh`.

    Args:
        dets: N x 5 array; columns are x1, y1, x2, y2, score.
        thresh: IoU threshold above which a lower-scoring box is suppressed.

    Returns:
        list of indices (into dets) of the kept boxes, best score first.
    """
    coords = dets[:, :4]
    scores = dets[:, 4]
    # +1 on each side length: coordinates are inclusive pixel indices.
    areas = (coords[:, 2] - coords[:, 0] + 1) * (coords[:, 3] - coords[:, 1] + 1)

    remaining = scores.argsort()[::-1]  # highest score first
    keep = []
    while remaining.size > 0:
        best = remaining[0]
        keep.append(best)
        rest = remaining[1:]

        # Intersection of `best` with every other remaining box.
        ix1 = np.maximum(coords[best, 0], coords[rest, 0])
        iy1 = np.maximum(coords[best, 1], coords[rest, 1])
        ix2 = np.minimum(coords[best, 2], coords[rest, 2])
        iy2 = np.minimum(coords[best, 3], coords[rest, 3])
        iw = np.maximum(0.0, ix2 - ix1 + 1)
        ih = np.maximum(0.0, iy2 - iy1 + 1)
        inter = iw * ih

        iou = inter / (areas[best] + areas[rest] - inter)
        # Drop everything that overlaps `best` too much.
        remaining = rest[iou <= thresh]

    return keep
def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    # Greedy non-maximum suppression over N x 5 detections
    # (x1, y1, x2, y2, score); returns indices of kept boxes.
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    # +1: inclusive pixel coordinates.
    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # 1 marks a detection already suppressed by a higher-scoring box.
    cdef np.ndarray[np.int_t, ndim=1] suppressed = \
        np.zeros((ndets), dtype=np.int)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            # Intersection rectangle of boxes i and j (clamped to 0 below).
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            # IoU = intersection / union.
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
class Timer(object):
    """Accumulates wall-clock durations across repeated tic/toc calls."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        # time.time() rather than time.clock(): the latter does not
        # normalize for multithreading.
        self.start_time = time.time()

    def toc(self, average=True):
        """Stop timing; return the average elapsed time per call, or the
        last interval when average is False."""
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff
% ------------------------------------------------------------------------
function demo(model, im_id, cls_inds, cls_names)
% ------------------------------------------------------------------------
% Run detection on one bundled demo image and display each requested class.
[folder, name, ext] = fileparts(mfilename('fullpath'));
box_file = fullfile(folder, '..', 'data', 'demo', [im_id '_boxes.mat']);
% Boxes were saved with 0-based indexing
ld = load(box_file); boxes = single(ld.boxes) + 1; clear ld;
im_file = fullfile(folder, '..', 'data', 'demo', [im_id '.jpg']);
im = imread(im_file);
dets = fast_rcnn_im_detect(model, im, boxes);

% Only show detections scoring at least THRESH.
THRESH = 0.8;
for j = 1:length(cls_inds)
  cls_ind = cls_inds(j);
  cls_name = cls_names{j};
  I = find(dets{cls_ind}(:, end) >= THRESH);
  showboxes(im, dets{cls_ind}(I, :));
  title(sprintf('%s detections with p(%s | box) >= %.3f', ...
                cls_name, cls_name, THRESH))
  fprintf('\n> Press any key to continue');
  pause;
end
function dets = fast_rcnn_im_detect(model, im, boxes)
% Perform detection a Fast R-CNN network given an image and
% object proposals.
% Returns a cell array with one [x1 y1 x2 y2 score] matrix per
% non-background class, after per-class NMS.

if model.init_key ~= caffe('get_init_key')
  error('You probably need call fast_rcnn_load_net() first.');
end

[im_batch, scales] = image_pyramid(im, model.pixel_means, false);

[feat_pyra_boxes, feat_pyra_levels] = project_im_rois(boxes, scales);
rois = cat(2, feat_pyra_levels, feat_pyra_boxes);
% Adjust to 0-based indexing and make roi info the fastest dimension
rois = rois - 1;
rois = permute(rois, [2 1]);

input_blobs = cell(2, 1);
input_blobs{1} = im_batch;
input_blobs{2} = rois;
th = tic();
blobs_out = caffe('forward', input_blobs);
fprintf('fwd: %.3fs\n', toc(th));

bbox_deltas = squeeze(blobs_out{1})';
probs = squeeze(blobs_out{2})';

num_classes = size(probs, 2);
dets = cell(num_classes - 1, 1);
NMS_THRESH = 0.3;
% class index 1 is __background__, so we don't return it
for j = 2:num_classes
  cls_probs = probs(:, j);
  % Each class owns 4 consecutive columns of the bbox delta blob.
  cls_deltas = bbox_deltas(:, (1 + (j - 1) * 4):(j * 4));
  pred_boxes = bbox_pred(boxes, cls_deltas);
  cls_dets = [pred_boxes cls_probs];
  keep = nms(cls_dets, NMS_THRESH);
  cls_dets = cls_dets(keep, :);
  dets{j - 1} = cls_dets;
end

% ------------------------------------------------------------------------
function [batch, scales] = image_pyramid(im, pixel_means, multiscale)
% ------------------------------------------------------------------------
% Construct an image pyramid that's ready for feeding directly into caffe
if ~multiscale
  SCALES = [600];
  MAX_SIZE = 1000;
else
  SCALES = [1200 864 688 576 480];
  MAX_SIZE = 2000;
end
num_levels = length(SCALES);

im = single(im);
% Convert to BGR
im = im(:, :, [3 2 1]);
% Subtract mean (mean of the image mean--one mean per channel)
im = bsxfun(@minus, im, pixel_means);

im_orig = im;
im_size = min([size(im_orig, 1) size(im_orig, 2)]);
im_size_big = max([size(im_orig, 1) size(im_orig, 2)]);
scale_factors = SCALES ./ im_size;

max_size = [0 0 0];
for i = 1:num_levels
  % Cap the longer side at MAX_SIZE (overrides the short-side target).
  if round(im_size_big * scale_factors(i)) > MAX_SIZE
    scale_factors(i) = MAX_SIZE / im_size_big;
  end
  ims{i} = imresize(im_orig, scale_factors(i), 'bilinear', ...
                    'antialiasing', false);
  max_size = max(cat(1, max_size, size(ims{i})), [], 1);
end

% Note width-first dims: caffe wants width as the fastest dimension.
batch = zeros(max_size(2), max_size(1), 3, num_levels, 'single');
for i = 1:num_levels
  im = ims{i};
  im_sz = size(im);
  im_sz = im_sz(1:2);
  % Make width the fastest dimension (for caffe)
  im = permute(im, [2 1 3]);
  batch(1:im_sz(2), 1:im_sz(1), :, i) = im;
end
scales = scale_factors';

% ------------------------------------------------------------------------
function [boxes, levels] = project_im_rois(boxes, scales)
% ------------------------------------------------------------------------
% Assign each RoI the pyramid level whose scaled area is closest to
% 224 x 224, then scale its coordinates into that level.
widths = boxes(:,3) - boxes(:,1) + 1;
heights = boxes(:,4) - boxes(:,2) + 1;

areas = widths .* heights;
scaled_areas = bsxfun(@times, areas, (scales.^2)');
diff_areas = abs(scaled_areas - (224 * 224));
[~, levels] = min(diff_areas, [], 2);

boxes = boxes - 1;
boxes = bsxfun(@times, boxes, scales(levels));
boxes = boxes + 1;

% ------------------------------------------------------------------------
function pred_boxes = bbox_pred(boxes, bbox_deltas)
% ------------------------------------------------------------------------
% Apply predicted (dx, dy, dw, dh) deltas to proposal boxes, producing
% [x1 y1 x2 y2] predictions.
if isempty(boxes)
  pred_boxes = [];
  return;
end

Y = bbox_deltas;

% Read out predictions
dst_ctr_x = Y(:, 1);
dst_ctr_y = Y(:, 2);
dst_scl_x = Y(:, 3);
dst_scl_y = Y(:, 4);

src_w = boxes(:, 3) - boxes(:, 1) + eps;
src_h = boxes(:, 4) - boxes(:, 2) + eps;
src_ctr_x = boxes(:, 1) + 0.5 * src_w;
src_ctr_y = boxes(:, 2) + 0.5 * src_h;

% Deltas shift the center by a fraction of the box size and scale the
% width/height exponentially.
pred_ctr_x = (dst_ctr_x .* src_w) + src_ctr_x;
pred_ctr_y = (dst_ctr_y .* src_h) + src_ctr_y;
pred_w = exp(dst_scl_x) .* src_w;
pred_h = exp(dst_scl_y) .* src_h;
pred_boxes = [pred_ctr_x - 0.5 * pred_w, pred_ctr_y - 0.5 * pred_h, ...
              pred_ctr_x + 0.5 * pred_w, pred_ctr_y + 0.5 * pred_h];
function model = fast_rcnn_load_net(def, net, use_gpu)
% Load a Fast R-CNN network.
% def: path to the test prototxt; net: path to the caffemodel weights;
% use_gpu: optional, defaults to GPU mode when absent.

init_key = caffe('init', def, net, 'test');
if exist('use_gpu', 'var') && ~use_gpu
  caffe('set_mode_cpu');
else
  caffe('set_mode_gpu');
end

% init_key lets callers detect a stale/reinitialized caffe instance.
model.init_key = init_key;
% model.stride is correct for the included models, but may not be correct
% for other models!
model.stride = 16;
% BGR channel means used for mean subtraction.
model.pixel_means = reshape([102.9801, 115.9465, 122.7717], [1 1 3]);
7 | % 8 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m), 9 | % but an inner loop has been eliminated to significantly speed it 10 | % up in the case of a large number of boxes 11 | 12 | % Copyright (C) 2011-12 by Tomasz Malisiewicz 13 | % All rights reserved. 14 | % 15 | % This file is part of the Exemplar-SVM library and is made 16 | % available under the terms of the MIT license (see COPYING file). 17 | % Project homepage: https://github.com/quantombone/exemplarsvm 18 | 19 | 20 | if isempty(boxes) 21 | pick = []; 22 | return; 23 | end 24 | 25 | x1 = boxes(:,1); 26 | y1 = boxes(:,2); 27 | x2 = boxes(:,3); 28 | y2 = boxes(:,4); 29 | s = boxes(:,end); 30 | 31 | area = (x2-x1+1) .* (y2-y1+1); 32 | [vals, I] = sort(s); 33 | 34 | pick = s*0; 35 | counter = 1; 36 | while ~isempty(I) 37 | last = length(I); 38 | i = I(last); 39 | pick(counter) = i; 40 | counter = counter + 1; 41 | 42 | xx1 = max(x1(i), x1(I(1:last-1))); 43 | yy1 = max(y1(i), y1(I(1:last-1))); 44 | xx2 = min(x2(i), x2(I(1:last-1))); 45 | yy2 = min(y2(i), y2(I(1:last-1))); 46 | 47 | w = max(0.0, xx2-xx1+1); 48 | h = max(0.0, yy2-yy1+1); 49 | 50 | inter = w.*h; 51 | o = inter ./ (area(i) + area(I(1:last-1)) - inter); 52 | 53 | I = I(find(o<=overlap)); 54 | end 55 | 56 | pick = pick(1:(counter-1)); 57 | -------------------------------------------------------------------------------- /matlab/showboxes.m: -------------------------------------------------------------------------------- 1 | % -------------------------------------------------------- 2 | % Fast R-CNN 3 | % Copyright (c) 2015 Microsoft 4 | % Licensed under The MIT License [see LICENSE for details] 5 | % Written by Ross Girshick 6 | % -------------------------------------------------------- 7 | 8 | function showboxes(im, boxes) 9 | 10 | image(im); 11 | axis image; 12 | axis off; 13 | set(gcf, 'Color', 'white'); 14 | 15 | if ~isempty(boxes) 16 | x1 = boxes(:, 1); 17 | y1 = boxes(:, 2); 18 | x2 = boxes(:, 3); 19 | y2 = boxes(:, 4); 
20 | c = 'r'; 21 | s = '-'; 22 | line([x1 x1 x2 x2 x1]', [y1 y2 y2 y1 y1]', ... 23 | 'color', c, 'linewidth', 2, 'linestyle', s); 24 | for i = 1:size(boxes, 1) 25 | text(double(x1(i)), double(y1(i)) - 2, ... 26 | sprintf('%.3f', boxes(i, end)), ... 27 | 'backgroundcolor', 'r', 'color', 'w'); 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /models/CaffeNet/compressed/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 227 7 | dim: 227 8 | } 9 | input: "rois" 10 | input_shape { 11 | dim: 1 # to be changed on-the-fly to num ROIs 12 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 11 30 | pad: 5 31 | stride: 4 32 | } 33 | } 34 | layer { 35 | name: "relu1" 36 | type: "ReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 3 48 | pad: 1 49 | stride: 2 50 | } 51 | } 52 | layer { 53 | name: "norm1" 54 | type: "LRN" 55 | bottom: "pool1" 56 | top: "norm1" 57 | lrn_param { 58 | local_size: 5 59 | alpha: 0.0001 60 | beta: 0.75 61 | } 62 | } 63 | layer { 64 | name: "conv2" 65 | type: "Convolution" 66 | bottom: "norm1" 67 | top: "conv2" 68 | param { 69 | lr_mult: 1 70 | decay_mult: 1 71 | } 72 | param { 73 | lr_mult: 2 74 | decay_mult: 0 75 | } 76 | convolution_param { 77 | num_output: 256 78 | kernel_size: 5 79 | pad: 2 80 | group: 2 81 | } 82 | } 83 | layer { 84 | name: "relu2" 85 | type: "ReLU" 86 | bottom: "conv2" 87 | top: "conv2" 88 | } 89 | layer { 90 | name: "pool2" 
91 | type: "Pooling" 92 | bottom: "conv2" 93 | top: "pool2" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | pad: 1 98 | stride: 2 99 | } 100 | } 101 | layer { 102 | name: "norm2" 103 | type: "LRN" 104 | bottom: "pool2" 105 | top: "norm2" 106 | lrn_param { 107 | local_size: 5 108 | alpha: 0.0001 109 | beta: 0.75 110 | } 111 | } 112 | layer { 113 | name: "conv3" 114 | type: "Convolution" 115 | bottom: "norm2" 116 | top: "conv3" 117 | param { 118 | lr_mult: 1 119 | decay_mult: 1 120 | } 121 | param { 122 | lr_mult: 2 123 | decay_mult: 0 124 | } 125 | convolution_param { 126 | num_output: 384 127 | kernel_size: 3 128 | pad: 1 129 | } 130 | } 131 | layer { 132 | name: "relu3" 133 | type: "ReLU" 134 | bottom: "conv3" 135 | top: "conv3" 136 | } 137 | layer { 138 | name: "conv4" 139 | type: "Convolution" 140 | bottom: "conv3" 141 | top: "conv4" 142 | param { 143 | lr_mult: 1 144 | decay_mult: 1 145 | } 146 | param { 147 | lr_mult: 2 148 | decay_mult: 0 149 | } 150 | convolution_param { 151 | num_output: 384 152 | kernel_size: 3 153 | pad: 1 154 | group: 2 155 | } 156 | } 157 | layer { 158 | name: "relu4" 159 | type: "ReLU" 160 | bottom: "conv4" 161 | top: "conv4" 162 | } 163 | layer { 164 | name: "conv5" 165 | type: "Convolution" 166 | bottom: "conv4" 167 | top: "conv5" 168 | param { 169 | lr_mult: 1 170 | decay_mult: 1 171 | } 172 | param { 173 | lr_mult: 2 174 | decay_mult: 0 175 | } 176 | convolution_param { 177 | num_output: 256 178 | kernel_size: 3 179 | pad: 1 180 | group: 2 181 | } 182 | } 183 | layer { 184 | name: "relu5" 185 | type: "ReLU" 186 | bottom: "conv5" 187 | top: "conv5" 188 | } 189 | layer { 190 | name: "roi_pool5" 191 | type: "ROIPooling" 192 | bottom: "conv5" 193 | bottom: "rois" 194 | top: "pool5" 195 | roi_pooling_param { 196 | pooled_w: 6 197 | pooled_h: 6 198 | spatial_scale: 0.0625 # 1/16 199 | } 200 | } 201 | layer { 202 | name: "fc6_L" 203 | type: "InnerProduct" 204 | bottom: "pool5" 205 | top: "fc6_L" 206 | param { 207 | lr_mult: 1 
208 | decay_mult: 1 209 | } 210 | inner_product_param { 211 | num_output: 1024 212 | bias_term: false 213 | } 214 | } 215 | layer { 216 | name: "fc6_U" 217 | type: "InnerProduct" 218 | bottom: "fc6_L" 219 | top: "fc6_U" 220 | param { 221 | lr_mult: 1 222 | decay_mult: 1 223 | } 224 | param { 225 | lr_mult: 2 226 | decay_mult: 0 227 | } 228 | inner_product_param { 229 | num_output: 4096 230 | } 231 | } 232 | layer { 233 | name: "relu6" 234 | type: "ReLU" 235 | bottom: "fc6_U" 236 | top: "fc6_U" 237 | } 238 | layer { 239 | name: "drop6" 240 | type: "Dropout" 241 | bottom: "fc6_U" 242 | top: "fc6_U" 243 | dropout_param { 244 | dropout_ratio: 0.5 245 | } 246 | } 247 | layer { 248 | name: "fc7_L" 249 | type: "InnerProduct" 250 | bottom: "fc6_U" 251 | top: "fc7_L" 252 | param { 253 | lr_mult: 1 254 | decay_mult: 1 255 | } 256 | inner_product_param { 257 | num_output: 256 258 | bias_term: false 259 | } 260 | } 261 | layer { 262 | name: "fc7_U" 263 | type: "InnerProduct" 264 | bottom: "fc7_L" 265 | top: "fc7_U" 266 | param { 267 | lr_mult: 1 268 | decay_mult: 1 269 | } 270 | param { 271 | lr_mult: 2 272 | decay_mult: 0 273 | } 274 | inner_product_param { 275 | num_output: 4096 276 | } 277 | } 278 | layer { 279 | name: "relu7" 280 | type: "ReLU" 281 | bottom: "fc7_U" 282 | top: "fc7_U" 283 | } 284 | layer { 285 | name: "drop7" 286 | type: "Dropout" 287 | bottom: "fc7_U" 288 | top: "fc7_U" 289 | dropout_param { 290 | dropout_ratio: 0.5 291 | } 292 | } 293 | layer { 294 | name: "cls_score" 295 | type: "InnerProduct" 296 | bottom: "fc7_U" 297 | top: "cls_score" 298 | param { 299 | lr_mult: 1 300 | decay_mult: 1 301 | } 302 | param { 303 | lr_mult: 2 304 | decay_mult: 0 305 | } 306 | inner_product_param { 307 | num_output: 21 308 | weight_filler { 309 | type: "gaussian" 310 | std: 0.01 311 | } 312 | bias_filler { 313 | type: "constant" 314 | value: 0 315 | } 316 | } 317 | } 318 | layer { 319 | name: "bbox_pred" 320 | type: "InnerProduct" 321 | bottom: "fc7_U" 322 | top: 
"bbox_pred" 323 | param { 324 | lr_mult: 1 325 | decay_mult: 1 326 | } 327 | param { 328 | lr_mult: 2 329 | decay_mult: 0 330 | } 331 | inner_product_param { 332 | num_output: 84 333 | weight_filler { 334 | type: "gaussian" 335 | std: 0.001 336 | } 337 | bias_filler { 338 | type: "constant" 339 | value: 0 340 | } 341 | } 342 | } 343 | layer { 344 | name: "cls_prob" 345 | type: "Softmax" 346 | bottom: "cls_score" 347 | top: "cls_prob" 348 | } 349 | -------------------------------------------------------------------------------- /models/CaffeNet/no_bbox_reg/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/CaffeNet/no_bbox_reg/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "caffenet_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/CaffeNet/no_bbox_reg/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 227 7 | dim: 227 8 | } 9 | input: "rois" 10 | input_shape { 11 | dim: 1 # to be changed on-the-fly to num ROIs 12 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 11 30 | pad: 5 31 | stride: 4 32 | } 33 | } 34 | layer { 35 | name: "relu1" 36 | type: "ReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | 
layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 3 48 | pad: 1 49 | stride: 2 50 | } 51 | } 52 | layer { 53 | name: "norm1" 54 | type: "LRN" 55 | bottom: "pool1" 56 | top: "norm1" 57 | lrn_param { 58 | local_size: 5 59 | alpha: 0.0001 60 | beta: 0.75 61 | } 62 | } 63 | layer { 64 | name: "conv2" 65 | type: "Convolution" 66 | bottom: "norm1" 67 | top: "conv2" 68 | param { 69 | lr_mult: 1 70 | decay_mult: 1 71 | } 72 | param { 73 | lr_mult: 2 74 | decay_mult: 0 75 | } 76 | convolution_param { 77 | num_output: 256 78 | kernel_size: 5 79 | pad: 2 80 | group: 2 81 | } 82 | } 83 | layer { 84 | name: "relu2" 85 | type: "ReLU" 86 | bottom: "conv2" 87 | top: "conv2" 88 | } 89 | layer { 90 | name: "pool2" 91 | type: "Pooling" 92 | bottom: "conv2" 93 | top: "pool2" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | pad: 1 98 | stride: 2 99 | } 100 | } 101 | layer { 102 | name: "norm2" 103 | type: "LRN" 104 | bottom: "pool2" 105 | top: "norm2" 106 | lrn_param { 107 | local_size: 5 108 | alpha: 0.0001 109 | beta: 0.75 110 | } 111 | } 112 | layer { 113 | name: "conv3" 114 | type: "Convolution" 115 | bottom: "norm2" 116 | top: "conv3" 117 | param { 118 | lr_mult: 1 119 | decay_mult: 1 120 | } 121 | param { 122 | lr_mult: 2 123 | decay_mult: 0 124 | } 125 | convolution_param { 126 | num_output: 384 127 | kernel_size: 3 128 | pad: 1 129 | } 130 | } 131 | layer { 132 | name: "relu3" 133 | type: "ReLU" 134 | bottom: "conv3" 135 | top: "conv3" 136 | } 137 | layer { 138 | name: "conv4" 139 | type: "Convolution" 140 | bottom: "conv3" 141 | top: "conv4" 142 | param { 143 | lr_mult: 1 144 | decay_mult: 1 145 | } 146 | param { 147 | lr_mult: 2 148 | decay_mult: 0 149 | } 150 | convolution_param { 151 | num_output: 384 152 | kernel_size: 3 153 | pad: 1 154 | group: 2 155 | } 156 | } 157 | layer { 158 | name: "relu4" 159 | type: "ReLU" 160 | bottom: "conv4" 161 | top: "conv4" 162 | } 163 | 
layer { 164 | name: "conv5" 165 | type: "Convolution" 166 | bottom: "conv4" 167 | top: "conv5" 168 | param { 169 | lr_mult: 1 170 | decay_mult: 1 171 | } 172 | param { 173 | lr_mult: 2 174 | decay_mult: 0 175 | } 176 | convolution_param { 177 | num_output: 256 178 | kernel_size: 3 179 | pad: 1 180 | group: 2 181 | } 182 | } 183 | layer { 184 | name: "relu5" 185 | type: "ReLU" 186 | bottom: "conv5" 187 | top: "conv5" 188 | } 189 | layer { 190 | name: "roi_pool5" 191 | type: "ROIPooling" 192 | bottom: "conv5" 193 | bottom: "rois" 194 | top: "pool5" 195 | roi_pooling_param { 196 | pooled_w: 6 197 | pooled_h: 6 198 | spatial_scale: 0.0625 # 1/16 199 | } 200 | } 201 | layer { 202 | name: "fc6" 203 | type: "InnerProduct" 204 | bottom: "pool5" 205 | top: "fc6" 206 | param { 207 | lr_mult: 1 208 | decay_mult: 1 209 | } 210 | param { 211 | lr_mult: 2 212 | decay_mult: 0 213 | } 214 | inner_product_param { 215 | num_output: 4096 216 | } 217 | } 218 | layer { 219 | name: "relu6" 220 | type: "ReLU" 221 | bottom: "fc6" 222 | top: "fc6" 223 | } 224 | layer { 225 | name: "drop6" 226 | type: "Dropout" 227 | bottom: "fc6" 228 | top: "fc6" 229 | dropout_param { 230 | dropout_ratio: 0.5 231 | } 232 | } 233 | layer { 234 | name: "fc7" 235 | type: "InnerProduct" 236 | bottom: "fc6" 237 | top: "fc7" 238 | param { 239 | lr_mult: 1 240 | decay_mult: 1 241 | } 242 | param { 243 | lr_mult: 2 244 | decay_mult: 0 245 | } 246 | inner_product_param { 247 | num_output: 4096 248 | } 249 | } 250 | layer { 251 | name: "relu7" 252 | type: "ReLU" 253 | bottom: "fc7" 254 | top: "fc7" 255 | } 256 | layer { 257 | name: "drop7" 258 | type: "Dropout" 259 | bottom: "fc7" 260 | top: "fc7" 261 | dropout_param { 262 | dropout_ratio: 0.5 263 | } 264 | } 265 | layer { 266 | name: "cls_score" 267 | type: "InnerProduct" 268 | bottom: "fc7" 269 | top: "cls_score" 270 | param { 271 | lr_mult: 1 272 | decay_mult: 1 273 | } 274 | param { 275 | lr_mult: 2 276 | decay_mult: 0 277 | } 278 | inner_product_param { 279 | 
num_output: 21 280 | weight_filler { 281 | type: "gaussian" 282 | std: 0.01 283 | } 284 | bias_filler { 285 | type: "constant" 286 | value: 0 287 | } 288 | } 289 | } 290 | layer { 291 | name: "cls_prob" 292 | type: "Softmax" 293 | bottom: "cls_score" 294 | top: "cls_prob" 295 | } 296 | -------------------------------------------------------------------------------- /models/CaffeNet/no_bbox_reg/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | python_param { 9 | module: 'roi_data_layer.layer' 10 | layer: 'RoIDataLayer' 11 | param_str: "'num_classes': 21" 12 | } 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 11 30 | pad: 5 31 | stride: 4 32 | } 33 | } 34 | layer { 35 | name: "relu1" 36 | type: "ReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 3 48 | pad: 1 49 | stride: 2 50 | } 51 | } 52 | layer { 53 | name: "norm1" 54 | type: "LRN" 55 | bottom: "pool1" 56 | top: "norm1" 57 | lrn_param { 58 | local_size: 5 59 | alpha: 0.0001 60 | beta: 0.75 61 | } 62 | } 63 | layer { 64 | name: "conv2" 65 | type: "Convolution" 66 | bottom: "norm1" 67 | top: "conv2" 68 | param { 69 | lr_mult: 1 70 | decay_mult: 1 71 | } 72 | param { 73 | lr_mult: 2 74 | decay_mult: 0 75 | } 76 | convolution_param { 77 | num_output: 256 78 | kernel_size: 5 79 | pad: 2 80 | group: 2 81 | } 82 | } 83 | layer { 84 | name: "relu2" 85 | type: "ReLU" 86 | bottom: "conv2" 87 | top: "conv2" 88 | } 89 | layer { 90 | name: "pool2" 91 | type: "Pooling" 92 | bottom: "conv2" 93 
| top: "pool2" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | pad: 1 98 | stride: 2 99 | } 100 | } 101 | layer { 102 | name: "norm2" 103 | type: "LRN" 104 | bottom: "pool2" 105 | top: "norm2" 106 | lrn_param { 107 | local_size: 5 108 | alpha: 0.0001 109 | beta: 0.75 110 | } 111 | } 112 | layer { 113 | name: "conv3" 114 | type: "Convolution" 115 | bottom: "norm2" 116 | top: "conv3" 117 | param { 118 | lr_mult: 1 119 | decay_mult: 1 120 | } 121 | param { 122 | lr_mult: 2 123 | decay_mult: 0 124 | } 125 | convolution_param { 126 | num_output: 384 127 | kernel_size: 3 128 | pad: 1 129 | } 130 | } 131 | layer { 132 | name: "relu3" 133 | type: "ReLU" 134 | bottom: "conv3" 135 | top: "conv3" 136 | } 137 | layer { 138 | name: "conv4" 139 | type: "Convolution" 140 | bottom: "conv3" 141 | top: "conv4" 142 | param { 143 | lr_mult: 1 144 | decay_mult: 1 145 | } 146 | param { 147 | lr_mult: 2 148 | decay_mult: 0 149 | } 150 | convolution_param { 151 | num_output: 384 152 | kernel_size: 3 153 | pad: 1 154 | group: 2 155 | } 156 | } 157 | layer { 158 | name: "relu4" 159 | type: "ReLU" 160 | bottom: "conv4" 161 | top: "conv4" 162 | } 163 | layer { 164 | name: "conv5" 165 | type: "Convolution" 166 | bottom: "conv4" 167 | top: "conv5" 168 | param { 169 | lr_mult: 1 170 | decay_mult: 1 171 | } 172 | param { 173 | lr_mult: 2 174 | decay_mult: 0 175 | } 176 | convolution_param { 177 | num_output: 256 178 | kernel_size: 3 179 | pad: 1 180 | group: 2 181 | } 182 | } 183 | layer { 184 | name: "relu5" 185 | type: "ReLU" 186 | bottom: "conv5" 187 | top: "conv5" 188 | } 189 | layer { 190 | name: "roi_pool5" 191 | type: "ROIPooling" 192 | bottom: "conv5" 193 | bottom: "rois" 194 | top: "pool5" 195 | roi_pooling_param { 196 | pooled_w: 6 197 | pooled_h: 6 198 | spatial_scale: 0.0625 # 1/16 199 | } 200 | } 201 | layer { 202 | name: "fc6" 203 | type: "InnerProduct" 204 | bottom: "pool5" 205 | top: "fc6" 206 | param { 207 | lr_mult: 1 208 | decay_mult: 1 209 | } 210 | param { 211 | 
lr_mult: 2 212 | decay_mult: 0 213 | } 214 | inner_product_param { 215 | num_output: 4096 216 | } 217 | } 218 | layer { 219 | name: "relu6" 220 | type: "ReLU" 221 | bottom: "fc6" 222 | top: "fc6" 223 | } 224 | layer { 225 | name: "drop6" 226 | type: "Dropout" 227 | bottom: "fc6" 228 | top: "fc6" 229 | dropout_param { 230 | dropout_ratio: 0.5 231 | } 232 | } 233 | layer { 234 | name: "fc7" 235 | type: "InnerProduct" 236 | bottom: "fc6" 237 | top: "fc7" 238 | param { 239 | lr_mult: 1 240 | decay_mult: 1 241 | } 242 | param { 243 | lr_mult: 2 244 | decay_mult: 0 245 | } 246 | inner_product_param { 247 | num_output: 4096 248 | } 249 | } 250 | layer { 251 | name: "relu7" 252 | type: "ReLU" 253 | bottom: "fc7" 254 | top: "fc7" 255 | } 256 | layer { 257 | name: "drop7" 258 | type: "Dropout" 259 | bottom: "fc7" 260 | top: "fc7" 261 | dropout_param { 262 | dropout_ratio: 0.5 263 | } 264 | } 265 | layer { 266 | name: "cls_score" 267 | type: "InnerProduct" 268 | bottom: "fc7" 269 | top: "cls_score" 270 | param { 271 | lr_mult: 1 272 | decay_mult: 1 273 | } 274 | param { 275 | lr_mult: 2 276 | decay_mult: 0 277 | } 278 | inner_product_param { 279 | num_output: 21 280 | weight_filler { 281 | type: "gaussian" 282 | std: 0.01 283 | } 284 | bias_filler { 285 | type: "constant" 286 | value: 0 287 | } 288 | } 289 | } 290 | layer { 291 | name: "loss_cls" 292 | type: "SoftmaxWithLoss" 293 | bottom: "cls_score" 294 | bottom: "labels" 295 | top: "loss_cls" 296 | loss_weight: 1 297 | } 298 | -------------------------------------------------------------------------------- /models/CaffeNet/piecewise/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/CaffeNet/piecewise/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 
11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "caffenet_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/CaffeNet/piecewise/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | top: 'bbox_targets' 9 | top: 'bbox_loss_weights' 10 | python_param { 11 | module: 'roi_data_layer.layer' 12 | layer: 'RoIDataLayer' 13 | param_str: "'num_classes': 21" 14 | } 15 | } 16 | layer { 17 | name: "conv1" 18 | type: "Convolution" 19 | bottom: "data" 20 | top: "conv1" 21 | param { 22 | lr_mult: 0 23 | decay_mult: 0 24 | } 25 | param { 26 | lr_mult: 0 27 | decay_mult: 0 28 | } 29 | convolution_param { 30 | num_output: 96 31 | kernel_size: 11 32 | pad: 5 33 | stride: 4 34 | } 35 | } 36 | layer { 37 | name: "relu1" 38 | type: "ReLU" 39 | bottom: "conv1" 40 | top: "conv1" 41 | } 42 | layer { 43 | name: "pool1" 44 | type: "Pooling" 45 | bottom: "conv1" 46 | top: "pool1" 47 | pooling_param { 48 | pool: MAX 49 | kernel_size: 3 50 | pad: 1 51 | stride: 2 52 | } 53 | } 54 | layer { 55 | name: "norm1" 56 | type: "LRN" 57 | bottom: "pool1" 58 | top: "norm1" 59 | lrn_param { 60 | local_size: 5 61 | alpha: 0.0001 62 | beta: 0.75 63 | } 64 | } 65 | layer { 66 | name: "conv2" 67 | type: "Convolution" 68 | bottom: "norm1" 69 | top: "conv2" 70 | param { 71 | lr_mult: 0 72 | decay_mult: 0 73 | } 74 | param { 75 | lr_mult: 0 76 | decay_mult: 0 77 | } 78 | convolution_param { 79 | num_output: 256 80 | kernel_size: 5 81 | pad: 2 82 | group: 2 83 | } 84 | } 85 | layer { 86 | name: "relu2" 87 | type: "ReLU" 88 | bottom: "conv2" 89 | top: "conv2" 90 | } 91 | layer { 92 | name: "pool2" 93 | type: "Pooling" 94 | bottom: "conv2" 95 | top: "pool2" 96 | pooling_param { 97 | pool: MAX 98 | kernel_size: 3 99 | pad: 1 
100 | stride: 2 101 | } 102 | } 103 | layer { 104 | name: "norm2" 105 | type: "LRN" 106 | bottom: "pool2" 107 | top: "norm2" 108 | lrn_param { 109 | local_size: 5 110 | alpha: 0.0001 111 | beta: 0.75 112 | } 113 | } 114 | layer { 115 | name: "conv3" 116 | type: "Convolution" 117 | bottom: "norm2" 118 | top: "conv3" 119 | param { 120 | lr_mult: 0 121 | decay_mult: 0 122 | } 123 | param { 124 | lr_mult: 0 125 | decay_mult: 0 126 | } 127 | convolution_param { 128 | num_output: 384 129 | kernel_size: 3 130 | pad: 1 131 | } 132 | } 133 | layer { 134 | name: "relu3" 135 | type: "ReLU" 136 | bottom: "conv3" 137 | top: "conv3" 138 | } 139 | layer { 140 | name: "conv4" 141 | type: "Convolution" 142 | bottom: "conv3" 143 | top: "conv4" 144 | param { 145 | lr_mult: 0 146 | decay_mult: 0 147 | } 148 | param { 149 | lr_mult: 0 150 | decay_mult: 0 151 | } 152 | convolution_param { 153 | num_output: 384 154 | kernel_size: 3 155 | pad: 1 156 | group: 2 157 | } 158 | } 159 | layer { 160 | name: "relu4" 161 | type: "ReLU" 162 | bottom: "conv4" 163 | top: "conv4" 164 | } 165 | layer { 166 | name: "conv5" 167 | type: "Convolution" 168 | bottom: "conv4" 169 | top: "conv5" 170 | param { 171 | lr_mult: 0 172 | decay_mult: 0 173 | } 174 | param { 175 | lr_mult: 0 176 | decay_mult: 0 177 | } 178 | convolution_param { 179 | num_output: 256 180 | kernel_size: 3 181 | pad: 1 182 | group: 2 183 | } 184 | } 185 | layer { 186 | name: "relu5" 187 | type: "ReLU" 188 | bottom: "conv5" 189 | top: "conv5" 190 | } 191 | layer { 192 | name: "roi_pool5" 193 | type: "ROIPooling" 194 | bottom: "conv5" 195 | bottom: "rois" 196 | top: "pool5" 197 | roi_pooling_param { 198 | pooled_w: 6 199 | pooled_h: 6 200 | spatial_scale: 0.0625 # 1/16 201 | } 202 | } 203 | layer { 204 | name: "fc6" 205 | type: "InnerProduct" 206 | bottom: "pool5" 207 | top: "fc6" 208 | param { 209 | lr_mult: 0 210 | decay_mult: 0 211 | } 212 | param { 213 | lr_mult: 0 214 | decay_mult: 0 215 | } 216 | inner_product_param { 217 | 
num_output: 4096 218 | } 219 | } 220 | layer { 221 | name: "relu6" 222 | type: "ReLU" 223 | bottom: "fc6" 224 | top: "fc6" 225 | } 226 | layer { 227 | name: "drop6" 228 | type: "Dropout" 229 | bottom: "fc6" 230 | top: "fc6" 231 | dropout_param { 232 | dropout_ratio: 0.5 233 | } 234 | } 235 | layer { 236 | name: "fc7" 237 | type: "InnerProduct" 238 | bottom: "fc6" 239 | top: "fc7" 240 | param { 241 | lr_mult: 0 242 | decay_mult: 0 243 | } 244 | param { 245 | lr_mult: 0 246 | decay_mult: 0 247 | } 248 | inner_product_param { 249 | num_output: 4096 250 | } 251 | } 252 | layer { 253 | name: "relu7" 254 | type: "ReLU" 255 | bottom: "fc7" 256 | top: "fc7" 257 | } 258 | layer { 259 | name: "drop7" 260 | type: "Dropout" 261 | bottom: "fc7" 262 | top: "fc7" 263 | dropout_param { 264 | dropout_ratio: 0.5 265 | } 266 | } 267 | layer { 268 | name: "cls_score" 269 | type: "InnerProduct" 270 | bottom: "fc7" 271 | top: "cls_score" 272 | param { 273 | lr_mult: 0 274 | decay_mult: 0 275 | } 276 | param { 277 | lr_mult: 0 278 | decay_mult: 0 279 | } 280 | inner_product_param { 281 | num_output: 21 282 | weight_filler { 283 | type: "gaussian" 284 | std: 0.01 285 | } 286 | bias_filler { 287 | type: "constant" 288 | value: 0 289 | } 290 | } 291 | } 292 | layer { 293 | name: "bbox_pred" 294 | type: "InnerProduct" 295 | bottom: "fc7" 296 | top: "bbox_pred" 297 | param { 298 | lr_mult: 1 299 | decay_mult: 1 300 | } 301 | param { 302 | lr_mult: 2 303 | decay_mult: 0 304 | } 305 | inner_product_param { 306 | num_output: 84 307 | weight_filler { 308 | type: "gaussian" 309 | std: 0.001 310 | } 311 | bias_filler { 312 | type: "constant" 313 | value: 0 314 | } 315 | } 316 | } 317 | layer { 318 | name: "loss_cls" 319 | type: "SoftmaxWithLoss" 320 | bottom: "cls_score" 321 | bottom: "labels" 322 | top: "loss_cls" 323 | loss_weight: 0 324 | } 325 | layer { 326 | name: "loss_bbox" 327 | type: "SmoothL1Loss" 328 | bottom: "bbox_pred" 329 | bottom: "bbox_targets" 330 | bottom: "bbox_loss_weights" 331 
| top: "loss_bbox" 332 | loss_weight: 1 333 | } 334 | -------------------------------------------------------------------------------- /models/CaffeNet/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/CaffeNet/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "caffenet_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/CaffeNet/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 227 7 | dim: 227 8 | } 9 | input: "rois" 10 | input_shape { 11 | dim: 1 # to be changed on-the-fly to num ROIs 12 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 11 30 | pad: 5 31 | stride: 4 32 | } 33 | } 34 | layer { 35 | name: "relu1" 36 | type: "ReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 3 48 | pad: 1 49 | stride: 2 50 | } 51 | } 52 | layer { 53 | name: "norm1" 54 | type: "LRN" 55 | bottom: "pool1" 56 | top: "norm1" 57 | lrn_param { 58 | local_size: 5 59 | alpha: 0.0001 60 | beta: 0.75 61 | } 62 | } 63 | layer { 64 | name: "conv2" 65 | type: "Convolution" 66 | bottom: 
"norm1" 67 | top: "conv2" 68 | param { 69 | lr_mult: 1 70 | decay_mult: 1 71 | } 72 | param { 73 | lr_mult: 2 74 | decay_mult: 0 75 | } 76 | convolution_param { 77 | num_output: 256 78 | kernel_size: 5 79 | pad: 2 80 | group: 2 81 | } 82 | } 83 | layer { 84 | name: "relu2" 85 | type: "ReLU" 86 | bottom: "conv2" 87 | top: "conv2" 88 | } 89 | layer { 90 | name: "pool2" 91 | type: "Pooling" 92 | bottom: "conv2" 93 | top: "pool2" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | pad: 1 98 | stride: 2 99 | } 100 | } 101 | layer { 102 | name: "norm2" 103 | type: "LRN" 104 | bottom: "pool2" 105 | top: "norm2" 106 | lrn_param { 107 | local_size: 5 108 | alpha: 0.0001 109 | beta: 0.75 110 | } 111 | } 112 | layer { 113 | name: "conv3" 114 | type: "Convolution" 115 | bottom: "norm2" 116 | top: "conv3" 117 | param { 118 | lr_mult: 1 119 | decay_mult: 1 120 | } 121 | param { 122 | lr_mult: 2 123 | decay_mult: 0 124 | } 125 | convolution_param { 126 | num_output: 384 127 | kernel_size: 3 128 | pad: 1 129 | } 130 | } 131 | layer { 132 | name: "relu3" 133 | type: "ReLU" 134 | bottom: "conv3" 135 | top: "conv3" 136 | } 137 | layer { 138 | name: "conv4" 139 | type: "Convolution" 140 | bottom: "conv3" 141 | top: "conv4" 142 | param { 143 | lr_mult: 1 144 | decay_mult: 1 145 | } 146 | param { 147 | lr_mult: 2 148 | decay_mult: 0 149 | } 150 | convolution_param { 151 | num_output: 384 152 | kernel_size: 3 153 | pad: 1 154 | group: 2 155 | } 156 | } 157 | layer { 158 | name: "relu4" 159 | type: "ReLU" 160 | bottom: "conv4" 161 | top: "conv4" 162 | } 163 | layer { 164 | name: "conv5" 165 | type: "Convolution" 166 | bottom: "conv4" 167 | top: "conv5" 168 | param { 169 | lr_mult: 1 170 | decay_mult: 1 171 | } 172 | param { 173 | lr_mult: 2 174 | decay_mult: 0 175 | } 176 | convolution_param { 177 | num_output: 256 178 | kernel_size: 3 179 | pad: 1 180 | group: 2 181 | } 182 | } 183 | layer { 184 | name: "relu5" 185 | type: "ReLU" 186 | bottom: "conv5" 187 | top: "conv5" 188 | } 
189 | layer { 190 | name: "roi_pool5" 191 | type: "ROIPooling" 192 | bottom: "conv5" 193 | bottom: "rois" 194 | top: "pool5" 195 | roi_pooling_param { 196 | pooled_w: 6 197 | pooled_h: 6 198 | spatial_scale: 0.0625 # 1/16 199 | } 200 | } 201 | layer { 202 | name: "fc6" 203 | type: "InnerProduct" 204 | bottom: "pool5" 205 | top: "fc6" 206 | param { 207 | lr_mult: 1 208 | decay_mult: 1 209 | } 210 | param { 211 | lr_mult: 2 212 | decay_mult: 0 213 | } 214 | inner_product_param { 215 | num_output: 4096 216 | } 217 | } 218 | layer { 219 | name: "relu6" 220 | type: "ReLU" 221 | bottom: "fc6" 222 | top: "fc6" 223 | } 224 | layer { 225 | name: "drop6" 226 | type: "Dropout" 227 | bottom: "fc6" 228 | top: "fc6" 229 | dropout_param { 230 | dropout_ratio: 0.5 231 | } 232 | } 233 | layer { 234 | name: "fc7" 235 | type: "InnerProduct" 236 | bottom: "fc6" 237 | top: "fc7" 238 | param { 239 | lr_mult: 1 240 | decay_mult: 1 241 | } 242 | param { 243 | lr_mult: 2 244 | decay_mult: 0 245 | } 246 | inner_product_param { 247 | num_output: 4096 248 | } 249 | } 250 | layer { 251 | name: "relu7" 252 | type: "ReLU" 253 | bottom: "fc7" 254 | top: "fc7" 255 | } 256 | layer { 257 | name: "drop7" 258 | type: "Dropout" 259 | bottom: "fc7" 260 | top: "fc7" 261 | dropout_param { 262 | dropout_ratio: 0.5 263 | } 264 | } 265 | layer { 266 | name: "cls_score" 267 | type: "InnerProduct" 268 | bottom: "fc7" 269 | top: "cls_score" 270 | param { 271 | lr_mult: 1 272 | decay_mult: 1 273 | } 274 | param { 275 | lr_mult: 2 276 | decay_mult: 0 277 | } 278 | inner_product_param { 279 | num_output: 21 280 | weight_filler { 281 | type: "gaussian" 282 | std: 0.01 283 | } 284 | bias_filler { 285 | type: "constant" 286 | value: 0 287 | } 288 | } 289 | } 290 | layer { 291 | name: "bbox_pred" 292 | type: "InnerProduct" 293 | bottom: "fc7" 294 | top: "bbox_pred" 295 | param { 296 | lr_mult: 1 297 | decay_mult: 1 298 | } 299 | param { 300 | lr_mult: 2 301 | decay_mult: 0 302 | } 303 | inner_product_param { 304 | 
num_output: 84 305 | weight_filler { 306 | type: "gaussian" 307 | std: 0.001 308 | } 309 | bias_filler { 310 | type: "constant" 311 | value: 0 312 | } 313 | } 314 | } 315 | layer { 316 | name: "cls_prob" 317 | type: "Softmax" 318 | bottom: "cls_score" 319 | top: "cls_prob" 320 | } 321 | -------------------------------------------------------------------------------- /models/CaffeNet/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | top: 'bbox_targets' 9 | top: 'bbox_loss_weights' 10 | python_param { 11 | module: 'roi_data_layer.layer' 12 | layer: 'RoIDataLayer' 13 | param_str: "'num_classes': 21" 14 | } 15 | } 16 | layer { 17 | name: "conv1" 18 | type: "Convolution" 19 | bottom: "data" 20 | top: "conv1" 21 | param { 22 | lr_mult: 0 23 | decay_mult: 0 24 | } 25 | param { 26 | lr_mult: 0 27 | decay_mult: 0 28 | } 29 | convolution_param { 30 | num_output: 96 31 | kernel_size: 11 32 | pad: 5 33 | stride: 4 34 | } 35 | } 36 | layer { 37 | name: "relu1" 38 | type: "ReLU" 39 | bottom: "conv1" 40 | top: "conv1" 41 | } 42 | layer { 43 | name: "pool1" 44 | type: "Pooling" 45 | bottom: "conv1" 46 | top: "pool1" 47 | pooling_param { 48 | pool: MAX 49 | kernel_size: 3 50 | pad: 1 51 | stride: 2 52 | } 53 | } 54 | layer { 55 | name: "norm1" 56 | type: "LRN" 57 | bottom: "pool1" 58 | top: "norm1" 59 | lrn_param { 60 | local_size: 5 61 | alpha: 0.0001 62 | beta: 0.75 63 | } 64 | } 65 | layer { 66 | name: "conv2" 67 | type: "Convolution" 68 | bottom: "norm1" 69 | top: "conv2" 70 | param { 71 | lr_mult: 1 72 | decay_mult: 1 73 | } 74 | param { 75 | lr_mult: 2 76 | decay_mult: 0 77 | } 78 | convolution_param { 79 | num_output: 256 80 | kernel_size: 5 81 | pad: 2 82 | group: 2 83 | } 84 | } 85 | layer { 86 | name: "relu2" 87 | type: "ReLU" 88 | bottom: "conv2" 89 | top: "conv2" 90 | } 91 | layer { 92 | name: "pool2" 
93 | type: "Pooling" 94 | bottom: "conv2" 95 | top: "pool2" 96 | pooling_param { 97 | pool: MAX 98 | kernel_size: 3 99 | pad: 1 100 | stride: 2 101 | } 102 | } 103 | layer { 104 | name: "norm2" 105 | type: "LRN" 106 | bottom: "pool2" 107 | top: "norm2" 108 | lrn_param { 109 | local_size: 5 110 | alpha: 0.0001 111 | beta: 0.75 112 | } 113 | } 114 | layer { 115 | name: "conv3" 116 | type: "Convolution" 117 | bottom: "norm2" 118 | top: "conv3" 119 | param { 120 | lr_mult: 1 121 | decay_mult: 1 122 | } 123 | param { 124 | lr_mult: 2 125 | decay_mult: 0 126 | } 127 | convolution_param { 128 | num_output: 384 129 | kernel_size: 3 130 | pad: 1 131 | } 132 | } 133 | layer { 134 | name: "relu3" 135 | type: "ReLU" 136 | bottom: "conv3" 137 | top: "conv3" 138 | } 139 | layer { 140 | name: "conv4" 141 | type: "Convolution" 142 | bottom: "conv3" 143 | top: "conv4" 144 | param { 145 | lr_mult: 1 146 | decay_mult: 1 147 | } 148 | param { 149 | lr_mult: 2 150 | decay_mult: 0 151 | } 152 | convolution_param { 153 | num_output: 384 154 | kernel_size: 3 155 | pad: 1 156 | group: 2 157 | } 158 | } 159 | layer { 160 | name: "relu4" 161 | type: "ReLU" 162 | bottom: "conv4" 163 | top: "conv4" 164 | } 165 | layer { 166 | name: "conv5" 167 | type: "Convolution" 168 | bottom: "conv4" 169 | top: "conv5" 170 | param { 171 | lr_mult: 1 172 | decay_mult: 1 173 | } 174 | param { 175 | lr_mult: 2 176 | decay_mult: 0 177 | } 178 | convolution_param { 179 | num_output: 256 180 | kernel_size: 3 181 | pad: 1 182 | group: 2 183 | } 184 | } 185 | layer { 186 | name: "relu5" 187 | type: "ReLU" 188 | bottom: "conv5" 189 | top: "conv5" 190 | } 191 | layer { 192 | name: "roi_pool5" 193 | type: "ROIPooling" 194 | bottom: "conv5" 195 | bottom: "rois" 196 | top: "pool5" 197 | roi_pooling_param { 198 | pooled_w: 6 199 | pooled_h: 6 200 | spatial_scale: 0.0625 # 1/16 201 | } 202 | } 203 | layer { 204 | name: "fc6" 205 | type: "InnerProduct" 206 | bottom: "pool5" 207 | top: "fc6" 208 | param { 209 | lr_mult: 1 
210 | decay_mult: 1 211 | } 212 | param { 213 | lr_mult: 2 214 | decay_mult: 0 215 | } 216 | inner_product_param { 217 | num_output: 4096 218 | } 219 | } 220 | layer { 221 | name: "relu6" 222 | type: "ReLU" 223 | bottom: "fc6" 224 | top: "fc6" 225 | } 226 | layer { 227 | name: "drop6" 228 | type: "Dropout" 229 | bottom: "fc6" 230 | top: "fc6" 231 | dropout_param { 232 | dropout_ratio: 0.5 233 | } 234 | } 235 | layer { 236 | name: "fc7" 237 | type: "InnerProduct" 238 | bottom: "fc6" 239 | top: "fc7" 240 | param { 241 | lr_mult: 1 242 | decay_mult: 1 243 | } 244 | param { 245 | lr_mult: 2 246 | decay_mult: 0 247 | } 248 | inner_product_param { 249 | num_output: 4096 250 | } 251 | } 252 | layer { 253 | name: "relu7" 254 | type: "ReLU" 255 | bottom: "fc7" 256 | top: "fc7" 257 | } 258 | layer { 259 | name: "drop7" 260 | type: "Dropout" 261 | bottom: "fc7" 262 | top: "fc7" 263 | dropout_param { 264 | dropout_ratio: 0.5 265 | } 266 | } 267 | layer { 268 | name: "cls_score" 269 | type: "InnerProduct" 270 | bottom: "fc7" 271 | top: "cls_score" 272 | param { 273 | lr_mult: 1 274 | decay_mult: 1 275 | } 276 | param { 277 | lr_mult: 2 278 | decay_mult: 0 279 | } 280 | inner_product_param { 281 | num_output: 21 282 | weight_filler { 283 | type: "gaussian" 284 | std: 0.01 285 | } 286 | bias_filler { 287 | type: "constant" 288 | value: 0 289 | } 290 | } 291 | } 292 | layer { 293 | name: "bbox_pred" 294 | type: "InnerProduct" 295 | bottom: "fc7" 296 | top: "bbox_pred" 297 | param { 298 | lr_mult: 1 299 | decay_mult: 1 300 | } 301 | param { 302 | lr_mult: 2 303 | decay_mult: 0 304 | } 305 | inner_product_param { 306 | num_output: 84 307 | weight_filler { 308 | type: "gaussian" 309 | std: 0.001 310 | } 311 | bias_filler { 312 | type: "constant" 313 | value: 0 314 | } 315 | } 316 | } 317 | layer { 318 | name: "loss_cls" 319 | type: "SoftmaxWithLoss" 320 | bottom: "cls_score" 321 | bottom: "labels" 322 | top: "loss_cls" 323 | loss_weight: 1 324 | } 325 | layer { 326 | name: 
"loss_bbox" 327 | type: "SmoothL1Loss" 328 | bottom: "bbox_pred" 329 | bottom: "bbox_targets" 330 | bottom: "bbox_loss_weights" 331 | top: "loss_bbox" 332 | loss_weight: 1 333 | } 334 | -------------------------------------------------------------------------------- /models/README.md: -------------------------------------------------------------------------------- 1 | Prototxt files that define models and solvers. 2 | 3 | Three models are defined, with some variations of each to support experiments 4 | in the paper. 5 | - Caffenet (model **S**) 6 | - VGG_CNN_M_1024 (model **M**) 7 | - VGG16 (model **L**) 8 | -------------------------------------------------------------------------------- /models/VGG16/fc_only/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG16/fc_only/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_fast_rcnn" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- /models/VGG16/no_bbox_reg/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG16/no_bbox_reg/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_fast_rcnn" 16 | #debug_info: true 17 | 
-------------------------------------------------------------------------------- /models/VGG16/no_bbox_reg/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | 3 | input: "data" 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 224 8 | dim: 224 9 | } 10 | 11 | input: "rois" 12 | input_shape { 13 | dim: 1 # to be changed on-the-fly to num ROIs 14 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 15 | } 16 | 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_1" 38 | type: "ReLU" 39 | bottom: "conv1_1" 40 | top: "conv1_1" 41 | } 42 | layer { 43 | name: "conv1_2" 44 | type: "Convolution" 45 | bottom: "conv1_1" 46 | top: "conv1_2" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | layer { 62 | name: "relu1_2" 63 | type: "ReLU" 64 | bottom: "conv1_2" 65 | top: "conv1_2" 66 | } 67 | layer { 68 | name: "pool1" 69 | type: "Pooling" 70 | bottom: "conv1_2" 71 | top: "pool1" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv2_1" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: "conv2_1" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | name: "relu2_1" 99 | type: "ReLU" 100 | bottom: "conv2_1" 101 | top: "conv2_1" 102 | } 103 | layer { 104 | name: "conv2_2" 105 | type: "Convolution" 106 | bottom: "conv2_1" 107 | top: "conv2_2" 
108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | decay_mult: 1 147 | } 148 | param { 149 | lr_mult: 2 150 | decay_mult: 0 151 | } 152 | convolution_param { 153 | num_output: 256 154 | pad: 1 155 | kernel_size: 3 156 | } 157 | } 158 | layer { 159 | name: "relu3_1" 160 | type: "ReLU" 161 | bottom: "conv3_1" 162 | top: "conv3_1" 163 | } 164 | layer { 165 | name: "conv3_2" 166 | type: "Convolution" 167 | bottom: "conv3_1" 168 | top: "conv3_2" 169 | param { 170 | lr_mult: 1 171 | decay_mult: 1 172 | } 173 | param { 174 | lr_mult: 2 175 | decay_mult: 0 176 | } 177 | convolution_param { 178 | num_output: 256 179 | pad: 1 180 | kernel_size: 3 181 | } 182 | } 183 | layer { 184 | name: "relu3_2" 185 | type: "ReLU" 186 | bottom: "conv3_2" 187 | top: "conv3_2" 188 | } 189 | layer { 190 | name: "conv3_3" 191 | type: "Convolution" 192 | bottom: "conv3_2" 193 | top: "conv3_3" 194 | param { 195 | lr_mult: 1 196 | decay_mult: 1 197 | } 198 | param { 199 | lr_mult: 2 200 | decay_mult: 0 201 | } 202 | convolution_param { 203 | num_output: 256 204 | pad: 1 205 | kernel_size: 3 206 | } 207 | } 208 | layer { 209 | name: "relu3_3" 210 | type: "ReLU" 211 | bottom: "conv3_3" 212 | top: "conv3_3" 213 | } 214 | layer { 215 | name: "pool3" 216 | type: "Pooling" 217 | bottom: "conv3_3" 218 | top: "pool3" 219 | pooling_param { 220 | pool: MAX 221 | kernel_size: 2 222 | stride: 2 223 | } 224 | } 225 
| layer { 226 | name: "conv4_1" 227 | type: "Convolution" 228 | bottom: "pool3" 229 | top: "conv4_1" 230 | param { 231 | lr_mult: 1 232 | decay_mult: 1 233 | } 234 | param { 235 | lr_mult: 2 236 | decay_mult: 0 237 | } 238 | convolution_param { 239 | num_output: 512 240 | pad: 1 241 | kernel_size: 3 242 | } 243 | } 244 | layer { 245 | name: "relu4_1" 246 | type: "ReLU" 247 | bottom: "conv4_1" 248 | top: "conv4_1" 249 | } 250 | layer { 251 | name: "conv4_2" 252 | type: "Convolution" 253 | bottom: "conv4_1" 254 | top: "conv4_2" 255 | param { 256 | lr_mult: 1 257 | decay_mult: 1 258 | } 259 | param { 260 | lr_mult: 2 261 | decay_mult: 0 262 | } 263 | convolution_param { 264 | num_output: 512 265 | pad: 1 266 | kernel_size: 3 267 | } 268 | } 269 | layer { 270 | name: "relu4_2" 271 | type: "ReLU" 272 | bottom: "conv4_2" 273 | top: "conv4_2" 274 | } 275 | layer { 276 | name: "conv4_3" 277 | type: "Convolution" 278 | bottom: "conv4_2" 279 | top: "conv4_3" 280 | param { 281 | lr_mult: 1 282 | decay_mult: 1 283 | } 284 | param { 285 | lr_mult: 2 286 | decay_mult: 0 287 | } 288 | convolution_param { 289 | num_output: 512 290 | pad: 1 291 | kernel_size: 3 292 | } 293 | } 294 | layer { 295 | name: "relu4_3" 296 | type: "ReLU" 297 | bottom: "conv4_3" 298 | top: "conv4_3" 299 | } 300 | layer { 301 | name: "pool4" 302 | type: "Pooling" 303 | bottom: "conv4_3" 304 | top: "pool4" 305 | pooling_param { 306 | pool: MAX 307 | kernel_size: 2 308 | stride: 2 309 | } 310 | } 311 | layer { 312 | name: "conv5_1" 313 | type: "Convolution" 314 | bottom: "pool4" 315 | top: "conv5_1" 316 | param { 317 | lr_mult: 1 318 | decay_mult: 1 319 | } 320 | param { 321 | lr_mult: 2 322 | decay_mult: 0 323 | } 324 | convolution_param { 325 | num_output: 512 326 | pad: 1 327 | kernel_size: 3 328 | } 329 | } 330 | layer { 331 | name: "relu5_1" 332 | type: "ReLU" 333 | bottom: "conv5_1" 334 | top: "conv5_1" 335 | } 336 | layer { 337 | name: "conv5_2" 338 | type: "Convolution" 339 | bottom: "conv5_1" 340 | 
top: "conv5_2" 341 | param { 342 | lr_mult: 1 343 | decay_mult: 1 344 | } 345 | param { 346 | lr_mult: 2 347 | decay_mult: 0 348 | } 349 | convolution_param { 350 | num_output: 512 351 | pad: 1 352 | kernel_size: 3 353 | } 354 | } 355 | layer { 356 | name: "relu5_2" 357 | type: "ReLU" 358 | bottom: "conv5_2" 359 | top: "conv5_2" 360 | } 361 | layer { 362 | name: "conv5_3" 363 | type: "Convolution" 364 | bottom: "conv5_2" 365 | top: "conv5_3" 366 | param { 367 | lr_mult: 1 368 | decay_mult: 1 369 | } 370 | param { 371 | lr_mult: 2 372 | decay_mult: 0 373 | } 374 | convolution_param { 375 | num_output: 512 376 | pad: 1 377 | kernel_size: 3 378 | } 379 | } 380 | layer { 381 | name: "relu5_3" 382 | type: "ReLU" 383 | bottom: "conv5_3" 384 | top: "conv5_3" 385 | } 386 | layer { 387 | name: "roi_pool5" 388 | type: "ROIPooling" 389 | bottom: "conv5_3" 390 | bottom: "rois" 391 | top: "pool5" 392 | roi_pooling_param { 393 | pooled_w: 7 394 | pooled_h: 7 395 | spatial_scale: 0.0625 # 1/16 396 | } 397 | } 398 | layer { 399 | name: "fc6" 400 | type: "InnerProduct" 401 | bottom: "pool5" 402 | top: "fc6" 403 | param { 404 | lr_mult: 1 405 | decay_mult: 1 406 | } 407 | param { 408 | lr_mult: 2 409 | decay_mult: 0 410 | } 411 | inner_product_param { 412 | num_output: 4096 413 | } 414 | } 415 | layer { 416 | name: "relu6" 417 | type: "ReLU" 418 | bottom: "fc6" 419 | top: "fc6" 420 | } 421 | layer { 422 | name: "drop6" 423 | type: "Dropout" 424 | bottom: "fc6" 425 | top: "fc6" 426 | dropout_param { 427 | dropout_ratio: 0.5 428 | } 429 | } 430 | layer { 431 | name: "fc7" 432 | type: "InnerProduct" 433 | bottom: "fc6" 434 | top: "fc7" 435 | param { 436 | lr_mult: 1 437 | decay_mult: 1 438 | } 439 | param { 440 | lr_mult: 2 441 | decay_mult: 0 442 | } 443 | inner_product_param { 444 | num_output: 4096 445 | } 446 | } 447 | layer { 448 | name: "relu7" 449 | type: "ReLU" 450 | bottom: "fc7" 451 | top: "fc7" 452 | } 453 | layer { 454 | name: "drop7" 455 | type: "Dropout" 456 | bottom: 
"fc7" 457 | top: "fc7" 458 | dropout_param { 459 | dropout_ratio: 0.5 460 | } 461 | } 462 | layer { 463 | name: "cls_score" 464 | type: "InnerProduct" 465 | bottom: "fc7" 466 | top: "cls_score" 467 | param { 468 | lr_mult: 1 469 | decay_mult: 1 470 | } 471 | param { 472 | lr_mult: 2 473 | decay_mult: 0 474 | } 475 | inner_product_param { 476 | num_output: 21 477 | weight_filler { 478 | type: "gaussian" 479 | std: 0.01 480 | } 481 | bias_filler { 482 | type: "constant" 483 | value: 0 484 | } 485 | } 486 | } 487 | layer { 488 | name: "cls_prob" 489 | type: "Softmax" 490 | bottom: "cls_score" 491 | top: "cls_prob" 492 | } 493 | -------------------------------------------------------------------------------- /models/VGG16/no_bbox_reg/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | python_param { 9 | module: 'roi_data_layer.layer' 10 | layer: 'RoIDataLayer' 11 | param_str: "'num_classes': 21" 12 | } 13 | } 14 | layer { 15 | name: "conv1_1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1_1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 64 29 | pad: 1 30 | kernel_size: 3 31 | } 32 | } 33 | layer { 34 | name: "relu1_1" 35 | type: "ReLU" 36 | bottom: "conv1_1" 37 | top: "conv1_1" 38 | } 39 | layer { 40 | name: "conv1_2" 41 | type: "Convolution" 42 | bottom: "conv1_1" 43 | top: "conv1_2" 44 | param { 45 | lr_mult: 0 46 | decay_mult: 0 47 | } 48 | param { 49 | lr_mult: 0 50 | decay_mult: 0 51 | } 52 | convolution_param { 53 | num_output: 64 54 | pad: 1 55 | kernel_size: 3 56 | } 57 | } 58 | layer { 59 | name: "relu1_2" 60 | type: "ReLU" 61 | bottom: "conv1_2" 62 | top: "conv1_2" 63 | } 64 | layer { 65 | name: "pool1" 66 | type: "Pooling" 67 | bottom: "conv1_2" 68 | top: "pool1" 
69 | pooling_param { 70 | pool: MAX 71 | kernel_size: 2 72 | stride: 2 73 | } 74 | } 75 | layer { 76 | name: "conv2_1" 77 | type: "Convolution" 78 | bottom: "pool1" 79 | top: "conv2_1" 80 | param { 81 | lr_mult: 0 82 | decay_mult: 0 83 | } 84 | param { 85 | lr_mult: 0 86 | decay_mult: 0 87 | } 88 | convolution_param { 89 | num_output: 128 90 | pad: 1 91 | kernel_size: 3 92 | } 93 | } 94 | layer { 95 | name: "relu2_1" 96 | type: "ReLU" 97 | bottom: "conv2_1" 98 | top: "conv2_1" 99 | } 100 | layer { 101 | name: "conv2_2" 102 | type: "Convolution" 103 | bottom: "conv2_1" 104 | top: "conv2_2" 105 | param { 106 | lr_mult: 0 107 | decay_mult: 0 108 | } 109 | param { 110 | lr_mult: 0 111 | decay_mult: 0 112 | } 113 | convolution_param { 114 | num_output: 128 115 | pad: 1 116 | kernel_size: 3 117 | } 118 | } 119 | layer { 120 | name: "relu2_2" 121 | type: "ReLU" 122 | bottom: "conv2_2" 123 | top: "conv2_2" 124 | } 125 | layer { 126 | name: "pool2" 127 | type: "Pooling" 128 | bottom: "conv2_2" 129 | top: "pool2" 130 | pooling_param { 131 | pool: MAX 132 | kernel_size: 2 133 | stride: 2 134 | } 135 | } 136 | layer { 137 | name: "conv3_1" 138 | type: "Convolution" 139 | bottom: "pool2" 140 | top: "conv3_1" 141 | param { 142 | lr_mult: 1 143 | decay_mult: 1 144 | } 145 | param { 146 | lr_mult: 2 147 | decay_mult: 0 148 | } 149 | convolution_param { 150 | num_output: 256 151 | pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 | layer { 156 | name: "relu3_1" 157 | type: "ReLU" 158 | bottom: "conv3_1" 159 | top: "conv3_1" 160 | } 161 | layer { 162 | name: "conv3_2" 163 | type: "Convolution" 164 | bottom: "conv3_1" 165 | top: "conv3_2" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 256 176 | pad: 1 177 | kernel_size: 3 178 | } 179 | } 180 | layer { 181 | name: "relu3_2" 182 | type: "ReLU" 183 | bottom: "conv3_2" 184 | top: "conv3_2" 185 | } 186 | layer { 187 | name: 
"conv3_3" 188 | type: "Convolution" 189 | bottom: "conv3_2" 190 | top: "conv3_3" 191 | param { 192 | lr_mult: 1 193 | decay_mult: 1 194 | } 195 | param { 196 | lr_mult: 2 197 | decay_mult: 0 198 | } 199 | convolution_param { 200 | num_output: 256 201 | pad: 1 202 | kernel_size: 3 203 | } 204 | } 205 | layer { 206 | name: "relu3_3" 207 | type: "ReLU" 208 | bottom: "conv3_3" 209 | top: "conv3_3" 210 | } 211 | layer { 212 | name: "pool3" 213 | type: "Pooling" 214 | bottom: "conv3_3" 215 | top: "pool3" 216 | pooling_param { 217 | pool: MAX 218 | kernel_size: 2 219 | stride: 2 220 | } 221 | } 222 | layer { 223 | name: "conv4_1" 224 | type: "Convolution" 225 | bottom: "pool3" 226 | top: "conv4_1" 227 | param { 228 | lr_mult: 1 229 | decay_mult: 1 230 | } 231 | param { 232 | lr_mult: 2 233 | decay_mult: 0 234 | } 235 | convolution_param { 236 | num_output: 512 237 | pad: 1 238 | kernel_size: 3 239 | } 240 | } 241 | layer { 242 | name: "relu4_1" 243 | type: "ReLU" 244 | bottom: "conv4_1" 245 | top: "conv4_1" 246 | } 247 | layer { 248 | name: "conv4_2" 249 | type: "Convolution" 250 | bottom: "conv4_1" 251 | top: "conv4_2" 252 | param { 253 | lr_mult: 1 254 | decay_mult: 1 255 | } 256 | param { 257 | lr_mult: 2 258 | decay_mult: 0 259 | } 260 | convolution_param { 261 | num_output: 512 262 | pad: 1 263 | kernel_size: 3 264 | } 265 | } 266 | layer { 267 | name: "relu4_2" 268 | type: "ReLU" 269 | bottom: "conv4_2" 270 | top: "conv4_2" 271 | } 272 | layer { 273 | name: "conv4_3" 274 | type: "Convolution" 275 | bottom: "conv4_2" 276 | top: "conv4_3" 277 | param { 278 | lr_mult: 1 279 | decay_mult: 1 280 | } 281 | param { 282 | lr_mult: 2 283 | decay_mult: 0 284 | } 285 | convolution_param { 286 | num_output: 512 287 | pad: 1 288 | kernel_size: 3 289 | } 290 | } 291 | layer { 292 | name: "relu4_3" 293 | type: "ReLU" 294 | bottom: "conv4_3" 295 | top: "conv4_3" 296 | } 297 | layer { 298 | name: "pool4" 299 | type: "Pooling" 300 | bottom: "conv4_3" 301 | top: "pool4" 302 | 
pooling_param { 303 | pool: MAX 304 | kernel_size: 2 305 | stride: 2 306 | } 307 | } 308 | layer { 309 | name: "conv5_1" 310 | type: "Convolution" 311 | bottom: "pool4" 312 | top: "conv5_1" 313 | param { 314 | lr_mult: 1 315 | decay_mult: 1 316 | } 317 | param { 318 | lr_mult: 2 319 | decay_mult: 0 320 | } 321 | convolution_param { 322 | num_output: 512 323 | pad: 1 324 | kernel_size: 3 325 | } 326 | } 327 | layer { 328 | name: "relu5_1" 329 | type: "ReLU" 330 | bottom: "conv5_1" 331 | top: "conv5_1" 332 | } 333 | layer { 334 | name: "conv5_2" 335 | type: "Convolution" 336 | bottom: "conv5_1" 337 | top: "conv5_2" 338 | param { 339 | lr_mult: 1 340 | decay_mult: 1 341 | } 342 | param { 343 | lr_mult: 2 344 | decay_mult: 0 345 | } 346 | convolution_param { 347 | num_output: 512 348 | pad: 1 349 | kernel_size: 3 350 | } 351 | } 352 | layer { 353 | name: "relu5_2" 354 | type: "ReLU" 355 | bottom: "conv5_2" 356 | top: "conv5_2" 357 | } 358 | layer { 359 | name: "conv5_3" 360 | type: "Convolution" 361 | bottom: "conv5_2" 362 | top: "conv5_3" 363 | param { 364 | lr_mult: 1 365 | decay_mult: 1 366 | } 367 | param { 368 | lr_mult: 2 369 | decay_mult: 0 370 | } 371 | convolution_param { 372 | num_output: 512 373 | pad: 1 374 | kernel_size: 3 375 | } 376 | } 377 | layer { 378 | name: "relu5_3" 379 | type: "ReLU" 380 | bottom: "conv5_3" 381 | top: "conv5_3" 382 | } 383 | layer { 384 | name: "roi_pool5" 385 | type: "ROIPooling" 386 | bottom: "conv5_3" 387 | bottom: "rois" 388 | top: "pool5" 389 | roi_pooling_param { 390 | pooled_w: 7 391 | pooled_h: 7 392 | spatial_scale: 0.0625 # 1/16 393 | } 394 | } 395 | layer { 396 | name: "fc6" 397 | type: "InnerProduct" 398 | bottom: "pool5" 399 | top: "fc6" 400 | param { 401 | lr_mult: 1 402 | decay_mult: 1 403 | } 404 | param { 405 | lr_mult: 2 406 | decay_mult: 0 407 | } 408 | inner_product_param { 409 | num_output: 4096 410 | } 411 | } 412 | layer { 413 | name: "relu6" 414 | type: "ReLU" 415 | bottom: "fc6" 416 | top: "fc6" 417 | } 
418 | layer { 419 | name: "drop6" 420 | type: "Dropout" 421 | bottom: "fc6" 422 | top: "fc6" 423 | dropout_param { 424 | dropout_ratio: 0.5 425 | } 426 | } 427 | layer { 428 | name: "fc7" 429 | type: "InnerProduct" 430 | bottom: "fc6" 431 | top: "fc7" 432 | param { 433 | lr_mult: 1 434 | decay_mult: 1 435 | } 436 | param { 437 | lr_mult: 2 438 | decay_mult: 0 439 | } 440 | inner_product_param { 441 | num_output: 4096 442 | } 443 | } 444 | layer { 445 | name: "relu7" 446 | type: "ReLU" 447 | bottom: "fc7" 448 | top: "fc7" 449 | } 450 | layer { 451 | name: "drop7" 452 | type: "Dropout" 453 | bottom: "fc7" 454 | top: "fc7" 455 | dropout_param { 456 | dropout_ratio: 0.5 457 | } 458 | } 459 | layer { 460 | name: "cls_score" 461 | type: "InnerProduct" 462 | bottom: "fc7" 463 | top: "cls_score" 464 | param { 465 | lr_mult: 1 466 | decay_mult: 1 467 | } 468 | param { 469 | lr_mult: 2 470 | decay_mult: 0 471 | } 472 | inner_product_param { 473 | num_output: 21 474 | weight_filler { 475 | type: "gaussian" 476 | std: 0.01 477 | } 478 | bias_filler { 479 | type: "constant" 480 | value: 0 481 | } 482 | } 483 | } 484 | layer { 485 | name: "loss_cls" 486 | type: "SoftmaxWithLoss" 487 | bottom: "cls_score" 488 | bottom: "labels" 489 | top: "loss_cls" 490 | loss_weight: 1 491 | } 492 | -------------------------------------------------------------------------------- /models/VGG16/piecewise/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG16/piecewise/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_fast_rcnn" 16 | #debug_info: true 17 | 
-------------------------------------------------------------------------------- /models/VGG16/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG16/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_fast_rcnn" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- /models/VGG16/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | 3 | input: "data" 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 224 8 | dim: 224 9 | } 10 | 11 | input: "rois" 12 | input_shape { 13 | dim: 1 # to be changed on-the-fly to num ROIs 14 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 15 | } 16 | 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_1" 38 | type: "ReLU" 39 | bottom: "conv1_1" 40 | top: "conv1_1" 41 | } 42 | layer { 43 | name: "conv1_2" 44 | type: "Convolution" 45 | bottom: "conv1_1" 46 | top: "conv1_2" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | layer { 62 | name: "relu1_2" 63 | type: "ReLU" 64 | bottom: "conv1_2" 65 | top: "conv1_2" 66 | } 67 | layer { 68 | name: "pool1" 69 | type: "Pooling" 70 | 
bottom: "conv1_2" 71 | top: "pool1" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv2_1" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: "conv2_1" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | name: "relu2_1" 99 | type: "ReLU" 100 | bottom: "conv2_1" 101 | top: "conv2_1" 102 | } 103 | layer { 104 | name: "conv2_2" 105 | type: "Convolution" 106 | bottom: "conv2_1" 107 | top: "conv2_2" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | decay_mult: 1 147 | } 148 | param { 149 | lr_mult: 2 150 | decay_mult: 0 151 | } 152 | convolution_param { 153 | num_output: 256 154 | pad: 1 155 | kernel_size: 3 156 | } 157 | } 158 | layer { 159 | name: "relu3_1" 160 | type: "ReLU" 161 | bottom: "conv3_1" 162 | top: "conv3_1" 163 | } 164 | layer { 165 | name: "conv3_2" 166 | type: "Convolution" 167 | bottom: "conv3_1" 168 | top: "conv3_2" 169 | param { 170 | lr_mult: 1 171 | decay_mult: 1 172 | } 173 | param { 174 | lr_mult: 2 175 | decay_mult: 0 176 | } 177 | convolution_param { 178 | num_output: 256 179 | pad: 1 180 | kernel_size: 3 181 | } 182 | } 183 | layer { 184 | name: "relu3_2" 185 | type: "ReLU" 186 | bottom: "conv3_2" 187 | top: 
"conv3_2" 188 | } 189 | layer { 190 | name: "conv3_3" 191 | type: "Convolution" 192 | bottom: "conv3_2" 193 | top: "conv3_3" 194 | param { 195 | lr_mult: 1 196 | decay_mult: 1 197 | } 198 | param { 199 | lr_mult: 2 200 | decay_mult: 0 201 | } 202 | convolution_param { 203 | num_output: 256 204 | pad: 1 205 | kernel_size: 3 206 | } 207 | } 208 | layer { 209 | name: "relu3_3" 210 | type: "ReLU" 211 | bottom: "conv3_3" 212 | top: "conv3_3" 213 | } 214 | layer { 215 | name: "pool3" 216 | type: "Pooling" 217 | bottom: "conv3_3" 218 | top: "pool3" 219 | pooling_param { 220 | pool: MAX 221 | kernel_size: 2 222 | stride: 2 223 | } 224 | } 225 | layer { 226 | name: "conv4_1" 227 | type: "Convolution" 228 | bottom: "pool3" 229 | top: "conv4_1" 230 | param { 231 | lr_mult: 1 232 | decay_mult: 1 233 | } 234 | param { 235 | lr_mult: 2 236 | decay_mult: 0 237 | } 238 | convolution_param { 239 | num_output: 512 240 | pad: 1 241 | kernel_size: 3 242 | } 243 | } 244 | layer { 245 | name: "relu4_1" 246 | type: "ReLU" 247 | bottom: "conv4_1" 248 | top: "conv4_1" 249 | } 250 | layer { 251 | name: "conv4_2" 252 | type: "Convolution" 253 | bottom: "conv4_1" 254 | top: "conv4_2" 255 | param { 256 | lr_mult: 1 257 | decay_mult: 1 258 | } 259 | param { 260 | lr_mult: 2 261 | decay_mult: 0 262 | } 263 | convolution_param { 264 | num_output: 512 265 | pad: 1 266 | kernel_size: 3 267 | } 268 | } 269 | layer { 270 | name: "relu4_2" 271 | type: "ReLU" 272 | bottom: "conv4_2" 273 | top: "conv4_2" 274 | } 275 | layer { 276 | name: "conv4_3" 277 | type: "Convolution" 278 | bottom: "conv4_2" 279 | top: "conv4_3" 280 | param { 281 | lr_mult: 1 282 | decay_mult: 1 283 | } 284 | param { 285 | lr_mult: 2 286 | decay_mult: 0 287 | } 288 | convolution_param { 289 | num_output: 512 290 | pad: 1 291 | kernel_size: 3 292 | } 293 | } 294 | layer { 295 | name: "relu4_3" 296 | type: "ReLU" 297 | bottom: "conv4_3" 298 | top: "conv4_3" 299 | } 300 | layer { 301 | name: "pool4" 302 | type: "Pooling" 303 | bottom: 
"conv4_3" 304 | top: "pool4" 305 | pooling_param { 306 | pool: MAX 307 | kernel_size: 2 308 | stride: 2 309 | } 310 | } 311 | layer { 312 | name: "conv5_1" 313 | type: "Convolution" 314 | bottom: "pool4" 315 | top: "conv5_1" 316 | param { 317 | lr_mult: 1 318 | decay_mult: 1 319 | } 320 | param { 321 | lr_mult: 2 322 | decay_mult: 0 323 | } 324 | convolution_param { 325 | num_output: 512 326 | pad: 1 327 | kernel_size: 3 328 | } 329 | } 330 | layer { 331 | name: "relu5_1" 332 | type: "ReLU" 333 | bottom: "conv5_1" 334 | top: "conv5_1" 335 | } 336 | layer { 337 | name: "conv5_2" 338 | type: "Convolution" 339 | bottom: "conv5_1" 340 | top: "conv5_2" 341 | param { 342 | lr_mult: 1 343 | decay_mult: 1 344 | } 345 | param { 346 | lr_mult: 2 347 | decay_mult: 0 348 | } 349 | convolution_param { 350 | num_output: 512 351 | pad: 1 352 | kernel_size: 3 353 | } 354 | } 355 | layer { 356 | name: "relu5_2" 357 | type: "ReLU" 358 | bottom: "conv5_2" 359 | top: "conv5_2" 360 | } 361 | layer { 362 | name: "conv5_3" 363 | type: "Convolution" 364 | bottom: "conv5_2" 365 | top: "conv5_3" 366 | param { 367 | lr_mult: 1 368 | decay_mult: 1 369 | } 370 | param { 371 | lr_mult: 2 372 | decay_mult: 0 373 | } 374 | convolution_param { 375 | num_output: 512 376 | pad: 1 377 | kernel_size: 3 378 | } 379 | } 380 | layer { 381 | name: "relu5_3" 382 | type: "ReLU" 383 | bottom: "conv5_3" 384 | top: "conv5_3" 385 | } 386 | layer { 387 | name: "roi_pool5" 388 | type: "ROIPooling" 389 | bottom: "conv5_3" 390 | bottom: "rois" 391 | top: "pool5" 392 | roi_pooling_param { 393 | pooled_w: 7 394 | pooled_h: 7 395 | spatial_scale: 0.0625 # 1/16 396 | } 397 | } 398 | layer { 399 | name: "fc6" 400 | type: "InnerProduct" 401 | bottom: "pool5" 402 | top: "fc6" 403 | param { 404 | lr_mult: 1 405 | decay_mult: 1 406 | } 407 | param { 408 | lr_mult: 2 409 | decay_mult: 0 410 | } 411 | inner_product_param { 412 | num_output: 4096 413 | } 414 | } 415 | layer { 416 | name: "relu6" 417 | type: "ReLU" 418 | 
bottom: "fc6" 419 | top: "fc6" 420 | } 421 | layer { 422 | name: "drop6" 423 | type: "Dropout" 424 | bottom: "fc6" 425 | top: "fc6" 426 | dropout_param { 427 | dropout_ratio: 0.5 428 | } 429 | } 430 | layer { 431 | name: "fc7" 432 | type: "InnerProduct" 433 | bottom: "fc6" 434 | top: "fc7" 435 | param { 436 | lr_mult: 1 437 | decay_mult: 1 438 | } 439 | param { 440 | lr_mult: 2 441 | decay_mult: 0 442 | } 443 | inner_product_param { 444 | num_output: 4096 445 | } 446 | } 447 | layer { 448 | name: "relu7" 449 | type: "ReLU" 450 | bottom: "fc7" 451 | top: "fc7" 452 | } 453 | layer { 454 | name: "drop7" 455 | type: "Dropout" 456 | bottom: "fc7" 457 | top: "fc7" 458 | dropout_param { 459 | dropout_ratio: 0.5 460 | } 461 | } 462 | layer { 463 | name: "cls_score" 464 | type: "InnerProduct" 465 | bottom: "fc7" 466 | top: "cls_score" 467 | param { 468 | lr_mult: 1 469 | decay_mult: 1 470 | } 471 | param { 472 | lr_mult: 2 473 | decay_mult: 0 474 | } 475 | inner_product_param { 476 | num_output: 21 477 | weight_filler { 478 | type: "gaussian" 479 | std: 0.01 480 | } 481 | bias_filler { 482 | type: "constant" 483 | value: 0 484 | } 485 | } 486 | } 487 | layer { 488 | name: "bbox_pred" 489 | type: "InnerProduct" 490 | bottom: "fc7" 491 | top: "bbox_pred" 492 | param { 493 | lr_mult: 1 494 | decay_mult: 1 495 | } 496 | param { 497 | lr_mult: 2 498 | decay_mult: 0 499 | } 500 | inner_product_param { 501 | num_output: 84 502 | weight_filler { 503 | type: "gaussian" 504 | std: 0.001 505 | } 506 | bias_filler { 507 | type: "constant" 508 | value: 0 509 | } 510 | } 511 | } 512 | layer { 513 | name: "cls_prob" 514 | type: "Softmax" 515 | bottom: "cls_score" 516 | top: "cls_prob" 517 | } 518 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/compressed/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 
6 | dim: 224 7 | dim: 224 8 | } 9 | input: "rois" 10 | input_shape { 11 | dim: 1 # to be changed on-the-fly to num ROIs 12 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 7 30 | stride: 2 31 | } 32 | } 33 | layer { 34 | name: "relu1" 35 | type: "ReLU" 36 | bottom: "conv1" 37 | top: "conv1" 38 | } 39 | layer { 40 | name: "norm1" 41 | type: "LRN" 42 | bottom: "conv1" 43 | top: "norm1" 44 | lrn_param { 45 | local_size: 5 46 | alpha: 0.0005 47 | beta: 0.75 48 | k: 2 49 | } 50 | } 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "norm1" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 3 59 | stride: 2 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | param { 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | convolution_param { 76 | num_output: 256 77 | pad: 1 78 | kernel_size: 5 79 | stride: 2 80 | } 81 | } 82 | layer { 83 | name: "relu2" 84 | type: "ReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | } 88 | layer { 89 | name: "norm2" 90 | type: "LRN" 91 | bottom: "conv2" 92 | top: "norm2" 93 | lrn_param { 94 | local_size: 5 95 | alpha: 0.0005 96 | beta: 0.75 97 | k: 2 98 | } 99 | } 100 | layer { 101 | name: "pool2" 102 | type: "Pooling" 103 | bottom: "norm2" 104 | top: "pool2" 105 | pooling_param { 106 | pool: MAX 107 | kernel_size: 3 108 | stride: 2 109 | } 110 | } 111 | layer { 112 | name: "conv3" 113 | type: "Convolution" 114 | bottom: "pool2" 115 | top: "conv3" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 512 126 | 
pad: 1 127 | kernel_size: 3 128 | } 129 | } 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | layer { 137 | name: "conv4" 138 | type: "Convolution" 139 | bottom: "conv3" 140 | top: "conv4" 141 | param { 142 | lr_mult: 1 143 | decay_mult: 1 144 | } 145 | param { 146 | lr_mult: 2 147 | decay_mult: 0 148 | } 149 | convolution_param { 150 | num_output: 512 151 | pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 | layer { 156 | name: "relu4" 157 | type: "ReLU" 158 | bottom: "conv4" 159 | top: "conv4" 160 | } 161 | layer { 162 | name: "conv5" 163 | type: "Convolution" 164 | bottom: "conv4" 165 | top: "conv5" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 512 176 | pad: 1 177 | kernel_size: 3 178 | } 179 | } 180 | layer { 181 | name: "relu5" 182 | type: "ReLU" 183 | bottom: "conv5" 184 | top: "conv5" 185 | } 186 | layer { 187 | name: "roi_pool5" 188 | type: "ROIPooling" 189 | bottom: "conv5" 190 | bottom: "rois" 191 | top: "pool5" 192 | roi_pooling_param { 193 | pooled_w: 6 194 | pooled_h: 6 195 | spatial_scale: 0.0625 # 1/16 196 | } 197 | } 198 | layer { 199 | name: "fc6_L" 200 | type: "InnerProduct" 201 | bottom: "pool5" 202 | top: "fc6_L" 203 | param { 204 | lr_mult: 1 205 | decay_mult: 1 206 | } 207 | inner_product_param { 208 | num_output: 1024 209 | bias_term: false 210 | } 211 | } 212 | layer { 213 | name: "fc6_U" 214 | type: "InnerProduct" 215 | bottom: "fc6_L" 216 | top: "fc6_U" 217 | param { 218 | lr_mult: 1 219 | decay_mult: 1 220 | } 221 | param { 222 | lr_mult: 2 223 | decay_mult: 0 224 | } 225 | inner_product_param { 226 | num_output: 4096 227 | } 228 | } 229 | layer { 230 | name: "relu6" 231 | type: "ReLU" 232 | bottom: "fc6_U" 233 | top: "fc6_U" 234 | } 235 | layer { 236 | name: "drop6" 237 | type: "Dropout" 238 | bottom: "fc6_U" 239 | top: "fc6_U" 240 | dropout_param { 241 | 
dropout_ratio: 0.5 242 | } 243 | } 244 | layer { 245 | name: "fc7_L" 246 | type: "InnerProduct" 247 | bottom: "fc6_U" 248 | top: "fc7_L" 249 | param { 250 | lr_mult: 1 251 | decay_mult: 1 252 | } 253 | inner_product_param { 254 | num_output: 256 255 | bias_term: false 256 | } 257 | } 258 | layer { 259 | name: "fc7_U" 260 | type: "InnerProduct" 261 | bottom: "fc7_L" 262 | top: "fc7_U" 263 | param { 264 | lr_mult: 1 265 | decay_mult: 1 266 | } 267 | param { 268 | lr_mult: 2 269 | decay_mult: 0 270 | } 271 | inner_product_param { 272 | num_output: 1024 273 | } 274 | } 275 | layer { 276 | name: "relu7" 277 | type: "ReLU" 278 | bottom: "fc7_U" 279 | top: "fc7_U" 280 | } 281 | layer { 282 | name: "drop7" 283 | type: "Dropout" 284 | bottom: "fc7_U" 285 | top: "fc7_U" 286 | dropout_param { 287 | dropout_ratio: 0.5 288 | } 289 | } 290 | layer { 291 | name: "cls_score" 292 | type: "InnerProduct" 293 | bottom: "fc7_U" 294 | top: "cls_score" 295 | param { 296 | lr_mult: 1 297 | decay_mult: 1 298 | } 299 | param { 300 | lr_mult: 2 301 | decay_mult: 0 302 | } 303 | inner_product_param { 304 | num_output: 21 305 | weight_filler { 306 | type: "gaussian" 307 | std: 0.01 308 | } 309 | bias_filler { 310 | type: "constant" 311 | value: 0 312 | } 313 | } 314 | } 315 | layer { 316 | name: "bbox_pred" 317 | type: "InnerProduct" 318 | bottom: "fc7_U" 319 | top: "bbox_pred" 320 | param { 321 | lr_mult: 1 322 | decay_mult: 1 323 | } 324 | param { 325 | lr_mult: 2 326 | decay_mult: 0 327 | } 328 | inner_product_param { 329 | num_output: 84 330 | weight_filler { 331 | type: "gaussian" 332 | std: 0.001 333 | } 334 | bias_filler { 335 | type: "constant" 336 | value: 0 337 | } 338 | } 339 | } 340 | layer { 341 | name: "cls_prob" 342 | type: "Softmax" 343 | bottom: "cls_score" 344 | top: "cls_prob" 345 | } 346 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/no_bbox_reg/solver.prototxt: 
-------------------------------------------------------------------------------- 1 | train_net: "models/VGG_CNN_M_1024/no_bbox_reg/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/no_bbox_reg/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 224 7 | dim: 224 8 | } 9 | input: "rois" 10 | input_shape { 11 | dim: 1 # to be changed on-the-fly to num ROIs 12 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 7 30 | stride: 2 31 | } 32 | } 33 | layer { 34 | name: "relu1" 35 | type: "ReLU" 36 | bottom: "conv1" 37 | top: "conv1" 38 | } 39 | layer { 40 | name: "norm1" 41 | type: "LRN" 42 | bottom: "conv1" 43 | top: "norm1" 44 | lrn_param { 45 | local_size: 5 46 | alpha: 0.0005 47 | beta: 0.75 48 | k: 2 49 | } 50 | } 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "norm1" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 3 59 | stride: 2 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | param { 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | 
convolution_param { 76 | num_output: 256 77 | pad: 1 78 | kernel_size: 5 79 | stride: 2 80 | } 81 | } 82 | layer { 83 | name: "relu2" 84 | type: "ReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | } 88 | layer { 89 | name: "norm2" 90 | type: "LRN" 91 | bottom: "conv2" 92 | top: "norm2" 93 | lrn_param { 94 | local_size: 5 95 | alpha: 0.0005 96 | beta: 0.75 97 | k: 2 98 | } 99 | } 100 | layer { 101 | name: "pool2" 102 | type: "Pooling" 103 | bottom: "norm2" 104 | top: "pool2" 105 | pooling_param { 106 | pool: MAX 107 | kernel_size: 3 108 | stride: 2 109 | } 110 | } 111 | layer { 112 | name: "conv3" 113 | type: "Convolution" 114 | bottom: "pool2" 115 | top: "conv3" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 512 126 | pad: 1 127 | kernel_size: 3 128 | } 129 | } 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | layer { 137 | name: "conv4" 138 | type: "Convolution" 139 | bottom: "conv3" 140 | top: "conv4" 141 | param { 142 | lr_mult: 1 143 | decay_mult: 1 144 | } 145 | param { 146 | lr_mult: 2 147 | decay_mult: 0 148 | } 149 | convolution_param { 150 | num_output: 512 151 | pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 | layer { 156 | name: "relu4" 157 | type: "ReLU" 158 | bottom: "conv4" 159 | top: "conv4" 160 | } 161 | layer { 162 | name: "conv5" 163 | type: "Convolution" 164 | bottom: "conv4" 165 | top: "conv5" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 512 176 | pad: 1 177 | kernel_size: 3 178 | } 179 | } 180 | layer { 181 | name: "relu5" 182 | type: "ReLU" 183 | bottom: "conv5" 184 | top: "conv5" 185 | } 186 | layer { 187 | name: "roi_pool5" 188 | type: "ROIPooling" 189 | bottom: "conv5" 190 | bottom: "rois" 191 | top: "pool5" 192 | roi_pooling_param { 193 | pooled_w: 6 194 
| pooled_h: 6 195 | spatial_scale: 0.0625 # 1/16 196 | } 197 | } 198 | layer { 199 | name: "fc6" 200 | type: "InnerProduct" 201 | bottom: "pool5" 202 | top: "fc6" 203 | param { 204 | lr_mult: 1 205 | decay_mult: 1 206 | } 207 | param { 208 | lr_mult: 2 209 | decay_mult: 0 210 | } 211 | inner_product_param { 212 | num_output: 4096 213 | } 214 | } 215 | layer { 216 | name: "relu6" 217 | type: "ReLU" 218 | bottom: "fc6" 219 | top: "fc6" 220 | } 221 | layer { 222 | name: "drop6" 223 | type: "Dropout" 224 | bottom: "fc6" 225 | top: "fc6" 226 | dropout_param { 227 | dropout_ratio: 0.5 228 | } 229 | } 230 | layer { 231 | name: "fc7" 232 | type: "InnerProduct" 233 | bottom: "fc6" 234 | top: "fc7" 235 | param { 236 | lr_mult: 1 237 | decay_mult: 1 238 | } 239 | param { 240 | lr_mult: 2 241 | decay_mult: 0 242 | } 243 | inner_product_param { 244 | num_output: 1024 245 | } 246 | } 247 | layer { 248 | name: "relu7" 249 | type: "ReLU" 250 | bottom: "fc7" 251 | top: "fc7" 252 | } 253 | layer { 254 | name: "drop7" 255 | type: "Dropout" 256 | bottom: "fc7" 257 | top: "fc7" 258 | dropout_param { 259 | dropout_ratio: 0.5 260 | } 261 | } 262 | layer { 263 | name: "cls_score" 264 | type: "InnerProduct" 265 | bottom: "fc7" 266 | top: "cls_score" 267 | param { 268 | lr_mult: 1 269 | decay_mult: 1 270 | } 271 | param { 272 | lr_mult: 2 273 | decay_mult: 0 274 | } 275 | inner_product_param { 276 | num_output: 21 277 | weight_filler { 278 | type: "gaussian" 279 | std: 0.01 280 | } 281 | bias_filler { 282 | type: "constant" 283 | value: 0 284 | } 285 | } 286 | } 287 | layer { 288 | name: "cls_prob" 289 | type: "Softmax" 290 | bottom: "cls_score" 291 | top: "cls_prob" 292 | } 293 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/no_bbox_reg/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 
'rois' 7 | top: 'labels' 8 | python_param { 9 | module: 'roi_data_layer.layer' 10 | layer: 'RoIDataLayer' 11 | param_str: "'num_classes': 21" 12 | } 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 7 30 | stride: 2 31 | } 32 | } 33 | layer { 34 | name: "relu1" 35 | type: "ReLU" 36 | bottom: "conv1" 37 | top: "conv1" 38 | } 39 | layer { 40 | name: "norm1" 41 | type: "LRN" 42 | bottom: "conv1" 43 | top: "norm1" 44 | lrn_param { 45 | local_size: 5 46 | alpha: 0.0005 47 | beta: 0.75 48 | k: 2 49 | } 50 | } 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "norm1" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 3 59 | stride: 2 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | param { 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | convolution_param { 76 | num_output: 256 77 | pad: 1 78 | kernel_size: 5 79 | stride: 2 80 | } 81 | } 82 | layer { 83 | name: "relu2" 84 | type: "ReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | } 88 | layer { 89 | name: "norm2" 90 | type: "LRN" 91 | bottom: "conv2" 92 | top: "norm2" 93 | lrn_param { 94 | local_size: 5 95 | alpha: 0.0005 96 | beta: 0.75 97 | k: 2 98 | } 99 | } 100 | layer { 101 | name: "pool2" 102 | type: "Pooling" 103 | bottom: "norm2" 104 | top: "pool2" 105 | pooling_param { 106 | pool: MAX 107 | kernel_size: 3 108 | stride: 2 109 | } 110 | } 111 | layer { 112 | name: "conv3" 113 | type: "Convolution" 114 | bottom: "pool2" 115 | top: "conv3" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 512 126 | pad: 1 127 | kernel_size: 3 128 | } 
129 | } 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | layer { 137 | name: "conv4" 138 | type: "Convolution" 139 | bottom: "conv3" 140 | top: "conv4" 141 | param { 142 | lr_mult: 1 143 | decay_mult: 1 144 | } 145 | param { 146 | lr_mult: 2 147 | decay_mult: 0 148 | } 149 | convolution_param { 150 | num_output: 512 151 | pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 | layer { 156 | name: "relu4" 157 | type: "ReLU" 158 | bottom: "conv4" 159 | top: "conv4" 160 | } 161 | layer { 162 | name: "conv5" 163 | type: "Convolution" 164 | bottom: "conv4" 165 | top: "conv5" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 512 176 | pad: 1 177 | kernel_size: 3 178 | } 179 | } 180 | layer { 181 | name: "relu5" 182 | type: "ReLU" 183 | bottom: "conv5" 184 | top: "conv5" 185 | } 186 | layer { 187 | name: "roi_pool5" 188 | type: "ROIPooling" 189 | bottom: "conv5" 190 | bottom: "rois" 191 | top: "pool5" 192 | roi_pooling_param { 193 | pooled_w: 6 194 | pooled_h: 6 195 | spatial_scale: 0.0625 # 1/16 196 | } 197 | } 198 | layer { 199 | name: "fc6" 200 | type: "InnerProduct" 201 | bottom: "pool5" 202 | top: "fc6" 203 | param { 204 | lr_mult: 1 205 | decay_mult: 1 206 | } 207 | param { 208 | lr_mult: 2 209 | decay_mult: 0 210 | } 211 | inner_product_param { 212 | num_output: 4096 213 | } 214 | } 215 | layer { 216 | name: "relu6" 217 | type: "ReLU" 218 | bottom: "fc6" 219 | top: "fc6" 220 | } 221 | layer { 222 | name: "drop6" 223 | type: "Dropout" 224 | bottom: "fc6" 225 | top: "fc6" 226 | dropout_param { 227 | dropout_ratio: 0.5 228 | } 229 | } 230 | layer { 231 | name: "fc7" 232 | type: "InnerProduct" 233 | bottom: "fc6" 234 | top: "fc7" 235 | param { 236 | lr_mult: 1 237 | decay_mult: 1 238 | } 239 | param { 240 | lr_mult: 2 241 | decay_mult: 0 242 | } 243 | inner_product_param { 244 | num_output: 1024 245 | } 246 
| } 247 | layer { 248 | name: "relu7" 249 | type: "ReLU" 250 | bottom: "fc7" 251 | top: "fc7" 252 | } 253 | layer { 254 | name: "drop7" 255 | type: "Dropout" 256 | bottom: "fc7" 257 | top: "fc7" 258 | dropout_param { 259 | dropout_ratio: 0.5 260 | } 261 | } 262 | layer { 263 | name: "cls_score" 264 | type: "InnerProduct" 265 | bottom: "fc7" 266 | top: "cls_score" 267 | param { 268 | lr_mult: 1 269 | decay_mult: 1 270 | } 271 | param { 272 | lr_mult: 2 273 | decay_mult: 0 274 | } 275 | inner_product_param { 276 | num_output: 21 277 | weight_filler { 278 | type: "gaussian" 279 | std: 0.01 280 | } 281 | bias_filler { 282 | type: "constant" 283 | value: 0 284 | } 285 | } 286 | } 287 | layer { 288 | name: "loss_cls" 289 | type: "SoftmaxWithLoss" 290 | bottom: "cls_score" 291 | bottom: "labels" 292 | top: "loss_cls" 293 | loss_weight: 1 294 | } 295 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/piecewise/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG_CNN_M_1024/piecewise/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/piecewise/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | top: 'bbox_targets' 9 | top: 'bbox_loss_weights' 10 | python_param { 11 | module: 'roi_data_layer.layer' 12 | layer: 
'RoIDataLayer' 13 | param_str: "'num_classes': 21" 14 | } 15 | } 16 | layer { 17 | name: "conv1" 18 | type: "Convolution" 19 | bottom: "data" 20 | top: "conv1" 21 | param { 22 | lr_mult: 0 23 | decay_mult: 0 24 | } 25 | param { 26 | lr_mult: 0 27 | decay_mult: 0 28 | } 29 | convolution_param { 30 | num_output: 96 31 | kernel_size: 7 32 | stride: 2 33 | } 34 | } 35 | layer { 36 | name: "relu1" 37 | type: "ReLU" 38 | bottom: "conv1" 39 | top: "conv1" 40 | } 41 | layer { 42 | name: "norm1" 43 | type: "LRN" 44 | bottom: "conv1" 45 | top: "norm1" 46 | lrn_param { 47 | local_size: 5 48 | alpha: 0.0005 49 | beta: 0.75 50 | k: 2 51 | } 52 | } 53 | layer { 54 | name: "pool1" 55 | type: "Pooling" 56 | bottom: "norm1" 57 | top: "pool1" 58 | pooling_param { 59 | pool: MAX 60 | kernel_size: 3 61 | stride: 2 62 | } 63 | } 64 | layer { 65 | name: "conv2" 66 | type: "Convolution" 67 | bottom: "pool1" 68 | top: "conv2" 69 | param { 70 | lr_mult: 0 71 | decay_mult: 0 72 | } 73 | param { 74 | lr_mult: 0 75 | decay_mult: 0 76 | } 77 | convolution_param { 78 | num_output: 256 79 | pad: 1 80 | kernel_size: 5 81 | stride: 2 82 | } 83 | } 84 | layer { 85 | name: "relu2" 86 | type: "ReLU" 87 | bottom: "conv2" 88 | top: "conv2" 89 | } 90 | layer { 91 | name: "norm2" 92 | type: "LRN" 93 | bottom: "conv2" 94 | top: "norm2" 95 | lrn_param { 96 | local_size: 5 97 | alpha: 0.0005 98 | beta: 0.75 99 | k: 2 100 | } 101 | } 102 | layer { 103 | name: "pool2" 104 | type: "Pooling" 105 | bottom: "norm2" 106 | top: "pool2" 107 | pooling_param { 108 | pool: MAX 109 | kernel_size: 3 110 | stride: 2 111 | } 112 | } 113 | layer { 114 | name: "conv3" 115 | type: "Convolution" 116 | bottom: "pool2" 117 | top: "conv3" 118 | param { 119 | lr_mult: 0 120 | decay_mult: 0 121 | } 122 | param { 123 | lr_mult: 0 124 | decay_mult: 0 125 | } 126 | convolution_param { 127 | num_output: 512 128 | pad: 1 129 | kernel_size: 3 130 | } 131 | } 132 | layer { 133 | name: "relu3" 134 | type: "ReLU" 135 | bottom: "conv3" 136 | 
top: "conv3" 137 | } 138 | layer { 139 | name: "conv4" 140 | type: "Convolution" 141 | bottom: "conv3" 142 | top: "conv4" 143 | param { 144 | lr_mult: 0 145 | decay_mult: 0 146 | } 147 | param { 148 | lr_mult: 0 149 | decay_mult: 0 150 | } 151 | convolution_param { 152 | num_output: 512 153 | pad: 1 154 | kernel_size: 3 155 | } 156 | } 157 | layer { 158 | name: "relu4" 159 | type: "ReLU" 160 | bottom: "conv4" 161 | top: "conv4" 162 | } 163 | layer { 164 | name: "conv5" 165 | type: "Convolution" 166 | bottom: "conv4" 167 | top: "conv5" 168 | param { 169 | lr_mult: 0 170 | decay_mult: 0 171 | } 172 | param { 173 | lr_mult: 0 174 | decay_mult: 0 175 | } 176 | convolution_param { 177 | num_output: 512 178 | pad: 1 179 | kernel_size: 3 180 | } 181 | } 182 | layer { 183 | name: "relu5" 184 | type: "ReLU" 185 | bottom: "conv5" 186 | top: "conv5" 187 | } 188 | layer { 189 | name: "roi_pool5" 190 | type: "ROIPooling" 191 | bottom: "conv5" 192 | bottom: "rois" 193 | top: "pool5" 194 | roi_pooling_param { 195 | pooled_w: 6 196 | pooled_h: 6 197 | spatial_scale: 0.0625 # 1/16 198 | } 199 | } 200 | layer { 201 | name: "fc6" 202 | type: "InnerProduct" 203 | bottom: "pool5" 204 | top: "fc6" 205 | param { 206 | lr_mult: 0 207 | decay_mult: 0 208 | } 209 | param { 210 | lr_mult: 0 211 | decay_mult: 0 212 | } 213 | inner_product_param { 214 | num_output: 4096 215 | } 216 | } 217 | layer { 218 | name: "relu6" 219 | type: "ReLU" 220 | bottom: "fc6" 221 | top: "fc6" 222 | } 223 | layer { 224 | name: "drop6" 225 | type: "Dropout" 226 | bottom: "fc6" 227 | top: "fc6" 228 | dropout_param { 229 | dropout_ratio: 0.5 230 | } 231 | } 232 | layer { 233 | name: "fc7" 234 | type: "InnerProduct" 235 | bottom: "fc6" 236 | top: "fc7" 237 | param { 238 | lr_mult: 0 239 | decay_mult: 0 240 | } 241 | param { 242 | lr_mult: 0 243 | decay_mult: 0 244 | } 245 | inner_product_param { 246 | num_output: 1024 247 | } 248 | } 249 | layer { 250 | name: "relu7" 251 | type: "ReLU" 252 | bottom: "fc7" 253 | top: 
"fc7" 254 | } 255 | layer { 256 | name: "drop7" 257 | type: "Dropout" 258 | bottom: "fc7" 259 | top: "fc7" 260 | dropout_param { 261 | dropout_ratio: 0.5 262 | } 263 | } 264 | layer { 265 | name: "cls_score" 266 | type: "InnerProduct" 267 | bottom: "fc7" 268 | top: "cls_score" 269 | param { 270 | lr_mult: 0 271 | decay_mult: 0 272 | } 273 | param { 274 | lr_mult: 0 275 | decay_mult: 0 276 | } 277 | inner_product_param { 278 | num_output: 21 279 | weight_filler { 280 | type: "gaussian" 281 | std: 0.01 282 | } 283 | bias_filler { 284 | type: "constant" 285 | value: 0 286 | } 287 | } 288 | } 289 | layer { 290 | name: "bbox_pred" 291 | type: "InnerProduct" 292 | bottom: "fc7" 293 | top: "bbox_pred" 294 | param { 295 | lr_mult: 1 296 | decay_mult: 1 297 | } 298 | param { 299 | lr_mult: 2 300 | decay_mult: 0 301 | } 302 | inner_product_param { 303 | num_output: 84 304 | weight_filler { 305 | type: "gaussian" 306 | std: 0.001 307 | } 308 | bias_filler { 309 | type: "constant" 310 | value: 0 311 | } 312 | } 313 | } 314 | layer { 315 | name: "loss_cls" 316 | type: "SoftmaxWithLoss" 317 | bottom: "cls_score" 318 | bottom: "labels" 319 | top: "loss_cls" 320 | loss_weight: 0 321 | } 322 | layer { 323 | name: "loss_bbox" 324 | type: "SmoothL1Loss" 325 | bottom: "bbox_pred" 326 | bottom: "bbox_targets" 327 | bottom: "bbox_loss_weights" 328 | top: "loss_bbox" 329 | loss_weight: 1 330 | } 331 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG_CNN_M_1024/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | 
snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 224 7 | dim: 224 8 | } 9 | input: "rois" 10 | input_shape { 11 | dim: 1 # to be changed on-the-fly to num ROIs 12 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 7 30 | stride: 2 31 | } 32 | } 33 | layer { 34 | name: "relu1" 35 | type: "ReLU" 36 | bottom: "conv1" 37 | top: "conv1" 38 | } 39 | layer { 40 | name: "norm1" 41 | type: "LRN" 42 | bottom: "conv1" 43 | top: "norm1" 44 | lrn_param { 45 | local_size: 5 46 | alpha: 0.0005 47 | beta: 0.75 48 | k: 2 49 | } 50 | } 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "norm1" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 3 59 | stride: 2 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | param { 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | convolution_param { 76 | num_output: 256 77 | pad: 1 78 | kernel_size: 5 79 | stride: 2 80 | } 81 | } 82 | layer { 83 | name: "relu2" 84 | type: "ReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | } 88 | layer { 89 | name: "norm2" 90 | type: "LRN" 91 | bottom: "conv2" 92 | top: "norm2" 93 | lrn_param { 94 | local_size: 5 95 | alpha: 0.0005 96 | beta: 0.75 97 | k: 2 98 | } 99 | } 100 | layer { 101 | name: "pool2" 102 | type: "Pooling" 103 | bottom: "norm2" 104 | top: "pool2" 105 | 
pooling_param { 106 | pool: MAX 107 | kernel_size: 3 108 | stride: 2 109 | } 110 | } 111 | layer { 112 | name: "conv3" 113 | type: "Convolution" 114 | bottom: "pool2" 115 | top: "conv3" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 512 126 | pad: 1 127 | kernel_size: 3 128 | } 129 | } 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | layer { 137 | name: "conv4" 138 | type: "Convolution" 139 | bottom: "conv3" 140 | top: "conv4" 141 | param { 142 | lr_mult: 1 143 | decay_mult: 1 144 | } 145 | param { 146 | lr_mult: 2 147 | decay_mult: 0 148 | } 149 | convolution_param { 150 | num_output: 512 151 | pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 | layer { 156 | name: "relu4" 157 | type: "ReLU" 158 | bottom: "conv4" 159 | top: "conv4" 160 | } 161 | layer { 162 | name: "conv5" 163 | type: "Convolution" 164 | bottom: "conv4" 165 | top: "conv5" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 512 176 | pad: 1 177 | kernel_size: 3 178 | } 179 | } 180 | layer { 181 | name: "relu5" 182 | type: "ReLU" 183 | bottom: "conv5" 184 | top: "conv5" 185 | } 186 | layer { 187 | name: "roi_pool5" 188 | type: "ROIPooling" 189 | bottom: "conv5" 190 | bottom: "rois" 191 | top: "pool5" 192 | roi_pooling_param { 193 | pooled_w: 6 194 | pooled_h: 6 195 | spatial_scale: 0.0625 # 1/16 196 | } 197 | } 198 | layer { 199 | name: "fc6" 200 | type: "InnerProduct" 201 | bottom: "pool5" 202 | top: "fc6" 203 | param { 204 | lr_mult: 1 205 | decay_mult: 1 206 | } 207 | param { 208 | lr_mult: 2 209 | decay_mult: 0 210 | } 211 | inner_product_param { 212 | num_output: 4096 213 | } 214 | } 215 | layer { 216 | name: "relu6" 217 | type: "ReLU" 218 | bottom: "fc6" 219 | top: "fc6" 220 | } 221 | layer { 222 | name: "drop6" 223 
| type: "Dropout" 224 | bottom: "fc6" 225 | top: "fc6" 226 | dropout_param { 227 | dropout_ratio: 0.5 228 | } 229 | } 230 | layer { 231 | name: "fc7" 232 | type: "InnerProduct" 233 | bottom: "fc6" 234 | top: "fc7" 235 | param { 236 | lr_mult: 1 237 | decay_mult: 1 238 | } 239 | param { 240 | lr_mult: 2 241 | decay_mult: 0 242 | } 243 | inner_product_param { 244 | num_output: 1024 245 | } 246 | } 247 | layer { 248 | name: "relu7" 249 | type: "ReLU" 250 | bottom: "fc7" 251 | top: "fc7" 252 | } 253 | layer { 254 | name: "drop7" 255 | type: "Dropout" 256 | bottom: "fc7" 257 | top: "fc7" 258 | dropout_param { 259 | dropout_ratio: 0.5 260 | } 261 | } 262 | layer { 263 | name: "cls_score" 264 | type: "InnerProduct" 265 | bottom: "fc7" 266 | top: "cls_score" 267 | param { 268 | lr_mult: 1 269 | decay_mult: 1 270 | } 271 | param { 272 | lr_mult: 2 273 | decay_mult: 0 274 | } 275 | inner_product_param { 276 | num_output: 21 277 | weight_filler { 278 | type: "gaussian" 279 | std: 0.01 280 | } 281 | bias_filler { 282 | type: "constant" 283 | value: 0 284 | } 285 | } 286 | } 287 | layer { 288 | name: "bbox_pred" 289 | type: "InnerProduct" 290 | bottom: "fc7" 291 | top: "bbox_pred" 292 | param { 293 | lr_mult: 1 294 | decay_mult: 1 295 | } 296 | param { 297 | lr_mult: 2 298 | decay_mult: 0 299 | } 300 | inner_product_param { 301 | num_output: 84 302 | weight_filler { 303 | type: "gaussian" 304 | std: 0.001 305 | } 306 | bias_filler { 307 | type: "constant" 308 | value: 0 309 | } 310 | } 311 | } 312 | layer { 313 | name: "cls_prob" 314 | type: "Softmax" 315 | bottom: "cls_score" 316 | top: "cls_prob" 317 | } 318 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | top: 'bbox_targets' 9 | top: 
'bbox_loss_weights' 10 | python_param { 11 | module: 'roi_data_layer.layer' 12 | layer: 'RoIDataLayer' 13 | param_str: "'num_classes': 21" 14 | } 15 | } 16 | layer { 17 | name: "conv1" 18 | type: "Convolution" 19 | bottom: "data" 20 | top: "conv1" 21 | param { 22 | lr_mult: 0 23 | decay_mult: 0 24 | } 25 | param { 26 | lr_mult: 0 27 | decay_mult: 0 28 | } 29 | convolution_param { 30 | num_output: 96 31 | kernel_size: 7 32 | stride: 2 33 | } 34 | } 35 | layer { 36 | name: "relu1" 37 | type: "ReLU" 38 | bottom: "conv1" 39 | top: "conv1" 40 | } 41 | layer { 42 | name: "norm1" 43 | type: "LRN" 44 | bottom: "conv1" 45 | top: "norm1" 46 | lrn_param { 47 | local_size: 5 48 | alpha: 0.0005 49 | beta: 0.75 50 | k: 2 51 | } 52 | } 53 | layer { 54 | name: "pool1" 55 | type: "Pooling" 56 | bottom: "norm1" 57 | top: "pool1" 58 | pooling_param { 59 | pool: MAX 60 | kernel_size: 3 61 | stride: 2 62 | } 63 | } 64 | layer { 65 | name: "conv2" 66 | type: "Convolution" 67 | bottom: "pool1" 68 | top: "conv2" 69 | param { 70 | lr_mult: 1 71 | decay_mult: 1 72 | } 73 | param { 74 | lr_mult: 2 75 | decay_mult: 0 76 | } 77 | convolution_param { 78 | num_output: 256 79 | pad: 1 80 | kernel_size: 5 81 | stride: 2 82 | } 83 | } 84 | layer { 85 | name: "relu2" 86 | type: "ReLU" 87 | bottom: "conv2" 88 | top: "conv2" 89 | } 90 | layer { 91 | name: "norm2" 92 | type: "LRN" 93 | bottom: "conv2" 94 | top: "norm2" 95 | lrn_param { 96 | local_size: 5 97 | alpha: 0.0005 98 | beta: 0.75 99 | k: 2 100 | } 101 | } 102 | layer { 103 | name: "pool2" 104 | type: "Pooling" 105 | bottom: "norm2" 106 | top: "pool2" 107 | pooling_param { 108 | pool: MAX 109 | kernel_size: 3 110 | stride: 2 111 | } 112 | } 113 | layer { 114 | name: "conv3" 115 | type: "Convolution" 116 | bottom: "pool2" 117 | top: "conv3" 118 | param { 119 | lr_mult: 1 120 | decay_mult: 1 121 | } 122 | param { 123 | lr_mult: 2 124 | decay_mult: 0 125 | } 126 | convolution_param { 127 | num_output: 512 128 | pad: 1 129 | kernel_size: 3 130 | } 
131 | } 132 | layer { 133 | name: "relu3" 134 | type: "ReLU" 135 | bottom: "conv3" 136 | top: "conv3" 137 | } 138 | layer { 139 | name: "conv4" 140 | type: "Convolution" 141 | bottom: "conv3" 142 | top: "conv4" 143 | param { 144 | lr_mult: 1 145 | decay_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | decay_mult: 0 150 | } 151 | convolution_param { 152 | num_output: 512 153 | pad: 1 154 | kernel_size: 3 155 | } 156 | } 157 | layer { 158 | name: "relu4" 159 | type: "ReLU" 160 | bottom: "conv4" 161 | top: "conv4" 162 | } 163 | layer { 164 | name: "conv5" 165 | type: "Convolution" 166 | bottom: "conv4" 167 | top: "conv5" 168 | param { 169 | lr_mult: 1 170 | decay_mult: 1 171 | } 172 | param { 173 | lr_mult: 2 174 | decay_mult: 0 175 | } 176 | convolution_param { 177 | num_output: 512 178 | pad: 1 179 | kernel_size: 3 180 | } 181 | } 182 | layer { 183 | name: "relu5" 184 | type: "ReLU" 185 | bottom: "conv5" 186 | top: "conv5" 187 | } 188 | layer { 189 | name: "roi_pool5" 190 | type: "ROIPooling" 191 | bottom: "conv5" 192 | bottom: "rois" 193 | top: "pool5" 194 | roi_pooling_param { 195 | pooled_w: 6 196 | pooled_h: 6 197 | spatial_scale: 0.0625 # 1/16 198 | } 199 | } 200 | layer { 201 | name: "fc6" 202 | type: "InnerProduct" 203 | bottom: "pool5" 204 | top: "fc6" 205 | param { 206 | lr_mult: 1 207 | decay_mult: 1 208 | } 209 | param { 210 | lr_mult: 2 211 | decay_mult: 0 212 | } 213 | inner_product_param { 214 | num_output: 4096 215 | } 216 | } 217 | layer { 218 | name: "relu6" 219 | type: "ReLU" 220 | bottom: "fc6" 221 | top: "fc6" 222 | } 223 | layer { 224 | name: "drop6" 225 | type: "Dropout" 226 | bottom: "fc6" 227 | top: "fc6" 228 | dropout_param { 229 | dropout_ratio: 0.5 230 | } 231 | } 232 | layer { 233 | name: "fc7" 234 | type: "InnerProduct" 235 | bottom: "fc6" 236 | top: "fc7" 237 | param { 238 | lr_mult: 1 239 | decay_mult: 1 240 | } 241 | param { 242 | lr_mult: 2 243 | decay_mult: 0 244 | } 245 | inner_product_param { 246 | num_output: 1024 247 | } 248 
| } 249 | layer { 250 | name: "relu7" 251 | type: "ReLU" 252 | bottom: "fc7" 253 | top: "fc7" 254 | } 255 | layer { 256 | name: "drop7" 257 | type: "Dropout" 258 | bottom: "fc7" 259 | top: "fc7" 260 | dropout_param { 261 | dropout_ratio: 0.5 262 | } 263 | } 264 | layer { 265 | name: "cls_score" 266 | type: "InnerProduct" 267 | bottom: "fc7" 268 | top: "cls_score" 269 | param { 270 | lr_mult: 1 271 | decay_mult: 1 272 | } 273 | param { 274 | lr_mult: 2 275 | decay_mult: 0 276 | } 277 | inner_product_param { 278 | num_output: 21 279 | weight_filler { 280 | type: "gaussian" 281 | std: 0.01 282 | } 283 | bias_filler { 284 | type: "constant" 285 | value: 0 286 | } 287 | } 288 | } 289 | layer { 290 | name: "bbox_pred" 291 | type: "InnerProduct" 292 | bottom: "fc7" 293 | top: "bbox_pred" 294 | param { 295 | lr_mult: 1 296 | decay_mult: 1 297 | } 298 | param { 299 | lr_mult: 2 300 | decay_mult: 0 301 | } 302 | inner_product_param { 303 | num_output: 84 304 | weight_filler { 305 | type: "gaussian" 306 | std: 0.001 307 | } 308 | bias_filler { 309 | type: "constant" 310 | value: 0 311 | } 312 | } 313 | } 314 | layer { 315 | name: "loss_cls" 316 | type: "SoftmaxWithLoss" 317 | bottom: "cls_score" 318 | bottom: "labels" 319 | top: "loss_cls" 320 | loss_weight: 1 321 | } 322 | layer { 323 | name: "loss_bbox" 324 | type: "SmoothL1Loss" 325 | bottom: "bbox_pred" 326 | bottom: "bbox_targets" 327 | bottom: "bbox_loss_weights" 328 | top: "loss_bbox" 329 | loss_weight: 1 330 | } 331 | -------------------------------------------------------------------------------- /output/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /output/README.md: -------------------------------------------------------------------------------- 1 | Artifacts generated by the scripts in `tools` are written in this directory. 
def add_path(path):
    """Prepend ``path`` to ``sys.path`` unless it is already importable."""
    if path in sys.path:
        return
    sys.path.insert(0, path)

this_dir = osp.dirname(__file__)

# Add caffe to PYTHONPATH so `import caffe` picks up the bundled fork.
caffe_path = osp.join(this_dir, '..', 'caffe-fast-rcnn', 'python')
add_path(caffe_path)

# Add lib to PYTHONPATH so project packages (fast_rcnn, datasets, ...) resolve.
lib_path = osp.join(this_dir, '..', 'lib')
add_path(lib_path)
def compress_weights(W, l):
    r"""Compress the weight matrix W of an inner product (fully connected)
    layer using truncated SVD.

    Parameters:
    W: N x M weights matrix
    l: number of singular values to retain

    Returns:
    Ul, L: matrices such that W \approx Ul*L, where Ul is N x l and
        L is l x M
    """
    # numpy has no truncated SVD, so compute the full thin decomposition and
    # keep only the l largest singular values/vectors; this could be faster.
    U, s, V = np.linalg.svd(W, full_matrices=False)

    Ul = U[:, :l]
    sl = s[:l]
    Vl = V[:l, :]

    # Fold the singular values into the right factor. Scaling the rows of Vl
    # directly avoids materializing the l x l diagonal matrix.
    L = sl[:, np.newaxis] * Vl
    return Ul, L
def main():
    """Factorize the fc6/fc7 weights of a trained net with truncated SVD and
    write the compressed model next to the input caffemodel.

    The compressed prototxt declares fc6_L/fc6_U (and/or fc7_L/fc7_U) factor
    layers; the bottleneck size is read from the factor layer shapes.
    """
    args = parse_args()

    # Source network with the uncompressed (trained) weights.
    net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)

    # Target network; layers shared with `net` are initialized from the same
    # caffemodel, and the factor layers are filled in below.
    net_svd = caffe.Net(args.prototxt_svd, args.caffemodel, caffe.TEST)

    print('Uncompressed network {} : {}'.format(args.prototxt, args.caffemodel))
    print('Compressed network prototxt {}'.format(args.prototxt_svd))

    out = os.path.splitext(os.path.basename(args.caffemodel))[0] + '_svd'
    out_dir = os.path.dirname(args.caffemodel)

    # Compress fc6, if the compressed prototxt defines the factor layers.
    # (`in` instead of dict.has_key(), which Python 3 removed.)
    if 'fc6_L' in net_svd.params:
        # The bottleneck size l is given by the factor layer's output count.
        l_fc6 = net_svd.params['fc6_L'][0].data.shape[0]
        print(' fc6_L bottleneck size: {}'.format(l_fc6))

        # uncompressed weights and biases
        W_fc6 = net.params['fc6'][0].data
        B_fc6 = net.params['fc6'][1].data

        print(' compressing fc6...')
        Ul_fc6, L_fc6 = compress_weights(W_fc6, l_fc6)

        # fc6_L must carry no bias term; the original bias lives on fc6_U.
        assert(len(net_svd.params['fc6_L']) == 1)

        # install compressed matrix factors (and original biases)
        net_svd.params['fc6_L'][0].data[...] = L_fc6

        net_svd.params['fc6_U'][0].data[...] = Ul_fc6
        net_svd.params['fc6_U'][1].data[...] = B_fc6

        out += '_fc6_{}'.format(l_fc6)

    # Compress fc7, same procedure as fc6.
    if 'fc7_L' in net_svd.params:
        l_fc7 = net_svd.params['fc7_L'][0].data.shape[0]
        print(' fc7_L bottleneck size: {}'.format(l_fc7))

        W_fc7 = net.params['fc7'][0].data
        B_fc7 = net.params['fc7'][1].data

        print(' compressing fc7...')
        Ul_fc7, L_fc7 = compress_weights(W_fc7, l_fc7)

        assert(len(net_svd.params['fc7_L']) == 1)

        net_svd.params['fc7_L'][0].data[...] = L_fc7

        net_svd.params['fc7_U'][0].data[...] = Ul_fc7
        net_svd.params['fc7_U'][1].data[...] = B_fc7

        out += '_fc7_{}'.format(l_fc7)

    filename = '{}/{}.caffemodel'.format(out_dir, out)
    net_svd.save(filename)
    print('Wrote svd model to: {:s}'.format(filename))
def demo(net, image_name, classes):
    """Detect object classes in an image using pre-computed object proposals.

    Parameters:
    net: a caffe.Net loaded in TEST mode
    image_name: basename (without extension) of an image in data/demo
    classes: iterable of class names to visualize; each must be in CLASSES
    """
    # Load pre-computed Selective Search object proposals
    box_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo',
                            image_name + '_boxes.mat')
    obj_proposals = sio.loadmat(box_file)['boxes']

    # Load the demo image (BGR order, as im_detect expects from cv2.imread)
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '.jpg')
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    # Parenthesized print: valid in both Python 2 and 3, unlike `print '...'`.
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each requested class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        # `boxes` stores 4 regressed coordinates per class, laid out
        # contiguously, so slice out the 4 columns for this class.
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        # Keep only confident detections, then suppress near-duplicates.
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        print('All {} detections with p({} | box) >= {:.1f}'.format(
            cls, cls, CONF_THRESH))
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def parse_args():
    """Parse input arguments for the demo script."""
    # Fix copy-paste from train_net.py: this parser belongs to the demo
    # script, not to training, so the --help description says so.
    parser = argparse.ArgumentParser(description='Fast R-CNN demo')
    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
                        default=0, type=int)
    parser.add_argument('--cpu', dest='cpu_mode',
                        help='Use CPU mode (overrides --gpu)',
                        action='store_true')
    parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
                        choices=NETS.keys(), default='vgg16')

    args = parser.parse_args()

    return args
def parse_args():
    """Build the re-evaluation argument parser and return the parsed args.

    When invoked with no arguments at all, print usage and exit with
    status 1.
    """
    ap = argparse.ArgumentParser(description='Re-evaluate results')
    ap.add_argument('output_dir', nargs=1, help='results directory',
                    type=str)
    ap.add_argument('--rerun', dest='rerun',
                    help=('re-run evaluation code '
                          '(otherwise: results are loaded from file)'),
                    action='store_true')
    ap.add_argument('--imdb', dest='imdb_name',
                    help='dataset to re-evaluate',
                    default='voc_2007_test', type=str)
    ap.add_argument('--comp', dest='comp_mode', help='competition mode',
                    action='store_true')

    # Bare invocation: show the help text instead of a terse usage error.
    if len(sys.argv) < 2:
        ap.print_help()
        sys.exit(1)

    return ap.parse_args()
def from_dets(imdb_name, output_dir, comp_mode):
    """Re-run NMS and dataset evaluation on saved detections.

    Loads <output_dir>/detections.pkl (written by fast_rcnn.test.test_net),
    applies NMS with the configured threshold, and hands the result to the
    imdb's evaluation code.
    """
    imdb = get_imdb(imdb_name)
    imdb.competition_mode(comp_mode)
    with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f:
        dets = cPickle.load(f)

    # Parenthesized prints: valid in both Python 2 and 3.
    print('Applying NMS to all detections')
    nms_dets = apply_nms(dets, cfg.TEST.NMS)

    print('Evaluating detections')
    imdb.evaluate_detections(nms_dets, output_dir)
def _str2bool(v):
    """argparse-friendly boolean conversion.

    `type=bool` is broken for argparse: bool('False') is True, so any value
    passed on the command line is treated as True. Map the common textual
    spellings explicitly instead.
    """
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', '0'):
        return False
    raise argparse.ArgumentTypeError('boolean value expected, got %r' % (v,))

def parse_args():
    """
    Parse input arguments
    """
    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use',
                        default=0, type=int)
    parser.add_argument('--def', dest='prototxt',
                        help='prototxt file defining the network',
                        default=None, type=str)
    parser.add_argument('--net', dest='caffemodel',
                        help='model to test',
                        default=None, type=str)
    parser.add_argument('--cfg', dest='cfg_file',
                        help='optional config file', default=None, type=str)
    # Was `type=bool`, which made `--wait false`/`--wait 0` still yield True;
    # the converter makes the flag actually switchable. Default is unchanged.
    parser.add_argument('--wait', dest='wait',
                        help='wait until net file exists',
                        default=True, type=_str2bool)
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to test',
                        default='voc_2007_test', type=str)
    parser.add_argument('--comp', dest='comp_mode', help='competition mode',
                        action='store_true')
    parser.add_argument('--set', dest='set_cfgs',
                        help='set config keys', default=None,
                        nargs=argparse.REMAINDER)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args
def parse_args():
    """Build the training argument parser and return the parsed arguments.

    When invoked with no arguments at all, print usage and exit with
    status 1.
    """
    ap = argparse.ArgumentParser(description='Train a Fast R-CNN network')
    ap.add_argument('--gpu', dest='gpu_id',
                    help='GPU device id to use [0]',
                    default=0, type=int)
    ap.add_argument('--solver', dest='solver',
                    help='solver prototxt',
                    default=None, type=str)
    ap.add_argument('--iters', dest='max_iters',
                    help='number of iterations to train',
                    default=40000, type=int)
    ap.add_argument('--weights', dest='pretrained_model',
                    help='initialize with pretrained model weights',
                    default=None, type=str)
    ap.add_argument('--cfg', dest='cfg_file',
                    help='optional config file',
                    default=None, type=str)
    ap.add_argument('--imdb', dest='imdb_name',
                    help='dataset to train on',
                    default='voc_2007_trainval', type=str)
    ap.add_argument('--rand', dest='randomize',
                    help='randomize (do not use a fixed seed)',
                    action='store_true')
    # REMAINDER swallows everything after --set as raw config key/value pairs.
    ap.add_argument('--set', dest='set_cfgs',
                    help='set config keys', default=None,
                    nargs=argparse.REMAINDER)

    # Bare invocation: show the help text instead of a terse usage error.
    if len(sys.argv) < 2:
        ap.print_help()
        sys.exit(1)

    return ap.parse_args()
if __name__ == '__main__':
    args = parse_args()

    print('Called with args:')
    print(args)

    # Optional config overrides: the file first, then individual --set keys,
    # so command-line values win over the file.
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    if not args.randomize:
        # fix the random seeds (numpy and caffe) for reproducibility
        np.random.seed(cfg.RNG_SEED)
        caffe.set_random_seed(cfg.RNG_SEED)

    # set up caffe
    caffe.set_mode_gpu()
    if args.gpu_id is not None:
        caffe.set_device(args.gpu_id)

    imdb = get_imdb(args.imdb_name)
    # Parenthesized prints: valid in both Python 2 and 3, and consistent with
    # the print(...) calls above.
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb)

    output_dir = get_output_dir(imdb, None)
    print('Output will be saved to `{:s}`'.format(output_dir))

    train_net(args.solver, roidb, output_dir,
              pretrained_model=args.pretrained_model,
              max_iters=args.max_iters)