├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── data ├── .gitignore ├── README.md ├── demo │ ├── 000456.jpg │ ├── 000542.jpg │ ├── 001150.jpg │ ├── 001763.jpg │ └── 004545.jpg ├── pylintrc └── scripts │ ├── fetch_faster_rcnn_models.sh │ ├── fetch_imagenet_models.sh │ └── fetch_selective_search_data.sh ├── experiments ├── README.md ├── cfgs │ ├── faster_rcnn_alt_opt.yml │ └── faster_rcnn_end2end.yml ├── logs │ └── .gitignore └── scripts │ ├── fast_rcnn.sh │ ├── faster_rcnn_alt_opt.sh │ └── faster_rcnn_end2end.sh ├── lib ├── Makefile ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── coco.py │ ├── ds_utils.py │ ├── factory.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── tools │ │ └── mcg_munge.py │ └── voc_eval.py ├── fast_rcnn │ ├── __init__.py │ ├── bbox_transform.py │ ├── config.py │ ├── nms_wrapper.py │ ├── test.py │ └── train.py ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h ├── roi_data_layer │ ├── __init__.py │ ├── layer.py │ ├── minibatch.py │ └── roidb.py ├── rpn │ ├── README.md │ ├── __init__.py │ ├── anchor_target_layer.py │ ├── generate.py │ ├── generate_anchors.py │ ├── proposal_layer.py │ └── proposal_target_layer.py ├── setup.py ├── transform │ ├── __init__.py │ └── torch_image_transform_layer.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── bbox.pyx │ ├── blob.py │ └── timer.py ├── models ├── README.md ├── coco │ ├── VGG16 │ │ ├── fast_rcnn │ │ │ ├── solver.prototxt │ │ │ ├── test.prototxt │ │ │ └── train.prototxt │ │ └── faster_rcnn_end2end │ │ │ ├── solver.prototxt │ │ │ ├── test.prototxt │ │ │ └── train.prototxt │ └── VGG_CNN_M_1024 │ │ ├── fast_rcnn │ │ ├── solver.prototxt │ │ ├── test.prototxt │ │ └── 
train.prototxt │ │ └── faster_rcnn_end2end │ │ ├── solver.prototxt │ │ ├── test.prototxt │ │ └── train.prototxt └── pascal_voc │ ├── VGG16 │ ├── fast_rcnn │ │ ├── solver.prototxt │ │ ├── test.prototxt │ │ └── train.prototxt │ ├── faster_rcnn_alt_opt │ │ ├── faster_rcnn_test.pt │ │ ├── rpn_test.pt │ │ ├── stage1_fast_rcnn_solver30k40k.pt │ │ ├── stage1_fast_rcnn_train.pt │ │ ├── stage1_rpn_solver60k80k.pt │ │ ├── stage1_rpn_train.pt │ │ ├── stage2_fast_rcnn_solver30k40k.pt │ │ ├── stage2_fast_rcnn_train.pt │ │ ├── stage2_rpn_solver60k80k.pt │ │ └── stage2_rpn_train.pt │ └── faster_rcnn_end2end │ │ ├── solver.prototxt │ │ ├── test.prototxt │ │ └── train.prototxt │ ├── VGG_CNN_M_1024 │ ├── fast_rcnn │ │ ├── solver.prototxt │ │ ├── test.prototxt │ │ └── train.prototxt │ ├── faster_rcnn_alt_opt │ │ ├── faster_rcnn_test.pt │ │ ├── rpn_test.pt │ │ ├── stage1_fast_rcnn_solver30k40k.pt │ │ ├── stage1_fast_rcnn_train.pt │ │ ├── stage1_rpn_solver60k80k.pt │ │ ├── stage1_rpn_train.pt │ │ ├── stage2_fast_rcnn_solver30k40k.pt │ │ ├── stage2_fast_rcnn_train.pt │ │ ├── stage2_rpn_solver60k80k.pt │ │ └── stage2_rpn_train.pt │ └── faster_rcnn_end2end │ │ ├── solver.prototxt │ │ ├── test.prototxt │ │ └── train.prototxt │ └── ZF │ ├── fast_rcnn │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt │ ├── faster_rcnn_alt_opt │ ├── faster_rcnn_test.pt │ ├── rpn_test.pt │ ├── stage1_fast_rcnn_solver30k40k.pt │ ├── stage1_fast_rcnn_train.pt │ ├── stage1_rpn_solver60k80k.pt │ ├── stage1_rpn_train.pt │ ├── stage2_fast_rcnn_solver30k40k.pt │ ├── stage2_fast_rcnn_train.pt │ ├── stage2_rpn_solver60k80k.pt │ └── stage2_rpn_train.pt │ └── faster_rcnn_end2end │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt └── tools ├── README.md ├── _init_paths.py ├── compress_net.py ├── demo.py ├── eval_recall.py ├── reval.py ├── rpn_generate.py ├── test_net.py ├── train_faster_rcnn_alt_opt.py ├── train_net.py └── train_svms.py /.gitignore: 
-------------------------------------------------------------------------------- 1 | *.pyc 2 | .ipynb_checkpoints 3 | lib/build 4 | lib/pycocotools/_mask.c 5 | lib/pycocotools/_mask.so 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "caffe-fast-rcnn"] 2 | path = caffe-fast-rcnn 3 | url = https://github.com/rbgirshick/caffe-fast-rcnn.git 4 | branch = fast-rcnn 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Faster R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2015 Microsoft Corporation 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 
24 | 25 | ************************************************************************ 26 | 27 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION 28 | 29 | This project, Faster R-CNN, incorporates material from the project(s) 30 | listed below (collectively, "Third Party Code"). Microsoft is not the 31 | original author of the Third Party Code. The original copyright notice 32 | and license under which Microsoft received such Third Party Code are set 33 | out below. This Third Party Code is licensed to you under their original 34 | license terms set forth below. Microsoft reserves all other rights not 35 | expressly granted, whether by implication, estoppel or otherwise. 36 | 37 | 1. Caffe, (https://github.com/BVLC/caffe/) 38 | 39 | COPYRIGHT 40 | 41 | All contributions by the University of California: 42 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents) 43 | All rights reserved. 44 | 45 | All other contributions: 46 | Copyright (c) 2014, 2015, the respective contributors 47 | All rights reserved. 48 | 49 | Caffe uses a shared copyright model: each contributor holds copyright 50 | over their contributions to Caffe. The project versioning records all 51 | such contribution and copyright details. If a contributor wants to 52 | further mark their specific copyright on a particular contribution, 53 | they should indicate their copyright solely in the commit message of 54 | the change when it is committed. 55 | 56 | The BSD 2-Clause License 57 | 58 | Redistribution and use in source and binary forms, with or without 59 | modification, are permitted provided that the following conditions 60 | are met: 61 | 62 | 1. Redistributions of source code must retain the above copyright notice, 63 | this list of conditions and the following disclaimer. 64 | 65 | 2. 
Redistributions in binary form must reproduce the above copyright 66 | notice, this list of conditions and the following disclaimer in the 67 | documentation and/or other materials provided with the distribution. 68 | 69 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 70 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 71 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 72 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 73 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 74 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 75 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 76 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 77 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 78 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 79 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 80 | 81 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION********** 82 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | selective_search* 2 | imagenet_models* 3 | fast_rcnn_models* 4 | VOCdevkit* 5 | cache 6 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | This directory holds (*after you download them*): 2 | - Caffe models pre-trained on ImageNet 3 | - Faster R-CNN models 4 | - Symlinks to datasets 5 | 6 | To download Caffe models (ZF, VGG16) pre-trained on ImageNet, run: 7 | 8 | ``` 9 | ./data/scripts/fetch_imagenet_models.sh 10 | ``` 11 | 12 | This script will populate `data/imagenet_models`. 
13 | 14 | To download Faster R-CNN models trained on VOC 2007, run: 15 | 16 | ``` 17 | ./data/scripts/fetch_faster_rcnn_models.sh 18 | ``` 19 | 20 | This script will populate `data/faster_rcnn_models`. 21 | 22 | In order to train and test with PASCAL VOC, you will need to establish symlinks. 23 | From the `data` directory (`cd data`): 24 | 25 | ``` 26 | # For VOC 2007 27 | ln -s /your/path/to/VOC2007/VOCdevkit VOCdevkit2007 28 | 29 | # For VOC 2012 30 | ln -s /your/path/to/VOC2012/VOCdevkit VOCdevkit2012 31 | ``` 32 | 33 | Install the MS COCO dataset at /path/to/coco 34 | 35 | ``` 36 | ln -s /path/to/coco coco 37 | ``` 38 | 39 | For COCO with Fast R-CNN, place object proposals under `coco_proposals` (inside 40 | the `data` directory). You can obtain proposals on COCO from Jan Hosang at 41 | https://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-computing/research/object-recognition-and-scene-understanding/how-good-are-detection-proposals-really/. 42 | For COCO, using MCG is recommended over selective search. MCG boxes can be downloaded 43 | from http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/. 44 | Use the tool `lib/datasets/tools/mcg_munge.py` to convert the downloaded MCG data 45 | into the same file layout as those from Jan Hosang. 46 | 47 | Since you'll likely be experimenting with multiple installs of Fast/er R-CNN in 48 | parallel, you'll probably want to keep all of this data in a shared place and 49 | use symlinks. On my system I create the following symlinks inside `data`: 50 | 51 | Annotations for the 5k image 'minival' subset of COCO val2014 that I like to use 52 | can be found at https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0. 53 | Annotations for COCO val2014 (set) minus minival (~35k images) can be found at 54 | https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0. 
55 | 56 | ``` 57 | # data/cache holds various outputs created by the datasets package 58 | ln -s /data/fast_rcnn_shared/cache 59 | 60 | # move the imagenet_models to shared location and symlink to them 61 | ln -s /data/fast_rcnn_shared/imagenet_models 62 | 63 | # move the selective search data to a shared location and symlink to them 64 | # (only applicable to Fast R-CNN training) 65 | ln -s /data/fast_rcnn_shared/selective_search_data 66 | 67 | ln -s /data/VOC2007/VOCdevkit VOCdevkit2007 68 | ln -s /data/VOC2012/VOCdevkit VOCdevkit2012 69 | ``` 70 | -------------------------------------------------------------------------------- /data/demo/000456.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/781a917b378dbfdedb45b6a56189a31982da1b43/data/demo/000456.jpg -------------------------------------------------------------------------------- /data/demo/000542.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/781a917b378dbfdedb45b6a56189a31982da1b43/data/demo/000542.jpg -------------------------------------------------------------------------------- /data/demo/001150.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/781a917b378dbfdedb45b6a56189a31982da1b43/data/demo/001150.jpg -------------------------------------------------------------------------------- /data/demo/001763.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/781a917b378dbfdedb45b6a56189a31982da1b43/data/demo/001763.jpg -------------------------------------------------------------------------------- /data/demo/004545.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/781a917b378dbfdedb45b6a56189a31982da1b43/data/demo/004545.jpg -------------------------------------------------------------------------------- /data/pylintrc: -------------------------------------------------------------------------------- 1 | [TYPECHECK] 2 | 3 | ignored-modules = numpy, numpy.random, cv2 4 | -------------------------------------------------------------------------------- /data/scripts/fetch_faster_rcnn_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=faster_rcnn_models.tgz 7 | URL=https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0 8 | CHECKSUM=ac116844f66aefe29587214272054668 9 | 10 | if [ -f $FILE ]; then 11 | echo "File already exists. Checking md5..." 12 | os=`uname -s` 13 | if [ "$os" = "Linux" ]; then 14 | checksum=`md5sum $FILE | awk '{ print $1 }'` 15 | elif [ "$os" = "Darwin" ]; then 16 | checksum=`cat $FILE | md5` 17 | fi 18 | if [ "$checksum" = "$CHECKSUM" ]; then 19 | echo "Checksum is correct. No need to download." 20 | exit 0 21 | else 22 | echo "Checksum is incorrect. Need to download again." 23 | fi 24 | fi 25 | 26 | echo "Downloading Faster R-CNN demo models (695M)..." 27 | 28 | wget $URL -O $FILE 29 | 30 | echo "Unzipping..." 31 | 32 | tar zxvf $FILE 33 | 34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM." 
35 | -------------------------------------------------------------------------------- /data/scripts/fetch_imagenet_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=imagenet_models.tgz 7 | URL=https://dl.dropbox.com/s/gstw7122padlf0l/imagenet_models.tgz?dl=0 8 | CHECKSUM=ed34ca912d6782edfb673a8c3a0bda6d 9 | 10 | if [ -f $FILE ]; then 11 | echo "File already exists. Checking md5..." 12 | os=`uname -s` 13 | if [ "$os" = "Linux" ]; then 14 | checksum=`md5sum $FILE | awk '{ print $1 }'` 15 | elif [ "$os" = "Darwin" ]; then 16 | checksum=`cat $FILE | md5` 17 | fi 18 | if [ "$checksum" = "$CHECKSUM" ]; then 19 | echo "Checksum is correct. No need to download." 20 | exit 0 21 | else 22 | echo "Checksum is incorrect. Need to download again." 23 | fi 24 | fi 25 | 26 | echo "Downloading pretrained ImageNet models (1G)..." 27 | 28 | wget $URL -O $FILE 29 | 30 | echo "Unzipping..." 31 | 32 | tar zxvf $FILE 33 | 34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM." 35 | -------------------------------------------------------------------------------- /data/scripts/fetch_selective_search_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=selective_search_data.tgz 7 | URL=https://dl.dropboxusercontent.com/s/orrt7o6bp6ae0tc/selective_search_data.tgz?dl=0 8 | CHECKSUM=7078c1db87a7851b31966b96774cd9b9 9 | 10 | if [ -f $FILE ]; then 11 | echo "File already exists. Checking md5..." 12 | os=`uname -s` 13 | if [ "$os" = "Linux" ]; then 14 | checksum=`md5sum $FILE | awk '{ print $1 }'` 15 | elif [ "$os" = "Darwin" ]; then 16 | checksum=`cat $FILE | md5` 17 | fi 18 | if [ "$checksum" = "$CHECKSUM" ]; then 19 | echo "Checksum is correct. No need to download." 
20 | exit 0 21 | else 22 | echo "Checksum is incorrect. Need to download again." 23 | fi 24 | fi 25 | 26 | echo "Downloading precomputed selective search boxes (0.5G)..." 27 | 28 | wget $URL -O $FILE 29 | 30 | echo "Unzipping..." 31 | 32 | tar zxvf $FILE 33 | 34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM." 35 | -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- 1 | Scripts are under `experiments/scripts`. 2 | 3 | Each script saves a log file under `experiments/logs`. 4 | 5 | Configuration override files used in the experiments are stored in `experiments/cfgs`. 6 | -------------------------------------------------------------------------------- /experiments/cfgs/faster_rcnn_alt_opt.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: faster_rcnn_alt_opt 2 | TRAIN: 3 | BG_THRESH_LO: 0.0 4 | TEST: 5 | HAS_RPN: True 6 | -------------------------------------------------------------------------------- /experiments/cfgs/faster_rcnn_end2end.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: faster_rcnn_end2end 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | TEST: 11 | HAS_RPN: True 12 | -------------------------------------------------------------------------------- /experiments/logs/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt* 2 | -------------------------------------------------------------------------------- /experiments/scripts/fast_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # ./experiments/scripts/fast_rcnn.sh GPU NET 
DATASET [options args to {train,test}_net.py] 4 | # DATASET is either pascal_voc or coco. 5 | # 6 | # Example: 7 | # ./experiments/scripts/fast_rcnn.sh 0 VGG_CNN_M_1024 pascal_voc \ 8 | # --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]" 9 | 10 | set -x 11 | set -e 12 | 13 | export PYTHONUNBUFFERED="True" 14 | 15 | GPU_ID=$1 16 | NET=$2 17 | NET_lc=${NET,,} 18 | DATASET=$3 19 | 20 | array=( $@ ) 21 | len=${#array[@]} 22 | EXTRA_ARGS=${array[@]:3:$len} 23 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 24 | 25 | case $DATASET in 26 | pascal_voc) 27 | TRAIN_IMDB="voc_2007_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | PT_DIR="pascal_voc" 30 | ITERS=40000 31 | ;; 32 | coco) 33 | TRAIN_IMDB="coco_2014_train" 34 | TEST_IMDB="coco_2014_minival" 35 | PT_DIR="coco" 36 | ITERS=280000 37 | ;; 38 | *) 39 | echo "No dataset given" 40 | exit 41 | ;; 42 | esac 43 | 44 | LOG="experiments/logs/fast_rcnn_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 45 | exec &> >(tee -a "$LOG") 46 | echo Logging output to "$LOG" 47 | 48 | time ./tools/train_net.py --gpu ${GPU_ID} \ 49 | --solver models/${PT_DIR}/${NET}/fast_rcnn/solver.prototxt \ 50 | --weights data/imagenet_models/${NET}.v2.caffemodel \ 51 | --imdb ${TRAIN_IMDB} \ 52 | --iters ${ITERS} \ 53 | ${EXTRA_ARGS} 54 | 55 | set +x 56 | NET_FINAL=`grep -B 1 "done solving" ${LOG} | grep "Wrote snapshot" | awk '{print $4}'` 57 | set -x 58 | 59 | time ./tools/test_net.py --gpu ${GPU_ID} \ 60 | --def models/${PT_DIR}/${NET}/fast_rcnn/test.prototxt \ 61 | --net ${NET_FINAL} \ 62 | --imdb ${TEST_IMDB} \ 63 | ${EXTRA_ARGS} 64 | -------------------------------------------------------------------------------- /experiments/scripts/faster_rcnn_alt_opt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # ./experiments/scripts/faster_rcnn_alt_opt.sh GPU NET DATASET [options args to {train,test}_net.py] 4 | # DATASET is only pascal_voc for now 5 | # 6 | # Example: 7 
| # ./experiments/scripts/faster_rcnn_alt_opt.sh 0 VGG_CNN_M_1024 pascal_voc \ 8 | # --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]" 9 | 10 | set -x 11 | set -e 12 | 13 | export PYTHONUNBUFFERED="True" 14 | 15 | GPU_ID=$1 16 | NET=$2 17 | NET_lc=${NET,,} 18 | DATASET=$3 19 | 20 | array=( $@ ) 21 | len=${#array[@]} 22 | EXTRA_ARGS=${array[@]:3:$len} 23 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 24 | 25 | case $DATASET in 26 | pascal_voc) 27 | TRAIN_IMDB="voc_2007_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | PT_DIR="pascal_voc" 30 | ITERS=40000 31 | ;; 32 | coco) 33 | echo "Not implemented: use experiments/scripts/faster_rcnn_end2end.sh for coco" 34 | exit 35 | ;; 36 | *) 37 | echo "No dataset given" 38 | exit 39 | ;; 40 | esac 41 | 42 | LOG="experiments/logs/faster_rcnn_alt_opt_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 43 | exec &> >(tee -a "$LOG") 44 | echo Logging output to "$LOG" 45 | 46 | time ./tools/train_faster_rcnn_alt_opt.py --gpu ${GPU_ID} \ 47 | --net_name ${NET} \ 48 | --weights data/imagenet_models/${NET}.v2.caffemodel \ 49 | --imdb ${TRAIN_IMDB} \ 50 | --cfg experiments/cfgs/faster_rcnn_alt_opt.yml \ 51 | ${EXTRA_ARGS} 52 | 53 | set +x 54 | NET_FINAL=`grep "Final model:" ${LOG} | awk '{print $3}'` 55 | set -x 56 | 57 | time ./tools/test_net.py --gpu ${GPU_ID} \ 58 | --def models/${PT_DIR}/${NET}/faster_rcnn_alt_opt/faster_rcnn_test.pt \ 59 | --net ${NET_FINAL} \ 60 | --imdb ${TEST_IMDB} \ 61 | --cfg experiments/cfgs/faster_rcnn_alt_opt.yml \ 62 | ${EXTRA_ARGS} 63 | -------------------------------------------------------------------------------- /experiments/scripts/faster_rcnn_end2end.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # ./experiments/scripts/faster_rcnn_end2end.sh GPU NET DATASET [options args to {train,test}_net.py] 4 | # DATASET is either pascal_voc or coco. 
5 | # 6 | # Example: 7 | # ./experiments/scripts/faster_rcnn_end2end.sh 0 VGG_CNN_M_1024 pascal_voc \ 8 | # --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]" 9 | 10 | set -x 11 | set -e 12 | 13 | export PYTHONUNBUFFERED="True" 14 | 15 | GPU_ID=$1 16 | NET=$2 17 | NET_lc=${NET,,} 18 | DATASET=$3 19 | 20 | array=( $@ ) 21 | len=${#array[@]} 22 | EXTRA_ARGS=${array[@]:3:$len} 23 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 24 | 25 | case $DATASET in 26 | pascal_voc) 27 | TRAIN_IMDB="voc_2007_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | PT_DIR="pascal_voc" 30 | ITERS=70000 31 | ;; 32 | coco) 33 | # This is a very long and slow training schedule 34 | # You can probably use fewer iterations and reduce the 35 | # time to the LR drop (set in the solver to 350,000 iterations). 36 | TRAIN_IMDB="coco_2014_train" 37 | TEST_IMDB="coco_2014_minival" 38 | PT_DIR="coco" 39 | ITERS=490000 40 | ;; 41 | *) 42 | echo "No dataset given" 43 | exit 44 | ;; 45 | esac 46 | 47 | LOG="experiments/logs/faster_rcnn_end2end_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 48 | exec &> >(tee -a "$LOG") 49 | echo Logging output to "$LOG" 50 | 51 | time ./tools/train_net.py --gpu ${GPU_ID} \ 52 | --solver models/${PT_DIR}/${NET}/faster_rcnn_end2end/solver.prototxt \ 53 | --weights data/imagenet_models/${NET}.v2.caffemodel \ 54 | --imdb ${TRAIN_IMDB} \ 55 | --iters ${ITERS} \ 56 | --cfg experiments/cfgs/faster_rcnn_end2end.yml \ 57 | ${EXTRA_ARGS} 58 | 59 | set +x 60 | NET_FINAL=`grep -B 1 "done solving" ${LOG} | grep "Wrote snapshot" | awk '{print $4}'` 61 | set -x 62 | 63 | time ./tools/test_net.py --gpu ${GPU_ID} \ 64 | --def models/${PT_DIR}/${NET}/faster_rcnn_end2end/test.prototxt \ 65 | --net ${NET_FINAL} \ 66 | --imdb ${TEST_IMDB} \ 67 | --cfg experiments/cfgs/faster_rcnn_end2end.yml \ 68 | ${EXTRA_ARGS} 69 | -------------------------------------------------------------------------------- /lib/Makefile: 
-------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | 
ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | 7 | import numpy as np 8 | 9 | def unique_boxes(boxes, scale=1.0): 10 | """Return indices of unique boxes.""" 11 | v = np.array([1, 1e3, 1e6, 1e9]) 12 | hashes = 
np.round(boxes * scale).dot(v) 13 | _, index = np.unique(hashes, return_index=True) 14 | return np.sort(index) 15 | 16 | def xywh_to_xyxy(boxes): 17 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 18 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 19 | 20 | def xyxy_to_xywh(boxes): 21 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 22 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 23 | 24 | def validate_boxes(boxes, width=0, height=0): 25 | """Check that a set of boxes are valid.""" 26 | x1 = boxes[:, 0] 27 | y1 = boxes[:, 1] 28 | x2 = boxes[:, 2] 29 | y2 = boxes[:, 3] 30 | assert (x1 >= 0).all() 31 | assert (y1 >= 0).all() 32 | assert (x2 >= x1).all() 33 | assert (y2 >= y1).all() 34 | assert (x2 < width).all() 35 | assert (y2 < height).all() 36 | 37 | def filter_small_boxes(boxes, min_size): 38 | w = boxes[:, 2] - boxes[:, 0] 39 | h = boxes[:, 3] - boxes[:, 1] 40 | keep = np.where((w >= min_size) & (h > min_size))[0] 41 | return keep 42 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | from datasets.pascal_voc import pascal_voc 13 | from datasets.coco import coco 14 | import numpy as np 15 | 16 | # Set up voc__ using selective search "fast" mode 17 | for year in ['2007', '2012']: 18 | for split in ['train', 'val', 'trainval', 'test']: 19 | name = 'voc_{}_{}'.format(year, split) 20 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 21 | 22 | # Set up coco_2014_ 23 | for 
year in ['2014']: 24 | for split in ['train', 'val', 'minival', 'valminusminival']: 25 | name = 'coco_{}_{}'.format(year, split) 26 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 27 | 28 | # Set up coco_2015_ 29 | for year in ['2015']: 30 | for split in ['test', 'test-dev']: 31 | name = 'coco_{}_{}'.format(year, split) 32 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 33 | 34 | def get_imdb(name): 35 | """Get an imdb (image database) by name.""" 36 | if not __sets.has_key(name): 37 | raise KeyError('Unknown dataset: {}'.format(name)) 38 | return __sets[name]() 39 | 40 | def list_imdbs(): 41 | """List all registered imdbs.""" 42 | return __sets.keys() 43 | -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | """Hacky tool to convert file system layout of MCG boxes downloaded from 5 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 6 | so that it's consistent with those computed by Jan Hosang (see: 7 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 8 | computing/research/object-recognition-and-scene-understanding/how- 9 | good-are-detection-proposals-really/) 10 | 11 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 12 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 
13 | """ 14 | 15 | def munge(src_dir): 16 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 17 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 18 | 19 | files = os.listdir(src_dir) 20 | for fn in files: 21 | base, ext = os.path.splitext(fn) 22 | # first 14 chars / first 22 chars / all chars + .mat 23 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 24 | first = base[:14] 25 | second = base[:22] 26 | dst_dir = os.path.join('MCG', 'mat', first, second) 27 | if not os.path.exists(dst_dir): 28 | os.makedirs(dst_dir) 29 | src = os.path.join(src_dir, fn) 30 | dst = os.path.join(dst_dir, fn) 31 | print 'MV: {} -> {}'.format(src, dst) 32 | os.rename(src, dst) 33 | 34 | if __name__ == '__main__': 35 | # src_dir should look something like: 36 | # src_dir = 'MCG-COCO-val2014-boxes' 37 | src_dir = sys.argv[1] 38 | munge(src_dir) 39 | -------------------------------------------------------------------------------- /lib/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/fast_rcnn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def bbox_transform(ex_rois, gt_rois): 11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 12 | ex_heights = 
ex_rois[:, 3] - ex_rois[:, 1] + 1.0 13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 15 | 16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 20 | 21 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 23 | targets_dw = np.log(gt_widths / ex_widths) 24 | targets_dh = np.log(gt_heights / ex_heights) 25 | 26 | targets = np.vstack( 27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 28 | return targets 29 | 30 | def bbox_transform_inv(boxes, deltas): 31 | if boxes.shape[0] == 0: 32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 33 | 34 | boxes = boxes.astype(deltas.dtype, copy=False) 35 | 36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 38 | ctr_x = boxes[:, 0] + 0.5 * widths 39 | ctr_y = boxes[:, 1] + 0.5 * heights 40 | 41 | dx = deltas[:, 0::4] 42 | dy = deltas[:, 1::4] 43 | dw = deltas[:, 2::4] 44 | dh = deltas[:, 3::4] 45 | 46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 48 | pred_w = np.exp(dw) * widths[:, np.newaxis] 49 | pred_h = np.exp(dh) * heights[:, np.newaxis] 50 | 51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 52 | # x1 53 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 54 | # y1 55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 56 | # x2 57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 58 | # y2 59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 60 | 61 | return pred_boxes 62 | 63 | def clip_boxes(boxes, im_shape): 64 | """ 65 | Clip boxes to image boundaries. 
66 | """ 67 | 68 | # x1 >= 0 69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 70 | # y1 >= 0 71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 72 | # x2 < im_shape[1] 73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 74 | # y2 < im_shape[0] 75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 76 | return boxes 77 | -------------------------------------------------------------------------------- /lib/fast_rcnn/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from fast_rcnn.config import cfg 9 | from nms.gpu_nms import gpu_nms 10 | from nms.cpu_nms import cpu_nms 11 | 12 | def nms(dets, thresh, force_cpu=False): 13 | """Dispatch to either CPU or GPU NMS implementations.""" 14 | 15 | if dets.shape[0] == 0: 16 | return [] 17 | if cfg.USE_GPU_NMS and not force_cpu: 18 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | else: 20 | return cpu_nms(dets, thresh) 21 | -------------------------------------------------------------------------------- /lib/fast_rcnn/train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Train a Fast R-CNN network.""" 9 | 10 | import caffe 11 | from fast_rcnn.config import cfg 12 | import roi_data_layer.roidb as rdl_roidb 13 | from utils.timer import Timer 14 | import numpy as np 15 | 
import os

from caffe.proto import caffe_pb2
import google.protobuf as pb2

class SolverWrapper(object):
    """A simple wrapper around Caffe's solver.

    This wrapper gives us control over the snapshotting process, which we
    use to unnormalize the learned bounding-box regression weights.
    """

    def __init__(self, solver_prototxt, roidb, output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Computes bbox regression target statistics (when enabled), builds
        the SGD solver, optionally loads pretrained weights, parses the
        solver prototxt, and hands the roidb to the data layer.
        """
        self.output_dir = output_dir

        if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
            cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
            # RPN can only use precomputed normalization because there are no
            # fixed statistics to compute a priori
            assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

        if cfg.TRAIN.BBOX_REG:
            print('Computing bounding-box regression targets...')
            self.bbox_means, self.bbox_stds = \
                    rdl_roidb.add_bbox_regression_targets(roidb)
            print('done')

        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            print(('Loading pretrained model '
                   'weights from {:s}').format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)

        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            pb2.text_format.Merge(f.read(), self.solver_param)

        self.solver.net.layers[0].set_roidb(roidb)

    def snapshot(self):
        """Take a snapshot of the network after unnormalizing the learned
        bounding-box regression weights. This enables easy use at test-time.

        Returns the path of the written .caffemodel file.
        """
        net = self.solver.net

        # Fixed: dict.has_key() was removed in Python 3; `in` works in 2 and 3.
        scale_bbox_params = (cfg.TRAIN.BBOX_REG and
                             cfg.TRAIN.BBOX_NORMALIZE_TARGETS and
                             'bbox_pred' in net.params)

        if scale_bbox_params:
            # save original values so the in-memory net keeps training on
            # normalized targets after the snapshot is written
            orig_0 = net.params['bbox_pred'][0].data.copy()
            orig_1 = net.params['bbox_pred'][1].data.copy()

            # scale and shift with bbox reg unnormalization; then save snapshot
            net.params['bbox_pred'][0].data[...] = \
                    (net.params['bbox_pred'][0].data *
                     self.bbox_stds[:, np.newaxis])
            net.params['bbox_pred'][1].data[...] = \
                    (net.params['bbox_pred'][1].data *
                     self.bbox_stds + self.bbox_means)

        infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                 if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
        filename = (self.solver_param.snapshot_prefix + infix +
                    '_iter_{:d}'.format(self.solver.iter) + '.caffemodel')
        filename = os.path.join(self.output_dir, filename)

        net.save(str(filename))
        print('Wrote snapshot to: {:s}'.format(filename))

        if scale_bbox_params:
            # restore net to original state
            net.params['bbox_pred'][0].data[...] = orig_0
            net.params['bbox_pred'][1].data[...] = orig_1
        return filename

    def train_model(self, max_iters):
        """Network training loop; returns the list of snapshot paths."""
        last_snapshot_iter = -1
        timer = Timer()
        model_paths = []
        while self.solver.iter < max_iters:
            # Make one SGD update
            timer.tic()
            self.solver.step(1)
            timer.toc()
            if self.solver.iter % (10 * self.solver_param.display) == 0:
                print('speed: {:.3f}s / iter'.format(timer.average_time))

            if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = self.solver.iter
                model_paths.append(self.snapshot())

        # Final snapshot unless the last iteration already produced one.
        if last_snapshot_iter != self.solver.iter:
            model_paths.append(self.snapshot())
        return model_paths

def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training."""
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
def filter_roidb(roidb):
    """Remove roidb entries that have no usable RoIs; returns the filtered list."""

    def is_valid(entry):
        # Valid images have:
        #   (1) At least one foreground RoI OR
        #   (2) At least one background RoI
        overlaps = entry['max_overlaps']
        # find boxes with sufficient overlap
        fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                           (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
        # image is only valid if such boxes exist
        return len(fg_inds) > 0 or len(bg_inds) > 0

    num = len(roidb)
    filtered_roidb = [entry for entry in roidb if is_valid(entry)]
    num_after = len(filtered_roidb)
    # Fixed: print statement -> print() call (identical output on Python 2).
    print('Filtered {} roidb entries: {} -> {}'.format(num - num_after,
                                                       num, num_after))
    return filtered_roidb

def train_net(solver_prototxt, roidb, output_dir,
              pretrained_model=None, max_iters=40000):
    """Train a Fast R-CNN network; returns the list of snapshot paths."""
    roidb = filter_roidb(roidb)
    sw = SolverWrapper(solver_prototxt, roidb, output_dir,
                       pretrained_model=pretrained_model)

    print('Solving...')
    model_paths = sw.train_model(max_iters)
    print('done solving')
    return model_paths
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np
cimport numpy as np

cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    return a if a >= b else b

cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    return a if a <= b else b

# Fixed: the signature used `np.float thresh` and the removed `np.int` alias
# (both gone since NumPy 1.24); use the C float type and np.int_ (the dtype
# matching the np.int_t buffer declaration).
def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh):
    """Greedy non-maximum suppression over dets = (N, 5) rows of
    [x1, y1, x2, y2, score]; returns the list of kept row indices."""
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    cdef np.ndarray[np.int_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.int_)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np
cimport numpy as np

assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

# Fixed: `np.float thresh` relied on the NumPy scalar alias removed in
# NumPy 1.24; the C float type matches the underlying _nms signature.
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh,
            np.int32_t device_id=0):
    """GPU non-maximum suppression over dets = (N, 5) rows of
    [x1, y1, x2, y2, score]; returns kept indices in the ORIGINAL order."""
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    cdef np.ndarray[np.int_t, ndim=1] \
        order = scores.argsort()[::-1]
    # The CUDA kernel expects boxes sorted by descending score.
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim,
         thresh, device_id)
    keep = keep[:num_out]
    # Map kept positions in the sorted array back to original row indices.
    return list(order[keep])
// ------------------------------------------------------------------
// Faster R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Shaoqing Ren
// ------------------------------------------------------------------

#include "gpu_nms.hpp"
// Fixed: the include targets were stripped by the HTML export; restored.
#include <vector>
#include <iostream>

#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// One bit of the suppression mask per thread in a block.
int const threadsPerBlock = sizeof(unsigned long long) * 8;

// Intersection-over-union of two inclusive [x1, y1, x2, y2] boxes.
__device__ inline float devIoU(float const * const a, float const * const b) {
  float left = max(a[0], b[0]), right = min(a[2], b[2]);
  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
  float interS = width * height;
  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return interS / (Sa + Sb - interS);
}

// Each block compares one tile of row boxes against one tile of column
// boxes; dev_mask[row * col_blocks + col] holds a 64-bit suppression bitmap.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    if (row_start == col_start) {
      // Within the diagonal tile only compare against later (lower-scoring)
      // boxes, so a box never suppresses itself.
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}

void _set_device(int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
}

// Host entry point: boxes_host must be sorted by descending score.
// Writes kept indices into keep_out and the count into num_out.
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  // Fixed: the <<<...>>> launch configuration and the vector element types
  // below were stripped by the HTML export; restored.
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  std::vector<unsigned long long> remv(col_blocks);
  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);

  // Sequential greedy sweep over score-sorted boxes using the GPU-computed
  // pairwise suppression bitmaps.
  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np

def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline.

    dets is an (N, 5) array of [x1, y1, x2, y2, score] rows with inclusive
    coordinates; thresh is the IoU suppression threshold. Returns the list
    of kept row indices, in descending score order.
    """
    xs1, ys1 = dets[:, 0], dets[:, 1]
    xs2, ys2 = dets[:, 2], dets[:, 3]
    scores = dets[:, 4]

    # Inclusive-coordinate areas (+1 pixel convention).
    areas = (xs2 - xs1 + 1) * (ys2 - ys1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best = order[0]
        keep.append(best)
        rest = order[1:]

        # Intersection of the best box with every remaining candidate.
        ix1 = np.maximum(xs1[best], xs1[rest])
        iy1 = np.maximum(ys1[best], ys1[rest])
        ix2 = np.minimum(xs2[best], xs2[rest])
        iy2 = np.minimum(ys2[best], ys2[rest])
        iw = np.maximum(0.0, ix2 - ix1 + 1)
        ih = np.maximum(0.0, iy2 - iy1 + 1)
        inter = iw * ih

        overlap = inter / (areas[best] + areas[rest] - inter)
        # Keep only candidates that overlap the best box at most `thresh`.
        order = rest[np.where(overlap <= thresh)[0]]

    return keep
Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 
__author__ = 'tsungyi'

import pycocotools._mask as _mask

# Interface for manipulating masks stored in RLE (run-length encoding) format.
#
# RLE divides a vectorized binary image into piecewise-constant runs and
# stores only the run lengths: M=[0 0 1 1 1 0 1] -> counts [2 3 1 1], and
# M=[1 1 1 1 1 1 0] -> [0 6 1] (odd positions always count zeros). The
# counts are further compressed with a LEB128-style variable-length code.
# RLE size is proportional to the number of boundaries in M (O(sqrt(n)) for
# simple shapes of n pixels), and many operations — area, union,
# intersection, IoU — run directly on the encoding in O(sqrt(n)) instead of
# O(n) on the decoded mask.
#
# API:
#  encode      - Encode binary masks using RLE.
#  decode      - Decode binary masks encoded via RLE.
#  merge       - Compute union or intersection of encoded masks.
#  iou         - Compute intersection over union between masks.
#  area        - Compute area of encoded masks.
#  toBbox      - Get bounding boxes surrounding encoded masks.
#  frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
#
# Usage:
#  Rs    = encode( masks )
#  masks = decode( Rs )
#  R     = merge( Rs, intersect=false )
#  o     = iou( dt, gt, iscrowd )
#  a     = area( Rs )
#  bbs   = toBbox( Rs )
#  Rs    = frPyObjects( [pyObjects], h, w )
#
# Formats:
#  Rs      - [dict] run-length encodings; R - a single encoding
#  masks   - [hxwxn] binary masks (np.ndarray(dtype=uint8), column-major)
#  iscrowd - [nx1] list of np.ndarray; 1 marks a gt crowd region to ignore
#  bbs     - [nx4] boxes stored as [x y w h]
#  poly    - polygon as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
#  dt,gt   - either bounding boxes or encoded masks
#  Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses the first pixel).
#
# IoU note: the standard iou(gt,dt) = area(intersect)/area(union). For gt
# regions marked "iscrowd", a detection may match any subregion of the gt,
# so the modified criterion iou(gt,dt,iscrowd) = area(intersect(gt,dt)) /
# area(dt) is used instead.
#
# To compile run "python setup.py build_ext --inplace"
# Please do not contact us for help with compiling.
#
# Microsoft COCO Toolbox.      version 2.0
# Data, paper, and tutorials available at:  http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
# Licensed under the Simplified BSD License [see coco/license.txt]

encode = _mask.encode
decode = _mask.decode
iou = _mask.iou
merge = _mask.merge
area = _mask.area
toBbox = _mask.toBbox
frPyObjects = _mask.frPyObjects
/**************************************************************************
* Microsoft COCO Toolbox.      version 2.0
* Data, paper, and tutorials available at:  http://mscoco.org/
* Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
* Licensed under the Simplified BSD License [see coco/license.txt]
**************************************************************************/
#pragma once
/* Fixed: the include target was stripped by the HTML export; <stdbool.h>
 * is required for the `bool` parameter of rleMerge below. */
#include <stdbool.h>

typedef unsigned int uint;
typedef unsigned long siz;
typedef unsigned char byte;
typedef double* BB;
typedef struct { siz h, w, m; uint *cnts; } RLE;

/* Initialize/destroy RLE. */
void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
void rleFree( RLE *R );

/* Initialize/destroy RLE array. */
void rlesInit( RLE **R, siz n );
void rlesFree( RLE **R, siz n );

/* Encode binary masks using RLE. */
void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );

/* Decode binary masks encoded via RLE. */
void rleDecode( const RLE *R, byte *mask, siz n );

/* Compute union or intersection of encoded masks. */
void rleMerge( const RLE *R, RLE *M, siz n, bool intersect );

/* Compute area of encoded masks. */
void rleArea( const RLE *R, siz n, uint *a );

/* Compute intersection over union between masks. */
void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );

/* Compute intersection over union between bounding boxes. */
void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );

/* Get bounding boxes surrounding encoded masks. */
void rleToBbox( const RLE *R, BB bb, siz n );

/* Convert bounding boxes to encoded masks. */
void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );

/* Convert polygon to encoded mask. */
void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );

/* Get compressed string representation of encoded mask. */
char* rleToString( const RLE *R );

/* Convert from compressed string representation of encoded mask. */
void rleFrString( RLE *R, char *s, siz h, siz w );
training. This function precomputes the maximum 19 | overlap, taken over ground-truth boxes, between each ROI and 20 | each ground-truth box. The class with maximum overlap is also 21 | recorded. 22 | """ 23 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 24 | for i in xrange(imdb.num_images)] 25 | roidb = imdb.roidb 26 | for i in xrange(len(imdb.image_index)): 27 | roidb[i]['image'] = imdb.image_path_at(i) 28 | roidb[i]['width'] = sizes[i][0] 29 | roidb[i]['height'] = sizes[i][1] 30 | # need gt_overlaps as a dense array for argmax 31 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 32 | # max overlap with gt over classes (columns) 33 | max_overlaps = gt_overlaps.max(axis=1) 34 | # gt class that had the max overlap 35 | max_classes = gt_overlaps.argmax(axis=1) 36 | roidb[i]['max_classes'] = max_classes 37 | roidb[i]['max_overlaps'] = max_overlaps 38 | # sanity checks 39 | # max overlap of 0 => class should be zero (background) 40 | zero_inds = np.where(max_overlaps == 0)[0] 41 | assert all(max_classes[zero_inds] == 0) 42 | # max overlap > 0 => class should not be zero (must be a fg class) 43 | nonzero_inds = np.where(max_overlaps > 0)[0] 44 | assert all(max_classes[nonzero_inds] != 0) 45 | 46 | def add_bbox_regression_targets(roidb): 47 | """Add information needed to train bounding-box regressors.""" 48 | assert len(roidb) > 0 49 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 
def add_bbox_regression_targets(roidb):
    """Add information needed to train bounding-box regressors.

    For every image in `roidb`, computes per-ROI class labels and bbox
    regression targets (stored under 'bbox_targets' as an N x 5 array of
    [class, dx, dy, dw, dh]), then optionally normalizes the targets by
    per-class means/stds (precomputed from cfg or estimated empirically).

    Args:
        roidb: list of per-image roidb dicts; `prepare_roidb` must have
            been called first (it adds 'max_classes'/'max_overlaps').

    Returns:
        (means.ravel(), stds.ravel()): flattened (num_classes * 4,)
        normalization statistics, needed at test time to unnormalize
        predictions.
    """
    assert len(roidb) > 0
    assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'

    num_images = len(roidb)
    # Infer number of classes from the number of columns in gt_overlaps
    num_classes = roidb[0]['gt_overlaps'].shape[1]
    # Compute the raw (unnormalized) regression targets for every image.
    for im_i in xrange(num_images):
        rois = roidb[im_i]['boxes']
        max_overlaps = roidb[im_i]['max_overlaps']
        max_classes = roidb[im_i]['max_classes']
        roidb[im_i]['bbox_targets'] = \
            _compute_targets(rois, max_overlaps, max_classes)

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Use fixed / precomputed "means" and "stds" instead of empirical values
        means = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1))
        stds = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1))
    else:
        # Compute values needed for means and stds
        # var(x) = E(x^2) - E(x)^2
        # cfg.EPS guards against division by zero for classes with no examples
        class_counts = np.zeros((num_classes, 1)) + cfg.EPS
        sums = np.zeros((num_classes, 4))
        squared_sums = np.zeros((num_classes, 4))
        for im_i in xrange(num_images):
            targets = roidb[im_i]['bbox_targets']
            # Class 0 is background; it gets no regression targets.
            for cls in xrange(1, num_classes):
                cls_inds = np.where(targets[:, 0] == cls)[0]
                if cls_inds.size > 0:
                    class_counts[cls] += cls_inds.size
                    sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
                    squared_sums[cls, :] += \
                        (targets[cls_inds, 1:] ** 2).sum(axis=0)

        means = sums / class_counts
        stds = np.sqrt(squared_sums / class_counts - means ** 2)

    print 'bbox target means:'
    print means
    print means[1:, :].mean(axis=0) # ignore bg class
    print 'bbox target stdevs:'
    print stds
    print stds[1:, :].mean(axis=0) # ignore bg class

    # Normalize targets in place (center by class mean, scale by class std)
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
        print "Normalizing targets"
        for im_i in xrange(num_images):
            targets = roidb[im_i]['bbox_targets']
            for cls in xrange(1, num_classes):
                cls_inds = np.where(targets[:, 0] == cls)[0]
                roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
                roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
    else:
        print "NOT normalizing targets"

    # These values will be needed for making predictions
    # (the predicts will need to be unnormalized and uncentered)
    return means.ravel(), stds.ravel()
def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image.

    Returns an (num_rois, 5) float32 array: column 0 is the class label,
    columns 1:5 are the regression deltas; rows for ROIs below the
    training threshold are all zeros.
    """
    # Ground-truth ROIs are exactly those with overlap == 1.
    gt_inds = np.where(overlaps == 1)[0]
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return targets
    # Examples we try to make predictions for: sufficiently-overlapping ROIs
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    ex_boxes = np.ascontiguousarray(rois[ex_inds, :], dtype=np.float)
    gt_boxes = np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)
    # IoU overlap between each example ROI and every gt ROI
    ex_gt_overlaps = bbox_overlaps(ex_boxes, gt_boxes)

    # Each example regresses toward the gt ROI it overlaps most:
    # that gt is the example's regression target
    assignment = ex_gt_overlaps.argmax(axis=1)
    matched_gt = rois[gt_inds[assignment], :]

    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(rois[ex_inds, :], matched_gt)
    return targets
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Generate object detection proposals from an imdb using an RPN."""

from fast_rcnn.config import cfg
from utils.blob import im_list_to_blob
from utils.timer import Timer
import numpy as np
import cv2

# NOTE(review): `plt` (matplotlib.pyplot) is never imported in this module,
# yet it is referenced in _vis_proposals and in the disabled debug branch of
# imdb_proposals. Enabling either path would raise NameError — confirm whether
# an `import matplotlib.pyplot as plt` was dropped.

def _vis_proposals(im, dets, thresh=0.5):
    """Draw detected bounding boxes.

    Args:
        im: BGR image (ndarray); converted to RGB for display.
        dets: (N, 5) array of [x1, y1, x2, y2, score] proposals.
        thresh: only boxes with score >= thresh are drawn.
    """
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return

    # RPN proposals are class-agnostic, so every box gets the same label.
    class_name = 'obj'
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]

        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1], fill=False,
                          edgecolor='red', linewidth=3.5)
            )
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')

    ax.set_title(('{} detections with '
                  'p({} | box) >= {:.1f}').format(class_name, class_name,
                                                  thresh),
                  fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()

def _get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (list): list of image scales (relative to im) used
            in the image pyramid
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    processed_ims = []

    # RPN generation uses a single scale (no multi-scale pyramid)
    assert len(cfg.TEST.SCALES) == 1
    target_size = cfg.TEST.SCALES[0]

    # Scale so the shorter side hits target_size ...
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than MAX_SIZE
    if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
        im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
    im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)
    # im_info = [height, width, scale], shaped (1, 3) for the network
    im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :]
    processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_info

def im_proposals(net, im):
    """Generate RPN proposals on a single image.

    Returns (boxes, scores): boxes are in the ORIGINAL image's coordinate
    frame (network outputs are divided by the resize scale).
    """
    blobs = {}
    blobs['data'], blobs['im_info'] = _get_image_blob(im)
    # Reshape network inputs to match this image's blob sizes
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    blobs_out = net.forward(
        data=blobs['data'].astype(np.float32, copy=False),
        im_info=blobs['im_info'].astype(np.float32, copy=False))

    # Undo the input resize: column 0 of 'rois' is the batch index, dropped here
    scale = blobs['im_info'][0, 2]
    boxes = blobs_out['rois'][:, 1:].copy() / scale
    scores = blobs_out['scores'].copy()
    return boxes, scores

def imdb_proposals(net, imdb):
    """Generate RPN proposals on all images in an imdb.

    Returns a list (one entry per image) of proposal box arrays.
    """
    _t = Timer()
    imdb_boxes = [[] for _ in xrange(imdb.num_images)]
    for i in xrange(imdb.num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t.tic()
        imdb_boxes[i], scores = im_proposals(net, im)
        _t.toc()
        print 'im_proposals: {:d}/{:d} {:.3f}s' \
            .format(i + 1, imdb.num_images, _t.average_time)
        # Debug visualization, permanently disabled (`if 0:`)
        if 0:
            dets = np.hstack((imdb_boxes[i], scores))
            # from IPython import embed; embed()
            _vis_proposals(im, dets[:3, :], thresh=0.9)
            plt.show()

    return imdb_boxes
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                     scales=2**np.arange(3, 6)):
    """Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.

    Args:
        base_size: side length of the reference window (default 16, one
            feature-map stride).
        ratios: aspect ratios (height/width) to enumerate.
        scales: multiplicative scales applied to each ratio anchor.

    Returns:
        (len(ratios) * len(scales), 4) array of [x1, y1, x2, y2] anchors
        (inclusive pixel coordinates). With the defaults this reproduces
        the canonical 9-anchor set, e.g. first row [-84, -40, 99, 55].
    """
    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    # `range` (not the Python-2-only `xrange`) so this also runs on
    # Python 3; under Python 2 the iteration behavior is identical.
    anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
                         for i in range(ratio_anchors.shape[0])])
    return anchors


def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window).

    Coordinates are inclusive, hence the +1 when computing extents.
    """
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows) as (N, 4) rows of
    [x1, y1, x2, y2].
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
                         y_ctr - 0.5 * (hs - 1),
                         x_ctr + 0.5 * (ws - 1),
                         y_ctr + 0.5 * (hs - 1)))
    return anchors


def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor.

    Keeps the window area approximately constant while changing the
    aspect ratio (rounded to integer widths/heights).
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor.

    Scales both sides, keeping the center fixed.
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors
91 | """ 92 | 93 | w, h, x_ctr, y_ctr = _whctrs(anchor) 94 | ws = w * scales 95 | hs = h * scales 96 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 97 | return anchors 98 | 99 | if __name__ == '__main__': 100 | import time 101 | t = time.time() 102 | a = generate_anchors() 103 | print time.time() - t 104 | print a 105 | from IPython import embed; embed() 106 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import subprocess 14 | import numpy as np 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted fom 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | 30 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | and values giving the absolute path to each directory. 32 | 33 | Starts by looking for the CUDAHOME env variable. If not found, everything 34 | is based on finding 'nvcc' in the PATH. 
35 | """ 36 | 37 | # first check if the CUDAHOME env variable is in use 38 | if 'CUDAHOME' in os.environ: 39 | home = os.environ['CUDAHOME'] 40 | nvcc = pjoin(home, 'bin', 'nvcc') 41 | else: 42 | # otherwise, search the PATH for NVCC 43 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | if nvcc is None: 46 | raise EnvironmentError('The nvcc binary could not be ' 47 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 48 | home = os.path.dirname(os.path.dirname(nvcc)) 49 | 50 | cudaconfig = {'home':home, 'nvcc':nvcc, 51 | 'include': pjoin(home, 'include'), 52 | 'lib64': pjoin(home, 'lib64')} 53 | for k, v in cudaconfig.iteritems(): 54 | if not os.path.exists(v): 55 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | 57 | return cudaconfig 58 | CUDA = locate_cuda() 59 | 60 | 61 | # Obtain the numpy include directory. This logic works across numpy versions. 62 | try: 63 | numpy_include = np.get_include() 64 | except AttributeError: 65 | numpy_include = np.get_numpy_include() 66 | 67 | def customize_compiler_for_nvcc(self): 68 | """inject deep into distutils to customize how the dispatch 69 | to gcc/nvcc works. 70 | 71 | If you subclass UnixCCompiler, it's not trivial to get your subclass 72 | injected in, and still have the right customizations (i.e. 73 | distutils.sysconfig.customize_compiler) run on it. So instead of going 74 | the OO route, I have this. Note, it's kindof like a wierd functional 75 | subclassing going on.""" 76 | 77 | # tell the compiler it can processes .cu 78 | self.src_extensions.append('.cu') 79 | 80 | # save references to the default compiler_so and _comple methods 81 | default_compiler_so = self.compiler_so 82 | super = self._compile 83 | 84 | # now redefine the _compile method. 
def customize_compiler_for_nvcc(self):
    """inject deep into distutils to customize how the dispatch
    to gcc/nvcc works.

    If you subclass UnixCCompiler, it's not trivial to get your subclass
    injected in, and still have the right customizations (i.e.
    distutils.sysconfig.customize_compiler) run on it. So instead of going
    the OO route, I have this. Note, it's kindof like a wierd functional
    subclassing going on.
    """
    # tell the compiler it can processes .cu
    self.src_extensions.append('.cu')

    # save references to the default compiler_so and _comple methods
    default_compiler_so = self.compiler_so
    # NOTE: deliberately shadows the `super` builtin to hold the original
    # _compile bound method — preserved for fidelity with upstream.
    super = self._compile

    # now redefine the _compile method. This gets executed for each
    # object but distutils doesn't have the ability to change compilers
    # based on source extension: we add it.
    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
        if os.path.splitext(src)[1] == '.cu':
            # use the cuda for .cu files
            self.set_executable('compiler_so', CUDA['nvcc'])
            # use only a subset of the extra_postargs, which are 1-1 translated
            # from the extra_compile_args in the Extension class
            postargs = extra_postargs['nvcc']
        else:
            postargs = extra_postargs['gcc']

        super(obj, src, ext, cc_args, postargs, pp_opts)
        # reset the default compiler_so, which we might have changed for cuda
        self.compiler_so = default_compiler_so

    # inject our redefined _compile method into the class
    self._compile = _compile


# run the customize_compiler
class custom_build_ext(build_ext):
    # build_ext subclass whose only job is to patch the compiler instance
    # (see customize_compiler_for_nvcc) before building extensions.
    def build_extensions(self):
        customize_compiler_for_nvcc(self.compiler)
        build_ext.build_extensions(self)


# Extension modules: Cython bbox/nms helpers, the CUDA NMS kernel, and the
# bundled pycocotools mask routines.
ext_modules = [
    Extension(
        "utils.cython_bbox",
        ["utils/bbox.pyx"],
        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
        include_dirs = [numpy_include]
    ),
    Extension(
        "nms.cpu_nms",
        ["nms/cpu_nms.pyx"],
        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
        include_dirs = [numpy_include]
    ),
    Extension('nms.gpu_nms',
        ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
        library_dirs=[CUDA['lib64']],
        libraries=['cudart'],
        language='c++',
        runtime_library_dirs=[CUDA['lib64']],
        # this syntax is specific to this build system
        # we're only going to use certain compiler args with nvcc and not with
        # gcc the implementation of this trick is in customize_compiler() below
        extra_compile_args={'gcc': ["-Wno-unused-function"],
                            'nvcc': ['-arch=sm_35',
                                     '--ptxas-options=-v',
                                     '-c',
                                     '--compiler-options',
                                     "'-fPIC'"]},
        include_dirs = [numpy_include, CUDA['include']]
    ),
    Extension(
        'pycocotools._mask',
        sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'],
        include_dirs = [numpy_include, 'pycocotools'],
        extra_compile_args={
            'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']},
    ),
]

setup(
    name='fast_rcnn',
    ext_modules=ext_modules,
    # inject our custom trigger
    cmdclass={'build_ext': custom_build_ext},
)
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

""" Transform images for compatibility with models trained with
https://github.com/facebook/fb.resnet.torch.

Usage in model prototxt:

layer {
  name: 'data_xform'
  type: 'Python'
  bottom: 'data_caffe'
  top: 'data'
  python_param {
    module: 'transform.torch_image_transform_layer'
    layer: 'TorchImageTransformLayer'
  }
}
"""

import caffe
from fast_rcnn.config import cfg
import numpy as np

class TorchImageTransformLayer(caffe.Layer):
    """Caffe Python layer that converts Caffe-style input blobs
    (BGR, mean-subtracted, 0-255) into Torch-style blobs
    (RGB, [0, 1], per-channel standardized).
    """

    def setup(self, bottom, top):
        # (1, 3, 1, 1) shaped arrays
        # presumably the fb.resnet.torch ImageNet RGB channel means/stds
        # in [0, 1] — TODO confirm against the upstream Torch repo
        self.PIXEL_MEANS = \
            np.array([[[[0.48462227599918]],
                       [[0.45624044862054]],
                       [[0.40588363755159]]]])
        self.PIXEL_STDS = \
            np.array([[[[0.22889466674951]],
                       [[0.22446679341259]],
                       [[0.22495548344775]]]])
        # The default ("old") pixel means that were already subtracted
        # cfg.PIXEL_MEANS is HxWxC; reshape to (1, C, H, W) to match blobs
        channel_swap = (0, 3, 1, 2)
        self.OLD_PIXEL_MEANS = \
            cfg.PIXEL_MEANS[np.newaxis, :, :, :].transpose(channel_swap)

        top[0].reshape(*(bottom[0].shape))

    def forward(self, bottom, top):
        ims = bottom[0].data
        # Invert the channel means that were already subtracted
        ims += self.OLD_PIXEL_MEANS
        # 1. Permute BGR to RGB and normalize to [0, 1]
        ims = ims[:, [2, 1, 0], :, :] / 255.0
        # 2. Remove channel means
        ims -= self.PIXEL_MEANS
        # 3. Standardize channels
        ims /= self.PIXEL_STDS
        top[0].reshape(*(ims.shape))
        top[0].data[...] = ims

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
cimport cython
import numpy as np
cimport numpy as np

DTYPE = np.float
ctypedef np.float_t DTYPE_t

def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute pairwise IoU (intersection over union) between two box sets.

    Boxes are [x1, y1, x2, y2] with inclusive integer-pixel coordinates,
    hence the "+ 1" when computing widths/heights/areas.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
        (entries stay 0 where the boxes do not intersect)
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # area of the query box, hoisted out of the inner loop
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # intersection width; <= 0 means no horizontal overlap
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                # intersection height; <= 0 means no vertical overlap
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # union area = area(box) + area(query) - intersection
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
17 | """ 18 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 19 | num_images = len(ims) 20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 21 | dtype=np.float32) 22 | for i in xrange(num_images): 23 | im = ims[i] 24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 25 | # Move channels (axis 3) to axis 1 26 | # Axis order will become: (batch elem, channel, height, width) 27 | channel_swap = (0, 3, 1, 2) 28 | blob = blob.transpose(channel_swap) 29 | return blob 30 | 31 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 32 | """Mean subtract and scale an image for use in a blob.""" 33 | im = im.astype(np.float32, copy=False) 34 | im -= pixel_means 35 | im_shape = im.shape 36 | im_size_min = np.min(im_shape[0:2]) 37 | im_size_max = np.max(im_shape[0:2]) 38 | im_scale = float(target_size) / float(im_size_min) 39 | # Prevent the biggest axis from being more than MAX_SIZE 40 | if np.round(im_scale * im_size_max) > max_size: 41 | im_scale = float(max_size) / float(im_size_max) 42 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 43 | interpolation=cv2.INTER_LINEAR) 44 | 45 | return im, im_scale 46 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 
class Timer(object):
    """A simple accumulating timer: tic()/toc() pairs update a running
    total, call count, and average."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        # time.time (not time.clock): clock does not normalize for
        # multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """Stop the current interval; return the running average by
        default, or this interval's duration if average is False."""
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff
@[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.120 32 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.385 33 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.544 34 | ``` 35 | 36 | test-standard2015 results 37 | ``` 38 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.242 39 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.453 40 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.234 41 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.072 42 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.264 43 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.369 44 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.238 45 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.341 46 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.347 47 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.115 48 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.389 49 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.544 50 | ``` 51 | -------------------------------------------------------------------------------- /models/coco/VGG16/fast_rcnn/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/coco/VGG16/fast_rcnn/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 200000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_fast_rcnn" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- 
/models/coco/VGG16/faster_rcnn_end2end/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/coco/VGG16/faster_rcnn_end2end/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 350000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "vgg16_faster_rcnn" 15 | iter_size: 2 16 | -------------------------------------------------------------------------------- /models/coco/VGG_CNN_M_1024/fast_rcnn/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/coco/VGG_CNN_M_1024/fast_rcnn/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 200000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/coco/VGG_CNN_M_1024/fast_rcnn/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 224 7 | dim: 224 8 | } 9 | input: "rois" 10 | input_shape { 11 | dim: 1 # to be changed on-the-fly to num ROIs 12 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 
28 | num_output: 96 29 | kernel_size: 7 30 | stride: 2 31 | } 32 | } 33 | layer { 34 | name: "relu1" 35 | type: "ReLU" 36 | bottom: "conv1" 37 | top: "conv1" 38 | } 39 | layer { 40 | name: "norm1" 41 | type: "LRN" 42 | bottom: "conv1" 43 | top: "norm1" 44 | lrn_param { 45 | local_size: 5 46 | alpha: 0.0005 47 | beta: 0.75 48 | k: 2 49 | } 50 | } 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "norm1" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 3 59 | stride: 2 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | param { 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | convolution_param { 76 | num_output: 256 77 | pad: 1 78 | kernel_size: 5 79 | stride: 2 80 | } 81 | } 82 | layer { 83 | name: "relu2" 84 | type: "ReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | } 88 | layer { 89 | name: "norm2" 90 | type: "LRN" 91 | bottom: "conv2" 92 | top: "norm2" 93 | lrn_param { 94 | local_size: 5 95 | alpha: 0.0005 96 | beta: 0.75 97 | k: 2 98 | } 99 | } 100 | layer { 101 | name: "pool2" 102 | type: "Pooling" 103 | bottom: "norm2" 104 | top: "pool2" 105 | pooling_param { 106 | pool: MAX 107 | kernel_size: 3 108 | stride: 2 109 | } 110 | } 111 | layer { 112 | name: "conv3" 113 | type: "Convolution" 114 | bottom: "pool2" 115 | top: "conv3" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 512 126 | pad: 1 127 | kernel_size: 3 128 | } 129 | } 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | layer { 137 | name: "conv4" 138 | type: "Convolution" 139 | bottom: "conv3" 140 | top: "conv4" 141 | param { 142 | lr_mult: 1 143 | decay_mult: 1 144 | } 145 | param { 146 | lr_mult: 2 147 | decay_mult: 0 148 | } 149 | convolution_param { 150 | num_output: 512 151 | 
pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 | layer { 156 | name: "relu4" 157 | type: "ReLU" 158 | bottom: "conv4" 159 | top: "conv4" 160 | } 161 | layer { 162 | name: "conv5" 163 | type: "Convolution" 164 | bottom: "conv4" 165 | top: "conv5" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 512 176 | pad: 1 177 | kernel_size: 3 178 | } 179 | } 180 | layer { 181 | name: "relu5" 182 | type: "ReLU" 183 | bottom: "conv5" 184 | top: "conv5" 185 | } 186 | layer { 187 | name: "roi_pool5" 188 | type: "ROIPooling" 189 | bottom: "conv5" 190 | bottom: "rois" 191 | top: "pool5" 192 | roi_pooling_param { 193 | pooled_w: 6 194 | pooled_h: 6 195 | spatial_scale: 0.0625 # 1/16 196 | } 197 | } 198 | layer { 199 | name: "fc6" 200 | type: "InnerProduct" 201 | bottom: "pool5" 202 | top: "fc6" 203 | param { 204 | lr_mult: 1 205 | decay_mult: 1 206 | } 207 | param { 208 | lr_mult: 2 209 | decay_mult: 0 210 | } 211 | inner_product_param { 212 | num_output: 4096 213 | } 214 | } 215 | layer { 216 | name: "relu6" 217 | type: "ReLU" 218 | bottom: "fc6" 219 | top: "fc6" 220 | } 221 | layer { 222 | name: "fc7" 223 | type: "InnerProduct" 224 | bottom: "fc6" 225 | top: "fc7" 226 | param { 227 | lr_mult: 1 228 | decay_mult: 1 229 | } 230 | param { 231 | lr_mult: 2 232 | decay_mult: 0 233 | } 234 | inner_product_param { 235 | num_output: 1024 236 | } 237 | } 238 | layer { 239 | name: "relu7" 240 | type: "ReLU" 241 | bottom: "fc7" 242 | top: "fc7" 243 | } 244 | layer { 245 | name: "cls_score" 246 | type: "InnerProduct" 247 | bottom: "fc7" 248 | top: "cls_score" 249 | param { 250 | lr_mult: 1 251 | decay_mult: 1 252 | } 253 | param { 254 | lr_mult: 2 255 | decay_mult: 0 256 | } 257 | inner_product_param { 258 | num_output: 81 259 | weight_filler { 260 | type: "gaussian" 261 | std: 0.01 262 | } 263 | bias_filler { 264 | type: "constant" 265 | value: 0 266 | } 267 | } 268 | } 269 | 
layer { 270 | name: "bbox_pred" 271 | type: "InnerProduct" 272 | bottom: "fc7" 273 | top: "bbox_pred" 274 | param { 275 | lr_mult: 1 276 | decay_mult: 1 277 | } 278 | param { 279 | lr_mult: 2 280 | decay_mult: 0 281 | } 282 | inner_product_param { 283 | num_output: 324 284 | weight_filler { 285 | type: "gaussian" 286 | std: 0.001 287 | } 288 | bias_filler { 289 | type: "constant" 290 | value: 0 291 | } 292 | } 293 | } 294 | layer { 295 | name: "cls_prob" 296 | type: "Softmax" 297 | bottom: "cls_score" 298 | top: "cls_prob" 299 | } 300 | -------------------------------------------------------------------------------- /models/coco/VGG_CNN_M_1024/fast_rcnn/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | top: 'bbox_targets' 9 | top: 'bbox_inside_weights' 10 | top: 'bbox_outside_weights' 11 | python_param { 12 | module: 'roi_data_layer.layer' 13 | layer: 'RoIDataLayer' 14 | param_str: "'num_classes': 81" 15 | } 16 | } 17 | layer { 18 | name: "conv1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1" 22 | param { lr_mult: 0 decay_mult: 0 } 23 | param { lr_mult: 0 decay_mult: 0 } 24 | convolution_param { 25 | num_output: 96 26 | kernel_size: 7 27 | stride: 2 28 | } 29 | } 30 | layer { 31 | name: "relu1" 32 | type: "ReLU" 33 | bottom: "conv1" 34 | top: "conv1" 35 | } 36 | layer { 37 | name: "norm1" 38 | type: "LRN" 39 | bottom: "conv1" 40 | top: "norm1" 41 | lrn_param { 42 | local_size: 5 43 | alpha: 0.0005 44 | beta: 0.75 45 | k: 2 46 | } 47 | } 48 | layer { 49 | name: "pool1" 50 | type: "Pooling" 51 | bottom: "norm1" 52 | top: "pool1" 53 | pooling_param { 54 | pool: MAX 55 | kernel_size: 3 56 | stride: 2 57 | } 58 | } 59 | layer { 60 | name: "conv2" 61 | type: "Convolution" 62 | bottom: "pool1" 63 | top: "conv2" 64 | param { 65 | lr_mult: 1 66 | } 67 | param { 68 | lr_mult: 2 69 | } 
70 | convolution_param { 71 | num_output: 256 72 | pad: 1 73 | kernel_size: 5 74 | stride: 2 75 | } 76 | } 77 | layer { 78 | name: "relu2" 79 | type: "ReLU" 80 | bottom: "conv2" 81 | top: "conv2" 82 | } 83 | layer { 84 | name: "norm2" 85 | type: "LRN" 86 | bottom: "conv2" 87 | top: "norm2" 88 | lrn_param { 89 | local_size: 5 90 | alpha: 0.0005 91 | beta: 0.75 92 | k: 2 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "norm2" 99 | top: "pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 3 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "conv3" 108 | type: "Convolution" 109 | bottom: "pool2" 110 | top: "conv3" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | convolution_param { 118 | num_output: 512 119 | pad: 1 120 | kernel_size: 3 121 | } 122 | } 123 | layer { 124 | name: "relu3" 125 | type: "ReLU" 126 | bottom: "conv3" 127 | top: "conv3" 128 | } 129 | layer { 130 | name: "conv4" 131 | type: "Convolution" 132 | bottom: "conv3" 133 | top: "conv4" 134 | param { 135 | lr_mult: 1 136 | } 137 | param { 138 | lr_mult: 2 139 | } 140 | convolution_param { 141 | num_output: 512 142 | pad: 1 143 | kernel_size: 3 144 | } 145 | } 146 | layer { 147 | name: "relu4" 148 | type: "ReLU" 149 | bottom: "conv4" 150 | top: "conv4" 151 | } 152 | layer { 153 | name: "conv5" 154 | type: "Convolution" 155 | bottom: "conv4" 156 | top: "conv5" 157 | param { 158 | lr_mult: 1 159 | } 160 | param { 161 | lr_mult: 2 162 | } 163 | convolution_param { 164 | num_output: 512 165 | pad: 1 166 | kernel_size: 3 167 | } 168 | } 169 | layer { 170 | name: "relu5" 171 | type: "ReLU" 172 | bottom: "conv5" 173 | top: "conv5" 174 | } 175 | layer { 176 | name: "roi_pool5" 177 | type: "ROIPooling" 178 | bottom: "conv5" 179 | bottom: "rois" 180 | top: "pool5" 181 | roi_pooling_param { 182 | pooled_w: 6 183 | pooled_h: 6 184 | spatial_scale: 0.0625 # 1/16 185 | } 186 | } 187 | layer { 188 | name: "fc6" 189 | type: 
"InnerProduct" 190 | bottom: "pool5" 191 | top: "fc6" 192 | param { 193 | lr_mult: 1 194 | } 195 | param { 196 | lr_mult: 2 197 | } 198 | inner_product_param { 199 | num_output: 4096 200 | } 201 | } 202 | layer { 203 | name: "relu6" 204 | type: "ReLU" 205 | bottom: "fc6" 206 | top: "fc6" 207 | } 208 | layer { 209 | name: "fc7" 210 | type: "InnerProduct" 211 | bottom: "fc6" 212 | top: "fc7" 213 | param { 214 | lr_mult: 1 215 | } 216 | param { 217 | lr_mult: 2 218 | } 219 | inner_product_param { 220 | num_output: 1024 221 | } 222 | } 223 | layer { 224 | name: "relu7" 225 | type: "ReLU" 226 | bottom: "fc7" 227 | top: "fc7" 228 | } 229 | layer { 230 | name: "cls_score" 231 | type: "InnerProduct" 232 | bottom: "fc7" 233 | top: "cls_score" 234 | param { 235 | lr_mult: 1 236 | } 237 | param { 238 | lr_mult: 2 239 | } 240 | inner_product_param { 241 | num_output: 81 242 | weight_filler { 243 | type: "gaussian" 244 | std: 0.01 245 | } 246 | bias_filler { 247 | type: "constant" 248 | value: 0 249 | } 250 | } 251 | } 252 | layer { 253 | name: "bbox_pred" 254 | type: "InnerProduct" 255 | bottom: "fc7" 256 | top: "bbox_pred" 257 | param { 258 | lr_mult: 1 259 | } 260 | param { 261 | lr_mult: 2 262 | } 263 | inner_product_param { 264 | num_output: 324 265 | weight_filler { 266 | type: "gaussian" 267 | std: 0.001 268 | } 269 | bias_filler { 270 | type: "constant" 271 | value: 0 272 | } 273 | } 274 | } 275 | layer { 276 | name: "loss_cls" 277 | type: "SoftmaxWithLoss" 278 | bottom: "cls_score" 279 | bottom: "labels" 280 | top: "loss_cls" 281 | loss_weight: 1 282 | } 283 | layer { 284 | name: "loss_bbox" 285 | type: "SmoothL1Loss" 286 | bottom: "bbox_pred" 287 | bottom: "bbox_targets" 288 | bottom: "bbox_inside_weights" 289 | bottom: "bbox_outside_weights" 290 | top: "loss_bbox" 291 | loss_weight: 1 292 | } 293 | -------------------------------------------------------------------------------- /models/coco/VGG_CNN_M_1024/faster_rcnn_end2end/solver.prototxt: 
-------------------------------------------------------------------------------- 1 | train_net: "models/coco/VGG_CNN_M_1024/faster_rcnn_end2end/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 350000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "vgg_cnn_m_1024_faster_rcnn" 15 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/fast_rcnn/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG16/fast_rcnn/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_fast_rcnn" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/faster_rcnn_alt_opt/rpn_test.pt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | 3 | input: "data" 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 224 8 | dim: 224 9 | } 10 | 11 | input: "im_info" 12 | input_shape { 13 | dim: 1 14 | dim: 3 15 | } 16 | 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | convolution_param { 23 | num_output: 64 24 | pad: 1 kernel_size: 3 25 | } 26 | } 27 | layer { 28 | name: "relu1_1" 29 | type: "ReLU" 30 | bottom: "conv1_1" 31 | top: "conv1_1" 32 | } 33 | layer { 34 | name: "conv1_2" 35 | type: 
"Convolution" 36 | bottom: "conv1_1" 37 | top: "conv1_2" 38 | convolution_param { 39 | num_output: 64 40 | pad: 1 kernel_size: 3 41 | } 42 | } 43 | layer { 44 | name: "relu1_2" 45 | type: "ReLU" 46 | bottom: "conv1_2" 47 | top: "conv1_2" 48 | } 49 | layer { 50 | name: "pool1" 51 | type: "Pooling" 52 | bottom: "conv1_2" 53 | top: "pool1" 54 | pooling_param { 55 | pool: MAX 56 | kernel_size: 2 stride: 2 57 | } 58 | } 59 | layer { 60 | name: "conv2_1" 61 | type: "Convolution" 62 | bottom: "pool1" 63 | top: "conv2_1" 64 | convolution_param { 65 | num_output: 128 66 | pad: 1 kernel_size: 3 67 | } 68 | } 69 | layer { 70 | name: "relu2_1" 71 | type: "ReLU" 72 | bottom: "conv2_1" 73 | top: "conv2_1" 74 | } 75 | layer { 76 | name: "conv2_2" 77 | type: "Convolution" 78 | bottom: "conv2_1" 79 | top: "conv2_2" 80 | convolution_param { 81 | num_output: 128 82 | pad: 1 kernel_size: 3 83 | } 84 | } 85 | layer { 86 | name: "relu2_2" 87 | type: "ReLU" 88 | bottom: "conv2_2" 89 | top: "conv2_2" 90 | } 91 | layer { 92 | name: "pool2" 93 | type: "Pooling" 94 | bottom: "conv2_2" 95 | top: "pool2" 96 | pooling_param { 97 | pool: MAX 98 | kernel_size: 2 stride: 2 99 | } 100 | } 101 | layer { 102 | name: "conv3_1" 103 | type: "Convolution" 104 | bottom: "pool2" 105 | top: "conv3_1" 106 | convolution_param { 107 | num_output: 256 108 | pad: 1 kernel_size: 3 109 | } 110 | } 111 | layer { 112 | name: "relu3_1" 113 | type: "ReLU" 114 | bottom: "conv3_1" 115 | top: "conv3_1" 116 | } 117 | layer { 118 | name: "conv3_2" 119 | type: "Convolution" 120 | bottom: "conv3_1" 121 | top: "conv3_2" 122 | convolution_param { 123 | num_output: 256 124 | pad: 1 kernel_size: 3 125 | } 126 | } 127 | layer { 128 | name: "relu3_2" 129 | type: "ReLU" 130 | bottom: "conv3_2" 131 | top: "conv3_2" 132 | } 133 | layer { 134 | name: "conv3_3" 135 | type: "Convolution" 136 | bottom: "conv3_2" 137 | top: "conv3_3" 138 | convolution_param { 139 | num_output: 256 140 | pad: 1 kernel_size: 3 141 | } 142 | } 143 | layer { 
144 | name: "relu3_3" 145 | type: "ReLU" 146 | bottom: "conv3_3" 147 | top: "conv3_3" 148 | } 149 | layer { 150 | name: "pool3" 151 | type: "Pooling" 152 | bottom: "conv3_3" 153 | top: "pool3" 154 | pooling_param { 155 | pool: MAX 156 | kernel_size: 2 stride: 2 157 | } 158 | } 159 | layer { 160 | name: "conv4_1" 161 | type: "Convolution" 162 | bottom: "pool3" 163 | top: "conv4_1" 164 | convolution_param { 165 | num_output: 512 166 | pad: 1 kernel_size: 3 167 | } 168 | } 169 | layer { 170 | name: "relu4_1" 171 | type: "ReLU" 172 | bottom: "conv4_1" 173 | top: "conv4_1" 174 | } 175 | layer { 176 | name: "conv4_2" 177 | type: "Convolution" 178 | bottom: "conv4_1" 179 | top: "conv4_2" 180 | convolution_param { 181 | num_output: 512 182 | pad: 1 kernel_size: 3 183 | } 184 | } 185 | layer { 186 | name: "relu4_2" 187 | type: "ReLU" 188 | bottom: "conv4_2" 189 | top: "conv4_2" 190 | } 191 | layer { 192 | name: "conv4_3" 193 | type: "Convolution" 194 | bottom: "conv4_2" 195 | top: "conv4_3" 196 | convolution_param { 197 | num_output: 512 198 | pad: 1 kernel_size: 3 199 | } 200 | } 201 | layer { 202 | name: "relu4_3" 203 | type: "ReLU" 204 | bottom: "conv4_3" 205 | top: "conv4_3" 206 | } 207 | layer { 208 | name: "pool4" 209 | type: "Pooling" 210 | bottom: "conv4_3" 211 | top: "pool4" 212 | pooling_param { 213 | pool: MAX 214 | kernel_size: 2 stride: 2 215 | } 216 | } 217 | layer { 218 | name: "conv5_1" 219 | type: "Convolution" 220 | bottom: "pool4" 221 | top: "conv5_1" 222 | convolution_param { 223 | num_output: 512 224 | pad: 1 kernel_size: 3 225 | } 226 | } 227 | layer { 228 | name: "relu5_1" 229 | type: "ReLU" 230 | bottom: "conv5_1" 231 | top: "conv5_1" 232 | } 233 | layer { 234 | name: "conv5_2" 235 | type: "Convolution" 236 | bottom: "conv5_1" 237 | top: "conv5_2" 238 | convolution_param { 239 | num_output: 512 240 | pad: 1 kernel_size: 3 241 | } 242 | } 243 | layer { 244 | name: "relu5_2" 245 | type: "ReLU" 246 | bottom: "conv5_2" 247 | top: "conv5_2" 248 | } 249 | 
layer { 250 | name: "conv5_3" 251 | type: "Convolution" 252 | bottom: "conv5_2" 253 | top: "conv5_3" 254 | convolution_param { 255 | num_output: 512 256 | pad: 1 kernel_size: 3 257 | } 258 | } 259 | layer { 260 | name: "relu5_3" 261 | type: "ReLU" 262 | bottom: "conv5_3" 263 | top: "conv5_3" 264 | } 265 | 266 | #========= RPN ============ 267 | 268 | layer { 269 | name: "rpn_conv/3x3" 270 | type: "Convolution" 271 | bottom: "conv5_3" 272 | top: "rpn/output" 273 | convolution_param { 274 | num_output: 512 275 | kernel_size: 3 pad: 1 stride: 1 276 | } 277 | } 278 | layer { 279 | name: "rpn_relu/3x3" 280 | type: "ReLU" 281 | bottom: "rpn/output" 282 | top: "rpn/output" 283 | } 284 | 285 | layer { 286 | name: "rpn_cls_score" 287 | type: "Convolution" 288 | bottom: "rpn/output" 289 | top: "rpn_cls_score" 290 | convolution_param { 291 | num_output: 18 # 2(bg/fg) * 9(anchors) 292 | kernel_size: 1 pad: 0 stride: 1 293 | } 294 | } 295 | layer { 296 | name: "rpn_bbox_pred" 297 | type: "Convolution" 298 | bottom: "rpn/output" 299 | top: "rpn_bbox_pred" 300 | convolution_param { 301 | num_output: 36 # 4 * 9(anchors) 302 | kernel_size: 1 pad: 0 stride: 1 303 | } 304 | } 305 | layer { 306 | bottom: "rpn_cls_score" 307 | top: "rpn_cls_score_reshape" 308 | name: "rpn_cls_score_reshape" 309 | type: "Reshape" 310 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 311 | } 312 | 313 | #========= RoI Proposal ============ 314 | 315 | layer { 316 | name: "rpn_cls_prob" 317 | type: "Softmax" 318 | bottom: "rpn_cls_score_reshape" 319 | top: "rpn_cls_prob" 320 | } 321 | layer { 322 | name: 'rpn_cls_prob_reshape' 323 | type: 'Reshape' 324 | bottom: 'rpn_cls_prob' 325 | top: 'rpn_cls_prob_reshape' 326 | reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } 327 | } 328 | layer { 329 | name: 'proposal' 330 | type: 'Python' 331 | bottom: 'rpn_cls_prob_reshape' 332 | bottom: 'rpn_bbox_pred' 333 | bottom: 'im_info' 334 | top: 'rois' 335 | top: 'scores' 336 | python_param { 337 | 
module: 'rpn.proposal_layer' 338 | layer: 'ProposalLayer' 339 | param_str: "'feat_stride': 16" 340 | } 341 | } 342 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_solver30k40k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 30000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "vgg16_fast_rcnn" 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage1_rpn_train.pt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 60000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "vgg16_rpn" 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage2_fast_rcnn_solver30k40k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 30000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 
11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "vgg16_fast_rcnn" 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage2_rpn_solver60k80k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage2_rpn_train.pt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 60000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "vgg16_rpn" 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/faster_rcnn_end2end/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG16/faster_rcnn_end2end/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 50000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_faster_rcnn" 16 | iter_size: 2 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 
7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 224 7 | dim: 224 8 | } 9 | input: "rois" 10 | input_shape { 11 | dim: 1 # to be changed on-the-fly to num ROIs 12 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 7 30 | stride: 2 31 | } 32 | } 33 | layer { 34 | name: "relu1" 35 | type: "ReLU" 36 | bottom: "conv1" 37 | top: "conv1" 38 | } 39 | layer { 40 | name: "norm1" 41 | type: "LRN" 42 | bottom: "conv1" 43 | top: "norm1" 44 | lrn_param { 45 | local_size: 5 46 | alpha: 0.0005 47 | beta: 0.75 48 | k: 2 49 | } 50 | } 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "norm1" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 3 59 | stride: 2 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | param { 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | convolution_param { 76 | num_output: 256 77 | pad: 1 78 | kernel_size: 5 79 | stride: 2 80 | } 81 | } 82 | layer { 83 | name: "relu2" 84 | type: "ReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | } 88 | layer { 89 | name: "norm2" 
90 | type: "LRN" 91 | bottom: "conv2" 92 | top: "norm2" 93 | lrn_param { 94 | local_size: 5 95 | alpha: 0.0005 96 | beta: 0.75 97 | k: 2 98 | } 99 | } 100 | layer { 101 | name: "pool2" 102 | type: "Pooling" 103 | bottom: "norm2" 104 | top: "pool2" 105 | pooling_param { 106 | pool: MAX 107 | kernel_size: 3 108 | stride: 2 109 | } 110 | } 111 | layer { 112 | name: "conv3" 113 | type: "Convolution" 114 | bottom: "pool2" 115 | top: "conv3" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 512 126 | pad: 1 127 | kernel_size: 3 128 | } 129 | } 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | layer { 137 | name: "conv4" 138 | type: "Convolution" 139 | bottom: "conv3" 140 | top: "conv4" 141 | param { 142 | lr_mult: 1 143 | decay_mult: 1 144 | } 145 | param { 146 | lr_mult: 2 147 | decay_mult: 0 148 | } 149 | convolution_param { 150 | num_output: 512 151 | pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 | layer { 156 | name: "relu4" 157 | type: "ReLU" 158 | bottom: "conv4" 159 | top: "conv4" 160 | } 161 | layer { 162 | name: "conv5" 163 | type: "Convolution" 164 | bottom: "conv4" 165 | top: "conv5" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 512 176 | pad: 1 177 | kernel_size: 3 178 | } 179 | } 180 | layer { 181 | name: "relu5" 182 | type: "ReLU" 183 | bottom: "conv5" 184 | top: "conv5" 185 | } 186 | layer { 187 | name: "roi_pool5" 188 | type: "ROIPooling" 189 | bottom: "conv5" 190 | bottom: "rois" 191 | top: "pool5" 192 | roi_pooling_param { 193 | pooled_w: 6 194 | pooled_h: 6 195 | spatial_scale: 0.0625 # 1/16 196 | } 197 | } 198 | layer { 199 | name: "fc6" 200 | type: "InnerProduct" 201 | bottom: "pool5" 202 | top: "fc6" 203 | param { 204 | lr_mult: 1 205 | decay_mult: 1 206 | } 207 | 
param { 208 | lr_mult: 2 209 | decay_mult: 0 210 | } 211 | inner_product_param { 212 | num_output: 4096 213 | } 214 | } 215 | layer { 216 | name: "relu6" 217 | type: "ReLU" 218 | bottom: "fc6" 219 | top: "fc6" 220 | } 221 | layer { 222 | name: "drop6" 223 | type: "Dropout" 224 | bottom: "fc6" 225 | top: "fc6" 226 | dropout_param { 227 | dropout_ratio: 0.5 228 | } 229 | } 230 | layer { 231 | name: "fc7" 232 | type: "InnerProduct" 233 | bottom: "fc6" 234 | top: "fc7" 235 | param { 236 | lr_mult: 1 237 | decay_mult: 1 238 | } 239 | param { 240 | lr_mult: 2 241 | decay_mult: 0 242 | } 243 | inner_product_param { 244 | num_output: 1024 245 | } 246 | } 247 | layer { 248 | name: "relu7" 249 | type: "ReLU" 250 | bottom: "fc7" 251 | top: "fc7" 252 | } 253 | layer { 254 | name: "drop7" 255 | type: "Dropout" 256 | bottom: "fc7" 257 | top: "fc7" 258 | dropout_param { 259 | dropout_ratio: 0.5 260 | } 261 | } 262 | layer { 263 | name: "cls_score" 264 | type: "InnerProduct" 265 | bottom: "fc7" 266 | top: "cls_score" 267 | param { 268 | lr_mult: 1 269 | decay_mult: 1 270 | } 271 | param { 272 | lr_mult: 2 273 | decay_mult: 0 274 | } 275 | inner_product_param { 276 | num_output: 21 277 | weight_filler { 278 | type: "gaussian" 279 | std: 0.01 280 | } 281 | bias_filler { 282 | type: "constant" 283 | value: 0 284 | } 285 | } 286 | } 287 | layer { 288 | name: "bbox_pred" 289 | type: "InnerProduct" 290 | bottom: "fc7" 291 | top: "bbox_pred" 292 | param { 293 | lr_mult: 1 294 | decay_mult: 1 295 | } 296 | param { 297 | lr_mult: 2 298 | decay_mult: 0 299 | } 300 | inner_product_param { 301 | num_output: 84 302 | weight_filler { 303 | type: "gaussian" 304 | std: 0.001 305 | } 306 | bias_filler { 307 | type: "constant" 308 | value: 0 309 | } 310 | } 311 | } 312 | layer { 313 | name: "cls_prob" 314 | type: "Softmax" 315 | bottom: "cls_score" 316 | top: "cls_prob" 317 | } 318 | -------------------------------------------------------------------------------- 
/models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | top: 'bbox_targets' 9 | top: 'bbox_inside_weights' 10 | top: 'bbox_outside_weights' 11 | python_param { 12 | module: 'roi_data_layer.layer' 13 | layer: 'RoIDataLayer' 14 | param_str: "'num_classes': 21" 15 | } 16 | } 17 | layer { 18 | name: "conv1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1" 22 | param { lr_mult: 0 decay_mult: 0 } 23 | param { lr_mult: 0 decay_mult: 0 } 24 | convolution_param { 25 | num_output: 96 26 | kernel_size: 7 27 | stride: 2 28 | } 29 | } 30 | layer { 31 | name: "relu1" 32 | type: "ReLU" 33 | bottom: "conv1" 34 | top: "conv1" 35 | } 36 | layer { 37 | name: "norm1" 38 | type: "LRN" 39 | bottom: "conv1" 40 | top: "norm1" 41 | lrn_param { 42 | local_size: 5 43 | alpha: 0.0005 44 | beta: 0.75 45 | k: 2 46 | } 47 | } 48 | layer { 49 | name: "pool1" 50 | type: "Pooling" 51 | bottom: "norm1" 52 | top: "pool1" 53 | pooling_param { 54 | pool: MAX 55 | kernel_size: 3 56 | stride: 2 57 | } 58 | } 59 | layer { 60 | name: "conv2" 61 | type: "Convolution" 62 | bottom: "pool1" 63 | top: "conv2" 64 | param { 65 | lr_mult: 1 66 | } 67 | param { 68 | lr_mult: 2 69 | } 70 | convolution_param { 71 | num_output: 256 72 | pad: 1 73 | kernel_size: 5 74 | stride: 2 75 | } 76 | } 77 | layer { 78 | name: "relu2" 79 | type: "ReLU" 80 | bottom: "conv2" 81 | top: "conv2" 82 | } 83 | layer { 84 | name: "norm2" 85 | type: "LRN" 86 | bottom: "conv2" 87 | top: "norm2" 88 | lrn_param { 89 | local_size: 5 90 | alpha: 0.0005 91 | beta: 0.75 92 | k: 2 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "norm2" 99 | top: "pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 3 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "conv3" 108 | type: 
"Convolution" 109 | bottom: "pool2" 110 | top: "conv3" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | convolution_param { 118 | num_output: 512 119 | pad: 1 120 | kernel_size: 3 121 | } 122 | } 123 | layer { 124 | name: "relu3" 125 | type: "ReLU" 126 | bottom: "conv3" 127 | top: "conv3" 128 | } 129 | layer { 130 | name: "conv4" 131 | type: "Convolution" 132 | bottom: "conv3" 133 | top: "conv4" 134 | param { 135 | lr_mult: 1 136 | } 137 | param { 138 | lr_mult: 2 139 | } 140 | convolution_param { 141 | num_output: 512 142 | pad: 1 143 | kernel_size: 3 144 | } 145 | } 146 | layer { 147 | name: "relu4" 148 | type: "ReLU" 149 | bottom: "conv4" 150 | top: "conv4" 151 | } 152 | layer { 153 | name: "conv5" 154 | type: "Convolution" 155 | bottom: "conv4" 156 | top: "conv5" 157 | param { 158 | lr_mult: 1 159 | } 160 | param { 161 | lr_mult: 2 162 | } 163 | convolution_param { 164 | num_output: 512 165 | pad: 1 166 | kernel_size: 3 167 | } 168 | } 169 | layer { 170 | name: "relu5" 171 | type: "ReLU" 172 | bottom: "conv5" 173 | top: "conv5" 174 | } 175 | layer { 176 | name: "roi_pool5" 177 | type: "ROIPooling" 178 | bottom: "conv5" 179 | bottom: "rois" 180 | top: "pool5" 181 | roi_pooling_param { 182 | pooled_w: 6 183 | pooled_h: 6 184 | spatial_scale: 0.0625 # 1/16 185 | } 186 | } 187 | layer { 188 | name: "fc6" 189 | type: "InnerProduct" 190 | bottom: "pool5" 191 | top: "fc6" 192 | param { 193 | lr_mult: 1 194 | } 195 | param { 196 | lr_mult: 2 197 | } 198 | inner_product_param { 199 | num_output: 4096 200 | } 201 | } 202 | layer { 203 | name: "relu6" 204 | type: "ReLU" 205 | bottom: "fc6" 206 | top: "fc6" 207 | } 208 | layer { 209 | name: "drop6" 210 | type: "Dropout" 211 | bottom: "fc6" 212 | top: "fc6" 213 | dropout_param { 214 | dropout_ratio: 0.5 215 | } 216 | } 217 | layer { 218 | name: "fc7" 219 | type: "InnerProduct" 220 | bottom: "fc6" 221 | top: "fc7" 222 | param { 223 | lr_mult: 1 224 | } 225 | param { 226 | lr_mult: 2 227 | } 
228 | inner_product_param { 229 | num_output: 1024 230 | } 231 | } 232 | layer { 233 | name: "relu7" 234 | type: "ReLU" 235 | bottom: "fc7" 236 | top: "fc7" 237 | } 238 | layer { 239 | name: "drop7" 240 | type: "Dropout" 241 | bottom: "fc7" 242 | top: "fc7" 243 | dropout_param { 244 | dropout_ratio: 0.5 245 | } 246 | } 247 | layer { 248 | name: "cls_score" 249 | type: "InnerProduct" 250 | bottom: "fc7" 251 | top: "cls_score" 252 | param { 253 | lr_mult: 1 254 | } 255 | param { 256 | lr_mult: 2 257 | } 258 | inner_product_param { 259 | num_output: 21 260 | weight_filler { 261 | type: "gaussian" 262 | std: 0.01 263 | } 264 | bias_filler { 265 | type: "constant" 266 | value: 0 267 | } 268 | } 269 | } 270 | layer { 271 | name: "bbox_pred" 272 | type: "InnerProduct" 273 | bottom: "fc7" 274 | top: "bbox_pred" 275 | param { 276 | lr_mult: 1 277 | } 278 | param { 279 | lr_mult: 2 280 | } 281 | inner_product_param { 282 | num_output: 84 283 | weight_filler { 284 | type: "gaussian" 285 | std: 0.001 286 | } 287 | bias_filler { 288 | type: "constant" 289 | value: 0 290 | } 291 | } 292 | } 293 | layer { 294 | name: "loss_cls" 295 | type: "SoftmaxWithLoss" 296 | bottom: "cls_score" 297 | bottom: "labels" 298 | top: "loss_cls" 299 | loss_weight: 1 300 | } 301 | layer { 302 | name: "loss_bbox" 303 | type: "SmoothL1Loss" 304 | bottom: "bbox_pred" 305 | bottom: "bbox_targets" 306 | bottom: "bbox_inside_weights" 307 | bottom: "bbox_outside_weights" 308 | top: "loss_bbox" 309 | loss_weight: 1 310 | } 311 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/faster_rcnn_test.pt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 224 7 | dim: 224 8 | } 9 | input: "im_info" 10 | input_shape { 11 | dim: 1 12 | dim: 3 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | 
bottom: "data" 18 | top: "conv1" 19 | convolution_param { 20 | num_output: 96 21 | kernel_size: 7 22 | stride: 2 23 | } 24 | } 25 | layer { 26 | name: "relu1" 27 | type: "ReLU" 28 | bottom: "conv1" 29 | top: "conv1" 30 | } 31 | layer { 32 | name: "norm1" 33 | type: "LRN" 34 | bottom: "conv1" 35 | top: "norm1" 36 | lrn_param { 37 | local_size: 5 38 | alpha: 0.0005 39 | beta: 0.75 40 | k: 2 41 | } 42 | } 43 | layer { 44 | name: "pool1" 45 | type: "Pooling" 46 | bottom: "norm1" 47 | top: "pool1" 48 | pooling_param { 49 | pool: MAX 50 | kernel_size: 3 51 | stride: 2 52 | } 53 | } 54 | layer { 55 | name: "conv2" 56 | type: "Convolution" 57 | bottom: "pool1" 58 | top: "conv2" 59 | convolution_param { 60 | num_output: 256 61 | pad: 1 62 | kernel_size: 5 63 | stride: 2 64 | } 65 | } 66 | layer { 67 | name: "relu2" 68 | type: "ReLU" 69 | bottom: "conv2" 70 | top: "conv2" 71 | } 72 | layer { 73 | name: "norm2" 74 | type: "LRN" 75 | bottom: "conv2" 76 | top: "norm2" 77 | lrn_param { 78 | local_size: 5 79 | alpha: 0.0005 80 | beta: 0.75 81 | k: 2 82 | } 83 | } 84 | layer { 85 | name: "pool2" 86 | type: "Pooling" 87 | bottom: "norm2" 88 | top: "pool2" 89 | pooling_param { 90 | pool: MAX 91 | kernel_size: 3 92 | stride: 2 93 | } 94 | } 95 | layer { 96 | name: "conv3" 97 | type: "Convolution" 98 | bottom: "pool2" 99 | top: "conv3" 100 | convolution_param { 101 | num_output: 512 102 | pad: 1 103 | kernel_size: 3 104 | } 105 | } 106 | layer { 107 | name: "relu3" 108 | type: "ReLU" 109 | bottom: "conv3" 110 | top: "conv3" 111 | } 112 | layer { 113 | name: "conv4" 114 | type: "Convolution" 115 | bottom: "conv3" 116 | top: "conv4" 117 | convolution_param { 118 | num_output: 512 119 | pad: 1 120 | kernel_size: 3 121 | } 122 | } 123 | layer { 124 | name: "relu4" 125 | type: "ReLU" 126 | bottom: "conv4" 127 | top: "conv4" 128 | } 129 | layer { 130 | name: "conv5" 131 | type: "Convolution" 132 | bottom: "conv4" 133 | top: "conv5" 134 | convolution_param { 135 | num_output: 512 136 | pad: 
1 137 | kernel_size: 3 138 | } 139 | } 140 | layer { 141 | name: "relu5" 142 | type: "ReLU" 143 | bottom: "conv5" 144 | top: "conv5" 145 | } 146 | 147 | #========= RPN ============ 148 | 149 | layer { 150 | name: "rpn_conv/3x3" 151 | type: "Convolution" 152 | bottom: "conv5" 153 | top: "rpn/output" 154 | convolution_param { 155 | num_output: 256 156 | kernel_size: 3 pad: 1 stride: 1 157 | } 158 | } 159 | layer { 160 | name: "rpn_relu/3x3" 161 | type: "ReLU" 162 | bottom: "rpn/output" 163 | top: "rpn/output" 164 | } 165 | layer { 166 | name: "rpn_cls_score" 167 | type: "Convolution" 168 | bottom: "rpn/output" 169 | top: "rpn_cls_score" 170 | convolution_param { 171 | num_output: 18 # 2(bg/fg) * 9(anchors) 172 | kernel_size: 1 pad: 0 stride: 1 173 | } 174 | } 175 | layer { 176 | name: "rpn_bbox_pred" 177 | type: "Convolution" 178 | bottom: "rpn/output" 179 | top: "rpn_bbox_pred" 180 | convolution_param { 181 | num_output: 36 # 4 * 9(anchors) 182 | kernel_size: 1 pad: 0 stride: 1 183 | } 184 | } 185 | layer { 186 | bottom: "rpn_cls_score" 187 | top: "rpn_cls_score_reshape" 188 | name: "rpn_cls_score_reshape" 189 | type: "Reshape" 190 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 191 | } 192 | 193 | #========= RoI Proposal ============ 194 | 195 | layer { 196 | name: "rpn_cls_prob" 197 | type: "Softmax" 198 | bottom: "rpn_cls_score_reshape" 199 | top: "rpn_cls_prob" 200 | } 201 | layer { 202 | name: 'rpn_cls_prob_reshape' 203 | type: 'Reshape' 204 | bottom: 'rpn_cls_prob' 205 | top: 'rpn_cls_prob_reshape' 206 | reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } 207 | } 208 | layer { 209 | name: 'proposal' 210 | type: 'Python' 211 | bottom: 'rpn_cls_prob_reshape' 212 | bottom: 'rpn_bbox_pred' 213 | bottom: 'im_info' 214 | top: 'rois' 215 | python_param { 216 | module: 'rpn.proposal_layer' 217 | layer: 'ProposalLayer' 218 | param_str: "'feat_stride': 16" 219 | } 220 | } 221 | 222 | #========= RCNN ============ 223 | 224 | layer { 225 | name: 
"roi_pool5" 226 | type: "ROIPooling" 227 | bottom: "conv5" 228 | bottom: "rois" 229 | top: "pool5" 230 | roi_pooling_param { 231 | pooled_w: 6 232 | pooled_h: 6 233 | spatial_scale: 0.0625 # 1/16 234 | } 235 | } 236 | layer { 237 | name: "fc6" 238 | type: "InnerProduct" 239 | bottom: "pool5" 240 | top: "fc6" 241 | inner_product_param { 242 | num_output: 4096 243 | } 244 | } 245 | layer { 246 | name: "relu6" 247 | type: "ReLU" 248 | bottom: "fc6" 249 | top: "fc6" 250 | } 251 | layer { 252 | name: "fc7" 253 | type: "InnerProduct" 254 | bottom: "fc6" 255 | top: "fc7" 256 | inner_product_param { 257 | num_output: 1024 258 | } 259 | } 260 | layer { 261 | name: "relu7" 262 | type: "ReLU" 263 | bottom: "fc7" 264 | top: "fc7" 265 | } 266 | layer { 267 | name: "cls_score" 268 | type: "InnerProduct" 269 | bottom: "fc7" 270 | top: "cls_score" 271 | inner_product_param { 272 | num_output: 21 273 | } 274 | } 275 | layer { 276 | name: "bbox_pred" 277 | type: "InnerProduct" 278 | bottom: "fc7" 279 | top: "bbox_pred" 280 | inner_product_param { 281 | num_output: 84 282 | } 283 | } 284 | layer { 285 | name: "cls_prob" 286 | type: "Softmax" 287 | bottom: "cls_score" 288 | top: "cls_prob" 289 | } 290 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/rpn_test.pt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 224 7 | dim: 224 8 | } 9 | input: "im_info" 10 | input_shape { 11 | dim: 1 12 | dim: 3 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | convolution_param { 20 | num_output: 96 21 | kernel_size: 7 22 | stride: 2 23 | } 24 | } 25 | layer { 26 | name: "relu1" 27 | type: "ReLU" 28 | bottom: "conv1" 29 | top: "conv1" 30 | } 31 | layer { 32 | name: "norm1" 33 | type: "LRN" 34 | bottom: "conv1" 35 | top: "norm1" 36 | 
lrn_param { 37 | local_size: 5 38 | alpha: 0.0005 39 | beta: 0.75 40 | k: 2 41 | } 42 | } 43 | layer { 44 | name: "pool1" 45 | type: "Pooling" 46 | bottom: "norm1" 47 | top: "pool1" 48 | pooling_param { 49 | pool: MAX 50 | kernel_size: 3 51 | stride: 2 52 | } 53 | } 54 | layer { 55 | name: "conv2" 56 | type: "Convolution" 57 | bottom: "pool1" 58 | top: "conv2" 59 | convolution_param { 60 | num_output: 256 61 | pad: 1 62 | kernel_size: 5 63 | stride: 2 64 | } 65 | } 66 | layer { 67 | name: "relu2" 68 | type: "ReLU" 69 | bottom: "conv2" 70 | top: "conv2" 71 | } 72 | layer { 73 | name: "norm2" 74 | type: "LRN" 75 | bottom: "conv2" 76 | top: "norm2" 77 | lrn_param { 78 | local_size: 5 79 | alpha: 0.0005 80 | beta: 0.75 81 | k: 2 82 | } 83 | } 84 | layer { 85 | name: "pool2" 86 | type: "Pooling" 87 | bottom: "norm2" 88 | top: "pool2" 89 | pooling_param { 90 | pool: MAX 91 | kernel_size: 3 92 | stride: 2 93 | } 94 | } 95 | layer { 96 | name: "conv3" 97 | type: "Convolution" 98 | bottom: "pool2" 99 | top: "conv3" 100 | convolution_param { 101 | num_output: 512 102 | pad: 1 103 | kernel_size: 3 104 | } 105 | } 106 | layer { 107 | name: "relu3" 108 | type: "ReLU" 109 | bottom: "conv3" 110 | top: "conv3" 111 | } 112 | layer { 113 | name: "conv4" 114 | type: "Convolution" 115 | bottom: "conv3" 116 | top: "conv4" 117 | convolution_param { 118 | num_output: 512 119 | pad: 1 120 | kernel_size: 3 121 | } 122 | } 123 | layer { 124 | name: "relu4" 125 | type: "ReLU" 126 | bottom: "conv4" 127 | top: "conv4" 128 | } 129 | layer { 130 | name: "conv5" 131 | type: "Convolution" 132 | bottom: "conv4" 133 | top: "conv5" 134 | convolution_param { 135 | num_output: 512 136 | pad: 1 137 | kernel_size: 3 138 | } 139 | } 140 | layer { 141 | name: "relu5" 142 | type: "ReLU" 143 | bottom: "conv5" 144 | top: "conv5" 145 | } 146 | 147 | #========= RPN ============ 148 | 149 | layer { 150 | name: "rpn_conv/3x3" 151 | type: "Convolution" 152 | bottom: "conv5" 153 | top: "rpn/output" 154 | 
convolution_param { 155 | num_output: 256 156 | kernel_size: 3 pad: 1 stride: 1 157 | } 158 | } 159 | layer { 160 | name: "rpn_relu/3x3" 161 | type: "ReLU" 162 | bottom: "rpn/output" 163 | top: "rpn/output" 164 | } 165 | layer { 166 | name: "rpn_cls_score" 167 | type: "Convolution" 168 | bottom: "rpn/output" 169 | top: "rpn_cls_score" 170 | convolution_param { 171 | num_output: 18 # 2(bg/fg) * 9(anchors) 172 | kernel_size: 1 pad: 0 stride: 1 173 | } 174 | } 175 | layer { 176 | name: "rpn_bbox_pred" 177 | type: "Convolution" 178 | bottom: "rpn/output" 179 | top: "rpn_bbox_pred" 180 | convolution_param { 181 | num_output: 36 # 4 * 9(anchors) 182 | kernel_size: 1 pad: 0 stride: 1 183 | } 184 | } 185 | layer { 186 | bottom: "rpn_cls_score" 187 | top: "rpn_cls_score_reshape" 188 | name: "rpn_cls_score_reshape" 189 | type: "Reshape" 190 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 191 | } 192 | 193 | #========= RoI Proposal ============ 194 | 195 | layer { 196 | name: "rpn_cls_prob" 197 | type: "Softmax" 198 | bottom: "rpn_cls_score_reshape" 199 | top: "rpn_cls_prob" 200 | } 201 | layer { 202 | name: 'rpn_cls_prob_reshape' 203 | type: 'Reshape' 204 | bottom: 'rpn_cls_prob' 205 | top: 'rpn_cls_prob_reshape' 206 | reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } 207 | } 208 | layer { 209 | name: 'proposal' 210 | type: 'Python' 211 | bottom: 'rpn_cls_prob_reshape' 212 | bottom: 'rpn_bbox_pred' 213 | bottom: 'im_info' 214 | top: 'rois' 215 | top: 'scores' 216 | python_param { 217 | module: 'rpn.proposal_layer' 218 | layer: 'ProposalLayer' 219 | param_str: "'feat_stride': 16" 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage1_fast_rcnn_solver30k40k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt" 2 | 3 | base_lr: 
0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 30000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn" 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage1_rpn_train.pt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 60000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "vgg_cnn_m_1024_rpn" 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage1_rpn_train.pt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | layer { 3 | name: 'input-data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'im_info' 7 | top: 'gt_boxes' 8 | python_param { 9 | module: 'roi_data_layer.layer' 10 | layer: 'RoIDataLayer' 11 | param_str: "'num_classes': 21" 12 | } 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { lr_mult: 0 decay_mult: 0 } 20 | param { lr_mult: 0 decay_mult: 0 } 21 | convolution_param { 22 | num_output: 96 23 | kernel_size: 7 stride: 2 24 | } 25 | } 26 | layer { 27 | name: "relu1" 28 | type: "ReLU" 29 | bottom: "conv1" 30 | top: "conv1" 31 | } 32 | layer { 33 | name: "norm1" 34 | type: "LRN" 35 
| bottom: "conv1" 36 | top: "norm1" 37 | lrn_param { 38 | local_size: 5 39 | alpha: 0.0005 40 | beta: 0.75 41 | k: 2 42 | } 43 | } 44 | layer { 45 | name: "pool1" 46 | type: "Pooling" 47 | bottom: "norm1" 48 | top: "pool1" 49 | pooling_param { 50 | pool: MAX 51 | kernel_size: 3 stride: 2 52 | } 53 | } 54 | layer { 55 | name: "conv2" 56 | type: "Convolution" 57 | bottom: "pool1" 58 | top: "conv2" 59 | param { lr_mult: 1 } 60 | param { lr_mult: 2 } 61 | convolution_param { 62 | num_output: 256 63 | pad: 1 kernel_size: 5 stride: 2 64 | } 65 | } 66 | layer { 67 | name: "relu2" 68 | type: "ReLU" 69 | bottom: "conv2" 70 | top: "conv2" 71 | } 72 | layer { 73 | name: "norm2" 74 | type: "LRN" 75 | bottom: "conv2" 76 | top: "norm2" 77 | lrn_param { 78 | local_size: 5 79 | alpha: 0.0005 80 | beta: 0.75 81 | k: 2 82 | } 83 | } 84 | layer { 85 | name: "pool2" 86 | type: "Pooling" 87 | bottom: "norm2" 88 | top: "pool2" 89 | pooling_param { 90 | pool: MAX 91 | kernel_size: 3 stride: 2 92 | } 93 | } 94 | layer { 95 | name: "conv3" 96 | type: "Convolution" 97 | bottom: "pool2" 98 | top: "conv3" 99 | param { lr_mult: 1 } 100 | param { lr_mult: 2 } 101 | convolution_param { 102 | num_output: 512 103 | pad: 1 kernel_size: 3 104 | } 105 | } 106 | layer { 107 | name: "relu3" 108 | type: "ReLU" 109 | bottom: "conv3" 110 | top: "conv3" 111 | } 112 | layer { 113 | name: "conv4" 114 | type: "Convolution" 115 | bottom: "conv3" 116 | top: "conv4" 117 | param { lr_mult: 1 } 118 | param { lr_mult: 2 } 119 | convolution_param { 120 | num_output: 512 121 | pad: 1 kernel_size: 3 122 | } 123 | } 124 | layer { 125 | name: "relu4" 126 | type: "ReLU" 127 | bottom: "conv4" 128 | top: "conv4" 129 | } 130 | layer { 131 | name: "conv5" 132 | type: "Convolution" 133 | bottom: "conv4" 134 | top: "conv5" 135 | param { lr_mult: 1 } 136 | param { lr_mult: 2 } 137 | convolution_param { 138 | num_output: 512 139 | pad: 1 kernel_size: 3 140 | } 141 | } 142 | layer { 143 | name: "relu5" 144 | type: "ReLU" 145 | 
bottom: "conv5" 146 | top: "conv5" 147 | } 148 | 149 | #========= RPN ============ 150 | 151 | layer { 152 | name: "rpn_conv/3x3" 153 | type: "Convolution" 154 | bottom: "conv5" 155 | top: "rpn/output" 156 | param { lr_mult: 1.0 } 157 | param { lr_mult: 2.0 } 158 | convolution_param { 159 | num_output: 256 160 | kernel_size: 3 pad: 1 stride: 1 161 | weight_filler { type: "gaussian" std: 0.01 } 162 | bias_filler { type: "constant" value: 0 } 163 | } 164 | } 165 | layer { 166 | name: "rpn_relu/3x3" 167 | type: "ReLU" 168 | bottom: "rpn/output" 169 | top: "rpn/output" 170 | } 171 | layer { 172 | name: "rpn_cls_score" 173 | type: "Convolution" 174 | bottom: "rpn/output" 175 | top: "rpn_cls_score" 176 | param { lr_mult: 1.0 } 177 | param { lr_mult: 2.0 } 178 | convolution_param { 179 | num_output: 18 # 2(bg/fg) * 9(anchors) 180 | kernel_size: 1 pad: 0 stride: 1 181 | weight_filler { type: "gaussian" std: 0.01 } 182 | bias_filler { type: "constant" value: 0 } 183 | } 184 | } 185 | layer { 186 | name: "rpn_bbox_pred" 187 | type: "Convolution" 188 | bottom: "rpn/output" 189 | top: "rpn_bbox_pred" 190 | param { lr_mult: 1.0 } 191 | param { lr_mult: 2.0 } 192 | convolution_param { 193 | num_output: 36 # 4 * 9(anchors) 194 | kernel_size: 1 pad: 0 stride: 1 195 | weight_filler { type: "gaussian" std: 0.01 } 196 | bias_filler { type: "constant" value: 0 } 197 | } 198 | } 199 | layer { 200 | bottom: "rpn_cls_score" 201 | top: "rpn_cls_score_reshape" 202 | name: "rpn_cls_score_reshape" 203 | type: "Reshape" 204 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 205 | } 206 | layer { 207 | name: 'rpn-data' 208 | type: 'Python' 209 | bottom: 'rpn_cls_score' 210 | bottom: 'gt_boxes' 211 | bottom: 'im_info' 212 | bottom: 'data' 213 | top: 'rpn_labels' 214 | top: 'rpn_bbox_targets' 215 | top: 'rpn_bbox_inside_weights' 216 | top: 'rpn_bbox_outside_weights' 217 | python_param { 218 | module: 'rpn.anchor_target_layer' 219 | layer: 'AnchorTargetLayer' 220 | param_str: 
"'feat_stride': 16" 221 | } 222 | } 223 | layer { 224 | name: "rpn_loss_cls" 225 | type: "SoftmaxWithLoss" 226 | bottom: "rpn_cls_score_reshape" 227 | bottom: "rpn_labels" 228 | propagate_down: 1 229 | propagate_down: 0 230 | top: "rpn_cls_loss" 231 | loss_weight: 1 232 | loss_param { 233 | ignore_label: -1 234 | normalize: true 235 | } 236 | } 237 | layer { 238 | name: "rpn_loss_bbox" 239 | type: "SmoothL1Loss" 240 | bottom: "rpn_bbox_pred" 241 | bottom: "rpn_bbox_targets" 242 | bottom: 'rpn_bbox_inside_weights' 243 | bottom: 'rpn_bbox_outside_weights' 244 | top: "rpn_loss_bbox" 245 | loss_weight: 1 246 | smooth_l1_loss_param { sigma: 3.0 } 247 | } 248 | 249 | #========= RCNN ============ 250 | 251 | layer { 252 | name: "dummy_roi_pool_conv5" 253 | type: "DummyData" 254 | top: "dummy_roi_pool_conv5" 255 | dummy_data_param { 256 | shape { dim: 1 dim: 18432 } 257 | data_filler { type: "gaussian" std: 0.01 } 258 | } 259 | } 260 | layer { 261 | name: "fc6" 262 | type: "InnerProduct" 263 | bottom: "dummy_roi_pool_conv5" 264 | top: "fc6" 265 | param { lr_mult: 0 decay_mult: 0 } 266 | param { lr_mult: 0 decay_mult: 0 } 267 | inner_product_param { 268 | num_output: 4096 269 | } 270 | } 271 | layer { 272 | name: "fc7" 273 | type: "InnerProduct" 274 | bottom: "fc6" 275 | top: "fc7" 276 | param { lr_mult: 0 decay_mult: 0 } 277 | param { lr_mult: 0 decay_mult: 0 } 278 | inner_product_param { 279 | num_output: 1024 280 | } 281 | } 282 | layer { 283 | name: "silence_fc7" 284 | type: "Silence" 285 | bottom: "fc7" 286 | } 287 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage2_fast_rcnn_solver30k40k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 30000 7 | display: 20 8 | average_loss: 
100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn" 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage2_rpn_solver60k80k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage2_rpn_train.pt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 60000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "vgg_cnn_m_1024_rpn" 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage2_rpn_train.pt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | layer { 3 | name: 'input-data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'im_info' 7 | top: 'gt_boxes' 8 | python_param { 9 | module: 'roi_data_layer.layer' 10 | layer: 'RoIDataLayer' 11 | param_str: "'num_classes': 21" 12 | } 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { lr_mult: 0 decay_mult: 0 } 20 | param { lr_mult: 0 decay_mult: 0 } 21 | convolution_param { 22 | num_output: 96 23 | kernel_size: 7 stride: 2 24 | } 25 | } 26 | layer { 27 | name: "relu1" 28 | type: "ReLU" 29 | bottom: "conv1" 30 | top: "conv1" 31 | } 32 | layer { 33 | name: "norm1" 34 | type: "LRN" 35 | bottom: "conv1" 36 | top: "norm1" 37 | lrn_param { 38 | local_size: 5 39 | alpha: 0.0005 40 | 
beta: 0.75 41 | k: 2 42 | } 43 | } 44 | layer { 45 | name: "pool1" 46 | type: "Pooling" 47 | bottom: "norm1" 48 | top: "pool1" 49 | pooling_param { 50 | pool: MAX 51 | kernel_size: 3 stride: 2 52 | } 53 | } 54 | layer { 55 | name: "conv2" 56 | type: "Convolution" 57 | bottom: "pool1" 58 | top: "conv2" 59 | param { lr_mult: 0 decay_mult: 0 } 60 | param { lr_mult: 0 decay_mult: 0 } 61 | convolution_param { 62 | num_output: 256 63 | pad: 1 kernel_size: 5 stride: 2 64 | } 65 | } 66 | layer { 67 | name: "relu2" 68 | type: "ReLU" 69 | bottom: "conv2" 70 | top: "conv2" 71 | } 72 | layer { 73 | name: "norm2" 74 | type: "LRN" 75 | bottom: "conv2" 76 | top: "norm2" 77 | lrn_param { 78 | local_size: 5 79 | alpha: 0.0005 80 | beta: 0.75 81 | k: 2 82 | } 83 | } 84 | layer { 85 | name: "pool2" 86 | type: "Pooling" 87 | bottom: "norm2" 88 | top: "pool2" 89 | pooling_param { 90 | pool: MAX 91 | kernel_size: 3 stride: 2 92 | } 93 | } 94 | layer { 95 | name: "conv3" 96 | type: "Convolution" 97 | bottom: "pool2" 98 | top: "conv3" 99 | param { lr_mult: 0 decay_mult: 0 } 100 | param { lr_mult: 0 decay_mult: 0 } 101 | convolution_param { 102 | num_output: 512 103 | pad: 1 kernel_size: 3 104 | } 105 | } 106 | layer { 107 | name: "relu3" 108 | type: "ReLU" 109 | bottom: "conv3" 110 | top: "conv3" 111 | } 112 | layer { 113 | name: "conv4" 114 | type: "Convolution" 115 | bottom: "conv3" 116 | top: "conv4" 117 | param { lr_mult: 0 decay_mult: 0 } 118 | param { lr_mult: 0 decay_mult: 0 } 119 | convolution_param { 120 | num_output: 512 121 | pad: 1 kernel_size: 3 122 | } 123 | } 124 | layer { 125 | name: "relu4" 126 | type: "ReLU" 127 | bottom: "conv4" 128 | top: "conv4" 129 | } 130 | layer { 131 | name: "conv5" 132 | type: "Convolution" 133 | bottom: "conv4" 134 | top: "conv5" 135 | param { lr_mult: 0 decay_mult: 0 } 136 | param { lr_mult: 0 decay_mult: 0 } 137 | convolution_param { 138 | num_output: 512 139 | pad: 1 kernel_size: 3 140 | } 141 | } 142 | layer { 143 | name: "relu5" 144 | type: 
"ReLU" 145 | bottom: "conv5" 146 | top: "conv5" 147 | } 148 | 149 | #========= RPN ============ 150 | 151 | layer { 152 | name: "rpn_conv/3x3" 153 | type: "Convolution" 154 | bottom: "conv5" 155 | top: "rpn/output" 156 | param { lr_mult: 1.0 } 157 | param { lr_mult: 2.0 } 158 | convolution_param { 159 | num_output: 256 160 | kernel_size: 3 pad: 1 stride: 1 161 | weight_filler { type: "gaussian" std: 0.01 } 162 | bias_filler { type: "constant" value: 0 } 163 | } 164 | } 165 | layer { 166 | name: "rpn_relu/3x3" 167 | type: "ReLU" 168 | bottom: "rpn/output" 169 | top: "rpn/output" 170 | } 171 | layer { 172 | name: "rpn_cls_score" 173 | type: "Convolution" 174 | bottom: "rpn/output" 175 | top: "rpn_cls_score" 176 | param { lr_mult: 1.0 } 177 | param { lr_mult: 2.0 } 178 | convolution_param { 179 | num_output: 18 # 2(bg/fg) * 9(anchors) 180 | kernel_size: 1 pad: 0 stride: 1 181 | weight_filler { type: "gaussian" std: 0.01 } 182 | bias_filler { type: "constant" value: 0 } 183 | } 184 | } 185 | layer { 186 | name: "rpn_bbox_pred" 187 | type: "Convolution" 188 | bottom: "rpn/output" 189 | top: "rpn_bbox_pred" 190 | param { lr_mult: 1.0 } 191 | param { lr_mult: 2.0 } 192 | convolution_param { 193 | num_output: 36 # 4 * 9(anchors) 194 | kernel_size: 1 pad: 0 stride: 1 195 | weight_filler { type: "gaussian" std: 0.01 } 196 | bias_filler { type: "constant" value: 0 } 197 | } 198 | } 199 | layer { 200 | bottom: "rpn_cls_score" 201 | top: "rpn_cls_score_reshape" 202 | name: "rpn_cls_score_reshape" 203 | type: "Reshape" 204 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 205 | } 206 | layer { 207 | name: 'rpn-data' 208 | type: 'Python' 209 | bottom: 'rpn_cls_score' 210 | bottom: 'gt_boxes' 211 | bottom: 'im_info' 212 | bottom: 'data' 213 | top: 'rpn_labels' 214 | top: 'rpn_bbox_targets' 215 | top: 'rpn_bbox_inside_weights' 216 | top: 'rpn_bbox_outside_weights' 217 | python_param { 218 | module: 'rpn.anchor_target_layer' 219 | layer: 'AnchorTargetLayer' 220 | param_str: 
"'feat_stride': 16" 221 | } 222 | } 223 | layer { 224 | name: "rpn_loss_cls" 225 | type: "SoftmaxWithLoss" 226 | bottom: "rpn_cls_score_reshape" 227 | bottom: "rpn_labels" 228 | propagate_down: 1 229 | propagate_down: 0 230 | top: "rpn_cls_loss" 231 | loss_weight: 1 232 | loss_param { 233 | ignore_label: -1 234 | normalize: true 235 | } 236 | } 237 | layer { 238 | name: "rpn_loss_bbox" 239 | type: "SmoothL1Loss" 240 | bottom: "rpn_bbox_pred" 241 | bottom: "rpn_bbox_targets" 242 | bottom: 'rpn_bbox_inside_weights' 243 | bottom: 'rpn_bbox_outside_weights' 244 | top: "rpn_loss_bbox" 245 | loss_weight: 1 246 | smooth_l1_loss_param { sigma: 3.0 } 247 | } 248 | 249 | #========= RCNN ============ 250 | 251 | layer { 252 | name: "dummy_roi_pool_conv5" 253 | type: "DummyData" 254 | top: "dummy_roi_pool_conv5" 255 | dummy_data_param { 256 | shape { dim: 1 dim: 18432 } 257 | data_filler { type: "gaussian" std: 0.01 } 258 | } 259 | } 260 | layer { 261 | name: "fc6" 262 | type: "InnerProduct" 263 | bottom: "dummy_roi_pool_conv5" 264 | top: "fc6" 265 | param { lr_mult: 0 decay_mult: 0 } 266 | param { lr_mult: 0 decay_mult: 0 } 267 | inner_product_param { 268 | num_output: 4096 269 | } 270 | } 271 | layer { 272 | name: "fc7" 273 | type: "InnerProduct" 274 | bottom: "fc6" 275 | top: "fc7" 276 | param { lr_mult: 0 decay_mult: 0 } 277 | param { lr_mult: 0 decay_mult: 0 } 278 | inner_product_param { 279 | num_output: 1024 280 | } 281 | } 282 | layer { 283 | name: "silence_fc7" 284 | type: "Silence" 285 | bottom: "fc7" 286 | } 287 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 50000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | 
weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "vgg_cnn_m_1024_faster_rcnn" 15 | -------------------------------------------------------------------------------- /models/pascal_voc/ZF/fast_rcnn/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/ZF/fast_rcnn/train.prototxt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 30000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "zf_fast_rcnn" 17 | #debug_info: true 18 | #iter_size: 2 19 | -------------------------------------------------------------------------------- /models/pascal_voc/ZF/fast_rcnn/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "ZF" 2 | 3 | input: "data" 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 224 8 | dim: 224 9 | } 10 | 11 | input: "rois" 12 | input_shape { 13 | dim: 1 # to be changed on-the-fly to num ROIs 14 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 15 | } 16 | 17 | #========= conv1-conv5 ============ 18 | 19 | layer { 20 | name: "conv1" 21 | type: "Convolution" 22 | bottom: "data" 23 | top: "conv1" 24 | convolution_param { 25 | num_output: 96 26 | kernel_size: 7 27 | pad: 3 28 | stride: 2 29 | } 30 | } 31 | layer { 32 | name: "relu1" 33 | type: "ReLU" 34 | bottom: "conv1" 35 | top: "conv1" 36 | } 37 | layer { 38 | name: "norm1" 39 | type: "LRN" 40 | bottom: "conv1" 41 | top: "norm1" 42 | lrn_param { 43 | local_size: 3 44 | alpha: 0.00005 45 | beta: 0.75 46 | norm_region: WITHIN_CHANNEL 47 | engine: CAFFE 48 | } 49 | } 50 | layer { 
51 | name: "pool1" 52 | type: "Pooling" 53 | bottom: "norm1" 54 | top: "pool1" 55 | pooling_param { 56 | kernel_size: 3 57 | stride: 2 58 | pad: 1 59 | pool: MAX 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | convolution_param { 68 | num_output: 256 69 | kernel_size: 5 70 | pad: 2 71 | stride: 2 72 | } 73 | } 74 | layer { 75 | name: "relu2" 76 | type: "ReLU" 77 | bottom: "conv2" 78 | top: "conv2" 79 | } 80 | layer { 81 | name: "norm2" 82 | type: "LRN" 83 | bottom: "conv2" 84 | top: "norm2" 85 | lrn_param { 86 | local_size: 3 87 | alpha: 0.00005 88 | beta: 0.75 89 | norm_region: WITHIN_CHANNEL 90 | engine: CAFFE 91 | } 92 | } 93 | layer { 94 | name: "pool2" 95 | type: "Pooling" 96 | bottom: "norm2" 97 | top: "pool2" 98 | pooling_param { 99 | kernel_size: 3 100 | stride: 2 101 | pad: 1 102 | pool: MAX 103 | } 104 | } 105 | layer { 106 | name: "conv3" 107 | type: "Convolution" 108 | bottom: "pool2" 109 | top: "conv3" 110 | convolution_param { 111 | num_output: 384 112 | kernel_size: 3 113 | pad: 1 114 | stride: 1 115 | } 116 | } 117 | layer { 118 | name: "relu3" 119 | type: "ReLU" 120 | bottom: "conv3" 121 | top: "conv3" 122 | } 123 | layer { 124 | name: "conv4" 125 | type: "Convolution" 126 | bottom: "conv3" 127 | top: "conv4" 128 | convolution_param { 129 | num_output: 384 130 | kernel_size: 3 131 | pad: 1 132 | stride: 1 133 | } 134 | } 135 | layer { 136 | name: "relu4" 137 | type: "ReLU" 138 | bottom: "conv4" 139 | top: "conv4" 140 | } 141 | layer { 142 | name: "conv5" 143 | type: "Convolution" 144 | bottom: "conv4" 145 | top: "conv5" 146 | convolution_param { 147 | num_output: 256 148 | kernel_size: 3 149 | pad: 1 150 | stride: 1 151 | } 152 | } 153 | layer { 154 | name: "relu5" 155 | type: "ReLU" 156 | bottom: "conv5" 157 | top: "conv5" 158 | } 159 | 160 | #========= RCNN ============ 161 | 162 | layer { 163 | name: "roi_pool_conv5" 164 | type: "ROIPooling" 165 | bottom: "conv5" 166 | bottom: 
"rois" 167 | top: "roi_pool_conv5" 168 | roi_pooling_param { 169 | pooled_w: 6 170 | pooled_h: 6 171 | spatial_scale: 0.0625 # 1/16 172 | } 173 | } 174 | layer { 175 | name: "fc6" 176 | type: "InnerProduct" 177 | bottom: "roi_pool_conv5" 178 | top: "fc6" 179 | inner_product_param { 180 | num_output: 4096 181 | } 182 | } 183 | layer { 184 | name: "relu6" 185 | type: "ReLU" 186 | bottom: "fc6" 187 | top: "fc6" 188 | } 189 | layer { 190 | name: "drop6" 191 | type: "Dropout" 192 | bottom: "fc6" 193 | top: "fc6" 194 | dropout_param { 195 | dropout_ratio: 0.5 196 | scale_train: false 197 | } 198 | } 199 | layer { 200 | name: "fc7" 201 | type: "InnerProduct" 202 | bottom: "fc6" 203 | top: "fc7" 204 | inner_product_param { 205 | num_output: 4096 206 | } 207 | } 208 | layer { 209 | name: "relu7" 210 | type: "ReLU" 211 | bottom: "fc7" 212 | top: "fc7" 213 | } 214 | layer { 215 | name: "drop7" 216 | type: "Dropout" 217 | bottom: "fc7" 218 | top: "fc7" 219 | dropout_param { 220 | dropout_ratio: 0.5 221 | scale_train: false 222 | } 223 | } 224 | layer { 225 | name: "cls_score" 226 | type: "InnerProduct" 227 | bottom: "fc7" 228 | top: "cls_score" 229 | inner_product_param { 230 | num_output: 21 231 | } 232 | } 233 | layer { 234 | name: "bbox_pred" 235 | type: "InnerProduct" 236 | bottom: "fc7" 237 | top: "bbox_pred" 238 | inner_product_param { 239 | num_output: 84 240 | } 241 | } 242 | layer { 243 | name: "cls_prob" 244 | type: "Softmax" 245 | bottom: "cls_score" 246 | top: "cls_prob" 247 | loss_param { 248 | ignore_label: -1 249 | normalize: true 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /models/pascal_voc/ZF/fast_rcnn/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "ZF" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | top: 'bbox_targets' 9 | top: 'bbox_inside_weights' 10 | top: 'bbox_outside_weights' 11 | 
python_param { 12 | module: 'roi_data_layer.layer' 13 | layer: 'RoIDataLayer' 14 | param_str: "'num_classes': 21" 15 | } 16 | } 17 | 18 | #========= conv1-conv5 ============ 19 | 20 | layer { 21 | name: "conv1" 22 | type: "Convolution" 23 | bottom: "data" 24 | top: "conv1" 25 | param { lr_mult: 1.0 } 26 | param { lr_mult: 2.0 } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 7 30 | pad: 3 31 | stride: 2 32 | } 33 | } 34 | layer { 35 | name: "relu1" 36 | type: "ReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "norm1" 42 | type: "LRN" 43 | bottom: "conv1" 44 | top: "norm1" 45 | lrn_param { 46 | local_size: 3 47 | alpha: 0.00005 48 | beta: 0.75 49 | norm_region: WITHIN_CHANNEL 50 | engine: CAFFE 51 | } 52 | } 53 | layer { 54 | name: "pool1" 55 | type: "Pooling" 56 | bottom: "norm1" 57 | top: "pool1" 58 | pooling_param { 59 | kernel_size: 3 60 | stride: 2 61 | pad: 1 62 | pool: MAX 63 | } 64 | } 65 | layer { 66 | name: "conv2" 67 | type: "Convolution" 68 | bottom: "pool1" 69 | top: "conv2" 70 | param { lr_mult: 1.0 } 71 | param { lr_mult: 2.0 } 72 | convolution_param { 73 | num_output: 256 74 | kernel_size: 5 75 | pad: 2 76 | stride: 2 77 | } 78 | } 79 | layer { 80 | name: "relu2" 81 | type: "ReLU" 82 | bottom: "conv2" 83 | top: "conv2" 84 | } 85 | layer { 86 | name: "norm2" 87 | type: "LRN" 88 | bottom: "conv2" 89 | top: "norm2" 90 | lrn_param { 91 | local_size: 3 92 | alpha: 0.00005 93 | beta: 0.75 94 | norm_region: WITHIN_CHANNEL 95 | engine: CAFFE 96 | } 97 | } 98 | layer { 99 | name: "pool2" 100 | type: "Pooling" 101 | bottom: "norm2" 102 | top: "pool2" 103 | pooling_param { 104 | kernel_size: 3 105 | stride: 2 106 | pad: 1 107 | pool: MAX 108 | } 109 | } 110 | layer { 111 | name: "conv3" 112 | type: "Convolution" 113 | bottom: "pool2" 114 | top: "conv3" 115 | param { lr_mult: 1.0 } 116 | param { lr_mult: 2.0 } 117 | convolution_param { 118 | num_output: 384 119 | kernel_size: 3 120 | pad: 1 121 | stride: 1 122 | } 123 | } 
124 | layer { 125 | name: "relu3" 126 | type: "ReLU" 127 | bottom: "conv3" 128 | top: "conv3" 129 | } 130 | layer { 131 | name: "conv4" 132 | type: "Convolution" 133 | bottom: "conv3" 134 | top: "conv4" 135 | param { lr_mult: 1.0 } 136 | param { lr_mult: 2.0 } 137 | convolution_param { 138 | num_output: 384 139 | kernel_size: 3 140 | pad: 1 141 | stride: 1 142 | } 143 | } 144 | layer { 145 | name: "relu4" 146 | type: "ReLU" 147 | bottom: "conv4" 148 | top: "conv4" 149 | } 150 | layer { 151 | name: "conv5" 152 | type: "Convolution" 153 | bottom: "conv4" 154 | top: "conv5" 155 | param { lr_mult: 1.0 } 156 | param { lr_mult: 2.0 } 157 | convolution_param { 158 | num_output: 256 159 | kernel_size: 3 160 | pad: 1 161 | stride: 1 162 | } 163 | } 164 | layer { 165 | name: "relu5" 166 | type: "ReLU" 167 | bottom: "conv5" 168 | top: "conv5" 169 | } 170 | 171 | #========= RCNN ============ 172 | 173 | layer { 174 | name: "roi_pool_conv5" 175 | type: "ROIPooling" 176 | bottom: "conv5" 177 | bottom: "rois" 178 | top: "roi_pool_conv5" 179 | roi_pooling_param { 180 | pooled_w: 6 181 | pooled_h: 6 182 | spatial_scale: 0.0625 # 1/16 183 | } 184 | } 185 | layer { 186 | name: "fc6" 187 | type: "InnerProduct" 188 | bottom: "roi_pool_conv5" 189 | top: "fc6" 190 | param { lr_mult: 1.0 } 191 | param { lr_mult: 2.0 } 192 | inner_product_param { 193 | num_output: 4096 194 | } 195 | } 196 | layer { 197 | name: "relu6" 198 | type: "ReLU" 199 | bottom: "fc6" 200 | top: "fc6" 201 | } 202 | layer { 203 | name: "drop6" 204 | type: "Dropout" 205 | bottom: "fc6" 206 | top: "fc6" 207 | dropout_param { 208 | dropout_ratio: 0.5 209 | scale_train: false 210 | } 211 | } 212 | layer { 213 | name: "fc7" 214 | type: "InnerProduct" 215 | bottom: "fc6" 216 | top: "fc7" 217 | param { lr_mult: 1.0 } 218 | param { lr_mult: 2.0 } 219 | inner_product_param { 220 | num_output: 4096 221 | } 222 | } 223 | layer { 224 | name: "relu7" 225 | type: "ReLU" 226 | bottom: "fc7" 227 | top: "fc7" 228 | } 229 | layer { 230 
| name: "drop7" 231 | type: "Dropout" 232 | bottom: "fc7" 233 | top: "fc7" 234 | dropout_param { 235 | dropout_ratio: 0.5 236 | scale_train: false 237 | } 238 | } 239 | layer { 240 | name: "cls_score" 241 | type: "InnerProduct" 242 | bottom: "fc7" 243 | top: "cls_score" 244 | param { lr_mult: 1.0 } 245 | param { lr_mult: 2.0 } 246 | inner_product_param { 247 | num_output: 21 248 | weight_filler { 249 | type: "gaussian" 250 | std: 0.01 251 | } 252 | bias_filler { 253 | type: "constant" 254 | value: 0 255 | } 256 | } 257 | } 258 | layer { 259 | name: "bbox_pred" 260 | type: "InnerProduct" 261 | bottom: "fc7" 262 | top: "bbox_pred" 263 | param { lr_mult: 1.0 } 264 | param { lr_mult: 2.0 } 265 | inner_product_param { 266 | num_output: 84 267 | weight_filler { 268 | type: "gaussian" 269 | std: 0.001 270 | } 271 | bias_filler { 272 | type: "constant" 273 | value: 0 274 | } 275 | } 276 | } 277 | layer { 278 | name: "loss_cls" 279 | type: "SoftmaxWithLoss" 280 | bottom: "cls_score" 281 | bottom: "labels" 282 | propagate_down: 1 283 | propagate_down: 0 284 | top: "cls_loss" 285 | loss_weight: 1 286 | loss_param { 287 | ignore_label: -1 288 | normalize: true 289 | } 290 | } 291 | layer { 292 | name: "loss_bbox" 293 | type: "SmoothL1Loss" 294 | bottom: "bbox_pred" 295 | bottom: "bbox_targets" 296 | bottom: "bbox_inside_weights" 297 | bottom: "bbox_outside_weights" 298 | top: "bbox_loss" 299 | loss_weight: 1 300 | } 301 | -------------------------------------------------------------------------------- /models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt: -------------------------------------------------------------------------------- 1 | name: "ZF" 2 | 3 | input: "data" 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 224 8 | dim: 224 9 | } 10 | 11 | input: "im_info" 12 | input_shape { 13 | dim: 1 14 | dim: 3 15 | } 16 | 17 | #========= conv1-conv5 ============ 18 | 19 | layer { 20 | name: "conv1" 21 | type: "Convolution" 22 | bottom: "data" 23 | top: "conv1" 24 | 
convolution_param { 25 | num_output: 96 26 | kernel_size: 7 27 | pad: 3 28 | stride: 2 29 | } 30 | } 31 | layer { 32 | name: "relu1" 33 | type: "ReLU" 34 | bottom: "conv1" 35 | top: "conv1" 36 | } 37 | layer { 38 | name: "norm1" 39 | type: "LRN" 40 | bottom: "conv1" 41 | top: "norm1" 42 | lrn_param { 43 | local_size: 3 44 | alpha: 0.00005 45 | beta: 0.75 46 | norm_region: WITHIN_CHANNEL 47 | engine: CAFFE 48 | } 49 | } 50 | layer { 51 | name: "pool1" 52 | type: "Pooling" 53 | bottom: "norm1" 54 | top: "pool1" 55 | pooling_param { 56 | kernel_size: 3 57 | stride: 2 58 | pad: 1 59 | pool: MAX 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | convolution_param { 68 | num_output: 256 69 | kernel_size: 5 70 | pad: 2 71 | stride: 2 72 | } 73 | } 74 | layer { 75 | name: "relu2" 76 | type: "ReLU" 77 | bottom: "conv2" 78 | top: "conv2" 79 | } 80 | layer { 81 | name: "norm2" 82 | type: "LRN" 83 | bottom: "conv2" 84 | top: "norm2" 85 | lrn_param { 86 | local_size: 3 87 | alpha: 0.00005 88 | beta: 0.75 89 | norm_region: WITHIN_CHANNEL 90 | engine: CAFFE 91 | } 92 | } 93 | layer { 94 | name: "pool2" 95 | type: "Pooling" 96 | bottom: "norm2" 97 | top: "pool2" 98 | pooling_param { 99 | kernel_size: 3 100 | stride: 2 101 | pad: 1 102 | pool: MAX 103 | } 104 | } 105 | layer { 106 | name: "conv3" 107 | type: "Convolution" 108 | bottom: "pool2" 109 | top: "conv3" 110 | convolution_param { 111 | num_output: 384 112 | kernel_size: 3 113 | pad: 1 114 | stride: 1 115 | } 116 | } 117 | layer { 118 | name: "relu3" 119 | type: "ReLU" 120 | bottom: "conv3" 121 | top: "conv3" 122 | } 123 | layer { 124 | name: "conv4" 125 | type: "Convolution" 126 | bottom: "conv3" 127 | top: "conv4" 128 | convolution_param { 129 | num_output: 384 130 | kernel_size: 3 131 | pad: 1 132 | stride: 1 133 | } 134 | } 135 | layer { 136 | name: "relu4" 137 | type: "ReLU" 138 | bottom: "conv4" 139 | top: "conv4" 140 | } 141 | layer { 142 | name: "conv5" 
143 | type: "Convolution" 144 | bottom: "conv4" 145 | top: "conv5" 146 | convolution_param { 147 | num_output: 256 148 | kernel_size: 3 149 | pad: 1 150 | stride: 1 151 | } 152 | } 153 | layer { 154 | name: "relu5" 155 | type: "ReLU" 156 | bottom: "conv5" 157 | top: "conv5" 158 | } 159 | 160 | #========= RPN ============ 161 | 162 | 163 | layer { 164 | name: "rpn_conv1" 165 | type: "Convolution" 166 | bottom: "conv5" 167 | top: "rpn_conv1" 168 | convolution_param { 169 | num_output: 256 170 | kernel_size: 3 pad: 1 stride: 1 171 | } 172 | } 173 | layer { 174 | name: "rpn_relu1" 175 | type: "ReLU" 176 | bottom: "rpn_conv1" 177 | top: "rpn_conv1" 178 | } 179 | layer { 180 | name: "rpn_cls_score" 181 | type: "Convolution" 182 | bottom: "rpn_conv1" 183 | top: "rpn_cls_score" 184 | convolution_param { 185 | num_output: 18 # 2(bg/fg) * 9(anchors) 186 | kernel_size: 1 pad: 0 stride: 1 187 | } 188 | } 189 | layer { 190 | name: "rpn_bbox_pred" 191 | type: "Convolution" 192 | bottom: "rpn_conv1" 193 | top: "rpn_bbox_pred" 194 | convolution_param { 195 | num_output: 36 # 4 * 9(anchors) 196 | kernel_size: 1 pad: 0 stride: 1 197 | } 198 | } 199 | layer { 200 | bottom: "rpn_cls_score" 201 | top: "rpn_cls_score_reshape" 202 | name: "rpn_cls_score_reshape" 203 | type: "Reshape" 204 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 205 | } 206 | 207 | #========= RoI Proposal ============ 208 | 209 | layer { 210 | name: "rpn_cls_prob" 211 | type: "Softmax" 212 | bottom: "rpn_cls_score_reshape" 213 | top: "rpn_cls_prob" 214 | } 215 | layer { 216 | name: 'rpn_cls_prob_reshape' 217 | type: 'Reshape' 218 | bottom: 'rpn_cls_prob' 219 | top: 'rpn_cls_prob_reshape' 220 | reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } 221 | } 222 | layer { 223 | name: 'proposal' 224 | type: 'Python' 225 | bottom: 'rpn_cls_prob_reshape' 226 | bottom: 'rpn_bbox_pred' 227 | bottom: 'im_info' 228 | top: 'rois' 229 | python_param { 230 | module: 'rpn.proposal_layer' 231 | layer: 
'ProposalLayer' 232 | param_str: "'feat_stride': 16" 233 | } 234 | } 235 | 236 | #========= RCNN ============ 237 | 238 | layer { 239 | name: "roi_pool_conv5" 240 | type: "ROIPooling" 241 | bottom: "conv5" 242 | bottom: "rois" 243 | top: "roi_pool_conv5" 244 | roi_pooling_param { 245 | pooled_w: 6 246 | pooled_h: 6 247 | spatial_scale: 0.0625 # 1/16 248 | } 249 | } 250 | layer { 251 | name: "fc6" 252 | type: "InnerProduct" 253 | bottom: "roi_pool_conv5" 254 | top: "fc6" 255 | inner_product_param { 256 | num_output: 4096 257 | } 258 | } 259 | layer { 260 | name: "relu6" 261 | type: "ReLU" 262 | bottom: "fc6" 263 | top: "fc6" 264 | } 265 | layer { 266 | name: "drop6" 267 | type: "Dropout" 268 | bottom: "fc6" 269 | top: "fc6" 270 | dropout_param { 271 | dropout_ratio: 0.5 272 | scale_train: false 273 | } 274 | } 275 | layer { 276 | name: "fc7" 277 | type: "InnerProduct" 278 | bottom: "fc6" 279 | top: "fc7" 280 | inner_product_param { 281 | num_output: 4096 282 | } 283 | } 284 | layer { 285 | name: "relu7" 286 | type: "ReLU" 287 | bottom: "fc7" 288 | top: "fc7" 289 | } 290 | layer { 291 | name: "drop7" 292 | type: "Dropout" 293 | bottom: "fc7" 294 | top: "fc7" 295 | dropout_param { 296 | dropout_ratio: 0.5 297 | scale_train: false 298 | } 299 | } 300 | layer { 301 | name: "cls_score" 302 | type: "InnerProduct" 303 | bottom: "fc7" 304 | top: "cls_score" 305 | inner_product_param { 306 | num_output: 21 307 | } 308 | } 309 | layer { 310 | name: "bbox_pred" 311 | type: "InnerProduct" 312 | bottom: "fc7" 313 | top: "bbox_pred" 314 | inner_product_param { 315 | num_output: 84 316 | } 317 | } 318 | layer { 319 | name: "cls_prob" 320 | type: "Softmax" 321 | bottom: "cls_score" 322 | top: "cls_prob" 323 | loss_param { 324 | ignore_label: -1 325 | normalize: true 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /models/pascal_voc/ZF/faster_rcnn_alt_opt/rpn_test.pt: 
-------------------------------------------------------------------------------- 1 | name: "ZF" 2 | 3 | input: "data" 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 224 8 | dim: 224 9 | } 10 | 11 | input: "im_info" 12 | input_shape { 13 | dim: 1 14 | dim: 3 15 | } 16 | 17 | # ------------------------ layer 1 ----------------------------- 18 | layer { 19 | name: "conv1" 20 | type: "Convolution" 21 | bottom: "data" 22 | top: "conv1" 23 | convolution_param { 24 | num_output: 96 25 | kernel_size: 7 26 | pad: 3 27 | stride: 2 28 | } 29 | } 30 | layer { 31 | name: "relu1" 32 | type: "ReLU" 33 | bottom: "conv1" 34 | top: "conv1" 35 | } 36 | layer { 37 | name: "norm1" 38 | type: "LRN" 39 | bottom: "conv1" 40 | top: "norm1" 41 | lrn_param { 42 | local_size: 3 43 | alpha: 0.00005 44 | beta: 0.75 45 | norm_region: WITHIN_CHANNEL 46 | engine: CAFFE 47 | } 48 | } 49 | layer { 50 | name: "pool1" 51 | type: "Pooling" 52 | bottom: "norm1" 53 | top: "pool1" 54 | pooling_param { 55 | kernel_size: 3 56 | stride: 2 57 | pad: 1 58 | pool: MAX 59 | } 60 | } 61 | layer { 62 | name: "conv2" 63 | type: "Convolution" 64 | bottom: "pool1" 65 | top: "conv2" 66 | convolution_param { 67 | num_output: 256 68 | kernel_size: 5 69 | pad: 2 70 | stride: 2 71 | } 72 | } 73 | layer { 74 | name: "relu2" 75 | type: "ReLU" 76 | bottom: "conv2" 77 | top: "conv2" 78 | } 79 | 80 | layer { 81 | name: "norm2" 82 | type: "LRN" 83 | bottom: "conv2" 84 | top: "norm2" 85 | lrn_param { 86 | local_size: 3 87 | alpha: 0.00005 88 | beta: 0.75 89 | norm_region: WITHIN_CHANNEL 90 | engine: CAFFE 91 | } 92 | } 93 | layer { 94 | name: "pool2" 95 | type: "Pooling" 96 | bottom: "norm2" 97 | top: "pool2" 98 | pooling_param { 99 | kernel_size: 3 100 | stride: 2 101 | pad: 1 102 | pool: MAX 103 | } 104 | } 105 | layer { 106 | name: "conv3" 107 | type: "Convolution" 108 | bottom: "pool2" 109 | top: "conv3" 110 | convolution_param { 111 | num_output: 384 112 | kernel_size: 3 113 | pad: 1 114 | stride: 1 115 | } 116 | } 117 | 
layer { 118 | name: "relu3" 119 | type: "ReLU" 120 | bottom: "conv3" 121 | top: "conv3" 122 | } 123 | layer { 124 | name: "conv4" 125 | type: "Convolution" 126 | bottom: "conv3" 127 | top: "conv4" 128 | convolution_param { 129 | num_output: 384 130 | kernel_size: 3 131 | pad: 1 132 | stride: 1 133 | } 134 | } 135 | layer { 136 | name: "relu4" 137 | type: "ReLU" 138 | bottom: "conv4" 139 | top: "conv4" 140 | } 141 | layer { 142 | name: "conv5" 143 | type: "Convolution" 144 | bottom: "conv4" 145 | top: "conv5" 146 | convolution_param { 147 | num_output: 256 148 | kernel_size: 3 149 | pad: 1 150 | stride: 1 151 | } 152 | } 153 | layer { 154 | name: "relu5" 155 | type: "ReLU" 156 | bottom: "conv5" 157 | top: "conv5" 158 | } 159 | 160 | #-----------------------layer +------------------------- 161 | 162 | layer { 163 | name: "rpn_conv1" 164 | type: "Convolution" 165 | bottom: "conv5" 166 | top: "rpn_conv1" 167 | convolution_param { 168 | num_output: 256 169 | kernel_size: 3 pad: 1 stride: 1 170 | } 171 | } 172 | layer { 173 | name: "rpn_relu1" 174 | type: "ReLU" 175 | bottom: "rpn_conv1" 176 | top: "rpn_conv1" 177 | } 178 | layer { 179 | name: "rpn_cls_score" 180 | type: "Convolution" 181 | bottom: "rpn_conv1" 182 | top: "rpn_cls_score" 183 | convolution_param { 184 | num_output: 18 # 2(bg/fg) * 9(anchors) 185 | kernel_size: 1 pad: 0 stride: 1 186 | } 187 | } 188 | layer { 189 | name: "rpn_bbox_pred" 190 | type: "Convolution" 191 | bottom: "rpn_conv1" 192 | top: "rpn_bbox_pred" 193 | convolution_param { 194 | num_output: 36 # 4 * 9(anchors) 195 | kernel_size: 1 pad: 0 stride: 1 196 | } 197 | } 198 | layer { 199 | bottom: "rpn_cls_score" 200 | top: "rpn_cls_score_reshape" 201 | name: "rpn_cls_score_reshape" 202 | type: "Reshape" 203 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 204 | } 205 | 206 | #-----------------------output------------------------ 207 | layer { 208 | name: "rpn_cls_prob" 209 | type: "Softmax" 210 | bottom: "rpn_cls_score_reshape" 211 | 
top: "rpn_cls_prob" 212 | } 213 | layer { 214 | name: 'rpn_cls_prob_reshape' 215 | type: 'Reshape' 216 | bottom: 'rpn_cls_prob' 217 | top: 'rpn_cls_prob_reshape' 218 | reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } 219 | } 220 | layer { 221 | name: 'proposal' 222 | type: 'Python' 223 | bottom: 'rpn_cls_prob_reshape' 224 | bottom: 'rpn_bbox_pred' 225 | bottom: 'im_info' 226 | top: 'rois' 227 | top: 'scores' 228 | python_param { 229 | module: 'rpn.proposal_layer' 230 | layer: 'ProposalLayer' 231 | param_str: "'feat_stride': 16" 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_fast_rcnn_solver30k40k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 30000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "zf_fast_rcnn" 17 | -------------------------------------------------------------------------------- /models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_rpn_train.pt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 60000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "zf_rpn" 17 | 
-------------------------------------------------------------------------------- /models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_rpn_train.pt: -------------------------------------------------------------------------------- 1 | name: "ZF" 2 | layer { 3 | name: 'input-data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'im_info' 7 | top: 'gt_boxes' 8 | python_param { 9 | module: 'roi_data_layer.layer' 10 | layer: 'RoIDataLayer' 11 | param_str: "'num_classes': 21" 12 | } 13 | } 14 | 15 | #========= conv1-conv5 ============ 16 | 17 | layer { 18 | name: "conv1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1" 22 | param { lr_mult: 1.0 } 23 | param { lr_mult: 2.0 } 24 | convolution_param { 25 | num_output: 96 26 | kernel_size: 7 27 | pad: 3 28 | stride: 2 29 | } 30 | } 31 | layer { 32 | name: "relu1" 33 | type: "ReLU" 34 | bottom: "conv1" 35 | top: "conv1" 36 | } 37 | layer { 38 | name: "norm1" 39 | type: "LRN" 40 | bottom: "conv1" 41 | top: "norm1" 42 | lrn_param { 43 | local_size: 3 44 | alpha: 0.00005 45 | beta: 0.75 46 | norm_region: WITHIN_CHANNEL 47 | engine: CAFFE 48 | } 49 | } 50 | layer { 51 | name: "pool1" 52 | type: "Pooling" 53 | bottom: "norm1" 54 | top: "pool1" 55 | pooling_param { 56 | kernel_size: 3 57 | stride: 2 58 | pad: 1 59 | pool: MAX 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | param { lr_mult: 1.0 } 68 | param { lr_mult: 2.0 } 69 | convolution_param { 70 | num_output: 256 71 | kernel_size: 5 72 | pad: 2 73 | stride: 2 74 | } 75 | } 76 | layer { 77 | name: "relu2" 78 | type: "ReLU" 79 | bottom: "conv2" 80 | top: "conv2" 81 | } 82 | layer { 83 | name: "norm2" 84 | type: "LRN" 85 | bottom: "conv2" 86 | top: "norm2" 87 | lrn_param { 88 | local_size: 3 89 | alpha: 0.00005 90 | beta: 0.75 91 | norm_region: WITHIN_CHANNEL 92 | engine: CAFFE 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "norm2" 99 | top: "pool2" 100 | pooling_param { 101 | 
kernel_size: 3 102 | stride: 2 103 | pad: 1 104 | pool: MAX 105 | } 106 | } 107 | layer { 108 | name: "conv3" 109 | type: "Convolution" 110 | bottom: "pool2" 111 | top: "conv3" 112 | param { lr_mult: 1.0 } 113 | param { lr_mult: 2.0 } 114 | convolution_param { 115 | num_output: 384 116 | kernel_size: 3 117 | pad: 1 118 | stride: 1 119 | } 120 | } 121 | layer { 122 | name: "relu3" 123 | type: "ReLU" 124 | bottom: "conv3" 125 | top: "conv3" 126 | } 127 | layer { 128 | name: "conv4" 129 | type: "Convolution" 130 | bottom: "conv3" 131 | top: "conv4" 132 | param { lr_mult: 1.0 } 133 | param { lr_mult: 2.0 } 134 | convolution_param { 135 | num_output: 384 136 | kernel_size: 3 137 | pad: 1 138 | stride: 1 139 | } 140 | } 141 | layer { 142 | name: "relu4" 143 | type: "ReLU" 144 | bottom: "conv4" 145 | top: "conv4" 146 | } 147 | layer { 148 | name: "conv5" 149 | type: "Convolution" 150 | bottom: "conv4" 151 | top: "conv5" 152 | param { lr_mult: 1.0 } 153 | param { lr_mult: 2.0 } 154 | convolution_param { 155 | num_output: 256 156 | kernel_size: 3 157 | pad: 1 158 | stride: 1 159 | } 160 | } 161 | layer { 162 | name: "relu5" 163 | type: "ReLU" 164 | bottom: "conv5" 165 | top: "conv5" 166 | } 167 | 168 | #========= RPN ============ 169 | 170 | layer { 171 | name: "rpn_conv1" 172 | type: "Convolution" 173 | bottom: "conv5" 174 | top: "rpn_conv1" 175 | param { lr_mult: 1.0 } 176 | param { lr_mult: 2.0 } 177 | convolution_param { 178 | num_output: 256 179 | kernel_size: 3 pad: 1 stride: 1 180 | weight_filler { type: "gaussian" std: 0.01 } 181 | bias_filler { type: "constant" value: 0 } 182 | } 183 | } 184 | layer { 185 | name: "rpn_relu1" 186 | type: "ReLU" 187 | bottom: "rpn_conv1" 188 | top: "rpn_conv1" 189 | } 190 | layer { 191 | name: "rpn_cls_score" 192 | type: "Convolution" 193 | bottom: "rpn_conv1" 194 | top: "rpn_cls_score" 195 | param { lr_mult: 1.0 } 196 | param { lr_mult: 2.0 } 197 | convolution_param { 198 | num_output: 18 # 2(bg/fg) * 9(anchors) 199 | kernel_size: 1 
pad: 0 stride: 1 200 | weight_filler { type: "gaussian" std: 0.01 } 201 | bias_filler { type: "constant" value: 0 } 202 | } 203 | } 204 | layer { 205 | name: "rpn_bbox_pred" 206 | type: "Convolution" 207 | bottom: "rpn_conv1" 208 | top: "rpn_bbox_pred" 209 | param { lr_mult: 1.0 } 210 | param { lr_mult: 2.0 } 211 | convolution_param { 212 | num_output: 36 # 4 * 9(anchors) 213 | kernel_size: 1 pad: 0 stride: 1 214 | weight_filler { type: "gaussian" std: 0.01 } 215 | bias_filler { type: "constant" value: 0 } 216 | } 217 | } 218 | layer { 219 | bottom: "rpn_cls_score" 220 | top: "rpn_cls_score_reshape" 221 | name: "rpn_cls_score_reshape" 222 | type: "Reshape" 223 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 224 | } 225 | layer { 226 | name: 'rpn-data' 227 | type: 'Python' 228 | bottom: 'rpn_cls_score' 229 | bottom: 'gt_boxes' 230 | bottom: 'im_info' 231 | bottom: 'data' 232 | top: 'rpn_labels' 233 | top: 'rpn_bbox_targets' 234 | top: 'rpn_bbox_inside_weights' 235 | top: 'rpn_bbox_outside_weights' 236 | python_param { 237 | module: 'rpn.anchor_target_layer' 238 | layer: 'AnchorTargetLayer' 239 | param_str: "'feat_stride': 16" 240 | } 241 | } 242 | layer { 243 | name: "rpn_loss_cls" 244 | type: "SoftmaxWithLoss" 245 | bottom: "rpn_cls_score_reshape" 246 | bottom: "rpn_labels" 247 | propagate_down: 1 248 | propagate_down: 0 249 | top: "rpn_cls_loss" 250 | loss_weight: 1 251 | loss_param { 252 | ignore_label: -1 253 | normalize: true 254 | } 255 | } 256 | layer { 257 | name: "rpn_loss_bbox" 258 | type: "SmoothL1Loss" 259 | bottom: "rpn_bbox_pred" 260 | bottom: "rpn_bbox_targets" 261 | bottom: "rpn_bbox_inside_weights" 262 | bottom: "rpn_bbox_outside_weights" 263 | top: "rpn_loss_bbox" 264 | loss_weight: 1 265 | smooth_l1_loss_param { sigma: 3.0 } 266 | } 267 | 268 | #========= RCNN ============ 269 | # Dummy layers so that initial parameters are saved into the output net 270 | 271 | layer { 272 | name: "dummy_roi_pool_conv5" 273 | type: "DummyData" 274 | 
top: "dummy_roi_pool_conv5" 275 | dummy_data_param { 276 | shape { dim: 1 dim: 9216 } 277 | data_filler { type: "gaussian" std: 0.01 } 278 | } 279 | } 280 | layer { 281 | name: "fc6" 282 | type: "InnerProduct" 283 | bottom: "dummy_roi_pool_conv5" 284 | top: "fc6" 285 | param { lr_mult: 0 decay_mult: 0 } 286 | param { lr_mult: 0 decay_mult: 0 } 287 | inner_product_param { 288 | num_output: 4096 289 | } 290 | } 291 | layer { 292 | name: "relu6" 293 | type: "ReLU" 294 | bottom: "fc6" 295 | top: "fc6" 296 | } 297 | layer { 298 | name: "fc7" 299 | type: "InnerProduct" 300 | bottom: "fc6" 301 | top: "fc7" 302 | param { lr_mult: 0 decay_mult: 0 } 303 | param { lr_mult: 0 decay_mult: 0 } 304 | inner_product_param { 305 | num_output: 4096 306 | } 307 | } 308 | layer { 309 | name: "silence_fc7" 310 | type: "Silence" 311 | bottom: "fc7" 312 | } 313 | -------------------------------------------------------------------------------- /models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_fast_rcnn_solver30k40k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 30000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "zf_fast_rcnn" 17 | -------------------------------------------------------------------------------- /models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_rpn_solver60k80k.pt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_rpn_train.pt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 60000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | 
weight_decay: 0.0005 11 | 12 | # We disable standard caffe solver snapshotting and implement our own snapshot 13 | # function 14 | snapshot: 0 15 | # We still use the snapshot prefix, though 16 | snapshot_prefix: "zf_rpn" 17 | -------------------------------------------------------------------------------- /models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_rpn_train.pt: -------------------------------------------------------------------------------- 1 | name: "ZF" 2 | layer { 3 | name: 'input-data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'im_info' 7 | top: 'gt_boxes' 8 | python_param { 9 | module: 'roi_data_layer.layer' 10 | layer: 'RoIDataLayer' 11 | param_str: "'num_classes': 21" 12 | } 13 | } 14 | 15 | #========= conv1-conv5 ============ 16 | 17 | layer { 18 | name: "conv1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1" 22 | param { lr_mult: 0 decay_mult: 0 } 23 | param { lr_mult: 0 decay_mult: 0 } 24 | convolution_param { 25 | num_output: 96 26 | kernel_size: 7 27 | pad: 3 28 | stride: 2 29 | } 30 | } 31 | layer { 32 | name: "relu1" 33 | type: "ReLU" 34 | bottom: "conv1" 35 | top: "conv1" 36 | } 37 | layer { 38 | name: "norm1" 39 | type: "LRN" 40 | bottom: "conv1" 41 | top: "norm1" 42 | lrn_param { 43 | local_size: 3 44 | alpha: 0.00005 45 | beta: 0.75 46 | norm_region: WITHIN_CHANNEL 47 | engine: CAFFE 48 | } 49 | } 50 | layer { 51 | name: "pool1" 52 | type: "Pooling" 53 | bottom: "norm1" 54 | top: "pool1" 55 | pooling_param { 56 | kernel_size: 3 57 | stride: 2 58 | pad: 1 59 | pool: MAX 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | param { lr_mult: 0 decay_mult: 0 } 68 | param { lr_mult: 0 decay_mult: 0 } 69 | convolution_param { 70 | num_output: 256 71 | kernel_size: 5 72 | pad: 2 73 | stride: 2 74 | } 75 | } 76 | layer { 77 | name: "relu2" 78 | type: "ReLU" 79 | bottom: "conv2" 80 | top: "conv2" 81 | } 82 | layer { 83 | name: "norm2" 84 | type: "LRN" 85 | bottom: "conv2" 
86 | top: "norm2" 87 | lrn_param { 88 | local_size: 3 89 | alpha: 0.00005 90 | beta: 0.75 91 | norm_region: WITHIN_CHANNEL 92 | engine: CAFFE 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "norm2" 99 | top: "pool2" 100 | pooling_param { 101 | kernel_size: 3 102 | stride: 2 103 | pad: 1 104 | pool: MAX 105 | } 106 | } 107 | layer { 108 | name: "conv3" 109 | type: "Convolution" 110 | bottom: "pool2" 111 | top: "conv3" 112 | param { lr_mult: 0 decay_mult: 0 } 113 | param { lr_mult: 0 decay_mult: 0 } 114 | convolution_param { 115 | num_output: 384 116 | kernel_size: 3 117 | pad: 1 118 | stride: 1 119 | } 120 | } 121 | layer { 122 | name: "relu3" 123 | type: "ReLU" 124 | bottom: "conv3" 125 | top: "conv3" 126 | } 127 | layer { 128 | name: "conv4" 129 | type: "Convolution" 130 | bottom: "conv3" 131 | top: "conv4" 132 | param { lr_mult: 0 decay_mult: 0 } 133 | param { lr_mult: 0 decay_mult: 0 } 134 | convolution_param { 135 | num_output: 384 136 | kernel_size: 3 137 | pad: 1 138 | stride: 1 139 | } 140 | } 141 | layer { 142 | name: "relu4" 143 | type: "ReLU" 144 | bottom: "conv4" 145 | top: "conv4" 146 | } 147 | layer { 148 | name: "conv5" 149 | type: "Convolution" 150 | bottom: "conv4" 151 | top: "conv5" 152 | param { lr_mult: 0 decay_mult: 0 } 153 | param { lr_mult: 0 decay_mult: 0 } 154 | convolution_param { 155 | num_output: 256 156 | kernel_size: 3 157 | pad: 1 158 | stride: 1 159 | } 160 | } 161 | layer { 162 | name: "relu5" 163 | type: "ReLU" 164 | bottom: "conv5" 165 | top: "conv5" 166 | } 167 | 168 | #========= RPN ============ 169 | 170 | layer { 171 | name: "rpn_conv1" 172 | type: "Convolution" 173 | bottom: "conv5" 174 | top: "rpn_conv1" 175 | param { lr_mult: 1.0 } 176 | param { lr_mult: 2.0 } 177 | convolution_param { 178 | num_output: 256 179 | kernel_size: 3 pad: 1 stride: 1 180 | weight_filler { type: "gaussian" std: 0.01 } 181 | bias_filler { type: "constant" value: 0 } 182 | } 183 | } 184 | layer { 185 | name: 
"rpn_relu1" 186 | type: "ReLU" 187 | bottom: "rpn_conv1" 188 | top: "rpn_conv1" 189 | } 190 | layer { 191 | name: "rpn_cls_score" 192 | type: "Convolution" 193 | bottom: "rpn_conv1" 194 | top: "rpn_cls_score" 195 | param { lr_mult: 1.0 } 196 | param { lr_mult: 2.0 } 197 | convolution_param { 198 | num_output: 18 # 2(bg/fg) * 9(anchors) 199 | kernel_size: 1 pad: 0 stride: 1 200 | weight_filler { type: "gaussian" std: 0.01 } 201 | bias_filler { type: "constant" value: 0 } 202 | } 203 | } 204 | layer { 205 | name: "rpn_bbox_pred" 206 | type: "Convolution" 207 | bottom: "rpn_conv1" 208 | top: "rpn_bbox_pred" 209 | param { lr_mult: 1.0 } 210 | param { lr_mult: 2.0 } 211 | convolution_param { 212 | num_output: 36 # 4 * 9(anchors) 213 | kernel_size: 1 pad: 0 stride: 1 214 | weight_filler { type: "gaussian" std: 0.01 } 215 | bias_filler { type: "constant" value: 0 } 216 | } 217 | } 218 | layer { 219 | bottom: "rpn_cls_score" 220 | top: "rpn_cls_score_reshape" 221 | name: "rpn_cls_score_reshape" 222 | type: "Reshape" 223 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 224 | } 225 | layer { 226 | name: 'rpn-data' 227 | type: 'Python' 228 | bottom: 'rpn_cls_score' 229 | bottom: 'gt_boxes' 230 | bottom: 'im_info' 231 | bottom: 'data' 232 | top: 'rpn_labels' 233 | top: 'rpn_bbox_targets' 234 | top: 'rpn_bbox_inside_weights' 235 | top: 'rpn_bbox_outside_weights' 236 | python_param { 237 | module: 'rpn.anchor_target_layer' 238 | layer: 'AnchorTargetLayer' 239 | param_str: "'feat_stride': 16" 240 | } 241 | } 242 | layer { 243 | name: "rpn_loss_cls" 244 | type: "SoftmaxWithLoss" 245 | bottom: "rpn_cls_score_reshape" 246 | bottom: "rpn_labels" 247 | propagate_down: 1 248 | propagate_down: 0 249 | top: "rpn_cls_loss" 250 | loss_weight: 1 251 | loss_param { 252 | ignore_label: -1 253 | normalize: true 254 | } 255 | } 256 | layer { 257 | name: "rpn_loss_bbox" 258 | type: "SmoothL1Loss" 259 | bottom: "rpn_bbox_pred" 260 | bottom: "rpn_bbox_targets" 261 | bottom: 
"rpn_bbox_inside_weights" 262 | bottom: "rpn_bbox_outside_weights" 263 | top: "rpn_loss_bbox" 264 | loss_weight: 1 265 | smooth_l1_loss_param { sigma: 3.0 } 266 | } 267 | 268 | #========= RCNN ============ 269 | # Dummy layers so that initial parameters are saved into the output net 270 | 271 | layer { 272 | name: "dummy_roi_pool_conv5" 273 | type: "DummyData" 274 | top: "dummy_roi_pool_conv5" 275 | dummy_data_param { 276 | shape { dim: 1 dim: 9216 } 277 | data_filler { type: "gaussian" std: 0.01 } 278 | } 279 | } 280 | layer { 281 | name: "fc6" 282 | type: "InnerProduct" 283 | bottom: "dummy_roi_pool_conv5" 284 | top: "fc6" 285 | param { lr_mult: 0 decay_mult: 0 } 286 | param { lr_mult: 0 decay_mult: 0 } 287 | inner_product_param { 288 | num_output: 4096 289 | } 290 | } 291 | layer { 292 | name: "relu6" 293 | type: "ReLU" 294 | bottom: "fc6" 295 | top: "fc6" 296 | } 297 | layer { 298 | name: "fc7" 299 | type: "InnerProduct" 300 | bottom: "fc6" 301 | top: "fc7" 302 | param { lr_mult: 0 decay_mult: 0 } 303 | param { lr_mult: 0 decay_mult: 0 } 304 | inner_product_param { 305 | num_output: 4096 306 | } 307 | } 308 | layer { 309 | name: "silence_fc7" 310 | type: "Silence" 311 | bottom: "fc7" 312 | } 313 | -------------------------------------------------------------------------------- /models/pascal_voc/ZF/faster_rcnn_end2end/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/ZF/faster_rcnn_end2end/train.prototxt" 2 | 3 | base_lr: 0.001 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 50000 7 | display: 20 8 | average_loss: 100 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | 12 | #base_lr: 0.001 13 | #lr_policy: "exp" 14 | #gamma: 0.999539589 # (0.00001/0.001)^(1/10000) 15 | #display: 1 16 | #average_loss: 100 17 | #momentum: 0.9 18 | #weight_decay: 0.0005 19 | 20 | # We disable standard caffe solver snapshotting and implement our own snapshot 21 | # function 22 | snapshot: 0 23 | # We 
def add_path(path):
    """Prepend *path* to sys.path unless it is already present."""
    if path in sys.path:
        return
    sys.path.insert(0, path)
arguments.""" 20 | parser = argparse.ArgumentParser(description='Compress a Fast R-CNN network') 21 | parser.add_argument('--def', dest='prototxt', 22 | help='prototxt file defining the uncompressed network', 23 | default=None, type=str) 24 | parser.add_argument('--def-svd', dest='prototxt_svd', 25 | help='prototxt file defining the SVD compressed network', 26 | default=None, type=str) 27 | parser.add_argument('--net', dest='caffemodel', 28 | help='model to compress', 29 | default=None, type=str) 30 | 31 | if len(sys.argv) == 1: 32 | parser.print_help() 33 | sys.exit(1) 34 | 35 | args = parser.parse_args() 36 | return args 37 | 38 | def compress_weights(W, l): 39 | """Compress the weight matrix W of an inner product (fully connected) layer 40 | using truncated SVD. 41 | 42 | Parameters: 43 | W: N x M weights matrix 44 | l: number of singular values to retain 45 | 46 | Returns: 47 | Ul, L: matrices such that W \approx Ul*L 48 | """ 49 | 50 | # numpy doesn't seem to have a fast truncated SVD algorithm... 
51 | # this could be faster 52 | U, s, V = np.linalg.svd(W, full_matrices=False) 53 | 54 | Ul = U[:, :l] 55 | sl = s[:l] 56 | Vl = V[:l, :] 57 | 58 | L = np.dot(np.diag(sl), Vl) 59 | return Ul, L 60 | 61 | def main(): 62 | args = parse_args() 63 | 64 | # prototxt = 'models/VGG16/test.prototxt' 65 | # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel' 66 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST) 67 | 68 | # prototxt_svd = 'models/VGG16/svd/test_fc6_fc7.prototxt' 69 | # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel' 70 | net_svd = caffe.Net(args.prototxt_svd, args.caffemodel, caffe.TEST) 71 | 72 | print('Uncompressed network {} : {}'.format(args.prototxt, args.caffemodel)) 73 | print('Compressed network prototxt {}'.format(args.prototxt_svd)) 74 | 75 | out = os.path.splitext(os.path.basename(args.caffemodel))[0] + '_svd' 76 | out_dir = os.path.dirname(args.caffemodel) 77 | 78 | # Compress fc6 79 | if net_svd.params.has_key('fc6_L'): 80 | l_fc6 = net_svd.params['fc6_L'][0].data.shape[0] 81 | print(' fc6_L bottleneck size: {}'.format(l_fc6)) 82 | 83 | # uncompressed weights and biases 84 | W_fc6 = net.params['fc6'][0].data 85 | B_fc6 = net.params['fc6'][1].data 86 | 87 | print(' compressing fc6...') 88 | Ul_fc6, L_fc6 = compress_weights(W_fc6, l_fc6) 89 | 90 | assert(len(net_svd.params['fc6_L']) == 1) 91 | 92 | # install compressed matrix factors (and original biases) 93 | net_svd.params['fc6_L'][0].data[...] = L_fc6 94 | 95 | net_svd.params['fc6_U'][0].data[...] = Ul_fc6 96 | net_svd.params['fc6_U'][1].data[...] 
= B_fc6 97 | 98 | out += '_fc6_{}'.format(l_fc6) 99 | 100 | # Compress fc7 101 | if net_svd.params.has_key('fc7_L'): 102 | l_fc7 = net_svd.params['fc7_L'][0].data.shape[0] 103 | print ' fc7_L bottleneck size: {}'.format(l_fc7) 104 | 105 | W_fc7 = net.params['fc7'][0].data 106 | B_fc7 = net.params['fc7'][1].data 107 | 108 | print(' compressing fc7...') 109 | Ul_fc7, L_fc7 = compress_weights(W_fc7, l_fc7) 110 | 111 | assert(len(net_svd.params['fc7_L']) == 1) 112 | 113 | net_svd.params['fc7_L'][0].data[...] = L_fc7 114 | 115 | net_svd.params['fc7_U'][0].data[...] = Ul_fc7 116 | net_svd.params['fc7_U'][1].data[...] = B_fc7 117 | 118 | out += '_fc7_{}'.format(l_fc7) 119 | 120 | filename = '{}/{}.caffemodel'.format(out_dir, out) 121 | net_svd.save(filename) 122 | print 'Wrote svd model to: {:s}'.format(filename) 123 | 124 | if __name__ == '__main__': 125 | main() 126 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Faster R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """ 11 | Demo script showing detections in sample images. 12 | 13 | See README.md for installation instructions before running. 
14 | """ 15 | 16 | import _init_paths 17 | from fast_rcnn.config import cfg 18 | from fast_rcnn.test import im_detect 19 | from fast_rcnn.nms_wrapper import nms 20 | from utils.timer import Timer 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import scipy.io as sio 24 | import caffe, os, sys, cv2 25 | import argparse 26 | 27 | CLASSES = ('__background__', 28 | 'aeroplane', 'bicycle', 'bird', 'boat', 29 | 'bottle', 'bus', 'car', 'cat', 'chair', 30 | 'cow', 'diningtable', 'dog', 'horse', 31 | 'motorbike', 'person', 'pottedplant', 32 | 'sheep', 'sofa', 'train', 'tvmonitor') 33 | 34 | NETS = {'vgg16': ('VGG16', 35 | 'VGG16_faster_rcnn_final.caffemodel'), 36 | 'zf': ('ZF', 37 | 'ZF_faster_rcnn_final.caffemodel')} 38 | 39 | 40 | def vis_detections(im, class_name, dets, thresh=0.5): 41 | """Draw detected bounding boxes.""" 42 | inds = np.where(dets[:, -1] >= thresh)[0] 43 | if len(inds) == 0: 44 | return 45 | 46 | im = im[:, :, (2, 1, 0)] 47 | fig, ax = plt.subplots(figsize=(12, 12)) 48 | ax.imshow(im, aspect='equal') 49 | for i in inds: 50 | bbox = dets[i, :4] 51 | score = dets[i, -1] 52 | 53 | ax.add_patch( 54 | plt.Rectangle((bbox[0], bbox[1]), 55 | bbox[2] - bbox[0], 56 | bbox[3] - bbox[1], fill=False, 57 | edgecolor='red', linewidth=3.5) 58 | ) 59 | ax.text(bbox[0], bbox[1] - 2, 60 | '{:s} {:.3f}'.format(class_name, score), 61 | bbox=dict(facecolor='blue', alpha=0.5), 62 | fontsize=14, color='white') 63 | 64 | ax.set_title(('{} detections with ' 65 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 66 | thresh), 67 | fontsize=14) 68 | plt.axis('off') 69 | plt.tight_layout() 70 | plt.draw() 71 | 72 | def demo(net, image_name): 73 | """Detect object classes in an image using pre-computed object proposals.""" 74 | 75 | # Load the demo image 76 | im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) 77 | im = cv2.imread(im_file) 78 | 79 | # Detect all object classes and regress object bounds 80 | timer = Timer() 81 | timer.tic() 82 | scores, 
def parse_args():
    """Parse the demo's command-line arguments into a Namespace."""
    parser = argparse.ArgumentParser(description='Faster R-CNN demo')
    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
                        default=0, type=int)
    parser.add_argument('--cpu', dest='cpu_mode',
                        help='Use CPU mode (overrides --gpu)',
                        action='store_true')
    parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
                        choices=NETS.keys(), default='vgg16')
    return parser.parse_args()
def parse_args():
    """Parse command-line arguments for the recall-evaluation script."""
    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to test',
                        default='voc_2007_test', type=str)
    parser.add_argument('--method', dest='method',
                        help='proposal method',
                        default='selective_search', type=str)
    parser.add_argument('--rpn-file', dest='rpn_file',
                        default=None, type=str)

    # With no arguments at all, print usage instead of running on defaults.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    return parser.parse_args()
gt_overlaps, recalls, thresholds = \ 50 | imdb.evaluate_recall(candidate_boxes=candidate_boxes) 51 | print 'Method: {}'.format(args.method) 52 | print 'AverageRec: {:.3f}'.format(ar) 53 | 54 | def recall_at(t): 55 | ind = np.where(thresholds > t - 1e-5)[0][0] 56 | assert np.isclose(thresholds[ind], t) 57 | return recalls[ind] 58 | 59 | print 'Recall@0.5: {:.3f}'.format(recall_at(0.5)) 60 | print 'Recall@0.6: {:.3f}'.format(recall_at(0.6)) 61 | print 'Recall@0.7: {:.3f}'.format(recall_at(0.7)) 62 | print 'Recall@0.8: {:.3f}'.format(recall_at(0.8)) 63 | print 'Recall@0.9: {:.3f}'.format(recall_at(0.9)) 64 | # print again for easy spreadsheet copying 65 | print '{:.3f}'.format(ar) 66 | print '{:.3f}'.format(recall_at(0.5)) 67 | print '{:.3f}'.format(recall_at(0.6)) 68 | print '{:.3f}'.format(recall_at(0.7)) 69 | print '{:.3f}'.format(recall_at(0.8)) 70 | print '{:.3f}'.format(recall_at(0.9)) 71 | -------------------------------------------------------------------------------- /tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Reval = re-eval. 
def parse_args():
    """Parse command-line arguments for re-evaluating saved detections."""
    parser = argparse.ArgumentParser(description='Re-evaluate results')
    parser.add_argument('output_dir', nargs=1, help='results directory',
                        type=str)
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to re-evaluate',
                        default='voc_2007_test', type=str)
    parser.add_argument('--matlab', dest='matlab_eval',
                        help='use matlab for evaluation',
                        action='store_true')
    parser.add_argument('--comp', dest='comp_mode', help='competition mode',
                        action='store_true')
    parser.add_argument('--nms', dest='apply_nms', help='apply nms',
                        action='store_true')

    # Bare invocation: show usage and bail out rather than crash on the
    # missing positional argument.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    return parser.parse_args()
def parse_args():
    """Parse command-line arguments for RPN proposal generation.

    Returns an argparse.Namespace; prints usage and exits when the script
    is invoked with no arguments at all.
    """
    def str2bool(value):
        # argparse's type=bool is broken for optional values: bool('False')
        # is True because every non-empty string is truthy. Accept the usual
        # textual spellings explicitly instead.
        if isinstance(value, bool):
            return value
        lowered = value.lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('expected a boolean, got %r' % value)

    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use',
                        default=0, type=int)
    parser.add_argument('--def', dest='prototxt',
                        help='prototxt file defining the network',
                        default=None, type=str)
    parser.add_argument('--net', dest='caffemodel',
                        help='model to test',
                        default=None, type=str)
    parser.add_argument('--cfg', dest='cfg_file',
                        help='optional config file', default=None, type=str)
    parser.add_argument('--wait', dest='wait',
                        help='wait until net file exists',
                        default=True, type=str2bool)
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to test',
                        default='voc_2007_test', type=str)
    parser.add_argument('--set', dest='set_cfgs',
                        help='set config keys', default=None,
                        nargs=argparse.REMAINDER)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args
def parse_args():
    """Parse command-line arguments for testing a Fast R-CNN network.

    Returns an argparse.Namespace; prints usage and exits when the script
    is invoked with no arguments at all.
    """
    def str2bool(value):
        # argparse's type=bool is broken for optional values: bool('False')
        # is True because every non-empty string is truthy. Accept the usual
        # textual spellings explicitly instead.
        if isinstance(value, bool):
            return value
        lowered = value.lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('expected a boolean, got %r' % value)

    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use',
                        default=0, type=int)
    parser.add_argument('--def', dest='prototxt',
                        help='prototxt file defining the network',
                        default=None, type=str)
    parser.add_argument('--net', dest='caffemodel',
                        help='model to test',
                        default=None, type=str)
    parser.add_argument('--cfg', dest='cfg_file',
                        help='optional config file', default=None, type=str)
    parser.add_argument('--wait', dest='wait',
                        help='wait until net file exists',
                        default=True, type=str2bool)
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to test',
                        default='voc_2007_test', type=str)
    parser.add_argument('--comp', dest='comp_mode', help='competition mode',
                        action='store_true')
    parser.add_argument('--set', dest='set_cfgs',
                        help='set config keys', default=None,
                        nargs=argparse.REMAINDER)
    parser.add_argument('--vis', dest='vis', help='visualize detections',
                        action='store_true')
    parser.add_argument('--num_dets', dest='max_per_image',
                        help='max number of detections per image',
                        default=100, type=int)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args
def parse_args():
    """Parse command-line arguments for training a Fast R-CNN network."""
    parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
    parser.add_argument('--gpu', dest='gpu_id',
                        help='GPU device id to use [0]',
                        default=0, type=int)
    parser.add_argument('--solver', dest='solver',
                        help='solver prototxt',
                        default=None, type=str)
    parser.add_argument('--iters', dest='max_iters',
                        help='number of iterations to train',
                        default=40000, type=int)
    parser.add_argument('--weights', dest='pretrained_model',
                        help='initialize with pretrained model weights',
                        default=None, type=str)
    parser.add_argument('--cfg', dest='cfg_file',
                        help='optional config file',
                        default=None, type=str)
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to train on',
                        default='voc_2007_trainval', type=str)
    parser.add_argument('--rand', dest='randomize',
                        help='randomize (do not use a fixed seed)',
                        action='store_true')
    parser.add_argument('--set', dest='set_cfgs',
                        help='set config keys', default=None,
                        nargs=argparse.REMAINDER)

    # With no arguments at all, print usage instead of training on defaults.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    return parser.parse_args()
--------------------------------------------------------------------------------