├── LICENSE ├── README.md ├── copy_model.h ├── data ├── .gitignore ├── README.md ├── demo │ ├── 000456.jpg │ ├── 000542.jpg │ ├── 001150.jpg │ ├── 001763.jpg │ └── 004545.jpg ├── pylintrc └── scripts │ ├── fetch_fast_rcnn_ohem_models.sh │ ├── fetch_imagenet_models.sh │ └── fetch_selective_search_data.sh ├── experiments ├── README.md ├── cfgs │ ├── fast_rcnn_adv_128.yml │ └── fast_rcnn_adv_pretrain.yml ├── logs │ └── .gitignore └── scripts │ ├── fast_rcnn_adv.sh │ ├── fast_rcnn_adv_pretrain.sh │ └── fast_rcnn_std.sh ├── lib ├── Makefile ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── coco.py │ ├── ds_utils.py │ ├── factory.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── tools │ │ └── mcg_munge.py │ └── voc_eval.py ├── fast_rcnn │ ├── __init__.py │ ├── bbox_transform.py │ ├── config.py │ ├── nms_wrapper.py │ ├── test.py │ └── train.py ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h ├── roi_data_layer │ ├── __init__.py │ ├── layer.py │ ├── minibatch.py │ └── roidb.py ├── rpn │ ├── README.md │ ├── __init__.py │ ├── anchor_target_layer.py │ ├── generate.py │ ├── generate_anchors.py │ ├── proposal_layer.py │ └── proposal_target_layer.py ├── setup.py ├── transform │ ├── __init__.py │ └── torch_image_transform_layer.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── bbox.pyx │ ├── blob.py │ └── timer.py ├── models └── pascal_voc │ ├── VGG16 │ ├── fast_rcnn │ │ ├── solver.prototxt │ │ ├── test.prototxt │ │ └── train.prototxt │ ├── fast_rcnn_adv │ │ ├── init_weights2.json │ │ ├── solver.prototxt │ │ └── train.prototxt │ ├── fast_rcnn_adv_pretrain │ │ ├── solver.prototxt │ │ └── train.prototxt │ └── fast_rcnn_std │ │ ├── solver.prototxt │ │ ├── test.prototxt │ │ └── train.prototxt │ └── VGG_CNN_M_1024 │ ├── fast_rcnn │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt │ └── fast_rcnn_ohem │ ├── solver.prototxt │ └── train.prototxt ├── python_utils ├── __init__.py ├── _init_paths.py ├── do_net_surgery.py ├── evaluate_detection.py ├── general_utils.py └── pycaffe_utils.py ├── tools ├── README.md ├── _init_paths.py ├── compress_net.py ├── demo.py ├── eval_recall.py ├── reval.py ├── rpn_generate.py ├── test_net.py ├── train_faster_rcnn_alt_opt.py ├── train_net.py └── train_svms.py └── train.sh /LICENSE: -------------------------------------------------------------------------------- 1 | Adversarial Fast-RCNN (A-FAST-RCNN) 2 | 3 | Copyright (c) 2017, Xiaolong Wang 4 | 5 | The MIT License (MIT) 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | 25 | ************************************************************************ 26 | 27 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION 28 | 29 | This project, A-FAST-RCNN, incorporates material from the project(s) 30 | listed below (collectively, "Third Party Code"). The original copyright 31 | notice and license of such Third Party Code are set out below. This 32 | Third Party Code is licensed to you under their original license terms 33 | set forth below. 34 | 35 | 1. Fast R-CNN (https://github.com/rbgirshick/fast-rcnn) 36 | 37 | Copyright (c) Microsoft Corporation 38 | 39 | All rights reserved. 40 | 41 | MIT License 42 | 43 | Permission is hereby granted, free of charge, to any person obtaining a 44 | copy of this software and associated documentation files (the "Software"), 45 | to deal in the Software without restriction, including without limitation 46 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 47 | and/or sell copies of the Software, and to permit persons to whom the 48 | Software is furnished to do so, subject to the following conditions: 49 | 50 | The above copyright notice and this permission notice shall be included 51 | in all copies or substantial portions of the Software. 52 | 53 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 54 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 55 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 56 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 57 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 58 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 59 | OTHER DEALINGS IN THE SOFTWARE. 60 | 61 | 62 | 2. Faster R-CNN (https://github.com/rbgirshick/py-faster-rcnn) 63 | 64 | The MIT License (MIT) 65 | 66 | Copyright (c) 2015 Microsoft Corporation 67 | 68 | Permission is hereby granted, free of charge, to any person obtaining a copy 69 | of this software and associated documentation files (the "Software"), to deal 70 | in the Software without restriction, including without limitation the rights 71 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 72 | copies of the Software, and to permit persons to whom the Software is 73 | furnished to do so, subject to the following conditions: 74 | 75 | The above copyright notice and this permission notice shall be included in 76 | all copies or substantial portions of the Software. 77 | 78 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 79 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 80 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 81 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 82 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 83 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 84 | THE SOFTWARE. 85 | 86 | 3. 
Caffe (https://github.com/BVLC/caffe/) 87 | 88 | COPYRIGHT 89 | 90 | All contributions by the University of California: 91 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents) 92 | All rights reserved. 93 | 94 | All other contributions: 95 | Copyright (c) 2014, 2015, the respective contributors 96 | All rights reserved. 97 | 98 | Caffe uses a shared copyright model: each contributor holds copyright 99 | over their contributions to Caffe. The project versioning records all 100 | such contribution and copyright details. If a contributor wants to 101 | further mark their specific copyright on a particular contribution, 102 | they should indicate their copyright solely in the commit message of 103 | the change when it is committed. 104 | 105 | The BSD 2-Clause License 106 | 107 | Redistribution and use in source and binary forms, with or without 108 | modification, are permitted provided that the following conditions 109 | are met: 110 | 111 | 1. Redistributions of source code must retain the above copyright notice, 112 | this list of conditions and the following disclaimer. 113 | 114 | 2. Redistributions in binary form must reproduce the above copyright 115 | notice, this list of conditions and the following disclaimer in the 116 | documentation and/or other materials provided with the distribution. 117 | 118 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 119 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 120 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 121 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 122 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 123 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 124 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 125 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 126 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 127 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 128 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 129 | 130 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION********** 131 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A-Fast-RCNN: Hard Positive Generation via Adversary for Object Detection 2 | By Xiaolong Wang, Abhinav Shrivastava, and Abhinav Gupta 3 | 4 | ### Introduction 5 | 6 | This is a Caffe-based version of A-Fast-RCNN ([arxiv_link](https://arxiv.org/pdf/1704.03414.pdf)). Although we originally implemented it in Torch, this Caffe re-implementation is much simpler, faster, and easier to use. 7 | 8 | We release the code for training A-Fast-RCNN with the Adversarial Spatial Dropout Network. 9 | 10 | 11 | ### License 12 | 13 | This code is released under the MIT License (refer to the LICENSE file for details).
14 | 15 | ### Citing 16 | 17 | If you find this work useful in your research, please consider citing: 18 | 19 | @inproceedings{WangCVPR17afrcnn, 20 | Author = {Xiaolong Wang and Abhinav Shrivastava and Abhinav Gupta}, 21 | Title = {A-Fast-RCNN: Hard Positive Generation via Adversary for Object Detection}, 22 | Booktitle = {Conference on Computer Vision and Pattern Recognition ({CVPR})}, 23 | Year = {2017} 24 | } 25 | 26 | ### Disclaimer 27 | 28 | This implementation is built on a *fork* of the OHEM code ([here](https://github.com/abhi2610/ohem)), which in turn builds on the Faster R-CNN Python code ([here](https://github.com/rbgirshick/py-faster-rcnn)) and Fast R-CNN ([here](https://github.com/rbgirshick/fast-rcnn)). Please cite the appropriate papers depending on which part of the code and/or model you are using. 29 | 30 | ### Results 31 | | Approach | training data | test data | mAP |
| --- | --- | --- | --- | 32 | | Fast R-CNN (FRCN) | VOC 07 trainval | VOC 07 test | 67.6 | 33 | | FRCN with adversary | VOC 07 trainval | VOC 07 test | 70.8 | 34 | 35 | **Note**: The reported results are based on the VGG16 network. 36 | 37 | 38 | 39 | ### Installation 40 | 41 | Please follow the installation instructions of the Faster R-CNN Python code ([here](https://github.com/rbgirshick/py-faster-rcnn)) exactly, and download the PASCAL VOC data as described there. 42 | 43 | ### Usage 44 | 45 | To run the code, one can simply do: 46 | ```Shell 47 | ./train.sh 48 | ``` 49 | 50 | It includes three stages of training (a consolidated sketch of the pipeline is given at the end of this section): 51 | 52 | ```Shell 53 | ./experiments/scripts/fast_rcnn_std.sh [GPU_ID] VGG16 pascal_voc 54 | ``` 55 | which trains a standard Fast R-CNN for 10K iterations; you can download my [model](https://www.dropbox.com/s/ccs7lw3gydfzgvv/fast_rcnn_std_iter_10000.caffemodel?dl=0) and [logs](https://www.dropbox.com/s/hwbag60l1gmtxbb/fast_rcnn_std.txt.2017-04-08_16-53-59?dl=0) for this step. 56 | 57 | ```Shell 58 | ./experiments/scripts/fast_rcnn_adv_pretrain.sh [GPU_ID] VGG16 pascal_voc 59 | ``` 60 | which pre-trains the adversarial network; you can download my [model](https://www.dropbox.com/s/hvqpxn3bigarhdn/fast_rcnn_adv_pretrain_iter_25000.caffemodel?dl=0) and [logs](https://www.dropbox.com/s/i79j5hd0ee4ybke/fast_rcnn_adv_pretrain.txt.2017-04-08_19-39-49?dl=0) for this step. 61 | 62 | ```Shell 63 | ./copy_model.h 64 | ``` 65 | which copies the weights of the two models above to initialize the joint model. 66 | 67 | ```Shell 68 | ./experiments/scripts/fast_rcnn_adv.sh [GPU_ID] VGG16 pascal_voc 69 | ``` 70 | which jointly trains the detector and the adversarial network; you can download my [model](https://www.dropbox.com/s/5wvxh8g5n3ewvp4/fast_rcnn_adv_iter_40000.caffemodel?dl=0) and [logs](https://www.dropbox.com/s/awrdrwyfthdgba5/fast_rcnn_adv.txt.2017-04-09_22-09-57?dl=0) for this step.
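
For reference, the following is a minimal sketch of the full pipeline that `train.sh` chains together, assuming the three stages are simply run back-to-back (the GPU id `0` is an illustrative choice):

```Shell
# Stage 1: train the standard Fast R-CNN baseline (10K iterations)
./experiments/scripts/fast_rcnn_std.sh 0 VGG16 pascal_voc

# Stage 2: pre-train the adversarial spatial dropout network (25K iterations)
./experiments/scripts/fast_rcnn_adv_pretrain.sh 0 VGG16 pascal_voc

# Copy the weights of both models into one initialization for joint training
./copy_model.h

# Stage 3: jointly train the detector and the adversary (40K iterations)
./experiments/scripts/fast_rcnn_adv.sh 0 VGG16 pascal_voc
```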
71 | -------------------------------------------------------------------------------- /copy_model.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | python python_utils/do_net_surgery.py \ 4 | --out_net_def models/pascal_voc/VGG16/fast_rcnn_adv/train.prototxt \ 5 | --net_surgery_json models/pascal_voc/VGG16/fast_rcnn_adv/init_weights2.json \ 6 | --out_net_file output/fast_rcnn_adv/voc_2007_trainval/train_init.caffemodel 7 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | selective_search* 2 | imagenet_models* 3 | fast_rcnn_models* 4 | VOCdevkit* 5 | cache 6 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | This directory holds (*after you download them*): 2 | - Fast R-CNN models trained with OHEM on VOC 2007 trainval 3 | - Caffe models pre-trained on ImageNet 4 | - Symlinks to datasets 5 | 6 | To download Fast R-CNN models (VGG_CNN_M_1024, VGG16) trained with OHEM on VOC 2007 trainval, run: 7 | 8 | ``` 9 | ./data/scripts/fetch_fast_rcnn_ohem_models.sh 10 | ``` 11 | 12 | This script will populate `data/fast_rcnn_ohem_models` with VGG16 and VGG_CNN_M_1024 models (Fast R-CNN detectors trained with OHEM). 13 | 14 | 15 | To download Caffe models (ZF, VGG16) pre-trained on ImageNet, run: 16 | 17 | ``` 18 | ./data/scripts/fetch_imagenet_models.sh 19 | ``` 20 | 21 | This script will populate `data/imagenet_models`. 22 | 23 | In order to train and test with PASCAL VOC, you will need to establish symlinks. 24 | From the `data` directory (`cd data`): 25 | 26 | ``` 27 | # For VOC 2007 28 | ln -s /your/path/to/VOC2007/VOCdevkit VOCdevkit2007 29 | 30 | # For VOC 2012 31 | ln -s /your/path/to/VOC2012/VOCdevkit VOCdevkit2012 32 | ``` 33 | 34 | Install the MS COCO dataset at /path/to/coco 35 | 36 | ``` 37 | ln -s /path/to/coco coco 38 | ``` 39 | 40 | For COCO with Fast R-CNN, place object proposals under `coco_proposals` (inside 41 | the `data` directory). You can obtain proposals on COCO from Jan Hosang at 42 | https://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-computing/research/object-recognition-and-scene-understanding/how-good-are-detection-proposals-really/. 43 | For COCO, using MCG is recommended over selective search. MCG boxes can be downloaded 44 | from http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/. 45 | Use the tool `lib/datasets/tools/mcg_munge.py` to convert the downloaded MCG data 46 | into the same file layout as those from Jan Hosang. 47 | 48 | Since you'll likely be experimenting with multiple installs of Fast/er R-CNN in 49 | parallel, you'll probably want to keep all of this data in a shared place and 50 | use symlinks. On my system I create the following symlinks inside `data`: 51 | 52 | Annotations for the 5k image 'minival' subset of COCO val2014 that I like to use 53 | can be found at http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/instances_minival2014.json.zip. 54 | Annotations for COCO val2014 (set) minus minival (~35k images) can be found at 55 | http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/instances_valminusminival2014.json.zip. 
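
For example, the minival and valminusminival annotations can be fetched and unpacked next to the standard COCO annotations (a minimal sketch; the `data/coco/annotations` destination assumes the `coco` symlink described above and is illustrative):

```
cd data/coco/annotations
wget http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/instances_minival2014.json.zip
wget http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/instances_valminusminival2014.json.zip
unzip instances_minival2014.json.zip
unzip instances_valminusminival2014.json.zip
```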
56 | 57 | ``` 58 | # data/cache holds various outputs created by the datasets package 59 | ln -s /data/fast_rcnn_shared/cache 60 | 61 | # move the imagenet_models to a shared location and symlink to them 62 | ln -s /data/fast_rcnn_shared/imagenet_models 63 | 64 | # move the selective search data to a shared location and symlink to them 65 | # (only applicable to Fast R-CNN training) 66 | ln -s /data/fast_rcnn_shared/selective_search_data 67 | 68 | ln -s /data/VOC2007/VOCdevkit VOCdevkit2007 69 | ln -s /data/VOC2012/VOCdevkit VOCdevkit2012 70 | ``` 71 | -------------------------------------------------------------------------------- /data/demo/000456.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolonw/adversarial-frcnn/2a7bb96c9884c0f09ca5bde35a981087be28562b/data/demo/000456.jpg -------------------------------------------------------------------------------- /data/demo/000542.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolonw/adversarial-frcnn/2a7bb96c9884c0f09ca5bde35a981087be28562b/data/demo/000542.jpg -------------------------------------------------------------------------------- /data/demo/001150.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolonw/adversarial-frcnn/2a7bb96c9884c0f09ca5bde35a981087be28562b/data/demo/001150.jpg -------------------------------------------------------------------------------- /data/demo/001763.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolonw/adversarial-frcnn/2a7bb96c9884c0f09ca5bde35a981087be28562b/data/demo/001763.jpg -------------------------------------------------------------------------------- /data/demo/004545.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolonw/adversarial-frcnn/2a7bb96c9884c0f09ca5bde35a981087be28562b/data/demo/004545.jpg -------------------------------------------------------------------------------- /data/pylintrc: -------------------------------------------------------------------------------- 1 | [TYPECHECK] 2 | 3 | ignored-modules = numpy, numpy.random, cv2 4 | -------------------------------------------------------------------------------- /data/scripts/fetch_fast_rcnn_ohem_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=fast_rcnn_ohem_models.tgz 7 | URL=http://graphics.cs.cmu.edu/projects/ohem/data/$FILE 8 | CHECKSUM=cbfd5b7ed5ec4d5cb838701cbf1f3ccb 9 | 10 | if [ -f $FILE ]; then 11 | echo "File already exists. Checking md5..." 12 | os=`uname -s` 13 | if [ "$os" = "Linux" ]; then 14 | checksum=`md5sum $FILE | awk '{ print $1 }'` 15 | elif [ "$os" = "Darwin" ]; then 16 | checksum=`cat $FILE | md5` 17 | fi 18 | if [ "$checksum" = "$CHECKSUM" ]; then 19 | echo "Checksum is correct. No need to download." 20 | exit 0 21 | else 22 | echo "Checksum is incorrect. Need to download again." 23 | fi 24 | fi 25 | 26 | echo "Downloading Fast R-CNN OHEM models (VGG16 and VGG_CNN_M_1024) (1.5G)..." 27 | 28 | wget $URL -O $FILE 29 | 30 | echo "Unzipping..." 31 | 32 | tar zxvf $FILE 33 | 34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
35 | -------------------------------------------------------------------------------- /data/scripts/fetch_imagenet_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=imagenet_models.tgz 7 | URL=http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/$FILE 8 | CHECKSUM=ed34ca912d6782edfb673a8c3a0bda6d 9 | 10 | if [ -f $FILE ]; then 11 | echo "File already exists. Checking md5..." 12 | os=`uname -s` 13 | if [ "$os" = "Linux" ]; then 14 | checksum=`md5sum $FILE | awk '{ print $1 }'` 15 | elif [ "$os" = "Darwin" ]; then 16 | checksum=`cat $FILE | md5` 17 | fi 18 | if [ "$checksum" = "$CHECKSUM" ]; then 19 | echo "Checksum is correct. No need to download." 20 | exit 0 21 | else 22 | echo "Checksum is incorrect. Need to download again." 23 | fi 24 | fi 25 | 26 | echo "Downloading pretrained ImageNet models (1G)..." 27 | 28 | wget $URL -O $FILE 29 | 30 | echo "Unzipping..." 31 | 32 | tar zxvf $FILE 33 | 34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM." 35 | -------------------------------------------------------------------------------- /data/scripts/fetch_selective_search_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=selective_search_data.tgz 7 | URL=http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/$FILE 8 | CHECKSUM=7078c1db87a7851b31966b96774cd9b9 9 | 10 | if [ -f $FILE ]; then 11 | echo "File already exists. Checking md5..." 12 | os=`uname -s` 13 | if [ "$os" = "Linux" ]; then 14 | checksum=`md5sum $FILE | awk '{ print $1 }'` 15 | elif [ "$os" = "Darwin" ]; then 16 | checksum=`cat $FILE | md5` 17 | fi 18 | if [ "$checksum" = "$CHECKSUM" ]; then 19 | echo "Checksum is correct. No need to download." 20 | exit 0 21 | else 22 | echo "Checksum is incorrect. Need to download again." 23 | fi 24 | fi 25 | 26 | echo "Downloading precomputed selective search boxes (0.5G)..." 27 | 28 | wget $URL -O $FILE 29 | 30 | echo "Unzipping..." 31 | 32 | tar zxvf $FILE 33 | 34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM." 35 | -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- 1 | Scripts are under `experiments/scripts`. 2 | 3 | Each script saves a log file under `experiments/logs`. 4 | 5 | Configuration override files used in the experiments are stored in `experiments/cfgs`. 6 | -------------------------------------------------------------------------------- /experiments/cfgs/fast_rcnn_adv_128.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: fast_rcnn_adv 2 | MATLAB: /opt/matlab/8.1/bin/matlab 3 | TRAIN: 4 | BG_THRESH_LO: 0.0 5 | # we use gradient accumulation, 6 | # see solver.prototxt (iter_size: 2) 7 | IMS_PER_BATCH: 1 8 | # adjust batch_size for iter_size 9 | BATCH_SIZE: 128 10 | USE_OHEM: False 11 | # Wasn't used in the paper (impact unknown). 
12 | ASPECT_GROUPING: False -------------------------------------------------------------------------------- /experiments/cfgs/fast_rcnn_adv_pretrain.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: fast_rcnn_adv_pretrain 2 | MATLAB: /opt/matlab/8.1/bin/matlab 3 | TRAIN: 4 | BG_THRESH_LO: 0.0 5 | # we use gradient accumulation, 6 | # see solver.prototxt (iter_size: 2) 7 | IMS_PER_BATCH: 1 8 | # adjust batch_size for iter_size 9 | BATCH_SIZE: 64 10 | FG_FRACTION: 0.75 11 | USE_OHEM: False 12 | # Wasn't used in the paper (impact unknown). 13 | ASPECT_GROUPING: False 14 | SNAPSHOT_ITERS: 2500 -------------------------------------------------------------------------------- /experiments/logs/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt* 2 | -------------------------------------------------------------------------------- /experiments/scripts/fast_rcnn_adv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # ./experiments/scripts/fast_rcnn_adv.sh GPU NET DATASET [options args to {train,test}_net.py] 4 | # DATASET is either pascal_voc or coco. 5 | # 6 | # Example: 7 | # ./experiments/scripts/fast_rcnn_adv.sh 0 VGG16 pascal_voc \ 8 | #   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]" 9 | 10 | set -x 11 | set -e 12 | 13 | export PYTHONUNBUFFERED="True" 14 | 15 | GPU_ID=$1 16 | NET=$2 17 | NET_lc=${NET,,} 18 | DATASET=$3 19 | 20 | array=( $@ ) 21 | len=${#array[@]} 22 | EXTRA_ARGS=${array[@]:3:$len} 23 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 24 | 25 | case $DATASET in 26 | pascal_voc) 27 | TRAIN_IMDB="voc_2007_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | PT_DIR="pascal_voc" 30 | ITERS=40000 31 | ;; 32 | coco) 33 | echo "Support coming soon. Stay tuned!" 34 | exit 35 | # TRAIN_IMDB="coco_2014_train" 36 | # TEST_IMDB="coco_2014_minival" 37 | # PT_DIR="coco" 38 | # ITERS=280000 39 | ;; 40 | *) 41 | echo "No dataset given" 42 | exit 43 | ;; 44 | esac 45 | 46 | LOG="experiments/logs/fast_rcnn_adv.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 47 | exec &> >(tee -a "$LOG") 48 | echo Logging output to "$LOG" 49 | 50 | 51 | time ./tools/train_net.py --gpu ${GPU_ID} \ 52 | --solver models/${PT_DIR}/${NET}/fast_rcnn_adv/solver.prototxt \ 53 | --weights output/fast_rcnn_adv/voc_2007_trainval/train_init.caffemodel \ 54 | --imdb ${TRAIN_IMDB} \ 55 | --iters ${ITERS} \ 56 | --cfg experiments/cfgs/fast_rcnn_adv_128.yml \ 57 | ${EXTRA_ARGS} 58 | 59 | set +x 60 | NET_FINAL=`grep -B 1 "done solving" ${LOG} | grep "Wrote snapshot" | awk '{print $4}'` 61 | set -x 62 | 63 | time ./tools/test_net.py --gpu ${GPU_ID} \ 64 | --def models/${PT_DIR}/${NET}/fast_rcnn/test.prototxt \ 65 | --net ${NET_FINAL} \ 66 | --imdb ${TEST_IMDB} \ 67 | --cfg experiments/cfgs/fast_rcnn_adv_128.yml \ 68 | --num_dets 2000 \ 69 | --det_thresh 0.00001 \ 70 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /experiments/scripts/fast_rcnn_adv_pretrain.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # ./experiments/scripts/fast_rcnn_adv_pretrain.sh GPU NET DATASET [options args to {train,test}_net.py] 4 | # DATASET is either pascal_voc or coco.
5 | # 6 | # Example: 7 | # ./experiments/scripts/fast_rcnn_adv_pretrain.sh 0 VGG16 pascal_voc \ 8 | #   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]" 9 | 10 | set -x 11 | set -e 12 | 13 | export PYTHONUNBUFFERED="True" 14 | 15 | GPU_ID=$1 16 | NET=$2 17 | NET_lc=${NET,,} 18 | DATASET=$3 19 | 20 | array=( $@ ) 21 | len=${#array[@]} 22 | EXTRA_ARGS=${array[@]:3:$len} 23 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 24 | 25 | case $DATASET in 26 | pascal_voc) 27 | TRAIN_IMDB="voc_2007_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | PT_DIR="pascal_voc" 30 | ITERS=25000 31 | ;; 32 | coco) 33 | echo "Support coming soon. Stay tuned!" 34 | exit 35 | # TRAIN_IMDB="coco_2014_train" 36 | # TEST_IMDB="coco_2014_minival" 37 | # PT_DIR="coco" 38 | # ITERS=280000 39 | ;; 40 | *) 41 | echo "No dataset given" 42 | exit 43 | ;; 44 | esac 45 | 46 | LOG="experiments/logs/fast_rcnn_adv_pretrain.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 47 | exec &> >(tee -a "$LOG") 48 | echo Logging output to "$LOG" 49 | 50 | 51 | 52 | time ./tools/train_net.py --gpu ${GPU_ID} \ 53 | --solver models/${PT_DIR}/${NET}/fast_rcnn_adv_pretrain/solver.prototxt \ 54 | --weights output/fast_rcnn_adv/voc_2007_trainval/fast_rcnn_std_iter_10000.caffemodel \ 55 | --imdb ${TRAIN_IMDB} \ 56 | --iters ${ITERS} \ 57 | --cfg experiments/cfgs/fast_rcnn_adv_pretrain.yml \ 58 | ${EXTRA_ARGS} 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /experiments/scripts/fast_rcnn_std.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # ./experiments/scripts/fast_rcnn_std.sh GPU NET DATASET [options args to {train,test}_net.py] 4 | # DATASET is either pascal_voc or coco.
5 | # 6 | # Example: 7 | # ./experiments/scripts/fast_rcnn_std.sh 0 VGG_CNN_M_1024 pascal_voc \ 8 | #   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]" 9 | 10 | set -x 11 | set -e 12 | 13 | export PYTHONUNBUFFERED="True" 14 | 15 | GPU_ID=$1 16 | NET=$2 17 | NET_lc=${NET,,} 18 | DATASET=$3 19 | 20 | array=( $@ ) 21 | len=${#array[@]} 22 | EXTRA_ARGS=${array[@]:3:$len} 23 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 24 | 25 | case $DATASET in 26 | pascal_voc) 27 | TRAIN_IMDB="voc_2007_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | PT_DIR="pascal_voc" 30 | ITERS=10000 31 | ;; 32 | coco) 33 | TRAIN_IMDB="coco_2014_train" 34 | TEST_IMDB="coco_2014_minival" 35 | PT_DIR="coco" 36 | ITERS=280000 37 | ;; 38 | *) 39 | echo "No dataset given" 40 | exit 41 | ;; 42 | esac 43 | 44 | LOG="experiments/logs/fast_rcnn_std.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 45 | exec &> >(tee -a "$LOG") 46 | echo Logging output to "$LOG" 47 | 48 | time ./tools/train_net.py --gpu ${GPU_ID} \ 49 | --solver models/${PT_DIR}/${NET}/fast_rcnn_std/solver.prototxt \ 50 | --weights data/imagenet_models/${NET}.v2.caffemodel \ 51 | --imdb ${TRAIN_IMDB} \ 52 | --iters ${ITERS} \ 53 | --cfg experiments/cfgs/fast_rcnn_adv_128.yml \ 54 | ${EXTRA_ARGS} 55 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!!
%s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | 7 | import numpy as np 8 | 9 | def unique_boxes(boxes, scale=1.0): 10 | """Return indices of unique boxes.""" 11 | v = np.array([1, 1e3, 1e6, 1e9]) 12 | hashes = np.round(boxes * scale).dot(v) 13 | _, index = np.unique(hashes, return_index=True) 14 | return np.sort(index) 15 | 16 | def xywh_to_xyxy(boxes): 17 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 18 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 19 | 20 | def xyxy_to_xywh(boxes): 21 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 22 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 23 | 24 | def validate_boxes(boxes, width=0, height=0): 25 | """Check that a set of boxes are valid.""" 26 | x1 = boxes[:, 0] 27 | y1 = boxes[:, 1] 28 | x2 = boxes[:, 2] 29 | y2 = boxes[:, 3] 30 | assert (x1 >= 0).all() 31 | assert (y1 >= 0).all() 32 | assert (x2 >= x1).all() 33 | assert (y2 >= y1).all() 34 | assert (x2 < width).all() 35 | assert (y2 < height).all() 36 | 37 | def filter_small_boxes(boxes, min_size): 38 | w = boxes[:, 2] - boxes[:, 0] 39 | h = boxes[:, 3] - boxes[:, 1] 40 | keep = np.where((w >= min_size) & (h >= min_size))[0] 41 | return keep 42 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | from datasets.pascal_voc import pascal_voc 13 | from datasets.coco import coco 14 | import numpy as np 15 | 16 | # Set up voc_<year>_<split> using selective search "fast" mode
17 | for year in ['2007', '2012']: 18 | for split in ['train', 'val', 'trainval', 'test']: 19 | name = 'voc_{}_{}'.format(year, split) 20 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 21 | 22 | # Set up coco_2014_<split> 23 | for year in ['2014']: 24 | for split in ['train', 'val', 'minival', 'valminusminival']: 25 | name = 'coco_{}_{}'.format(year, split) 26 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 27 | 28 | # Set up coco_2015_<split> 29 | for year in ['2015']: 30 | for split in ['test', 'test-dev']: 31 | name = 'coco_{}_{}'.format(year, split) 32 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 33 | 34 | def get_imdb(name): 35 | """Get an imdb (image database) by name.""" 36 | if not __sets.has_key(name): 37 | raise KeyError('Unknown dataset: {}'.format(name)) 38 | return __sets[name]() 39 | 40 | def list_imdbs(): 41 | """List all registered imdbs.""" 42 | return __sets.keys() 43 | -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | """Hacky tool to convert file system layout of MCG boxes downloaded from 5 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 6 | so that it's consistent with those computed by Jan Hosang (see: 7 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 8 | computing/research/object-recognition-and-scene-understanding/how- 9 | good-are-detection-proposals-really/) 10 | 11 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 12 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 13 | """ 14 | 15 | def munge(src_dir): 16 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 17 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 18 | 19 | files = os.listdir(src_dir) 20 | for fn in files: 21 | base, ext = os.path.splitext(fn) 22 | # first 14 chars / first 22 chars / all chars + .mat 23 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 24 | first = base[:14] 25 | second = base[:22] 26 | dst_dir = os.path.join('MCG', 'mat', first, second) 27 | if not os.path.exists(dst_dir): 28 | os.makedirs(dst_dir) 29 | src = os.path.join(src_dir, fn) 30 | dst = os.path.join(dst_dir, fn) 31 | print 'MV: {} -> {}'.format(src, dst) 32 | os.rename(src, dst) 33 | 34 | if __name__ == '__main__': 35 | # src_dir should look something like: 36 | # src_dir = 'MCG-COCO-val2014-boxes' 37 | src_dir = sys.argv[1] 38 | munge(src_dir) 39 | -------------------------------------------------------------------------------- /lib/datasets/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | import xml.etree.ElementTree as ET 8 | import os 9 | import cPickle 10 | import numpy as np 11 | 12 | def parse_rec(filename): 13 | """ Parse a PASCAL VOC xml file """ 14 | tree = ET.parse(filename) 15 | objects = [] 16 | for obj in tree.findall('object'): 17 | obj_struct = {} 18 | obj_struct['name'] = obj.find('name').text 19 | obj_struct['pose'] = obj.find('pose').text 20 | obj_struct['truncated'] =
int(obj.find('truncated').text) 21 | obj_struct['difficult'] = int(obj.find('difficult').text) 22 | bbox = obj.find('bndbox') 23 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 24 | int(bbox.find('ymin').text), 25 | int(bbox.find('xmax').text), 26 | int(bbox.find('ymax').text)] 27 | objects.append(obj_struct) 28 | 29 | return objects 30 | 31 | def voc_ap(rec, prec, use_07_metric=False): 32 | """ ap = voc_ap(rec, prec, [use_07_metric]) 33 | Compute VOC AP given precision and recall. 34 | If use_07_metric is true, uses the 35 | VOC 07 11 point method (default:False). 36 | """ 37 | if use_07_metric: 38 | # 11 point metric 39 | ap = 0. 40 | for t in np.arange(0., 1.1, 0.1): 41 | if np.sum(rec >= t) == 0: 42 | p = 0 43 | else: 44 | p = np.max(prec[rec >= t]) 45 | ap = ap + p / 11. 46 | else: 47 | # correct AP calculation 48 | # first append sentinel values at the end 49 | mrec = np.concatenate(([0.], rec, [1.])) 50 | mpre = np.concatenate(([0.], prec, [0.])) 51 | 52 | # compute the precision envelope 53 | for i in range(mpre.size - 1, 0, -1): 54 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 55 | 56 | # to calculate area under PR curve, look for points 57 | # where X axis (recall) changes value 58 | i = np.where(mrec[1:] != mrec[:-1])[0] 59 | 60 | # and sum (\Delta recall) * prec 61 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 62 | return ap 63 | 64 | def voc_eval(detpath, 65 | annopath, 66 | imagesetfile, 67 | classname, 68 | cachedir, 69 | ovthresh=0.5, 70 | use_07_metric=False): 71 | """rec, prec, ap = voc_eval(detpath, 72 | annopath, 73 | imagesetfile, 74 | classname, 75 | [ovthresh], 76 | [use_07_metric]) 77 | 78 | Top level function that does the PASCAL VOC evaluation. 79 | 80 | detpath: Path to detections 81 | detpath.format(classname) should produce the detection results file. 82 | annopath: Path to annotations 83 | annopath.format(imagename) should be the xml annotations file. 84 | imagesetfile: Text file containing the list of images, one image per line. 
85 | classname: Category name (duh) 86 | cachedir: Directory for caching the annotations 87 | [ovthresh]: Overlap threshold (default = 0.5) 88 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 89 | (default False) 90 | """ 91 | # assumes detections are in detpath.format(classname) 92 | # assumes annotations are in annopath.format(imagename) 93 | # assumes imagesetfile is a text file with each line an image name 94 | # cachedir caches the annotations in a pickle file 95 | 96 | # first load gt 97 | if not os.path.isdir(cachedir): 98 | os.mkdir(cachedir) 99 | cachefile = os.path.join(cachedir, 'annots.pkl') 100 | # read list of images 101 | with open(imagesetfile, 'r') as f: 102 | lines = f.readlines() 103 | imagenames = [x.strip() for x in lines] 104 | 105 | if not os.path.isfile(cachefile): 106 | # load annots 107 | recs = {} 108 | for i, imagename in enumerate(imagenames): 109 | recs[imagename] = parse_rec(annopath.format(imagename)) 110 | if i % 100 == 0: 111 | print 'Reading annotation for {:d}/{:d}'.format( 112 | i + 1, len(imagenames)) 113 | # save 114 | print 'Saving cached annotations to {:s}'.format(cachefile) 115 | with open(cachefile, 'w') as f: 116 | cPickle.dump(recs, f) 117 | else: 118 | # load 119 | with open(cachefile, 'r') as f: 120 | recs = cPickle.load(f) 121 | 122 | # extract gt objects for this class 123 | class_recs = {} 124 | npos = 0 125 | for imagename in imagenames: 126 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 127 | bbox = np.array([x['bbox'] for x in R]) 128 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 129 | det = [False] * len(R) 130 | npos = npos + sum(~difficult) 131 | class_recs[imagename] = {'bbox': bbox, 132 | 'difficult': difficult, 133 | 'det': det} 134 | 135 | # read dets 136 | detfile = detpath.format(classname) 137 | with open(detfile, 'r') as f: 138 | lines = f.readlines() 139 | 140 | splitlines = [x.strip().split(' ') for x in lines] 141 | image_ids = [x[0] for x in splitlines] 142 | confidence = np.array([float(x[1]) for x in splitlines]) 143 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 144 | 145 | # sort by confidence 146 | sorted_ind = np.argsort(-confidence) 147 | sorted_scores = np.sort(-confidence) 148 | BB = BB[sorted_ind, :] 149 | image_ids = [image_ids[x] for x in sorted_ind] 150 | 151 | # go down dets and mark TPs and FPs 152 | nd = len(image_ids) 153 | tp = np.zeros(nd) 154 | fp = np.zeros(nd) 155 | for d in range(nd): 156 | R = class_recs[image_ids[d]] 157 | bb = BB[d, :].astype(float) 158 | ovmax = -np.inf 159 | BBGT = R['bbox'].astype(float) 160 | 161 | if BBGT.size > 0: 162 | # compute overlaps 163 | # intersection 164 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 165 | iymin = np.maximum(BBGT[:, 1], bb[1]) 166 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 167 | iymax = np.minimum(BBGT[:, 3], bb[3]) 168 | iw = np.maximum(ixmax - ixmin + 1., 0.) 169 | ih = np.maximum(iymax - iymin + 1., 0.) 170 | inters = iw * ih 171 | 172 | # union 173 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 174 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 175 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 176 | 177 | overlaps = inters / uni 178 | ovmax = np.max(overlaps) 179 | jmax = np.argmax(overlaps) 180 | 181 | if ovmax > ovthresh: 182 | if not R['difficult'][jmax]: 183 | if not R['det'][jmax]: 184 | tp[d] = 1. 185 | R['det'][jmax] = 1 186 | else: 187 | fp[d] = 1. 188 | else: 189 | fp[d] = 1. 
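    # Note (added): each ground-truth box can be claimed by at most one
    # detection; the R['det'] flag marks it as taken, so duplicate detections
    # of the same object count as false positives. Matches to 'difficult'
    # ground truth are ignored (neither TP nor FP), per the PASCAL VOC protocol.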
190 | 191 | # compute precision recall 192 | fp = np.cumsum(fp) 193 | tp = np.cumsum(tp) 194 | rec = tp / float(npos) 195 | # avoid divide by zero in case the first detection matches a difficult 196 | # ground truth 197 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 198 | ap = voc_ap(rec, prec, use_07_metric) 199 | 200 | return rec, prec, ap 201 | -------------------------------------------------------------------------------- /lib/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/fast_rcnn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def bbox_transform(ex_rois, gt_rois): 11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 12 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 15 | 16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 20 | 21 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 23 | targets_dw = np.log(gt_widths / ex_widths) 24 | targets_dh = np.log(gt_heights / ex_heights) 25 | 26 | targets = np.vstack( 27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 28 | return targets 29 | 30 | def bbox_transform_inv(boxes, deltas): 31 | if boxes.shape[0] == 0: 32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 33 | 34 | boxes = boxes.astype(deltas.dtype, copy=False) 35 | 36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 38 | ctr_x = boxes[:, 0] + 0.5 * widths 39 | ctr_y = boxes[:, 1] + 0.5 * heights 40 | 41 | dx = deltas[:, 0::4] 42 | dy = deltas[:, 1::4] 43 | dw = deltas[:, 2::4] 44 | dh = deltas[:, 3::4] 45 | 46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 48 | pred_w = np.exp(dw) * widths[:, np.newaxis] 49 | pred_h = np.exp(dh) * heights[:, np.newaxis] 50 | 51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 52 | # x1 53 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 54 | # y1 55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 56 | # x2 57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 58 | # y2 59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 60 | 61 | return pred_boxes 62 | 63 | def clip_boxes(boxes, im_shape): 64 | """ 65 | Clip boxes to image boundaries. 
66 | """ 67 | 68 | # x1 >= 0 69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 70 | # y1 >= 0 71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 72 | # x2 < im_shape[1] 73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 74 | # y2 < im_shape[0] 75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 76 | return boxes 77 | -------------------------------------------------------------------------------- /lib/fast_rcnn/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from fast_rcnn.config import cfg 9 | from nms.gpu_nms import gpu_nms 10 | from nms.cpu_nms import cpu_nms 11 | 12 | def nms(dets, thresh, force_cpu=False): 13 | """Dispatch to either CPU or GPU NMS implementations.""" 14 | 15 | if dets.shape[0] == 0: 16 | return [] 17 | if cfg.USE_GPU_NMS and not force_cpu: 18 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | else: 20 | return cpu_nms(dets, thresh) 21 | -------------------------------------------------------------------------------- /lib/fast_rcnn/train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Train a Fast R-CNN network.""" 9 | 10 | import caffe 11 | from fast_rcnn.config import cfg 12 | import roi_data_layer.roidb as rdl_roidb 13 | from utils.timer import Timer 14 | import numpy as np 15 | import os 16 | 17 | from caffe.proto import caffe_pb2 18 | import google.protobuf as pb2 19 | import google.protobuf.text_format 20 | 21 | class SolverWrapper(object): 22 | """A simple wrapper around Caffe's solver. 23 | This wrapper gives us control over he snapshotting process, which we 24 | use to unnormalize the learned bounding-box regression weights. 25 | """ 26 | 27 | def __init__(self, solver_prototxt, roidb, output_dir, 28 | pretrained_model=None): 29 | """Initialize the SolverWrapper.""" 30 | self.output_dir = output_dir 31 | 32 | if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and 33 | cfg.TRAIN.BBOX_NORMALIZE_TARGETS): 34 | # RPN can only use precomputed normalization because there are no 35 | # fixed statistics to compute a priori 36 | assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED 37 | 38 | if cfg.TRAIN.BBOX_REG: 39 | print 'Computing bounding-box regression targets...' 
40 | self.bbox_means, self.bbox_stds = \ 41 | rdl_roidb.add_bbox_regression_targets(roidb) 42 | print 'done' 43 | 44 | self.solver = caffe.SGDSolver(solver_prototxt) 45 | if pretrained_model is not None: 46 | print ('Loading pretrained model ' 47 | 'weights from {:s}').format(pretrained_model) 48 | self.solver.net.copy_from(pretrained_model) 49 | 50 | self.solver_param = caffe_pb2.SolverParameter() 51 | with open(solver_prototxt, 'rt') as f: 52 | pb2.text_format.Merge(f.read(), self.solver_param) 53 | 54 | self.solver.net.layers[0].set_roidb(roidb) 55 | 56 | def snapshot(self): 57 | """Take a snapshot of the network after unnormalizing the learned 58 | bounding-box regression weights. This enables easy use at test-time. 59 | """ 60 | net = self.solver.net 61 | 62 | scale_bbox_params = (cfg.TRAIN.BBOX_REG and 63 | cfg.TRAIN.BBOX_NORMALIZE_TARGETS and 64 | net.params.has_key('bbox_pred')) 65 | 66 | if scale_bbox_params: 67 | # save original values 68 | orig_0 = net.params['bbox_pred'][0].data.copy() 69 | orig_1 = net.params['bbox_pred'][1].data.copy() 70 | 71 | # scale and shift with bbox reg unnormalization; then save snapshot 72 | net.params['bbox_pred'][0].data[...] = \ 73 | (net.params['bbox_pred'][0].data * 74 | self.bbox_stds[:, np.newaxis]) 75 | net.params['bbox_pred'][1].data[...] = \ 76 | (net.params['bbox_pred'][1].data * 77 | self.bbox_stds + self.bbox_means) 78 | 79 | infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX 80 | if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') 81 | filename = (self.solver_param.snapshot_prefix + infix + 82 | '_iter_{:d}'.format(self.solver.iter) + '.caffemodel') 83 | filename = os.path.join(self.output_dir, filename) 84 | 85 | net.save(str(filename)) 86 | print 'Wrote snapshot to: {:s}'.format(filename) 87 | 88 | if scale_bbox_params: 89 | # restore net to original state 90 | net.params['bbox_pred'][0].data[...] = orig_0 91 | net.params['bbox_pred'][1].data[...] = orig_1 92 | return filename 93 | 94 | def train_model(self, max_iters): 95 | """Network training loop.""" 96 | last_snapshot_iter = -1 97 | timer = Timer() 98 | model_paths = [] 99 | while self.solver.iter < max_iters: 100 | # Make one SGD update 101 | timer.tic() 102 | self.solver.step(1) 103 | timer.toc() 104 | if self.solver.iter % (10 * self.solver_param.display) == 0: 105 | print 'speed: {:.3f}s / iter'.format(timer.average_time) 106 | 107 | if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0: 108 | last_snapshot_iter = self.solver.iter 109 | model_paths.append(self.snapshot()) 110 | 111 | if last_snapshot_iter != self.solver.iter: 112 | model_paths.append(self.snapshot()) 113 | return model_paths 114 | 115 | def get_training_roidb(imdb): 116 | """Returns a roidb (Region of Interest database) for use in training.""" 117 | if cfg.TRAIN.USE_FLIPPED: 118 | print 'Appending horizontally-flipped training examples...' 119 | imdb.append_flipped_images() 120 | print 'done' 121 | 122 | print 'Preparing training data...' 
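    # Note (added): prepare_roidb (defined in lib/roi_data_layer/roidb.py,
    # not shown in this listing) augments each roidb entry with its image
    # path and size plus per-RoI max-overlap / argmax-class statistics that
    # the sampling code relies on.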
123 | rdl_roidb.prepare_roidb(imdb) 124 | print 'done' 125 | 126 | return imdb.roidb 127 | 128 | def filter_roidb(roidb): 129 | """Remove roidb entries that have no usable RoIs.""" 130 | 131 | def is_valid(entry): 132 | # Valid images have: 133 | # (1) At least one foreground RoI OR 134 | # (2) At least one background RoI 135 | overlaps = entry['max_overlaps'] 136 | # find boxes with sufficient overlap 137 | fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] 138 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 139 | bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & 140 | (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 141 | # image is only valid if such boxes exist 142 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 143 | return valid 144 | 145 | num = len(roidb) 146 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 147 | num_after = len(filtered_roidb) 148 | print 'Filtered {} roidb entries: {} -> {}'.format(num - num_after, 149 | num, num_after) 150 | return filtered_roidb 151 | 152 | def train_net(solver_prototxt, roidb, output_dir, 153 | pretrained_model=None, max_iters=40000): 154 | """Train a Fast R-CNN network.""" 155 | 156 | roidb = filter_roidb(roidb) 157 | sw = SolverWrapper(solver_prototxt, roidb, output_dir, 158 | pretrained_model=pretrained_model) 159 | 160 | print 'Solving...' 161 | model_paths = sw.train_model(max_iters) 162 | print 'done solving' 163 | return model_paths 164 | -------------------------------------------------------------------------------- /lib/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolonw/adversarial-frcnn/2a7bb96c9884c0f09ca5bde35a981087be28562b/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, 
ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 | CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 | nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 | std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
/lib/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 |
--------------------------------------------------------------------------------
/lib/pycocotools/UPSTREAM_REV:
--------------------------------------------------------------------------------
1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574
2 |
--------------------------------------------------------------------------------
/lib/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__
= 'tylin' 2 | -------------------------------------------------------------------------------- /lib/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. 
All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /lib/pycocotools/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #include "maskApi.h"
8 | #include <math.h>
9 | #include <stdlib.h>
10 |
11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; }
12 | uint umax( uint a, uint b ) { return (a>b) ? a : b; }
13 |
14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) {
15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m);
16 | if(cnts) for(siz j=0; j<m; j++) R->cnts[j]=cnts[j];
17 | }
18 |
19 | void rleFree( RLE *R ) {
20 | free(R->cnts); R->cnts=0;
21 | }
22 |
23 | void rlesInit( RLE **R, siz n ) {
24 | *R = (RLE*) malloc(sizeof(RLE)*n);
25 | for(siz i=0; i<n; i++) rleInit((*R)+i, 0, 0, 0, 0);
26 | }
27 |
28 | void rlesFree( RLE **R, siz n ) {
29 | for(siz i=0; i<n; i++) rleFree((*R)+i);
30 | free(*R); *R=0;
31 | }
32 |
33 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) {
34 | siz i, j, k, a=w*h; uint c; byte p;
35 | uint *cnts = malloc(sizeof(uint)*(a+1));
36 | for(i=0; i<n; i++) {
37 | const byte *T=M+a*i; k=0; p=0; c=0;
38 | for(j=0; j<a; j++) { if(T[j]!=p) { cnts[k++]=c; c=0; p=T[j]; } c++; }
39 | cnts[k++]=c; rleInit(R+i, h, w, k, cnts);
40 | }
41 | free(cnts);
42 | }
43 |
44 | void rleDecode( const RLE *R, byte *M, siz n ) {
45 | for( siz i=0; i<n; i++ ) {
46 | byte v=0;
47 | for( siz j=0; j<R[i].m; j++ ) {
48 | for( siz k=0; k<R[i].cnts[j]; k++ ) *(M++)=v;
49 | v=!v;
50 | }
51 | }
52 | }
53 |
54 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ) {
55 | uint *cnts, c, ca, cb, cc, ct; bool v, va, vb, vp;
56 | siz i, a, b, h=R[0].h, w=R[0].w, m=R[0].m; RLE A, B;
57 | if(n==0) { rleInit(M,0,0,0,0); return; }
58 | if(n==1) { rleInit(M,h,w,m,R[0].cnts); return; }
59 | cnts = malloc(sizeof(uint)*(h*w+1));
60 | for( a=0; a<m; a++ ) cnts[a]=R[0].cnts[a];
61 | for( i=1; i<n; i++ ) {
62 | B=R[i]; if(B.h!=h || B.w!=w) { h=w=m=0; break; }
63 | rleInit(&A, h, w, m, cnts); ca=A.cnts[0]; cb=B.cnts[0];
64 | v=va=vb=0; m=0; a=b=1; cc=0; ct=1;
65 | while( ct>0 ) {
66 | c=umin(ca,cb); cc+=c; ct=0;
67 | ca-=c; if(!ca && a<A.m) { ca=A.cnts[a++]; va=!va; } ct+=ca;
68 | cb-=c; if(!cb && b<B.m) { cb=B.cnts[b++]; vb=!vb; } ct+=cb;
69 | vp=v; if(intersect) v=va&&vb; else v=va||vb;
70 | if( v!=vp || ct==0 ) { cnts[m++]=cc; cc=0; }
71 | }
72 | rleFree(&A);
73 | }
74 | rleInit(M, h, w, m, cnts); free(cnts);
75 | }
76 |
77 | void rleArea( const RLE *R, siz n, uint *a ) {
78 | for( siz i=0; i<n; i++ ) {
79 | a[i]=0; for( siz j=1; j<R[i].m; j+=2 ) a[i]+=R[i].cnts[j];
80 | }
81 | }
82 |
83 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) {
84 | siz g, d; BB db, gb; bool crowd;
85 | db=malloc(sizeof(double)*m*4); rleToBbox(dt,db,m);
86 | gb=malloc(sizeof(double)*n*4); rleToBbox(gt,gb,n);
87 | bbIou(db,gb,m,n,iscrowd,o); free(db); free(gb);
88 | for( g=0; g<n; g++ ) for( d=0; d<m; d++ ) if(o[g*m+d]>0) {
89 | crowd=iscrowd!=NULL && iscrowd[g];
90 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; }
91 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; bool va, vb;
92 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0;
93 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1;
94 | while( ct>0 ) {
95 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0;
96 | ca-=c; if(!ca && a<ka) { ca=dt[d].cnts[a++]; va=!va; } ct+=ca;
97 | cb-=c; if(!cb && b<kb) { cb=gt[g].cnts[b++]; vb=!vb; } ct+=cb;
98 | }
99 | if(i==0) u=1; else if(crowd) rleArea(dt+d, 1, &u);
100 | o[g*m+d] = (double)i/(double)u;
101 | }
102 | }
103 |
104 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) {
105 | double h, w, i, u, ga, da; siz g, d; bool crowd;
106 | for( g=0; g<n; g++ ) {
107 | BB G=gt+g*4; ga=G[2]*G[3]; crowd=iscrowd!=NULL && iscrowd[g];
108 | for( d=0; d<m; d++ ) {
109 | BB D=dt+d*4; da=D[2]*D[3]; o[g*m+d]=0;
110 | w=fmin(D[2]+D[0], G[2]+G[0]) - fmax(D[0], G[0]); if(w<=0) continue;
111 | h=fmin(D[3]+D[1], G[3]+G[1]) - fmax(D[1], G[1]); if(h<=0) continue;
112 | i=w*h; u = crowd ? da : da+ga-i; o[g*m+d]=i/u;
113 | }
114 | }
115 | }
116 |
117 | void rleToBbox( const RLE *R, BB bb, siz n ) {
118 | for( siz i=0; i<n; i++ ) {
119 | uint h, w, x, y, xs, ys, xe, ye, xp=0, cc, t; siz j, m;
120 | h=(uint)R[i].h; w=(uint)R[i].w; m=((siz)(R[i].m/2))*2;
121 | xs=w; ys=h; xe=ye=0; cc=0;
122 | if(m==0) { bb[4*i+0]=bb[4*i+1]=bb[4*i+2]=bb[4*i+3]=0; continue; }
123 | for( j=0; j<m; j++ ) {
124 | cc+=R[i].cnts[j]; t=cc-j%2; y=t%h; x=(t-y)/h;
125 | if(j%2==0) xp=x; else if(xp<x) { ys=0; ye=h-1; }
126 | xs=umin(xs,x); xe=umax(xe,x); ys=umin(ys,y); ye=umax(ye,y);
127 | }
128 | bb[4*i+0]=xs; bb[4*i+2]=xe-xs+1;
129 | bb[4*i+1]=ys; bb[4*i+3]=ye-ys+1;
130 | }
131 | }
132 |
133 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) {
134 | for( siz i=0; i<n; i++ ) {
135 | double xs=bb[4*i+0], xe=xs+bb[4*i+2];
136 | double ys=bb[4*i+1], ye=ys+bb[4*i+3];
137 | double xy[8] = {xs,ys,xs,ye,xe,ye,xe,ys};
138 | rleFrPoly( R+i, xy, 4, h, w );
139 | }
140 | }
141 |
142 | int uintCompare(const void *a, const void *b) {
143 | uint c=*((uint*)a), d=*((uint*)b); return c>d?1:c<d?-1:0;
144 | }
145 |
146 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) {
147 | // upsample and get discrete points densely along entire boundary
148 | siz j, m=0; double scale=5; int *x, *y, *u, *v; uint *a, *b;
149 | x=malloc(sizeof(int)*(k+1)); y=malloc(sizeof(int)*(k+1));
150 | for(j=0; j<k; j++) x[j]=(int)(scale*xy[j*2+0]+.5); x[k]=x[0];
151 | for(j=0; j<k; j++) y[j]=(int)(scale*xy[j*2+1]+.5); y[k]=y[0];
152 | for(j=0; j<k; j++) m+=umax(abs(x[j]-x[j+1]), abs(y[j]-y[j+1]))+1;
153 | u=malloc(sizeof(int)*m); v=malloc(sizeof(int)*m); m=0;
154 | for( j=0; j<k; j++ ) {
155 | int xs=x[j], xe=x[j+1], ys=y[j], ye=y[j+1], dx, dy, t, flip; double s;
156 | dx=abs(xe-xs); dy=abs(ys-ye);
157 | flip = (dx>=dy && xs>xe) || (dx<dy && ys>ye);
158 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; }
159 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy;
160 | if(dx>=dy) for( int d=0; d<=dx; d++ ) {
161 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++;
162 | } else for( int d=0; d<=dy; d++ ) {
163 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++;
164 | }
165 | }
166 | // get points along y-boundary and downsample
167 | free(x); free(y); k=m; m=0; double xd, yd;
168 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k);
169 | for( j=1; j<k; j++ ) if(u[j]!=u[j-1]) {
170 | xd=(double)(u[j]<u[j-1]?u[j]:u[j]-1); xd=(xd+.5)/scale-.5;
171 | if( floor(xd)!=xd || xd<0 || xd>w-1 ) continue;
172 | yd=(double)(v[j]<v[j-1]?v[j]:v[j-1]); yd=(yd+.5)/scale-.5;
173 | if(yd<0) yd=0; else if(yd>h) yd=h; yd=ceil(yd);
174 | x[m]=(int) xd; y[m]=(int) yd; m++;
175 | }
176 | // compute rle encoding given y-boundary points
177 | k=m; a=malloc(sizeof(uint)*(k+1));
178 | for( j=0; j<k; j++ ) a[j]=(uint)(x[j]*(int)(h)+y[j]);
179 | a[k++]=(uint)(h*w); free(u); free(v); free(x); free(y);
180 | qsort(a, k, sizeof(uint), uintCompare); uint p=0;
181 | for( j=0; j<k; j++ ) { uint t=a[j]; a[j]-=p; p=t; }
182 | b=malloc(sizeof(uint)*k); j=m=0; b[m++]=a[j++];
183 | while(j<k) if(a[j]>0) b[m++]=a[j++]; else {
184 | j++; if(j<k) b[m-1]+=a[j++]; }
185 | rleInit(R, h, w, m, b); free(a); free(b);
186 | }
187 |
188 | char* rleToString( const RLE *R ) {
189 | siz i, m=R->m, p=0; long x; bool more;
190 | char *s=malloc(sizeof(char)*m*6);
191 | for( i=0; i<m; i++ ) {
192 | x=(long) R->cnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1;
193 | while( more ) {
194 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0;
195 | if(more) c |= 0x20; c+=48; s[p++]=c;
196 | }
197 | }
198 | s[p]=0; return s;
199 | }
200 |
201 | void rleFrString( RLE *R, char *s, siz h, siz w ) {
202 | siz m=0, p=0, k; long x; bool more; uint *cnts;
203 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0;
204 | while( s[p] ) {
205 | x=0; k=0; more=1;
206 | while( more ) {
207 | char c=s[p]-48; x |= (c & 0x1f) << 5*k;
208 | more = c & 0x20; p++; k++;
209 | if(!more && (c & 0x10)) x |= -1 << 5*k;
210 | }
211 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x;
212 | }
213 | rleInit(R,h,w,m,cnts); free(cnts);
214 | }
215 |
--------------------------------------------------------------------------------
/lib/pycocotools/maskApi.h:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #pragma once
8 | #include <stdbool.h>
9 |
10 | typedef unsigned int uint;
11 | typedef unsigned long siz;
12 | typedef unsigned char byte;
13 | typedef double* BB;
14 | typedef struct { siz h, w, m; uint *cnts; } RLE;
15 |
16 | // Initialize/destroy RLE.
17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | from fast_rcnn.config import cfg 12 | from fast_rcnn.bbox_transform import bbox_transform 13 | from utils.cython_bbox import bbox_overlaps 14 | import PIL 15 | 16 | def prepare_roidb(imdb): 17 | """Enrich the imdb's roidb by adding some derived quantities that 18 | are useful for training. This function precomputes the maximum 19 | overlap, taken over ground-truth boxes, between each ROI and 20 | each ground-truth box. The class with maximum overlap is also 21 | recorded. 
22 | """ 23 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 24 | for i in xrange(imdb.num_images)] 25 | roidb = imdb.roidb 26 | for i in xrange(len(imdb.image_index)): 27 | roidb[i]['image'] = imdb.image_path_at(i) 28 | roidb[i]['width'] = sizes[i][0] 29 | roidb[i]['height'] = sizes[i][1] 30 | # need gt_overlaps as a dense array for argmax 31 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 32 | # max overlap with gt over classes (columns) 33 | max_overlaps = gt_overlaps.max(axis=1) 34 | # gt class that had the max overlap 35 | max_classes = gt_overlaps.argmax(axis=1) 36 | roidb[i]['max_classes'] = max_classes 37 | roidb[i]['max_overlaps'] = max_overlaps 38 | # sanity checks 39 | # max overlap of 0 => class should be zero (background) 40 | zero_inds = np.where(max_overlaps == 0)[0] 41 | assert all(max_classes[zero_inds] == 0) 42 | # max overlap > 0 => class should not be zero (must be a fg class) 43 | nonzero_inds = np.where(max_overlaps > 0)[0] 44 | assert all(max_classes[nonzero_inds] != 0) 45 | 46 | def add_bbox_regression_targets(roidb): 47 | """Add information needed to train bounding-box regressors.""" 48 | assert len(roidb) > 0 49 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 50 | 51 | num_images = len(roidb) 52 | # Infer number of classes from the number of columns in gt_overlaps 53 | num_classes = roidb[0]['gt_overlaps'].shape[1] 54 | for im_i in xrange(num_images): 55 | rois = roidb[im_i]['boxes'] 56 | max_overlaps = roidb[im_i]['max_overlaps'] 57 | max_classes = roidb[im_i]['max_classes'] 58 | roidb[im_i]['bbox_targets'] = \ 59 | _compute_targets(rois, max_overlaps, max_classes) 60 | 61 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 62 | # Use fixed / precomputed "means" and "stds" instead of empirical values 63 | means = np.tile( 64 | np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)) 65 | stds = np.tile( 66 | np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)) 67 | else: 68 | # Compute values needed for means and stds 69 | # var(x) = E(x^2) - E(x)^2 70 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 71 | sums = np.zeros((num_classes, 4)) 72 | squared_sums = np.zeros((num_classes, 4)) 73 | for im_i in xrange(num_images): 74 | targets = roidb[im_i]['bbox_targets'] 75 | for cls in xrange(1, num_classes): 76 | cls_inds = np.where(targets[:, 0] == cls)[0] 77 | if cls_inds.size > 0: 78 | class_counts[cls] += cls_inds.size 79 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 80 | squared_sums[cls, :] += \ 81 | (targets[cls_inds, 1:] ** 2).sum(axis=0) 82 | 83 | means = sums / class_counts 84 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 85 | 86 | print 'bbox target means:' 87 | print means 88 | print means[1:, :].mean(axis=0) # ignore bg class 89 | print 'bbox target stdevs:' 90 | print stds 91 | print stds[1:, :].mean(axis=0) # ignore bg class 92 | 93 | # Normalize targets 94 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: 95 | print "Normalizing targets" 96 | for im_i in xrange(num_images): 97 | targets = roidb[im_i]['bbox_targets'] 98 | for cls in xrange(1, num_classes): 99 | cls_inds = np.where(targets[:, 0] == cls)[0] 100 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 101 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 102 | else: 103 | print "NOT normalizing targets" 104 | 105 | # These values will be needed for making predictions 106 | # (the predicts will need to be unnormalized and uncentered) 107 | return means.ravel(), stds.ravel() 108 | 109 | def _compute_targets(rois, overlaps, 
labels):
110 | """Compute bounding-box regression targets for an image."""
111 | # Indices of ground-truth ROIs
112 | gt_inds = np.where(overlaps == 1)[0]
113 | if len(gt_inds) == 0:
114 | # Bail if the image has no ground-truth ROIs
115 | return np.zeros((rois.shape[0], 5), dtype=np.float32)
116 | # Indices of examples for which we try to make predictions
117 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
118 |
119 | # Get IoU overlap between each ex ROI and gt ROI
120 | ex_gt_overlaps = bbox_overlaps(
121 | np.ascontiguousarray(rois[ex_inds, :], dtype=np.float),
122 | np.ascontiguousarray(rois[gt_inds, :], dtype=np.float))
123 |
124 | # Find which gt ROI each ex ROI has max overlap with:
125 | # this will be the ex ROI's gt target
126 | gt_assignment = ex_gt_overlaps.argmax(axis=1)
127 | gt_rois = rois[gt_inds[gt_assignment], :]
128 | ex_rois = rois[ex_inds, :]
129 |
130 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
131 | targets[ex_inds, 0] = labels[ex_inds]
132 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
133 | return targets
134 |
--------------------------------------------------------------------------------
/lib/rpn/README.md:
--------------------------------------------------------------------------------
1 | ### `rpn` module overview
2 |
3 | ##### `generate_anchors.py`
4 |
5 | Generates a regular grid of multi-scale, multi-aspect anchor boxes.
6 |
7 | ##### `proposal_layer.py`
8 |
9 | Converts RPN outputs (per-anchor scores and bbox regression estimates) into object proposals.
10 |
11 | ##### `anchor_target_layer.py`
12 |
13 | Generates training targets/labels for each anchor. Classification labels are 1 (object), 0 (not object) or -1 (ignore).
14 | Bbox regression targets are specified when the classification label is > 0.
15 |
16 | ##### `proposal_target_layer.py`
17 |
18 | Generates training targets/labels for each object proposal: classification labels 0 - K (bg or object class 1, ... , K)
19 | and bbox regression targets when the label is > 0.
20 |
21 | ##### `generate.py`
22 |
23 | Generates object detection proposals from an imdb using an RPN.
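
##### Quick sanity check

A minimal usage sketch (not part of the original pipeline) that exercises the anchor generation entry point; it assumes `lib/` is on `PYTHONPATH`, e.g. via `tools/_init_paths.py`:

```python
import numpy as np
from rpn.generate_anchors import generate_anchors

# Defaults: base_size=16, ratios=[0.5, 1, 2], scales=2**np.arange(3, 6),
# which yields the 9 reference windows listed in generate_anchors.py.
anchors = generate_anchors()
print(anchors.shape)  # (9, 4), each row an (x1, y1, x2, y2) box
```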
24 |
--------------------------------------------------------------------------------
/lib/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/lib/rpn/generate.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from fast_rcnn.config import cfg
9 | from utils.blob import im_list_to_blob
10 | from utils.timer import Timer
11 | import numpy as np
12 | import cv2
13 | import matplotlib.pyplot as plt  # _vis_proposals below uses plt but it was never imported
14 | def _vis_proposals(im, dets, thresh=0.5):
15 | """Draw detected bounding boxes."""
16 | inds = np.where(dets[:, -1] >= thresh)[0]
17 | if len(inds) == 0:
18 | return
19 |
20 | class_name = 'obj'
21 | im = im[:, :, (2, 1, 0)]
22 | fig, ax = plt.subplots(figsize=(12, 12))
23 | ax.imshow(im, aspect='equal')
24 | for i in inds:
25 | bbox = dets[i, :4]
26 | score = dets[i, -1]
27 |
28 | ax.add_patch(
29 | plt.Rectangle((bbox[0], bbox[1]),
30 | bbox[2] - bbox[0],
31 | bbox[3] - bbox[1], fill=False,
32 | edgecolor='red', linewidth=3.5)
33 | )
34 | ax.text(bbox[0], bbox[1] - 2,
35 | '{:s} {:.3f}'.format(class_name, score),
36 | bbox=dict(facecolor='blue', alpha=0.5),
37 | fontsize=14, color='white')
38 |
39 | ax.set_title(('{} detections with '
40 | 'p({} | box) >= {:.1f}').format(class_name, class_name,
41 | thresh),
42 | fontsize=14)
43 | plt.axis('off')
44 | plt.tight_layout()
45 | plt.draw()
46 |
47 | def _get_image_blob(im):
48 | """Converts an image into a network input.
49 | 50 | Arguments: 51 | im (ndarray): a color image in BGR order 52 | 53 | Returns: 54 | blob (ndarray): a data blob holding an image pyramid 55 | im_scale_factors (list): list of image scales (relative to im) used 56 | in the image pyramid 57 | """ 58 | im_orig = im.astype(np.float32, copy=True) 59 | im_orig -= cfg.PIXEL_MEANS 60 | 61 | im_shape = im_orig.shape 62 | im_size_min = np.min(im_shape[0:2]) 63 | im_size_max = np.max(im_shape[0:2]) 64 | 65 | processed_ims = [] 66 | 67 | assert len(cfg.TEST.SCALES) == 1 68 | target_size = cfg.TEST.SCALES[0] 69 | 70 | im_scale = float(target_size) / float(im_size_min) 71 | # Prevent the biggest axis from being more than MAX_SIZE 72 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 73 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 74 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 75 | interpolation=cv2.INTER_LINEAR) 76 | im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :] 77 | processed_ims.append(im) 78 | 79 | # Create a blob to hold the input images 80 | blob = im_list_to_blob(processed_ims) 81 | 82 | return blob, im_info 83 | 84 | def im_proposals(net, im): 85 | """Generate RPN proposals on a single image.""" 86 | blobs = {} 87 | blobs['data'], blobs['im_info'] = _get_image_blob(im) 88 | net.blobs['data'].reshape(*(blobs['data'].shape)) 89 | net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) 90 | blobs_out = net.forward( 91 | data=blobs['data'].astype(np.float32, copy=False), 92 | im_info=blobs['im_info'].astype(np.float32, copy=False)) 93 | 94 | scale = blobs['im_info'][0, 2] 95 | boxes = blobs_out['rois'][:, 1:].copy() / scale 96 | scores = blobs_out['scores'].copy() 97 | return boxes, scores 98 | 99 | def imdb_proposals(net, imdb): 100 | """Generate RPN proposals on all images in an imdb.""" 101 | 102 | _t = Timer() 103 | imdb_boxes = [[] for _ in xrange(imdb.num_images)] 104 | for i in xrange(imdb.num_images): 105 | im = cv2.imread(imdb.image_path_at(i)) 106 | _t.tic() 107 | imdb_boxes[i], scores = im_proposals(net, im) 108 | _t.toc() 109 | print 'im_proposals: {:d}/{:d} {:.3f}s' \ 110 | .format(i + 1, imdb.num_images, _t.average_time) 111 | if 0: 112 | dets = np.hstack((imdb_boxes[i], scores)) 113 | # from IPython import embed; embed() 114 | _vis_proposals(im, dets[:3, :], thresh=0.9) 115 | plt.show() 116 | 117 | return imdb_boxes 118 | -------------------------------------------------------------------------------- /lib/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 11 | # 12 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 13 | # >> anchors 14 | # 15 | # anchors = 16 | # 17 | # -83 -39 100 56 18 | # -175 -87 192 104 19 | # -359 -183 376 200 20 | # -55 -55 72 72 21 | # -119 -119 136 136 22 | # -247 -247 264 264 23 | # -35 -79 52 96 24 | # -79 -167 96 184 25 | # -167 -343 184 360 26 | 27 | #array([[ -83., -39., 100., 56.], 28 | # [-175., -87., 192., 104.], 29 | # [-359., -183., 376., 200.], 30 | # [ -55., -55., 72., 72.], 31 | # [-119., -119., 136., 136.], 32 | # [-247., -247., 264., 264.], 33 | # [ -35., 
-79., 52., 96.], 34 | # [ -79., -167., 96., 184.], 35 | # [-167., -343., 184., 360.]]) 36 | 37 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 38 | scales=2**np.arange(3, 6)): 39 | """ 40 | Generate anchor (reference) windows by enumerating aspect ratios X 41 | scales wrt a reference (0, 0, 15, 15) window. 42 | """ 43 | 44 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 45 | ratio_anchors = _ratio_enum(base_anchor, ratios) 46 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 47 | for i in xrange(ratio_anchors.shape[0])]) 48 | return anchors 49 | 50 | def _whctrs(anchor): 51 | """ 52 | Return width, height, x center, and y center for an anchor (window). 53 | """ 54 | 55 | w = anchor[2] - anchor[0] + 1 56 | h = anchor[3] - anchor[1] + 1 57 | x_ctr = anchor[0] + 0.5 * (w - 1) 58 | y_ctr = anchor[1] + 0.5 * (h - 1) 59 | return w, h, x_ctr, y_ctr 60 | 61 | def _mkanchors(ws, hs, x_ctr, y_ctr): 62 | """ 63 | Given a vector of widths (ws) and heights (hs) around a center 64 | (x_ctr, y_ctr), output a set of anchors (windows). 65 | """ 66 | 67 | ws = ws[:, np.newaxis] 68 | hs = hs[:, np.newaxis] 69 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 70 | y_ctr - 0.5 * (hs - 1), 71 | x_ctr + 0.5 * (ws - 1), 72 | y_ctr + 0.5 * (hs - 1))) 73 | return anchors 74 | 75 | def _ratio_enum(anchor, ratios): 76 | """ 77 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 78 | """ 79 | 80 | w, h, x_ctr, y_ctr = _whctrs(anchor) 81 | size = w * h 82 | size_ratios = size / ratios 83 | ws = np.round(np.sqrt(size_ratios)) 84 | hs = np.round(ws * ratios) 85 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 86 | return anchors 87 | 88 | def _scale_enum(anchor, scales): 89 | """ 90 | Enumerate a set of anchors for each scale wrt an anchor. 91 | """ 92 | 93 | w, h, x_ctr, y_ctr = _whctrs(anchor) 94 | ws = w * scales 95 | hs = h * scales 96 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 97 | return anchors 98 | 99 | if __name__ == '__main__': 100 | import time 101 | t = time.time() 102 | a = generate_anchors() 103 | print time.time() - t 104 | print a 105 | from IPython import embed; embed() 106 | -------------------------------------------------------------------------------- /lib/rpn/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import caffe 9 | import numpy as np 10 | import yaml 11 | from fast_rcnn.config import cfg 12 | from generate_anchors import generate_anchors 13 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes 14 | from fast_rcnn.nms_wrapper import nms 15 | 16 | DEBUG = False 17 | 18 | class ProposalLayer(caffe.Layer): 19 | """ 20 | Outputs object detection proposals by applying estimated bounding-box 21 | transformations to a set of regular boxes (called "anchors"). 
22 | """ 23 | 24 | def setup(self, bottom, top): 25 | # parse the layer parameter string, which must be valid YAML 26 | layer_params = yaml.load(self.param_str_) 27 | 28 | self._feat_stride = layer_params['feat_stride'] 29 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 30 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 31 | self._num_anchors = self._anchors.shape[0] 32 | 33 | if DEBUG: 34 | print 'feat_stride: {}'.format(self._feat_stride) 35 | print 'anchors:' 36 | print self._anchors 37 | 38 | # rois blob: holds R regions of interest, each is a 5-tuple 39 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 40 | # rectangle (x1, y1, x2, y2) 41 | top[0].reshape(1, 5) 42 | 43 | # scores blob: holds scores for R regions of interest 44 | if len(top) > 1: 45 | top[1].reshape(1, 1, 1, 1) 46 | 47 | def forward(self, bottom, top): 48 | # Algorithm: 49 | # 50 | # for each (H, W) location i 51 | # generate A anchor boxes centered on cell i 52 | # apply predicted bbox deltas at cell i to each of the A anchors 53 | # clip predicted boxes to image 54 | # remove predicted boxes with either height or width < threshold 55 | # sort all (proposal, score) pairs by score from highest to lowest 56 | # take top pre_nms_topN proposals before NMS 57 | # apply NMS with threshold 0.7 to remaining proposals 58 | # take after_nms_topN proposals after NMS 59 | # return the top proposals (-> RoIs top, scores top) 60 | 61 | assert bottom[0].data.shape[0] == 1, \ 62 | 'Only single item batches are supported' 63 | 64 | cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' 65 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 66 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 67 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 68 | min_size = cfg[cfg_key].RPN_MIN_SIZE 69 | 70 | # the first set of _num_anchors channels are bg probs 71 | # the second set are the fg probs, which we want 72 | scores = bottom[0].data[:, self._num_anchors:, :, :] 73 | bbox_deltas = bottom[1].data 74 | im_info = bottom[2].data[0, :] 75 | 76 | if DEBUG: 77 | print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 78 | print 'scale: {}'.format(im_info[2]) 79 | 80 | # 1. 
Generate proposals from bbox deltas and shifted anchors 81 | height, width = scores.shape[-2:] 82 | 83 | if DEBUG: 84 | print 'score map size: {}'.format(scores.shape) 85 | 86 | # Enumerate all shifts 87 | shift_x = np.arange(0, width) * self._feat_stride 88 | shift_y = np.arange(0, height) * self._feat_stride 89 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 90 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 91 | shift_x.ravel(), shift_y.ravel())).transpose() 92 | 93 | # Enumerate all shifted anchors: 94 | # 95 | # add A anchors (1, A, 4) to 96 | # cell K shifts (K, 1, 4) to get 97 | # shift anchors (K, A, 4) 98 | # reshape to (K*A, 4) shifted anchors 99 | A = self._num_anchors 100 | K = shifts.shape[0] 101 | anchors = self._anchors.reshape((1, A, 4)) + \ 102 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 103 | anchors = anchors.reshape((K * A, 4)) 104 | 105 | # Transpose and reshape predicted bbox transformations to get them 106 | # into the same order as the anchors: 107 | # 108 | # bbox deltas will be (1, 4 * A, H, W) format 109 | # transpose to (1, H, W, 4 * A) 110 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 111 | # in slowest to fastest order 112 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) 113 | 114 | # Same story for the scores: 115 | # 116 | # scores are (1, A, H, W) format 117 | # transpose to (1, H, W, A) 118 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 119 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 120 | 121 | # Convert anchors into proposals via bbox transformations 122 | proposals = bbox_transform_inv(anchors, bbox_deltas) 123 | 124 | # 2. clip predicted boxes to image 125 | proposals = clip_boxes(proposals, im_info[:2]) 126 | 127 | # 3. remove predicted boxes with either height or width < threshold 128 | # (NOTE: convert min_size to input image scale stored in im_info[2]) 129 | keep = _filter_boxes(proposals, min_size * im_info[2]) 130 | proposals = proposals[keep, :] 131 | scores = scores[keep] 132 | 133 | # 4. sort all (proposal, score) pairs by score from highest to lowest 134 | # 5. take top pre_nms_topN (e.g. 6000) 135 | order = scores.ravel().argsort()[::-1] 136 | if pre_nms_topN > 0: 137 | order = order[:pre_nms_topN] 138 | proposals = proposals[order, :] 139 | scores = scores[order] 140 | 141 | # 6. apply nms (e.g. threshold = 0.7) 142 | # 7. take after_nms_topN (e.g. 300) 143 | # 8. return the top proposals (-> RoIs top) 144 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 145 | if post_nms_topN > 0: 146 | keep = keep[:post_nms_topN] 147 | proposals = proposals[keep, :] 148 | scores = scores[keep] 149 | 150 | # Output rois blob 151 | # Our RPN implementation only supports a single input image, so all 152 | # batch inds are 0 153 | batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) 154 | blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) 155 | top[0].reshape(*(blob.shape)) 156 | top[0].data[...] = blob 157 | 158 | # [Optional] output scores blob 159 | if len(top) > 1: 160 | top[1].reshape(*(scores.shape)) 161 | top[1].data[...] 
= scores 162 | 163 | def backward(self, top, propagate_down, bottom): 164 | """This layer does not propagate gradients.""" 165 | pass 166 | 167 | def reshape(self, bottom, top): 168 | """Reshaping happens during the call to forward.""" 169 | pass 170 | 171 | def _filter_boxes(boxes, min_size): 172 | """Remove all boxes with any side smaller than min_size.""" 173 | ws = boxes[:, 2] - boxes[:, 0] + 1 174 | hs = boxes[:, 3] - boxes[:, 1] + 1 175 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 176 | return keep 177 | -------------------------------------------------------------------------------- /lib/rpn/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import caffe 9 | import yaml 10 | import numpy as np 11 | import numpy.random as npr 12 | from fast_rcnn.config import cfg 13 | from fast_rcnn.bbox_transform import bbox_transform 14 | from utils.cython_bbox import bbox_overlaps 15 | 16 | DEBUG = False 17 | 18 | class ProposalTargetLayer(caffe.Layer): 19 | """ 20 | Assign object detection proposals to ground-truth targets. Produces proposal 21 | classification labels and bounding-box regression targets. 22 | """ 23 | 24 | def setup(self, bottom, top): 25 | layer_params = yaml.load(self.param_str_) 26 | self._num_classes = layer_params['num_classes'] 27 | 28 | # sampled rois (0, x1, y1, x2, y2) 29 | top[0].reshape(1, 5) 30 | # labels 31 | top[1].reshape(1, 1) 32 | # bbox_targets 33 | top[2].reshape(1, self._num_classes * 4) 34 | # bbox_inside_weights 35 | top[3].reshape(1, self._num_classes * 4) 36 | # bbox_outside_weights 37 | top[4].reshape(1, self._num_classes * 4) 38 | 39 | def forward(self, bottom, top): 40 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 41 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 42 | all_rois = bottom[0].data 43 | # GT boxes (x1, y1, x2, y2, label) 44 | # TODO(rbg): it's annoying that sometimes I have extra info before 45 | # and other times after box coordinates -- normalize to one format 46 | gt_boxes = bottom[1].data 47 | 48 | # Include ground-truth boxes in the set of candidate rois 49 | zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) 50 | all_rois = np.vstack( 51 | (all_rois, np.hstack((zeros, gt_boxes[:, :-1]))) 52 | ) 53 | 54 | # Sanity check: single batch only 55 | assert np.all(all_rois[:, 0] == 0), \ 56 | 'Only single item batches are supported' 57 | 58 | num_images = 1 59 | rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 60 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) 61 | 62 | # Sample rois with classification labels and bounding box regression 63 | # targets 64 | labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( 65 | all_rois, gt_boxes, fg_rois_per_image, 66 | rois_per_image, self._num_classes) 67 | 68 | if DEBUG: 69 | print 'num fg: {}'.format((labels > 0).sum()) 70 | print 'num bg: {}'.format((labels == 0).sum()) 71 | self._count += 1 72 | self._fg_num += (labels > 0).sum() 73 | self._bg_num += (labels == 0).sum() 74 | print 'num fg avg: {}'.format(self._fg_num / self._count) 75 | print 'num bg avg: {}'.format(self._bg_num / self._count) 76 | print 'ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num)) 
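# NOTE: self._count, self._fg_num and self._bg_num are read in the DEBUG
# branch above but never initialized in setup(), so running with
# DEBUG = True would raise AttributeError unless those counters are
# first set to zero (e.g. in setup()).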
77 | 78 | # sampled rois 79 | top[0].reshape(*rois.shape) 80 | top[0].data[...] = rois 81 | 82 | # classification labels 83 | top[1].reshape(*labels.shape) 84 | top[1].data[...] = labels 85 | 86 | # bbox_targets 87 | top[2].reshape(*bbox_targets.shape) 88 | top[2].data[...] = bbox_targets 89 | 90 | # bbox_inside_weights 91 | top[3].reshape(*bbox_inside_weights.shape) 92 | top[3].data[...] = bbox_inside_weights 93 | 94 | # bbox_outside_weights 95 | top[4].reshape(*bbox_inside_weights.shape) 96 | top[4].data[...] = np.array(bbox_inside_weights > 0).astype(np.float32) 97 | 98 | def backward(self, top, propagate_down, bottom): 99 | """This layer does not propagate gradients.""" 100 | pass 101 | 102 | def reshape(self, bottom, top): 103 | """Reshaping happens during the call to forward.""" 104 | pass 105 | 106 | 107 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 108 | """Bounding-box regression targets (bbox_target_data) are stored in a 109 | compact form N x (class, tx, ty, tw, th) 110 | 111 | This function expands those targets into the 4-of-4*K representation used 112 | by the network (i.e. only one class has non-zero targets). 113 | 114 | Returns: 115 | bbox_target (ndarray): N x 4K blob of regression targets 116 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 117 | """ 118 | 119 | clss = bbox_target_data[:, 0] 120 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 121 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 122 | inds = np.where(clss > 0)[0] 123 | for ind in inds: 124 | cls = clss[ind] 125 | start = 4 * cls 126 | end = start + 4 127 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 128 | bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS 129 | return bbox_targets, bbox_inside_weights 130 | 131 | 132 | def _compute_targets(ex_rois, gt_rois, labels): 133 | """Compute bounding-box regression targets for an image.""" 134 | 135 | assert ex_rois.shape[0] == gt_rois.shape[0] 136 | assert ex_rois.shape[1] == 4 137 | assert gt_rois.shape[1] == 4 138 | 139 | targets = bbox_transform(ex_rois, gt_rois) 140 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 141 | # Optionally normalize targets by a precomputed mean and stdev 142 | targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 143 | / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 144 | return np.hstack( 145 | (labels[:, np.newaxis], targets)).astype(np.float32, copy=False) 146 | 147 | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): 148 | """Generate a random sample of RoIs comprising foreground and background 149 | examples. 
150 | """ 151 | # overlaps: (rois x gt_boxes) 152 | overlaps = bbox_overlaps( 153 | np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), 154 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) 155 | gt_assignment = overlaps.argmax(axis=1) 156 | max_overlaps = overlaps.max(axis=1) 157 | labels = gt_boxes[gt_assignment, 4] 158 | 159 | # Select foreground RoIs as those with >= FG_THRESH overlap 160 | fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] 161 | # Guard against the case when an image has fewer than fg_rois_per_image 162 | # foreground RoIs 163 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size) 164 | # Sample foreground regions without replacement 165 | if fg_inds.size > 0: 166 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) 167 | 168 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 169 | bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & 170 | (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 171 | # Compute number of background RoIs to take from this image (guarding 172 | # against there being fewer than desired) 173 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 174 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 175 | # Sample background regions without replacement 176 | if bg_inds.size > 0: 177 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 178 | 179 | # The indices that we're selecting (both fg and bg) 180 | keep_inds = np.append(fg_inds, bg_inds) 181 | # Select sampled values from various arrays: 182 | labels = labels[keep_inds] 183 | # Clamp labels for the background RoIs to 0 184 | labels[fg_rois_per_this_image:] = 0 185 | rois = all_rois[keep_inds] 186 | 187 | bbox_target_data = _compute_targets( 188 | rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) 189 | 190 | bbox_targets, bbox_inside_weights = \ 191 | _get_bbox_regression_labels(bbox_target_data, num_classes) 192 | 193 | return labels, rois, bbox_targets, bbox_inside_weights 194 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import subprocess 14 | import numpy as np 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted fom 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | 30 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | and values giving the absolute path to each directory. 32 | 33 | Starts by looking for the CUDAHOME env variable. If not found, everything 34 | is based on finding 'nvcc' in the PATH. 
35 | """ 36 | 37 | # first check if the CUDAHOME env variable is in use 38 | if 'CUDAHOME' in os.environ: 39 | home = os.environ['CUDAHOME'] 40 | nvcc = pjoin(home, 'bin', 'nvcc') 41 | else: 42 | # otherwise, search the PATH for NVCC 43 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | if nvcc is None: 46 | raise EnvironmentError('The nvcc binary could not be ' 47 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 48 | home = os.path.dirname(os.path.dirname(nvcc)) 49 | 50 | cudaconfig = {'home':home, 'nvcc':nvcc, 51 | 'include': pjoin(home, 'include'), 52 | 'lib64': pjoin(home, 'lib64')} 53 | for k, v in cudaconfig.iteritems(): 54 | if not os.path.exists(v): 55 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | 57 | return cudaconfig 58 | CUDA = locate_cuda() 59 | 60 | 61 | # Obtain the numpy include directory. This logic works across numpy versions. 62 | try: 63 | numpy_include = np.get_include() 64 | except AttributeError: 65 | numpy_include = np.get_numpy_include() 66 | 67 | def customize_compiler_for_nvcc(self): 68 | """inject deep into distutils to customize how the dispatch 69 | to gcc/nvcc works. 70 | 71 | If you subclass UnixCCompiler, it's not trivial to get your subclass 72 | injected in, and still have the right customizations (i.e. 73 | distutils.sysconfig.customize_compiler) run on it. So instead of going 74 | the OO route, I have this. Note, it's kindof like a wierd functional 75 | subclassing going on.""" 76 | 77 | # tell the compiler it can processes .cu 78 | self.src_extensions.append('.cu') 79 | 80 | # save references to the default compiler_so and _comple methods 81 | default_compiler_so = self.compiler_so 82 | super = self._compile 83 | 84 | # now redefine the _compile method. This gets executed for each 85 | # object but distutils doesn't have the ability to change compilers 86 | # based on source extension: we add it. 
87 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 88 | if os.path.splitext(src)[1] == '.cu': 89 | # use the cuda for .cu files 90 | self.set_executable('compiler_so', CUDA['nvcc']) 91 | # use only a subset of the extra_postargs, which are 1-1 translated 92 | # from the extra_compile_args in the Extension class 93 | postargs = extra_postargs['nvcc'] 94 | else: 95 | postargs = extra_postargs['gcc'] 96 | 97 | super(obj, src, ext, cc_args, postargs, pp_opts) 98 | # reset the default compiler_so, which we might have changed for cuda 99 | self.compiler_so = default_compiler_so 100 | 101 | # inject our redefined _compile method into the class 102 | self._compile = _compile 103 | 104 | 105 | # run the customize_compiler 106 | class custom_build_ext(build_ext): 107 | def build_extensions(self): 108 | customize_compiler_for_nvcc(self.compiler) 109 | build_ext.build_extensions(self) 110 | 111 | 112 | ext_modules = [ 113 | Extension( 114 | "utils.cython_bbox", 115 | ["utils/bbox.pyx"], 116 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 117 | include_dirs = [numpy_include] 118 | ), 119 | Extension( 120 | "nms.cpu_nms", 121 | ["nms/cpu_nms.pyx"], 122 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 123 | include_dirs = [numpy_include] 124 | ), 125 | Extension('nms.gpu_nms', 126 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 127 | library_dirs=[CUDA['lib64']], 128 | libraries=['cudart'], 129 | language='c++', 130 | runtime_library_dirs=[CUDA['lib64']], 131 | # this syntax is specific to this build system 132 | # we're only going to use certain compiler args with nvcc and not with 133 | # gcc the implementation of this trick is in customize_compiler() below 134 | extra_compile_args={'gcc': ["-Wno-unused-function"], 135 | 'nvcc': ['-arch=sm_35', 136 | '--ptxas-options=-v', 137 | '-c', 138 | '--compiler-options', 139 | "'-fPIC'"]}, 140 | include_dirs = [numpy_include, CUDA['include']] 141 | ), 142 | Extension( 143 | 'pycocotools._mask', 144 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'], 145 | include_dirs = [numpy_include, 'pycocotools'], 146 | extra_compile_args={ 147 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']}, 148 | ), 149 | ] 150 | 151 | setup( 152 | name='fast_rcnn', 153 | ext_modules=ext_modules, 154 | # inject our custom trigger 155 | cmdclass={'build_ext': custom_build_ext}, 156 | ) 157 | -------------------------------------------------------------------------------- /lib/transform/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolonw/adversarial-frcnn/2a7bb96c9884c0f09ca5bde35a981087be28562b/lib/transform/__init__.py -------------------------------------------------------------------------------- /lib/transform/torch_image_transform_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # -------------------------------------------------------- 5 | 6 | """ Transform images for compatibility with models trained with 7 | https://github.com/facebook/fb.resnet.torch. 
8 | 9 | Usage in model prototxt: 10 | 11 | layer { 12 | name: 'data_xform' 13 | type: 'Python' 14 | bottom: 'data_caffe' 15 | top: 'data' 16 | python_param { 17 | module: 'transform.torch_image_transform_layer' 18 | layer: 'TorchImageTransformLayer' 19 | } 20 | } 21 | """ 22 | 23 | import caffe 24 | from fast_rcnn.config import cfg 25 | import numpy as np 26 | 27 | class TorchImageTransformLayer(caffe.Layer): 28 | def setup(self, bottom, top): 29 | # (1, 3, 1, 1) shaped arrays 30 | self.PIXEL_MEANS = \ 31 | np.array([[[[0.48462227599918]], 32 | [[0.45624044862054]], 33 | [[0.40588363755159]]]]) 34 | self.PIXEL_STDS = \ 35 | np.array([[[[0.22889466674951]], 36 | [[0.22446679341259]], 37 | [[0.22495548344775]]]]) 38 | # The default ("old") pixel means that were already subtracted 39 | channel_swap = (0, 3, 1, 2) 40 | self.OLD_PIXEL_MEANS = \ 41 | cfg.PIXEL_MEANS[np.newaxis, :, :, :].transpose(channel_swap) 42 | 43 | top[0].reshape(*(bottom[0].shape)) 44 | 45 | def forward(self, bottom, top): 46 | ims = bottom[0].data 47 | # Invert the channel means that were already subtracted 48 | ims += self.OLD_PIXEL_MEANS 49 | # 1. Permute BGR to RGB and normalize to [0, 1] 50 | ims = ims[:, [2, 1, 0], :, :] / 255.0 51 | # 2. Remove channel means 52 | ims -= self.PIXEL_MEANS 53 | # 3. Standardize channels 54 | ims /= self.PIXEL_STDS 55 | top[0].reshape(*(ims.shape)) 56 | top[0].data[...] = ims 57 | 58 | def backward(self, top, propagate_down, bottom): 59 | """This layer does not propagate gradients.""" 60 | pass 61 | 62 | def reshape(self, bottom, top): 63 | """Reshaping happens during the call to forward.""" 64 | pass 65 | -------------------------------------------------------------------------------- /lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.so 3 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - 
query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import cv2 12 | 13 | def im_list_to_blob(ims): 14 | """Convert a list of images into a network input. 15 | 16 | Assumes images are already prepared (means subtracted, BGR order, ...). 17 | """ 18 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 19 | num_images = len(ims) 20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 21 | dtype=np.float32) 22 | for i in xrange(num_images): 23 | im = ims[i] 24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 25 | # Move channels (axis 3) to axis 1 26 | # Axis order will become: (batch elem, channel, height, width) 27 | channel_swap = (0, 3, 1, 2) 28 | blob = blob.transpose(channel_swap) 29 | return blob 30 | 31 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 32 | """Mean subtract and scale an image for use in a blob.""" 33 | im = im.astype(np.float32, copy=False) 34 | im -= pixel_means 35 | im_shape = im.shape 36 | im_size_min = np.min(im_shape[0:2]) 37 | im_size_max = np.max(im_shape[0:2]) 38 | im_scale = float(target_size) / float(im_size_min) 39 | # Prevent the biggest axis from being more than MAX_SIZE 40 | if np.round(im_scale * im_size_max) > max_size: 41 | im_scale = float(max_size) / float(im_size_max) 42 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 43 | interpolation=cv2.INTER_LINEAR) 44 | 45 | return im, im_scale 46 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 
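# A minimal usage sketch for the Timer class below (illustrative only;
# do_work() is a hypothetical stand-in for the code being timed):
#   timer = Timer()
#   timer.tic()
#   do_work()
#   avg_secs = timer.toc()  # average=True (the default) returns the running mean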
18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/fast_rcnn/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG16/fast_rcnn/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_fast_rcnn" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/fast_rcnn/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | 3 | input: "data" 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 224 8 | dim: 224 9 | } 10 | 11 | input: "rois" 12 | input_shape { 13 | dim: 1 # to be changed on-the-fly to num ROIs 14 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 15 | } 16 | 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_1" 38 | type: "ReLU" 39 | bottom: "conv1_1" 40 | top: "conv1_1" 41 | } 42 | layer { 43 | name: "conv1_2" 44 | type: "Convolution" 45 | bottom: "conv1_1" 46 | top: "conv1_2" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | layer { 62 | name: "relu1_2" 63 | type: "ReLU" 64 | bottom: "conv1_2" 65 | top: "conv1_2" 66 | } 67 | layer { 68 | name: "pool1" 69 | type: "Pooling" 70 | bottom: "conv1_2" 71 | top: "pool1" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv2_1" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: "conv2_1" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | name: "relu2_1" 99 | type: "ReLU" 100 | bottom: "conv2_1" 101 | top: "conv2_1" 102 | } 103 | layer { 104 | name: "conv2_2" 105 | type: "Convolution" 106 | bottom: "conv2_1" 107 | top: "conv2_2" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 131 | bottom: "conv2_2"
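# Note: the four 2x2/stride-2 pooling stages in this network each halve the
# feature map, so a 224x224 input reaches conv5_3 at 14x14, i.e. 1/16 scale;
# this is where roi_pool5's spatial_scale of 0.0625 comes from.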
132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | decay_mult: 1 147 | } 148 | param { 149 | lr_mult: 2 150 | decay_mult: 0 151 | } 152 | convolution_param { 153 | num_output: 256 154 | pad: 1 155 | kernel_size: 3 156 | } 157 | } 158 | layer { 159 | name: "relu3_1" 160 | type: "ReLU" 161 | bottom: "conv3_1" 162 | top: "conv3_1" 163 | } 164 | layer { 165 | name: "conv3_2" 166 | type: "Convolution" 167 | bottom: "conv3_1" 168 | top: "conv3_2" 169 | param { 170 | lr_mult: 1 171 | decay_mult: 1 172 | } 173 | param { 174 | lr_mult: 2 175 | decay_mult: 0 176 | } 177 | convolution_param { 178 | num_output: 256 179 | pad: 1 180 | kernel_size: 3 181 | } 182 | } 183 | layer { 184 | name: "relu3_2" 185 | type: "ReLU" 186 | bottom: "conv3_2" 187 | top: "conv3_2" 188 | } 189 | layer { 190 | name: "conv3_3" 191 | type: "Convolution" 192 | bottom: "conv3_2" 193 | top: "conv3_3" 194 | param { 195 | lr_mult: 1 196 | decay_mult: 1 197 | } 198 | param { 199 | lr_mult: 2 200 | decay_mult: 0 201 | } 202 | convolution_param { 203 | num_output: 256 204 | pad: 1 205 | kernel_size: 3 206 | } 207 | } 208 | layer { 209 | name: "relu3_3" 210 | type: "ReLU" 211 | bottom: "conv3_3" 212 | top: "conv3_3" 213 | } 214 | layer { 215 | name: "pool3" 216 | type: "Pooling" 217 | bottom: "conv3_3" 218 | top: "pool3" 219 | pooling_param { 220 | pool: MAX 221 | kernel_size: 2 222 | stride: 2 223 | } 224 | } 225 | layer { 226 | name: "conv4_1" 227 | type: "Convolution" 228 | bottom: "pool3" 229 | top: "conv4_1" 230 | param { 231 | lr_mult: 1 232 | decay_mult: 1 233 | } 234 | param { 235 | lr_mult: 2 236 | decay_mult: 0 237 | } 238 | convolution_param { 239 | num_output: 512 240 | pad: 1 241 | kernel_size: 3 242 | } 243 | } 244 | layer { 245 | name: "relu4_1" 246 | type: "ReLU" 247 | bottom: "conv4_1" 248 | top: "conv4_1" 249 | } 250 | layer { 251 | name: "conv4_2" 252 | type: "Convolution" 253 | bottom: "conv4_1" 254 | top: "conv4_2" 255 | param { 256 | lr_mult: 1 257 | decay_mult: 1 258 | } 259 | param { 260 | lr_mult: 2 261 | decay_mult: 0 262 | } 263 | convolution_param { 264 | num_output: 512 265 | pad: 1 266 | kernel_size: 3 267 | } 268 | } 269 | layer { 270 | name: "relu4_2" 271 | type: "ReLU" 272 | bottom: "conv4_2" 273 | top: "conv4_2" 274 | } 275 | layer { 276 | name: "conv4_3" 277 | type: "Convolution" 278 | bottom: "conv4_2" 279 | top: "conv4_3" 280 | param { 281 | lr_mult: 1 282 | decay_mult: 1 283 | } 284 | param { 285 | lr_mult: 2 286 | decay_mult: 0 287 | } 288 | convolution_param { 289 | num_output: 512 290 | pad: 1 291 | kernel_size: 3 292 | } 293 | } 294 | layer { 295 | name: "relu4_3" 296 | type: "ReLU" 297 | bottom: "conv4_3" 298 | top: "conv4_3" 299 | } 300 | layer { 301 | name: "pool4" 302 | type: "Pooling" 303 | bottom: "conv4_3" 304 | top: "pool4" 305 | pooling_param { 306 | pool: MAX 307 | kernel_size: 2 308 | stride: 2 309 | } 310 | } 311 | layer { 312 | name: "conv5_1" 313 | type: "Convolution" 314 | bottom: "pool4" 315 | top: "conv5_1" 316 | param { 317 | lr_mult: 1 318 | decay_mult: 1 319 | } 320 | param { 321 | lr_mult: 2 322 | decay_mult: 0 323 | } 324 | convolution_param { 325 | num_output: 512 326 | pad: 1 327 | kernel_size: 3 328 | } 329 | } 330 | layer { 331 | name: "relu5_1" 332 | type: "ReLU" 333 | bottom: "conv5_1" 334 | top: "conv5_1" 335 | } 336 | layer { 337 | name: "conv5_2" 338 | type: 
"Convolution" 339 | bottom: "conv5_1" 340 | top: "conv5_2" 341 | param { 342 | lr_mult: 1 343 | decay_mult: 1 344 | } 345 | param { 346 | lr_mult: 2 347 | decay_mult: 0 348 | } 349 | convolution_param { 350 | num_output: 512 351 | pad: 1 352 | kernel_size: 3 353 | } 354 | } 355 | layer { 356 | name: "relu5_2" 357 | type: "ReLU" 358 | bottom: "conv5_2" 359 | top: "conv5_2" 360 | } 361 | layer { 362 | name: "conv5_3" 363 | type: "Convolution" 364 | bottom: "conv5_2" 365 | top: "conv5_3" 366 | param { 367 | lr_mult: 1 368 | decay_mult: 1 369 | } 370 | param { 371 | lr_mult: 2 372 | decay_mult: 0 373 | } 374 | convolution_param { 375 | num_output: 512 376 | pad: 1 377 | kernel_size: 3 378 | } 379 | } 380 | layer { 381 | name: "relu5_3" 382 | type: "ReLU" 383 | bottom: "conv5_3" 384 | top: "conv5_3" 385 | } 386 | layer { 387 | name: "roi_pool5" 388 | type: "ROIPooling" 389 | bottom: "conv5_3" 390 | bottom: "rois" 391 | top: "pool5" 392 | roi_pooling_param { 393 | pooled_w: 7 394 | pooled_h: 7 395 | spatial_scale: 0.0625 # 1/16 396 | } 397 | } 398 | layer { 399 | name: "fc6" 400 | type: "InnerProduct" 401 | bottom: "pool5" 402 | top: "fc6" 403 | param { 404 | lr_mult: 1 405 | decay_mult: 1 406 | } 407 | param { 408 | lr_mult: 2 409 | decay_mult: 0 410 | } 411 | inner_product_param { 412 | num_output: 4096 413 | } 414 | } 415 | layer { 416 | name: "relu6" 417 | type: "ReLU" 418 | bottom: "fc6" 419 | top: "fc6" 420 | } 421 | layer { 422 | name: "drop6" 423 | type: "Dropout" 424 | bottom: "fc6" 425 | top: "fc6" 426 | dropout_param { 427 | dropout_ratio: 0.5 428 | } 429 | } 430 | layer { 431 | name: "fc7" 432 | type: "InnerProduct" 433 | bottom: "fc6" 434 | top: "fc7" 435 | param { 436 | lr_mult: 1 437 | decay_mult: 1 438 | } 439 | param { 440 | lr_mult: 2 441 | decay_mult: 0 442 | } 443 | inner_product_param { 444 | num_output: 4096 445 | } 446 | } 447 | layer { 448 | name: "relu7" 449 | type: "ReLU" 450 | bottom: "fc7" 451 | top: "fc7" 452 | } 453 | layer { 454 | name: "drop7" 455 | type: "Dropout" 456 | bottom: "fc7" 457 | top: "fc7" 458 | dropout_param { 459 | dropout_ratio: 0.5 460 | } 461 | } 462 | layer { 463 | name: "cls_score" 464 | type: "InnerProduct" 465 | bottom: "fc7" 466 | top: "cls_score" 467 | param { 468 | lr_mult: 1 469 | decay_mult: 1 470 | } 471 | param { 472 | lr_mult: 2 473 | decay_mult: 0 474 | } 475 | inner_product_param { 476 | num_output: 21 477 | weight_filler { 478 | type: "gaussian" 479 | std: 0.01 480 | } 481 | bias_filler { 482 | type: "constant" 483 | value: 0 484 | } 485 | } 486 | } 487 | layer { 488 | name: "bbox_pred" 489 | type: "InnerProduct" 490 | bottom: "fc7" 491 | top: "bbox_pred" 492 | param { 493 | lr_mult: 1 494 | decay_mult: 1 495 | } 496 | param { 497 | lr_mult: 2 498 | decay_mult: 0 499 | } 500 | inner_product_param { 501 | num_output: 84 502 | weight_filler { 503 | type: "gaussian" 504 | std: 0.001 505 | } 506 | bias_filler { 507 | type: "constant" 508 | value: 0 509 | } 510 | } 511 | } 512 | layer { 513 | name: "cls_prob" 514 | type: "Softmax" 515 | bottom: "cls_score" 516 | top: "cls_prob" 517 | } 518 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/fast_rcnn/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | top: 'bbox_targets' 9 | top: 'bbox_inside_weights' 10 | top: 'bbox_outside_weights' 11 | python_param { 12 
| module: 'roi_data_layer.layer' 13 | layer: 'RoIDataLayer' 14 | param_str: "'num_classes': 21" 15 | } 16 | } 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_1" 38 | type: "ReLU" 39 | bottom: "conv1_1" 40 | top: "conv1_1" 41 | } 42 | layer { 43 | name: "conv1_2" 44 | type: "Convolution" 45 | bottom: "conv1_1" 46 | top: "conv1_2" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | layer { 62 | name: "relu1_2" 63 | type: "ReLU" 64 | bottom: "conv1_2" 65 | top: "conv1_2" 66 | } 67 | layer { 68 | name: "pool1" 69 | type: "Pooling" 70 | bottom: "conv1_2" 71 | top: "pool1" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv2_1" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: "conv2_1" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | name: "relu2_1" 99 | type: "ReLU" 100 | bottom: "conv2_1" 101 | top: "conv2_1" 102 | } 103 | layer { 104 | name: "conv2_2" 105 | type: "Convolution" 106 | bottom: "conv2_1" 107 | top: "conv2_2" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | } 150 | convolution_param { 151 | num_output: 256 152 | pad: 1 153 | kernel_size: 3 154 | } 155 | } 156 | layer { 157 | name: "relu3_1" 158 | type: "ReLU" 159 | bottom: "conv3_1" 160 | top: "conv3_1" 161 | } 162 | layer { 163 | name: "conv3_2" 164 | type: "Convolution" 165 | bottom: "conv3_1" 166 | top: "conv3_2" 167 | param { 168 | lr_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | } 173 | convolution_param { 174 | num_output: 256 175 | pad: 1 176 | kernel_size: 3 177 | } 178 | } 179 | layer { 180 | name: "relu3_2" 181 | type: "ReLU" 182 | bottom: "conv3_2" 183 | top: "conv3_2" 184 | } 185 | layer { 186 | name: "conv3_3" 187 | type: "Convolution" 188 | bottom: "conv3_2" 189 | top: "conv3_3" 190 | param { 191 | lr_mult: 1 192 | } 193 | param { 194 | lr_mult: 2 195 | } 196 | convolution_param { 197 | num_output: 256 198 | pad: 1 199 | kernel_size: 3 200 | } 201 | } 202 | layer { 203 | name: "relu3_3" 204 | type: "ReLU" 205 | bottom: "conv3_3" 206 | top: "conv3_3" 207 | } 208 | layer { 209 | name: "pool3" 210 | type: "Pooling" 211 | bottom: "conv3_3" 212 | top: "pool3" 213 | pooling_param { 214 | pool: MAX 215 | kernel_size: 2 216 | stride: 2 217 | } 218 | } 219 | layer { 220 | name: "conv4_1" 221 | type: "Convolution" 222 | 
bottom: "pool3" 223 | top: "conv4_1" 224 | param { 225 | lr_mult: 1 226 | } 227 | param { 228 | lr_mult: 2 229 | } 230 | convolution_param { 231 | num_output: 512 232 | pad: 1 233 | kernel_size: 3 234 | } 235 | } 236 | layer { 237 | name: "relu4_1" 238 | type: "ReLU" 239 | bottom: "conv4_1" 240 | top: "conv4_1" 241 | } 242 | layer { 243 | name: "conv4_2" 244 | type: "Convolution" 245 | bottom: "conv4_1" 246 | top: "conv4_2" 247 | param { 248 | lr_mult: 1 249 | } 250 | param { 251 | lr_mult: 2 252 | } 253 | convolution_param { 254 | num_output: 512 255 | pad: 1 256 | kernel_size: 3 257 | } 258 | } 259 | layer { 260 | name: "relu4_2" 261 | type: "ReLU" 262 | bottom: "conv4_2" 263 | top: "conv4_2" 264 | } 265 | layer { 266 | name: "conv4_3" 267 | type: "Convolution" 268 | bottom: "conv4_2" 269 | top: "conv4_3" 270 | param { 271 | lr_mult: 1 272 | } 273 | param { 274 | lr_mult: 2 275 | } 276 | convolution_param { 277 | num_output: 512 278 | pad: 1 279 | kernel_size: 3 280 | } 281 | } 282 | layer { 283 | name: "relu4_3" 284 | type: "ReLU" 285 | bottom: "conv4_3" 286 | top: "conv4_3" 287 | } 288 | layer { 289 | name: "pool4" 290 | type: "Pooling" 291 | bottom: "conv4_3" 292 | top: "pool4" 293 | pooling_param { 294 | pool: MAX 295 | kernel_size: 2 296 | stride: 2 297 | } 298 | } 299 | layer { 300 | name: "conv5_1" 301 | type: "Convolution" 302 | bottom: "pool4" 303 | top: "conv5_1" 304 | param { 305 | lr_mult: 1 306 | } 307 | param { 308 | lr_mult: 2 309 | } 310 | convolution_param { 311 | num_output: 512 312 | pad: 1 313 | kernel_size: 3 314 | } 315 | } 316 | layer { 317 | name: "relu5_1" 318 | type: "ReLU" 319 | bottom: "conv5_1" 320 | top: "conv5_1" 321 | } 322 | layer { 323 | name: "conv5_2" 324 | type: "Convolution" 325 | bottom: "conv5_1" 326 | top: "conv5_2" 327 | param { 328 | lr_mult: 1 329 | } 330 | param { 331 | lr_mult: 2 332 | } 333 | convolution_param { 334 | num_output: 512 335 | pad: 1 336 | kernel_size: 3 337 | } 338 | } 339 | layer { 340 | name: "relu5_2" 341 | type: "ReLU" 342 | bottom: "conv5_2" 343 | top: "conv5_2" 344 | } 345 | layer { 346 | name: "conv5_3" 347 | type: "Convolution" 348 | bottom: "conv5_2" 349 | top: "conv5_3" 350 | param { 351 | lr_mult: 1 352 | } 353 | param { 354 | lr_mult: 2 355 | } 356 | convolution_param { 357 | num_output: 512 358 | pad: 1 359 | kernel_size: 3 360 | } 361 | } 362 | layer { 363 | name: "relu5_3" 364 | type: "ReLU" 365 | bottom: "conv5_3" 366 | top: "conv5_3" 367 | } 368 | layer { 369 | name: "roi_pool5" 370 | type: "ROIPooling" 371 | bottom: "conv5_3" 372 | bottom: "rois" 373 | top: "pool5" 374 | roi_pooling_param { 375 | pooled_w: 7 376 | pooled_h: 7 377 | spatial_scale: 0.0625 # 1/16 378 | } 379 | } 380 | layer { 381 | name: "fc6" 382 | type: "InnerProduct" 383 | bottom: "pool5" 384 | top: "fc6" 385 | param { 386 | lr_mult: 1 387 | } 388 | param { 389 | lr_mult: 2 390 | } 391 | inner_product_param { 392 | num_output: 4096 393 | } 394 | } 395 | layer { 396 | name: "relu6" 397 | type: "ReLU" 398 | bottom: "fc6" 399 | top: "fc6" 400 | } 401 | layer { 402 | name: "drop6" 403 | type: "Dropout" 404 | bottom: "fc6" 405 | top: "fc6" 406 | dropout_param { 407 | dropout_ratio: 0.5 408 | } 409 | } 410 | layer { 411 | name: "fc7" 412 | type: "InnerProduct" 413 | bottom: "fc6" 414 | top: "fc7" 415 | param { 416 | lr_mult: 1 417 | } 418 | param { 419 | lr_mult: 2 420 | } 421 | inner_product_param { 422 | num_output: 4096 423 | } 424 | } 425 | layer { 426 | name: "relu7" 427 | type: "ReLU" 428 | bottom: "fc7" 429 | top: "fc7" 430 | } 431 | layer { 
432 | name: "drop7" 433 | type: "Dropout" 434 | bottom: "fc7" 435 | top: "fc7" 436 | dropout_param { 437 | dropout_ratio: 0.5 438 | } 439 | } 440 | layer { 441 | name: "cls_score" 442 | type: "InnerProduct" 443 | bottom: "fc7" 444 | top: "cls_score" 445 | param { 446 | lr_mult: 1 447 | } 448 | param { 449 | lr_mult: 2 450 | } 451 | inner_product_param { 452 | num_output: 21 453 | weight_filler { 454 | type: "gaussian" 455 | std: 0.01 456 | } 457 | bias_filler { 458 | type: "constant" 459 | value: 0 460 | } 461 | } 462 | } 463 | layer { 464 | name: "bbox_pred" 465 | type: "InnerProduct" 466 | bottom: "fc7" 467 | top: "bbox_pred" 468 | param { 469 | lr_mult: 1 470 | } 471 | param { 472 | lr_mult: 2 473 | } 474 | inner_product_param { 475 | num_output: 84 476 | weight_filler { 477 | type: "gaussian" 478 | std: 0.001 479 | } 480 | bias_filler { 481 | type: "constant" 482 | value: 0 483 | } 484 | } 485 | } 486 | layer { 487 | name: "loss_cls" 488 | type: "SoftmaxWithLoss" 489 | bottom: "cls_score" 490 | bottom: "labels" 491 | top: "loss_cls" 492 | loss_weight: 1 493 | } 494 | layer { 495 | name: "loss_bbox" 496 | type: "SmoothL1Loss" 497 | bottom: "bbox_pred" 498 | bottom: "bbox_targets" 499 | bottom: "bbox_inside_weights" 500 | bottom: "bbox_outside_weights" 501 | top: "loss_bbox" 502 | loss_weight: 1 503 | } 504 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/fast_rcnn_adv/init_weights2.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "prototxt" : "models/pascal_voc/VGG16/fast_rcnn_adv_pretrain/train.prototxt", 4 | "model" : "output/fast_rcnn_adv_pretrain/voc_2007_trainval/fast_rcnn_adv_pretrain_iter_25000.caffemodel", 5 | "copy_ops" : { 6 | "source" : ["conv6_mask", "conv7_mask", "conv8_mask", "conv9_mask", "conv10_mask"], 7 | "dest" : ["conv6_mask", "conv7_mask", "conv8_mask", "conv9_mask", "conv10_mask"], 8 | "reshape" : [0, 0, 0, 0, 0] 9 | } 10 | }, 11 | { 12 | "prototxt" : "models/pascal_voc/VGG16/fast_rcnn/test.prototxt", 13 | "model" : "output/fast_rcnn_adv/voc_2007_trainval/fast_rcnn_std_iter_10000.caffemodel", 14 | "copy_ops" : { 15 | "source" : ["conv1_1", "conv1_2", "conv2_1", "conv2_2", "conv3_1", "conv3_2", "conv3_3", "conv4_1", "conv4_2", "conv4_3", "conv5_1", "conv5_2", "conv5_3", "fc6", "fc7", "cls_score"], 16 | "dest" : ["conv1_1", "conv1_2", "conv2_1", "conv2_2", "conv3_1", "conv3_2", "conv3_3", "conv4_1", "conv4_2", "conv4_3", "conv5_1", "conv5_2", "conv5_3", "fc6", "fc7", "cls_score"], 17 | "reshape" : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 18 | } 19 | } 20 | ] 21 | 22 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/fast_rcnn_adv/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG16/fast_rcnn_adv/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | iter_size: 2 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "fast_rcnn_adv" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/fast_rcnn_adv_pretrain/solver.prototxt: 
-------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG16/fast_rcnn_adv_pretrain/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 20000 6 | display: 20 7 | average_loss: 100 8 | iter_size: 8 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "fast_rcnn_adv_pretrain" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/fast_rcnn_std/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG16/fast_rcnn_std/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | iter_size: 2 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "fast_rcnn_std" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/fast_rcnn_std/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | 3 | input: "data" 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 224 8 | dim: 224 9 | } 10 | 11 | input: "rois" 12 | input_shape { 13 | dim: 1 # to be changed on-the-fly to num ROIs 14 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 15 | } 16 | 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_1" 38 | type: "ReLU" 39 | bottom: "conv1_1" 40 | top: "conv1_1" 41 | } 42 | layer { 43 | name: "conv1_2" 44 | type: "Convolution" 45 | bottom: "conv1_1" 46 | top: "conv1_2" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | layer { 62 | name: "relu1_2" 63 | type: "ReLU" 64 | bottom: "conv1_2" 65 | top: "conv1_2" 66 | } 67 | layer { 68 | name: "pool1" 69 | type: "Pooling" 70 | bottom: "conv1_2" 71 | top: "pool1" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv2_1" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: "conv2_1" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | name: "relu2_1" 99 | type: "ReLU" 100 | bottom: "conv2_1" 101 | top: "conv2_1" 102 | } 103 | layer { 104 | name: "conv2_2" 105 | type: "Convolution" 106 | bottom: "conv2_1" 107 | top: "conv2_2" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 
| bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | decay_mult: 1 147 | } 148 | param { 149 | lr_mult: 2 150 | decay_mult: 0 151 | } 152 | convolution_param { 153 | num_output: 256 154 | pad: 1 155 | kernel_size: 3 156 | } 157 | } 158 | layer { 159 | name: "relu3_1" 160 | type: "ReLU" 161 | bottom: "conv3_1" 162 | top: "conv3_1" 163 | } 164 | layer { 165 | name: "conv3_2" 166 | type: "Convolution" 167 | bottom: "conv3_1" 168 | top: "conv3_2" 169 | param { 170 | lr_mult: 1 171 | decay_mult: 1 172 | } 173 | param { 174 | lr_mult: 2 175 | decay_mult: 0 176 | } 177 | convolution_param { 178 | num_output: 256 179 | pad: 1 180 | kernel_size: 3 181 | } 182 | } 183 | layer { 184 | name: "relu3_2" 185 | type: "ReLU" 186 | bottom: "conv3_2" 187 | top: "conv3_2" 188 | } 189 | layer { 190 | name: "conv3_3" 191 | type: "Convolution" 192 | bottom: "conv3_2" 193 | top: "conv3_3" 194 | param { 195 | lr_mult: 1 196 | decay_mult: 1 197 | } 198 | param { 199 | lr_mult: 2 200 | decay_mult: 0 201 | } 202 | convolution_param { 203 | num_output: 256 204 | pad: 1 205 | kernel_size: 3 206 | } 207 | } 208 | layer { 209 | name: "relu3_3" 210 | type: "ReLU" 211 | bottom: "conv3_3" 212 | top: "conv3_3" 213 | } 214 | layer { 215 | name: "pool3" 216 | type: "Pooling" 217 | bottom: "conv3_3" 218 | top: "pool3" 219 | pooling_param { 220 | pool: MAX 221 | kernel_size: 2 222 | stride: 2 223 | } 224 | } 225 | layer { 226 | name: "conv4_1" 227 | type: "Convolution" 228 | bottom: "pool3" 229 | top: "conv4_1" 230 | param { 231 | lr_mult: 1 232 | decay_mult: 1 233 | } 234 | param { 235 | lr_mult: 2 236 | decay_mult: 0 237 | } 238 | convolution_param { 239 | num_output: 512 240 | pad: 1 241 | kernel_size: 3 242 | } 243 | } 244 | layer { 245 | name: "relu4_1" 246 | type: "ReLU" 247 | bottom: "conv4_1" 248 | top: "conv4_1" 249 | } 250 | layer { 251 | name: "conv4_2" 252 | type: "Convolution" 253 | bottom: "conv4_1" 254 | top: "conv4_2" 255 | param { 256 | lr_mult: 1 257 | decay_mult: 1 258 | } 259 | param { 260 | lr_mult: 2 261 | decay_mult: 0 262 | } 263 | convolution_param { 264 | num_output: 512 265 | pad: 1 266 | kernel_size: 3 267 | } 268 | } 269 | layer { 270 | name: "relu4_2" 271 | type: "ReLU" 272 | bottom: "conv4_2" 273 | top: "conv4_2" 274 | } 275 | layer { 276 | name: "conv4_3" 277 | type: "Convolution" 278 | bottom: "conv4_2" 279 | top: "conv4_3" 280 | param { 281 | lr_mult: 1 282 | decay_mult: 1 283 | } 284 | param { 285 | lr_mult: 2 286 | decay_mult: 0 287 | } 288 | convolution_param { 289 | num_output: 512 290 | pad: 1 291 | kernel_size: 3 292 | } 293 | } 294 | layer { 295 | name: "relu4_3" 296 | type: "ReLU" 297 | bottom: "conv4_3" 298 | top: "conv4_3" 299 | } 300 | layer { 301 | name: "pool4" 302 | type: "Pooling" 303 | bottom: "conv4_3" 304 | top: "pool4" 305 | pooling_param { 306 | pool: MAX 307 | kernel_size: 2 308 | stride: 2 309 | } 310 | } 311 | layer { 312 | name: "conv5_1" 313 | type: "Convolution" 314 | bottom: "pool4" 315 | top: "conv5_1" 316 | param { 317 | lr_mult: 1 318 | decay_mult: 1 319 | } 320 | param { 321 | lr_mult: 2 322 | decay_mult: 0 323 | } 324 | convolution_param { 325 | num_output: 512 326 | pad: 1 327 | kernel_size: 3 328 | } 329 | } 330 | layer { 331 | name: 
"relu5_1" 332 | type: "ReLU" 333 | bottom: "conv5_1" 334 | top: "conv5_1" 335 | } 336 | layer { 337 | name: "conv5_2" 338 | type: "Convolution" 339 | bottom: "conv5_1" 340 | top: "conv5_2" 341 | param { 342 | lr_mult: 1 343 | decay_mult: 1 344 | } 345 | param { 346 | lr_mult: 2 347 | decay_mult: 0 348 | } 349 | convolution_param { 350 | num_output: 512 351 | pad: 1 352 | kernel_size: 3 353 | } 354 | } 355 | layer { 356 | name: "relu5_2" 357 | type: "ReLU" 358 | bottom: "conv5_2" 359 | top: "conv5_2" 360 | } 361 | layer { 362 | name: "conv5_3" 363 | type: "Convolution" 364 | bottom: "conv5_2" 365 | top: "conv5_3" 366 | param { 367 | lr_mult: 1 368 | decay_mult: 1 369 | } 370 | param { 371 | lr_mult: 2 372 | decay_mult: 0 373 | } 374 | convolution_param { 375 | num_output: 512 376 | pad: 1 377 | kernel_size: 3 378 | } 379 | } 380 | layer { 381 | name: "relu5_3" 382 | type: "ReLU" 383 | bottom: "conv5_3" 384 | top: "conv5_3" 385 | } 386 | layer { 387 | name: "roi_pool5" 388 | type: "ROIPooling" 389 | bottom: "conv5_3" 390 | bottom: "rois" 391 | top: "pool5" 392 | roi_pooling_param { 393 | pooled_w: 7 394 | pooled_h: 7 395 | spatial_scale: 0.0625 # 1/16 396 | } 397 | } 398 | layer { 399 | name: "fc6" 400 | type: "InnerProduct" 401 | bottom: "pool5" 402 | top: "fc6" 403 | param { 404 | lr_mult: 1 405 | decay_mult: 1 406 | } 407 | param { 408 | lr_mult: 2 409 | decay_mult: 0 410 | } 411 | inner_product_param { 412 | num_output: 4096 413 | } 414 | } 415 | layer { 416 | name: "relu6" 417 | type: "ReLU" 418 | bottom: "fc6" 419 | top: "fc6" 420 | } 421 | layer { 422 | name: "drop6" 423 | type: "Dropout" 424 | bottom: "fc6" 425 | top: "fc6" 426 | dropout_param { 427 | dropout_ratio: 0.5 428 | } 429 | } 430 | layer { 431 | name: "fc7" 432 | type: "InnerProduct" 433 | bottom: "fc6" 434 | top: "fc7" 435 | param { 436 | lr_mult: 1 437 | decay_mult: 1 438 | } 439 | param { 440 | lr_mult: 2 441 | decay_mult: 0 442 | } 443 | inner_product_param { 444 | num_output: 4096 445 | } 446 | } 447 | layer { 448 | name: "relu7" 449 | type: "ReLU" 450 | bottom: "fc7" 451 | top: "fc7" 452 | } 453 | layer { 454 | name: "drop7" 455 | type: "Dropout" 456 | bottom: "fc7" 457 | top: "fc7" 458 | dropout_param { 459 | dropout_ratio: 0.5 460 | } 461 | } 462 | layer { 463 | name: "cls_score" 464 | type: "InnerProduct" 465 | bottom: "fc7" 466 | top: "cls_score" 467 | param { 468 | lr_mult: 1 469 | decay_mult: 1 470 | } 471 | param { 472 | lr_mult: 2 473 | decay_mult: 0 474 | } 475 | inner_product_param { 476 | num_output: 21 477 | weight_filler { 478 | type: "gaussian" 479 | std: 0.01 480 | } 481 | bias_filler { 482 | type: "constant" 483 | value: 0 484 | } 485 | } 486 | } 487 | layer { 488 | name: "bbox_pred" 489 | type: "InnerProduct" 490 | bottom: "fc7" 491 | top: "bbox_pred" 492 | param { 493 | lr_mult: 1 494 | decay_mult: 1 495 | } 496 | param { 497 | lr_mult: 2 498 | decay_mult: 0 499 | } 500 | inner_product_param { 501 | num_output: 84 502 | weight_filler { 503 | type: "gaussian" 504 | std: 0.001 505 | } 506 | bias_filler { 507 | type: "constant" 508 | value: 0 509 | } 510 | } 511 | } 512 | layer { 513 | name: "cls_prob" 514 | type: "Softmax" 515 | bottom: "cls_score" 516 | top: "cls_prob" 517 | } 518 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG16/fast_rcnn_std/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 
'rois' 7 | top: 'labels' 8 | top: 'bbox_targets' 9 | top: 'bbox_inside_weights' 10 | top: 'bbox_outside_weights' 11 | python_param { 12 | module: 'roi_data_layer.layer' 13 | layer: 'RoIDataLayer' 14 | param_str: "'num_classes': 21" 15 | } 16 | } 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_1" 38 | type: "ReLU" 39 | bottom: "conv1_1" 40 | top: "conv1_1" 41 | } 42 | layer { 43 | name: "conv1_2" 44 | type: "Convolution" 45 | bottom: "conv1_1" 46 | top: "conv1_2" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | layer { 62 | name: "relu1_2" 63 | type: "ReLU" 64 | bottom: "conv1_2" 65 | top: "conv1_2" 66 | } 67 | layer { 68 | name: "pool1" 69 | type: "Pooling" 70 | bottom: "conv1_2" 71 | top: "pool1" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv2_1" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: "conv2_1" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | name: "relu2_1" 99 | type: "ReLU" 100 | bottom: "conv2_1" 101 | top: "conv2_1" 102 | } 103 | layer { 104 | name: "conv2_2" 105 | type: "Convolution" 106 | bottom: "conv2_1" 107 | top: "conv2_2" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | } 150 | convolution_param { 151 | num_output: 256 152 | pad: 1 153 | kernel_size: 3 154 | } 155 | } 156 | layer { 157 | name: "relu3_1" 158 | type: "ReLU" 159 | bottom: "conv3_1" 160 | top: "conv3_1" 161 | } 162 | layer { 163 | name: "conv3_2" 164 | type: "Convolution" 165 | bottom: "conv3_1" 166 | top: "conv3_2" 167 | param { 168 | lr_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | } 173 | convolution_param { 174 | num_output: 256 175 | pad: 1 176 | kernel_size: 3 177 | } 178 | } 179 | layer { 180 | name: "relu3_2" 181 | type: "ReLU" 182 | bottom: "conv3_2" 183 | top: "conv3_2" 184 | } 185 | layer { 186 | name: "conv3_3" 187 | type: "Convolution" 188 | bottom: "conv3_2" 189 | top: "conv3_3" 190 | param { 191 | lr_mult: 1 192 | } 193 | param { 194 | lr_mult: 2 195 | } 196 | convolution_param { 197 | num_output: 256 198 | pad: 1 199 | kernel_size: 3 200 | } 201 | } 202 | layer { 203 | name: "relu3_3" 204 | type: "ReLU" 205 | bottom: "conv3_3" 206 | top: "conv3_3" 207 | } 208 | layer { 209 | name: "pool3" 210 | type: "Pooling" 211 | bottom: "conv3_3" 212 | top: "pool3" 213 | pooling_param { 
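# 2x2 max pooling with stride 2: halves both spatial dimensions.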
214 | pool: MAX 215 | kernel_size: 2 216 | stride: 2 217 | } 218 | } 219 | layer { 220 | name: "conv4_1" 221 | type: "Convolution" 222 | bottom: "pool3" 223 | top: "conv4_1" 224 | param { 225 | lr_mult: 1 226 | } 227 | param { 228 | lr_mult: 2 229 | } 230 | convolution_param { 231 | num_output: 512 232 | pad: 1 233 | kernel_size: 3 234 | } 235 | } 236 | layer { 237 | name: "relu4_1" 238 | type: "ReLU" 239 | bottom: "conv4_1" 240 | top: "conv4_1" 241 | } 242 | layer { 243 | name: "conv4_2" 244 | type: "Convolution" 245 | bottom: "conv4_1" 246 | top: "conv4_2" 247 | param { 248 | lr_mult: 1 249 | } 250 | param { 251 | lr_mult: 2 252 | } 253 | convolution_param { 254 | num_output: 512 255 | pad: 1 256 | kernel_size: 3 257 | } 258 | } 259 | layer { 260 | name: "relu4_2" 261 | type: "ReLU" 262 | bottom: "conv4_2" 263 | top: "conv4_2" 264 | } 265 | layer { 266 | name: "conv4_3" 267 | type: "Convolution" 268 | bottom: "conv4_2" 269 | top: "conv4_3" 270 | param { 271 | lr_mult: 1 272 | } 273 | param { 274 | lr_mult: 2 275 | } 276 | convolution_param { 277 | num_output: 512 278 | pad: 1 279 | kernel_size: 3 280 | } 281 | } 282 | layer { 283 | name: "relu4_3" 284 | type: "ReLU" 285 | bottom: "conv4_3" 286 | top: "conv4_3" 287 | } 288 | layer { 289 | name: "pool4" 290 | type: "Pooling" 291 | bottom: "conv4_3" 292 | top: "pool4" 293 | pooling_param { 294 | pool: MAX 295 | kernel_size: 2 296 | stride: 2 297 | } 298 | } 299 | layer { 300 | name: "conv5_1" 301 | type: "Convolution" 302 | bottom: "pool4" 303 | top: "conv5_1" 304 | param { 305 | lr_mult: 1 306 | } 307 | param { 308 | lr_mult: 2 309 | } 310 | convolution_param { 311 | num_output: 512 312 | pad: 1 313 | kernel_size: 3 314 | } 315 | } 316 | layer { 317 | name: "relu5_1" 318 | type: "ReLU" 319 | bottom: "conv5_1" 320 | top: "conv5_1" 321 | } 322 | layer { 323 | name: "conv5_2" 324 | type: "Convolution" 325 | bottom: "conv5_1" 326 | top: "conv5_2" 327 | param { 328 | lr_mult: 1 329 | } 330 | param { 331 | lr_mult: 2 332 | } 333 | convolution_param { 334 | num_output: 512 335 | pad: 1 336 | kernel_size: 3 337 | } 338 | } 339 | layer { 340 | name: "relu5_2" 341 | type: "ReLU" 342 | bottom: "conv5_2" 343 | top: "conv5_2" 344 | } 345 | layer { 346 | name: "conv5_3" 347 | type: "Convolution" 348 | bottom: "conv5_2" 349 | top: "conv5_3" 350 | param { 351 | lr_mult: 1 352 | } 353 | param { 354 | lr_mult: 2 355 | } 356 | convolution_param { 357 | num_output: 512 358 | pad: 1 359 | kernel_size: 3 360 | } 361 | } 362 | layer { 363 | name: "relu5_3" 364 | type: "ReLU" 365 | bottom: "conv5_3" 366 | top: "conv5_3" 367 | } 368 | layer { 369 | name: "roi_pool5" 370 | type: "ROIPooling" 371 | bottom: "conv5_3" 372 | bottom: "rois" 373 | top: "pool5" 374 | roi_pooling_param { 375 | pooled_w: 7 376 | pooled_h: 7 377 | spatial_scale: 0.0625 # 1/16 378 | } 379 | } 380 | layer { 381 | name: "fc6" 382 | type: "InnerProduct" 383 | bottom: "pool5" 384 | top: "fc6" 385 | param { 386 | lr_mult: 1 387 | } 388 | param { 389 | lr_mult: 2 390 | } 391 | inner_product_param { 392 | num_output: 4096 393 | } 394 | } 395 | layer { 396 | name: "relu6" 397 | type: "ReLU" 398 | bottom: "fc6" 399 | top: "fc6" 400 | } 401 | layer { 402 | name: "drop6" 403 | type: "Dropout" 404 | bottom: "fc6" 405 | top: "fc6" 406 | dropout_param { 407 | dropout_ratio: 0.5 408 | } 409 | } 410 | layer { 411 | name: "fc7" 412 | type: "InnerProduct" 413 | bottom: "fc6" 414 | top: "fc7" 415 | param { 416 | lr_mult: 1 417 | } 418 | param { 419 | lr_mult: 2 420 | } 421 | inner_product_param { 422 | 
num_output: 4096 423 | } 424 | } 425 | layer { 426 | name: "relu7" 427 | type: "ReLU" 428 | bottom: "fc7" 429 | top: "fc7" 430 | } 431 | layer { 432 | name: "drop7" 433 | type: "Dropout" 434 | bottom: "fc7" 435 | top: "fc7" 436 | dropout_param { 437 | dropout_ratio: 0.5 438 | } 439 | } 440 | layer { 441 | name: "cls_score" 442 | type: "InnerProduct" 443 | bottom: "fc7" 444 | top: "cls_score" 445 | param { 446 | lr_mult: 1 447 | } 448 | param { 449 | lr_mult: 2 450 | } 451 | inner_product_param { 452 | num_output: 21 453 | weight_filler { 454 | type: "gaussian" 455 | std: 0.01 456 | } 457 | bias_filler { 458 | type: "constant" 459 | value: 0 460 | } 461 | } 462 | } 463 | layer { 464 | name: "bbox_pred" 465 | type: "InnerProduct" 466 | bottom: "fc7" 467 | top: "bbox_pred" 468 | param { 469 | lr_mult: 1 470 | } 471 | param { 472 | lr_mult: 2 473 | } 474 | inner_product_param { 475 | num_output: 84 476 | weight_filler { 477 | type: "gaussian" 478 | std: 0.001 479 | } 480 | bias_filler { 481 | type: "constant" 482 | value: 0 483 | } 484 | } 485 | } 486 | layer { 487 | name: "loss_cls" 488 | type: "SoftmaxWithLoss" 489 | bottom: "cls_score" 490 | bottom: "labels" 491 | top: "loss_cls" 492 | loss_weight: 1 493 | } 494 | layer { 495 | name: "loss_bbox" 496 | type: "SmoothL1Loss" 497 | bottom: "bbox_pred" 498 | bottom: "bbox_targets" 499 | bottom: "bbox_inside_weights" 500 | bottom: "bbox_outside_weights" 501 | top: "loss_bbox" 502 | loss_weight: 1 503 | } 504 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 224 7 | dim: 224 8 | } 9 | input: "rois" 10 | input_shape { 11 | dim: 1 # to be changed on-the-fly to num ROIs 12 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 7 30 | stride: 2 31 | } 32 | } 33 | layer { 34 | name: "relu1" 35 | type: "ReLU" 36 | bottom: "conv1" 37 | top: "conv1" 38 | } 39 | layer { 40 | name: "norm1" 41 | type: "LRN" 42 | bottom: "conv1" 43 | top: "norm1" 44 | lrn_param { 45 | local_size: 5 46 | alpha: 0.0005 47 | beta: 0.75 48 | k: 2 49 | } 50 | } 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "norm1" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 3 59 | stride: 2 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | param { 68 | lr_mult: 1 69 | 
decay_mult: 1 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | convolution_param { 76 | num_output: 256 77 | pad: 1 78 | kernel_size: 5 79 | stride: 2 80 | } 81 | } 82 | layer { 83 | name: "relu2" 84 | type: "ReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | } 88 | layer { 89 | name: "norm2" 90 | type: "LRN" 91 | bottom: "conv2" 92 | top: "norm2" 93 | lrn_param { 94 | local_size: 5 95 | alpha: 0.0005 96 | beta: 0.75 97 | k: 2 98 | } 99 | } 100 | layer { 101 | name: "pool2" 102 | type: "Pooling" 103 | bottom: "norm2" 104 | top: "pool2" 105 | pooling_param { 106 | pool: MAX 107 | kernel_size: 3 108 | stride: 2 109 | } 110 | } 111 | layer { 112 | name: "conv3" 113 | type: "Convolution" 114 | bottom: "pool2" 115 | top: "conv3" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 512 126 | pad: 1 127 | kernel_size: 3 128 | } 129 | } 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | layer { 137 | name: "conv4" 138 | type: "Convolution" 139 | bottom: "conv3" 140 | top: "conv4" 141 | param { 142 | lr_mult: 1 143 | decay_mult: 1 144 | } 145 | param { 146 | lr_mult: 2 147 | decay_mult: 0 148 | } 149 | convolution_param { 150 | num_output: 512 151 | pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 | layer { 156 | name: "relu4" 157 | type: "ReLU" 158 | bottom: "conv4" 159 | top: "conv4" 160 | } 161 | layer { 162 | name: "conv5" 163 | type: "Convolution" 164 | bottom: "conv4" 165 | top: "conv5" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 512 176 | pad: 1 177 | kernel_size: 3 178 | } 179 | } 180 | layer { 181 | name: "relu5" 182 | type: "ReLU" 183 | bottom: "conv5" 184 | top: "conv5" 185 | } 186 | layer { 187 | name: "roi_pool5" 188 | type: "ROIPooling" 189 | bottom: "conv5" 190 | bottom: "rois" 191 | top: "pool5" 192 | roi_pooling_param { 193 | pooled_w: 6 194 | pooled_h: 6 195 | spatial_scale: 0.0625 # 1/16 196 | } 197 | } 198 | layer { 199 | name: "fc6" 200 | type: "InnerProduct" 201 | bottom: "pool5" 202 | top: "fc6" 203 | param { 204 | lr_mult: 1 205 | decay_mult: 1 206 | } 207 | param { 208 | lr_mult: 2 209 | decay_mult: 0 210 | } 211 | inner_product_param { 212 | num_output: 4096 213 | } 214 | } 215 | layer { 216 | name: "relu6" 217 | type: "ReLU" 218 | bottom: "fc6" 219 | top: "fc6" 220 | } 221 | layer { 222 | name: "drop6" 223 | type: "Dropout" 224 | bottom: "fc6" 225 | top: "fc6" 226 | dropout_param { 227 | dropout_ratio: 0.5 228 | } 229 | } 230 | layer { 231 | name: "fc7" 232 | type: "InnerProduct" 233 | bottom: "fc6" 234 | top: "fc7" 235 | param { 236 | lr_mult: 1 237 | decay_mult: 1 238 | } 239 | param { 240 | lr_mult: 2 241 | decay_mult: 0 242 | } 243 | inner_product_param { 244 | num_output: 1024 245 | } 246 | } 247 | layer { 248 | name: "relu7" 249 | type: "ReLU" 250 | bottom: "fc7" 251 | top: "fc7" 252 | } 253 | layer { 254 | name: "drop7" 255 | type: "Dropout" 256 | bottom: "fc7" 257 | top: "fc7" 258 | dropout_param { 259 | dropout_ratio: 0.5 260 | } 261 | } 262 | layer { 263 | name: "cls_score" 264 | type: "InnerProduct" 265 | bottom: "fc7" 266 | top: "cls_score" 267 | param { 268 | lr_mult: 1 269 | decay_mult: 1 270 | } 271 | param { 272 | lr_mult: 2 273 | decay_mult: 0 274 | } 275 | inner_product_param { 276 | num_output: 21 277 | weight_filler { 278 | type: "gaussian" 279 | std: 0.01 
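# This random Gaussian init only takes effect if the net is instantiated
# without a trained .caffemodel; at test time the learned weights are loaded
# over it.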
280 | } 281 | bias_filler { 282 | type: "constant" 283 | value: 0 284 | } 285 | } 286 | } 287 | layer { 288 | name: "bbox_pred" 289 | type: "InnerProduct" 290 | bottom: "fc7" 291 | top: "bbox_pred" 292 | param { 293 | lr_mult: 1 294 | decay_mult: 1 295 | } 296 | param { 297 | lr_mult: 2 298 | decay_mult: 0 299 | } 300 | inner_product_param { 301 | num_output: 84 302 | weight_filler { 303 | type: "gaussian" 304 | std: 0.001 305 | } 306 | bias_filler { 307 | type: "constant" 308 | value: 0 309 | } 310 | } 311 | } 312 | layer { 313 | name: "cls_prob" 314 | type: "Softmax" 315 | bottom: "cls_score" 316 | top: "cls_prob" 317 | } 318 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | top: 'bbox_targets' 9 | top: 'bbox_inside_weights' 10 | top: 'bbox_outside_weights' 11 | python_param { 12 | module: 'roi_data_layer.layer' 13 | layer: 'RoIDataLayer' 14 | param_str: "'num_classes': 21" 15 | } 16 | } 17 | layer { 18 | name: "conv1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1" 22 | param { lr_mult: 0 decay_mult: 0 } 23 | param { lr_mult: 0 decay_mult: 0 } 24 | convolution_param { 25 | num_output: 96 26 | kernel_size: 7 27 | stride: 2 28 | } 29 | } 30 | layer { 31 | name: "relu1" 32 | type: "ReLU" 33 | bottom: "conv1" 34 | top: "conv1" 35 | } 36 | layer { 37 | name: "norm1" 38 | type: "LRN" 39 | bottom: "conv1" 40 | top: "norm1" 41 | lrn_param { 42 | local_size: 5 43 | alpha: 0.0005 44 | beta: 0.75 45 | k: 2 46 | } 47 | } 48 | layer { 49 | name: "pool1" 50 | type: "Pooling" 51 | bottom: "norm1" 52 | top: "pool1" 53 | pooling_param { 54 | pool: MAX 55 | kernel_size: 3 56 | stride: 2 57 | } 58 | } 59 | layer { 60 | name: "conv2" 61 | type: "Convolution" 62 | bottom: "pool1" 63 | top: "conv2" 64 | param { 65 | lr_mult: 1 66 | } 67 | param { 68 | lr_mult: 2 69 | } 70 | convolution_param { 71 | num_output: 256 72 | pad: 1 73 | kernel_size: 5 74 | stride: 2 75 | } 76 | } 77 | layer { 78 | name: "relu2" 79 | type: "ReLU" 80 | bottom: "conv2" 81 | top: "conv2" 82 | } 83 | layer { 84 | name: "norm2" 85 | type: "LRN" 86 | bottom: "conv2" 87 | top: "norm2" 88 | lrn_param { 89 | local_size: 5 90 | alpha: 0.0005 91 | beta: 0.75 92 | k: 2 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "norm2" 99 | top: "pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 3 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "conv3" 108 | type: "Convolution" 109 | bottom: "pool2" 110 | top: "conv3" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | convolution_param { 118 | num_output: 512 119 | pad: 1 120 | kernel_size: 3 121 | } 122 | } 123 | layer { 124 | name: "relu3" 125 | type: "ReLU" 126 | bottom: "conv3" 127 | top: "conv3" 128 | } 129 | layer { 130 | name: "conv4" 131 | type: "Convolution" 132 | bottom: "conv3" 133 | top: "conv4" 134 | param { 135 | lr_mult: 1 136 | } 137 | param { 138 | lr_mult: 2 139 | } 140 | convolution_param { 141 | num_output: 512 142 | pad: 1 143 | kernel_size: 3 144 | } 145 | } 146 | layer { 147 | name: "relu4" 148 | type: "ReLU" 149 | bottom: "conv4" 150 | top: "conv4" 151 | } 152 | layer { 153 | name: "conv5" 154 | type: "Convolution" 155 | bottom: "conv4" 156 | top: "conv5" 157 | param { 158 | 
lr_mult: 1 159 | } 160 | param { 161 | lr_mult: 2 162 | } 163 | convolution_param { 164 | num_output: 512 165 | pad: 1 166 | kernel_size: 3 167 | } 168 | } 169 | layer { 170 | name: "relu5" 171 | type: "ReLU" 172 | bottom: "conv5" 173 | top: "conv5" 174 | } 175 | layer { 176 | name: "roi_pool5" 177 | type: "ROIPooling" 178 | bottom: "conv5" 179 | bottom: "rois" 180 | top: "pool5" 181 | roi_pooling_param { 182 | pooled_w: 6 183 | pooled_h: 6 184 | spatial_scale: 0.0625 # 1/16 185 | } 186 | } 187 | layer { 188 | name: "fc6" 189 | type: "InnerProduct" 190 | bottom: "pool5" 191 | top: "fc6" 192 | param { 193 | lr_mult: 1 194 | } 195 | param { 196 | lr_mult: 2 197 | } 198 | inner_product_param { 199 | num_output: 4096 200 | } 201 | } 202 | layer { 203 | name: "relu6" 204 | type: "ReLU" 205 | bottom: "fc6" 206 | top: "fc6" 207 | } 208 | layer { 209 | name: "drop6" 210 | type: "Dropout" 211 | bottom: "fc6" 212 | top: "fc6" 213 | dropout_param { 214 | dropout_ratio: 0.5 215 | } 216 | } 217 | layer { 218 | name: "fc7" 219 | type: "InnerProduct" 220 | bottom: "fc6" 221 | top: "fc7" 222 | param { 223 | lr_mult: 1 224 | } 225 | param { 226 | lr_mult: 2 227 | } 228 | inner_product_param { 229 | num_output: 1024 230 | } 231 | } 232 | layer { 233 | name: "relu7" 234 | type: "ReLU" 235 | bottom: "fc7" 236 | top: "fc7" 237 | } 238 | layer { 239 | name: "drop7" 240 | type: "Dropout" 241 | bottom: "fc7" 242 | top: "fc7" 243 | dropout_param { 244 | dropout_ratio: 0.5 245 | } 246 | } 247 | layer { 248 | name: "cls_score" 249 | type: "InnerProduct" 250 | bottom: "fc7" 251 | top: "cls_score" 252 | param { 253 | lr_mult: 1 254 | } 255 | param { 256 | lr_mult: 2 257 | } 258 | inner_product_param { 259 | num_output: 21 260 | weight_filler { 261 | type: "gaussian" 262 | std: 0.01 263 | } 264 | bias_filler { 265 | type: "constant" 266 | value: 0 267 | } 268 | } 269 | } 270 | layer { 271 | name: "bbox_pred" 272 | type: "InnerProduct" 273 | bottom: "fc7" 274 | top: "bbox_pred" 275 | param { 276 | lr_mult: 1 277 | } 278 | param { 279 | lr_mult: 2 280 | } 281 | inner_product_param { 282 | num_output: 84 283 | weight_filler { 284 | type: "gaussian" 285 | std: 0.001 286 | } 287 | bias_filler { 288 | type: "constant" 289 | value: 0 290 | } 291 | } 292 | } 293 | layer { 294 | name: "loss_cls" 295 | type: "SoftmaxWithLoss" 296 | bottom: "cls_score" 297 | bottom: "labels" 298 | top: "loss_cls" 299 | loss_weight: 1 300 | } 301 | layer { 302 | name: "loss_bbox" 303 | type: "SmoothL1Loss" 304 | bottom: "bbox_pred" 305 | bottom: "bbox_targets" 306 | bottom: "bbox_inside_weights" 307 | bottom: "bbox_outside_weights" 308 | top: "loss_bbox" 309 | loss_weight: 1 310 | } 311 | -------------------------------------------------------------------------------- /models/pascal_voc/VGG_CNN_M_1024/fast_rcnn_ohem/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/pascal_voc/VGG_CNN_M_1024/fast_rcnn_ohem/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | iter_size: 2 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- /python_utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Utils 3 | # Copyright (c) 2015 UC Berkeley 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Saurabh Gupta 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /python_utils/_init_paths.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Set up paths for Fast R-CNN.""" 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | def add_path(path): 14 | if path not in sys.path: 15 | sys.path.insert(0, path) 16 | 17 | this_dir = osp.dirname(__file__) 18 | 19 | # Add caffe to PYTHONPATH 20 | caffe_path = osp.join(this_dir, '..', 'caffe-fast-rcnn', 'python') 21 | add_path(caffe_path) 22 | 23 | # Add lib to PYTHONPATH 24 | lib_path = osp.join(this_dir, '..', 'lib') 25 | add_path(lib_path) 26 | -------------------------------------------------------------------------------- /python_utils/do_net_surgery.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) 2015, Saurabh Gupta 3 | # 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # --------------------------------------------------------- 6 | 7 | # For fusing network outputs 8 | import _init_paths 9 | import caffe 10 | import pycaffe_utils 11 | import sys, pprint, argparse 12 | 13 | def parse_args(): 14 | """ 15 | Parse input arguments 16 | """ 17 | parser = argparse.ArgumentParser(description='Network surgery script') 18 | parser.add_argument('--out_net_def', help='prototxt file defining the output network', default=None, type=str) 19 | parser.add_argument('--net_surgery_json', help='json file which defines what blobs to copy from where', default=None, type=str) 20 | parser.add_argument('--out_net_file', help='caffemodel to save the output network to', default=None, type=str) 21 | if len(sys.argv) == 1: 22 | parser.print_help() 23 | sys.exit(1) 24 | args = parser.parse_args() 25 | return args 26 | 27 | if __name__ == '__main__': 28 | args = parse_args() 29 | net = caffe.Net(args.out_net_def, caffe.TEST) 30 | pycaffe_utils.net_surgery(net, args.net_surgery_json) 31 | net.save(args.out_net_file) 32 | -------------------------------------------------------------------------------- /python_utils/evaluate_detection.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) 2015, Saurabh Gupta 3 | # 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # --------------------------------------------------------- 6 | 7 | import utils.cython_bbox 8 | import numpy as np 9 | 10 | def inst_bench_image(dt, gt, bOpts, overlap = None): 11 | 12 | nDt = len(dt['sc']) 13 | nGt = len(gt['diff']) 14 | numInst = np.sum(gt['diff'] == False) 15 | 16 | 17 | if overlap is None: 18 | overlap = utils.cython_bbox.bbox_overlaps(dt['boxInfo'].astype(np.float), gt['boxInfo'].astype(np.float)) 19 | # assert(issorted(-dt.sc), 'Scores are not
8 | import _init_paths 9 | import caffe 10 | import pycaffe_utils 11 | import sys, pprint, argparse 12 | 13 | def parse_args(): 14 | """ 15 | Parse input arguments 16 | """ 17 | parser = argparse.ArgumentParser(description='Network surgery script') 18 | parser.add_argument('--out_net_def', help='prototxt file defining the output network', default=None, type=str) 19 | parser.add_argument('--net_surgery_json', help='json file which defines what blobs to copy from where', default=None, type=str) 20 | parser.add_argument('--out_net_file', help='caffemodel to save the output network to', default=None, type=str) 21 | if len(sys.argv) == 1: 22 | parser.print_help() 23 | sys.exit(1) 24 | args = parser.parse_args() 25 | return args 26 | 27 | if __name__ == '__main__': 28 | args = parse_args() 29 | net = caffe.Net(args.out_net_def, caffe.TEST) 30 | pycaffe_utils.net_surgery(net, args.net_surgery_json) 31 | net.save(args.out_net_file) 32 | -------------------------------------------------------------------------------- /python_utils/evaluate_detection.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) 2015, Saurabh Gupta 3 | # 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # --------------------------------------------------------- 6 | 7 | import utils.cython_bbox 8 | import numpy as np 9 | 10 | def inst_bench_image(dt, gt, bOpts, overlap = None): 11 | 12 | nDt = len(dt['sc']) 13 | nGt = len(gt['diff']) 14 | numInst = np.sum(gt['diff'] == False) 15 | 16 | 17 | if overlap is None: 18 | overlap = utils.cython_bbox.bbox_overlaps(dt['boxInfo'].astype(np.float), gt['boxInfo'].astype(np.float)) 19 | # assert(issorted(-dt.sc), 'Scores are not sorted.\n'); 20 | sc = dt['sc']; 21 | 22 | det = np.zeros((nGt,1)).astype(np.bool) 23 | tp = np.zeros((nDt,1)).astype(np.bool) 24 | fp = np.zeros((nDt,1)).astype(np.bool) 25 | dupDet = np.zeros((nDt,1)).astype(np.bool) 26 | instId = np.zeros((nDt,1)).astype(np.int32) 27 | ov = np.zeros((nDt,1)).astype(np.float32) 28 | 29 | # Walk through the detections in decreasing score 30 | # and assign tp, fp, fn, tn labels 31 | for i in xrange(nDt): 32 | # assign detection to ground truth object if any 33 | if nGt > 0: 34 | maxOverlap = overlap[i,:].max(); maxInd = overlap[i,:].argmax(); 35 | instId[i] = maxInd; ov[i] = maxOverlap; 36 | else: 37 | maxOverlap = 0; instId[i] = -1; maxInd = -1; 38 | # assign detection as true positive/don't care/false positive 39 | if maxOverlap >= bOpts['minoverlap']: 40 | if gt['diff'][maxInd] == False: 41 | if det[maxInd] == False: 42 | # true positive 43 | tp[i] = True; 44 | det[maxInd] = True; 45 | else: 46 | # false positive (multiple detection) 47 | fp[i] = True; 48 | dupDet[i] = True; 49 | else: 50 | # false positive 51 | fp[i] = True; 52 | return tp, fp, sc, numInst, dupDet, instId, ov 53 | 54 | 55 | def inst_bench(dt, gt, bOpts, tp = None, fp = None, sc = None, numInst = None): 56 | """ 57 | ap, rec, prec, npos, details = inst_bench(dt, gt, bOpts, tp = None, fp = None, sc = None, numInst = None) 58 | dt - a list with a dict for each image and with following fields 59 | .boxInfo - info that will be used to compute the overlap with ground truths, a list 60 | .sc - score 61 | gt 62 | .boxInfo - info used to compute the overlap, a list 63 | .diff - a logical array of size nGtx1, saying if the instance is hard or not 64 | bOpts 65 | .minoverlap - the minimum overlap to call it a true positive 66 | [tp], [fp], [sc], [numInst] 67 | Optional arguments, in case inst_bench_image is being called outside of this function 68 | """ 69 | if tp is None: 70 | # We do not have the tp, fp, sc, and numInst, so compute them from the structures gt, and out 71 | tp = []; fp = []; numInst = []; score = []; dupDet = []; instId = []; ov = []; 72 | for i in range(len(gt)): 73 | # Sort dt by the score 74 | sc = dt[i]['sc'] 75 | bb = dt[i]['boxInfo'] 76 | ind = np.argsort(sc, axis = 0); 77 | ind = ind[::-1] 78 | if len(ind) > 0: 79 | sc = np.vstack((sc[i,:] for i in ind)) 80 | bb = np.vstack((bb[i,:] for i in ind)) 81 | else: 82 | sc = np.zeros((0,1)).astype(np.float) 83 | bb = np.zeros((0,4)).astype(np.float) 84 | 85 | dtI = dict({'boxInfo': bb, 'sc': sc}) 86 | tp_i, fp_i, sc_i, numInst_i, dupDet_i, instId_i, ov_i = inst_bench_image(dtI, gt[i], bOpts) 87 | tp.append(tp_i); fp.append(fp_i); score.append(sc_i); numInst.append(numInst_i); 88 | dupDet.append(dupDet_i); instId.append(instId_i); ov.append(ov_i); 89 | details = {'tp': list(tp), 'fp': list(fp), 'score': list(score), 'dupDet': list(dupDet), 90 | 'numInst': list(numInst), 'instId': list(instId), 'ov': list(ov)} 91 | 92 | tp = np.vstack(tp[:]) 93 | fp = np.vstack(fp[:]) 94 | sc = np.vstack(score[:]) 95 | 96 | cat_all = np.hstack((tp,fp,sc)) 97 | ind = np.argsort(cat_all[:,2]) 98 | cat_all = cat_all[ind[::-1],:] 99 | tp = np.cumsum(cat_all[:,0], axis = 0); 100 | fp = np.cumsum(cat_all[:,1], axis = 0); 101 | thresh = cat_all[:,2]; 102 | npos = np.sum(numInst, axis = 0); 103 | 104 | # Compute precision/recall 105 | rec = tp / npos; 106 | prec = np.divide(tp, (fp+tp)); 107 | ap = VOCap(rec, prec); 108 | return ap, rec, prec, npos, details 109 |
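# VOCap below computes PASCAL VOC style average precision: the precision curve is first made monotonically non-increasing by taking its upper envelope, and AP is then accumulated as the area under the precision/recall curve at the points where recall changes.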
110 | def VOCap(rec, prec): 111 | rec = rec.reshape(rec.size,1); prec = prec.reshape(prec.size,1) 112 | z = np.zeros((1,1)); o = np.ones((1,1)); 113 | mrec = np.vstack((z, rec, o)) 114 | mpre = np.vstack((z, prec, z)) 115 | for i in range(len(mpre)-2, -1, -1): 116 | mpre[i] = max(mpre[i], mpre[i+1]) 117 | 118 | I = np.where(mrec[1:] != mrec[0:-1])[0]+1; 119 | ap = 0; 120 | for i in I: 121 | ap = ap + (mrec[i] - mrec[i-1])*mpre[i]; 122 | return ap 123 | -------------------------------------------------------------------------------- /python_utils/general_utils.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) 2015, Saurabh Gupta 3 | # 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # --------------------------------------------------------- 6 | 7 | import numpy as np 8 | import cPickle 9 | import os 10 | from IPython.core.debugger import Tracer 11 | import scipy.io as scio 12 | import time 13 | 14 | 15 | def tic_toc_print(interval, string): 16 | global tic_toc_print_time_old 17 | if 'tic_toc_print_time_old' not in globals(): 18 | tic_toc_print_time_old = time.time() 19 | print string 20 | else: 21 | new_time = time.time() 22 | if new_time - tic_toc_print_time_old > interval: 23 | tic_toc_print_time_old = new_time; 24 | print string 25 | 26 | def mkdir_if_missing(output_dir): 27 | """ 28 | def mkdir_if_missing(output_dir) 29 | """ 30 | if not os.path.exists(output_dir): 31 | os.makedirs(output_dir) 32 | 33 | def sigmoid(x): 34 | """ 35 | def sigmoid(x) 36 | """ 37 | y = x.copy().astype(np.float32) 38 | ind = np.where(x > 0)[0] 39 | y[ind] = 1/(1 + np.exp(-x[ind])) 40 | ind = np.where(x <= 0)[0] 41 | y[ind] = np.exp(x[ind])/(np.exp(x[ind]) + 1) 42 | return y 43 | 44 | def save_variables(pickle_file_name, var, info, overwrite = False): 45 | """ 46 | def save_variables(pickle_file_name, var, info, overwrite = False) 47 | """ 48 | if os.path.exists(pickle_file_name) and overwrite == False: 49 | raise Exception('{:s} exists and overwrite is False.'.format(pickle_file_name)) 50 | # Construct the dictionary 51 | assert(type(var) == list); assert(type(info) == list); 52 | d = {} 53 | for i in xrange(len(var)): 54 | d[info[i]] = var[i] 55 | with open(pickle_file_name, 'wb') as f: 56 | cPickle.dump(d, f, cPickle.HIGHEST_PROTOCOL) 57 | 58 | def load_variables(pickle_file_name): 59 | """ 60 | d = load_variables(pickle_file_name) 61 | Output: 62 | d is a dictionary of variables stored in the pickle file.
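Example (illustrative; the file and key names are hypothetical): save_variables('stats.pkl', [ap, rec], ['ap', 'rec'], overwrite = True) stores the two arrays, and d = load_variables('stats.pkl') returns them as d['ap'] and d['rec'].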
63 | """ 64 | if os.path.exists(pickle_file_name): 65 | with open(pickle_file_name, 'rb') as f: 66 | d = cPickle.load(f) 67 | return d 68 | else: 69 | raise Exception('{:s} does not exists.'.format(pickle_file_name)) 70 | -------------------------------------------------------------------------------- /python_utils/pycaffe_utils.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) 2015, Saurabh Gupta 3 | # 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # --------------------------------------------------------- 6 | 7 | 8 | import caffe, yaml 9 | 10 | def net_surgery(net, json_file_or_dict): 11 | # Load the JSON file 12 | if isinstance(json_file_or_dict, str): 13 | with open(json_file_or_dict, 'rt') as f: 14 | source_description = yaml.load(f) 15 | else: 16 | source_description = json_file_or_dict 17 | # Find a list of blobs in the target net 18 | target_blobs = net.params.keys() 19 | target_blobs = dict(zip(target_blobs, [0]*len(target_blobs))) 20 | 21 | # For each item in the json file load the network and copy the layers 22 | for src_desc in source_description: 23 | net_source = caffe.Net(src_desc['prototxt'], src_desc['model'], caffe.TEST) 24 | for j in xrange(len(src_desc['copy_ops']['dest'])): 25 | dest_name = src_desc['copy_ops']['dest'][j] 26 | 27 | assert dest_name in target_blobs, \ 28 | 'Destination name {} not in target network blobs'.format(dest_name) 29 | 30 | src_name = src_desc['copy_ops']['source'][j] 31 | assert src_name in net_source.params.keys(), \ 32 | 'Source name {} not in source network blobs'.format(src_name) 33 | 34 | allow_different_shape = src_desc['copy_ops']['reshape'][j] 35 | 36 | if target_blobs[dest_name] is not 0: 37 | print 'Target blob {} is being reassigned'.format(dest_name) 38 | target_blobs[dest_name] = target_blobs[dest_name] + 1 39 | 40 | assert(len(net.params[dest_name]) == \ 41 | len(net_source.params[src_name])), \ 42 | 'Number of blobs in {} in source do not match number of blobs in {} in destination'\ 43 | .format(src_name, dest_name) 44 | 45 | for k in xrange(len(net.params[dest_name])): 46 | src = net_source.params[src_name][k] 47 | dest = net.params[dest_name][k] 48 | if allow_different_shape: 49 | assert(src.count == dest.count), \ 50 | 'Count of blobs in {}[{:d}] in source do not match count of blobs in {}[{:d}] in destination'\ 51 | .format(src_name, k, dest_name, k) 52 | dest.data[...] = src.data.reshape(dest.data.shape) 53 | else: 54 | src_shape = src.data.shape 55 | dest_shape = dest.data.shape 56 | assert(src_shape == dest_shape), \ 57 | 'Shape of blobs in {}[{:d}] {} in source do not match shape of blobs in {}[{:d}] {} in destination'\ 58 | .format(src_name, k, str(src_shape), dest_name, k, str(dest_shape)) 59 | dest.data[...] = src.data 60 | 61 | unusual = [x for x in target_blobs.keys() if target_blobs[x] is not 1] 62 | for x in unusual: 63 | print 'Parameter blob {} copied {:d} times.'.format(x, target_blobs[x]) 64 | 65 | return target_blobs 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | Tools for training, testing, and compressing Fast R-CNN networks. 
2 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Set up paths for Fast R-CNN.""" 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | def add_path(path): 14 | if path not in sys.path: 15 | sys.path.insert(0, path) 16 | 17 | this_dir = osp.dirname(__file__) 18 | 19 | # Add caffe to PYTHONPATH 20 | caffe_path = osp.join(this_dir, '..', 'caffe-fast-rcnn', 'python') 21 | add_path(caffe_path) 22 | 23 | # Add lib to PYTHONPATH 24 | lib_path = osp.join(this_dir, '..', 'lib') 25 | add_path(lib_path) 26 | -------------------------------------------------------------------------------- /tools/compress_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Compress a Fast R-CNN network using truncated SVD.""" 11 | 12 | import _init_paths 13 | import caffe 14 | import argparse 15 | import numpy as np 16 | import os, sys 17 | 18 | def parse_args(): 19 | """Parse input arguments.""" 20 | parser = argparse.ArgumentParser(description='Compress a Fast R-CNN network') 21 | parser.add_argument('--def', dest='prototxt', 22 | help='prototxt file defining the uncompressed network', 23 | default=None, type=str) 24 | parser.add_argument('--def-svd', dest='prototxt_svd', 25 | help='prototxt file defining the SVD compressed network', 26 | default=None, type=str) 27 | parser.add_argument('--net', dest='caffemodel', 28 | help='model to compress', 29 | default=None, type=str) 30 | 31 | if len(sys.argv) == 1: 32 | parser.print_help() 33 | sys.exit(1) 34 | 35 | args = parser.parse_args() 36 | return args 37 | 38 | def compress_weights(W, l): 39 | """Compress the weight matrix W of an inner product (fully connected) layer 40 | using truncated SVD. 41 | 42 | Parameters: 43 | W: N x M weights matrix 44 | l: number of singular values to retain 45 | 46 | Returns: 47 | Ul, L: matrices such that W \approx Ul*L 48 | """ 49 | 50 | # numpy doesn't seem to have a fast truncated SVD algorithm... 
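# (for reference, scipy.sparse.linalg.svds(W, k=l) computes only the leading l singular triplets and would avoid the full decomposition, if adding a SciPy dependency were acceptable; note it returns the values in ascending order)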
51 | # this could be faster 52 | U, s, V = np.linalg.svd(W, full_matrices=False) 53 | 54 | Ul = U[:, :l] 55 | sl = s[:l] 56 | Vl = V[:l, :] 57 | 58 | L = np.dot(np.diag(sl), Vl) 59 | return Ul, L 60 | 61 | def main(): 62 | args = parse_args() 63 | 64 | # prototxt = 'models/VGG16/test.prototxt' 65 | # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel' 66 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST) 67 | 68 | # prototxt_svd = 'models/VGG16/svd/test_fc6_fc7.prototxt' 69 | # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel' 70 | net_svd = caffe.Net(args.prototxt_svd, args.caffemodel, caffe.TEST) 71 | 72 | print('Uncompressed network {} : {}'.format(args.prototxt, args.caffemodel)) 73 | print('Compressed network prototxt {}'.format(args.prototxt_svd)) 74 | 75 | out = os.path.splitext(os.path.basename(args.caffemodel))[0] + '_svd' 76 | out_dir = os.path.dirname(args.caffemodel) 77 | 78 | # Compress fc6 79 | if net_svd.params.has_key('fc6_L'): 80 | l_fc6 = net_svd.params['fc6_L'][0].data.shape[0] 81 | print(' fc6_L bottleneck size: {}'.format(l_fc6)) 82 | 83 | # uncompressed weights and biases 84 | W_fc6 = net.params['fc6'][0].data 85 | B_fc6 = net.params['fc6'][1].data 86 | 87 | print(' compressing fc6...') 88 | Ul_fc6, L_fc6 = compress_weights(W_fc6, l_fc6) 89 | 90 | assert(len(net_svd.params['fc6_L']) == 1) 91 | 92 | # install compressed matrix factors (and original biases) 93 | net_svd.params['fc6_L'][0].data[...] = L_fc6 94 | 95 | net_svd.params['fc6_U'][0].data[...] = Ul_fc6 96 | net_svd.params['fc6_U'][1].data[...] = B_fc6 97 | 98 | out += '_fc6_{}'.format(l_fc6) 99 | 100 | # Compress fc7 101 | if net_svd.params.has_key('fc7_L'): 102 | l_fc7 = net_svd.params['fc7_L'][0].data.shape[0] 103 | print ' fc7_L bottleneck size: {}'.format(l_fc7) 104 | 105 | W_fc7 = net.params['fc7'][0].data 106 | B_fc7 = net.params['fc7'][1].data 107 | 108 | print(' compressing fc7...') 109 | Ul_fc7, L_fc7 = compress_weights(W_fc7, l_fc7) 110 | 111 | assert(len(net_svd.params['fc7_L']) == 1) 112 | 113 | net_svd.params['fc7_L'][0].data[...] = L_fc7 114 | 115 | net_svd.params['fc7_U'][0].data[...] = Ul_fc7 116 | net_svd.params['fc7_U'][1].data[...] = B_fc7 117 | 118 | out += '_fc7_{}'.format(l_fc7) 119 | 120 | filename = '{}/{}.caffemodel'.format(out_dir, out) 121 | net_svd.save(filename) 122 | print 'Wrote svd model to: {:s}'.format(filename) 123 | 124 | if __name__ == '__main__': 125 | main() 126 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Faster R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """ 11 | Demo script showing detections in sample images. 12 | 13 | See README.md for installation instructions before running. 
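Example (illustrative): ./tools/demo.py --gpu 0 --net vgg16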
14 | """ 15 | 16 | import _init_paths 17 | from fast_rcnn.config import cfg 18 | from fast_rcnn.test import im_detect 19 | from fast_rcnn.nms_wrapper import nms 20 | from utils.timer import Timer 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import scipy.io as sio 24 | import caffe, os, sys, cv2 25 | import argparse 26 | 27 | CLASSES = ('__background__', 28 | 'aeroplane', 'bicycle', 'bird', 'boat', 29 | 'bottle', 'bus', 'car', 'cat', 'chair', 30 | 'cow', 'diningtable', 'dog', 'horse', 31 | 'motorbike', 'person', 'pottedplant', 32 | 'sheep', 'sofa', 'train', 'tvmonitor') 33 | 34 | NETS = {'vgg16': ('VGG16', 35 | 'VGG16_faster_rcnn_final.caffemodel'), 36 | 'zf': ('ZF', 37 | 'ZF_faster_rcnn_final.caffemodel')} 38 | 39 | 40 | def vis_detections(im, class_name, dets, thresh=0.5): 41 | """Draw detected bounding boxes.""" 42 | inds = np.where(dets[:, -1] >= thresh)[0] 43 | if len(inds) == 0: 44 | return 45 | 46 | im = im[:, :, (2, 1, 0)] 47 | fig, ax = plt.subplots(figsize=(12, 12)) 48 | ax.imshow(im, aspect='equal') 49 | for i in inds: 50 | bbox = dets[i, :4] 51 | score = dets[i, -1] 52 | 53 | ax.add_patch( 54 | plt.Rectangle((bbox[0], bbox[1]), 55 | bbox[2] - bbox[0], 56 | bbox[3] - bbox[1], fill=False, 57 | edgecolor='red', linewidth=3.5) 58 | ) 59 | ax.text(bbox[0], bbox[1] - 2, 60 | '{:s} {:.3f}'.format(class_name, score), 61 | bbox=dict(facecolor='blue', alpha=0.5), 62 | fontsize=14, color='white') 63 | 64 | ax.set_title(('{} detections with ' 65 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 66 | thresh), 67 | fontsize=14) 68 | plt.axis('off') 69 | plt.tight_layout() 70 | plt.draw() 71 | 72 | def demo(net, image_name): 73 | """Detect object classes in an image using pre-computed object proposals.""" 74 | 75 | # Load the demo image 76 | im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) 77 | im = cv2.imread(im_file) 78 | 79 | # Detect all object classes and regress object bounds 80 | timer = Timer() 81 | timer.tic() 82 | scores, boxes = im_detect(net, im) 83 | timer.toc() 84 | print ('Detection took {:.3f}s for ' 85 | '{:d} object proposals').format(timer.total_time, boxes.shape[0]) 86 | 87 | # Visualize detections for each class 88 | CONF_THRESH = 0.8 89 | NMS_THRESH = 0.3 90 | for cls_ind, cls in enumerate(CLASSES[1:]): 91 | cls_ind += 1 # because we skipped background 92 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 93 | cls_scores = scores[:, cls_ind] 94 | dets = np.hstack((cls_boxes, 95 | cls_scores[:, np.newaxis])).astype(np.float32) 96 | keep = nms(dets, NMS_THRESH) 97 | dets = dets[keep, :] 98 | vis_detections(im, cls, dets, thresh=CONF_THRESH) 99 | 100 | def parse_args(): 101 | """Parse input arguments.""" 102 | parser = argparse.ArgumentParser(description='Faster R-CNN demo') 103 | parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', 104 | default=0, type=int) 105 | parser.add_argument('--cpu', dest='cpu_mode', 106 | help='Use CPU mode (overrides --gpu)', 107 | action='store_true') 108 | parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]', 109 | choices=NETS.keys(), default='vgg16') 110 | 111 | args = parser.parse_args() 112 | 113 | return args 114 | 115 | if __name__ == '__main__': 116 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 117 | 118 | args = parse_args() 119 | 120 | prototxt = os.path.join(cfg.MODELS_DIR, NETS[args.demo_net][0], 121 | 'faster_rcnn_alt_opt', 'faster_rcnn_test.pt') 122 | caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models', 123 | NETS[args.demo_net][1]) 124 
| 125 | if not os.path.isfile(caffemodel): 126 | raise IOError(('{:s} not found.\nDid you run ./data/scripts/' 127 | 'fetch_faster_rcnn_models.sh?').format(caffemodel)) 128 | 129 | if args.cpu_mode: 130 | caffe.set_mode_cpu() 131 | else: 132 | caffe.set_mode_gpu() 133 | caffe.set_device(args.gpu_id) 134 | cfg.GPU_ID = args.gpu_id 135 | net = caffe.Net(prototxt, caffemodel, caffe.TEST) 136 | 137 | print '\n\nLoaded network {:s}'.format(caffemodel) 138 | 139 | # Warmup on a dummy image 140 | im = 128 * np.ones((300, 500, 3), dtype=np.uint8) 141 | for i in xrange(2): 142 | _, _= im_detect(net, im) 143 | 144 | im_names = ['000456.jpg', '000542.jpg', '001150.jpg', 145 | '001763.jpg', '004545.jpg'] 146 | for im_name in im_names: 147 | print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' 148 | print 'Demo for data/demo/{}'.format(im_name) 149 | demo(net, im_name) 150 | 151 | plt.show() 152 | -------------------------------------------------------------------------------- /tools/eval_recall.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import _init_paths 4 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list 5 | from datasets.factory import get_imdb 6 | import argparse 7 | import time, os, sys 8 | import numpy as np 9 | 10 | def parse_args(): 11 | """ 12 | Parse input arguments 13 | """ 14 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 15 | parser.add_argument('--imdb', dest='imdb_name', 16 | help='dataset to test', 17 | default='voc_2007_test', type=str) 18 | parser.add_argument('--method', dest='method', 19 | help='proposal method', 20 | default='selective_search', type=str) 21 | parser.add_argument('--rpn-file', dest='rpn_file', 22 | default=None, type=str) 23 | 24 | if len(sys.argv) == 1: 25 | parser.print_help() 26 | sys.exit(1) 27 | 28 | args = parser.parse_args() 29 | return args 30 | 31 | if __name__ == '__main__': 32 | args = parse_args() 33 | 34 | print('Called with args:') 35 | print(args) 36 | 37 | imdb = get_imdb(args.imdb_name) 38 | imdb.set_proposal_method(args.method) 39 | if args.rpn_file is not None: 40 | imdb.config['rpn_file'] = args.rpn_file 41 | 42 | candidate_boxes = None 43 | if 0: 44 | import scipy.io as sio 45 | filename = 'debug/stage1_rpn_voc_2007_test.mat' 46 | raw_data = sio.loadmat(filename)['aboxes'].ravel() 47 | candidate_boxes = raw_data 48 | 49 | ar, gt_overlaps, recalls, thresholds = \ 50 | imdb.evaluate_recall(candidate_boxes=candidate_boxes) 51 | print 'Method: {}'.format(args.method) 52 | print 'AverageRec: {:.3f}'.format(ar) 53 | 54 | def recall_at(t): 55 | ind = np.where(thresholds > t - 1e-5)[0][0] 56 | assert np.isclose(thresholds[ind], t) 57 | return recalls[ind] 58 | 59 | print 'Recall@0.5: {:.3f}'.format(recall_at(0.5)) 60 | print 'Recall@0.6: {:.3f}'.format(recall_at(0.6)) 61 | print 'Recall@0.7: {:.3f}'.format(recall_at(0.7)) 62 | print 'Recall@0.8: {:.3f}'.format(recall_at(0.8)) 63 | print 'Recall@0.9: {:.3f}'.format(recall_at(0.9)) 64 | # print again for easy spreadsheet copying 65 | print '{:.3f}'.format(ar) 66 | print '{:.3f}'.format(recall_at(0.5)) 67 | print '{:.3f}'.format(recall_at(0.6)) 68 | print '{:.3f}'.format(recall_at(0.7)) 69 | print '{:.3f}'.format(recall_at(0.8)) 70 | print '{:.3f}'.format(recall_at(0.9)) 71 | -------------------------------------------------------------------------------- /tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # 
-------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Reval = re-eval. Re-evaluate saved detections.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.test import apply_nms 14 | from fast_rcnn.config import cfg 15 | from datasets.factory import get_imdb 16 | import cPickle 17 | import os, sys, argparse 18 | import numpy as np 19 | 20 | def parse_args(): 21 | """ 22 | Parse input arguments 23 | """ 24 | parser = argparse.ArgumentParser(description='Re-evaluate results') 25 | parser.add_argument('output_dir', nargs=1, help='results directory', 26 | type=str) 27 | parser.add_argument('--imdb', dest='imdb_name', 28 | help='dataset to re-evaluate', 29 | default='voc_2007_test', type=str) 30 | parser.add_argument('--matlab', dest='matlab_eval', 31 | help='use matlab for evaluation', 32 | action='store_true') 33 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 34 | action='store_true') 35 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 36 | action='store_true') 37 | 38 | if len(sys.argv) == 1: 39 | parser.print_help() 40 | sys.exit(1) 41 | 42 | args = parser.parse_args() 43 | return args 44 | 45 | def from_dets(imdb_name, output_dir, args): 46 | imdb = get_imdb(imdb_name) 47 | imdb.competition_mode(args.comp_mode) 48 | imdb.config['matlab_eval'] = args.matlab_eval 49 | with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f: 50 | dets = cPickle.load(f) 51 | 52 | if args.apply_nms: 53 | print 'Applying NMS to all detections' 54 | nms_dets = apply_nms(dets, cfg.TEST.NMS) 55 | else: 56 | nms_dets = dets 57 | 58 | print 'Evaluating detections' 59 | imdb.evaluate_detections(nms_dets, output_dir) 60 | 61 | if __name__ == '__main__': 62 | args = parse_args() 63 | 64 | output_dir = os.path.abspath(args.output_dir[0]) 65 | imdb_name = args.imdb_name 66 | from_dets(imdb_name, output_dir, args) 67 | -------------------------------------------------------------------------------- /tools/rpn_generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast/er/ R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Generate RPN proposals.""" 11 | 12 | import _init_paths 13 | import numpy as np 14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir 15 | from datasets.factory import get_imdb 16 | from rpn.generate import imdb_proposals 17 | import cPickle 18 | import caffe 19 | import argparse 20 | import pprint 21 | import time, os, sys 22 | 23 | def parse_args(): 24 | """ 25 | Parse input arguments 26 | """ 27 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 28 | parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use', 29 | default=0, type=int) 30 | parser.add_argument('--def', dest='prototxt', 31 | help='prototxt file defining the network', 32 | default=None, type=str) 33 | parser.add_argument('--net', dest='caffemodel', 34 | help='model to test', 35 | default=None, type=str) 36 | parser.add_argument('--cfg', dest='cfg_file', 37 | help='optional config file', default=None, type=str) 38 | 
parser.add_argument('--wait', dest='wait', 39 | help='wait until net file exists', 40 | default=True, type=bool) 41 | parser.add_argument('--imdb', dest='imdb_name', 42 | help='dataset to test', 43 | default='voc_2007_test', type=str) 44 | parser.add_argument('--set', dest='set_cfgs', 45 | help='set config keys', default=None, 46 | nargs=argparse.REMAINDER) 47 | 48 | if len(sys.argv) == 1: 49 | parser.print_help() 50 | sys.exit(1) 51 | 52 | args = parser.parse_args() 53 | return args 54 | 55 | if __name__ == '__main__': 56 | args = parse_args() 57 | 58 | print('Called with args:') 59 | print(args) 60 | 61 | if args.cfg_file is not None: 62 | cfg_from_file(args.cfg_file) 63 | if args.set_cfgs is not None: 64 | cfg_from_list(args.set_cfgs) 65 | 66 | cfg.GPU_ID = args.gpu_id 67 | 68 | # RPN test settings 69 | cfg.TEST.RPN_PRE_NMS_TOP_N = -1 70 | cfg.TEST.RPN_POST_NMS_TOP_N = 2000 71 | 72 | print('Using config:') 73 | pprint.pprint(cfg) 74 | 75 | while not os.path.exists(args.caffemodel) and args.wait: 76 | print('Waiting for {} to exist...'.format(args.caffemodel)) 77 | time.sleep(10) 78 | 79 | caffe.set_mode_gpu() 80 | caffe.set_device(args.gpu_id) 81 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST) 82 | net.name = os.path.splitext(os.path.basename(args.caffemodel))[0] 83 | 84 | imdb = get_imdb(args.imdb_name) 85 | imdb_boxes = imdb_proposals(net, imdb) 86 | 87 | output_dir = get_output_dir(imdb, net) 88 | rpn_file = os.path.join(output_dir, net.name + '_rpn_proposals.pkl') 89 | with open(rpn_file, 'wb') as f: 90 | cPickle.dump(imdb_boxes, f, cPickle.HIGHEST_PROTOCOL) 91 | print 'Wrote RPN proposals to {}'.format(rpn_file) 92 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN with OHEM 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Abhinav Shrivastava 7 | # -------------------------------------------------------- 8 | 9 | """Test a Fast R-CNN network on an image database.""" 10 | 11 | import _init_paths 12 | from fast_rcnn.test import test_net 13 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list 14 | from datasets.factory import get_imdb 15 | import caffe 16 | import argparse 17 | import pprint 18 | import time, os, sys 19 | 20 | def parse_args(): 21 | """ 22 | Parse input arguments 23 | """ 24 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 25 | parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use', 26 | default=0, type=int) 27 | parser.add_argument('--def', dest='prototxt', 28 | help='prototxt file defining the network', 29 | default=None, type=str) 30 | parser.add_argument('--net', dest='caffemodel', 31 | help='model to test', 32 | default=None, type=str) 33 | parser.add_argument('--cfg', dest='cfg_file', 34 | help='optional config file', default=None, type=str) 35 | parser.add_argument('--wait', dest='wait', 36 | help='wait until net file exists', 37 | default=True, type=bool) 38 | parser.add_argument('--imdb', dest='imdb_name', 39 | help='dataset to test', 40 | default='voc_2007_test', type=str) 41 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 42 | action='store_true') 43 | parser.add_argument('--set', dest='set_cfgs', 44 | help='set config keys', default=None, 45 | nargs=argparse.REMAINDER) 46 | 
parser.add_argument('--vis', dest='vis', help='visualize detections', 47 | action='store_true') 48 | parser.add_argument('--num_dets', dest='max_per_image', 49 | help='max number of detections per image', 50 | default=100, type=int) 51 | parser.add_argument('--det_thresh', dest='det_thresh', 52 | help='detection score threshold', 53 | default=0.05, type=float) 54 | 55 | if len(sys.argv) == 1: 56 | parser.print_help() 57 | sys.exit(1) 58 | 59 | args = parser.parse_args() 60 | return args 61 | 62 | if __name__ == '__main__': 63 | args = parse_args() 64 | 65 | print('Called with args:') 66 | print(args) 67 | 68 | if args.cfg_file is not None: 69 | cfg_from_file(args.cfg_file) 70 | if args.set_cfgs is not None: 71 | cfg_from_list(args.set_cfgs) 72 | 73 | cfg.GPU_ID = args.gpu_id 74 | 75 | print('Using config:') 76 | pprint.pprint(cfg) 77 | 78 | while not os.path.exists(args.caffemodel) and args.wait: 79 | print('Waiting for {} to exist...'.format(args.caffemodel)) 80 | time.sleep(10) 81 | 82 | caffe.set_mode_gpu() 83 | caffe.set_device(args.gpu_id) 84 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST) 85 | net.name = os.path.splitext(os.path.basename(args.caffemodel))[0] 86 | 87 | imdb = get_imdb(args.imdb_name) 88 | imdb.competition_mode(args.comp_mode) 89 | if not cfg.TEST.HAS_RPN: 90 | imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD) 91 | 92 | test_net(net, imdb, max_per_image=args.max_per_image, vis=args.vis, thresh=args.det_thresh) 93 | -------------------------------------------------------------------------------- /tools/train_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Train a Fast R-CNN network on a region of interest database.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.train import get_training_roidb, train_net 14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir 15 | from datasets.factory import get_imdb 16 | import datasets.imdb 17 | import caffe 18 | import argparse 19 | import pprint 20 | import numpy as np 21 | import sys 22 | 23 | def parse_args(): 24 | """ 25 | Parse input arguments 26 | """ 27 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 28 | parser.add_argument('--gpu', dest='gpu_id', 29 | help='GPU device id to use [0]', 30 | default=0, type=int) 31 | parser.add_argument('--solver', dest='solver', 32 | help='solver prototxt', 33 | default=None, type=str) 34 | parser.add_argument('--iters', dest='max_iters', 35 | help='number of iterations to train', 36 | default=40000, type=int) 37 | parser.add_argument('--weights', dest='pretrained_model', 38 | help='initialize with pretrained model weights', 39 | default=None, type=str) 40 | parser.add_argument('--cfg', dest='cfg_file', 41 | help='optional config file', 42 | default=None, type=str) 43 | parser.add_argument('--imdb', dest='imdb_name', 44 | help='dataset to train on', 45 | default='voc_2007_trainval', type=str) 46 | parser.add_argument('--rand', dest='randomize', 47 | help='randomize (do not use a fixed seed)', 48 | action='store_true') 49 | parser.add_argument('--set', dest='set_cfgs', 50 | help='set config keys', default=None, 51 | nargs=argparse.REMAINDER) 52 | 53 | if len(sys.argv) == 
1: 54 | parser.print_help() 55 | sys.exit(1) 56 | 57 | args = parser.parse_args() 58 | return args 59 | 60 | def combined_roidb(imdb_names): 61 | def get_roidb(imdb_name): 62 | imdb = get_imdb(imdb_name) 63 | print 'Loaded dataset `{:s}` for training'.format(imdb.name) 64 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 65 | print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD) 66 | roidb = get_training_roidb(imdb) 67 | return roidb 68 | 69 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 70 | roidb = roidbs[0] 71 | if len(roidbs) > 1: 72 | for r in roidbs[1:]: 73 | roidb.extend(r) 74 | imdb = datasets.imdb.imdb(imdb_names) 75 | else: 76 | imdb = get_imdb(imdb_names) 77 | return imdb, roidb 78 | 79 | if __name__ == '__main__': 80 | args = parse_args() 81 | 82 | print('Called with args:') 83 | print(args) 84 | 85 | if args.cfg_file is not None: 86 | cfg_from_file(args.cfg_file) 87 | if args.set_cfgs is not None: 88 | cfg_from_list(args.set_cfgs) 89 | 90 | cfg.GPU_ID = args.gpu_id 91 | 92 | print('Using config:') 93 | pprint.pprint(cfg) 94 | 95 | if not args.randomize: 96 | # fix the random seeds (numpy and caffe) for reproducibility 97 | np.random.seed(cfg.RNG_SEED) 98 | caffe.set_random_seed(cfg.RNG_SEED) 99 | 100 | # set up caffe 101 | caffe.set_mode_gpu() 102 | caffe.set_device(args.gpu_id) 103 | 104 | imdb, roidb = combined_roidb(args.imdb_name) 105 | print '{:d} roidb entries'.format(len(roidb)) 106 | 107 | output_dir = get_output_dir(imdb) 108 | print 'Output will be saved to `{:s}`'.format(output_dir) 109 | 110 | train_net(args.solver, roidb, output_dir, 111 | pretrained_model=args.pretrained_model, 112 | max_iters=args.max_iters) 113 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | ./experiments/scripts/fast_rcnn_std.sh 0 VGG16 pascal_voc 2 | ./experiments/scripts/fast_rcnn_adv_pretrain.sh 0 VGG16 pascal_voc 3 | ./copy_model.h 4 | ./experiments/scripts/fast_rcnn_adv.sh 0 VGG16 pascal_voc 5 | 6 | --------------------------------------------------------------------------------