├── .gitignore ├── LICENSE ├── README.md ├── data ├── __init__.py └── demo │ ├── 000456.jpg │ ├── 000542.jpg │ ├── 001150.jpg │ ├── 001763.jpg │ └── 004545.jpg ├── experiments ├── README.md ├── cfgs │ ├── faster_rcnn_alt_opt.yml │ ├── faster_rcnn_end2end.yml │ └── kitti_rcnn.yml └── scripts │ └── faster_rcnn_end2end.sh ├── lib ├── Makefile ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── ds_utils.py │ ├── factory.py │ ├── imagenet3d.py │ ├── imdb.py │ ├── imdb2.py │ ├── kitti.py │ ├── kitti_tracking.py │ ├── nissan.py │ ├── nthu.py │ ├── pascal3d.py │ ├── pascal_voc.py │ ├── pascal_voc2.py │ └── voc_eval.py ├── fast_rcnn │ ├── __init__.py │ ├── bbox_transform.py │ ├── config.py │ ├── nms_wrapper.py │ ├── test.py │ └── train.py ├── gt_data_layer │ ├── __init__.py │ ├── layer.py │ ├── minibatch.py │ └── roidb.py ├── make.sh ├── networks │ ├── .VGGnet.py.swo │ ├── VGGnet_test.py │ ├── VGGnet_train.py │ ├── __init__.py │ ├── factory.py │ └── network.py ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── roi_data_layer │ ├── __init__.py │ ├── layer.py │ ├── minibatch.py │ ├── minibatch2.py │ ├── roidb.py │ └── roidb2.py ├── roi_pooling_layer │ ├── __init__.py │ ├── roi_pooling_op.cc │ ├── roi_pooling_op.py │ ├── roi_pooling_op_gpu.cu.cc │ ├── roi_pooling_op_gpu.h │ ├── roi_pooling_op_grad.py │ ├── roi_pooling_op_test.py │ └── work_sharder.h ├── rpn_msr │ ├── __init__.py │ ├── anchor_target_layer.py │ ├── anchor_target_layer_tf.py │ ├── generate.py │ ├── generate_anchors.py │ ├── proposal_layer.py │ ├── proposal_layer_tf.py │ └── proposal_target_layer_tf.py ├── setup.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── bbox.pyx │ ├── blob.py │ ├── boxes_grid.py │ ├── nms.py │ ├── nms.pyx │ └── timer.py └── tools ├── _init_paths.py ├── demo.py ├── test_net.py └── train_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.so 3 | *.o 4 | faster_rcnn_models 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Fu-Hsiang Chan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Faster-RCNN_TF 2 | 3 | This is an experimental Tensorflow implementation of Faster RCNN - a convnet for object detection with a region proposal network. 4 | For details about Faster R-CNN, please refer to the paper [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](http://arxiv.org/pdf/1506.01497v3.pdf) by Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun. 5 | 6 | 7 | ### Requirements: software 8 | 9 | 1. Requirements for Tensorflow (see: [Tensorflow](https://www.tensorflow.org/)) 10 | 11 | 2. Python packages you might not have: `cython`, `python-opencv`, `easydict` 12 | 13 | ### Requirements: hardware 14 | 15 | 1. For training the end-to-end version of Faster R-CNN with VGG16, 3GB of GPU memory is sufficient (using cuDNN) 16 | 17 | ### Installation (sufficient for the demo) 18 | 19 | 1. Clone the Faster R-CNN repository 20 | ```Shell 21 | # Make sure to clone with --recursive 22 | git clone --recursive https://github.com/smallcorgi/Faster-RCNN_TF.git 23 | ``` 24 | 25 | 2. Build the Cython modules 26 | ```Shell 27 | cd $FRCN_ROOT/lib 28 | make 29 | ``` 30 | 31 | ### Demo 32 | 33 | *After successfully completing [basic installation](#installation-sufficient-for-the-demo)*, you'll be ready to run the demo. 34 | 35 | Download the model trained on PASCAL VOC 2007 [[Google Drive]](https://drive.google.com/file/d/0ByuDEGFYmWsbZ0EzeUlHcGFIVWM/view?usp=sharing&resourcekey=0-wl5NWArb595zL-BX6ctTgQ) [[Dropbox]](https://www.dropbox.com/s/cfz3blmtmwj6bdh/VGGnet_fast_rcnn_iter_70000.ckpt?dl=0) 36 | 37 | To run the demo: 38 | ```Shell 39 | cd $FRCN_ROOT 40 | python ./tools/demo.py --model model_path 41 | ``` 42 | The demo performs detection using a VGG16 network trained on PASCAL VOC 2007. 43 | 44 | ### Training Model 45 | 1. Download the training, validation, test data and VOCdevkit 46 | 47 | ```Shell 48 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 49 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 50 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar 51 | ``` 52 | 53 | 2. Extract all of these tars into one directory named `VOCdevkit` 54 | 55 | ```Shell 56 | tar xvf VOCtrainval_06-Nov-2007.tar 57 | tar xvf VOCtest_06-Nov-2007.tar 58 | tar xvf VOCdevkit_08-Jun-2007.tar 59 | ``` 60 | 61 | 3. It should have this basic structure 62 | 63 | ```Shell 64 | $VOCdevkit/ # development kit 65 | $VOCdevkit/VOCcode/ # VOC utility code 66 | $VOCdevkit/VOC2007 # image sets, annotations, etc. 67 | # ... and several other directories ... 68 | ``` 69 | 70 | 4. Create symlinks for the PASCAL VOC dataset 71 | 72 | ```Shell 73 | cd $FRCN_ROOT/data 74 | ln -s $VOCdevkit VOCdevkit2007 75 | ``` 76 | 77 | 5. Download pre-trained ImageNet models 78 | 79 | Download the pre-trained ImageNet models [[Google Drive]](https://drive.google.com/file/d/0ByuDEGFYmWsbNVF5eExySUtMZmM/view?usp=sharing&resourcekey=0-bxxPp_3P3tc-6Ca7sPrLsw) [[Dropbox]](https://www.dropbox.com/s/po2kzdhdgl4ix55/VGG_imagenet.npy?dl=0) 80 | 81 | ```Shell 82 | mv VGG_imagenet.npy $FRCN_ROOT/data/pretrain_model/VGG_imagenet.npy 83 | ``` 84 | 85 | 6. 
Run the script to train and test the model 86 | ```Shell 87 | cd $FRCN_ROOT 88 | ./experiments/scripts/faster_rcnn_end2end.sh $DEVICE $DEVICE_ID VGG16 pascal_voc 89 | ``` 90 | DEVICE is either cpu or gpu, and DEVICE_ID is the GPU id (e.g. 0) 91 | 92 | ### Results of testing on PASCAL VOC 2007 93 | 94 | | Classes | AP | 95 | |-------------|--------| 96 | | aeroplane | 0.698 | 97 | | bicycle | 0.788 | 98 | | bird | 0.657 | 99 | | boat | 0.565 | 100 | | bottle | 0.478 | 101 | | bus | 0.762 | 102 | | car | 0.797 | 103 | | cat | 0.793 | 104 | | chair | 0.479 | 105 | | cow | 0.724 | 106 | | diningtable | 0.648 | 107 | | dog | 0.803 | 108 | | horse | 0.797 | 109 | | motorbike | 0.732 | 110 | | person | 0.770 | 111 | | pottedplant | 0.384 | 112 | | sheep | 0.664 | 113 | | sofa | 0.650 | 114 | | train | 0.766 | 115 | | tvmonitor | 0.666 | 116 | | mAP | 0.681 | 117 | 118 | 119 | ### References 120 | [Faster R-CNN caffe version](https://github.com/rbgirshick/py-faster-rcnn) 121 | 122 | [A tensorflow implementation of SubCNN (work in progress)](https://github.com/yuxng/SubCNN_TF) 123 | 124 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smallcorgi/Faster-RCNN_TF/d9adb24c8ffdbae3b56eb55fc629d719fee3d741/data/__init__.py -------------------------------------------------------------------------------- /data/demo/000456.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smallcorgi/Faster-RCNN_TF/d9adb24c8ffdbae3b56eb55fc629d719fee3d741/data/demo/000456.jpg -------------------------------------------------------------------------------- /data/demo/000542.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smallcorgi/Faster-RCNN_TF/d9adb24c8ffdbae3b56eb55fc629d719fee3d741/data/demo/000542.jpg -------------------------------------------------------------------------------- /data/demo/001150.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smallcorgi/Faster-RCNN_TF/d9adb24c8ffdbae3b56eb55fc629d719fee3d741/data/demo/001150.jpg -------------------------------------------------------------------------------- /data/demo/001763.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smallcorgi/Faster-RCNN_TF/d9adb24c8ffdbae3b56eb55fc629d719fee3d741/data/demo/001763.jpg -------------------------------------------------------------------------------- /data/demo/004545.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smallcorgi/Faster-RCNN_TF/d9adb24c8ffdbae3b56eb55fc629d719fee3d741/data/demo/004545.jpg -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- 1 | Scripts are under `experiments/scripts`. 2 | 3 | Each script saves a log file under `experiments/logs`. 4 | 5 | Configuration override files used in the experiments are stored in `experiments/cfgs`.
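For example, a typical end-to-end run (assuming the PASCAL VOC setup from the top-level README, a GPU with device id 0, and that you launch from `$FRCN_ROOT`) looks like this; the log path follows the pattern hard-coded in `faster_rcnn_end2end.sh`:

```Shell
cd $FRCN_ROOT
./experiments/scripts/faster_rcnn_end2end.sh gpu 0 VGG16 pascal_voc
# With no extra args, EXTRA_ARGS_SLUG is empty and the log lands at
#   experiments/logs/faster_rcnn_end2end_VGG16_.txt.<timestamp>
```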
 6 | -------------------------------------------------------------------------------- /experiments/cfgs/faster_rcnn_alt_opt.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: faster_rcnn_alt_opt 2 | TRAIN: 3 | BG_THRESH_LO: 0.0 4 | TEST: 5 | HAS_RPN: True 6 | -------------------------------------------------------------------------------- /experiments/cfgs/faster_rcnn_end2end.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: faster_rcnn_end2end 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | TEST: 11 | HAS_RPN: True 12 | -------------------------------------------------------------------------------- /experiments/cfgs/kitti_rcnn.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: kitti 2 | IS_RPN: False 3 | IS_MULTISCALE: True 4 | IS_EXTRAPOLATING: True 5 | REGION_PROPOSAL: 'RPN' 6 | TRAIN: 7 | LEARNING_RATE: 0.001 8 | MOMENTUM: 0.9 9 | GAMMA: 0.1 10 | STEPSIZE: 30000 11 | SCALES_BASE: !!python/tuple [1.0, 2.0, 3.0, 4.0] 12 | NUM_PER_OCTAVE: 4 13 | IMS_PER_BATCH: 2 14 | FG_FRACTION: 0.25 15 | FG_THRESH: !!python/tuple [0.7, 0.5, 0.5] 16 | BG_THRESH_HI: !!python/tuple [0.7, 0.5, 0.5] 17 | BG_THRESH_LO: !!python/tuple [0.1, 0.1, 0.1] 18 | BBOX_THRESH: !!python/tuple [0.7, 0.5, 0.5] 19 | ROI_THRESHOLD: 0.01 20 | SNAPSHOT_ITERS: 10000 21 | SNAPSHOT_INFIX: kitti 22 | SNAPSHOT_PREFIX: caffenet_fast_rcnn 23 | TEST: 24 | SCALES_BASE: !!python/tuple [1.0, 2.0, 3.0, 4.0] 25 | NUM_PER_OCTAVE: 4 26 | NMS: 0.5 27 | -------------------------------------------------------------------------------- /experiments/scripts/faster_rcnn_end2end.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # ./experiments/scripts/faster_rcnn_end2end.sh DEVICE DEVICE_ID NET DATASET [optional args to {train,test}_net.py] 4 | # DEVICE is either cpu or gpu, DEVICE_ID is the GPU id, and DATASET is either pascal_voc or coco. 5 | # 6 | # Example: 7 | # ./experiments/scripts/faster_rcnn_end2end.sh gpu 0 VGG_CNN_M_1024 pascal_voc \ 8 | # --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]" 9 | 10 | set -x 11 | set -e 12 | 13 | export PYTHONUNBUFFERED="True" 14 | 15 | DEV=$1 16 | DEV_ID=$2 17 | NET=$3 18 | DATASET=$4 19 | 20 | array=( $@ ) 21 | len=${#array[@]} 22 | EXTRA_ARGS=${array[@]:4:$len} 23 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 24 | 25 | case $DATASET in 26 | pascal_voc) 27 | TRAIN_IMDB="voc_2007_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | PT_DIR="pascal_voc" 30 | ITERS=70000 31 | ;; 32 | coco) 33 | # This is a very long and slow training schedule 34 | # You can probably use fewer iterations and reduce the 35 | # time to the LR drop (set in the solver to 350,000 iterations). 
36 | TRAIN_IMDB="coco_2014_train" 37 | TEST_IMDB="coco_2014_minival" 38 | PT_DIR="coco" 39 | ITERS=490000 40 | ;; 41 | *) 42 | echo "No dataset given" 43 | exit 44 | ;; 45 | esac 46 | 47 | LOG="experiments/logs/faster_rcnn_end2end_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 48 | exec &> >(tee -a "$LOG") 49 | echo Logging output to "$LOG" 50 | 51 | time python ./tools/train_net.py --device ${DEV} --device_id ${DEV_ID} \ 52 | --weights data/pretrain_model/VGG_imagenet.npy \ 53 | --imdb ${TRAIN_IMDB} \ 54 | --iters ${ITERS} \ 55 | --cfg experiments/cfgs/faster_rcnn_end2end.yml \ 56 | --network VGGnet_train \ 57 | ${EXTRA_ARGS} 58 | 59 | set +x 60 | NET_FINAL=`grep -B 1 "done solving" ${LOG} | grep "Wrote snapshot" | awk '{print $4}'` 61 | set -x 62 | 63 | time python ./tools/test_net.py --device ${DEV} --device_id ${DEV_ID} \ 64 | --weights ${NET_FINAL} \ 65 | --imdb ${TEST_IMDB} \ 66 | --cfg experiments/cfgs/faster_rcnn_end2end.yml \ 67 | --network VGGnet_test \ 68 | ${EXTRA_ARGS} 69 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | bash make.sh 5 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from .imdb import imdb 9 | from .pascal_voc import pascal_voc 10 | from .pascal3d import pascal3d 11 | from .imagenet3d import imagenet3d 12 | from .kitti import kitti 13 | from .kitti_tracking import kitti_tracking 14 | from .nissan import nissan 15 | from .nthu import nthu 16 | from . import factory 17 | 18 | import os.path as osp 19 | ROOT_DIR = osp.join(osp.dirname(__file__), '..', '..') 20 | 21 | # We assume your matlab binary is in your path and called `matlab'. 22 | # If either is not true, just add it to your path and alias it as matlab, or 23 | # you could change this file. 24 | MATLAB = 'matlab_r2013b' 25 | 26 | # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python 27 | def _which(program): 28 | import os 29 | def is_exe(fpath): 30 | return os.path.isfile(fpath) and os.access(fpath, os.X_OK) 31 | 32 | fpath, fname = os.path.split(program) 33 | if fpath: 34 | if is_exe(program): 35 | return program 36 | else: 37 | for path in os.environ["PATH"].split(os.pathsep): 38 | path = path.strip('"') 39 | exe_file = os.path.join(path, program) 40 | if is_exe(exe_file): 41 | return exe_file 42 | 43 | return None 44 | """ 45 | if _which(MATLAB) is None: 46 | msg = ("MATLAB command '{}' not found. 
" 47 | "Please add '{}' to your PATH.").format(MATLAB, MATLAB) 48 | raise EnvironmentError(msg) 49 | """ 50 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | 7 | import numpy as np 8 | 9 | def unique_boxes(boxes, scale=1.0): 10 | """Return indices of unique boxes.""" 11 | v = np.array([1, 1e3, 1e6, 1e9]) 12 | hashes = np.round(boxes * scale).dot(v) 13 | _, index = np.unique(hashes, return_index=True) 14 | return np.sort(index) 15 | 16 | def xywh_to_xyxy(boxes): 17 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 18 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 19 | 20 | def xyxy_to_xywh(boxes): 21 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 22 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 23 | 24 | def validate_boxes(boxes, width=0, height=0): 25 | """Check that a set of boxes are valid.""" 26 | x1 = boxes[:, 0] 27 | y1 = boxes[:, 1] 28 | x2 = boxes[:, 2] 29 | y2 = boxes[:, 3] 30 | assert (x1 >= 0).all() 31 | assert (y1 >= 0).all() 32 | assert (x2 >= x1).all() 33 | assert (y2 >= y1).all() 34 | assert (x2 < width).all() 35 | assert (y2 < height).all() 36 | 37 | def filter_small_boxes(boxes, min_size): 38 | w = boxes[:, 2] - boxes[:, 0] 39 | h = boxes[:, 3] - boxes[:, 1] 40 | keep = np.where((w >= min_size) & (h > min_size))[0] 41 | return keep 42 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | import datasets.pascal_voc 13 | import datasets.imagenet3d 14 | import datasets.kitti 15 | import datasets.kitti_tracking 16 | import numpy as np 17 | 18 | def _selective_search_IJCV_top_k(split, year, top_k): 19 | """Return an imdb that uses the top k proposals from the selective search 20 | IJCV code. 
21 | """ 22 | imdb = datasets.pascal_voc(split, year) 23 | imdb.roidb_handler = imdb.selective_search_IJCV_roidb 24 | imdb.config['top_k'] = top_k 25 | return imdb 26 | 27 | # Set up voc__ using selective search "fast" mode 28 | for year in ['2007', '2012']: 29 | for split in ['train', 'val', 'trainval', 'test']: 30 | name = 'voc_{}_{}'.format(year, split) 31 | __sets[name] = (lambda split=split, year=year: 32 | datasets.pascal_voc(split, year)) 33 | """ 34 | # Set up voc___top_ using selective search "quality" mode 35 | # but only returning the first k boxes 36 | for top_k in np.arange(1000, 11000, 1000): 37 | for year in ['2007', '2012']: 38 | for split in ['train', 'val', 'trainval', 'test']: 39 | name = 'voc_{}_{}_top_{:d}'.format(year, split, top_k) 40 | __sets[name] = (lambda split=split, year=year, top_k=top_k: 41 | _selective_search_IJCV_top_k(split, year, top_k)) 42 | """ 43 | 44 | # Set up voc__ using selective search "fast" mode 45 | for year in ['2007']: 46 | for split in ['train', 'val', 'trainval', 'test']: 47 | name = 'voc_{}_{}'.format(year, split) 48 | print name 49 | __sets[name] = (lambda split=split, year=year: 50 | datasets.pascal_voc(split, year)) 51 | 52 | # KITTI dataset 53 | for split in ['train', 'val', 'trainval', 'test']: 54 | name = 'kitti_{}'.format(split) 55 | print name 56 | __sets[name] = (lambda split=split: 57 | datasets.kitti(split)) 58 | 59 | # Set up coco_2014_ 60 | for year in ['2014']: 61 | for split in ['train', 'val', 'minival', 'valminusminival']: 62 | name = 'coco_{}_{}'.format(year, split) 63 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 64 | 65 | # Set up coco_2015_ 66 | for year in ['2015']: 67 | for split in ['test', 'test-dev']: 68 | name = 'coco_{}_{}'.format(year, split) 69 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 70 | 71 | # NTHU dataset 72 | for split in ['71', '370']: 73 | name = 'nthu_{}'.format(split) 74 | print name 75 | __sets[name] = (lambda split=split: 76 | datasets.nthu(split)) 77 | 78 | 79 | def get_imdb(name): 80 | """Get an imdb (image database) by name.""" 81 | if not __sets.has_key(name): 82 | raise KeyError('Unknown dataset: {}'.format(name)) 83 | return __sets[name]() 84 | 85 | def list_imdbs(): 86 | """List all registered imdbs.""" 87 | return __sets.keys() 88 | -------------------------------------------------------------------------------- /lib/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import os.path as osp 10 | import PIL 11 | from utils.cython_bbox import bbox_overlaps 12 | import numpy as np 13 | import scipy.sparse 14 | from fast_rcnn.config import cfg 15 | 16 | 17 | class imdb(object): 18 | """Image database.""" 19 | 20 | def __init__(self, name): 21 | self._name = name 22 | self._num_classes = 0 23 | self._classes = [] 24 | self._image_index = [] 25 | self._obj_proposer = 'selective_search' 26 | self._roidb = None 27 | print self.default_roidb 28 | self._roidb_handler = self.default_roidb 29 | # Use this dict for storing dataset specific config options 30 | self.config = {} 31 | 32 | @property 33 | def name(self): 34 | return self._name 35 | 36 | @property 37 | def num_classes(self): 38 | return len(self._classes) 39 | 
40 | @property 41 | def classes(self): 42 | return self._classes 43 | 44 | @property 45 | def image_index(self): 46 | return self._image_index 47 | 48 | @property 49 | def roidb_handler(self): 50 | return self._roidb_handler 51 | 52 | @roidb_handler.setter 53 | def roidb_handler(self, val): 54 | self._roidb_handler = val 55 | 56 | def set_proposal_method(self, method): 57 | method = eval('self.' + method + '_roidb') 58 | self.roidb_handler = method 59 | 60 | @property 61 | def roidb(self): 62 | # A roidb is a list of dictionaries, each with the following keys: 63 | # boxes 64 | # gt_overlaps 65 | # gt_classes 66 | # flipped 67 | if self._roidb is not None: 68 | return self._roidb 69 | self._roidb = self.roidb_handler() 70 | return self._roidb 71 | 72 | @property 73 | def cache_path(self): 74 | cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache')) 75 | if not os.path.exists(cache_path): 76 | os.makedirs(cache_path) 77 | return cache_path 78 | 79 | @property 80 | def num_images(self): 81 | return len(self.image_index) 82 | 83 | def image_path_at(self, i): 84 | raise NotImplementedError 85 | 86 | def default_roidb(self): 87 | raise NotImplementedError 88 | 89 | def evaluate_detections(self, all_boxes, output_dir=None): 90 | """ 91 | all_boxes is a list of length number-of-classes. 92 | Each list element is a list of length number-of-images. 93 | Each of those list elements is either an empty list [] 94 | or a numpy array of detections. 95 | 96 | all_boxes[class][image] = [] or np.array of shape #dets x 5 97 | """ 98 | raise NotImplementedError 99 | 100 | def _get_widths(self): 101 | return [PIL.Image.open(self.image_path_at(i)).size[0] 102 | for i in xrange(self.num_images)] 103 | 104 | def append_flipped_images(self): 105 | num_images = self.num_images 106 | widths = self._get_widths() 107 | for i in xrange(num_images): 108 | boxes = self.roidb[i]['boxes'].copy() 109 | oldx1 = boxes[:, 0].copy() 110 | oldx2 = boxes[:, 2].copy() 111 | boxes[:, 0] = widths[i] - oldx2 - 1 112 | boxes[:, 2] = widths[i] - oldx1 - 1 113 | assert (boxes[:, 2] >= boxes[:, 0]).all() 114 | entry = {'boxes' : boxes, 115 | 'gt_overlaps' : self.roidb[i]['gt_overlaps'], 116 | 'gt_classes' : self.roidb[i]['gt_classes'], 117 | 'flipped' : True} 118 | self.roidb.append(entry) 119 | self._image_index = self._image_index * 2 120 | 121 | def evaluate_recall(self, candidate_boxes=None, thresholds=None, 122 | area='all', limit=None): 123 | """Evaluate detection proposal recall metrics. 
124 | 125 | Returns: 126 | results: dictionary of results with keys 127 | 'ar': average recall 128 | 'recalls': vector recalls at each IoU overlap threshold 129 | 'thresholds': vector of IoU overlap thresholds 130 | 'gt_overlaps': vector of all ground-truth overlaps 131 | """ 132 | # Record max overlap value for each gt box 133 | # Return vector of overlap values 134 | areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3, 135 | '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7} 136 | area_ranges = [ [0**2, 1e5**2], # all 137 | [0**2, 32**2], # small 138 | [32**2, 96**2], # medium 139 | [96**2, 1e5**2], # large 140 | [96**2, 128**2], # 96-128 141 | [128**2, 256**2], # 128-256 142 | [256**2, 512**2], # 256-512 143 | [512**2, 1e5**2], # 512-inf 144 | ] 145 | assert areas.has_key(area), 'unknown area range: {}'.format(area) 146 | area_range = area_ranges[areas[area]] 147 | gt_overlaps = np.zeros(0) 148 | num_pos = 0 149 | for i in xrange(self.num_images): 150 | # Checking for max_overlaps == 1 avoids including crowd annotations 151 | # (...pretty hacky :/) 152 | max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1) 153 | gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) & 154 | (max_gt_overlaps == 1))[0] 155 | gt_boxes = self.roidb[i]['boxes'][gt_inds, :] 156 | gt_areas = self.roidb[i]['seg_areas'][gt_inds] 157 | valid_gt_inds = np.where((gt_areas >= area_range[0]) & 158 | (gt_areas <= area_range[1]))[0] 159 | gt_boxes = gt_boxes[valid_gt_inds, :] 160 | num_pos += len(valid_gt_inds) 161 | 162 | if candidate_boxes is None: 163 | # If candidate_boxes is not supplied, the default is to use the 164 | # non-ground-truth boxes from this roidb 165 | non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0] 166 | boxes = self.roidb[i]['boxes'][non_gt_inds, :] 167 | else: 168 | boxes = candidate_boxes[i] 169 | if boxes.shape[0] == 0: 170 | continue 171 | if limit is not None and boxes.shape[0] > limit: 172 | boxes = boxes[:limit, :] 173 | 174 | overlaps = bbox_overlaps(boxes.astype(np.float), 175 | gt_boxes.astype(np.float)) 176 | 177 | _gt_overlaps = np.zeros((gt_boxes.shape[0])) 178 | for j in xrange(gt_boxes.shape[0]): 179 | # find which proposal box maximally covers each gt box 180 | argmax_overlaps = overlaps.argmax(axis=0) 181 | # and get the iou amount of coverage for each gt box 182 | max_overlaps = overlaps.max(axis=0) 183 | # find which gt box is 'best' covered (i.e. 
'best' = most iou) 184 | gt_ind = max_overlaps.argmax() 185 | gt_ovr = max_overlaps.max() 186 | assert(gt_ovr >= 0) 187 | # find the proposal box that covers the best covered gt box 188 | box_ind = argmax_overlaps[gt_ind] 189 | # record the iou coverage of this gt box 190 | _gt_overlaps[j] = overlaps[box_ind, gt_ind] 191 | assert(_gt_overlaps[j] == gt_ovr) 192 | # mark the proposal box and the gt box as used 193 | overlaps[box_ind, :] = -1 194 | overlaps[:, gt_ind] = -1 195 | # append recorded iou coverage level 196 | gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) 197 | 198 | gt_overlaps = np.sort(gt_overlaps) 199 | if thresholds is None: 200 | step = 0.05 201 | thresholds = np.arange(0.5, 0.95 + 1e-5, step) 202 | recalls = np.zeros_like(thresholds) 203 | # compute recall for each iou threshold 204 | for i, t in enumerate(thresholds): 205 | recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) 206 | # ar = 2 * np.trapz(recalls, thresholds) 207 | ar = recalls.mean() 208 | return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds, 209 | 'gt_overlaps': gt_overlaps} 210 | 211 | def create_roidb_from_box_list(self, box_list, gt_roidb): 212 | assert len(box_list) == self.num_images, \ 213 | 'Number of boxes must match number of ground-truth images' 214 | roidb = [] 215 | for i in xrange(self.num_images): 216 | boxes = box_list[i] 217 | num_boxes = boxes.shape[0] 218 | overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32) 219 | 220 | if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0: 221 | gt_boxes = gt_roidb[i]['boxes'] 222 | gt_classes = gt_roidb[i]['gt_classes'] 223 | gt_overlaps = bbox_overlaps(boxes.astype(np.float), 224 | gt_boxes.astype(np.float)) 225 | argmaxes = gt_overlaps.argmax(axis=1) 226 | maxes = gt_overlaps.max(axis=1) 227 | I = np.where(maxes > 0)[0] 228 | overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] 229 | 230 | overlaps = scipy.sparse.csr_matrix(overlaps) 231 | roidb.append({ 232 | 'boxes' : boxes, 233 | 'gt_classes' : np.zeros((num_boxes,), dtype=np.int32), 234 | 'gt_overlaps' : overlaps, 235 | 'flipped' : False, 236 | 'seg_areas' : np.zeros((num_boxes,), dtype=np.float32), 237 | }) 238 | return roidb 239 | 240 | @staticmethod 241 | def merge_roidbs(a, b): 242 | assert len(a) == len(b) 243 | for i in xrange(len(a)): 244 | a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) 245 | a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], 246 | b[i]['gt_classes'])) 247 | a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'], 248 | b[i]['gt_overlaps']]) 249 | a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'], 250 | b[i]['seg_areas'])) 251 | return a 252 | 253 | def competition_mode(self, on): 254 | """Turn competition mode on or off.""" 255 | pass 256 | -------------------------------------------------------------------------------- /lib/datasets/nissan.py: -------------------------------------------------------------------------------- 1 | __author__ = 'yuxiang' 2 | 3 | import datasets 4 | import datasets.nissan 5 | import os 6 | import PIL 7 | import datasets.imdb 8 | import numpy as np 9 | import scipy.sparse 10 | from utils.cython_bbox import bbox_overlaps 11 | from utils.boxes_grid import get_boxes_grid 12 | import subprocess 13 | import cPickle 14 | from fast_rcnn.config import cfg 15 | import math 16 | from rpn_msr.generate_anchors import generate_anchors 17 | 18 | class nissan(datasets.imdb): 19 | def __init__(self, image_set, nissan_path=None): 20 | datasets.imdb.__init__(self, 'nissan_' + image_set) 21 | self._image_set = 
image_set 22 | self._nissan_path = self._get_default_path() if nissan_path is None \ 23 | else nissan_path 24 | self._data_path = os.path.join(self._nissan_path, 'Images') 25 | self._classes = ('__background__', 'Car', 'Pedestrian', 'Cyclist') 26 | self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes))) 27 | self._image_ext = '.png' 28 | self._image_index = self._load_image_set_index() 29 | # Default to roidb handler 30 | if cfg.IS_RPN: 31 | self._roidb_handler = self.gt_roidb 32 | else: 33 | self._roidb_handler = self.region_proposal_roidb 34 | 35 | # num of subclasses 36 | self._num_subclasses = 227 + 36 + 36 + 1 37 | 38 | # load the mapping from subclass to class 39 | filename = os.path.join(self._nissan_path, 'mapping.txt') 40 | assert os.path.exists(filename), 'Path does not exist: {}'.format(filename) 41 | 42 | mapping = np.zeros(self._num_subclasses, dtype=np.int) 43 | with open(filename) as f: 44 | for line in f: 45 | words = line.split() 46 | subcls = int(words[0]) 47 | mapping[subcls] = self._class_to_ind[words[1]] 48 | self._subclass_mapping = mapping 49 | 50 | self.config = {'top_k': 100000} 51 | 52 | # statistics for computing recall 53 | self._num_boxes_all = np.zeros(self.num_classes, dtype=np.int) 54 | self._num_boxes_covered = np.zeros(self.num_classes, dtype=np.int) 55 | self._num_boxes_proposal = 0 56 | 57 | assert os.path.exists(self._nissan_path), \ 58 | 'Nissan path does not exist: {}'.format(self._nissan_path) 59 | assert os.path.exists(self._data_path), \ 60 | 'Path does not exist: {}'.format(self._data_path) 61 | 62 | def image_path_at(self, i): 63 | """ 64 | Return the absolute path to image i in the image sequence. 65 | """ 66 | return self.image_path_from_index(self.image_index[i]) 67 | 68 | def image_path_from_index(self, index): 69 | """ 70 | Construct an image path from the image's "index" identifier. 71 | """ 72 | # set the prefix 73 | prefix = self._image_set 74 | 75 | image_path = os.path.join(self._data_path, prefix, index + self._image_ext) 76 | assert os.path.exists(image_path), \ 77 | 'Path does not exist: {}'.format(image_path) 78 | return image_path 79 | 80 | def _load_image_set_index(self): 81 | """ 82 | Load the indexes listed in this dataset's image set file. 83 | """ 84 | image_set_file = os.path.join(self._data_path, self._image_set + '.txt') 85 | assert os.path.exists(image_set_file), \ 86 | 'Path does not exist: {}'.format(image_set_file) 87 | 88 | with open(image_set_file) as f: 89 | image_index = [x.rstrip('\n') for x in f.readlines()] 90 | return image_index 91 | 92 | def _get_default_path(self): 93 | """ 94 | Return the default path where NISSAN is expected to be installed. 95 | """ 96 | return os.path.join(datasets.ROOT_DIR, 'data', 'NISSAN') 97 | 98 | 99 | def gt_roidb(self): 100 | """ 101 | Return the database of ground-truth regions of interest. 102 | No implementation. 103 | """ 104 | 105 | gt_roidb = [] 106 | return gt_roidb 107 | 108 | def region_proposal_roidb(self): 109 | """ 110 | Return the database of regions of interest. 111 | Ground-truth ROIs are also included. 112 | 113 | This function loads/saves from/to a cache file to speed up future calls. 
114 | """ 115 | cache_file = os.path.join(self.cache_path, 116 | self.name + '_' + cfg.REGION_PROPOSAL + '_region_proposal_roidb.pkl') 117 | 118 | if os.path.exists(cache_file): 119 | with open(cache_file, 'rb') as fid: 120 | roidb = cPickle.load(fid) 121 | print '{} roidb loaded from {}'.format(self.name, cache_file) 122 | return roidb 123 | 124 | print 'Loading region proposal network boxes...' 125 | model = cfg.REGION_PROPOSAL 126 | roidb = self._load_rpn_roidb(None, model) 127 | print 'Region proposal network boxes loaded' 128 | print '{} region proposals per image'.format(self._num_boxes_proposal / len(self.image_index)) 129 | 130 | with open(cache_file, 'wb') as fid: 131 | cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) 132 | print 'wrote roidb to {}'.format(cache_file) 133 | 134 | return roidb 135 | 136 | def _load_rpn_roidb(self, gt_roidb, model): 137 | # set the prefix 138 | prefix = model 139 | 140 | box_list = [] 141 | for index in self.image_index: 142 | filename = os.path.join(self._nissan_path, 'region_proposals', prefix, self._image_set, index + '.txt') 143 | assert os.path.exists(filename), \ 144 | 'RPN data not found at: {}'.format(filename) 145 | raw_data = np.loadtxt(filename, dtype=float) 146 | if len(raw_data.shape) == 1: 147 | if raw_data.size == 0: 148 | raw_data = raw_data.reshape((0, 5)) 149 | else: 150 | raw_data = raw_data.reshape((1, 5)) 151 | 152 | x1 = raw_data[:, 0] 153 | y1 = raw_data[:, 1] 154 | x2 = raw_data[:, 2] 155 | y2 = raw_data[:, 3] 156 | score = raw_data[:, 4] 157 | inds = np.where((x2 > x1) & (y2 > y1))[0] 158 | raw_data = raw_data[inds,:4] 159 | self._num_boxes_proposal += raw_data.shape[0] 160 | box_list.append(raw_data) 161 | 162 | return self.create_roidb_from_box_list(box_list, gt_roidb) 163 | 164 | def evaluate_detections(self, all_boxes, output_dir): 165 | # load the mapping for subcalss the alpha (viewpoint) 166 | filename = os.path.join(self._nissan_path, 'mapping.txt') 167 | assert os.path.exists(filename), \ 168 | 'Path does not exist: {}'.format(filename) 169 | 170 | mapping = np.zeros(self._num_subclasses, dtype=np.float) 171 | with open(filename) as f: 172 | for line in f: 173 | words = line.split() 174 | subcls = int(words[0]) 175 | mapping[subcls] = float(words[3]) 176 | 177 | # for each image 178 | for im_ind, index in enumerate(self.image_index): 179 | filename = os.path.join(output_dir, index + '.txt') 180 | print 'Writing NISSAN results to file ' + filename 181 | with open(filename, 'wt') as f: 182 | # for each class 183 | for cls_ind, cls in enumerate(self.classes): 184 | if cls == '__background__': 185 | continue 186 | dets = all_boxes[cls_ind][im_ind] 187 | if dets == []: 188 | continue 189 | for k in xrange(dets.shape[0]): 190 | subcls = int(dets[k, 5]) 191 | cls_name = self.classes[self.subclass_mapping[subcls]] 192 | assert (cls_name == cls), 'subclass not in class' 193 | alpha = mapping[subcls] 194 | f.write('{:s} -1 -1 {:f} {:f} {:f} {:f} {:f} -1 -1 -1 -1 -1 -1 -1 {:.32f}\n'.format(\ 195 | cls, alpha, dets[k, 0], dets[k, 1], dets[k, 2], dets[k, 3], dets[k, 4])) 196 | 197 | # write detection results into one file 198 | def evaluate_detections_one_file(self, all_boxes, output_dir): 199 | # open results file 200 | filename = os.path.join(output_dir, 'detections.txt') 201 | print 'Writing all NISSAN results to file ' + filename 202 | with open(filename, 'wt') as f: 203 | # for each image 204 | for im_ind, index in enumerate(self.image_index): 205 | # for each class 206 | for cls_ind, cls in enumerate(self.classes): 207 | 
if cls == '__background__': 208 | continue 209 | dets = all_boxes[cls_ind][im_ind] 210 | if dets == []: 211 | continue 212 | for k in xrange(dets.shape[0]): 213 | subcls = int(dets[k, 5]) 214 | cls_name = self.classes[self._subclass_mapping[subcls]] 215 | assert (cls_name == cls), 'subclass not in class' 216 | f.write('{:s} {:s} {:f} {:f} {:f} {:f} {:d} {:f}\n'.format(\ 217 | index, cls, dets[k, 0], dets[k, 1], dets[k, 2], dets[k, 3], subcls, dets[k, 4])) 218 | 219 | def evaluate_proposals(self, all_boxes, output_dir): 220 | # for each image 221 | for im_ind, index in enumerate(self.image_index): 222 | filename = os.path.join(output_dir, index + '.txt') 223 | print 'Writing NISSAN results to file ' + filename 224 | with open(filename, 'wt') as f: 225 | # for each class 226 | for cls_ind, cls in enumerate(self.classes): 227 | if cls == '__background__': 228 | continue 229 | dets = all_boxes[cls_ind][im_ind] 230 | if dets == []: 231 | continue 232 | for k in xrange(dets.shape[0]): 233 | f.write('{:f} {:f} {:f} {:f} {:.32f}\n'.format(\ 234 | dets[k, 0], dets[k, 1], dets[k, 2], dets[k, 3], dets[k, 4])) 235 | 236 | def evaluate_proposals_msr(self, all_boxes, output_dir): 237 | # for each image 238 | for im_ind, index in enumerate(self.image_index): 239 | filename = os.path.join(output_dir, index + '.txt') 240 | print 'Writing NISSAN results to file ' + filename 241 | with open(filename, 'wt') as f: 242 | dets = all_boxes[im_ind] 243 | if dets == []: 244 | continue 245 | for k in xrange(dets.shape[0]): 246 | f.write('{:f} {:f} {:f} {:f} {:.32f}\n'.format(dets[k, 0], dets[k, 1], dets[k, 2], dets[k, 3], dets[k, 4])) 247 | 248 | 249 | if __name__ == '__main__': 250 | d = datasets.nissan('2015-10-21-16-25-12') 251 | res = d.roidb 252 | from IPython import embed; embed() 253 | -------------------------------------------------------------------------------- /lib/datasets/nthu.py: -------------------------------------------------------------------------------- 1 | __author__ = 'yuxiang' 2 | 3 | import datasets 4 | import datasets.nthu 5 | import os 6 | import PIL 7 | import datasets.imdb 8 | import numpy as np 9 | import scipy.sparse 10 | from utils.cython_bbox import bbox_overlaps 11 | from utils.boxes_grid import get_boxes_grid 12 | import subprocess 13 | import cPickle 14 | from fast_rcnn.config import cfg 15 | import math 16 | from rpn_msr.generate_anchors import generate_anchors 17 | 18 | class nthu(datasets.imdb): 19 | def __init__(self, image_set, nthu_path=None): 20 | datasets.imdb.__init__(self, 'nthu_' + image_set) 21 | self._image_set = image_set 22 | self._nthu_path = self._get_default_path() if nthu_path is None \ 23 | else nthu_path 24 | self._data_path = os.path.join(self._nthu_path, 'data') 25 | self._classes = ('__background__', 'Car', 'Pedestrian', 'Cyclist') 26 | self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes))) 27 | self._image_ext = '.jpg' 28 | self._image_index = self._load_image_set_index() 29 | # Default to roidb handler 30 | if cfg.IS_RPN: 31 | self._roidb_handler = self.gt_roidb 32 | else: 33 | self._roidb_handler = self.region_proposal_roidb 34 | 35 | # num of subclasses 36 | self._num_subclasses = 227 + 36 + 36 + 1 37 | 38 | # load the mapping from subclass to class 39 | filename = os.path.join(self._nthu_path, 'mapping.txt') 40 | assert os.path.exists(filename), 'Path does not exist: {}'.format(filename) 41 | 42 | mapping = np.zeros(self._num_subclasses, dtype=np.int) 43 | with open(filename) as f: 44 | for line in f: 45 | words = line.split() 46 | subcls 
= int(words[0]) 47 | mapping[subcls] = self._class_to_ind[words[1]] 48 | self._subclass_mapping = mapping 49 | 50 | self.config = {'top_k': 100000} 51 | 52 | # statistics for computing recall 53 | self._num_boxes_all = np.zeros(self.num_classes, dtype=np.int) 54 | self._num_boxes_covered = np.zeros(self.num_classes, dtype=np.int) 55 | self._num_boxes_proposal = 0 56 | 57 | assert os.path.exists(self._nthu_path), \ 58 | 'NTHU path does not exist: {}'.format(self._nthu_path) 59 | assert os.path.exists(self._data_path), \ 60 | 'Path does not exist: {}'.format(self._data_path) 61 | 62 | def image_path_at(self, i): 63 | """ 64 | Return the absolute path to image i in the image sequence. 65 | """ 66 | return self.image_path_from_index(self.image_index[i]) 67 | 68 | def image_path_from_index(self, index): 69 | """ 70 | Construct an image path from the image's "index" identifier. 71 | """ 72 | # set the prefix 73 | prefix = self._image_set 74 | 75 | image_path = os.path.join(self._data_path, prefix, index + self._image_ext) 76 | assert os.path.exists(image_path), \ 77 | 'Path does not exist: {}'.format(image_path) 78 | return image_path 79 | 80 | def _load_image_set_index(self): 81 | """ 82 | Load the indexes listed in this dataset's image set file. 83 | """ 84 | image_set_file = os.path.join(self._data_path, self._image_set + '.txt') 85 | assert os.path.exists(image_set_file), \ 86 | 'Path does not exist: {}'.format(image_set_file) 87 | 88 | with open(image_set_file) as f: 89 | image_index = [x.rstrip('\n') for x in f.readlines()] 90 | return image_index 91 | 92 | def _get_default_path(self): 93 | """ 94 | Return the default path where nthu is expected to be installed. 95 | """ 96 | return os.path.join(datasets.ROOT_DIR, 'data', 'NTHU') 97 | 98 | 99 | def gt_roidb(self): 100 | """ 101 | Return the database of ground-truth regions of interest. 102 | No implementation. 103 | """ 104 | 105 | gt_roidb = [] 106 | return gt_roidb 107 | 108 | def region_proposal_roidb(self): 109 | """ 110 | Return the database of regions of interest. 111 | Ground-truth ROIs are also included. 112 | 113 | This function loads/saves from/to a cache file to speed up future calls. 114 | """ 115 | cache_file = os.path.join(self.cache_path, 116 | self.name + '_' + cfg.REGION_PROPOSAL + '_region_proposal_roidb.pkl') 117 | 118 | if os.path.exists(cache_file): 119 | with open(cache_file, 'rb') as fid: 120 | roidb = cPickle.load(fid) 121 | print '{} roidb loaded from {}'.format(self.name, cache_file) 122 | return roidb 123 | 124 | print 'Loading region proposal network boxes...' 
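# cfg.REGION_PROPOSAL (e.g. 'RPN', as set in experiments/cfgs/kitti_rcnn.yml) names the folder of precomputed proposals under region_proposals/ that _load_rpn_roidb reads below.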
125 | model = cfg.REGION_PROPOSAL 126 | roidb = self._load_rpn_roidb(None, model) 127 | print 'Region proposal network boxes loaded' 128 | print '{} region proposals per image'.format(self._num_boxes_proposal / len(self.image_index)) 129 | 130 | with open(cache_file, 'wb') as fid: 131 | cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) 132 | print 'wrote roidb to {}'.format(cache_file) 133 | 134 | return roidb 135 | 136 | def _load_rpn_roidb(self, gt_roidb, model): 137 | # set the prefix 138 | prefix = model 139 | 140 | box_list = [] 141 | for index in self.image_index: 142 | filename = os.path.join(self._nthu_path, 'region_proposals', prefix, self._image_set, index + '.txt') 143 | assert os.path.exists(filename), \ 144 | 'RPN data not found at: {}'.format(filename) 145 | raw_data = np.loadtxt(filename, dtype=float) 146 | if len(raw_data.shape) == 1: 147 | if raw_data.size == 0: 148 | raw_data = raw_data.reshape((0, 5)) 149 | else: 150 | raw_data = raw_data.reshape((1, 5)) 151 | 152 | x1 = raw_data[:, 0] 153 | y1 = raw_data[:, 1] 154 | x2 = raw_data[:, 2] 155 | y2 = raw_data[:, 3] 156 | score = raw_data[:, 4] 157 | inds = np.where((x2 > x1) & (y2 > y1))[0] 158 | raw_data = raw_data[inds,:4] 159 | self._num_boxes_proposal += raw_data.shape[0] 160 | box_list.append(raw_data) 161 | 162 | return self.create_roidb_from_box_list(box_list, gt_roidb) 163 | 164 | def evaluate_detections(self, all_boxes, output_dir): 165 | # load the mapping from subclass to alpha (viewpoint) 166 | filename = os.path.join(self._nthu_path, 'mapping.txt') 167 | assert os.path.exists(filename), \ 168 | 'Path does not exist: {}'.format(filename) 169 | 170 | mapping = np.zeros(self._num_subclasses, dtype=np.float) 171 | with open(filename) as f: 172 | for line in f: 173 | words = line.split() 174 | subcls = int(words[0]) 175 | mapping[subcls] = float(words[3]) 176 | 177 | # for each image 178 | for im_ind, index in enumerate(self.image_index): 179 | filename = os.path.join(output_dir, index + '.txt') 180 | print 'Writing nthu results to file ' + filename 181 | with open(filename, 'wt') as f: 182 | # for each class 183 | for cls_ind, cls in enumerate(self.classes): 184 | if cls == '__background__': 185 | continue 186 | dets = all_boxes[cls_ind][im_ind] 187 | if dets == []: 188 | continue 189 | for k in xrange(dets.shape[0]): 190 | subcls = int(dets[k, 5]) 191 | cls_name = self.classes[self._subclass_mapping[subcls]] 192 | assert (cls_name == cls), 'subclass not in class' 193 | alpha = mapping[subcls] 194 | f.write('{:s} -1 -1 {:f} {:f} {:f} {:f} {:f} -1 -1 -1 -1 -1 -1 -1 {:.32f}\n'.format(\ 195 | cls, alpha, dets[k, 0], dets[k, 1], dets[k, 2], dets[k, 3], dets[k, 4])) 196 | 197 | # write detection results into one file 198 | def evaluate_detections_one_file(self, all_boxes, output_dir): 199 | # open results file 200 | filename = os.path.join(output_dir, 'detections.txt') 201 | print 'Writing all nthu results to file ' + filename 202 | with open(filename, 'wt') as f: 203 | # for each image 204 | for im_ind, index in enumerate(self.image_index): 205 | # for each class 206 | for cls_ind, cls in enumerate(self.classes): 207 | if cls == '__background__': 208 | continue 209 | dets = all_boxes[cls_ind][im_ind] 210 | if dets == []: 211 | continue 212 | for k in xrange(dets.shape[0]): 213 | subcls = int(dets[k, 5]) 214 | cls_name = self.classes[self._subclass_mapping[subcls]] 215 | assert (cls_name == cls), 'subclass not in class' 216 | f.write('{:s} {:s} {:f} {:f} {:f} {:f} {:d} {:f}\n'.format(\ 217 | index, cls, dets[k, 0], 
dets[k, 1], dets[k, 2], dets[k, 3], subcls, dets[k, 4])) 218 | 219 | def evaluate_proposals(self, all_boxes, output_dir): 220 | # for each image 221 | for im_ind, index in enumerate(self.image_index): 222 | filename = os.path.join(output_dir, index + '.txt') 223 | print 'Writing nthu results to file ' + filename 224 | with open(filename, 'wt') as f: 225 | # for each class 226 | for cls_ind, cls in enumerate(self.classes): 227 | if cls == '__background__': 228 | continue 229 | dets = all_boxes[cls_ind][im_ind] 230 | if dets == []: 231 | continue 232 | for k in xrange(dets.shape[0]): 233 | f.write('{:f} {:f} {:f} {:f} {:.32f}\n'.format(\ 234 | dets[k, 0], dets[k, 1], dets[k, 2], dets[k, 3], dets[k, 4])) 235 | 236 | def evaluate_proposals_msr(self, all_boxes, output_dir): 237 | # for each image 238 | for im_ind, index in enumerate(self.image_index): 239 | filename = os.path.join(output_dir, index + '.txt') 240 | print 'Writing nthu results to file ' + filename 241 | with open(filename, 'wt') as f: 242 | dets = all_boxes[im_ind] 243 | if dets == []: 244 | continue 245 | for k in xrange(dets.shape[0]): 246 | f.write('{:f} {:f} {:f} {:f} {:.32f}\n'.format(dets[k, 0], dets[k, 1], dets[k, 2], dets[k, 3], dets[k, 4])) 247 | 248 | 249 | if __name__ == '__main__': 250 | d = datasets.nthu('71') 251 | res = d.roidb 252 | from IPython import embed; embed() 253 | -------------------------------------------------------------------------------- /lib/datasets/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | import xml.etree.ElementTree as ET 8 | import os 9 | import cPickle 10 | import numpy as np 11 | import pdb 12 | def parse_rec(filename): 13 | """ Parse a PASCAL VOC xml file """ 14 | tree = ET.parse(filename) 15 | objects = [] 16 | for obj in tree.findall('object'): 17 | obj_struct = {} 18 | obj_struct['name'] = obj.find('name').text 19 | obj_struct['pose'] = obj.find('pose').text 20 | obj_struct['truncated'] = int(obj.find('truncated').text) 21 | obj_struct['difficult'] = int(obj.find('difficult').text) 22 | bbox = obj.find('bndbox') 23 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 24 | int(bbox.find('ymin').text), 25 | int(bbox.find('xmax').text), 26 | int(bbox.find('ymax').text)] 27 | objects.append(obj_struct) 28 | 29 | return objects 30 | 31 | def voc_ap(rec, prec, use_07_metric=False): 32 | """ ap = voc_ap(rec, prec, [use_07_metric]) 33 | Compute VOC AP given precision and recall. 34 | If use_07_metric is true, uses the 35 | VOC 07 11 point method (default:False). 36 | """ 37 | if use_07_metric: 38 | # 11 point metric 39 | ap = 0. 40 | for t in np.arange(0., 1.1, 0.1): 41 | if np.sum(rec >= t) == 0: 42 | p = 0 43 | else: 44 | p = np.max(prec[rec >= t]) 45 | ap = ap + p / 11. 
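# (11-point metric: AP is the mean of the maximum precision at the eleven recall levels 0.0, 0.1, ..., 1.0)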
46 | else: 47 | # correct AP calculation 48 | # first append sentinel values at the end 49 | mrec = np.concatenate(([0.], rec, [1.])) 50 | mpre = np.concatenate(([0.], prec, [0.])) 51 | 52 | # compute the precision envelope 53 | for i in range(mpre.size - 1, 0, -1): 54 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 55 | 56 | # to calculate area under PR curve, look for points 57 | # where X axis (recall) changes value 58 | i = np.where(mrec[1:] != mrec[:-1])[0] 59 | 60 | # and sum (\Delta recall) * prec 61 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 62 | return ap 63 | 64 | def voc_eval(detpath, 65 | annopath, 66 | imagesetfile, 67 | classname, 68 | cachedir, 69 | ovthresh=0.5, 70 | use_07_metric=False): 71 | """rec, prec, ap = voc_eval(detpath, 72 | annopath, 73 | imagesetfile, 74 | classname, 75 | [ovthresh], 76 | [use_07_metric]) 77 | 78 | Top level function that does the PASCAL VOC evaluation. 79 | 80 | detpath: Path to detections 81 | detpath.format(classname) should produce the detection results file. 82 | annopath: Path to annotations 83 | annopath.format(imagename) should be the xml annotations file. 84 | imagesetfile: Text file containing the list of images, one image per line. 85 | classname: Category name (duh) 86 | cachedir: Directory for caching the annotations 87 | [ovthresh]: Overlap threshold (default = 0.5) 88 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 89 | (default False) 90 | """ 91 | # assumes detections are in detpath.format(classname) 92 | # assumes annotations are in annopath.format(imagename) 93 | # assumes imagesetfile is a text file with each line an image name 94 | # cachedir caches the annotations in a pickle file 95 | 96 | # first load gt 97 | if not os.path.isdir(cachedir): 98 | os.mkdir(cachedir) 99 | cachefile = os.path.join(cachedir, 'annots.pkl') 100 | # read list of images 101 | with open(imagesetfile, 'r') as f: 102 | lines = f.readlines() 103 | imagenames = [x.strip() for x in lines] 104 | 105 | if not os.path.isfile(cachefile): 106 | # load annots 107 | recs = {} 108 | for i, imagename in enumerate(imagenames): 109 | recs[imagename] = parse_rec(annopath.format(imagename)) 110 | if i % 100 == 0: 111 | print 'Reading annotation for {:d}/{:d}'.format( 112 | i + 1, len(imagenames)) 113 | # save 114 | print 'Saving cached annotations to {:s}'.format(cachefile) 115 | with open(cachefile, 'w') as f: 116 | cPickle.dump(recs, f) 117 | else: 118 | # load 119 | with open(cachefile, 'r') as f: 120 | recs = cPickle.load(f) 121 | 122 | # extract gt objects for this class 123 | class_recs = {} 124 | npos = 0 125 | for imagename in imagenames: 126 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 127 | bbox = np.array([x['bbox'] for x in R]) 128 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 129 | det = [False] * len(R) 130 | npos = npos + sum(~difficult) 131 | class_recs[imagename] = {'bbox': bbox, 132 | 'difficult': difficult, 133 | 'det': det} 134 | 135 | # read dets 136 | detfile = detpath.format(classname) 137 | with open(detfile, 'r') as f: 138 | lines = f.readlines() 139 | if any(lines) == 1: 140 | 141 | splitlines = [x.strip().split(' ') for x in lines] 142 | image_ids = [x[0] for x in splitlines] 143 | confidence = np.array([float(x[1]) for x in splitlines]) 144 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 145 | 146 | # sort by confidence 147 | sorted_ind = np.argsort(-confidence) 148 | sorted_scores = np.sort(-confidence) 149 | BB = BB[sorted_ind, :] 150 | image_ids = 
[image_ids[x] for x in sorted_ind] 151 | 152 | # go down dets and mark TPs and FPs 153 | nd = len(image_ids) 154 | tp = np.zeros(nd) 155 | fp = np.zeros(nd) 156 | for d in range(nd): 157 | R = class_recs[image_ids[d]] 158 | bb = BB[d, :].astype(float) 159 | ovmax = -np.inf 160 | BBGT = R['bbox'].astype(float) 161 | 162 | if BBGT.size > 0: 163 | # compute overlaps 164 | # intersection 165 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 166 | iymin = np.maximum(BBGT[:, 1], bb[1]) 167 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 168 | iymax = np.minimum(BBGT[:, 3], bb[3]) 169 | iw = np.maximum(ixmax - ixmin + 1., 0.) 170 | ih = np.maximum(iymax - iymin + 1., 0.) 171 | inters = iw * ih 172 | 173 | # union 174 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 175 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 176 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 177 | 178 | overlaps = inters / uni 179 | ovmax = np.max(overlaps) 180 | jmax = np.argmax(overlaps) 181 | 182 | if ovmax > ovthresh: 183 | if not R['difficult'][jmax]: 184 | if not R['det'][jmax]: 185 | tp[d] = 1. 186 | R['det'][jmax] = 1 187 | else: 188 | fp[d] = 1. 189 | else: 190 | fp[d] = 1. 191 | 192 | # compute precision recall 193 | fp = np.cumsum(fp) 194 | tp = np.cumsum(tp) 195 | rec = tp / float(npos) 196 | # avoid divide by zero in case the first detection matches a difficult 197 | # ground truth 198 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 199 | ap = voc_ap(rec, prec, use_07_metric) 200 | else: 201 | rec = -1 202 | prec = -1 203 | ap = -1 204 | 205 | return rec, prec, ap 206 | -------------------------------------------------------------------------------- /lib/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from . import config 9 | from . import train 10 | from . 
import test 11 | -------------------------------------------------------------------------------- /lib/fast_rcnn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def bbox_transform(ex_rois, gt_rois): 11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 12 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 15 | 16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 20 | 21 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 23 | targets_dw = np.log(gt_widths / ex_widths) 24 | targets_dh = np.log(gt_heights / ex_heights) 25 | 26 | targets = np.vstack( 27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 28 | return targets 29 | 30 | def bbox_transform_inv(boxes, deltas): 31 | if boxes.shape[0] == 0: 32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 33 | 34 | boxes = boxes.astype(deltas.dtype, copy=False) 35 | 36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 38 | ctr_x = boxes[:, 0] + 0.5 * widths 39 | ctr_y = boxes[:, 1] + 0.5 * heights 40 | 41 | dx = deltas[:, 0::4] 42 | dy = deltas[:, 1::4] 43 | dw = deltas[:, 2::4] 44 | dh = deltas[:, 3::4] 45 | 46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 48 | pred_w = np.exp(dw) * widths[:, np.newaxis] 49 | pred_h = np.exp(dh) * heights[:, np.newaxis] 50 | 51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 52 | # x1 53 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 54 | # y1 55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 56 | # x2 57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 58 | # y2 59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 60 | 61 | return pred_boxes 62 | 63 | def clip_boxes(boxes, im_shape): 64 | """ 65 | Clip boxes to image boundaries. 66 | """ 67 | 68 | # x1 >= 0 69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 70 | # y1 >= 0 71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 72 | # x2 < im_shape[1] 73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 74 | # y2 < im_shape[0] 75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 76 | return boxes 77 | -------------------------------------------------------------------------------- /lib/fast_rcnn/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Fast R-CNN config system. 9 | 10 | This file specifies default config options for Fast R-CNN. You should not 11 | change values in this file. 
Instead, you should write a config file (in yaml) 12 | and use cfg_from_file(yaml_file) to load it and override the default options. 13 | 14 | Most tools in $ROOT/tools take a --cfg option to specify an override file. 15 | - See tools/{train,test}_net.py for example code that uses cfg_from_file() 16 | - See experiments/cfgs/*.yml for example YAML config override files 17 | """ 18 | 19 | import os 20 | import os.path as osp 21 | import numpy as np 22 | from distutils import spawn 23 | # `pip install easydict` if you don't have it 24 | from easydict import EasyDict as edict 25 | 26 | __C = edict() 27 | # Consumers can get config by: 28 | # from fast_rcnn_config import cfg 29 | cfg = __C 30 | 31 | # 32 | # Training options 33 | # 34 | 35 | __C.TRAIN = edict() 36 | #__C.NET_NAME = 'VGGnet' 37 | # learning rate 38 | __C.TRAIN.LEARNING_RATE = 0.001 39 | __C.TRAIN.MOMENTUM = 0.9 40 | __C.TRAIN.GAMMA = 0.1 41 | __C.TRAIN.STEPSIZE = 50000 42 | __C.TRAIN.DISPLAY = 10 43 | __C.IS_MULTISCALE = False 44 | 45 | # Scales to compute real features 46 | #__C.TRAIN.SCALES_BASE = (0.25, 0.5, 1.0, 2.0, 3.0) 47 | #__C.TRAIN.SCALES_BASE = (1.0,) 48 | 49 | # parameters for ROI generating 50 | #__C.TRAIN.SPATIAL_SCALE = 0.0625 51 | #__C.TRAIN.KERNEL_SIZE = 5 52 | 53 | # Aspect ratio to use during training 54 | #__C.TRAIN.ASPECTS = (1, 0.75, 0.5, 0.25) 55 | #__C.TRAIN.ASPECTS= (1,) 56 | 57 | 58 | # Scales to use during training (can list multiple scales) 59 | # Each scale is the pixel size of an image's shortest side 60 | __C.TRAIN.SCALES = (600,) 61 | 62 | # Max pixel size of the longest side of a scaled input image 63 | __C.TRAIN.MAX_SIZE = 1000 64 | 65 | # Images to use per minibatch 66 | __C.TRAIN.IMS_PER_BATCH = 2 67 | 68 | # Minibatch size (number of regions of interest [ROIs]) 69 | __C.TRAIN.BATCH_SIZE = 128 70 | 71 | # Fraction of minibatch that is labeled foreground (i.e. class > 0) 72 | __C.TRAIN.FG_FRACTION = 0.25 73 | 74 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 75 | __C.TRAIN.FG_THRESH = 0.5 76 | 77 | # Overlap threshold for a ROI to be considered background (class = 0 if 78 | # overlap in [LO, HI)) 79 | __C.TRAIN.BG_THRESH_HI = 0.5 80 | __C.TRAIN.BG_THRESH_LO = 0.1 81 | 82 | # Use horizontally-flipped images during training? 
83 | __C.TRAIN.USE_FLIPPED = True
84 |
85 | # Train bounding-box regressors
86 | __C.TRAIN.BBOX_REG = True
87 |
88 | # Overlap required between a ROI and ground-truth box in order for that ROI to
89 | # be used as a bounding-box regression training example
90 | __C.TRAIN.BBOX_THRESH = 0.5
91 |
92 | # Iterations between snapshots
93 | __C.TRAIN.SNAPSHOT_ITERS = 5000
94 |
95 | # solver.prototxt specifies the snapshot path prefix; this adds an optional
96 | # infix to yield the path: <prefix>[_<infix>]_iters_XYZ.caffemodel
97 | __C.TRAIN.SNAPSHOT_PREFIX = 'VGGnet_fast_rcnn'
98 | __C.TRAIN.SNAPSHOT_INFIX = ''
99 |
100 | # Use a prefetch thread in roi_data_layer.layer
101 | # So far I haven't found this useful; likely more engineering work is required
102 | __C.TRAIN.USE_PREFETCH = False
103 |
104 | # Normalize the targets (subtract empirical mean, divide by empirical stddev)
105 | __C.TRAIN.BBOX_NORMALIZE_TARGETS = True
106 | # Deprecated (inside weights)
107 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
108 | # Normalize the targets using "precomputed" (or made up) means and stdevs
109 | # (BBOX_NORMALIZE_TARGETS must also be True)
110 | __C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False
111 | __C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
112 | __C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
113 |
114 | # Train using these proposals
115 | __C.TRAIN.PROPOSAL_METHOD = 'selective_search'
116 |
117 | # Make minibatches from images that have similar aspect ratios (i.e. both
118 | # tall and thin or both short and wide) in order to avoid wasting computation
119 | # on zero-padding.
120 | __C.TRAIN.ASPECT_GROUPING = True
121 |
122 | # Use RPN to detect objects
123 | __C.TRAIN.HAS_RPN = False
124 | # IOU >= thresh: positive example
125 | __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
126 | # IOU < thresh: negative example
127 | __C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
128 | # If an anchor satisfies both the positive and negative conditions, set it to negative
129 | __C.TRAIN.RPN_CLOBBER_POSITIVES = False
130 | # Max number of foreground examples
131 | __C.TRAIN.RPN_FG_FRACTION = 0.5
132 | # Total number of examples
133 | __C.TRAIN.RPN_BATCHSIZE = 256
134 | # NMS threshold used on RPN proposals
135 | __C.TRAIN.RPN_NMS_THRESH = 0.7
136 | # Number of top scoring boxes to keep before applying NMS to RPN proposals
137 | __C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
138 | # Number of top scoring boxes to keep after applying NMS to RPN proposals
139 | __C.TRAIN.RPN_POST_NMS_TOP_N = 2000
140 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
141 | __C.TRAIN.RPN_MIN_SIZE = 16
142 | # Deprecated (outside weights)
143 | __C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
144 | # Give the positive RPN examples weight of p * 1 / {num positives}
145 | # and give negatives a weight of (1 - p)
146 | # Set to -1.0 to use uniform example weighting
147 | __C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
148 |
149 | # Enable timeline generation
150 | __C.TRAIN.DEBUG_TIMELINE = False
151 |
152 | #
153 | # Testing options
154 | #
155 |
156 | __C.TEST = edict()
157 |
158 | # Scales to use during testing (can list multiple scales)
159 | # Each scale is the pixel size of an image's shortest side
160 | __C.TEST.SCALES = (600,)
161 |
162 | # Max pixel size of the longest side of a scaled input image
163 | __C.TEST.MAX_SIZE = 1000
164 |
165 | # Overlap threshold used for non-maximum suppression (suppress boxes with
166 | # IoU >= this threshold)
167 | __C.TEST.NMS = 0.3
168 |
169 | # Experimental: treat the (K+1) units in the cls_score layer as linear
170 | # predictors (trained, e.g., with one-vs-rest SVMs).
171 | __C.TEST.SVM = False
172 |
173 | # Test using bounding-box regressors
174 | __C.TEST.BBOX_REG = True
175 |
176 | # Propose boxes
177 | __C.TEST.HAS_RPN = True
178 |
179 | # Test using these proposals
180 | __C.TEST.PROPOSAL_METHOD = 'selective_search'
181 |
182 | ## NMS threshold used on RPN proposals
183 | __C.TEST.RPN_NMS_THRESH = 0.7
184 | ## Number of top scoring boxes to keep before applying NMS to RPN proposals
185 | __C.TEST.RPN_PRE_NMS_TOP_N = 6000
186 | #__C.TEST.RPN_PRE_NMS_TOP_N = 12000
187 | ## Number of top scoring boxes to keep after applying NMS to RPN proposals
188 | __C.TEST.RPN_POST_NMS_TOP_N = 300
189 | #__C.TEST.RPN_POST_NMS_TOP_N = 2000
190 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
191 | __C.TEST.RPN_MIN_SIZE = 16
192 |
193 | # Enable timeline generation
194 | __C.TEST.DEBUG_TIMELINE = False
195 |
196 | #
197 | # MISC
198 | #
199 |
200 | # The mapping from image coordinates to feature map coordinates might cause
201 | # some boxes that are distinct in image space to become identical in feature
202 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
203 | # for identifying duplicate boxes.
204 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
205 | __C.DEDUP_BOXES = 1./16.
206 |
207 | # Pixel mean values (BGR order) as a (1, 1, 3) array
208 | # We use the same pixel mean for all networks even though it's not exactly what
209 | # they were trained with
210 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
211 |
212 | # For reproducibility
213 | __C.RNG_SEED = 3
214 |
215 | # A small number that's used many times
216 | __C.EPS = 1e-14
217 |
218 | # Root directory of project
219 | __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
220 |
221 | # Data directory
222 | __C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
223 |
224 | # Model directory
225 | __C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc'))
226 |
227 | # Name (or path to) the matlab executable
228 | __C.MATLAB = 'matlab'
229 |
230 | # Place outputs under an experiments directory
231 | __C.EXP_DIR = 'default'
232 |
233 |
234 | if spawn.find_executable("nvcc"):
235 |     # Use GPU implementation of non-maximum suppression
236 |     __C.USE_GPU_NMS = True
237 |
238 |     # Default GPU device id
239 |     __C.GPU_ID = 0
240 | else:
241 |     __C.USE_GPU_NMS = False
242 |
243 |
244 | def get_output_dir(imdb, weights_filename):
245 |     """Return the directory where experimental artifacts are placed.
246 |     If the directory does not exist, it is created.
247 |
248 |     A canonical path is built using the name from an imdb and a network
249 |     (if not None).
250 |     """
251 |     outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
252 |     if weights_filename is not None:
253 |         outdir = osp.join(outdir, weights_filename)
254 |     if not os.path.exists(outdir):
255 |         os.makedirs(outdir)
256 |     return outdir
257 |
258 | def _merge_a_into_b(a, b):
259 |     """Merge config dictionary a into config dictionary b, clobbering the
260 |     options in b whenever they are also specified in a.
261 | """ 262 | if type(a) is not edict: 263 | return 264 | 265 | for k, v in a.iteritems(): 266 | # a must specify keys that are in b 267 | if not b.has_key(k): 268 | raise KeyError('{} is not a valid config key'.format(k)) 269 | 270 | # the types must match, too 271 | old_type = type(b[k]) 272 | if old_type is not type(v): 273 | if isinstance(b[k], np.ndarray): 274 | v = np.array(v, dtype=b[k].dtype) 275 | else: 276 | raise ValueError(('Type mismatch ({} vs. {}) ' 277 | 'for config key: {}').format(type(b[k]), 278 | type(v), k)) 279 | 280 | # recursively merge dicts 281 | if type(v) is edict: 282 | try: 283 | _merge_a_into_b(a[k], b[k]) 284 | except: 285 | print('Error under config key: {}'.format(k)) 286 | raise 287 | else: 288 | b[k] = v 289 | 290 | def cfg_from_file(filename): 291 | """Load a config file and merge it into the default options.""" 292 | import yaml 293 | with open(filename, 'r') as f: 294 | yaml_cfg = edict(yaml.load(f)) 295 | 296 | _merge_a_into_b(yaml_cfg, __C) 297 | 298 | def cfg_from_list(cfg_list): 299 | """Set config keys via list (e.g., from command line).""" 300 | from ast import literal_eval 301 | assert len(cfg_list) % 2 == 0 302 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 303 | key_list = k.split('.') 304 | d = __C 305 | for subkey in key_list[:-1]: 306 | assert d.has_key(subkey) 307 | d = d[subkey] 308 | subkey = key_list[-1] 309 | assert d.has_key(subkey) 310 | try: 311 | value = literal_eval(v) 312 | except: 313 | # handle the case when v is a string literal 314 | value = v 315 | assert type(value) == type(d[subkey]), \ 316 | 'type {} does not match original type {}'.format( 317 | type(value), type(d[subkey])) 318 | d[subkey] = value 319 | -------------------------------------------------------------------------------- /lib/fast_rcnn/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from fast_rcnn.config import cfg 9 | if cfg.USE_GPU_NMS: 10 | from nms.gpu_nms import gpu_nms 11 | from nms.cpu_nms import cpu_nms 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | 16 | if dets.shape[0] == 0: 17 | return [] 18 | if cfg.USE_GPU_NMS and not force_cpu: 19 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 20 | else: 21 | return cpu_nms(dets, thresh) 22 | -------------------------------------------------------------------------------- /lib/gt_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/gt_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 
7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | GtDataLayer implements a Caffe Python layer. 11 | """ 12 | 13 | import caffe 14 | from fast_rcnn.config import cfg 15 | from gt_data_layer.minibatch import get_minibatch 16 | import numpy as np 17 | import yaml 18 | from multiprocessing import Process, Queue 19 | 20 | class GtDataLayer(caffe.Layer): 21 | """Fast R-CNN data layer used for training.""" 22 | 23 | def _shuffle_roidb_inds(self): 24 | """Randomly permute the training roidb.""" 25 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 26 | self._cur = 0 27 | 28 | def _get_next_minibatch_inds(self): 29 | """Return the roidb indices for the next minibatch.""" 30 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 31 | self._shuffle_roidb_inds() 32 | 33 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 34 | self._cur += cfg.TRAIN.IMS_PER_BATCH 35 | 36 | """ 37 | # sample images with gt objects 38 | db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) 39 | i = 0 40 | while (i < cfg.TRAIN.IMS_PER_BATCH): 41 | ind = self._perm[self._cur] 42 | num_objs = self._roidb[ind]['boxes'].shape[0] 43 | if num_objs != 0: 44 | db_inds[i] = ind 45 | i += 1 46 | 47 | self._cur += 1 48 | if self._cur >= len(self._roidb): 49 | self._shuffle_roidb_inds() 50 | """ 51 | 52 | return db_inds 53 | 54 | def _get_next_minibatch(self): 55 | """Return the blobs to be used for the next minibatch.""" 56 | db_inds = self._get_next_minibatch_inds() 57 | minibatch_db = [self._roidb[i] for i in db_inds] 58 | return get_minibatch(minibatch_db, self._num_classes) 59 | 60 | # this function is called in training the net 61 | def set_roidb(self, roidb): 62 | """Set the roidb to be used by this layer during training.""" 63 | self._roidb = roidb 64 | self._shuffle_roidb_inds() 65 | 66 | def setup(self, bottom, top): 67 | """Setup the GtDataLayer.""" 68 | 69 | # parse the layer parameter string, which must be valid YAML 70 | layer_params = yaml.load(self.param_str_) 71 | 72 | self._num_classes = layer_params['num_classes'] 73 | 74 | self._name_to_top_map = { 75 | 'data': 0, 76 | 'info_boxes': 1, 77 | 'parameters': 2} 78 | 79 | # data blob: holds a batch of N images, each with 3 channels 80 | # The height and width (100 x 100) are dummy values 81 | num_scale_base = len(cfg.TRAIN.SCALES_BASE) 82 | top[0].reshape(num_scale_base, 3, 100, 100) 83 | 84 | # info boxes blob 85 | top[1].reshape(1, 18) 86 | 87 | # parameters blob 88 | num_scale = len(cfg.TRAIN.SCALES) 89 | num_aspect = len(cfg.TRAIN.ASPECTS) 90 | top[2].reshape(2 + 2*num_scale + 2*num_aspect) 91 | 92 | def forward(self, bottom, top): 93 | """Get blobs and copy them into this layer's top blob vector.""" 94 | blobs = self._get_next_minibatch() 95 | 96 | for blob_name, blob in blobs.iteritems(): 97 | top_ind = self._name_to_top_map[blob_name] 98 | # Reshape net's input blobs 99 | top[top_ind].reshape(*(blob.shape)) 100 | # Copy data into net's input blobs 101 | top[top_ind].data[...] 
= blob.astype(np.float32, copy=False) 102 | 103 | def backward(self, top, propagate_down, bottom): 104 | """This layer does not propagate gradients.""" 105 | pass 106 | 107 | def reshape(self, bottom, top): 108 | """Reshaping happens during the call to forward.""" 109 | pass 110 | -------------------------------------------------------------------------------- /lib/gt_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | 10 | import numpy as np 11 | import numpy.random as npr 12 | import cv2 13 | from fast_rcnn.config import cfg 14 | from utils.blob import prep_im_for_blob, im_list_to_blob 15 | 16 | def get_minibatch(roidb, num_classes): 17 | """Given a roidb, construct a minibatch sampled from it.""" 18 | num_images = len(roidb) 19 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 20 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 21 | format(num_images, cfg.TRAIN.BATCH_SIZE) 22 | 23 | # Get the input image blob, formatted for caffe 24 | im_blob = _get_image_blob(roidb) 25 | 26 | # build the box information blob 27 | info_boxes_blob = np.zeros((0, 18), dtype=np.float32) 28 | num_scale = len(cfg.TRAIN.SCALES) 29 | for i in xrange(num_images): 30 | info_boxes = roidb[i]['info_boxes'] 31 | 32 | # change the batch index 33 | info_boxes[:,2] += i * num_scale 34 | info_boxes[:,7] += i * num_scale 35 | 36 | info_boxes_blob = np.vstack((info_boxes_blob, info_boxes)) 37 | 38 | # build the parameter blob 39 | num_aspect = len(cfg.TRAIN.ASPECTS) 40 | num = 2 + 2 * num_scale + 2 * num_aspect 41 | parameters_blob = np.zeros((num), dtype=np.float32) 42 | parameters_blob[0] = num_scale 43 | parameters_blob[1] = num_aspect 44 | parameters_blob[2:2+num_scale] = cfg.TRAIN.SCALES 45 | parameters_blob[2+num_scale:2+2*num_scale] = cfg.TRAIN.SCALE_MAPPING 46 | parameters_blob[2+2*num_scale:2+2*num_scale+num_aspect] = cfg.TRAIN.ASPECT_HEIGHTS 47 | parameters_blob[2+2*num_scale+num_aspect:2+2*num_scale+2*num_aspect] = cfg.TRAIN.ASPECT_WIDTHS 48 | 49 | # For debug visualizations 50 | # _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob) 51 | 52 | blobs = {'data': im_blob, 53 | 'info_boxes': info_boxes_blob, 54 | 'parameters': parameters_blob} 55 | 56 | return blobs 57 | 58 | def _get_image_blob(roidb): 59 | """Builds an input blob from the images in the roidb at the different scales. 
60 | """ 61 | num_images = len(roidb) 62 | processed_ims = [] 63 | 64 | for i in xrange(num_images): 65 | # read image 66 | im = cv2.imread(roidb[i]['image']) 67 | if roidb[i]['flipped']: 68 | im = im[:, ::-1, :] 69 | 70 | im_orig = im.astype(np.float32, copy=True) 71 | im_orig -= cfg.PIXEL_MEANS 72 | 73 | # build image pyramid 74 | for im_scale in cfg.TRAIN.SCALES_BASE: 75 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 76 | interpolation=cv2.INTER_LINEAR) 77 | 78 | processed_ims.append(im) 79 | 80 | # Create a blob to hold the input images 81 | blob = im_list_to_blob(processed_ims) 82 | 83 | return blob 84 | 85 | def _project_im_rois(im_rois, im_scale_factor): 86 | """Project image RoIs into the rescaled training image.""" 87 | rois = im_rois * im_scale_factor 88 | return rois 89 | 90 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 91 | """Bounding-box regression targets are stored in a compact form in the 92 | roidb. 93 | 94 | This function expands those targets into the 4-of-4*K representation used 95 | by the network (i.e. only one class has non-zero targets). The loss weights 96 | are similarly expanded. 97 | 98 | Returns: 99 | bbox_target_data (ndarray): N x 4K blob of regression targets 100 | bbox_loss_weights (ndarray): N x 4K blob of loss weights 101 | """ 102 | clss = bbox_target_data[:, 0] 103 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 104 | bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 105 | inds = np.where(clss > 0)[0] 106 | for ind in inds: 107 | cls = clss[ind] 108 | start = 4 * cls 109 | end = start + 4 110 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 111 | bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.] 112 | return bbox_targets, bbox_loss_weights 113 | 114 | 115 | def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob): 116 | """Visualize a mini-batch for debugging.""" 117 | import matplotlib.pyplot as plt 118 | for i in xrange(rois_blob.shape[0]): 119 | rois = rois_blob[i, :] 120 | im_ind = rois[0] 121 | roi = rois[2:] 122 | im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() 123 | im += cfg.PIXEL_MEANS 124 | im = im[:, :, (2, 1, 0)] 125 | im = im.astype(np.uint8) 126 | cls = labels_blob[i] 127 | subcls = sublabels_blob[i] 128 | plt.imshow(im) 129 | print 'class: ', cls, ' subclass: ', subcls 130 | plt.gca().add_patch( 131 | plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], 132 | roi[3] - roi[1], fill=False, 133 | edgecolor='r', linewidth=3) 134 | ) 135 | plt.show() 136 | -------------------------------------------------------------------------------- /lib/gt_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | from fast_rcnn.config import cfg 12 | from utils.cython_bbox import bbox_overlaps 13 | from utils.boxes_grid import get_boxes_grid 14 | import scipy.sparse 15 | import PIL 16 | import math 17 | import os 18 | import cPickle 19 | import pdb 20 | 21 | 22 | def prepare_roidb(imdb): 23 | """Enrich the imdb's roidb by adding some derived quantities that 24 | are useful for training. 
This function precomputes the maximum
25 |     overlap, taken over ground-truth boxes, between each ROI and
26 |     each ground-truth box. The class with maximum overlap is also
27 |     recorded.
28 |     """
29 |     cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl')
30 |     if os.path.exists(cache_file):
31 |         with open(cache_file, 'rb') as fid:
32 |             imdb._roidb = cPickle.load(fid)
33 |         print '{} prepared gt roidb loaded from {}'.format(imdb.name, cache_file)
34 |         return
35 |
36 |     roidb = imdb.roidb
37 |     for i in xrange(len(imdb.image_index)):
38 |         roidb[i]['image'] = imdb.image_path_at(i)
39 |         boxes = roidb[i]['boxes']
40 |         labels = roidb[i]['gt_classes']
41 |         info_boxes = np.zeros((0, 18), dtype=np.float32)
42 |
43 |         if boxes.shape[0] == 0:
44 |             roidb[i]['info_boxes'] = info_boxes
45 |             continue
46 |
47 |         # compute grid boxes
48 |         s = PIL.Image.open(imdb.image_path_at(i)).size
49 |         image_height = s[1]
50 |         image_width = s[0]
51 |         boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)
52 |
53 |         # for each scale
54 |         for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
55 |             boxes_rescaled = boxes * scale
56 |
57 |             # compute overlap
58 |             overlaps = bbox_overlaps(boxes_grid.astype(np.float), boxes_rescaled.astype(np.float))
59 |             max_overlaps = overlaps.max(axis = 1)
60 |             argmax_overlaps = overlaps.argmax(axis = 1)
61 |             max_classes = labels[argmax_overlaps]
62 |
63 |             # select positive boxes
64 |             fg_inds = []
65 |             for k in xrange(1, imdb.num_classes):
66 |                 fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])
67 |
68 |             if len(fg_inds) > 0:
69 |                 gt_inds = argmax_overlaps[fg_inds]
70 |                 # bounding box regression targets
71 |                 gt_targets = _compute_targets(boxes_grid[fg_inds,:], boxes_rescaled[gt_inds,:])
72 |                 # scale mapping for RoI pooling
73 |                 scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
74 |                 scale_map = cfg.TRAIN.SCALES[scale_ind_map]
75 |                 # construct the list of positive boxes
76 |                 # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)
77 |                 info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
78 |                 info_box[:, 0] = cx[fg_inds]
79 |                 info_box[:, 1] = cy[fg_inds]
80 |                 info_box[:, 2] = scale_ind
81 |                 info_box[:, 3:7] = boxes_grid[fg_inds,:]
82 |                 info_box[:, 7] = scale_ind_map
83 |                 info_box[:, 8:12] = boxes_grid[fg_inds,:] * scale_map / scale
84 |                 info_box[:, 12] = labels[gt_inds]
85 |                 info_box[:, 14:] = gt_targets
86 |                 info_boxes = np.vstack((info_boxes, info_box))
87 |
88 |         roidb[i]['info_boxes'] = info_boxes
89 |
90 |     with open(cache_file, 'wb') as fid:
91 |         cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
92 |     print 'wrote prepared gt roidb to {}'.format(cache_file)
93 |
94 | def add_bbox_regression_targets(roidb):
95 |     """Add information needed to train bounding-box regressors."""
96 |     assert len(roidb) > 0
97 |     assert 'info_boxes' in roidb[0], 'Did you call prepare_roidb first?'
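    # Annotation (not part of the original file): the (means, stds) returned
    # below are consumed at test time to invert this normalization;
    # schematically, for a class `cls` (names here are illustrative only):
    #
    #     deltas = net_output[:, 4 * cls:4 * cls + 4]
    #     deltas = deltas * stds.reshape(-1, 4)[cls] + means.reshape(-1, 4)[cls]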
98 |
99 |     num_images = len(roidb)
100 |     # Infer number of classes from the number of columns in gt_overlaps
101 |     num_classes = roidb[0]['gt_overlaps'].shape[1]
102 |
103 |     # Compute values needed for means and stds
104 |     # var(x) = E(x^2) - E(x)^2
105 |     class_counts = np.zeros((num_classes, 1)) + cfg.EPS
106 |     sums = np.zeros((num_classes, 4))
107 |     squared_sums = np.zeros((num_classes, 4))
108 |     for im_i in xrange(num_images):
109 |         targets = roidb[im_i]['info_boxes']
110 |         for cls in xrange(1, num_classes):
111 |             cls_inds = np.where(targets[:, 12] == cls)[0]
112 |             if cls_inds.size > 0:
113 |                 class_counts[cls] += cls_inds.size
114 |                 sums[cls, :] += targets[cls_inds, 14:].sum(axis=0)
115 |                 squared_sums[cls, :] += (targets[cls_inds, 14:] ** 2).sum(axis=0)
116 |
117 |     means = sums / class_counts
118 |     stds = np.sqrt(squared_sums / class_counts - means ** 2)
119 |
120 |     # Normalize targets
121 |     for im_i in xrange(num_images):
122 |         targets = roidb[im_i]['info_boxes']
123 |         for cls in xrange(1, num_classes):
124 |             cls_inds = np.where(targets[:, 12] == cls)[0]
125 |             roidb[im_i]['info_boxes'][cls_inds, 14:] -= means[cls, :]
126 |             if stds[cls, 0] != 0:
127 |                 roidb[im_i]['info_boxes'][cls_inds, 14:] /= stds[cls, :]
128 |
129 |     # These values will be needed for making predictions
130 |     # (the predictions will need to be unnormalized and uncentered)
131 |     return means.ravel(), stds.ravel()
132 |
133 | def _compute_targets(ex_rois, gt_rois):
134 |     """Compute bounding-box regression targets for an image. The targets are scale-invariant."""
135 |
136 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS
137 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS
138 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
139 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
140 |
141 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS
142 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS
143 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
144 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
145 |
146 |     targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
147 |     targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
148 |     targets_dw = np.log(gt_widths / ex_widths)
149 |     targets_dh = np.log(gt_heights / ex_heights)
150 |
151 |     targets = np.zeros((ex_rois.shape[0], 4), dtype=np.float32)
152 |     targets[:, 0] = targets_dx
153 |     targets[:, 1] = targets_dy
154 |     targets[:, 2] = targets_dw
155 |     targets[:, 3] = targets_dh
156 |     return targets
157 | -------------------------------------------------------------------------------- /lib/make.sh: --------------------------------------------------------------------------------
1 | TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
2 |
3 | CUDA_PATH=/usr/local/cuda/
4 | CXXFLAGS=''
5 |
6 | if [[ "$OSTYPE" =~ ^darwin ]]; then
7 |     CXXFLAGS+='-undefined dynamic_lookup'
8 | fi
9 |
10 | cd roi_pooling_layer
11 |
12 | if [ -d "$CUDA_PATH" ]; then
13 |     nvcc -std=c++11 -c -o roi_pooling_op.cu.o roi_pooling_op_gpu.cu.cc \
14 |         -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CXXFLAGS \
15 |         -arch=sm_37
16 |
17 |     g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \
18 |         roi_pooling_op.cu.o -I $TF_INC -D GOOGLE_CUDA=1 -fPIC $CXXFLAGS \
19 |         -lcudart -L $CUDA_PATH/lib64
20 | else
21 |     g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \
22 |         -I $TF_INC -fPIC $CXXFLAGS
23 | fi
24 |
25 | cd ..
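# Annotation (assumed workflow, not part of the original script): `cd $FRCN_ROOT/lib && make`
# is expected to end up running this script. With a CUDA toolkit under $CUDA_PATH, nvcc first
# compiles the RoI pooling GPU kernel and g++ then links it into roi_pooling.so against
# libcudart; without CUDA, the CPU-only branch builds the op from roi_pooling_op.cc alone.
# The -arch flag (sm_37 above) should match the target GPU, e.g. -arch=sm_52 for Maxwell.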
26 | 27 | #cd feature_extrapolating_layer 28 | 29 | #nvcc -std=c++11 -c -o feature_extrapolating_op.cu.o feature_extrapolating_op_gpu.cu.cc \ 30 | # -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_50 31 | 32 | #g++ -std=c++11 -shared -o feature_extrapolating.so feature_extrapolating_op.cc \ 33 | # feature_extrapolating_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64 34 | #cd .. 35 | -------------------------------------------------------------------------------- /lib/networks/.VGGnet.py.swo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smallcorgi/Faster-RCNN_TF/d9adb24c8ffdbae3b56eb55fc629d719fee3d741/lib/networks/.VGGnet.py.swo -------------------------------------------------------------------------------- /lib/networks/VGGnet_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from networks.network import Network 3 | 4 | n_classes = 21 5 | _feat_stride = [16,] 6 | anchor_scales = [8, 16, 32] 7 | 8 | class VGGnet_test(Network): 9 | def __init__(self, trainable=True): 10 | self.inputs = [] 11 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 12 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3]) 13 | self.keep_prob = tf.placeholder(tf.float32) 14 | self.layers = dict({'data':self.data, 'im_info':self.im_info}) 15 | self.trainable = trainable 16 | self.setup() 17 | 18 | def setup(self): 19 | (self.feed('data') 20 | .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False) 21 | .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False) 22 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') 23 | .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False) 24 | .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False) 25 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') 26 | .conv(3, 3, 256, 1, 1, name='conv3_1') 27 | .conv(3, 3, 256, 1, 1, name='conv3_2') 28 | .conv(3, 3, 256, 1, 1, name='conv3_3') 29 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') 30 | .conv(3, 3, 512, 1, 1, name='conv4_1') 31 | .conv(3, 3, 512, 1, 1, name='conv4_2') 32 | .conv(3, 3, 512, 1, 1, name='conv4_3') 33 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') 34 | .conv(3, 3, 512, 1, 1, name='conv5_1') 35 | .conv(3, 3, 512, 1, 1, name='conv5_2') 36 | .conv(3, 3, 512, 1, 1, name='conv5_3')) 37 | 38 | (self.feed('conv5_3') 39 | .conv(3,3,512,1,1,name='rpn_conv/3x3') 40 | .conv(1,1,len(anchor_scales)*3*2,1,1,padding='VALID',relu = False,name='rpn_cls_score')) 41 | 42 | (self.feed('rpn_conv/3x3') 43 | .conv(1,1,len(anchor_scales)*3*4,1,1,padding='VALID',relu = False,name='rpn_bbox_pred')) 44 | 45 | (self.feed('rpn_cls_score') 46 | .reshape_layer(2,name = 'rpn_cls_score_reshape') 47 | .softmax(name='rpn_cls_prob')) 48 | 49 | (self.feed('rpn_cls_prob') 50 | .reshape_layer(len(anchor_scales)*3*2,name = 'rpn_cls_prob_reshape')) 51 | 52 | (self.feed('rpn_cls_prob_reshape','rpn_bbox_pred','im_info') 53 | .proposal_layer(_feat_stride, anchor_scales, 'TEST', name = 'rois')) 54 | 55 | (self.feed('conv5_3', 'rois') 56 | .roi_pool(7, 7, 1.0/16, name='pool_5') 57 | .fc(4096, name='fc6') 58 | .fc(4096, name='fc7') 59 | .fc(n_classes, relu=False, name='cls_score') 60 | .softmax(name='cls_prob')) 61 | 62 | (self.feed('fc7') 63 | .fc(n_classes*4, relu=False, name='bbox_pred')) 64 | 65 | -------------------------------------------------------------------------------- /lib/networks/VGGnet_train.py: 
-------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from networks.network import Network 3 | 4 | 5 | #define 6 | 7 | n_classes = 21 8 | _feat_stride = [16,] 9 | anchor_scales = [8, 16, 32] 10 | 11 | class VGGnet_train(Network): 12 | def __init__(self, trainable=True): 13 | self.inputs = [] 14 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 15 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3]) 16 | self.gt_boxes = tf.placeholder(tf.float32, shape=[None, 5]) 17 | self.keep_prob = tf.placeholder(tf.float32) 18 | self.layers = dict({'data':self.data, 'im_info':self.im_info, 'gt_boxes':self.gt_boxes}) 19 | self.trainable = trainable 20 | self.setup() 21 | 22 | # create ops and placeholders for bbox normalization process 23 | with tf.variable_scope('bbox_pred', reuse=True): 24 | weights = tf.get_variable("weights") 25 | biases = tf.get_variable("biases") 26 | 27 | self.bbox_weights = tf.placeholder(weights.dtype, shape=weights.get_shape()) 28 | self.bbox_biases = tf.placeholder(biases.dtype, shape=biases.get_shape()) 29 | 30 | self.bbox_weights_assign = weights.assign(self.bbox_weights) 31 | self.bbox_bias_assign = biases.assign(self.bbox_biases) 32 | 33 | def setup(self): 34 | (self.feed('data') 35 | .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False) 36 | .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False) 37 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') 38 | .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False) 39 | .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False) 40 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') 41 | .conv(3, 3, 256, 1, 1, name='conv3_1') 42 | .conv(3, 3, 256, 1, 1, name='conv3_2') 43 | .conv(3, 3, 256, 1, 1, name='conv3_3') 44 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') 45 | .conv(3, 3, 512, 1, 1, name='conv4_1') 46 | .conv(3, 3, 512, 1, 1, name='conv4_2') 47 | .conv(3, 3, 512, 1, 1, name='conv4_3') 48 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') 49 | .conv(3, 3, 512, 1, 1, name='conv5_1') 50 | .conv(3, 3, 512, 1, 1, name='conv5_2') 51 | .conv(3, 3, 512, 1, 1, name='conv5_3')) 52 | #========= RPN ============ 53 | (self.feed('conv5_3') 54 | .conv(3,3,512,1,1,name='rpn_conv/3x3') 55 | .conv(1,1,len(anchor_scales)*3*2 ,1 , 1, padding='VALID', relu = False, name='rpn_cls_score')) 56 | 57 | (self.feed('rpn_cls_score','gt_boxes','im_info','data') 58 | .anchor_target_layer(_feat_stride, anchor_scales, name = 'rpn-data' )) 59 | 60 | # Loss of rpn_cls & rpn_boxes 61 | 62 | (self.feed('rpn_conv/3x3') 63 | .conv(1,1,len(anchor_scales)*3*4, 1, 1, padding='VALID', relu = False, name='rpn_bbox_pred')) 64 | 65 | #========= RoI Proposal ============ 66 | (self.feed('rpn_cls_score') 67 | .reshape_layer(2,name = 'rpn_cls_score_reshape') 68 | .softmax(name='rpn_cls_prob')) 69 | 70 | (self.feed('rpn_cls_prob') 71 | .reshape_layer(len(anchor_scales)*3*2,name = 'rpn_cls_prob_reshape')) 72 | 73 | (self.feed('rpn_cls_prob_reshape','rpn_bbox_pred','im_info') 74 | .proposal_layer(_feat_stride, anchor_scales, 'TRAIN',name = 'rpn_rois')) 75 | 76 | (self.feed('rpn_rois','gt_boxes') 77 | .proposal_target_layer(n_classes,name = 'roi-data')) 78 | 79 | 80 | #========= RCNN ============ 81 | (self.feed('conv5_3', 'roi-data') 82 | .roi_pool(7, 7, 1.0/16, name='pool_5') 83 | .fc(4096, name='fc6') 84 | .dropout(0.5, name='drop6') 85 | .fc(4096, name='fc7') 86 | .dropout(0.5, name='drop7') 87 | .fc(n_classes, relu=False, name='cls_score') 88 | 
.softmax(name='cls_prob'))
89 |
90 |         (self.feed('drop7')
91 |              .fc(n_classes*4, relu=False, name='bbox_pred'))
92 |
93 |
-------------------------------------------------------------------------------- /lib/networks/__init__.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from .VGGnet_train import VGGnet_train
9 | from .VGGnet_test import VGGnet_test
10 | from . import factory
11 |
-------------------------------------------------------------------------------- /lib/networks/factory.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SubCNN_TF
3 | # Copyright (c) 2016 CVGL Stanford
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Yu Xiang
6 | # --------------------------------------------------------
7 |
8 | """Factory method for easily getting networks by name."""
9 |
10 | __sets = {}
11 |
12 | import networks.VGGnet_train
13 | import networks.VGGnet_test
14 | import pdb
15 | import tensorflow as tf
16 |
17 | #__sets['VGGnet_train'] = networks.VGGnet_train()
18 |
19 | #__sets['VGGnet_test'] = networks.VGGnet_test()
20 |
21 |
22 | def get_network(name):
23 |     """Get a network by name."""
24 |     #if not __sets.has_key(name):
25 |     #    raise KeyError('Unknown network: {}'.format(name))
26 |     #return __sets[name]
27 |     if name.split('_')[1] == 'test':
28 |         return networks.VGGnet_test()
29 |     elif name.split('_')[1] == 'train':
30 |         return networks.VGGnet_train()
31 |     else:
32 |         raise KeyError('Unknown network: {}'.format(name))
33 |
34 |
35 | def list_networks():
36 |     """List all registered networks."""
37 |     return __sets.keys()
38 |
-------------------------------------------------------------------------------- /lib/nms/.gitignore: --------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
-------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smallcorgi/Faster-RCNN_TF/d9adb24c8ffdbae3b56eb55fc629d719fee3d741/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 |     return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 |     return a if a <= b else b
16 |
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 |     cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 |     cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 |     cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 |     cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 |     cdef
np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include 
"gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
88 |   CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
93 |   _set_device(device_id);
94 |
95 |   float* boxes_dev = NULL;
96 |   unsigned long long* mask_dev = NULL;
97 |
98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 |
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 |
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 |
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 |   int num_to_keep = 0;  // greedy scan in score order over the suppression bitmasks
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 |
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 |
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | -------------------------------------------------------------------------------- /lib/nms/py_cpu_nms.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 |     """Pure Python NMS baseline."""
12 |     x1 = dets[:, 0]
13 |     y1 = dets[:, 1]
14 |     x2 = dets[:, 2]
15 |     y2 = dets[:, 3]
16 |     scores = dets[:, 4]
17 |
18 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 |     order = scores.argsort()[::-1]
20 |
21 |     keep = []
22 |     while order.size > 0:
23 |         i = order[0]
24 |         keep.append(i)
25 |         xx1 = np.maximum(x1[i], x1[order[1:]])
26 |         yy1 = np.maximum(y1[i], y1[order[1:]])
27 |         xx2 = np.minimum(x2[i], x2[order[1:]])
28 |         yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 |         w = np.maximum(0.0, xx2 - xx1 + 1)
31 |         h = np.maximum(0.0, yy2 - yy1 + 1)
32 |         inter = w * h
33 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 |         inds = np.where(ovr <= thresh)[0]
36 |         order = order[inds + 1]
37 |
38 |     return keep
39 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
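As an annotation (not part of the original sources): a minimal sketch of how the `RoIDataLayer` defined in the next file is typically driven from a training loop, assuming `roidb` has already been prepared by `roi_data_layer.roidb.prepare_roidb`:

```Python
from roi_data_layer.layer import RoIDataLayer

# roidb: prepared list of per-image dicts ('boxes', 'gt_classes', 'max_classes', ...)
data_layer = RoIDataLayer(roidb, num_classes=21)
blobs = data_layer.forward()
# With cfg.TRAIN.HAS_RPN: blobs contains 'data', 'gt_boxes' and 'im_info';
# otherwise it contains 'data', 'rois', 'labels' and the bbox regression blobs.
```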
-------------------------------------------------------------------------------- /lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | RoIDataLayer implements a Caffe Python layer. 11 | """ 12 | 13 | from fast_rcnn.config import cfg 14 | from roi_data_layer.minibatch import get_minibatch 15 | import numpy as np 16 | 17 | class RoIDataLayer(object): 18 | """Fast R-CNN data layer used for training.""" 19 | 20 | def __init__(self, roidb, num_classes): 21 | """Set the roidb to be used by this layer during training.""" 22 | self._roidb = roidb 23 | self._num_classes = num_classes 24 | self._shuffle_roidb_inds() 25 | 26 | def _shuffle_roidb_inds(self): 27 | """Randomly permute the training roidb.""" 28 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 29 | self._cur = 0 30 | 31 | def _get_next_minibatch_inds(self): 32 | """Return the roidb indices for the next minibatch.""" 33 | 34 | if cfg.TRAIN.HAS_RPN: 35 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 36 | self._shuffle_roidb_inds() 37 | 38 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 39 | self._cur += cfg.TRAIN.IMS_PER_BATCH 40 | else: 41 | # sample images 42 | db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) 43 | i = 0 44 | while (i < cfg.TRAIN.IMS_PER_BATCH): 45 | ind = self._perm[self._cur] 46 | num_objs = self._roidb[ind]['boxes'].shape[0] 47 | if num_objs != 0: 48 | db_inds[i] = ind 49 | i += 1 50 | 51 | self._cur += 1 52 | if self._cur >= len(self._roidb): 53 | self._shuffle_roidb_inds() 54 | 55 | return db_inds 56 | 57 | def _get_next_minibatch(self): 58 | """Return the blobs to be used for the next minibatch. 59 | 60 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 61 | separate process and made available through self._blob_queue. 
62 | """ 63 | db_inds = self._get_next_minibatch_inds() 64 | minibatch_db = [self._roidb[i] for i in db_inds] 65 | return get_minibatch(minibatch_db, self._num_classes) 66 | 67 | def forward(self): 68 | """Get blobs and copy them into this layer's top blob vector.""" 69 | blobs = self._get_next_minibatch() 70 | return blobs 71 | -------------------------------------------------------------------------------- /lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | 10 | import numpy as np 11 | import numpy.random as npr 12 | import cv2 13 | from fast_rcnn.config import cfg 14 | from utils.blob import prep_im_for_blob, im_list_to_blob 15 | 16 | def get_minibatch(roidb, num_classes): 17 | """Given a roidb, construct a minibatch sampled from it.""" 18 | num_images = len(roidb) 19 | # Sample random scales to use for each image in this batch 20 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 21 | size=num_images) 22 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 23 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 24 | format(num_images, cfg.TRAIN.BATCH_SIZE) 25 | rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 26 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) 27 | 28 | # Get the input image blob, formatted for caffe 29 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 30 | 31 | blobs = {'data': im_blob} 32 | 33 | if cfg.TRAIN.HAS_RPN: 34 | assert len(im_scales) == 1, "Single batch only" 35 | assert len(roidb) == 1, "Single batch only" 36 | # gt boxes: (x1, y1, x2, y2, cls) 37 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 38 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 39 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 40 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 41 | blobs['gt_boxes'] = gt_boxes 42 | blobs['im_info'] = np.array( 43 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 44 | dtype=np.float32) 45 | else: # not using RPN 46 | # Now, build the region of interest and label blobs 47 | rois_blob = np.zeros((0, 5), dtype=np.float32) 48 | labels_blob = np.zeros((0), dtype=np.float32) 49 | bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) 50 | bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) 51 | # all_overlaps = [] 52 | for im_i in xrange(num_images): 53 | labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \ 54 | = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, 55 | num_classes) 56 | 57 | # Add to RoIs blob 58 | rois = _project_im_rois(im_rois, im_scales[im_i]) 59 | batch_ind = im_i * np.ones((rois.shape[0], 1)) 60 | rois_blob_this_image = np.hstack((batch_ind, rois)) 61 | rois_blob = np.vstack((rois_blob, rois_blob_this_image)) 62 | 63 | # Add to labels, bbox targets, and bbox loss blobs 64 | labels_blob = np.hstack((labels_blob, labels)) 65 | bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets)) 66 | bbox_inside_blob = np.vstack((bbox_inside_blob, bbox_inside_weights)) 67 | # all_overlaps = np.hstack((all_overlaps, overlaps)) 68 | 69 | # For debug visualizations 70 | # _vis_minibatch(im_blob, 
rois_blob, labels_blob, all_overlaps)
71 |
72 |         blobs['rois'] = rois_blob
73 |         blobs['labels'] = labels_blob
74 |
75 |         if cfg.TRAIN.BBOX_REG:
76 |             blobs['bbox_targets'] = bbox_targets_blob
77 |             blobs['bbox_inside_weights'] = bbox_inside_blob
78 |             blobs['bbox_outside_weights'] = \
79 |                 np.array(bbox_inside_blob > 0).astype(np.float32)
80 |
81 |     return blobs
82 |
83 | def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
84 |     """Generate a random sample of RoIs comprising foreground and background
85 |     examples.
86 |     """
87 |     # label = class RoI has max overlap with
88 |     labels = roidb['max_classes']
89 |     overlaps = roidb['max_overlaps']
90 |     rois = roidb['boxes']
91 |
92 |     # Select foreground RoIs as those with >= FG_THRESH overlap
93 |     fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
94 |     # Guard against the case when an image has fewer than fg_rois_per_image
95 |     # foreground RoIs
96 |     fg_rois_per_this_image = int(np.minimum(fg_rois_per_image, fg_inds.size))
97 |     # Sample foreground regions without replacement
98 |     if fg_inds.size > 0:
99 |         fg_inds = npr.choice(
100 |             fg_inds, size=fg_rois_per_this_image, replace=False)
101 |
102 |     # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
103 |     bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
104 |                        (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
105 |     # Compute number of background RoIs to take from this image (guarding
106 |     # against there being fewer than desired)
107 |     bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
108 |     bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
109 |                                         bg_inds.size)
110 |     # Sample background regions without replacement
111 |     if bg_inds.size > 0:
112 |         bg_inds = npr.choice(
113 |             bg_inds, size=bg_rois_per_this_image, replace=False)
114 |
115 |     # The indices that we're selecting (both fg and bg)
116 |     keep_inds = np.append(fg_inds, bg_inds)
117 |     # Select sampled values from various arrays:
118 |     labels = labels[keep_inds]
119 |     # Clamp labels for the background RoIs to 0
120 |     labels[fg_rois_per_this_image:] = 0
121 |     overlaps = overlaps[keep_inds]
122 |     rois = rois[keep_inds]
123 |
124 |     bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
125 |         roidb['bbox_targets'][keep_inds, :], num_classes)
126 |
127 |     return labels, overlaps, rois, bbox_targets, bbox_inside_weights
128 |
129 | def _get_image_blob(roidb, scale_inds):
130 |     """Builds an input blob from the images in the roidb at the specified
131 |     scales.
132 |     """
133 |     num_images = len(roidb)
134 |     processed_ims = []
135 |     im_scales = []
136 |     for i in xrange(num_images):
137 |         im = cv2.imread(roidb[i]['image'])
138 |         if roidb[i]['flipped']:
139 |             im = im[:, ::-1, :]
140 |         target_size = cfg.TRAIN.SCALES[scale_inds[i]]
141 |         im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
142 |                                         cfg.TRAIN.MAX_SIZE)
143 |         im_scales.append(im_scale)
144 |         processed_ims.append(im)
145 |
146 |     # Create a blob to hold the input images
147 |     blob = im_list_to_blob(processed_ims)
148 |
149 |     return blob, im_scales
150 |
151 | def _project_im_rois(im_rois, im_scale_factor):
152 |     """Project image RoIs into the rescaled training image."""
153 |     rois = im_rois * im_scale_factor
154 |     return rois
155 |
156 | def _get_bbox_regression_labels(bbox_target_data, num_classes):
157 |     """Bounding-box regression targets are stored in a compact form in the
158 |     roidb.
159 |
160 |     This function expands those targets into the 4-of-4*K representation used
161 |     by the network (i.e.
156 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 157 | """Bounding-box regression targets are stored in a compact form in the 158 | roidb. 159 | 160 | This function expands those targets into the 4-of-4*K representation used 161 | by the network (i.e. only one class has non-zero targets). The loss weights 162 | are similarly expanded. 163 | 164 | Returns: 165 | bbox_targets (ndarray): N x 4K blob of regression targets 166 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 167 | """ 168 | clss = np.array(bbox_target_data[:, 0], dtype=np.uint16, copy=True) 169 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 170 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 171 | inds = np.where(clss > 0)[0] 172 | for ind in inds: 173 | cls = clss[ind] 174 | start = 4 * cls 175 | end = start + 4 176 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 177 | bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS 178 | return bbox_targets, bbox_inside_weights 179 | 180 | def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): 181 | """Visualize a mini-batch for debugging.""" 182 | import matplotlib.pyplot as plt 183 | for i in xrange(rois_blob.shape[0]): 184 | rois = rois_blob[i, :] 185 | im_ind = rois[0] 186 | roi = rois[1:] 187 | im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() 188 | im += cfg.PIXEL_MEANS 189 | im = im[:, :, (2, 1, 0)] 190 | im = im.astype(np.uint8) 191 | cls = labels_blob[i] 192 | plt.imshow(im) 193 | print 'class: ', cls, ' overlap: ', overlaps[i] 194 | plt.gca().add_patch( 195 | plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], 196 | roi[3] - roi[1], fill=False, 197 | edgecolor='r', linewidth=3) 198 | ) 199 | plt.show() 200 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | from fast_rcnn.config import cfg 12 | from fast_rcnn.bbox_transform import bbox_transform 13 | from utils.cython_bbox import bbox_overlaps 14 | import PIL 15 | 16 | def prepare_roidb(imdb): 17 | """Enrich the imdb's roidb by adding some derived quantities that 18 | are useful for training. This function precomputes the maximum 19 | overlap, taken over ground-truth boxes, between each ROI and 20 | each ground-truth box. The class with maximum overlap is also 21 | recorded.
22 | """ 23 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 24 | for i in xrange(imdb.num_images)] 25 | roidb = imdb.roidb 26 | for i in xrange(len(imdb.image_index)): 27 | roidb[i]['image'] = imdb.image_path_at(i) 28 | roidb[i]['width'] = sizes[i][0] 29 | roidb[i]['height'] = sizes[i][1] 30 | # need gt_overlaps as a dense array for argmax 31 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 32 | # max overlap with gt over classes (columns) 33 | max_overlaps = gt_overlaps.max(axis=1) 34 | # gt class that had the max overlap 35 | max_classes = gt_overlaps.argmax(axis=1) 36 | roidb[i]['max_classes'] = max_classes 37 | roidb[i]['max_overlaps'] = max_overlaps 38 | # sanity checks 39 | # max overlap of 0 => class should be zero (background) 40 | zero_inds = np.where(max_overlaps == 0)[0] 41 | assert all(max_classes[zero_inds] == 0) 42 | # max overlap > 0 => class should not be zero (must be a fg class) 43 | nonzero_inds = np.where(max_overlaps > 0)[0] 44 | assert all(max_classes[nonzero_inds] != 0) 45 | 46 | def add_bbox_regression_targets(roidb): 47 | """Add information needed to train bounding-box regressors.""" 48 | assert len(roidb) > 0 49 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 50 | 51 | num_images = len(roidb) 52 | # Infer number of classes from the number of columns in gt_overlaps 53 | num_classes = roidb[0]['gt_overlaps'].shape[1] 54 | for im_i in xrange(num_images): 55 | rois = roidb[im_i]['boxes'] 56 | max_overlaps = roidb[im_i]['max_overlaps'] 57 | max_classes = roidb[im_i]['max_classes'] 58 | roidb[im_i]['bbox_targets'] = \ 59 | _compute_targets(rois, max_overlaps, max_classes) 60 | 61 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 62 | # Use fixed / precomputed "means" and "stds" instead of empirical values 63 | means = np.tile( 64 | np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)) 65 | stds = np.tile( 66 | np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)) 67 | else: 68 | # Compute values needed for means and stds 69 | # var(x) = E(x^2) - E(x)^2 70 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 71 | sums = np.zeros((num_classes, 4)) 72 | squared_sums = np.zeros((num_classes, 4)) 73 | for im_i in xrange(num_images): 74 | targets = roidb[im_i]['bbox_targets'] 75 | for cls in xrange(1, num_classes): 76 | cls_inds = np.where(targets[:, 0] == cls)[0] 77 | if cls_inds.size > 0: 78 | class_counts[cls] += cls_inds.size 79 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 80 | squared_sums[cls, :] += \ 81 | (targets[cls_inds, 1:] ** 2).sum(axis=0) 82 | 83 | means = sums / class_counts 84 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 85 | 86 | print 'bbox target means:' 87 | print means 88 | print means[1:, :].mean(axis=0) # ignore bg class 89 | print 'bbox target stdevs:' 90 | print stds 91 | print stds[1:, :].mean(axis=0) # ignore bg class 92 | 93 | # Normalize targets 94 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: 95 | print "Normalizing targets" 96 | for im_i in xrange(num_images): 97 | targets = roidb[im_i]['bbox_targets'] 98 | for cls in xrange(1, num_classes): 99 | cls_inds = np.where(targets[:, 0] == cls)[0] 100 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 101 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 102 | else: 103 | print "NOT normalizing targets" 104 | 105 | # These values will be needed for making predictions 106 | # (the predicts will need to be unnormalized and uncentered) 107 | return means.ravel(), stds.ravel() 108 | 109 | def _compute_targets(rois, overlaps, 
labels): 110 | """Compute bounding-box regression targets for an image.""" 111 | # Indices of ground-truth ROIs 112 | gt_inds = np.where(overlaps == 1)[0] 113 | if len(gt_inds) == 0: 114 | # Bail if the image has no ground-truth ROIs 115 | return np.zeros((rois.shape[0], 5), dtype=np.float32) 116 | # Indices of examples for which we try to make predictions 117 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] 118 | 119 | # Get IoU overlap between each ex ROI and gt ROI 120 | ex_gt_overlaps = bbox_overlaps( 121 | np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), 122 | np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) 123 | 124 | # Find which gt ROI each ex ROI has max overlap with: 125 | # this will be the ex ROI's gt target 126 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 127 | gt_rois = rois[gt_inds[gt_assignment], :] 128 | ex_rois = rois[ex_inds, :] 129 | 130 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 131 | targets[ex_inds, 0] = labels[ex_inds] 132 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 133 | return targets 134 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb2.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | from fast_rcnn.config import cfg 12 | import utils.cython_bbox 13 | 14 | def prepare_roidb(imdb): 15 | """Enrich the imdb's roidb by adding some derived quantities that 16 | are useful for training. This function precomputes the maximum 17 | overlap, taken over ground-truth boxes, between each ROI and 18 | each ground-truth box. The class with maximum overlap is also 19 | recorded. 20 | """ 21 | roidb = imdb.roidb 22 | for i in xrange(len(imdb.image_index)): 23 | roidb[i]['image'] = imdb.image_path_at(i) 24 | # need gt_overlaps as a dense array for argmax 25 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 26 | # max overlap with gt over classes (columns) 27 | max_overlaps = gt_overlaps.max(axis=1) 28 | # gt class that had the max overlap 29 | max_classes = gt_overlaps.argmax(axis=1) 30 | 31 | roidb[i]['max_classes'] = max_classes 32 | roidb[i]['max_overlaps'] = max_overlaps 33 | 34 | # sanity checks 35 | # max overlap of 0 => class should be zero (background) 36 | zero_inds = np.where(max_overlaps == 0)[0] 37 | assert all(max_classes[zero_inds] == 0) 38 | # max overlap > 0 => class should not be zero (must be a fg class) 39 | nonzero_inds = np.where(max_overlaps > 0)[0] 40 | assert all(max_classes[nonzero_inds] != 0) 41 | 42 | def add_bbox_regression_targets(roidb): 43 | """Add information needed to train bounding-box regressors.""" 44 | assert len(roidb) > 0 45 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 
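# A minimal numeric sketch (editorial, hypothetical values -- not part of the
# original code) of the per-class mean/std computation performed below:
#
#   targets = np.array([[1, 0.5, 0.2, 0.1, 0.0],
#                       [1, 0.3, 0.4, 0.3, 0.2]])  # rows: (cls, dx, dy, dw, dh)
#   cls_inds = np.where(targets[:, 0] == 1)[0]
#   mean = targets[cls_inds, 1:].sum(axis=0) / cls_inds.size          # E(x)
#   std = np.sqrt((targets[cls_inds, 1:] ** 2).sum(axis=0) / cls_inds.size
#                 - mean ** 2)                      # var(x) = E(x^2) - E(x)^2
#   targets[cls_inds, 1:] = (targets[cls_inds, 1:] - mean) / std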
46 | 47 | num_images = len(roidb) 48 | # Infer number of classes from the number of columns in gt_overlaps 49 | num_classes = roidb[0]['gt_overlaps'].shape[1] 50 | for im_i in xrange(num_images): 51 | rois = roidb[im_i]['boxes'] 52 | max_overlaps = roidb[im_i]['max_overlaps'] 53 | max_classes = roidb[im_i]['max_classes'] 54 | roidb[im_i]['bbox_targets'] = \ 55 | _compute_targets(rois, max_overlaps, max_classes, num_classes) 56 | 57 | # Compute values needed for means and stds 58 | # var(x) = E(x^2) - E(x)^2 59 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 60 | sums = np.zeros((num_classes, 4)) 61 | squared_sums = np.zeros((num_classes, 4)) 62 | for im_i in xrange(num_images): 63 | targets = roidb[im_i]['bbox_targets'] 64 | for cls in xrange(1, num_classes): 65 | cls_inds = np.where(targets[:, 0] == cls)[0] 66 | if cls_inds.size > 0: 67 | class_counts[cls] += cls_inds.size 68 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 69 | squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0) 70 | 71 | means = sums / class_counts 72 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 73 | 74 | # Normalize targets 75 | for im_i in xrange(num_images): 76 | targets = roidb[im_i]['bbox_targets'] 77 | for cls in xrange(1, num_classes): 78 | cls_inds = np.where(targets[:, 0] == cls)[0] 79 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 80 | if stds[cls, 0] != 0: 81 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 82 | 83 | # These values will be needed for making predictions 84 | # (the predicts will need to be unnormalized and uncentered) 85 | return means.ravel(), stds.ravel() 86 | 87 | def _compute_targets(rois, overlaps, labels, num_classes): 88 | """Compute bounding-box regression targets for an image.""" 89 | # Ensure ROIs are floats 90 | rois = rois.astype(np.float, copy=False) 91 | 92 | # Indices of ground-truth ROIs 93 | gt_inds = np.where(overlaps == 1)[0] 94 | # Indices of examples for which we try to make predictions 95 | ex_inds = [] 96 | for i in xrange(1, num_classes): 97 | ex_inds.extend( np.where((labels == i) & (overlaps >= cfg.TRAIN.BBOX_THRESH))[0] ) 98 | 99 | # Get IoU overlap between each ex ROI and gt ROI 100 | ex_gt_overlaps = utils.cython_bbox.bbox_overlaps(rois[ex_inds, :], 101 | rois[gt_inds, :]) 102 | 103 | # Find which gt ROI each ex ROI has max overlap with: 104 | # this will be the ex ROI's gt target 105 | if ex_gt_overlaps.shape[0] != 0: 106 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 107 | else: 108 | gt_assignment = [] 109 | gt_rois = rois[gt_inds[gt_assignment], :] 110 | ex_rois = rois[ex_inds, :] 111 | 112 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS 113 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS 114 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 115 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 116 | 117 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS 118 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS 119 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 120 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 121 | 122 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 123 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 124 | targets_dw = np.log(gt_widths / ex_widths) 125 | targets_dh = np.log(gt_heights / ex_heights) 126 | 127 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 128 | targets[ex_inds, 0] = labels[ex_inds] 129 | targets[ex_inds, 1] = targets_dx 130 | targets[ex_inds, 2] = targets_dy 131 | targets[ex_inds, 3] = targets_dw 132 | targets[ex_inds, 4] = targets_dh 133 | 
return targets 134 | -------------------------------------------------------------------------------- /lib/roi_pooling_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_pooling_layer/roi_pooling_op.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os.path as osp 3 | 4 | filename = osp.join(osp.dirname(__file__), 'roi_pooling.so') 5 | _roi_pooling_module = tf.load_op_library(filename) 6 | roi_pool = _roi_pooling_module.roi_pool 7 | roi_pool_grad = _roi_pooling_module.roi_pool_grad 8 | -------------------------------------------------------------------------------- /lib/roi_pooling_layer/roi_pooling_op_gpu.cu.cc: -------------------------------------------------------------------------------- 1 | #if GOOGLE_CUDA 2 | 3 | #define EIGEN_USE_GPU 4 | 5 | #include <stdio.h> 6 | #include <cfloat> 7 | #include "roi_pooling_op_gpu.h" 8 | 9 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 10 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 11 | i += blockDim.x * gridDim.x) 12 | 13 | using std::max; 14 | using std::min; 15 | 16 | // namespace tensorflow { 17 | using namespace tensorflow; 18 | 19 | template <typename Dtype> 20 | __global__ void ROIPoolForward(const int nthreads, const Dtype* bottom_data, 21 | const Dtype spatial_scale, const int height, const int width, 22 | const int channels, const int pooled_height, const int pooled_width, 23 | const Dtype* bottom_rois, Dtype* top_data, int* argmax_data) 24 | { 25 | CUDA_1D_KERNEL_LOOP(index, nthreads) 26 | { 27 | // (n, ph, pw, c) is an element in the pooled output 28 | int n = index; 29 | int c = n % channels; 30 | n /= channels; 31 | int pw = n % pooled_width; 32 | n /= pooled_width; 33 | int ph = n % pooled_height; 34 | n /= pooled_height; 35 | 36 | bottom_rois += n * 5; 37 | int roi_batch_ind = bottom_rois[0]; 38 | int roi_start_w = round(bottom_rois[1] * spatial_scale); 39 | int roi_start_h = round(bottom_rois[2] * spatial_scale); 40 | int roi_end_w = round(bottom_rois[3] * spatial_scale); 41 | int roi_end_h = round(bottom_rois[4] * spatial_scale); 42 | 43 | // Force malformed ROIs to be 1x1 44 | int roi_width = max(roi_end_w - roi_start_w + 1, 1); 45 | int roi_height = max(roi_end_h - roi_start_h + 1, 1); 46 | Dtype bin_size_h = static_cast<Dtype>(roi_height) 47 | / static_cast<Dtype>(pooled_height); 48 | Dtype bin_size_w = static_cast<Dtype>(roi_width) 49 | / static_cast<Dtype>(pooled_width); 50 | 51 | int hstart = static_cast<int>(floor(static_cast<Dtype>(ph) 52 | * bin_size_h)); 53 | int wstart = static_cast<int>(floor(static_cast<Dtype>(pw) 54 | * bin_size_w)); 55 | int hend = static_cast<int>(ceil(static_cast<Dtype>(ph + 1) 56 | * bin_size_h)); 57 | int wend = static_cast<int>(ceil(static_cast<Dtype>(pw + 1) 58 | * bin_size_w)); 59 | 60 | // Add roi offsets and clip to input boundaries 61 | hstart = min(max(hstart + roi_start_h, 0), height); 62 | hend = min(max(hend + roi_start_h, 0), height); 63 | wstart = min(max(wstart + roi_start_w, 0), width); 64 | wend = min(max(wend + roi_start_w, 0), width); 65 | bool is_empty = (hend <= hstart) || (wend <= wstart); 66 | 67 | // Define an empty pooling region to be zero 68 | Dtype maxval = is_empty ? 0 : -FLT_MAX;
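// Editorial note: maxval starts at -FLT_MAX (rather than 0) for non-empty
// bins so that feature maps whose activations are all negative still pool to
// their true maximum; only genuinely empty bins output 0, with argmax -1.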
69 | // If nothing is pooled, argmax = -1 causes nothing to be backprop'd 70 | int maxidx = -1; 71 | bottom_data += roi_batch_ind * channels * height * width; 72 | for (int h = hstart; h < hend; ++h) { 73 | for (int w = wstart; w < wend; ++w) { 74 | int bottom_index = (h * width + w) * channels + c; 75 | if (bottom_data[bottom_index] > maxval) { 76 | maxval = bottom_data[bottom_index]; 77 | maxidx = bottom_index; 78 | } 79 | } 80 | } 81 | top_data[index] = maxval; 82 | if (argmax_data != nullptr) 83 | argmax_data[index] = maxidx; 84 | } 85 | } 86 | 87 | bool ROIPoolForwardLaucher( 88 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 89 | const int width, const int channels, const int pooled_height, 90 | const int pooled_width, const float* bottom_rois, 91 | float* top_data, int* argmax_data, const Eigen::GpuDevice& d) 92 | { 93 | const int kThreadsPerBlock = 1024; 94 | const int output_size = num_rois * pooled_height * pooled_width * channels; 95 | cudaError_t err; 96 | 97 | ROIPoolForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, 98 | kThreadsPerBlock, 0, d.stream()>>>( 99 | output_size, bottom_data, spatial_scale, height, width, channels, pooled_height, 100 | pooled_width, bottom_rois, top_data, argmax_data); 101 | 102 | err = cudaGetLastError(); 103 | if(cudaSuccess != err) 104 | { 105 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 106 | exit( -1 ); 107 | } 108 | 109 | return d.ok(); 110 | } 111 | 112 | 113 | template <typename Dtype> 114 | __global__ void ROIPoolBackward(const int nthreads, const Dtype* top_diff, 115 | const int* argmax_data, const int num_rois, const Dtype spatial_scale, 116 | const int height, const int width, const int channels, 117 | const int pooled_height, const int pooled_width, Dtype* bottom_diff, 118 | const Dtype* bottom_rois) { 119 | CUDA_1D_KERNEL_LOOP(index, nthreads) 120 | { 121 | // (n, h, w, c) coords in bottom data 122 | int n = index; 123 | int c = n % channels; 124 | n /= channels; 125 | int w = n % width; 126 | n /= width; 127 | int h = n % height; 128 | n /= height; 129 | 130 | Dtype gradient = 0; 131 | // Accumulate gradient over all ROIs that pooled this element 132 | for (int roi_n = 0; roi_n < num_rois; ++roi_n) 133 | { 134 | const Dtype* offset_bottom_rois = bottom_rois + roi_n * 5; 135 | int roi_batch_ind = offset_bottom_rois[0]; 136 | // Skip if ROI's batch index doesn't match n 137 | if (n != roi_batch_ind) { 138 | continue; 139 | } 140 | 141 | int roi_start_w = round(offset_bottom_rois[1] * spatial_scale); 142 | int roi_start_h = round(offset_bottom_rois[2] * spatial_scale); 143 | int roi_end_w = round(offset_bottom_rois[3] * spatial_scale); 144 | int roi_end_h = round(offset_bottom_rois[4] * spatial_scale); 145 | 146 | // Skip if ROI doesn't include (h, w) 147 | const bool in_roi = (w >= roi_start_w && w <= roi_end_w && 148 | h >= roi_start_h && h <= roi_end_h); 149 | if (!in_roi) { 150 | continue; 151 | } 152 | 153 | int offset = roi_n * pooled_height * pooled_width * channels; 154 | const Dtype* offset_top_diff = top_diff + offset; 155 | const int* offset_argmax_data = argmax_data + offset; 156 | 157 | // Compute feasible set of pooled units that could have pooled 158 | // this bottom unit 159 | 160 | // Force malformed ROIs to be 1x1 161 | int roi_width = max(roi_end_w - roi_start_w + 1, 1); 162 | int roi_height = max(roi_end_h - roi_start_h + 1, 1); 163 | 164 | Dtype bin_size_h = static_cast<Dtype>(roi_height) 165 | / static_cast<Dtype>(pooled_height); 166 |
Dtype bin_size_w = static_cast<Dtype>(roi_width) 167 | / static_cast<Dtype>(pooled_width); 168 | 169 | int phstart = floor(static_cast<Dtype>(h - roi_start_h) / bin_size_h); 170 | int phend = ceil(static_cast<Dtype>(h - roi_start_h + 1) / bin_size_h); 171 | int pwstart = floor(static_cast<Dtype>(w - roi_start_w) / bin_size_w); 172 | int pwend = ceil(static_cast<Dtype>(w - roi_start_w + 1) / bin_size_w); 173 | 174 | phstart = min(max(phstart, 0), pooled_height); 175 | phend = min(max(phend, 0), pooled_height); 176 | pwstart = min(max(pwstart, 0), pooled_width); 177 | pwend = min(max(pwend, 0), pooled_width); 178 | 179 | for (int ph = phstart; ph < phend; ++ph) { 180 | for (int pw = pwstart; pw < pwend; ++pw) { 181 | if (offset_argmax_data[(ph * pooled_width + pw) * channels + c] == (h * width + w) * channels + c) 182 | { 183 | gradient += offset_top_diff[(ph * pooled_width + pw) * channels + c]; 184 | } 185 | } 186 | } 187 | } 188 | bottom_diff[index] = gradient; 189 | } 190 | } 191 | 192 | 193 | bool ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 194 | const int height, const int width, const int channels, const int pooled_height, 195 | const int pooled_width, const float* bottom_rois, 196 | float* bottom_diff, const int* argmax_data, const Eigen::GpuDevice& d) 197 | { 198 | const int kThreadsPerBlock = 1024; 199 | const int output_size = batch_size * height * width * channels; 200 | cudaError_t err; 201 | 202 | ROIPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, 203 | kThreadsPerBlock, 0, d.stream()>>>( 204 | output_size, top_diff, argmax_data, num_rois, spatial_scale, height, width, channels, pooled_height, 205 | pooled_width, bottom_diff, bottom_rois); 206 | 207 | err = cudaGetLastError(); 208 | if(cudaSuccess != err) 209 | { 210 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 211 | exit( -1 ); 212 | } 213 | 214 | return d.ok(); 215 | } 216 | 217 | // } // namespace tensorflow 218 | 219 | #endif // GOOGLE_CUDA 220 | -------------------------------------------------------------------------------- /lib/roi_pooling_layer/roi_pooling_op_gpu.h: -------------------------------------------------------------------------------- 1 | #if !GOOGLE_CUDA 2 | #error This file must only be included when building with Cuda support 3 | #endif 4 | 5 | #ifndef TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_ 6 | #define TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_ 7 | 8 | #define EIGEN_USE_GPU 9 | 10 | #include "tensorflow/core/framework/tensor_types.h" 11 | #include "tensorflow/core/platform/types.h" 12 | 13 | namespace tensorflow { 14 | 15 | // Run the forward pass of max pooling, optionally writing the argmax indices to 16 | // the mask array, if it is not nullptr. If mask is passed in as nullptr, the 17 | // argmax indices are not written.
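// Editorial summary of the launcher arguments, inferred from the kernels in
// roi_pooling_op_gpu.cu.cc (not original documentation): bottom_data is a
// channels-last (N, H, W, C) feature map; spatial_scale maps ROI image
// coordinates to feature-map coordinates (e.g. 1/16 for a stride-16 backbone
// such as VGG16 conv5_3); bottom_rois holds num_rois rows of
// (batch_ind, x1, y1, x2, y2); top_data and argmax_data receive the pooled
// output and max-element indices, shaped (num_rois, pooled_height,
// pooled_width, channels).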
18 | bool ROIPoolForwardLaucher( 19 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 20 | const int width, const int channels, const int pooled_height, 21 | const int pooled_width, const float* bottom_rois, 22 | float* top_data, int* argmax_data, const Eigen::GpuDevice& d); 23 | 24 | bool ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int pooled_height, 26 | const int pooled_width, const float* bottom_rois, 27 | float* bottom_diff, const int* argmax_data, const Eigen::GpuDevice& d); 28 | 29 | } // namespace tensorflow 30 | 31 | #endif // TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_ 32 | -------------------------------------------------------------------------------- /lib/roi_pooling_layer/roi_pooling_op_grad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import roi_pooling_op 4 | import pdb 5 | 6 | 7 | @ops.RegisterShape("RoiPool") 8 | def _roi_pool_shape(op): 9 | """Shape function for the RoiPool op. 10 | 11 | """ 12 | dims_data = op.inputs[0].get_shape().as_list() 13 | channels = dims_data[3] 14 | dims_rois = op.inputs[1].get_shape().as_list() 15 | num_rois = dims_rois[0] 16 | 17 | pooled_height = op.get_attr('pooled_height') 18 | pooled_width = op.get_attr('pooled_width') 19 | 20 | output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, channels]) 21 | return [output_shape, output_shape] 22 | 23 | @ops.RegisterGradient("RoiPool") 24 | def _roi_pool_grad(op, grad, _): 25 | """The gradients for `roi_pool`. 26 | Args: 27 | op: The `roi_pool` `Operation` that we are differentiating, which we can use 28 | to find the inputs and outputs of the original op. 29 | grad: Gradient with respect to the output of the `roi_pool` op. 30 | Returns: 31 | Gradients with respect to the input of `roi_pool`. 32 | """
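# Editorial note: because roi_pool has two outputs (the pooled tensor and the
# argmax indices), TensorFlow hands this function two incoming gradients; the
# one for argmax is unused, which is why the third parameter above is `_`.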
33 | data = op.inputs[0] 34 | rois = op.inputs[1] 35 | argmax = op.outputs[1] 36 | pooled_height = op.get_attr('pooled_height') 37 | pooled_width = op.get_attr('pooled_width') 38 | spatial_scale = op.get_attr('spatial_scale') 39 | 40 | # compute gradient 41 | data_grad = roi_pooling_op.roi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale) 42 | 43 | return [data_grad, None] # gradient for `data` only; the `rois` input receives no gradient 44 | -------------------------------------------------------------------------------- /lib/roi_pooling_layer/roi_pooling_op_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import roi_pooling_op 4 | import roi_pooling_op_grad 5 | import pdb 6 | 7 | 8 | 9 | def weight_variable(shape): 10 | initial = tf.truncated_normal(shape, stddev=0.1) 11 | return tf.Variable(initial) 12 | 13 | def conv2d(x, W): 14 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 15 | 16 | array = np.random.rand(32, 100, 100, 3) 17 | data = tf.convert_to_tensor(array, dtype=tf.float32) 18 | rois = tf.convert_to_tensor([[0, 10, 10, 20, 20], [31, 30, 30, 40, 40]], dtype=tf.float32) 19 | 20 | W = weight_variable([3, 3, 3, 1]) 21 | h = conv2d(data, W) 22 | 23 | [y, argmax] = roi_pooling_op.roi_pool(h, rois, 6, 6, 1.0/3) 24 | pdb.set_trace() 25 | y_data = tf.convert_to_tensor(np.ones((2, 6, 6, 1)), dtype=tf.float32) 26 | print y_data, y, argmax 27 | 28 | # Minimize the mean squared errors. 29 | loss = tf.reduce_mean(tf.square(y - y_data)) 30 | optimizer = tf.train.GradientDescentOptimizer(0.5) 31 | train = optimizer.minimize(loss) 32 | 33 | init = tf.initialize_all_variables() 34 | 35 | # Launch the graph. 36 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 37 | sess.run(init) 38 | pdb.set_trace() 39 | for step in xrange(10): 40 | sess.run(train) 41 | print(step, sess.run(W)) 42 | print(sess.run(y)) 43 | 44 | #with tf.device('/gpu:0'): 45 | # result = module.roi_pool(data, rois, 1, 1, 1.0/1) 46 | # print result.eval() 47 | #with tf.device('/cpu:0'): 48 | # run(init) 49 | -------------------------------------------------------------------------------- /lib/roi_pooling_layer/work_sharder.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | #ifndef TENSORFLOW_UTIL_WORK_SHARDER_H_ 17 | #define TENSORFLOW_UTIL_WORK_SHARDER_H_ 18 | 19 | #include <functional> 20 | 21 | #include "tensorflow/core/lib/core/threadpool.h" 22 | #include "tensorflow/core/platform/types.h" 23 | 24 | namespace tensorflow { 25 | 26 | // Shards the "total" unit of work assuming each unit of work having 27 | // roughly "cost_per_unit". Each unit of work is indexed 0, 1, ..., 28 | // total - 1. Each shard contains 1 or more units of work and the 29 | // total cost of each shard is roughly the same. The calling thread and the 30 | // "workers" are used to compute each shard (calling work(start, 31 | // limit). A common configuration is that "workers" is a thread pool 32 | // with at least "max_parallelism" threads. 33 | // 34 | // "cost_per_unit" is an estimate of the number of CPU cycles (or nanoseconds 35 | // if not CPU-bound) to complete a unit of work. Overestimating creates too 36 | // many shards and CPU time will be dominated by per-shard overhead, such as 37 | // Context creation. Underestimating may not fully make use of the specified 38 | // parallelism. 39 | // 40 | // "work" should be a callable taking (int64, int64) arguments. 41 | // work(start, limit) computes the work units from [start, 42 | // limit), i.e., [start, limit) is a shard. 43 | // 44 | // REQUIRES: max_parallelism >= 0 45 | // REQUIRES: workers != nullptr 46 | // REQUIRES: total >= 0 47 | // REQUIRES: cost_per_unit >= 0
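// A hypothetical usage sketch (editorial, not part of the TensorFlow header);
// ProcessUnit stands in for whatever per-unit work the caller performs:
//
//   thread::ThreadPool pool(Env::Default(), "sharder", 4);
//   Shard(4 /*max_parallelism*/, &pool, total, cost_per_unit,
//         [&](int64 start, int64 limit) {
//           for (int64 i = start; i < limit; ++i) ProcessUnit(i);
//         });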
48 | void Shard(int max_parallelism, thread::ThreadPool* workers, int64 total, 49 | int64 cost_per_unit, std::function<void(int64, int64)> work); 50 | 51 | } // end namespace tensorflow 52 | 53 | #endif // TENSORFLOW_UTIL_WORK_SHARDER_H_ 54 | -------------------------------------------------------------------------------- /lib/rpn_msr/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/rpn_msr/anchor_target_layer_tf.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import yaml 10 | from fast_rcnn.config import cfg 11 | import numpy as np 12 | import numpy.random as npr 13 | from generate_anchors import generate_anchors 14 | from utils.cython_bbox import bbox_overlaps 15 | from fast_rcnn.bbox_transform import bbox_transform 16 | import pdb 17 | 18 | DEBUG = False 19 | 20 | def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [16,], anchor_scales = [4 ,8, 16, 32]): 21 | """ 22 | Assign anchors to ground-truth targets. Produces anchor classification 23 | labels and bounding-box regression targets.
24 | """ 25 | _anchors = generate_anchors(scales=np.array(anchor_scales)) 26 | _num_anchors = _anchors.shape[0] 27 | 28 | if DEBUG: 29 | print 'anchors:' 30 | print _anchors 31 | print 'anchor shapes:' 32 | print np.hstack(( 33 | _anchors[:, 2::4] - _anchors[:, 0::4], 34 | _anchors[:, 3::4] - _anchors[:, 1::4], 35 | )) 36 | _counts = cfg.EPS 37 | _sums = np.zeros((1, 4)) 38 | _squared_sums = np.zeros((1, 4)) 39 | _fg_sum = 0 40 | _bg_sum = 0 41 | _count = 0 42 | 43 | # allow boxes to sit over the edge by a small amount 44 | _allowed_border = 0 45 | # map of shape (..., H, W) 46 | #height, width = rpn_cls_score.shape[1:3] 47 | 48 | im_info = im_info[0] 49 | 50 | # Algorithm: 51 | # 52 | # for each (H, W) location i 53 | # generate 9 anchor boxes centered on cell i 54 | # apply predicted bbox deltas at cell i to each of the 9 anchors 55 | # filter out-of-image anchors 56 | # measure GT overlap 57 | 58 | assert rpn_cls_score.shape[0] == 1, \ 59 | 'Only single item batches are supported' 60 | 61 | # map of shape (..., H, W) 62 | height, width = rpn_cls_score.shape[1:3] 63 | 64 | if DEBUG: 65 | print 'AnchorTargetLayer: height', height, 'width', width 66 | print '' 67 | print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 68 | print 'scale: {}'.format(im_info[2]) 69 | print 'height, width: ({}, {})'.format(height, width) 70 | print 'rpn: gt_boxes.shape', gt_boxes.shape 71 | print 'rpn: gt_boxes', gt_boxes 72 | 73 | # 1. Generate proposals from bbox deltas and shifted anchors 74 | shift_x = np.arange(0, width) * _feat_stride 75 | shift_y = np.arange(0, height) * _feat_stride 76 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 77 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 78 | shift_x.ravel(), shift_y.ravel())).transpose() 79 | # add A anchors (1, A, 4) to 80 | # cell K shifts (K, 1, 4) to get 81 | # shift anchors (K, A, 4) 82 | # reshape to (K*A, 4) shifted anchors 83 | A = _num_anchors 84 | K = shifts.shape[0] 85 | all_anchors = (_anchors.reshape((1, A, 4)) + 86 | shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 87 | all_anchors = all_anchors.reshape((K * A, 4)) 88 | total_anchors = int(K * A) 89 | 90 | # only keep anchors inside the image 91 | inds_inside = np.where( 92 | (all_anchors[:, 0] >= -_allowed_border) & 93 | (all_anchors[:, 1] >= -_allowed_border) & 94 | (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width 95 | (all_anchors[:, 3] < im_info[0] + _allowed_border) # height 96 | )[0] 97 | 98 | if DEBUG: 99 | print 'total_anchors', total_anchors 100 | print 'inds_inside', len(inds_inside) 101 | 102 | # keep only inside anchors 103 | anchors = all_anchors[inds_inside, :] 104 | if DEBUG: 105 | print 'anchors.shape', anchors.shape 106 | 107 | # label: 1 is positive, 0 is negative, -1 is dont care 108 | labels = np.empty((len(inds_inside), ), dtype=np.float32) 109 | labels.fill(-1) 110 | 111 | # overlaps between the anchors and the gt boxes 112 | # overlaps (ex, gt) 113 | overlaps = bbox_overlaps( 114 | np.ascontiguousarray(anchors, dtype=np.float), 115 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 116 | argmax_overlaps = overlaps.argmax(axis=1) 117 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 118 | gt_argmax_overlaps = overlaps.argmax(axis=0) 119 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 120 | np.arange(overlaps.shape[1])] 121 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 122 | 123 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: 124 | # assign bg labels first so that positive labels can clobber them 125 | 
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 126 | 127 | # fg label: for each gt, anchor with highest overlap 128 | labels[gt_argmax_overlaps] = 1 129 | 130 | # fg label: above threshold IOU 131 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 132 | 133 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES: 134 | # assign bg labels last so that negative labels can clobber positives 135 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 136 | 137 | # subsample positive labels if we have too many 138 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) 139 | fg_inds = np.where(labels == 1)[0] 140 | if len(fg_inds) > num_fg: 141 | disable_inds = npr.choice( 142 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 143 | labels[disable_inds] = -1 144 | 145 | # subsample negative labels if we have too many 146 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) 147 | bg_inds = np.where(labels == 0)[0] 148 | if len(bg_inds) > num_bg: 149 | disable_inds = npr.choice( 150 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 151 | labels[disable_inds] = -1 152 | #print "was %s inds, disabling %s, now %s inds" % ( 153 | #len(bg_inds), len(disable_inds), np.sum(labels == 0)) 154 | 155 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) 156 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 157 | 158 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 159 | bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) 160 | 161 | bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 162 | if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: 163 | # uniform weighting of examples (given non-uniform sampling) 164 | num_examples = np.sum(labels >= 0) 165 | positive_weights = np.ones((1, 4)) * 1.0 / num_examples 166 | negative_weights = np.ones((1, 4)) * 1.0 / num_examples 167 | else: 168 | assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & 169 | (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) 170 | positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / 171 | np.sum(labels == 1)) 172 | negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / 173 | np.sum(labels == 0)) 174 | bbox_outside_weights[labels == 1, :] = positive_weights 175 | bbox_outside_weights[labels == 0, :] = negative_weights 176 | 177 | if DEBUG: 178 | _sums += bbox_targets[labels == 1, :].sum(axis=0) 179 | _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) 180 | _counts += np.sum(labels == 1) 181 | means = _sums / _counts 182 | stds = np.sqrt(_squared_sums / _counts - means ** 2) 183 | print 'means:' 184 | print means 185 | print 'stdevs:' 186 | print stds 187 | 188 | # map up to original set of anchors 189 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 190 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 191 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 192 | bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) 193 | 194 | if DEBUG: 195 | print 'rpn: max max_overlap', np.max(max_overlaps) 196 | print 'rpn: num_positive', np.sum(labels == 1) 197 | print 'rpn: num_negative', np.sum(labels == 0) 198 | _fg_sum += np.sum(labels == 1) 199 | _bg_sum += np.sum(labels == 0) 200 | _count += 1 201 | print 'rpn: num_positive avg', _fg_sum / _count 202 | print 'rpn: num_negative avg', _bg_sum / _count 203 | 204 | # labels 205 | #pdb.set_trace() 206 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 
2) 207 | labels = labels.reshape((1, 1, A * height, width)) 208 | rpn_labels = labels 209 | 210 | # bbox_targets 211 | bbox_targets = bbox_targets \ 212 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 213 | 214 | rpn_bbox_targets = bbox_targets 215 | # bbox_inside_weights 216 | bbox_inside_weights = bbox_inside_weights \ 217 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 218 | #assert bbox_inside_weights.shape[2] == height 219 | #assert bbox_inside_weights.shape[3] == width 220 | 221 | rpn_bbox_inside_weights = bbox_inside_weights 222 | 223 | # bbox_outside_weights 224 | bbox_outside_weights = bbox_outside_weights \ 225 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 226 | #assert bbox_outside_weights.shape[2] == height 227 | #assert bbox_outside_weights.shape[3] == width 228 | 229 | rpn_bbox_outside_weights = bbox_outside_weights 230 | 231 | return rpn_labels,rpn_bbox_targets,rpn_bbox_inside_weights,rpn_bbox_outside_weights 232 | 233 | 234 | 235 | def _unmap(data, count, inds, fill=0): 236 | """ Unmap a subset of item (data) back to the original set of items (of 237 | size count) """ 238 | if len(data.shape) == 1: 239 | ret = np.empty((count, ), dtype=np.float32) 240 | ret.fill(fill) 241 | ret[inds] = data 242 | else: 243 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) 244 | ret.fill(fill) 245 | ret[inds, :] = data 246 | return ret 247 | 248 | 249 | def _compute_targets(ex_rois, gt_rois): 250 | """Compute bounding-box regression targets for an image.""" 251 | 252 | assert ex_rois.shape[0] == gt_rois.shape[0] 253 | assert ex_rois.shape[1] == 4 254 | assert gt_rois.shape[1] == 5 255 | 256 | return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) 257 | -------------------------------------------------------------------------------- /lib/rpn_msr/generate.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from fast_rcnn.config import cfg 9 | from utils.blob import im_list_to_blob 10 | from utils.timer import Timer 11 | import numpy as np 12 | import cv2 13 | import matplotlib.pyplot as plt 14 | def _vis_proposals(im, dets, thresh=0.5): 15 | """Draw detected bounding boxes.""" 16 | inds = np.where(dets[:, -1] >= thresh)[0] 17 | if len(inds) == 0: 18 | return 19 | 20 | class_name = 'obj' 21 | im = im[:, :, (2, 1, 0)] 22 | fig, ax = plt.subplots(figsize=(12, 12)) 23 | ax.imshow(im, aspect='equal') 24 | for i in inds: 25 | bbox = dets[i, :4] 26 | score = dets[i, -1] 27 | 28 | ax.add_patch( 29 | plt.Rectangle((bbox[0], bbox[1]), 30 | bbox[2] - bbox[0], 31 | bbox[3] - bbox[1], fill=False, 32 | edgecolor='red', linewidth=3.5) 33 | ) 34 | ax.text(bbox[0], bbox[1] - 2, 35 | '{:s} {:.3f}'.format(class_name, score), 36 | bbox=dict(facecolor='blue', alpha=0.5), 37 | fontsize=14, color='white') 38 | 39 | ax.set_title(('{} detections with ' 40 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 41 | thresh), 42 | fontsize=14) 43 | plt.axis('off') 44 | plt.tight_layout() 45 | plt.draw() 46 | 47 | def _get_image_blob(im): 48 | """Converts an image into a network input.
49 | 50 | Arguments: 51 | im (ndarray): a color image in BGR order 52 | 53 | Returns: 54 | blob (ndarray): a data blob holding an image pyramid 55 | im_scale_factors (list): list of image scales (relative to im) used 56 | in the image pyramid 57 | """ 58 | im_orig = im.astype(np.float32, copy=True) 59 | im_orig -= cfg.PIXEL_MEANS 60 | 61 | processed_ims = [] 62 | 63 | assert len(cfg.TEST.SCALES_BASE) == 1 64 | im_scale = cfg.TRAIN.SCALES_BASE[0] 65 | 66 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 67 | interpolation=cv2.INTER_LINEAR) 68 | im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :] 69 | processed_ims.append(im) 70 | 71 | # Create a blob to hold the input images 72 | blob = im_list_to_blob(processed_ims) 73 | 74 | return blob, im_info 75 | 76 | def im_proposals(net, im): 77 | """Generate RPN proposals on a single image.""" 78 | blobs = {} 79 | blobs['data'], blobs['im_info'] = _get_image_blob(im) 80 | net.blobs['data'].reshape(*(blobs['data'].shape)) 81 | net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) 82 | blobs_out = net.forward( 83 | data=blobs['data'].astype(np.float32, copy=False), 84 | im_info=blobs['im_info'].astype(np.float32, copy=False)) 85 | 86 | scale = blobs['im_info'][0, 2] 87 | boxes = blobs_out['rois'][:, 1:].copy() / scale 88 | scores = blobs_out['scores'].copy() 89 | return boxes, scores 90 | 91 | def imdb_proposals(net, imdb): 92 | """Generate RPN proposals on all images in an imdb.""" 93 | 94 | _t = Timer() 95 | imdb_boxes = [[] for _ in xrange(imdb.num_images)] 96 | for i in xrange(imdb.num_images): 97 | im = cv2.imread(imdb.image_path_at(i)) 98 | _t.tic() 99 | imdb_boxes[i], scores = im_proposals(net, im) 100 | _t.toc() 101 | print 'im_proposals: {:d}/{:d} {:.3f}s' \ 102 | .format(i + 1, imdb.num_images, _t.average_time) 103 | if 0: 104 | dets = np.hstack((imdb_boxes[i], scores)) 105 | # from IPython import embed; embed() 106 | _vis_proposals(im, dets[:3, :], thresh=0.9) 107 | plt.show() 108 | 109 | return imdb_boxes 110 | 111 | def imdb_proposals_det(net, imdb): 112 | """Generate RPN proposals on all images in an imdb.""" 113 | 114 | _t = Timer() 115 | imdb_boxes = [[] for _ in xrange(imdb.num_images)] 116 | for i in xrange(imdb.num_images): 117 | im = cv2.imread(imdb.image_path_at(i)) 118 | _t.tic() 119 | boxes, scores = im_proposals(net, im) 120 | _t.toc() 121 | print 'im_proposals: {:d}/{:d} {:.3f}s' \ 122 | .format(i + 1, imdb.num_images, _t.average_time) 123 | dets = np.hstack((boxes, scores)) 124 | imdb_boxes[i] = dets 125 | 126 | if 0: 127 | # from IPython import embed; embed() 128 | _vis_proposals(im, dets[:3, :], thresh=0.9) 129 | plt.show() 130 | 131 | return imdb_boxes 132 | -------------------------------------------------------------------------------- /lib/rpn_msr/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 11 | # 12 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 13 | # >> anchors 14 | # 15 | # anchors = 16 | # 17 | # -83 -39 100 56 18 | # -175 -87 192 104 19 | # -359 -183 376 200 20 | # -55 -55 72 72 21 | # -119 -119 136 136 
22 | # -247 -247 264 264 23 | # -35 -79 52 96 24 | # -79 -167 96 184 25 | # -167 -343 184 360 26 | 27 | #array([[ -83., -39., 100., 56.], 28 | # [-175., -87., 192., 104.], 29 | # [-359., -183., 376., 200.], 30 | # [ -55., -55., 72., 72.], 31 | # [-119., -119., 136., 136.], 32 | # [-247., -247., 264., 264.], 33 | # [ -35., -79., 52., 96.], 34 | # [ -79., -167., 96., 184.], 35 | # [-167., -343., 184., 360.]]) 36 | 37 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 38 | scales=2**np.arange(3, 6)): 39 | """ 40 | Generate anchor (reference) windows by enumerating aspect ratios X 41 | scales wrt a reference (0, 0, 15, 15) window. 42 | """ 43 | 44 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 45 | ratio_anchors = _ratio_enum(base_anchor, ratios) 46 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 47 | for i in xrange(ratio_anchors.shape[0])]) 48 | return anchors 49 | 50 | def _whctrs(anchor): 51 | """ 52 | Return width, height, x center, and y center for an anchor (window). 53 | """ 54 | 55 | w = anchor[2] - anchor[0] + 1 56 | h = anchor[3] - anchor[1] + 1 57 | x_ctr = anchor[0] + 0.5 * (w - 1) 58 | y_ctr = anchor[1] + 0.5 * (h - 1) 59 | return w, h, x_ctr, y_ctr 60 | 61 | def _mkanchors(ws, hs, x_ctr, y_ctr): 62 | """ 63 | Given a vector of widths (ws) and heights (hs) around a center 64 | (x_ctr, y_ctr), output a set of anchors (windows). 65 | """ 66 | 67 | ws = ws[:, np.newaxis] 68 | hs = hs[:, np.newaxis] 69 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 70 | y_ctr - 0.5 * (hs - 1), 71 | x_ctr + 0.5 * (ws - 1), 72 | y_ctr + 0.5 * (hs - 1))) 73 | return anchors 74 | 75 | def _ratio_enum(anchor, ratios): 76 | """ 77 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 78 | """ 79 | 80 | w, h, x_ctr, y_ctr = _whctrs(anchor) 81 | size = w * h 82 | size_ratios = size / ratios 83 | ws = np.round(np.sqrt(size_ratios)) 84 | hs = np.round(ws * ratios) 85 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 86 | return anchors 87 | 88 | def _scale_enum(anchor, scales): 89 | """ 90 | Enumerate a set of anchors for each scale wrt an anchor. 91 | """ 92 | 93 | w, h, x_ctr, y_ctr = _whctrs(anchor) 94 | ws = w * scales 95 | hs = h * scales 96 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 97 | return anchors 98 | 99 | if __name__ == '__main__': 100 | import time 101 | t = time.time() 102 | a = generate_anchors() 103 | print time.time() - t 104 | print a 105 | from IPython import embed; embed() 106 | -------------------------------------------------------------------------------- /lib/rpn_msr/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import caffe 9 | import numpy as np 10 | import yaml 11 | from fast_rcnn.config import cfg 12 | from generate_anchors import generate_anchors 13 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes 14 | from fast_rcnn.nms_wrapper import nms 15 | 16 | DEBUG = False 17 | 18 | class ProposalLayer(caffe.Layer): 19 | """ 20 | Outputs object detection proposals by applying estimated bounding-box 21 | transformations to a set of regular boxes (called "anchors"). 
22 | """ 23 | 24 | def setup(self, bottom, top): 25 | # parse the layer parameter string, which must be valid YAML 26 | layer_params = yaml.load(self.param_str_) 27 | 28 | self._feat_stride = layer_params['feat_stride'] 29 | self._anchors = generate_anchors(cfg.TRAIN.RPN_BASE_SIZE, cfg.TRAIN.RPN_ASPECTS, cfg.TRAIN.RPN_SCALES) 30 | self._num_anchors = self._anchors.shape[0] 31 | 32 | if DEBUG: 33 | print 'feat_stride: {}'.format(self._feat_stride) 34 | print 'anchors:' 35 | print self._anchors 36 | 37 | # rois blob: holds R regions of interest, each is a 5-tuple 38 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 39 | # rectangle (x1, y1, x2, y2) 40 | top[0].reshape(1, 5) 41 | 42 | # scores blob: holds scores for R regions of interest 43 | if len(top) > 1: 44 | top[1].reshape(1, 1, 1, 1) 45 | 46 | def forward(self, bottom, top): 47 | # Algorithm: 48 | # 49 | # for each (H, W) location i 50 | # generate A anchor boxes centered on cell i 51 | # apply predicted bbox deltas at cell i to each of the A anchors 52 | # clip predicted boxes to image 53 | # remove predicted boxes with either height or width < threshold 54 | # sort all (proposal, score) pairs by score from highest to lowest 55 | # take top pre_nms_topN proposals before NMS 56 | # apply NMS with threshold 0.7 to remaining proposals 57 | # take after_nms_topN proposals after NMS 58 | # return the top proposals (-> RoIs top, scores top) 59 | 60 | assert bottom[0].data.shape[0] == 1, \ 61 | 'Only single item batches are supported' 62 | # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' 63 | cfg_key = 'TEST' 64 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 65 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 66 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 67 | min_size = cfg[cfg_key].RPN_MIN_SIZE 68 | 69 | # the first set of _num_anchors channels are bg probs 70 | # the second set are the fg probs, which we want 71 | scores = bottom[0].data[:, self._num_anchors:, :, :] 72 | bbox_deltas = bottom[1].data 73 | im_info = bottom[2].data[0, :] 74 | 75 | if DEBUG: 76 | print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 77 | print 'scale: {}'.format(im_info[2]) 78 | 79 | # 1. 
Generate proposals from bbox deltas and shifted anchors 80 | height, width = scores.shape[-2:] 81 | 82 | if DEBUG: 83 | print 'score map size: {}'.format(scores.shape) 84 | 85 | # Enumerate all shifts 86 | shift_x = np.arange(0, width) * self._feat_stride 87 | shift_y = np.arange(0, height) * self._feat_stride 88 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 89 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 90 | shift_x.ravel(), shift_y.ravel())).transpose() 91 | 92 | # Enumerate all shifted anchors: 93 | # 94 | # add A anchors (1, A, 4) to 95 | # cell K shifts (K, 1, 4) to get 96 | # shift anchors (K, A, 4) 97 | # reshape to (K*A, 4) shifted anchors 98 | A = self._num_anchors 99 | K = shifts.shape[0] 100 | anchors = self._anchors.reshape((1, A, 4)) + \ 101 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 102 | anchors = anchors.reshape((K * A, 4)) 103 | 104 | # Transpose and reshape predicted bbox transformations to get them 105 | # into the same order as the anchors: 106 | # 107 | # bbox deltas will be (1, 4 * A, H, W) format 108 | # transpose to (1, H, W, 4 * A) 109 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 110 | # in slowest to fastest order 111 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) 112 | 113 | # Same story for the scores: 114 | # 115 | # scores are (1, A, H, W) format 116 | # transpose to (1, H, W, A) 117 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 118 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 119 | 120 | # Convert anchors into proposals via bbox transformations 121 | proposals = bbox_transform_inv(anchors, bbox_deltas) 122 | 123 | # 2. clip predicted boxes to image 124 | proposals = clip_boxes(proposals, im_info[:2]) 125 | 126 | # 3. remove predicted boxes with either height or width < threshold 127 | # (NOTE: convert min_size to input image scale stored in im_info[2]) 128 | keep = _filter_boxes(proposals, min_size * im_info[2]) 129 | proposals = proposals[keep, :] 130 | scores = scores[keep] 131 | 132 | # 4. sort all (proposal, score) pairs by score from highest to lowest 133 | # 5. take top pre_nms_topN (e.g. 6000) 134 | order = scores.ravel().argsort()[::-1] 135 | if pre_nms_topN > 0: 136 | order = order[:pre_nms_topN] 137 | proposals = proposals[order, :] 138 | scores = scores[order] 139 | 140 | # 6. apply nms (e.g. threshold = 0.7) 141 | # 7. take after_nms_topN (e.g. 300) 142 | # 8. return the top proposals (-> RoIs top) 143 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 144 | if post_nms_topN > 0: 145 | keep = keep[:post_nms_topN] 146 | proposals = proposals[keep, :] 147 | scores = scores[keep] 148 | print scores.shape 149 | 150 | # Output rois blob 151 | # Our RPN implementation only supports a single input image, so all 152 | # batch inds are 0 153 | batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) 154 | blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) 155 | top[0].reshape(*(blob.shape)) 156 | top[0].data[...] = blob 157 | 158 | # [Optional] output scores blob 159 | if len(top) > 1: 160 | top[1].reshape(*(scores.shape)) 161 | top[1].data[...] 
= scores 162 | 163 | def backward(self, top, propagate_down, bottom): 164 | """This layer does not propagate gradients.""" 165 | pass 166 | 167 | def reshape(self, bottom, top): 168 | """Reshaping happens during the call to forward.""" 169 | pass 170 | 171 | def _filter_boxes(boxes, min_size): 172 | """Remove all boxes with any side smaller than min_size.""" 173 | ws = boxes[:, 2] - boxes[:, 0] + 1 174 | hs = boxes[:, 3] - boxes[:, 1] + 1 175 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 176 | return keep 177 | -------------------------------------------------------------------------------- /lib/rpn_msr/proposal_layer_tf.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import yaml 10 | from fast_rcnn.config import cfg 11 | from generate_anchors import generate_anchors 12 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes 13 | from fast_rcnn.nms_wrapper import nms 14 | import pdb 15 | 16 | 17 | DEBUG = False 18 | """ 19 | Outputs object detection proposals by applying estimated bounding-box 20 | transformations to a set of regular boxes (called "anchors"). 21 | """ 22 | def proposal_layer(rpn_cls_prob_reshape,rpn_bbox_pred,im_info,cfg_key,_feat_stride = [16,],anchor_scales = [8, 16, 32]): 23 | # Algorithm: 24 | # 25 | # for each (H, W) location i 26 | # generate A anchor boxes centered on cell i 27 | # apply predicted bbox deltas at cell i to each of the A anchors 28 | # clip predicted boxes to image 29 | # remove predicted boxes with either height or width < threshold 30 | # sort all (proposal, score) pairs by score from highest to lowest 31 | # take top pre_nms_topN proposals before NMS 32 | # apply NMS with threshold 0.7 to remaining proposals 33 | # take after_nms_topN proposals after NMS 34 | # return the top proposals (-> RoIs top, scores top) 35 | #layer_params = yaml.load(self.param_str_) 36 | _anchors = generate_anchors(scales=np.array(anchor_scales)) 37 | _num_anchors = _anchors.shape[0] 38 | rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape,[0,3,1,2]) 39 | rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,1,2]) 40 | #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1]) 41 | #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1]) 42 | im_info = im_info[0] 43 | 44 | assert rpn_cls_prob_reshape.shape[0] == 1, \ 45 | 'Only single item batches are supported' 46 | # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' 47 | #cfg_key = 'TEST' 48 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 49 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 50 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 51 | min_size = cfg[cfg_key].RPN_MIN_SIZE 52 | 53 | # the first set of _num_anchors channels are bg probs 54 | # the second set are the fg probs, which we want 55 | scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :] 56 | bbox_deltas = rpn_bbox_pred 57 | #im_info = bottom[2].data[0, :] 58 | 59 | if DEBUG: 60 | print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 61 | print 'scale: {}'.format(im_info[2]) 62 | 63 | # 1. 
Generate proposals from bbox deltas and shifted anchors 64 | height, width = scores.shape[-2:] 65 | 66 | if DEBUG: 67 | print 'score map size: {}'.format(scores.shape) 68 | 69 | # Enumerate all shifts 70 | shift_x = np.arange(0, width) * _feat_stride 71 | shift_y = np.arange(0, height) * _feat_stride 72 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 73 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 74 | shift_x.ravel(), shift_y.ravel())).transpose() 75 | 76 | # Enumerate all shifted anchors: 77 | # 78 | # add A anchors (1, A, 4) to 79 | # cell K shifts (K, 1, 4) to get 80 | # shift anchors (K, A, 4) 81 | # reshape to (K*A, 4) shifted anchors 82 | A = _num_anchors 83 | K = shifts.shape[0] 84 | anchors = _anchors.reshape((1, A, 4)) + \ 85 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 86 | anchors = anchors.reshape((K * A, 4)) 87 | 88 | # Transpose and reshape predicted bbox transformations to get them 89 | # into the same order as the anchors: 90 | # 91 | # bbox deltas will be (1, 4 * A, H, W) format 92 | # transpose to (1, H, W, 4 * A) 93 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 94 | # in slowest to fastest order 95 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) 96 | 97 | # Same story for the scores: 98 | # 99 | # scores are (1, A, H, W) format 100 | # transpose to (1, H, W, A) 101 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 102 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 103 | 104 | # Convert anchors into proposals via bbox transformations 105 | proposals = bbox_transform_inv(anchors, bbox_deltas) 106 | 107 | # 2. clip predicted boxes to image 108 | proposals = clip_boxes(proposals, im_info[:2]) 109 | 110 | # 3. remove predicted boxes with either height or width < threshold 111 | # (NOTE: convert min_size to input image scale stored in im_info[2]) 112 | keep = _filter_boxes(proposals, min_size * im_info[2]) 113 | proposals = proposals[keep, :] 114 | scores = scores[keep] 115 | 116 | # 4. sort all (proposal, score) pairs by score from highest to lowest 117 | # 5. take top pre_nms_topN (e.g. 6000) 118 | order = scores.ravel().argsort()[::-1] 119 | if pre_nms_topN > 0: 120 | order = order[:pre_nms_topN] 121 | proposals = proposals[order, :] 122 | scores = scores[order] 123 | 124 | # 6. apply nms (e.g. threshold = 0.7) 125 | # 7. take after_nms_topN (e.g. 300) 126 | # 8. return the top proposals (-> RoIs top) 127 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 128 | if post_nms_topN > 0: 129 | keep = keep[:post_nms_topN] 130 | proposals = proposals[keep, :] 131 | scores = scores[keep] 132 | # Output rois blob 133 | # Our RPN implementation only supports a single input image, so all 134 | # batch inds are 0 135 | batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) 136 | blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) 137 | return blob 138 | #top[0].reshape(*(blob.shape)) 139 | #top[0].data[...] = blob 140 | 141 | # [Optional] output scores blob 142 | #if len(top) > 1: 143 | # top[1].reshape(*(scores.shape)) 144 | # top[1].data[...] 
= scores 145 | 146 | def _filter_boxes(boxes, min_size): 147 | """Remove all boxes with any side smaller than min_size.""" 148 | ws = boxes[:, 2] - boxes[:, 0] + 1 149 | hs = boxes[:, 3] - boxes[:, 1] + 1 150 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 151 | return keep 152 | -------------------------------------------------------------------------------- /lib/rpn_msr/proposal_target_layer_tf.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import yaml 9 | import numpy as np 10 | import numpy.random as npr 11 | from fast_rcnn.config import cfg 12 | from fast_rcnn.bbox_transform import bbox_transform 13 | from utils.cython_bbox import bbox_overlaps 14 | import pdb 15 | 16 | DEBUG = False 17 | _count, _fg_num, _bg_num = 0, 0, 0  # running totals used by the DEBUG statistics below 18 | def proposal_target_layer(rpn_rois, gt_boxes,_num_classes): 19 | """ 20 | Assign object detection proposals to ground-truth targets. Produces proposal 21 | classification labels and bounding-box regression targets. 22 | """ 23 | 24 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 25 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 26 | all_rois = rpn_rois 27 | # TODO(rbg): it's annoying that sometimes I have extra info before 28 | # and other times after box coordinates -- normalize to one format 29 | 30 | # Include ground-truth boxes in the set of candidate rois 31 | zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) 32 | all_rois = np.vstack( 33 | (all_rois, np.hstack((zeros, gt_boxes[:, :-1]))) 34 | ) 35 | 36 | # Sanity check: single batch only 37 | assert np.all(all_rois[:, 0] == 0), \ 38 | 'Only single item batches are supported' 39 | 40 | num_images = 1 41 | rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 42 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) 43 | 44 | # Sample rois with classification labels and bounding box regression 45 | # targets 46 | labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( 47 | all_rois, gt_boxes, fg_rois_per_image, 48 | rois_per_image, _num_classes) 49 | 50 | if DEBUG: 51 | global _count, _fg_num, _bg_num 52 | print 'num fg: {}'.format((labels > 0).sum()) 53 | print 'num bg: {}'.format((labels == 0).sum()) 54 | _count += 1 55 | _fg_num += (labels > 0).sum() 56 | _bg_num += (labels == 0).sum() 57 | print 'num fg avg: {}'.format(_fg_num / _count) 58 | print 'num bg avg: {}'.format(_bg_num / _count) 59 | print 'ratio: {:.3f}'.format(float(_fg_num) / float(_bg_num)) 60 | rois = rois.reshape(-1,5) 61 | labels = labels.reshape(-1,1) 62 | bbox_targets = bbox_targets.reshape(-1,_num_classes*4) 63 | bbox_inside_weights = bbox_inside_weights.reshape(-1,_num_classes*4) 64 | 65 | bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) 66 | 67 | return rois,labels,bbox_targets,bbox_inside_weights,bbox_outside_weights 68 | 69 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 70 | """Bounding-box regression targets (bbox_target_data) are stored in a 71 | compact form N x (class, tx, ty, tw, th) 72 | 73 | This function expands those targets into the 4-of-4*K representation used 74 | by the network (i.e. only one class has non-zero targets).
75 | 76 | Returns: 77 | bbox_target (ndarray): N x 4K blob of regression targets 78 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 79 | """ 80 | 81 | clss = np.array(bbox_target_data[:, 0], dtype=np.uint16, copy=True) 82 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 83 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 84 | inds = np.where(clss > 0)[0] 85 | for ind in inds: 86 | cls = clss[ind] 87 | start = 4 * cls 88 | end = start + 4 89 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 90 | bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS 91 | return bbox_targets, bbox_inside_weights 92 | 93 | 94 | def _compute_targets(ex_rois, gt_rois, labels): 95 | """Compute bounding-box regression targets for an image.""" 96 | 97 | assert ex_rois.shape[0] == gt_rois.shape[0] 98 | assert ex_rois.shape[1] == 4 99 | assert gt_rois.shape[1] == 4 100 | 101 | targets = bbox_transform(ex_rois, gt_rois) 102 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 103 | # Optionally normalize targets by a precomputed mean and stdev 104 | targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 105 | / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 106 | return np.hstack( 107 | (labels[:, np.newaxis], targets)).astype(np.float32, copy=False) 108 | 109 | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): 110 | """Generate a random sample of RoIs comprising foreground and background 111 | examples. 112 | """ 113 | # overlaps: (rois x gt_boxes) 114 | overlaps = bbox_overlaps( 115 | np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), 116 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) 117 | gt_assignment = overlaps.argmax(axis=1) 118 | max_overlaps = overlaps.max(axis=1) 119 | labels = gt_boxes[gt_assignment, 4] 120 | 121 | # Select foreground RoIs as those with >= FG_THRESH overlap 122 | fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] 123 | # Guard against the case when an image has fewer than fg_rois_per_image 124 | # foreground RoIs 125 | fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size)) 126 | # Sample foreground regions without replacement 127 | if fg_inds.size > 0: 128 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) 129 | 130 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 131 | bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & 132 | (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 133 | # Compute number of background RoIs to take from this image (guarding 134 | # against there being fewer than desired) 135 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 136 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 137 | # Sample background regions without replacement 138 | if bg_inds.size > 0: 139 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 140 | 141 | # The indices that we're selecting (both fg and bg) 142 | keep_inds = np.append(fg_inds, bg_inds) 143 | # Select sampled values from various arrays: 144 | labels = labels[keep_inds] 145 | # Clamp labels for the background RoIs to 0 146 | labels[fg_rois_per_this_image:] = 0 147 | rois = all_rois[keep_inds] 148 | 149 | bbox_target_data = _compute_targets( 150 | rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) 151 | 152 | bbox_targets, bbox_inside_weights = \ 153 | _get_bbox_regression_labels(bbox_target_data, num_classes) 154 | 155 | return labels, rois, bbox_targets, 
bbox_inside_weights 156 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | def find_in_path(name, path): 16 | "Find a file in a search path" 17 | # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 18 | for dir in path.split(os.pathsep): 19 | binpath = pjoin(dir, name) 20 | if os.path.exists(binpath): 21 | return os.path.abspath(binpath) 22 | return None 23 | 24 | def locate_cuda(): 25 | """Locate the CUDA environment on the system 26 | 27 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 28 | and values giving the absolute path to each directory. 29 | 30 | Starts by looking for the CUDAHOME env variable. If not found, everything 31 | is based on finding 'nvcc' in the PATH. 32 | """ 33 | 34 | # first check if the CUDAHOME env variable is in use 35 | if 'CUDAHOME' in os.environ: 36 | home = os.environ['CUDAHOME'] 37 | nvcc = pjoin(home, 'bin', 'nvcc') 38 | else: 39 | # otherwise, search the PATH for NVCC 40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 42 | if nvcc is None: 43 | return None 44 | home = os.path.dirname(os.path.dirname(nvcc)) 45 | 46 | cudaconfig = {'home':home, 'nvcc':nvcc, 47 | 'include': pjoin(home, 'include'), 48 | 'lib64': pjoin(home, 'lib64')} 49 | for k, v in cudaconfig.iteritems(): 50 | if not os.path.exists(v): 51 | return None 52 | 53 | return cudaconfig 54 | 55 | CUDA = locate_cuda() 56 | 57 | # Obtain the numpy include directory. This logic works across numpy versions. 58 | try: 59 | numpy_include = np.get_include() 60 | except AttributeError: 61 | numpy_include = np.get_numpy_include() 62 | 63 | def customize_compiler_for_nvcc(self): 64 | """inject deep into distutils to customize how the dispatch 65 | to gcc/nvcc works. 66 | 67 | If you subclass UnixCCompiler, it's not trivial to get your subclass 68 | injected in, and still have the right customizations (i.e. 69 | distutils.sysconfig.customize_compiler) run on it. So instead of going 70 | the OO route, I have this. Note, it's kind of like a weird functional 71 | subclassing going on.""" 72 | 73 | # tell the compiler it can process .cu files 74 | self.src_extensions.append('.cu') 75 | 76 | # save references to the default compiler_so and _compile methods 77 | default_compiler_so = self.compiler_so 78 | super = self._compile 79 | 80 | # now redefine the _compile method. This gets executed for each 81 | # object but distutils doesn't have the ability to change compilers 82 | # based on source extension: we add it.
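    # A sketch of the dispatch this hook enables (argument values copied from
    # the ext_modules list further down; how distutils threads them through is
    # an assumption about the distutils of this era, not guaranteed here):
    #
    #     extra_compile_args={'gcc': ['-Wno-unused-function'],
    #                         'nvcc': ['-arch=sm_35', '--ptxas-options=-v', '-c',
    #                                  '--compiler-options', "'-fPIC'"]}
    #
    # For a CUDA source such as 'nms/nms_kernel.cu', _compile below swaps
    # compiler_so to CUDA['nvcc'] and passes the 'nvcc' argument list; for
    # 'nms/gpu_nms.pyx' (lowered by Cython to C++), it keeps the default
    # compiler and the 'gcc' list.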
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 84 | #print extra_postargs 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | 102 | # run the customize_compiler 103 | class custom_build_ext(build_ext): 104 | def build_extensions(self): 105 | customize_compiler_for_nvcc(self.compiler) 106 | build_ext.build_extensions(self) 107 | 108 | ext_modules = [ 109 | Extension( 110 | "utils.cython_bbox", 111 | ["utils/bbox.pyx"], 112 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 113 | include_dirs = [numpy_include] 114 | ), 115 | Extension( 116 | "utils.cython_nms", 117 | ["utils/nms.pyx"], 118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 119 | include_dirs = [numpy_include] 120 | ), 121 | Extension( 122 | "nms.cpu_nms", 123 | ["nms/cpu_nms.pyx"], 124 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 125 | include_dirs = [numpy_include] 126 | ) 127 | ] 128 | 129 | if CUDA: 130 | ext_modules.append( 131 | Extension('nms.gpu_nms', 132 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 133 | library_dirs=[CUDA['lib64']], 134 | libraries=['cudart'], 135 | language='c++', 136 | runtime_library_dirs=[CUDA['lib64']], 137 | # this syntax is specific to this build system 138 | # we're only going to use certain compiler args with nvcc and not with gcc 139 | # the implementation of this trick is in customize_compiler_for_nvcc() above 140 | extra_compile_args={'gcc': ["-Wno-unused-function"], 141 | 'nvcc': ['-arch=sm_35', 142 | '--ptxas-options=-v', 143 | '-c', 144 | '--compiler-options', 145 | "'-fPIC'"]}, 146 | include_dirs = [numpy_include, CUDA['include']] 147 | ) 148 | ) 149 | 150 | setup( 151 | name='fast_rcnn', 152 | ext_modules=ext_modules, 153 | # inject our custom trigger 154 | cmdclass={'build_ext': custom_build_ext}, 155 | ) 156 | -------------------------------------------------------------------------------- /lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython
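# Worked example of the inclusive-coordinate IoU that bbox_overlaps computes
# below (hypothetical boxes): boxes = [[0, 0, 9, 9]] and
# query_boxes = [[5, 5, 14, 14]] each cover 10 * 10 = 100 pixels, the
# intersection is 5 * 5 = 25, and the union is 100 + 100 - 25 = 175, so the
# returned overlap is 25 / 175 ~= 0.143.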
9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import cv2 12 | 13 | def im_list_to_blob(ims): 14 | """Convert a list of images into a network input. 15 | 16 | Assumes images are already prepared (means subtracted, BGR order, ...). 
17 | """ 18 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 19 | num_images = len(ims) 20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 21 | dtype=np.float32) 22 | for i in xrange(num_images): 23 | im = ims[i] 24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 25 | 26 | return blob 27 | 28 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 29 | """Mean subtract and scale an image for use in a blob.""" 30 | im = im.astype(np.float32, copy=False) 31 | im -= pixel_means 32 | im_shape = im.shape 33 | im_size_min = np.min(im_shape[0:2]) 34 | im_size_max = np.max(im_shape[0:2]) 35 | im_scale = float(target_size) / float(im_size_min) 36 | # Prevent the biggest axis from being more than MAX_SIZE 37 | if np.round(im_scale * im_size_max) > max_size: 38 | im_scale = float(max_size) / float(im_size_max) 39 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 40 | interpolation=cv2.INTER_LINEAR) 41 | 42 | return im, im_scale 43 | -------------------------------------------------------------------------------- /lib/utils/boxes_grid.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Subcategory CNN 3 | # Copyright (c) 2015 CVGL Stanford 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yu Xiang 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import math 10 | from fast_rcnn.config import cfg 11 | 12 | def get_boxes_grid(image_height, image_width): 13 | """ 14 | Return the boxes on image grid. 15 | """ 16 | 17 | # height and width of the heatmap 18 | if cfg.NET_NAME == 'CaffeNet': 19 | height = np.floor((image_height * max(cfg.TRAIN.SCALES) - 1) / 4.0 + 1) 20 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 21 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 22 | 23 | width = np.floor((image_width * max(cfg.TRAIN.SCALES) - 1) / 4.0 + 1) 24 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 25 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 26 | elif cfg.NET_NAME == 'VGGnet': 27 | height = np.floor(image_height * max(cfg.TRAIN.SCALES) / 2.0 + 0.5) 28 | height = np.floor(height / 2.0 + 0.5) 29 | height = np.floor(height / 2.0 + 0.5) 30 | height = np.floor(height / 2.0 + 0.5) 31 | 32 | width = np.floor(image_width * max(cfg.TRAIN.SCALES) / 2.0 + 0.5) 33 | width = np.floor(width / 2.0 + 0.5) 34 | width = np.floor(width / 2.0 + 0.5) 35 | width = np.floor(width / 2.0 + 0.5) 36 | else: 37 | assert (1), 'The network architecture is not supported in utils.get_boxes_grid!' 
38 | 39 | # compute the grid box centers 40 | h = np.arange(height) 41 | w = np.arange(width) 42 | y, x = np.meshgrid(h, w, indexing='ij') 43 | centers = np.dstack((x, y)) 44 | centers = np.reshape(centers, (-1, 2)) 45 | num = centers.shape[0] 46 | 47 | # compute width and height of grid box 48 | area = cfg.TRAIN.KERNEL_SIZE * cfg.TRAIN.KERNEL_SIZE 49 | aspect = cfg.TRAIN.ASPECTS # height / width 50 | num_aspect = len(aspect) 51 | widths = np.zeros((1, num_aspect), dtype=np.float32) 52 | heights = np.zeros((1, num_aspect), dtype=np.float32) 53 | for i in xrange(num_aspect): 54 | widths[0,i] = math.sqrt(area / aspect[i]) 55 | heights[0,i] = widths[0,i] * aspect[i] 56 | 57 | # construct grid boxes 58 | centers = np.repeat(centers, num_aspect, axis=0) 59 | widths = np.tile(widths, num).transpose() 60 | heights = np.tile(heights, num).transpose() 61 | 62 | x1 = np.reshape(centers[:,0], (-1, 1)) - widths * 0.5 63 | x2 = np.reshape(centers[:,0], (-1, 1)) + widths * 0.5 64 | y1 = np.reshape(centers[:,1], (-1, 1)) - heights * 0.5 65 | y2 = np.reshape(centers[:,1], (-1, 1)) + heights * 0.5 66 | 67 | boxes_grid = np.hstack((x1, y1, x2, y2)) / cfg.TRAIN.SPATIAL_SCALE 68 | 69 | return boxes_grid, centers[:,0], centers[:,1] 70 | -------------------------------------------------------------------------------- /lib/utils/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def nms(dets, thresh): 11 | x1 = dets[:, 0] 12 | y1 = dets[:, 1] 13 | x2 = dets[:, 2] 14 | y2 = dets[:, 3] 15 | scores = dets[:, 4] 16 | 17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 18 | order = scores.argsort()[::-1] 19 | 20 | keep = [] 21 | while order.size > 0: 22 | i = order[0] 23 | keep.append(i) 24 | xx1 = np.maximum(x1[i], x1[order[1:]]) 25 | yy1 = np.maximum(y1[i], y1[order[1:]]) 26 | xx2 = np.minimum(x2[i], x2[order[1:]]) 27 | yy2 = np.minimum(y2[i], y2[order[1:]]) 28 | 29 | w = np.maximum(0.0, xx2 - xx1 + 1) 30 | h = np.maximum(0.0, yy2 - yy1 + 1) 31 | inter = w * h 32 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 33 | 34 | inds = np.where(ovr <= thresh)[0] 35 | order = order[inds + 1] 36 | 37 | return keep 38 | -------------------------------------------------------------------------------- /lib/utils/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 
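    # Worked example of the greedy suppression implemented below (hypothetical
    # detections, thresh = 0.5):
    #     A = (0, 0, 9, 9)      score 0.9
    #     B = (1, 1, 10, 10)    score 0.8
    #     C = (20, 20, 29, 29)  score 0.7
    # A has the top score and is kept; IoU(A, B) = 81 / (100 + 100 - 81) ~= 0.68
    # >= 0.5, so B is suppressed; IoU(A, C) = 0, so C is kept. keep == [A, C].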
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 99 | continue 100 | keep.append(i) 101 | ix1 = x1[i] 102 | iy1 = y1[i] 103 | ix2 = x2[i] 104 | iy2 = y2[i] 105 | iarea = areas[i] 106 | for _j in range(_i + 1, ndets): 107 | j = order[_j] 108 | if suppressed[j] == 1: 109 | continue 110 | xx1 = max(ix1, x1[j]) 111 | yy1 = max(iy1, y1[j]) 112 | xx2 = min(ix2, x2[j]) 113 | yy2 = min(iy2, y2[j]) 114 | w = max(0.0, xx2 - xx1 + 1) 115 | h = max(0.0, yy2 - yy1 + 1) 116 | inter = w * h 117 | ovr = inter / (iarea + areas[j] - inter) 118 | ovr1 = inter / iarea 119 | ovr2 = inter / areas[j] 120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95: 121 | suppressed[j] = 1 122 | 123 | return keep 124 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | 
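# A minimal usage sketch for the Timer class below (hypothetical caller code):
#
#     timer = Timer()
#     timer.tic()
#     do_work()                                  # the code being timed
#     print 'avg: {:.3f}s'.format(timer.toc())  # running mean over all calls
#
# toc(average=True) returns the mean over every tic/toc pair so far, while
# toc(average=False) returns only the elapsed time of the last interval.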
import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Set up paths for Fast R-CNN.""" 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | def add_path(path): 14 | if path not in sys.path: 15 | sys.path.insert(0, path) 16 | 17 | this_dir = osp.dirname(__file__) 18 | 19 | # Add caffe to PYTHONPATH 20 | caffe_path = osp.join(this_dir, '..', 'caffe-fast-rcnn', 'python') 21 | add_path(caffe_path) 22 | 23 | # Add lib to PYTHONPATH 24 | lib_path = osp.join(this_dir, '..', 'lib') 25 | add_path(lib_path) 26 | 27 | 28 | lib_path = osp.join(this_dir,'mftracker') 29 | add_path(lib_path) 30 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import tensorflow as tf 3 | from fast_rcnn.config import cfg 4 | from fast_rcnn.test import im_detect 5 | from fast_rcnn.nms_wrapper import nms 6 | from utils.timer import Timer 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import os, sys, cv2 10 | import argparse 11 | from networks.factory import get_network 12 | 13 | 14 | CLASSES = ('__background__', 15 | 'aeroplane', 'bicycle', 'bird', 'boat', 16 | 'bottle', 'bus', 'car', 'cat', 'chair', 17 | 'cow', 'diningtable', 'dog', 'horse', 18 | 'motorbike', 'person', 'pottedplant', 19 | 'sheep', 'sofa', 'train', 'tvmonitor') 20 | 21 | 22 | #CLASSES = ('__background__','person','bike','motorbike','car','bus') 23 | 24 | def vis_detections(im, class_name, dets, ax, thresh=0.5): 25 | """Draw detected bounding boxes.""" 26 | inds = np.where(dets[:, -1] >= thresh)[0] 27 | if len(inds) == 0: 28 | return 29 | 30 | for i in inds: 31 | bbox = dets[i, :4] 32 | score = dets[i, -1] 33 | 34 | ax.add_patch( 35 | plt.Rectangle((bbox[0], bbox[1]), 36 | bbox[2] - bbox[0], 37 | bbox[3] - bbox[1], fill=False, 38 | edgecolor='red', linewidth=3.5) 39 | ) 40 | ax.text(bbox[0], bbox[1] - 2, 41 | '{:s} {:.3f}'.format(class_name, score), 42 | bbox=dict(facecolor='blue', alpha=0.5), 43 | fontsize=14, color='white') 44 | 45 | ax.set_title(('{} detections with ' 46 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 47 | thresh), 48 | fontsize=14) 49 | plt.axis('off') 50 | plt.tight_layout() 51 | plt.draw() 52 | 53 | 54 | def demo(sess, net, image_name): 55 | """Detect object classes in an image using pre-computed object proposals.""" 56 | 57 | # Load the demo image 58 | im_file = os.path.join(cfg.DATA_DIR,
'demo', image_name) 59 | #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/',image_name) 60 | im = cv2.imread(im_file) 61 | 62 | # Detect all object classes and regress object bounds 63 | timer = Timer() 64 | timer.tic() 65 | scores, boxes = im_detect(sess, net, im) 66 | timer.toc() 67 | print ('Detection took {:.3f}s for ' 68 | '{:d} object proposals').format(timer.total_time, boxes.shape[0]) 69 | 70 | # Visualize detections for each class 71 | im = im[:, :, (2, 1, 0)] 72 | fig, ax = plt.subplots(figsize=(12, 12)) 73 | ax.imshow(im, aspect='equal') 74 | 75 | CONF_THRESH = 0.8 76 | NMS_THRESH = 0.3 77 | for cls_ind, cls in enumerate(CLASSES[1:]): 78 | cls_ind += 1 # because we skipped background 79 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 80 | cls_scores = scores[:, cls_ind] 81 | dets = np.hstack((cls_boxes, 82 | cls_scores[:, np.newaxis])).astype(np.float32) 83 | keep = nms(dets, NMS_THRESH) 84 | dets = dets[keep, :] 85 | vis_detections(im, cls, dets, ax, thresh=CONF_THRESH) 86 | 87 | def parse_args(): 88 | """Parse input arguments.""" 89 | parser = argparse.ArgumentParser(description='Faster R-CNN demo') 90 | parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', 91 | default=0, type=int) 92 | parser.add_argument('--cpu', dest='cpu_mode', 93 | help='Use CPU mode (overrides --gpu)', 94 | action='store_true') 95 | parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]', 96 | default='VGGnet_test') 97 | parser.add_argument('--model', dest='model', help='Model path', 98 | default=' ') 99 | 100 | args = parser.parse_args() 101 | 102 | return args 103 | if __name__ == '__main__': 104 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 105 | 106 | args = parse_args() 107 | 108 | if args.model == ' ': 109 | raise IOError(('Error: Model not found.\n')) 110 | 111 | # init session 112 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 113 | # load network 114 | net = get_network(args.demo_net) 115 | # load model 116 | saver = tf.train.Saver(write_version=tf.train.SaverDef.V1) 117 | saver.restore(sess, args.model) 118 | 119 | #sess.run(tf.initialize_all_variables()) 120 | 121 | print '\n\nLoaded network {:s}'.format(args.model) 122 | 123 | # Warmup on a dummy image 124 | im = 128 * np.ones((300, 300, 3), dtype=np.uint8) 125 | for i in xrange(2): 126 | _, _= im_detect(sess, net, im) 127 | 128 | im_names = ['000456.jpg', '000542.jpg', '001150.jpg', 129 | '001763.jpg', '004545.jpg'] 130 | 131 | 132 | for im_name in im_names: 133 | print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' 134 | print 'Demo for data/demo/{}'.format(im_name) 135 | demo(sess, net, im_name) 136 | 137 | plt.show() 138 | 139 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Test a Fast R-CNN network on an image database.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.test import test_net 14 | from fast_rcnn.config import cfg, cfg_from_file 15 | from datasets.factory import get_imdb 16 | from networks.factory import get_network 17 | import argparse 18 | import pprint 19 | import time, os, sys 20 | 
import tensorflow as tf 21 | 22 | def parse_args(): 23 | """ 24 | Parse input arguments 25 | """ 26 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 27 | parser.add_argument('--device', dest='device', help='device to use', 28 | default='cpu', type=str) 29 | parser.add_argument('--device_id', dest='device_id', help='device id to use', 30 | default=0, type=int) 31 | parser.add_argument('--def', dest='prototxt', 32 | help='prototxt file defining the network', 33 | default=None, type=str) 34 | parser.add_argument('--weights', dest='model', 35 | help='model to test', 36 | default=None, type=str) 37 | parser.add_argument('--cfg', dest='cfg_file', 38 | help='optional config file', default=None, type=str) 39 | parser.add_argument('--wait', dest='wait', 40 | help='wait until net file exists', 41 | default=True, type=bool) 42 | parser.add_argument('--imdb', dest='imdb_name', 43 | help='dataset to test', 44 | default='voc_2007_test', type=str) 45 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 46 | action='store_true') 47 | parser.add_argument('--network', dest='network_name', 48 | help='name of the network', 49 | default=None, type=str) 50 | 51 | if len(sys.argv) == 1: 52 | parser.print_help() 53 | sys.exit(1) 54 | 55 | args = parser.parse_args() 56 | return args 57 | 58 | if __name__ == '__main__': 59 | args = parse_args() 60 | 61 | print('Called with args:') 62 | print(args) 63 | 64 | if args.cfg_file is not None: 65 | cfg_from_file(args.cfg_file) 66 | 67 | print('Using config:') 68 | pprint.pprint(cfg) 69 | 70 | while not os.path.exists(args.model) and args.wait: 71 | print('Waiting for {} to exist...'.format(args.model)) 72 | time.sleep(10) 73 | 74 | weights_filename = os.path.splitext(os.path.basename(args.model))[0] 75 | 76 | imdb = get_imdb(args.imdb_name) 77 | imdb.competition_mode(args.comp_mode) 78 | 79 | device_name = '/{}:{:d}'.format(args.device,args.device_id) 80 | print device_name 81 | 82 | network = get_network(args.network_name) 83 | print 'Use network `{:s}` in testing'.format(args.network_name) 84 | 85 | if args.device == 'gpu': 86 | cfg.USE_GPU_NMS = True 87 | cfg.GPU_ID = args.device_id 88 | else: 89 | cfg.USE_GPU_NMS = False 90 | 91 | # start a session 92 | saver = tf.train.Saver() 93 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 94 | saver.restore(sess, args.model) 95 | print ('Loaded model weights from {:s}').format(args.model) 96 | 97 | test_net(sess, network, imdb, weights_filename) 98 | -------------------------------------------------------------------------------- /tools/train_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Train a Fast R-CNN network on a region of interest database.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.train import get_training_roidb, train_net 14 | from fast_rcnn.config import cfg,cfg_from_file, cfg_from_list, get_output_dir 15 | from datasets.factory import get_imdb 16 | from networks.factory import get_network 17 | import argparse 18 | import pprint 19 | import numpy as np 20 | import sys 21 | import pdb 22 | 23 | def parse_args(): 24 | """ 25 | Parse input arguments 26 | """ 27 | parser =
argparse.ArgumentParser(description='Train a Fast R-CNN network') 28 | parser.add_argument('--device', dest='device', help='device to use', 29 | default='cpu', type=str) 30 | parser.add_argument('--device_id', dest='device_id', help='device id to use', 31 | default=0, type=int) 32 | parser.add_argument('--solver', dest='solver', 33 | help='solver prototxt', 34 | default=None, type=str) 35 | parser.add_argument('--iters', dest='max_iters', 36 | help='number of iterations to train', 37 | default=70000, type=int) 38 | parser.add_argument('--weights', dest='pretrained_model', 39 | help='initialize with pretrained model weights', 40 | default=None, type=str) 41 | parser.add_argument('--cfg', dest='cfg_file', 42 | help='optional config file', 43 | default=None, type=str) 44 | parser.add_argument('--imdb', dest='imdb_name', 45 | help='dataset to train on', 46 | default='kitti_train', type=str) 47 | parser.add_argument('--rand', dest='randomize', 48 | help='randomize (do not use a fixed seed)', 49 | action='store_true') 50 | parser.add_argument('--network', dest='network_name', 51 | help='name of the network', 52 | default=None, type=str) 53 | parser.add_argument('--set', dest='set_cfgs', 54 | help='set config keys', default=None, 55 | nargs=argparse.REMAINDER) 56 | 57 | if len(sys.argv) == 1: 58 | parser.print_help() 59 | sys.exit(1) 60 | 61 | args = parser.parse_args() 62 | return args 63 | 64 | if __name__ == '__main__': 65 | args = parse_args() 66 | 67 | print('Called with args:') 68 | print(args) 69 | 70 | if args.cfg_file is not None: 71 | cfg_from_file(args.cfg_file) 72 | if args.set_cfgs is not None: 73 | cfg_from_list(args.set_cfgs) 74 | 75 | print('Using config:') 76 | pprint.pprint(cfg) 77 | 78 | if not args.randomize: 79 | # fix the random seeds (numpy and caffe) for reproducibility 80 | np.random.seed(cfg.RNG_SEED) 81 | imdb = get_imdb(args.imdb_name) 82 | print 'Loaded dataset `{:s}` for training'.format(imdb.name) 83 | roidb = get_training_roidb(imdb) 84 | 85 | output_dir = get_output_dir(imdb, None) 86 | print 'Output will be saved to `{:s}`'.format(output_dir) 87 | 88 | device_name = '/{}:{:d}'.format(args.device,args.device_id) 89 | print device_name 90 | 91 | network = get_network(args.network_name) 92 | print 'Use network `{:s}` in training'.format(args.network_name) 93 | 94 | train_net(network, imdb, roidb, output_dir, 95 | pretrained_model=args.pretrained_model, 96 | max_iters=args.max_iters) 97 | --------------------------------------------------------------------------------