├── LICENSE ├── README.md └── lib ├── Makefile ├── datasets ├── VOCdevkit-matlab-wrapper │ ├── get_voc_opts.m │ ├── voc_eval.m │ └── xVOCap.m ├── __init__.py ├── coco.py ├── ds_utils.py ├── factory.py ├── imdb.py ├── pascal_voc.py ├── tools │ └── mcg_munge.py └── voc_eval.py ├── fast_rcnn ├── __init__.py ├── bbox_transform.py ├── config.py ├── nms_wrapper.py ├── test.py └── train.py ├── nms ├── .gitignore ├── __init__.py ├── __init__.pyc ├── cpu_nms.pyd ├── cpu_nms.pyx ├── gpu_nms.cu ├── gpu_nms.hpp ├── gpu_nms.pyd ├── gpu_nms.pyx ├── nms_kernel.cu └── py_cpu_nms.py ├── pycocotools ├── UPSTREAM_REV ├── __init__.py ├── _mask.pyx ├── coco.py ├── cocoeval.py ├── license.txt ├── mask.py ├── maskApi.c └── maskApi.h ├── roi_data_layer ├── __init__.py ├── layer.py ├── minibatch.py └── roidb.py ├── rpn ├── README.md ├── __init__.py ├── anchor_target_layer.py ├── generate.py ├── generate_anchors.py ├── proposal_layer.py └── proposal_target_layer.py ├── setup.py ├── setup_cuda.py ├── transform ├── __init__.py └── torch_image_transform_layer.py └── utils ├── .gitignore ├── __init__.py ├── bbox.pyx ├── blob.py └── timer.py /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 MrGF 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # py-faster-rcnn-windows 2 | py-faster-rcnn that compiles on Windows directly 3 | 4 | Usage: 5 | 6 | 1. Download py-faster-rcnn from this url: 7 | https://github.com/rbgirshick/py-faster-rcnn 8 | 9 | That version of faster-rcnn compiles on Linux directly. 10 | 11 | 2. For Windows users, download the lib from this url: 12 | https://github.com/MrGF/py-faster-rcnn-windows 13 | 14 | and use it to replace the original lib provided by rbgirshick. 15 | 16 | Then you can run: 17 | python setup.py build_ext --inplace 18 | python setup_cuda.py build_ext --inplace 19 | to compile the lib on Windows directly. Enjoy! 20 | 21 | 22 | Note: 23 | Please see the LICENSE on https://github.com/rbgirshick/py-faster-rcnn for details. 
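A quick way to sanity-check the compiled extensions (an illustrative sketch, not a file from this repo; it assumes the build succeeded and that you run it from inside the lib directory):

    import numpy as np
    from nms.cpu_nms import cpu_nms   # built by setup.py from cpu_nms.pyx

    # Boxes as rows of (x1, y1, x2, y2, score); the first two overlap heavily.
    dets = np.array([[10, 10, 50, 50, 0.9],
                     [12, 12, 52, 52, 0.8],
                     [100, 100, 150, 150, 0.7]], dtype=np.float32)
    print(cpu_nms(dets, 0.3))   # indices kept at IoU threshold 0.3 -> [0, 2]

If the import fails, the .pyd extensions under nms/ were not built or lib is not on your Python path.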
24 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 
54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | 7 | import numpy as np 8 | 9 | def unique_boxes(boxes, scale=1.0): 10 | """Return indices of unique boxes.""" 11 | v = np.array([1, 1e3, 1e6, 1e9]) 12 | hashes = np.round(boxes * scale).dot(v) 13 | _, index = np.unique(hashes, return_index=True) 14 | return np.sort(index) 15 | 16 | def xywh_to_xyxy(boxes): 17 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 18 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 19 | 20 | def xyxy_to_xywh(boxes): 21 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 22 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 23 | 24 | def validate_boxes(boxes, width=0, height=0): 25 | """Check that a set of boxes are valid.""" 26 | x1 = boxes[:, 0] 27 | y1 = boxes[:, 1] 28 | x2 = boxes[:, 2] 29 | y2 = boxes[:, 3] 30 | assert (x1 >= 0).all() 31 | assert (y1 >= 0).all() 32 | assert (x2 >= x1).all() 33 | assert (y2 >= y1).all() 34 | assert (x2 < width).all() 35 | assert (y2 < height).all() 36 | 37 | def filter_small_boxes(boxes, min_size): 38 | w = boxes[:, 2] - boxes[:, 0] 39 | h = boxes[:, 3] - boxes[:, 1] 40 | keep = np.where((w >= min_size) & (h > min_size))[0] 41 | return keep 42 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | from datasets.pascal_voc import pascal_voc 13 | from datasets.coco import coco 14 | import numpy as np 15 | 16 | # Set up voc__ using selective search "fast" mode 17 | for year in ['2007', '2012']: 18 | for split in ['train', 'val', 'trainval', 'test']: 19 | name = 'voc_{}_{}'.format(year, split) 20 | 
__sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 21 | 22 | # Set up coco_2014_ 23 | for year in ['2014']: 24 | for split in ['train', 'val', 'minival', 'valminusminival']: 25 | name = 'coco_{}_{}'.format(year, split) 26 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 27 | 28 | # Set up coco_2015_ 29 | for year in ['2015']: 30 | for split in ['test', 'test-dev']: 31 | name = 'coco_{}_{}'.format(year, split) 32 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 33 | 34 | def get_imdb(name): 35 | """Get an imdb (image database) by name.""" 36 | if not __sets.has_key(name): 37 | raise KeyError('Unknown dataset: {}'.format(name)) 38 | return __sets[name]() 39 | 40 | def list_imdbs(): 41 | """List all registered imdbs.""" 42 | return __sets.keys() 43 | -------------------------------------------------------------------------------- /lib/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import os.path as osp 10 | import PIL 11 | from utils.cython_bbox import bbox_overlaps 12 | import numpy as np 13 | import scipy.sparse 14 | from fast_rcnn.config import cfg 15 | 16 | class imdb(object): 17 | """Image database.""" 18 | 19 | def __init__(self, name): 20 | self._name = name 21 | self._num_classes = 0 22 | self._classes = [] 23 | self._image_index = [] 24 | self._obj_proposer = 'selective_search' 25 | self._roidb = None 26 | self._roidb_handler = self.default_roidb 27 | # Use this dict for storing dataset specific config options 28 | self.config = {} 29 | 30 | @property 31 | def name(self): 32 | return self._name 33 | 34 | @property 35 | def num_classes(self): 36 | return len(self._classes) 37 | 38 | @property 39 | def classes(self): 40 | return self._classes 41 | 42 | @property 43 | def image_index(self): 44 | return self._image_index 45 | 46 | @property 47 | def roidb_handler(self): 48 | return self._roidb_handler 49 | 50 | @roidb_handler.setter 51 | def roidb_handler(self, val): 52 | self._roidb_handler = val 53 | 54 | def set_proposal_method(self, method): 55 | method = eval('self.' + method + '_roidb') 56 | self.roidb_handler = method 57 | 58 | @property 59 | def roidb(self): 60 | # A roidb is a list of dictionaries, each with the following keys: 61 | # boxes 62 | # gt_overlaps 63 | # gt_classes 64 | # flipped 65 | if self._roidb is not None: 66 | return self._roidb 67 | self._roidb = self.roidb_handler() 68 | return self._roidb 69 | 70 | @property 71 | def cache_path(self): 72 | cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache')) 73 | if not os.path.exists(cache_path): 74 | os.makedirs(cache_path) 75 | return cache_path 76 | 77 | @property 78 | def num_images(self): 79 | return len(self.image_index) 80 | 81 | def image_path_at(self, i): 82 | raise NotImplementedError 83 | 84 | def default_roidb(self): 85 | raise NotImplementedError 86 | 87 | def evaluate_detections(self, all_boxes, output_dir=None): 88 | """ 89 | all_boxes is a list of length number-of-classes. 90 | Each list element is a list of length number-of-images. 91 | Each of those list elements is either an empty list [] 92 | or a numpy array of detection. 
93 | 94 | all_boxes[class][image] = [] or np.array of shape #dets x 5 95 | """ 96 | raise NotImplementedError 97 | 98 | def _get_widths(self): 99 | return [PIL.Image.open(self.image_path_at(i)).size[0] 100 | for i in xrange(self.num_images)] 101 | 102 | def append_flipped_images(self): 103 | num_images = self.num_images 104 | widths = self._get_widths() 105 | for i in xrange(num_images): 106 | boxes = self.roidb[i]['boxes'].copy() 107 | oldx1 = boxes[:, 0].copy() 108 | oldx2 = boxes[:, 2].copy() 109 | boxes[:, 0] = widths[i] - oldx2 - 1 110 | boxes[:, 2] = widths[i] - oldx1 - 1 111 | assert (boxes[:, 2] >= boxes[:, 0]).all() 112 | entry = {'boxes' : boxes, 113 | 'gt_overlaps' : self.roidb[i]['gt_overlaps'], 114 | 'gt_classes' : self.roidb[i]['gt_classes'], 115 | 'flipped' : True} 116 | self.roidb.append(entry) 117 | self._image_index = self._image_index * 2 118 | 119 | def evaluate_recall(self, candidate_boxes=None, thresholds=None, 120 | area='all', limit=None): 121 | """Evaluate detection proposal recall metrics. 122 | 123 | Returns: 124 | results: dictionary of results with keys 125 | 'ar': average recall 126 | 'recalls': vector recalls at each IoU overlap threshold 127 | 'thresholds': vector of IoU overlap thresholds 128 | 'gt_overlaps': vector of all ground-truth overlaps 129 | """ 130 | # Record max overlap value for each gt box 131 | # Return vector of overlap values 132 | areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3, 133 | '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7} 134 | area_ranges = [ [0**2, 1e5**2], # all 135 | [0**2, 32**2], # small 136 | [32**2, 96**2], # medium 137 | [96**2, 1e5**2], # large 138 | [96**2, 128**2], # 96-128 139 | [128**2, 256**2], # 128-256 140 | [256**2, 512**2], # 256-512 141 | [512**2, 1e5**2], # 512-inf 142 | ] 143 | assert areas.has_key(area), 'unknown area range: {}'.format(area) 144 | area_range = area_ranges[areas[area]] 145 | gt_overlaps = np.zeros(0) 146 | num_pos = 0 147 | for i in xrange(self.num_images): 148 | # Checking for max_overlaps == 1 avoids including crowd annotations 149 | # (...pretty hacking :/) 150 | max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1) 151 | gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) & 152 | (max_gt_overlaps == 1))[0] 153 | gt_boxes = self.roidb[i]['boxes'][gt_inds, :] 154 | gt_areas = self.roidb[i]['seg_areas'][gt_inds] 155 | valid_gt_inds = np.where((gt_areas >= area_range[0]) & 156 | (gt_areas <= area_range[1]))[0] 157 | gt_boxes = gt_boxes[valid_gt_inds, :] 158 | num_pos += len(valid_gt_inds) 159 | 160 | if candidate_boxes is None: 161 | # If candidate_boxes is not supplied, the default is to use the 162 | # non-ground-truth boxes from this roidb 163 | non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0] 164 | boxes = self.roidb[i]['boxes'][non_gt_inds, :] 165 | else: 166 | boxes = candidate_boxes[i] 167 | if boxes.shape[0] == 0: 168 | continue 169 | if limit is not None and boxes.shape[0] > limit: 170 | boxes = boxes[:limit, :] 171 | 172 | overlaps = bbox_overlaps(boxes.astype(np.float), 173 | gt_boxes.astype(np.float)) 174 | 175 | _gt_overlaps = np.zeros((gt_boxes.shape[0])) 176 | for j in xrange(gt_boxes.shape[0]): 177 | # find which proposal box maximally covers each gt box 178 | argmax_overlaps = overlaps.argmax(axis=0) 179 | # and get the iou amount of coverage for each gt box 180 | max_overlaps = overlaps.max(axis=0) 181 | # find which gt box is 'best' covered (i.e. 
'best' = most iou) 182 | gt_ind = max_overlaps.argmax() 183 | gt_ovr = max_overlaps.max() 184 | assert(gt_ovr >= 0) 185 | # find the proposal box that covers the best covered gt box 186 | box_ind = argmax_overlaps[gt_ind] 187 | # record the iou coverage of this gt box 188 | _gt_overlaps[j] = overlaps[box_ind, gt_ind] 189 | assert(_gt_overlaps[j] == gt_ovr) 190 | # mark the proposal box and the gt box as used 191 | overlaps[box_ind, :] = -1 192 | overlaps[:, gt_ind] = -1 193 | # append recorded iou coverage level 194 | gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) 195 | 196 | gt_overlaps = np.sort(gt_overlaps) 197 | if thresholds is None: 198 | step = 0.05 199 | thresholds = np.arange(0.5, 0.95 + 1e-5, step) 200 | recalls = np.zeros_like(thresholds) 201 | # compute recall for each iou threshold 202 | for i, t in enumerate(thresholds): 203 | recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) 204 | # ar = 2 * np.trapz(recalls, thresholds) 205 | ar = recalls.mean() 206 | return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds, 207 | 'gt_overlaps': gt_overlaps} 208 | 209 | def create_roidb_from_box_list(self, box_list, gt_roidb): 210 | assert len(box_list) == self.num_images, \ 211 | 'Number of boxes must match number of ground-truth images' 212 | roidb = [] 213 | for i in xrange(self.num_images): 214 | boxes = box_list[i] 215 | num_boxes = boxes.shape[0] 216 | overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32) 217 | 218 | if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0: 219 | gt_boxes = gt_roidb[i]['boxes'] 220 | gt_classes = gt_roidb[i]['gt_classes'] 221 | gt_overlaps = bbox_overlaps(boxes.astype(np.float), 222 | gt_boxes.astype(np.float)) 223 | argmaxes = gt_overlaps.argmax(axis=1) 224 | maxes = gt_overlaps.max(axis=1) 225 | I = np.where(maxes > 0)[0] 226 | overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] 227 | 228 | overlaps = scipy.sparse.csr_matrix(overlaps) 229 | roidb.append({ 230 | 'boxes' : boxes, 231 | 'gt_classes' : np.zeros((num_boxes,), dtype=np.int32), 232 | 'gt_overlaps' : overlaps, 233 | 'flipped' : False, 234 | 'seg_areas' : np.zeros((num_boxes,), dtype=np.float32), 235 | }) 236 | return roidb 237 | 238 | @staticmethod 239 | def merge_roidbs(a, b): 240 | assert len(a) == len(b) 241 | for i in xrange(len(a)): 242 | a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) 243 | a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], 244 | b[i]['gt_classes'])) 245 | a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'], 246 | b[i]['gt_overlaps']]) 247 | a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'], 248 | b[i]['seg_areas'])) 249 | return a 250 | 251 | def competition_mode(self, on): 252 | """Turn competition mode on or off.""" 253 | pass 254 | -------------------------------------------------------------------------------- /lib/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from datasets.imdb import imdb 10 | import datasets.ds_utils as ds_utils 11 | import xml.etree.ElementTree as ET 12 | import numpy as np 13 | import scipy.sparse 14 | import scipy.io as sio 15 | import utils.cython_bbox 16 | import cPickle 17 | import subprocess 18 | import uuid 19 | from voc_eval import 
voc_eval 20 | from fast_rcnn.config import cfg 21 | 22 | class pascal_voc(imdb): 23 | def __init__(self, image_set, year, devkit_path=None): 24 | imdb.__init__(self, 'voc_' + year + '_' + image_set) 25 | self._year = year 26 | self._image_set = image_set 27 | self._devkit_path = self._get_default_path() if devkit_path is None \ 28 | else devkit_path 29 | self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year) 30 | self._classes = ('__background__', # always index 0 31 | 'aeroplane', 'bicycle', 'bird', 'boat', 32 | 'bottle', 'bus', 'car', 'cat', 'chair', 33 | 'cow', 'diningtable', 'dog', 'horse', 34 | 'motorbike', 'person', 'pottedplant', 35 | 'sheep', 'sofa', 'train', 'tvmonitor') 36 | self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes))) 37 | self._image_ext = '.jpg' 38 | self._image_index = self._load_image_set_index() 39 | # Default to roidb handler 40 | self._roidb_handler = self.selective_search_roidb 41 | self._salt = str(uuid.uuid4()) 42 | self._comp_id = 'comp4' 43 | 44 | # PASCAL specific config options 45 | self.config = {'cleanup' : True, 46 | 'use_salt' : True, 47 | 'use_diff' : False, 48 | 'matlab_eval' : False, 49 | 'rpn_file' : None, 50 | 'min_size' : 2} 51 | 52 | assert os.path.exists(self._devkit_path), \ 53 | 'VOCdevkit path does not exist: {}'.format(self._devkit_path) 54 | assert os.path.exists(self._data_path), \ 55 | 'Path does not exist: {}'.format(self._data_path) 56 | 57 | def image_path_at(self, i): 58 | """ 59 | Return the absolute path to image i in the image sequence. 60 | """ 61 | return self.image_path_from_index(self._image_index[i]) 62 | 63 | def image_path_from_index(self, index): 64 | """ 65 | Construct an image path from the image's "index" identifier. 66 | """ 67 | image_path = os.path.join(self._data_path, 'JPEGImages', 68 | index + self._image_ext) 69 | assert os.path.exists(image_path), \ 70 | 'Path does not exist: {}'.format(image_path) 71 | return image_path 72 | 73 | def _load_image_set_index(self): 74 | """ 75 | Load the indexes listed in this dataset's image set file. 76 | """ 77 | # Example path to image set file: 78 | # self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt 79 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main', 80 | self._image_set + '.txt') 81 | assert os.path.exists(image_set_file), \ 82 | 'Path does not exist: {}'.format(image_set_file) 83 | with open(image_set_file) as f: 84 | image_index = [x.strip() for x in f.readlines()] 85 | return image_index 86 | 87 | def _get_default_path(self): 88 | """ 89 | Return the default path where PASCAL VOC is expected to be installed. 90 | """ 91 | return os.path.join(cfg.DATA_DIR, 'VOCdevkit' + self._year) 92 | 93 | def gt_roidb(self): 94 | """ 95 | Return the database of ground-truth regions of interest. 96 | 97 | This function loads/saves from/to a cache file to speed up future calls. 
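        N.B. (added note) the cached roidb is returned unconditionally whenever
        the cache file exists, so delete data/cache/<name>_gt_roidb.pkl after
        changing the image set or the annotations; otherwise the stale copy is
        loaded.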
98 | """ 99 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 100 | if os.path.exists(cache_file): 101 | with open(cache_file, 'rb') as fid: 102 | roidb = cPickle.load(fid) 103 | print '{} gt roidb loaded from {}'.format(self.name, cache_file) 104 | return roidb 105 | 106 | gt_roidb = [self._load_pascal_annotation(index) 107 | for index in self.image_index] 108 | with open(cache_file, 'wb') as fid: 109 | cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL) 110 | print 'wrote gt roidb to {}'.format(cache_file) 111 | 112 | return gt_roidb 113 | 114 | def selective_search_roidb(self): 115 | """ 116 | Return the database of selective search regions of interest. 117 | Ground-truth ROIs are also included. 118 | 119 | This function loads/saves from/to a cache file to speed up future calls. 120 | """ 121 | cache_file = os.path.join(self.cache_path, 122 | self.name + '_selective_search_roidb.pkl') 123 | 124 | if os.path.exists(cache_file): 125 | with open(cache_file, 'rb') as fid: 126 | roidb = cPickle.load(fid) 127 | print '{} ss roidb loaded from {}'.format(self.name, cache_file) 128 | return roidb 129 | 130 | if int(self._year) == 2007 or self._image_set != 'test': 131 | gt_roidb = self.gt_roidb() 132 | ss_roidb = self._load_selective_search_roidb(gt_roidb) 133 | roidb = imdb.merge_roidbs(gt_roidb, ss_roidb) 134 | else: 135 | roidb = self._load_selective_search_roidb(None) 136 | with open(cache_file, 'wb') as fid: 137 | cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) 138 | print 'wrote ss roidb to {}'.format(cache_file) 139 | 140 | return roidb 141 | 142 | def rpn_roidb(self): 143 | if int(self._year) == 2007 or self._image_set != 'test': 144 | gt_roidb = self.gt_roidb() 145 | rpn_roidb = self._load_rpn_roidb(gt_roidb) 146 | roidb = imdb.merge_roidbs(gt_roidb, rpn_roidb) 147 | else: 148 | roidb = self._load_rpn_roidb(None) 149 | 150 | return roidb 151 | 152 | def _load_rpn_roidb(self, gt_roidb): 153 | filename = self.config['rpn_file'] 154 | print 'loading {}'.format(filename) 155 | assert os.path.exists(filename), \ 156 | 'rpn data not found at: {}'.format(filename) 157 | with open(filename, 'rb') as f: 158 | box_list = cPickle.load(f) 159 | return self.create_roidb_from_box_list(box_list, gt_roidb) 160 | 161 | def _load_selective_search_roidb(self, gt_roidb): 162 | filename = os.path.abspath(os.path.join(cfg.DATA_DIR, 163 | 'selective_search_data', 164 | self.name + '.mat')) 165 | assert os.path.exists(filename), \ 166 | 'Selective search data not found at: {}'.format(filename) 167 | raw_data = sio.loadmat(filename)['boxes'].ravel() 168 | 169 | box_list = [] 170 | for i in xrange(raw_data.shape[0]): 171 | boxes = raw_data[i][:, (1, 0, 3, 2)] - 1 172 | keep = ds_utils.unique_boxes(boxes) 173 | boxes = boxes[keep, :] 174 | keep = ds_utils.filter_small_boxes(boxes, self.config['min_size']) 175 | boxes = boxes[keep, :] 176 | box_list.append(boxes) 177 | 178 | return self.create_roidb_from_box_list(box_list, gt_roidb) 179 | 180 | def _load_pascal_annotation(self, index): 181 | """ 182 | Load image and bounding boxes info from XML file in the PASCAL VOC 183 | format. 
184 | """ 185 | filename = os.path.join(self._data_path, 'Annotations', index + '.xml') 186 | tree = ET.parse(filename) 187 | objs = tree.findall('object') 188 | if not self.config['use_diff']: 189 | # Exclude the samples labeled as difficult 190 | non_diff_objs = [ 191 | obj for obj in objs if int(obj.find('difficult').text) == 0] 192 | # if len(non_diff_objs) != len(objs): 193 | # print 'Removed {} difficult objects'.format( 194 | # len(objs) - len(non_diff_objs)) 195 | objs = non_diff_objs 196 | num_objs = len(objs) 197 | 198 | boxes = np.zeros((num_objs, 4), dtype=np.uint16) 199 | gt_classes = np.zeros((num_objs), dtype=np.int32) 200 | overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) 201 | # "Seg" area for pascal is just the box area 202 | seg_areas = np.zeros((num_objs), dtype=np.float32) 203 | 204 | # Load object bounding boxes into a data frame. 205 | for ix, obj in enumerate(objs): 206 | bbox = obj.find('bndbox') 207 | # Make pixel indexes 0-based 208 | x1 = float(bbox.find('xmin').text) - 1 209 | y1 = float(bbox.find('ymin').text) - 1 210 | x2 = float(bbox.find('xmax').text) - 1 211 | y2 = float(bbox.find('ymax').text) - 1 212 | cls = self._class_to_ind[obj.find('name').text.lower().strip()] 213 | boxes[ix, :] = [x1, y1, x2, y2] 214 | gt_classes[ix] = cls 215 | overlaps[ix, cls] = 1.0 216 | seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1) 217 | 218 | overlaps = scipy.sparse.csr_matrix(overlaps) 219 | 220 | return {'boxes' : boxes, 221 | 'gt_classes': gt_classes, 222 | 'gt_overlaps' : overlaps, 223 | 'flipped' : False, 224 | 'seg_areas' : seg_areas} 225 | 226 | def _get_comp_id(self): 227 | comp_id = (self._comp_id + '_' + self._salt if self.config['use_salt'] 228 | else self._comp_id) 229 | return comp_id 230 | 231 | def _get_voc_results_file_template(self): 232 | # VOCdevkit/results/VOC2007/Main/_det_test_aeroplane.txt 233 | filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt' 234 | path = os.path.join( 235 | self._devkit_path, 236 | 'results', 237 | 'VOC' + self._year, 238 | 'Main', 239 | filename) 240 | return path 241 | 242 | def _write_voc_results_file(self, all_boxes): 243 | for cls_ind, cls in enumerate(self.classes): 244 | if cls == '__background__': 245 | continue 246 | print 'Writing {} VOC results file'.format(cls) 247 | filename = self._get_voc_results_file_template().format(cls) 248 | with open(filename, 'wt') as f: 249 | for im_ind, index in enumerate(self.image_index): 250 | dets = all_boxes[cls_ind][im_ind] 251 | if dets == []: 252 | continue 253 | # the VOCdevkit expects 1-based indices 254 | for k in xrange(dets.shape[0]): 255 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 256 | format(index, dets[k, -1], 257 | dets[k, 0] + 1, dets[k, 1] + 1, 258 | dets[k, 2] + 1, dets[k, 3] + 1)) 259 | 260 | def _do_python_eval(self, output_dir = 'output'): 261 | annopath = os.path.join( 262 | self._devkit_path, 263 | 'VOC' + self._year, 264 | 'Annotations', 265 | '{:s}.xml') 266 | imagesetfile = os.path.join( 267 | self._devkit_path, 268 | 'VOC' + self._year, 269 | 'ImageSets', 270 | 'Main', 271 | self._image_set + '.txt') 272 | cachedir = os.path.join(self._devkit_path, 'annotations_cache') 273 | aps = [] 274 | # The PASCAL VOC metric changed in 2010 275 | use_07_metric = True if int(self._year) < 2010 else False 276 | print 'VOC07 metric? 
' + ('Yes' if use_07_metric else 'No') 277 | if not os.path.isdir(output_dir): 278 | os.mkdir(output_dir) 279 | for i, cls in enumerate(self._classes): 280 | if cls == '__background__': 281 | continue 282 | filename = self._get_voc_results_file_template().format(cls) 283 | rec, prec, ap = voc_eval( 284 | filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5, 285 | use_07_metric=use_07_metric) 286 | aps += [ap] 287 | print('AP for {} = {:.4f}'.format(cls, ap)) 288 | with open(os.path.join(output_dir, cls + '_pr.pkl'), 'w') as f: 289 | cPickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) 290 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 291 | print('~~~~~~~~') 292 | print('Results:') 293 | for ap in aps: 294 | print('{:.3f}'.format(ap)) 295 | print('{:.3f}'.format(np.mean(aps))) 296 | print('~~~~~~~~') 297 | print('') 298 | print('--------------------------------------------------------------') 299 | print('Results computed with the **unofficial** Python eval code.') 300 | print('Results should be very close to the official MATLAB eval code.') 301 | print('Recompute with `./tools/reval.py --matlab ...` for your paper.') 302 | print('-- Thanks, The Management') 303 | print('--------------------------------------------------------------') 304 | 305 | def _do_matlab_eval(self, output_dir='output'): 306 | print '-----------------------------------------------------' 307 | print 'Computing results with the official MATLAB eval code.' 308 | print '-----------------------------------------------------' 309 | path = os.path.join(cfg.ROOT_DIR, 'lib', 'datasets', 310 | 'VOCdevkit-matlab-wrapper') 311 | cmd = 'cd {} && '.format(path) 312 | cmd += '{:s} -nodisplay -nodesktop '.format(cfg.MATLAB) 313 | cmd += '-r "dbstop if error; ' 314 | cmd += 'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\'); quit;"' \ 315 | .format(self._devkit_path, self._get_comp_id(), 316 | self._image_set, output_dir) 317 | print('Running:\n{}'.format(cmd)) 318 | status = subprocess.call(cmd, shell=True) 319 | 320 | def evaluate_detections(self, all_boxes, output_dir): 321 | self._write_voc_results_file(all_boxes) 322 | self._do_python_eval(output_dir) 323 | if self.config['matlab_eval']: 324 | self._do_matlab_eval(output_dir) 325 | if self.config['cleanup']: 326 | for cls in self._classes: 327 | if cls == '__background__': 328 | continue 329 | filename = self._get_voc_results_file_template().format(cls) 330 | os.remove(filename) 331 | 332 | def competition_mode(self, on): 333 | if on: 334 | self.config['use_salt'] = False 335 | self.config['cleanup'] = False 336 | else: 337 | self.config['use_salt'] = True 338 | self.config['cleanup'] = True 339 | 340 | if __name__ == '__main__': 341 | from datasets.pascal_voc import pascal_voc 342 | d = pascal_voc('trainval', '2007') 343 | res = d.roidb 344 | from IPython import embed; embed() 345 | -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | """Hacky tool to convert file system layout of MCG boxes downloaded from 5 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 6 | so that it's consistent with those computed by Jan Hosang (see: 7 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 8 | computing/research/object-recognition-and-scene-understanding/how- 9 | good-are-detection-proposals-really/) 10 | 11 | NB: Boxes from the MCG website are in (y1, x1, y2, 
x2) order. 12 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 13 | """ 14 | 15 | def munge(src_dir): 16 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 17 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 18 | 19 | files = os.listdir(src_dir) 20 | for fn in files: 21 | base, ext = os.path.splitext(fn) 22 | # first 14 chars / first 22 chars / all chars + .mat 23 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 24 | first = base[:14] 25 | second = base[:22] 26 | dst_dir = os.path.join('MCG', 'mat', first, second) 27 | if not os.path.exists(dst_dir): 28 | os.makedirs(dst_dir) 29 | src = os.path.join(src_dir, fn) 30 | dst = os.path.join(dst_dir, fn) 31 | print 'MV: {} -> {}'.format(src, dst) 32 | os.rename(src, dst) 33 | 34 | if __name__ == '__main__': 35 | # src_dir should look something like: 36 | # src_dir = 'MCG-COCO-val2014-boxes' 37 | src_dir = sys.argv[1] 38 | munge(src_dir) 39 | -------------------------------------------------------------------------------- /lib/datasets/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | import xml.etree.ElementTree as ET 8 | import os 9 | import cPickle 10 | import numpy as np 11 | 12 | def parse_rec(filename): 13 | """ Parse a PASCAL VOC xml file """ 14 | tree = ET.parse(filename) 15 | objects = [] 16 | for obj in tree.findall('object'): 17 | obj_struct = {} 18 | obj_struct['name'] = obj.find('name').text 19 | obj_struct['pose'] = obj.find('pose').text 20 | obj_struct['truncated'] = int(obj.find('truncated').text) 21 | obj_struct['difficult'] = int(obj.find('difficult').text) 22 | bbox = obj.find('bndbox') 23 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 24 | int(bbox.find('ymin').text), 25 | int(bbox.find('xmax').text), 26 | int(bbox.find('ymax').text)] 27 | objects.append(obj_struct) 28 | 29 | return objects 30 | 31 | def voc_ap(rec, prec, use_07_metric=False): 32 | """ ap = voc_ap(rec, prec, [use_07_metric]) 33 | Compute VOC AP given precision and recall. 34 | If use_07_metric is true, uses the 35 | VOC 07 11 point method (default:False). 36 | """ 37 | if use_07_metric: 38 | # 11 point metric 39 | ap = 0. 40 | for t in np.arange(0., 1.1, 0.1): 41 | if np.sum(rec >= t) == 0: 42 | p = 0 43 | else: 44 | p = np.max(prec[rec >= t]) 45 | ap = ap + p / 11. 46 | else: 47 | # correct AP calculation 48 | # first append sentinel values at the end 49 | mrec = np.concatenate(([0.], rec, [1.])) 50 | mpre = np.concatenate(([0.], prec, [0.])) 51 | 52 | # compute the precision envelope 53 | for i in range(mpre.size - 1, 0, -1): 54 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 55 | 56 | # to calculate area under PR curve, look for points 57 | # where X axis (recall) changes value 58 | i = np.where(mrec[1:] != mrec[:-1])[0] 59 | 60 | # and sum (\Delta recall) * prec 61 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 62 | return ap 63 | 64 | def voc_eval(detpath, 65 | annopath, 66 | imagesetfile, 67 | classname, 68 | cachedir, 69 | ovthresh=0.5, 70 | use_07_metric=False): 71 | """rec, prec, ap = voc_eval(detpath, 72 | annopath, 73 | imagesetfile, 74 | classname, 75 | [ovthresh], 76 | [use_07_metric]) 77 | 78 | Top level function that does the PASCAL VOC evaluation. 
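    Illustrative call (a sketch mirroring pascal_voc._do_python_eval; the
    format strings are placeholders that voc_eval fills in per class/image):

        rec, prec, ap = voc_eval('results/comp4_det_test_{:s}.txt',
                                 'VOCdevkit2007/VOC2007/Annotations/{:s}.xml',
                                 'VOCdevkit2007/VOC2007/ImageSets/Main/test.txt',
                                 'car', 'annotations_cache',
                                 ovthresh=0.5, use_07_metric=True)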
79 | 80 | detpath: Path to detections 81 | detpath.format(classname) should produce the detection results file. 82 | annopath: Path to annotations 83 | annopath.format(imagename) should be the xml annotations file. 84 | imagesetfile: Text file containing the list of images, one image per line. 85 | classname: Category name (duh) 86 | cachedir: Directory for caching the annotations 87 | [ovthresh]: Overlap threshold (default = 0.5) 88 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 89 | (default False) 90 | """ 91 | # assumes detections are in detpath.format(classname) 92 | # assumes annotations are in annopath.format(imagename) 93 | # assumes imagesetfile is a text file with each line an image name 94 | # cachedir caches the annotations in a pickle file 95 | 96 | # first load gt 97 | if not os.path.isdir(cachedir): 98 | os.mkdir(cachedir) 99 | cachefile = os.path.join(cachedir, 'annots.pkl') 100 | # read list of images 101 | with open(imagesetfile, 'r') as f: 102 | lines = f.readlines() 103 | imagenames = [x.strip() for x in lines] 104 | 105 | if not os.path.isfile(cachefile): 106 | # load annots 107 | recs = {} 108 | for i, imagename in enumerate(imagenames): 109 | recs[imagename] = parse_rec(annopath.format(imagename)) 110 | if i % 100 == 0: 111 | print 'Reading annotation for {:d}/{:d}'.format( 112 | i + 1, len(imagenames)) 113 | # save 114 | print 'Saving cached annotations to {:s}'.format(cachefile) 115 | with open(cachefile, 'w') as f: 116 | cPickle.dump(recs, f) 117 | else: 118 | # load 119 | with open(cachefile, 'r') as f: 120 | recs = cPickle.load(f) 121 | 122 | # extract gt objects for this class 123 | class_recs = {} 124 | npos = 0 125 | for imagename in imagenames: 126 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 127 | bbox = np.array([x['bbox'] for x in R]) 128 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 129 | det = [False] * len(R) 130 | npos = npos + sum(~difficult) 131 | class_recs[imagename] = {'bbox': bbox, 132 | 'difficult': difficult, 133 | 'det': det} 134 | 135 | # read dets 136 | detfile = detpath.format(classname) 137 | with open(detfile, 'r') as f: 138 | lines = f.readlines() 139 | 140 | splitlines = [x.strip().split(' ') for x in lines] 141 | image_ids = [x[0] for x in splitlines] 142 | confidence = np.array([float(x[1]) for x in splitlines]) 143 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 144 | 145 | # sort by confidence 146 | sorted_ind = np.argsort(-confidence) 147 | sorted_scores = np.sort(-confidence) 148 | BB = BB[sorted_ind, :] 149 | image_ids = [image_ids[x] for x in sorted_ind] 150 | 151 | # go down dets and mark TPs and FPs 152 | nd = len(image_ids) 153 | tp = np.zeros(nd) 154 | fp = np.zeros(nd) 155 | for d in range(nd): 156 | R = class_recs[image_ids[d]] 157 | bb = BB[d, :].astype(float) 158 | ovmax = -np.inf 159 | BBGT = R['bbox'].astype(float) 160 | 161 | if BBGT.size > 0: 162 | # compute overlaps 163 | # intersection 164 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 165 | iymin = np.maximum(BBGT[:, 1], bb[1]) 166 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 167 | iymax = np.minimum(BBGT[:, 3], bb[3]) 168 | iw = np.maximum(ixmax - ixmin + 1., 0.) 169 | ih = np.maximum(iymax - iymin + 1., 0.) 170 | inters = iw * ih 171 | 172 | # union 173 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 174 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 175 | (BBGT[:, 3] - BBGT[:, 1] + 1.) 
- inters) 176 | 177 | overlaps = inters / uni 178 | ovmax = np.max(overlaps) 179 | jmax = np.argmax(overlaps) 180 | 181 | if ovmax > ovthresh: 182 | if not R['difficult'][jmax]: 183 | if not R['det'][jmax]: 184 | tp[d] = 1. 185 | R['det'][jmax] = 1 186 | else: 187 | fp[d] = 1. 188 | else: 189 | fp[d] = 1. 190 | 191 | # compute precision recall 192 | fp = np.cumsum(fp) 193 | tp = np.cumsum(tp) 194 | rec = tp / float(npos) 195 | # avoid divide by zero in case the first detection matches a difficult 196 | # ground truth 197 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 198 | ap = voc_ap(rec, prec, use_07_metric) 199 | 200 | return rec, prec, ap 201 | -------------------------------------------------------------------------------- /lib/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/fast_rcnn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def bbox_transform(ex_rois, gt_rois): 11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 12 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 15 | 16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 20 | 21 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 23 | targets_dw = np.log(gt_widths / ex_widths) 24 | targets_dh = np.log(gt_heights / ex_heights) 25 | 26 | targets = np.vstack( 27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 28 | return targets 29 | 30 | def bbox_transform_inv(boxes, deltas): 31 | if boxes.shape[0] == 0: 32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 33 | 34 | boxes = boxes.astype(deltas.dtype, copy=False) 35 | 36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 38 | ctr_x = boxes[:, 0] + 0.5 * widths 39 | ctr_y = boxes[:, 1] + 0.5 * heights 40 | 41 | dx = deltas[:, 0::4] 42 | dy = deltas[:, 1::4] 43 | dw = deltas[:, 2::4] 44 | dh = deltas[:, 3::4] 45 | 46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 48 | pred_w = np.exp(dw) * widths[:, np.newaxis] 49 | pred_h = np.exp(dh) * heights[:, np.newaxis] 50 | 51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 52 | # x1 53 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 54 | # y1 55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 56 | # x2 57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 58 | # y2 59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 60 | 61 | return pred_boxes 62 | 63 | def clip_boxes(boxes, im_shape): 64 | """ 65 | Clip 
boxes to image boundaries. 66 | """ 67 | 68 | # x1 >= 0 69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 70 | # y1 >= 0 71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 72 | # x2 < im_shape[1] 73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 74 | # y2 < im_shape[0] 75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 76 | return boxes 77 | -------------------------------------------------------------------------------- /lib/fast_rcnn/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Fast R-CNN config system. 9 | 10 | This file specifies default config options for Fast R-CNN. You should not 11 | change values in this file. Instead, you should write a config file (in yaml) 12 | and use cfg_from_file(yaml_file) to load it and override the default options. 13 | 14 | Most tools in $ROOT/tools take a --cfg option to specify an override file. 15 | - See tools/{train,test}_net.py for example code that uses cfg_from_file() 16 | - See experiments/cfgs/*.yml for example YAML config override files 17 | """ 18 | 19 | import os 20 | import os.path as osp 21 | import numpy as np 22 | # `pip install easydict` if you don't have it 23 | from easydict import EasyDict as edict 24 | 25 | __C = edict() 26 | # Consumers can get config by: 27 | # from fast_rcnn_config import cfg 28 | cfg = __C 29 | 30 | # 31 | # Training options 32 | # 33 | 34 | __C.TRAIN = edict() 35 | 36 | # Scales to use during training (can list multiple scales) 37 | # Each scale is the pixel size of an image's shortest side 38 | __C.TRAIN.SCALES = (600,) 39 | 40 | # Max pixel size of the longest side of a scaled input image 41 | __C.TRAIN.MAX_SIZE = 1000 42 | 43 | # Images to use per minibatch 44 | __C.TRAIN.IMS_PER_BATCH = 2 45 | 46 | # Minibatch size (number of regions of interest [ROIs]) 47 | __C.TRAIN.BATCH_SIZE = 128 48 | 49 | # Fraction of minibatch that is labeled foreground (i.e. class > 0) 50 | __C.TRAIN.FG_FRACTION = 0.25 51 | 52 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 53 | __C.TRAIN.FG_THRESH = 0.5 54 | 55 | # Overlap threshold for a ROI to be considered background (class = 0 if 56 | # overlap in [LO, HI)) 57 | __C.TRAIN.BG_THRESH_HI = 0.5 58 | __C.TRAIN.BG_THRESH_LO = 0.1 59 | 60 | # Use horizontally-flipped images during training? 
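# (Flipping is implemented by imdb.append_flipped_images(), which doubles the
#  image index and mirrors each box: x1' = width - x2 - 1, x2' = width - x1 - 1.)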
61 | __C.TRAIN.USE_FLIPPED = True 62 | 63 | # Train bounding-box regressors 64 | __C.TRAIN.BBOX_REG = True 65 | 66 | # Overlap required between a ROI and ground-truth box in order for that ROI to 67 | # be used as a bounding-box regression training example 68 | __C.TRAIN.BBOX_THRESH = 0.5 69 | 70 | # Iterations between snapshots 71 | __C.TRAIN.SNAPSHOT_ITERS = 10000 72 | 73 | # solver.prototxt specifies the snapshot path prefix; this adds an optional 74 | # infix to yield the path: <prefix>[_<infix>]_iters_XYZ.caffemodel 75 | __C.TRAIN.SNAPSHOT_INFIX = '' 76 | 77 | # Use a prefetch thread in roi_data_layer.layer 78 | # So far I haven't found this useful; likely more engineering work is required 79 | __C.TRAIN.USE_PREFETCH = False 80 | 81 | # Normalize the targets (subtract empirical mean, divide by empirical stddev) 82 | __C.TRAIN.BBOX_NORMALIZE_TARGETS = True 83 | # Deprecated (inside weights) 84 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 85 | # Normalize the targets using "precomputed" (or made up) means and stdevs 86 | # (BBOX_NORMALIZE_TARGETS must also be True) 87 | __C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False 88 | __C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 89 | __C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 90 | 91 | # Train using these proposals 92 | __C.TRAIN.PROPOSAL_METHOD = 'selective_search' 93 | 94 | # Make minibatches from images that have similar aspect ratios (i.e. both 95 | # tall and thin or both short and wide) in order to avoid wasting computation 96 | # on zero-padding. 97 | __C.TRAIN.ASPECT_GROUPING = True 98 | 99 | # Use RPN to detect objects 100 | __C.TRAIN.HAS_RPN = False 101 | # IOU >= thresh: positive example 102 | __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 103 | # IOU < thresh: negative example 104 | __C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 105 | # If an anchor satisfies both the positive and negative conditions, set it to negative 106 | __C.TRAIN.RPN_CLOBBER_POSITIVES = False 107 | # Max number of foreground examples 108 | __C.TRAIN.RPN_FG_FRACTION = 0.5 109 | # Total number of examples 110 | __C.TRAIN.RPN_BATCHSIZE = 256 111 | # NMS threshold used on RPN proposals 112 | __C.TRAIN.RPN_NMS_THRESH = 0.7 113 | # Number of top scoring boxes to keep before applying NMS to RPN proposals 114 | __C.TRAIN.RPN_PRE_NMS_TOP_N = 12000 115 | # Number of top scoring boxes to keep after applying NMS to RPN proposals 116 | __C.TRAIN.RPN_POST_NMS_TOP_N = 2000 117 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) 118 | __C.TRAIN.RPN_MIN_SIZE = 16 119 | # Deprecated (outside weights) 120 | __C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 121 | # Give the positive RPN examples weight of p * 1 / {num positives} 122 | # and give negatives a weight of (1 - p) 123 | # Set to -1.0 to use uniform example weighting 124 | __C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 125 | 126 | 127 | # 128 | # Testing options 129 | # 130 | 131 | __C.TEST = edict() 132 | 133 | # Scales to use during testing (can list multiple scales) 134 | # Each scale is the pixel size of an image's shortest side 135 | __C.TEST.SCALES = (600,) 136 | 137 | # Max pixel size of the longest side of a scaled input image 138 | __C.TEST.MAX_SIZE = 1000 139 | 140 | # Overlap threshold used for non-maximum suppression (suppress boxes with 141 | # IoU >= this threshold) 142 | __C.TEST.NMS = 0.3 143 | 144 | # Experimental: treat the (K+1) units in the cls_score layer as linear 145 | # predictors (trained, e.g., with one-vs-rest SVMs). 
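# (When True, im_detect in fast_rcnn/test.py reads the raw 'cls_score' blob
#  instead of the softmax output 'cls_prob'.)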
146 | __C.TEST.SVM = False 147 | 148 | # Test using bounding-box regressors 149 | __C.TEST.BBOX_REG = True 150 | 151 | # Propose boxes 152 | __C.TEST.HAS_RPN = False 153 | 154 | # Test using these proposals 155 | __C.TEST.PROPOSAL_METHOD = 'selective_search' 156 | 157 | ## NMS threshold used on RPN proposals 158 | __C.TEST.RPN_NMS_THRESH = 0.7 159 | ## Number of top scoring boxes to keep before applying NMS to RPN proposals 160 | __C.TEST.RPN_PRE_NMS_TOP_N = 6000 161 | ## Number of top scoring boxes to keep after applying NMS to RPN proposals 162 | __C.TEST.RPN_POST_NMS_TOP_N = 300 163 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) 164 | __C.TEST.RPN_MIN_SIZE = 16 165 | 166 | 167 | # 168 | # MISC 169 | # 170 | 171 | # The mapping from image coordinates to feature map coordinates might cause 172 | # some boxes that are distinct in image space to become identical in feature 173 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor 174 | # for identifying duplicate boxes. 175 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16 176 | __C.DEDUP_BOXES = 1./16. 177 | 178 | # Pixel mean values (BGR order) as a (1, 1, 3) array 179 | # We use the same pixel mean for all networks even though it's not exactly what 180 | # they were trained with 181 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) 182 | 183 | # For reproducibility 184 | __C.RNG_SEED = 3 185 | 186 | # A small number that's used many times 187 | __C.EPS = 1e-14 188 | 189 | # Root directory of project 190 | __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) 191 | 192 | # Data directory 193 | __C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data')) 194 | 195 | # Model directory 196 | __C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc')) 197 | 198 | # Name of (or path to) the matlab executable 199 | __C.MATLAB = 'matlab' 200 | 201 | # Place outputs under an experiments directory 202 | __C.EXP_DIR = 'default' 203 | 204 | # Use GPU implementation of non-maximum suppression 205 | __C.USE_GPU_NMS = True 206 | 207 | # Default GPU device id 208 | __C.GPU_ID = 0 209 | 210 | 211 | def get_output_dir(imdb, net=None): 212 | """Return the directory where experimental artifacts are placed. 213 | If the directory does not exist, it is created. 214 | 215 | A canonical path is built using the name from an imdb and a network 216 | (if not None). 217 | """ 218 | outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name)) 219 | if net is not None: 220 | outdir = osp.join(outdir, net.name) 221 | if not os.path.exists(outdir): 222 | os.makedirs(outdir) 223 | return outdir 224 | 225 | def _merge_a_into_b(a, b): 226 | """Merge config dictionary a into config dictionary b, clobbering the 227 | options in b whenever they are also specified in a. 228 | """ 229 | if type(a) is not edict: 230 | return 231 | 232 | for k, v in a.iteritems(): 233 | # a must specify keys that are in b 234 | if not b.has_key(k): 235 | raise KeyError('{} is not a valid config key'.format(k)) 236 | 237 | # the types must match, too 238 | old_type = type(b[k]) 239 | if old_type is not type(v): 240 | if isinstance(b[k], np.ndarray): 241 | v = np.array(v, dtype=b[k].dtype) 242 | else: 243 | raise ValueError(('Type mismatch ({} vs. 
{}) ' 244 | 'for config key: {}').format(type(b[k]), 245 | type(v), k)) 246 | 247 | # recursively merge dicts 248 | if type(v) is edict: 249 | try: 250 | _merge_a_into_b(a[k], b[k]) 251 | except: 252 | print('Error under config key: {}'.format(k)) 253 | raise 254 | else: 255 | b[k] = v 256 | 257 | def cfg_from_file(filename): 258 | """Load a config file and merge it into the default options.""" 259 | import yaml 260 | with open(filename, 'r') as f: 261 | yaml_cfg = edict(yaml.load(f)) 262 | 263 | _merge_a_into_b(yaml_cfg, __C) 264 | 265 | def cfg_from_list(cfg_list): 266 | """Set config keys via list (e.g., from command line).""" 267 | from ast import literal_eval 268 | assert len(cfg_list) % 2 == 0 269 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 270 | key_list = k.split('.') 271 | d = __C 272 | for subkey in key_list[:-1]: 273 | assert d.has_key(subkey) 274 | d = d[subkey] 275 | subkey = key_list[-1] 276 | assert d.has_key(subkey) 277 | try: 278 | value = literal_eval(v) 279 | except: 280 | # handle the case when v is a string literal 281 | value = v 282 | assert type(value) == type(d[subkey]), \ 283 | 'type {} does not match original type {}'.format( 284 | type(value), type(d[subkey])) 285 | d[subkey] = value 286 | -------------------------------------------------------------------------------- /lib/fast_rcnn/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from fast_rcnn.config import cfg 9 | from nms.gpu_nms import gpu_nms 10 | from nms.cpu_nms import cpu_nms 11 | 12 | def nms(dets, thresh, force_cpu=False): 13 | """Dispatch to either CPU or GPU NMS implementations.""" 14 | 15 | if dets.shape[0] == 0: 16 | return [] 17 | if cfg.USE_GPU_NMS and not force_cpu: 18 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | else: 20 | return cpu_nms(dets, thresh) 21 | -------------------------------------------------------------------------------- /lib/fast_rcnn/test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Test a Fast R-CNN network on an imdb (image database).""" 9 | 10 | from fast_rcnn.config import cfg, get_output_dir 11 | from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv 12 | import argparse 13 | from utils.timer import Timer 14 | import numpy as np 15 | import cv2 16 | import caffe 17 | from fast_rcnn.nms_wrapper import nms 18 | import cPickle 19 | from utils.blob import im_list_to_blob 20 | import os 21 | 22 | def _get_image_blob(im): 23 | """Converts an image into a network input. 
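    For example (illustrative arithmetic): with cfg.TEST.SCALES = (600,) and
    cfg.TEST.MAX_SIZE = 1000, a 480x640 image is resized by 600/480 = 1.25 to
    600x800 (640 * 1.25 = 800 <= 1000, so the MAX_SIZE cap is not hit) and
    im_scale_factors is [1.25].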
24 | 25 | Arguments: 26 | im (ndarray): a color image in BGR order 27 | 28 | Returns: 29 | blob (ndarray): a data blob holding an image pyramid 30 | im_scale_factors (list): list of image scales (relative to im) used 31 | in the image pyramid 32 | """ 33 | im_orig = im.astype(np.float32, copy=True) 34 | im_orig -= cfg.PIXEL_MEANS 35 | 36 | im_shape = im_orig.shape 37 | im_size_min = np.min(im_shape[0:2]) 38 | im_size_max = np.max(im_shape[0:2]) 39 | 40 | processed_ims = [] 41 | im_scale_factors = [] 42 | 43 | for target_size in cfg.TEST.SCALES: 44 | im_scale = float(target_size) / float(im_size_min) 45 | # Prevent the biggest axis from being more than MAX_SIZE 46 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 47 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 48 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 49 | interpolation=cv2.INTER_LINEAR) 50 | im_scale_factors.append(im_scale) 51 | processed_ims.append(im) 52 | 53 | # Create a blob to hold the input images 54 | blob = im_list_to_blob(processed_ims) 55 | 56 | return blob, np.array(im_scale_factors) 57 | 58 | def _get_rois_blob(im_rois, im_scale_factors): 59 | """Converts RoIs into network inputs. 60 | 61 | Arguments: 62 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 63 | im_scale_factors (list): scale factors as returned by _get_image_blob 64 | 65 | Returns: 66 | blob (ndarray): R x 5 matrix of RoIs in the image pyramid 67 | """ 68 | rois, levels = _project_im_rois(im_rois, im_scale_factors) 69 | rois_blob = np.hstack((levels, rois)) 70 | return rois_blob.astype(np.float32, copy=False) 71 | 72 | def _project_im_rois(im_rois, scales): 73 | """Project image RoIs into the image pyramid built by _get_image_blob. 74 | 75 | Arguments: 76 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 77 | scales (list): scale factors as returned by _get_image_blob 78 | 79 | Returns: 80 | rois (ndarray): R x 4 matrix of projected RoI coordinates 81 | levels (list): image pyramid levels used by each projected RoI 82 | """ 83 | im_rois = im_rois.astype(np.float, copy=False) 84 | 85 | if len(scales) > 1: 86 | widths = im_rois[:, 2] - im_rois[:, 0] + 1 87 | heights = im_rois[:, 3] - im_rois[:, 1] + 1 88 | 89 | areas = widths * heights 90 | scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2) 91 | diff_areas = np.abs(scaled_areas - 224 * 224) 92 | levels = diff_areas.argmin(axis=1)[:, np.newaxis] 93 | else: 94 | levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) 95 | 96 | rois = im_rois * scales[levels] 97 | 98 | return rois, levels 99 | 100 | def _get_blobs(im, rois): 101 | """Convert an image and RoIs within that image into network inputs.""" 102 | blobs = {'data' : None, 'rois' : None} 103 | blobs['data'], im_scale_factors = _get_image_blob(im) 104 | if not cfg.TEST.HAS_RPN: 105 | blobs['rois'] = _get_rois_blob(rois, im_scale_factors) 106 | return blobs, im_scale_factors 107 | 108 | def im_detect(net, im, boxes=None): 109 | """Detect object classes in an image given object proposals. 
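    Typical use (a sketch; assumes a loaded caffe.Net trained for Faster
    R-CNN and cfg.TEST.HAS_RPN = True, so proposals come from the RPN and
    boxes may be left as None):

        im = cv2.imread('000001.jpg')           # BGR, as loaded by OpenCV
        scores, boxes = im_detect(net, im)
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind+1)]   # one class's boxes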
110 | 111 | Arguments: 112 | net (caffe.Net): Fast R-CNN network to use 113 | im (ndarray): color image to test (in BGR order) 114 | boxes (ndarray): R x 4 array of object proposals or None (for RPN) 115 | 116 | Returns: 117 | scores (ndarray): R x K array of object class scores (K includes 118 | background as object category 0) 119 | boxes (ndarray): R x (4*K) array of predicted bounding boxes 120 | """ 121 | blobs, im_scales = _get_blobs(im, boxes) 122 | 123 | # When mapping from image ROIs to feature map ROIs, there's some aliasing 124 | # (some distinct image ROIs get mapped to the same feature ROI). 125 | # Here, we identify duplicate feature ROIs, so we only compute features 126 | # on the unique subset. 127 | if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN: 128 | v = np.array([1, 1e3, 1e6, 1e9, 1e12]) 129 | hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v) 130 | _, index, inv_index = np.unique(hashes, return_index=True, 131 | return_inverse=True) 132 | blobs['rois'] = blobs['rois'][index, :] 133 | boxes = boxes[index, :] 134 | 135 | if cfg.TEST.HAS_RPN: 136 | im_blob = blobs['data'] 137 | blobs['im_info'] = np.array( 138 | [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], 139 | dtype=np.float32) 140 | 141 | # reshape network inputs 142 | net.blobs['data'].reshape(*(blobs['data'].shape)) 143 | if cfg.TEST.HAS_RPN: 144 | net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) 145 | else: 146 | net.blobs['rois'].reshape(*(blobs['rois'].shape)) 147 | 148 | # do forward 149 | forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)} 150 | if cfg.TEST.HAS_RPN: 151 | forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False) 152 | else: 153 | forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False) 154 | blobs_out = net.forward(**forward_kwargs) 155 | 156 | if cfg.TEST.HAS_RPN: 157 | assert len(im_scales) == 1, "Only single-image batch implemented" 158 | rois = net.blobs['rois'].data.copy() 159 | # unscale back to raw image space 160 | boxes = rois[:, 1:5] / im_scales[0] 161 | 162 | if cfg.TEST.SVM: 163 | # use the raw scores before softmax under the assumption they 164 | # were trained as linear SVMs 165 | scores = net.blobs['cls_score'].data 166 | else: 167 | # use softmax estimated probabilities 168 | scores = blobs_out['cls_prob'] 169 | 170 | if cfg.TEST.BBOX_REG: 171 | # Apply bounding-box regression deltas 172 | box_deltas = blobs_out['bbox_pred'] 173 | pred_boxes = bbox_transform_inv(boxes, box_deltas) 174 | pred_boxes = clip_boxes(pred_boxes, im.shape) 175 | else: 176 | # Simply repeat the boxes, once for each class 177 | pred_boxes = np.tile(boxes, (1, scores.shape[1])) 178 | 179 | if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN: 180 | # Map scores and predictions back to the original set of boxes 181 | scores = scores[inv_index, :] 182 | pred_boxes = pred_boxes[inv_index, :] 183 | 184 | return scores, pred_boxes 185 | 186 | def vis_detections(im, class_name, dets, thresh=0.3): 187 | """Visual debugging of detections.""" 188 | import matplotlib.pyplot as plt 189 | im = im[:, :, (2, 1, 0)] 190 | for i in xrange(np.minimum(10, dets.shape[0])): 191 | bbox = dets[i, :4] 192 | score = dets[i, -1] 193 | if score > thresh: 194 | plt.cla() 195 | plt.imshow(im) 196 | plt.gca().add_patch( 197 | plt.Rectangle((bbox[0], bbox[1]), 198 | bbox[2] - bbox[0], 199 | bbox[3] - bbox[1], fill=False, 200 | edgecolor='g', linewidth=3) 201 | ) 202 | plt.title('{} {:.3f}'.format(class_name, score)) 203 | plt.show() 204 | 205 | def 
apply_nms(all_boxes, thresh): 206 | """Apply non-maximum suppression to all predicted boxes output by the 207 | test_net method. 208 | """ 209 | num_classes = len(all_boxes) 210 | num_images = len(all_boxes[0]) 211 | nms_boxes = [[[] for _ in xrange(num_images)] 212 | for _ in xrange(num_classes)] 213 | for cls_ind in xrange(num_classes): 214 | for im_ind in xrange(num_images): 215 | dets = all_boxes[cls_ind][im_ind] 216 | if dets == []: 217 | continue 218 | # CPU NMS is much faster than GPU NMS when the number of boxes 219 | # is relatively small (e.g., < 10k) 220 | # TODO(rbg): autotune NMS dispatch 221 | keep = nms(dets, thresh, force_cpu=True) 222 | if len(keep) == 0: 223 | continue 224 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 225 | return nms_boxes 226 | 227 | def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): 228 | """Test a Fast R-CNN network on an image database.""" 229 | num_images = len(imdb.image_index) 230 | # all detections are collected into: 231 | # all_boxes[cls][image] = N x 5 array of detections in 232 | # (x1, y1, x2, y2, score) 233 | all_boxes = [[[] for _ in xrange(num_images)] 234 | for _ in xrange(imdb.num_classes)] 235 | 236 | output_dir = get_output_dir(imdb, net) 237 | 238 | # timers 239 | _t = {'im_detect' : Timer(), 'misc' : Timer()} 240 | 241 | if not cfg.TEST.HAS_RPN: 242 | roidb = imdb.roidb 243 | 244 | for i in xrange(num_images): 245 | # filter out any ground truth boxes 246 | if cfg.TEST.HAS_RPN: 247 | box_proposals = None 248 | else: 249 | # The roidb may contain ground-truth rois (for example, if the roidb 250 | # comes from the training or val split). We only want to evaluate 251 | # detection on the *non*-ground-truth rois. We select those rois 252 | # that have the gt_classes field set to 0, which means there's no 253 | # ground truth. 
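
The DEDUP_BOXES branch of im_detect above is easy to miss: each RoI row is quantized and collapsed into one scalar hash, and np.unique then yields both the unique subset to forward through the net and the inverse map that scatters results back. A standalone sketch; the weight vector here has 4 entries because these toy rows are bare x1,y1,x2,y2, whereas the real rois blob carries a leading level column and hence 5 weights:

    import numpy as np

    rois = np.array([[0, 0, 10, 10],
                     [0, 0, 10, 10],          # exact duplicate of row 0
                     [5, 5, 20, 20]], dtype=np.float32)
    v = np.array([1, 1e3, 1e6, 1e9])          # one weight per column
    hashes = np.round(rois * 0.0625).dot(v)   # 0.0625 stands in for cfg.DEDUP_BOXES
    _, index, inv_index = np.unique(hashes, return_index=True,
                                    return_inverse=True)
    unique_rois = rois[index, :]              # only these rows hit the network
    restored = unique_rois[inv_index, :]      # scatter results back to all inputs
    assert (restored == rois).all()
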
254 | box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] 255 | 256 | im = cv2.imread(imdb.image_path_at(i)) 257 | _t['im_detect'].tic() 258 | scores, boxes = im_detect(net, im, box_proposals) 259 | _t['im_detect'].toc() 260 | 261 | _t['misc'].tic() 262 | # skip j = 0, because it's the background class 263 | for j in xrange(1, imdb.num_classes): 264 | inds = np.where(scores[:, j] > thresh)[0] 265 | cls_scores = scores[inds, j] 266 | cls_boxes = boxes[inds, j*4:(j+1)*4] 267 | cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ 268 | .astype(np.float32, copy=False) 269 | keep = nms(cls_dets, cfg.TEST.NMS) 270 | cls_dets = cls_dets[keep, :] 271 | if vis: 272 | vis_detections(im, imdb.classes[j], cls_dets) 273 | all_boxes[j][i] = cls_dets 274 | 275 | # Limit to max_per_image detections *over all classes* 276 | if max_per_image > 0: 277 | image_scores = np.hstack([all_boxes[j][i][:, -1] 278 | for j in xrange(1, imdb.num_classes)]) 279 | if len(image_scores) > max_per_image: 280 | image_thresh = np.sort(image_scores)[-max_per_image] 281 | for j in xrange(1, imdb.num_classes): 282 | keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] 283 | all_boxes[j][i] = all_boxes[j][i][keep, :] 284 | _t['misc'].toc() 285 | 286 | print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ 287 | .format(i + 1, num_images, _t['im_detect'].average_time, 288 | _t['misc'].average_time) 289 | 290 | det_file = os.path.join(output_dir, 'detections.pkl') 291 | with open(det_file, 'wb') as f: 292 | cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) 293 | 294 | print 'Evaluating detections' 295 | imdb.evaluate_detections(all_boxes, output_dir) 296 | -------------------------------------------------------------------------------- /lib/fast_rcnn/train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Train a Fast R-CNN network.""" 9 | 10 | import caffe 11 | from fast_rcnn.config import cfg 12 | import roi_data_layer.roidb as rdl_roidb 13 | from utils.timer import Timer 14 | import numpy as np 15 | import os 16 | 17 | from caffe.proto import caffe_pb2 18 | import google.protobuf as pb2 19 | 20 | class SolverWrapper(object): 21 | """A simple wrapper around Caffe's solver. 22 | This wrapper gives us control over the snapshotting process, which we 23 | use to unnormalize the learned bounding-box regression weights. 24 | """ 25 | 26 | def __init__(self, solver_prototxt, roidb, output_dir, 27 | pretrained_model=None): 28 | """Initialize the SolverWrapper.""" 29 | self.output_dir = output_dir 30 | 31 | if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and 32 | cfg.TRAIN.BBOX_NORMALIZE_TARGETS): 33 | # RPN can only use precomputed normalization because there are no 34 | # fixed statistics to compute a priori 35 | assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED 36 | 37 | if cfg.TRAIN.BBOX_REG: 38 | print 'Computing bounding-box regression targets...' 
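
The per-image cap at the end of test_net above pools scores across all foreground classes and thresholds at the score of the max_per_image-th best detection. A numeric check with toy scores:

    import numpy as np

    max_per_image = 3
    image_scores = np.hstack([np.array([0.9, 0.2]),        # class 1 scores
                              np.array([0.8, 0.6, 0.1])])  # class 2 scores
    if len(image_scores) > max_per_image:
        image_thresh = np.sort(image_scores)[-max_per_image]
        print(image_thresh)   # 0.6: the 0.2 and 0.1 detections get dropped
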
39 | self.bbox_means, self.bbox_stds = \ 40 | rdl_roidb.add_bbox_regression_targets(roidb) 41 | print 'done' 42 | 43 | self.solver = caffe.SGDSolver(solver_prototxt) 44 | if pretrained_model is not None: 45 | print ('Loading pretrained model ' 46 | 'weights from {:s}').format(pretrained_model) 47 | self.solver.net.copy_from(pretrained_model) 48 | 49 | self.solver_param = caffe_pb2.SolverParameter() 50 | with open(solver_prototxt, 'rt') as f: 51 | pb2.text_format.Merge(f.read(), self.solver_param) 52 | 53 | self.solver.net.layers[0].set_roidb(roidb) 54 | 55 | def snapshot(self): 56 | """Take a snapshot of the network after unnormalizing the learned 57 | bounding-box regression weights. This enables easy use at test-time. 58 | """ 59 | net = self.solver.net 60 | 61 | scale_bbox_params = (cfg.TRAIN.BBOX_REG and 62 | cfg.TRAIN.BBOX_NORMALIZE_TARGETS and 63 | net.params.has_key('bbox_pred')) 64 | 65 | if scale_bbox_params: 66 | # save original values 67 | orig_0 = net.params['bbox_pred'][0].data.copy() 68 | orig_1 = net.params['bbox_pred'][1].data.copy() 69 | 70 | # scale and shift with bbox reg unnormalization; then save snapshot 71 | net.params['bbox_pred'][0].data[...] = \ 72 | (net.params['bbox_pred'][0].data * 73 | self.bbox_stds[:, np.newaxis]) 74 | net.params['bbox_pred'][1].data[...] = \ 75 | (net.params['bbox_pred'][1].data * 76 | self.bbox_stds + self.bbox_means) 77 | 78 | infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX 79 | if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') 80 | filename = (self.solver_param.snapshot_prefix + infix + 81 | '_iter_{:d}'.format(self.solver.iter) + '.caffemodel') 82 | filename = os.path.join(self.output_dir, filename) 83 | 84 | net.save(str(filename)) 85 | print 'Wrote snapshot to: {:s}'.format(filename) 86 | 87 | if scale_bbox_params: 88 | # restore net to original state 89 | net.params['bbox_pred'][0].data[...] = orig_0 90 | net.params['bbox_pred'][1].data[...] = orig_1 91 | return filename 92 | 93 | def train_model(self, max_iters): 94 | """Network training loop.""" 95 | last_snapshot_iter = -1 96 | timer = Timer() 97 | model_paths = [] 98 | while self.solver.iter < max_iters: 99 | # Make one SGD update 100 | timer.tic() 101 | self.solver.step(1) 102 | timer.toc() 103 | if self.solver.iter % (10 * self.solver_param.display) == 0: 104 | print 'speed: {:.3f}s / iter'.format(timer.average_time) 105 | 106 | if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0: 107 | last_snapshot_iter = self.solver.iter 108 | model_paths.append(self.snapshot()) 109 | 110 | if last_snapshot_iter != self.solver.iter: 111 | model_paths.append(self.snapshot()) 112 | return model_paths 113 | 114 | def get_training_roidb(imdb): 115 | """Returns a roidb (Region of Interest database) for use in training.""" 116 | if cfg.TRAIN.USE_FLIPPED: 117 | print 'Appending horizontally-flipped training examples...' 118 | imdb.append_flipped_images() 119 | print 'done' 120 | 121 | print 'Preparing training data...' 
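
The snapshot unnormalization above rests on a small identity: if regression targets were normalized as t' = (t - mean) / std during training, then scaling the bbox_pred weights by std and shifting the bias by std and mean makes the saved net emit raw deltas at test time. A one-dimensional check with toy numbers:

    import numpy as np

    std, mean = 0.2, 0.1
    w, b = np.array([2.0]), np.array([0.5])   # toy bbox_pred weight and bias
    w_un = w * std                            # scale the weight rows
    b_un = b * std + mean                     # scale and shift the bias
    x = 1.0                                   # any input feature
    assert np.allclose(w_un * x + b_un, (w * x + b) * std + mean)
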
122 | rdl_roidb.prepare_roidb(imdb) 123 | print 'done' 124 | 125 | return imdb.roidb 126 | 127 | def filter_roidb(roidb): 128 | """Remove roidb entries that have no usable RoIs.""" 129 | 130 | def is_valid(entry): 131 | # Valid images have: 132 | # (1) At least one foreground RoI OR 133 | # (2) At least one background RoI 134 | overlaps = entry['max_overlaps'] 135 | # find boxes with sufficient overlap 136 | fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] 137 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 138 | bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & 139 | (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 140 | # image is only valid if such boxes exist 141 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 142 | return valid 143 | 144 | num = len(roidb) 145 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 146 | num_after = len(filtered_roidb) 147 | print 'Filtered {} roidb entries: {} -> {}'.format(num - num_after, 148 | num, num_after) 149 | return filtered_roidb 150 | 151 | def train_net(solver_prototxt, roidb, output_dir, 152 | pretrained_model=None, max_iters=40000): 153 | """Train a Fast R-CNN network.""" 154 | 155 | roidb = filter_roidb(roidb) 156 | sw = SolverWrapper(solver_prototxt, roidb, output_dir, 157 | pretrained_model=pretrained_model) 158 | 159 | print 'Solving...' 160 | model_paths = sw.train_model(max_iters) 161 | print 'done solving' 162 | return model_paths 163 | -------------------------------------------------------------------------------- /lib/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrGF/py-faster-rcnn-windows/12e11924217e79fd7124d05a18baa49b9908340d/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrGF/py-faster-rcnn-windows/12e11924217e79fd7124d05a18baa49b9908340d/lib/nms/__init__.pyc -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrGF/py-faster-rcnn-windows/12e11924217e79fd7124d05a18baa49b9908340d/lib/nms/cpu_nms.pyd -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = 
dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | #cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] #20160531, by MrX 26 | cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1] 27 | 28 | cdef int ndets = dets.shape[0] 29 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 30 | np.zeros((ndets), dtype=np.int) 31 | 32 | # nominal indices 33 | cdef int _i, _j 34 | # sorted indices 35 | cdef int i, j 36 | # temp variables for box i's (the box currently under consideration) 37 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 38 | # variables for computing overlap with box j (lower scoring box) 39 | cdef np.float32_t xx1, yy1, xx2, yy2 40 | cdef np.float32_t w, h 41 | cdef np.float32_t inter, ovr 42 | 43 | keep = [] 44 | for _i in range(ndets): 45 | i = order[_i] 46 | if suppressed[i] == 1: 47 | continue 48 | keep.append(i) 49 | ix1 = x1[i] 50 | iy1 = y1[i] 51 | ix2 = x2[i] 52 | iy2 = y2[i] 53 | iarea = areas[i] 54 | for _j in range(_i + 1, ndets): 55 | j = order[_j] 56 | if suppressed[j] == 1: 57 | continue 58 | xx1 = max(ix1, x1[j]) 59 | yy1 = max(iy1, y1[j]) 60 | xx2 = min(ix2, x2[j]) 61 | yy2 = min(iy2, y2[j]) 62 | w = max(0.0, xx2 - xx1 + 1) 63 | h = max(0.0, yy2 - yy1 + 1) 64 | inter = w * h 65 | ovr = inter / (iarea + areas[j] - inter) 66 | if ovr >= thresh: 67 | suppressed[j] = 1 68 | 69 | return keep 70 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(long* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrGF/py-faster-rcnn-windows/12e11924217e79fd7124d05a18baa49b9908340d/lib/nms/gpu_nms.pyd -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | #cdef np.ndarray[np.int_t, ndim=1] \ // 20160601, by MrX 26 | # order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.intp_t, ndim=1] \ 28 | order = scores.argsort()[::-1] 29 | cdef np.ndarray[np.float32_t, ndim=2] \ 30 | sorted_dets = dets[order, :] 31 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, 
device_id) 32 | keep = keep[:num_out] 33 | return list(order[keep]) 34 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
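
The device-side kernel above only fills a boxes_num x col_blocks table of 64-bit suppression masks; the actual keep/drop decision happens in the serial loop near the end of _nms below. A pure-Python emulation of that host-side reduction, assuming mask[i][j] holds box i's suppression bitset for block j and boxes are already sorted by score:

    def reduce_mask(mask, n_boxes, block=64):
        col_blocks = (n_boxes + block - 1) // block
        remv = [0] * col_blocks          # bitset of already-suppressed boxes
        keep = []
        for i in range(n_boxes):         # highest-scoring boxes first
            nblock, inblock = divmod(i, block)
            if not (remv[nblock] >> inblock) & 1:
                keep.append(i)           # not suppressed by any kept box
                for j in range(nblock, col_blocks):
                    remv[j] |= mask[i][j]
        return keep

    # reduce_mask([[0b10], [0b00]], 2) -> [0]: box 0 suppresses box 1
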
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(long* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /lib/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /lib/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /lib/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | 
__author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/pycocotools/_mask.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c 2 | # distutils: sources = ../MatlabAPI/private/maskApi.c 3 | 4 | #************************************************************************** 5 | # Microsoft COCO Toolbox. version 2.0 6 | # Data, paper, and tutorials available at: http://mscoco.org/ 7 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 8 | # Licensed under the Simplified BSD License [see coco/license.txt] 9 | #************************************************************************** 10 | 11 | __author__ = 'tsungyi' 12 | 13 | # import both Python-level and C-level symbols of Numpy 14 | # the API uses Numpy to interface C and Python 15 | import numpy as np 16 | cimport numpy as np 17 | from libc.stdlib cimport malloc, free 18 | 19 | # intialized Numpy. must do. 20 | np.import_array() 21 | 22 | # import numpy C function 23 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management 24 | cdef extern from "numpy/arrayobject.h": 25 | void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) 26 | 27 | # Declare the prototype of the C functions in MaskApi.h 28 | cdef extern from "maskApi.h": 29 | ctypedef unsigned int uint 30 | ctypedef unsigned long siz 31 | ctypedef unsigned char byte 32 | ctypedef double* BB 33 | ctypedef struct RLE: 34 | siz h, 35 | siz w, 36 | siz m, 37 | uint* cnts, 38 | void rlesInit( RLE **R, siz n ) 39 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) 40 | void rleDecode( const RLE *R, byte *mask, siz n ) 41 | void rleMerge( const RLE *R, RLE *M, siz n, bint intersect ) 42 | void rleArea( const RLE *R, siz n, uint *a ) 43 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) 44 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) 45 | void rleToBbox( const RLE *R, BB bb, siz n ) 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) 47 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) 48 | char* rleToString( const RLE *R ) 49 | void rleFrString( RLE *R, char *s, siz h, siz w ) 50 | 51 | # python class to wrap RLE array in C 52 | # the class handles the memory allocation and deallocation 53 | cdef class RLEs: 54 | cdef RLE *_R 55 | cdef siz _n 56 | 57 | def __cinit__(self, siz n =0): 58 | rlesInit(&self._R, n) 59 | self._n = n 60 | 61 | # free the RLE array here 62 | def __dealloc__(self): 63 | if self._R is not NULL: 64 | for i in range(self._n): 65 | free(self._R[i].cnts) 66 | free(self._R) 67 | def __getattr__(self, key): 68 | if key == 'n': 69 | return self._n 70 | raise AttributeError(key) 71 | 72 | # python class to wrap Mask array in C 73 | # the class handles the memory allocation and deallocation 74 | cdef class Masks: 75 | cdef byte *_mask 76 | cdef siz _h 77 | cdef siz _w 78 | cdef siz _n 79 | 80 | def __cinit__(self, h, w, n): 81 | self._mask = malloc(h*w*n* sizeof(byte)) 82 | self._h = h 83 | self._w = w 84 | self._n = n 85 | # def __dealloc__(self): 86 | # the memory management of _mask has been passed to np.ndarray 87 | # it doesn't need to be freed here 88 | 89 | # called when passing into np.array() and return an np.ndarray in column-major order 90 | def __array__(self): 91 | cdef np.npy_intp shape[1] 92 | shape[0] = self._h*self._w*self._n 93 | # Create a 1D array, and reshape it to fortran/Matlab column-major array 94 | ndarray = 
np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') 95 | # The _mask allocated by Masks is now handled by ndarray 96 | PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) 97 | return ndarray 98 | 99 | # internal conversion from Python RLEs object to compressed RLE format 100 | def _toString(RLEs Rs): 101 | cdef siz n = Rs.n 102 | cdef bytes py_string 103 | cdef char* c_string 104 | objs = [] 105 | for i in range(n): 106 | c_string = rleToString( &Rs._R[i] ) 107 | py_string = c_string 108 | objs.append({ 109 | 'size': [Rs._R[i].h, Rs._R[i].w], 110 | 'counts': py_string 111 | }) 112 | free(c_string) 113 | return objs 114 | 115 | # internal conversion from compressed RLE format to Python RLEs object 116 | def _frString(rleObjs): 117 | cdef siz n = len(rleObjs) 118 | Rs = RLEs(n) 119 | cdef bytes py_string 120 | cdef char* c_string 121 | for i, obj in enumerate(rleObjs): 122 | py_string = str(obj['counts']) 123 | c_string = py_string 124 | rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) 125 | return Rs 126 | 127 | # encode mask to RLEs objects 128 | # list of RLE string can be generated by RLEs member function 129 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask): 130 | h, w, n = mask.shape[0], mask.shape[1], mask.shape[2] 131 | cdef RLEs Rs = RLEs(n) 132 | rleEncode(Rs._R,mask.data,h,w,n) 133 | objs = _toString(Rs) 134 | return objs 135 | 136 | # decode mask from compressed list of RLE string or RLEs object 137 | def decode(rleObjs): 138 | cdef RLEs Rs = _frString(rleObjs) 139 | h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n 140 | masks = Masks(h, w, n) 141 | rleDecode( Rs._R, masks._mask, n ); 142 | return np.array(masks) 143 | 144 | def merge(rleObjs, bint intersect=0): 145 | cdef RLEs Rs = _frString(rleObjs) 146 | cdef RLEs R = RLEs(1) 147 | rleMerge(Rs._R, R._R, Rs._n, intersect) 148 | obj = _toString(R)[0] 149 | return obj 150 | 151 | def area(rleObjs): 152 | cdef RLEs Rs = _frString(rleObjs) 153 | cdef uint* _a = malloc(Rs._n* sizeof(uint)) 154 | rleArea(Rs._R, Rs._n, _a) 155 | cdef np.npy_intp shape[1] 156 | shape[0] = Rs._n 157 | a = np.array((Rs._n, ), dtype=np.uint8) 158 | a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a) 159 | PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA) 160 | return a 161 | 162 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox). 163 | def iou( dt, gt, pyiscrowd ): 164 | def _preproc(objs): 165 | if len(objs) == 0: 166 | return objs 167 | if type(objs) == np.ndarray: 168 | if len(objs.shape) == 1: 169 | objs = objs.reshape((objs[0], 1)) 170 | # check if it's Nx4 bbox 171 | if not len(objs.shape) == 2 or not objs.shape[1] == 4: 172 | raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension') 173 | objs = objs.astype(np.double) 174 | elif type(objs) == list: 175 | # check if list is in box format and convert it to np.ndarray 176 | isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) 177 | isrle = np.all(np.array([type(obj) == dict for obj in objs])) 178 | if isbox: 179 | objs = np.array(objs, dtype=np.double) 180 | if len(objs.shape) == 1: 181 | objs = objs.reshape((1,objs.shape[0])) 182 | elif isrle: 183 | objs = _frString(objs) 184 | else: 185 | raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])') 186 | else: 187 | raise Exception('unrecognized type. 
The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') 188 | return objs 189 | def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 190 | rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) 191 | def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 192 | bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 193 | def _len(obj): 194 | cdef siz N = 0 195 | if type(obj) == RLEs: 196 | N = obj.n 197 | elif len(obj)==0: 198 | pass 199 | elif type(obj) == np.ndarray: 200 | N = obj.shape[0] 201 | return N 202 | # convert iscrowd to numpy array 203 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 204 | # simple type checking 205 | cdef siz m, n 206 | dt = _preproc(dt) 207 | gt = _preproc(gt) 208 | m = _len(dt) 209 | n = _len(gt) 210 | if m == 0 or n == 0: 211 | return [] 212 | if not type(dt) == type(gt): 213 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 214 | 215 | # define local variables 216 | cdef double* _iou = 0 217 | cdef np.npy_intp shape[1] 218 | # check type and assign iou function 219 | if type(dt) == RLEs: 220 | _iouFun = _rleIou 221 | elif type(dt) == np.ndarray: 222 | _iouFun = _bbIou 223 | else: 224 | raise Exception('input data type not allowed.') 225 | _iou = malloc(m*n* sizeof(double)) 226 | iou = np.zeros((m*n, ), dtype=np.double) 227 | shape[0] = m*n 228 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 229 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 230 | _iouFun(dt, gt, iscrowd, m, n, iou) 231 | return iou.reshape((m,n), order='F') 232 | 233 | def toBbox( rleObjs ): 234 | cdef RLEs Rs = _frString(rleObjs) 235 | cdef siz n = Rs.n 236 | cdef BB _bb = malloc(4*n* sizeof(double)) 237 | rleToBbox( Rs._R, _bb, n ) 238 | cdef np.npy_intp shape[1] 239 | shape[0] = 4*n 240 | bb = np.array((1,4*n), dtype=np.double) 241 | bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) 242 | PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) 243 | return bb 244 | 245 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): 246 | cdef siz n = bb.shape[0] 247 | Rs = RLEs(n) 248 | rleFrBbox( Rs._R, bb.data, h, w, n ) 249 | objs = _toString(Rs) 250 | return objs 251 | 252 | def frPoly( poly, siz h, siz w ): 253 | cdef np.ndarray[np.double_t, ndim=1] np_poly 254 | n = len(poly) 255 | Rs = RLEs(n) 256 | for i, p in enumerate(poly): 257 | np_poly = np.array(p, dtype=np.double, order='F') 258 | rleFrPoly( &Rs._R[i], np_poly.data, len(np_poly)/2, h, w ) 259 | objs = _toString(Rs) 260 | return objs 261 | 262 | def frUncompressedRLE(ucRles, siz h, siz w): 263 | cdef np.ndarray[np.uint32_t, ndim=1] cnts 264 | cdef RLE R 265 | cdef uint *data 266 | n = len(ucRles) 267 | objs = [] 268 | for i in range(n): 269 | Rs = RLEs(1) 270 | cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) 271 | # time for malloc can be saved here but it's fine 272 | data = malloc(len(cnts)* sizeof(uint)) 273 | for j in range(len(cnts)): 274 | data[j] = cnts[j] 275 | R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) 276 | Rs._R[0] = R 277 | objs.append(_toString(Rs)[0]) 278 | return objs 279 | 280 | def frPyObjects(pyobj, siz h, w): 281 | if type(pyobj) == np.ndarray: 282 | objs = frBbox(pyobj, h, w ) 283 | elif type(pyobj) == list and len(pyobj[0]) == 4: 284 | objs = 
frBbox(pyobj, h, w ) 285 | elif type(pyobj) == list and len(pyobj[0]) > 4: 286 | objs = frPoly(pyobj, h, w ) 287 | elif type(pyobj) == list and type(pyobj[0]) == dict: 288 | objs = frUncompressedRLE(pyobj, h, w) 289 | else: 290 | raise Exception('input type is not supported.') 291 | return objs 292 | -------------------------------------------------------------------------------- /lib/pycocotools/coco.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | __version__ = '1.0.1' 3 | # Interface for accessing the Microsoft COCO dataset. 4 | 5 | # Microsoft COCO is a large image dataset designed for object detection, 6 | # segmentation, and caption generation. pycocotools is a Python API that 7 | # assists in loading, parsing and visualizing the annotations in COCO. 8 | # Please visit http://mscoco.org/ for more information on COCO, including 9 | # for the data, paper, and tutorials. The exact format of the annotations 10 | # is also described on the COCO website. For example usage of the pycocotools 11 | # please see pycocotools_demo.ipynb. In addition to this API, please download both 12 | # the COCO images and annotations in order to run the demo. 13 | 14 | # An alternative to using the API is to load the annotations directly 15 | # into Python dictionary 16 | # Using the API provides additional utility functions. Note that this API 17 | # supports both *instance* and *caption* annotations. In the case of 18 | # captions not all functions are defined (e.g. categories are undefined). 19 | 20 | # The following API functions are defined: 21 | # COCO - COCO api class that loads COCO annotation file and prepare data structures. 22 | # decodeMask - Decode binary mask M encoded via run-length encoding. 23 | # encodeMask - Encode binary mask M using run-length encoding. 24 | # getAnnIds - Get ann ids that satisfy given filter conditions. 25 | # getCatIds - Get cat ids that satisfy given filter conditions. 26 | # getImgIds - Get img ids that satisfy given filter conditions. 27 | # loadAnns - Load anns with the specified ids. 28 | # loadCats - Load cats with the specified ids. 29 | # loadImgs - Load imgs with the specified ids. 30 | # segToMask - Convert polygon segmentation to binary mask. 31 | # showAnns - Display the specified annotations. 32 | # loadRes - Load algorithm results and create API for accessing them. 33 | # download - Download COCO images from mscoco.org server. 34 | # Throughout the API "ann"=annotation, "cat"=category, and "img"=image. 35 | # Help on each functions can be accessed by: "help COCO>function". 36 | 37 | # See also COCO>decodeMask, 38 | # COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds, 39 | # COCO>getImgIds, COCO>loadAnns, COCO>loadCats, 40 | # COCO>loadImgs, COCO>segToMask, COCO>showAnns 41 | 42 | # Microsoft COCO Toolbox. version 2.0 43 | # Data, paper, and tutorials available at: http://mscoco.org/ 44 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2014. 
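
The Cython module just listed is what pycocotools/mask.py further below re-exports. A hypothetical round trip through it, assuming the extension has been built; note that encode requires a Fortran-order uint8 array of shape H x W x N:

    import numpy as np
    import pycocotools.mask as mask_util

    m = np.zeros((4, 4, 1), dtype=np.uint8, order='F')
    m[1:3, 1:3, 0] = 1                  # a 2x2 square of foreground
    rles = mask_util.encode(m)          # list of {'size', 'counts'} dicts
    print(mask_util.area(rles))         # [4]
    print(mask_util.toBbox(rles))       # [[1. 1. 2. 2.]] as [x y w h]
    assert (mask_util.decode(rles) == m).all()
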
45 | # Licensed under the Simplified BSD License [see bsd.txt] 46 | 47 | import json 48 | import datetime 49 | import time 50 | import matplotlib.pyplot as plt 51 | from matplotlib.collections import PatchCollection 52 | from matplotlib.patches import Polygon 53 | import numpy as np 54 | from skimage.draw import polygon 55 | import urllib 56 | import copy 57 | import itertools 58 | import mask 59 | import os 60 | 61 | class COCO: 62 | def __init__(self, annotation_file=None): 63 | """ 64 | Constructor of Microsoft COCO helper class for reading and visualizing annotations. 65 | :param annotation_file (str): location of annotation file 66 | :param image_folder (str): location to the folder that hosts images. 67 | :return: 68 | """ 69 | # load dataset 70 | self.dataset = {} 71 | self.anns = [] 72 | self.imgToAnns = {} 73 | self.catToImgs = {} 74 | self.imgs = {} 75 | self.cats = {} 76 | if not annotation_file == None: 77 | print 'loading annotations into memory...' 78 | tic = time.time() 79 | dataset = json.load(open(annotation_file, 'r')) 80 | print 'Done (t=%0.2fs)'%(time.time()- tic) 81 | self.dataset = dataset 82 | self.createIndex() 83 | 84 | def createIndex(self): 85 | # create index 86 | print 'creating index...' 87 | anns = {} 88 | imgToAnns = {} 89 | catToImgs = {} 90 | cats = {} 91 | imgs = {} 92 | if 'annotations' in self.dataset: 93 | imgToAnns = {ann['image_id']: [] for ann in self.dataset['annotations']} 94 | anns = {ann['id']: [] for ann in self.dataset['annotations']} 95 | for ann in self.dataset['annotations']: 96 | imgToAnns[ann['image_id']] += [ann] 97 | anns[ann['id']] = ann 98 | 99 | if 'images' in self.dataset: 100 | imgs = {im['id']: {} for im in self.dataset['images']} 101 | for img in self.dataset['images']: 102 | imgs[img['id']] = img 103 | 104 | if 'categories' in self.dataset: 105 | cats = {cat['id']: [] for cat in self.dataset['categories']} 106 | for cat in self.dataset['categories']: 107 | cats[cat['id']] = cat 108 | catToImgs = {cat['id']: [] for cat in self.dataset['categories']} 109 | if 'annotations' in self.dataset: 110 | for ann in self.dataset['annotations']: 111 | catToImgs[ann['category_id']] += [ann['image_id']] 112 | 113 | print 'index created!' 114 | 115 | # create class members 116 | self.anns = anns 117 | self.imgToAnns = imgToAnns 118 | self.catToImgs = catToImgs 119 | self.imgs = imgs 120 | self.cats = cats 121 | 122 | def info(self): 123 | """ 124 | Print information about the annotation file. 125 | :return: 126 | """ 127 | for key, value in self.dataset['info'].items(): 128 | print '%s: %s'%(key, value) 129 | 130 | def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): 131 | """ 132 | Get ann ids that satisfy given filter conditions. default skips that filter 133 | :param imgIds (int array) : get anns for given imgs 134 | catIds (int array) : get anns for given cats 135 | areaRng (float array) : get anns for given area range (e.g. 
[0 inf]) 136 | iscrowd (boolean) : get anns for given crowd label (False or True) 137 | :return: ids (int array) : integer array of ann ids 138 | """ 139 | imgIds = imgIds if type(imgIds) == list else [imgIds] 140 | catIds = catIds if type(catIds) == list else [catIds] 141 | 142 | if len(imgIds) == len(catIds) == len(areaRng) == 0: 143 | anns = self.dataset['annotations'] 144 | else: 145 | if not len(imgIds) == 0: 146 | # this can be changed by defaultdict 147 | lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns] 148 | anns = list(itertools.chain.from_iterable(lists)) 149 | else: 150 | anns = self.dataset['annotations'] 151 | anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds] 152 | anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]] 153 | if not iscrowd == None: 154 | ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd] 155 | else: 156 | ids = [ann['id'] for ann in anns] 157 | return ids 158 | 159 | def getCatIds(self, catNms=[], supNms=[], catIds=[]): 160 | """ 161 | filtering parameters. default skips that filter. 162 | :param catNms (str array) : get cats for given cat names 163 | :param supNms (str array) : get cats for given supercategory names 164 | :param catIds (int array) : get cats for given cat ids 165 | :return: ids (int array) : integer array of cat ids 166 | """ 167 | catNms = catNms if type(catNms) == list else [catNms] 168 | supNms = supNms if type(supNms) == list else [supNms] 169 | catIds = catIds if type(catIds) == list else [catIds] 170 | 171 | if len(catNms) == len(supNms) == len(catIds) == 0: 172 | cats = self.dataset['categories'] 173 | else: 174 | cats = self.dataset['categories'] 175 | cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms] 176 | cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms] 177 | cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds] 178 | ids = [cat['id'] for cat in cats] 179 | return ids 180 | 181 | def getImgIds(self, imgIds=[], catIds=[]): 182 | ''' 183 | Get img ids that satisfy given filter conditions. 184 | :param imgIds (int array) : get imgs for given ids 185 | :param catIds (int array) : get imgs with all given cats 186 | :return: ids (int array) : integer array of img ids 187 | ''' 188 | imgIds = imgIds if type(imgIds) == list else [imgIds] 189 | catIds = catIds if type(catIds) == list else [catIds] 190 | 191 | if len(imgIds) == len(catIds) == 0: 192 | ids = self.imgs.keys() 193 | else: 194 | ids = set(imgIds) 195 | for i, catId in enumerate(catIds): 196 | if i == 0 and len(ids) == 0: 197 | ids = set(self.catToImgs[catId]) 198 | else: 199 | ids &= set(self.catToImgs[catId]) 200 | return list(ids) 201 | 202 | def loadAnns(self, ids=[]): 203 | """ 204 | Load anns with the specified ids. 205 | :param ids (int array) : integer ids specifying anns 206 | :return: anns (object array) : loaded ann objects 207 | """ 208 | if type(ids) == list: 209 | return [self.anns[id] for id in ids] 210 | elif type(ids) == int: 211 | return [self.anns[ids]] 212 | 213 | def loadCats(self, ids=[]): 214 | """ 215 | Load cats with the specified ids. 
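
The id-based lookups above compose naturally into a query chain. A hypothetical session; the annotation path is an assumption, and any COCO-format instances file works:

    from pycocotools.coco import COCO

    coco = COCO('annotations/instances_val2014.json')
    cat_ids = coco.getCatIds(catNms=['person'])       # names -> category ids
    img_ids = coco.getImgIds(catIds=cat_ids)          # images containing people
    ann_ids = coco.getAnnIds(imgIds=img_ids[:1], catIds=cat_ids, iscrowd=None)
    anns = coco.loadAnns(ann_ids)                     # full annotation dicts
    print(len(anns))
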
216 | :param ids (int array) : integer ids specifying cats 217 | :return: cats (object array) : loaded cat objects 218 | """ 219 | if type(ids) == list: 220 | return [self.cats[id] for id in ids] 221 | elif type(ids) == int: 222 | return [self.cats[ids]] 223 | 224 | def loadImgs(self, ids=[]): 225 | """ 226 | Load anns with the specified ids. 227 | :param ids (int array) : integer ids specifying img 228 | :return: imgs (object array) : loaded img objects 229 | """ 230 | if type(ids) == list: 231 | return [self.imgs[id] for id in ids] 232 | elif type(ids) == int: 233 | return [self.imgs[ids]] 234 | 235 | def showAnns(self, anns): 236 | """ 237 | Display the specified annotations. 238 | :param anns (array of object): annotations to display 239 | :return: None 240 | """ 241 | if len(anns) == 0: 242 | return 0 243 | if 'segmentation' in anns[0]: 244 | datasetType = 'instances' 245 | elif 'caption' in anns[0]: 246 | datasetType = 'captions' 247 | if datasetType == 'instances': 248 | ax = plt.gca() 249 | polygons = [] 250 | color = [] 251 | for ann in anns: 252 | c = np.random.random((1, 3)).tolist()[0] 253 | if type(ann['segmentation']) == list: 254 | # polygon 255 | for seg in ann['segmentation']: 256 | poly = np.array(seg).reshape((len(seg)/2, 2)) 257 | polygons.append(Polygon(poly, True,alpha=0.4)) 258 | color.append(c) 259 | else: 260 | # mask 261 | t = self.imgs[ann['image_id']] 262 | if type(ann['segmentation']['counts']) == list: 263 | rle = mask.frPyObjects([ann['segmentation']], t['height'], t['width']) 264 | else: 265 | rle = [ann['segmentation']] 266 | m = mask.decode(rle) 267 | img = np.ones( (m.shape[0], m.shape[1], 3) ) 268 | if ann['iscrowd'] == 1: 269 | color_mask = np.array([2.0,166.0,101.0])/255 270 | if ann['iscrowd'] == 0: 271 | color_mask = np.random.random((1, 3)).tolist()[0] 272 | for i in range(3): 273 | img[:,:,i] = color_mask[i] 274 | ax.imshow(np.dstack( (img, m*0.5) )) 275 | p = PatchCollection(polygons, facecolors=color, edgecolors=(0,0,0,1), linewidths=3, alpha=0.4) 276 | ax.add_collection(p) 277 | elif datasetType == 'captions': 278 | for ann in anns: 279 | print ann['caption'] 280 | 281 | def loadRes(self, resFile): 282 | """ 283 | Load result file and return a result api object. 284 | :param resFile (str) : file name of result file 285 | :return: res (obj) : result api object 286 | """ 287 | res = COCO() 288 | res.dataset['images'] = [img for img in self.dataset['images']] 289 | # res.dataset['info'] = copy.deepcopy(self.dataset['info']) 290 | # res.dataset['licenses'] = copy.deepcopy(self.dataset['licenses']) 291 | 292 | print 'Loading and preparing results... 
' 293 | tic = time.time() 294 | anns = json.load(open(resFile)) 295 | assert type(anns) == list, 'results in not an array of objects' 296 | annsImgIds = [ann['image_id'] for ann in anns] 297 | assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ 298 | 'Results do not correspond to current coco set' 299 | if 'caption' in anns[0]: 300 | imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) 301 | res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] 302 | for id, ann in enumerate(anns): 303 | ann['id'] = id+1 304 | elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: 305 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 306 | for id, ann in enumerate(anns): 307 | bb = ann['bbox'] 308 | x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]] 309 | if not 'segmentation' in ann: 310 | ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] 311 | ann['area'] = bb[2]*bb[3] 312 | ann['id'] = id+1 313 | ann['iscrowd'] = 0 314 | elif 'segmentation' in anns[0]: 315 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 316 | for id, ann in enumerate(anns): 317 | # now only support compressed RLE format as segmentation results 318 | ann['area'] = mask.area([ann['segmentation']])[0] 319 | if not 'bbox' in ann: 320 | ann['bbox'] = mask.toBbox([ann['segmentation']])[0] 321 | ann['id'] = id+1 322 | ann['iscrowd'] = 0 323 | print 'DONE (t=%0.2fs)'%(time.time()- tic) 324 | 325 | res.dataset['annotations'] = anns 326 | res.createIndex() 327 | return res 328 | 329 | def download( self, tarDir = None, imgIds = [] ): 330 | ''' 331 | Download COCO images from mscoco.org server. 332 | :param tarDir (str): COCO results directory name 333 | imgIds (list): images to be downloaded 334 | :return: 335 | ''' 336 | if tarDir is None: 337 | print 'Please specify target directory' 338 | return -1 339 | if len(imgIds) == 0: 340 | imgs = self.imgs.values() 341 | else: 342 | imgs = self.loadImgs(imgIds) 343 | N = len(imgs) 344 | if not os.path.exists(tarDir): 345 | os.makedirs(tarDir) 346 | for i, img in enumerate(imgs): 347 | tic = time.time() 348 | fname = os.path.join(tarDir, img['file_name']) 349 | if not os.path.exists(fname): 350 | urllib.urlretrieve(img['coco_url'], fname) 351 | print 'downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic) 352 | -------------------------------------------------------------------------------- /lib/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 
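
The counts convention described above, runs of alternating values that always start with the number of zeros, is easy to reproduce. A pure-Python sketch of the uncompressed form, checked against the two examples in the comment:

    def rle_counts(bits):
        counts, prev, run = [], 0, 0    # RLE counts zeros first
        for b in bits:
            if b == prev:
                run += 1
            else:
                counts.append(run)
                prev, run = b, 1
        counts.append(run)
        return counts

    assert rle_counts([0, 0, 1, 1, 1, 0, 1]) == [2, 3, 1, 1]
    assert rle_counts([1, 1, 1, 1, 1, 1, 0]) == [0, 6, 1]
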
38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /lib/pycocotools/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include 9 | #include 10 | 11 | uint umin( uint a, uint b ) { return (ab) ? 
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | if(cnts) for(siz j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(siz i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; bool va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && ad?1:c=dy && xs>xe) || (dxye); 151 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 152 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 153 | if(dx>=dy) for( int d=0; d<=dx; d++ ) { 154 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 155 | } else for( int d=0; d<=dy; d++ ) { 156 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 157 | } 158 | } 159 | // get points along y-boundary and downsample 160 | free(x); free(y); k=m; m=0; double xd, yd; 161 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 162 | for( j=1; jw-1 ) continue; 165 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 167 | x[m]=(int) xd; y[m]=(int) yd; m++; 168 | } 169 | // compute rle encoding given y-boundary points 170 | k=m; a=malloc(sizeof(uint)*(k+1)); 171 | for( j=0; j0) b[m++]=a[j++]; else { 177 | j++; if(jm, p=0; long x; bool more; 184 | char *s=malloc(sizeof(char)*m*6); 185 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 187 | while( more ) { 188 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 189 | if(more) c |= 0x20; c+=48; s[p++]=c; 190 | } 191 | } 192 | s[p]=0; return s; 193 | } 194 | 195 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 196 | siz m=0, p=0, k; long x; bool more; uint *cnts; 197 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 198 | while( s[p] ) { 199 | x=0; k=0; more=1; 200 | while( more ) { 201 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 202 | more = c & 0x20; p++; k++; 203 | if(!more && (c & 0x10)) x |= -1 << 5*k; 204 | } 205 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 206 | } 207 | rleInit(R,h,w,m,cnts); free(cnts); 208 | } 209 | -------------------------------------------------------------------------------- /lib/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 
25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | RoIDataLayer implements a Caffe Python layer. 
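It is configured through the prototxt's python_param/param_str, which setup() below parses as YAML; for example, a hypothetical VOC training net would pass param_str: "'num_classes': 21" for 20 object classes plus background.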
11 | """ 12 | 13 | import caffe 14 | from fast_rcnn.config import cfg 15 | from roi_data_layer.minibatch import get_minibatch 16 | import numpy as np 17 | import yaml 18 | from multiprocessing import Process, Queue 19 | 20 | class RoIDataLayer(caffe.Layer): 21 | """Fast R-CNN data layer used for training.""" 22 | 23 | def _shuffle_roidb_inds(self): 24 | """Randomly permute the training roidb.""" 25 | if cfg.TRAIN.ASPECT_GROUPING: 26 | widths = np.array([r['width'] for r in self._roidb]) 27 | heights = np.array([r['height'] for r in self._roidb]) 28 | horz = (widths >= heights) 29 | vert = np.logical_not(horz) 30 | horz_inds = np.where(horz)[0] 31 | vert_inds = np.where(vert)[0] 32 | inds = np.hstack(( 33 | np.random.permutation(horz_inds), 34 | np.random.permutation(vert_inds))) 35 | inds = np.reshape(inds, (-1, 2)) 36 | row_perm = np.random.permutation(np.arange(inds.shape[0])) 37 | inds = np.reshape(inds[row_perm, :], (-1,)) 38 | self._perm = inds 39 | else: 40 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 41 | self._cur = 0 42 | 43 | def _get_next_minibatch_inds(self): 44 | """Return the roidb indices for the next minibatch.""" 45 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 46 | self._shuffle_roidb_inds() 47 | 48 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 49 | self._cur += cfg.TRAIN.IMS_PER_BATCH 50 | return db_inds 51 | 52 | def _get_next_minibatch(self): 53 | """Return the blobs to be used for the next minibatch. 54 | 55 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 56 | separate process and made available through self._blob_queue. 57 | """ 58 | if cfg.TRAIN.USE_PREFETCH: 59 | return self._blob_queue.get() 60 | else: 61 | db_inds = self._get_next_minibatch_inds() 62 | minibatch_db = [self._roidb[i] for i in db_inds] 63 | return get_minibatch(minibatch_db, self._num_classes) 64 | 65 | def set_roidb(self, roidb): 66 | """Set the roidb to be used by this layer during training.""" 67 | self._roidb = roidb 68 | self._shuffle_roidb_inds() 69 | if cfg.TRAIN.USE_PREFETCH: 70 | self._blob_queue = Queue(10) 71 | self._prefetch_process = BlobFetcher(self._blob_queue, 72 | self._roidb, 73 | self._num_classes) 74 | self._prefetch_process.start() 75 | # Terminate the child process when the parent exists 76 | def cleanup(): 77 | print 'Terminating BlobFetcher' 78 | self._prefetch_process.terminate() 79 | self._prefetch_process.join() 80 | import atexit 81 | atexit.register(cleanup) 82 | 83 | def setup(self, bottom, top): 84 | """Setup the RoIDataLayer.""" 85 | 86 | # parse the layer parameter string, which must be valid YAML 87 | layer_params = yaml.load(self.param_str_) 88 | 89 | self._num_classes = layer_params['num_classes'] 90 | 91 | self._name_to_top_map = {} 92 | 93 | # data blob: holds a batch of N images, each with 3 channels 94 | idx = 0 95 | top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 3, 96 | max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE) 97 | self._name_to_top_map['data'] = idx 98 | idx += 1 99 | 100 | if cfg.TRAIN.HAS_RPN: 101 | top[idx].reshape(1, 3) 102 | self._name_to_top_map['im_info'] = idx 103 | idx += 1 104 | 105 | top[idx].reshape(1, 4) 106 | self._name_to_top_map['gt_boxes'] = idx 107 | idx += 1 108 | else: # not using RPN 109 | # rois blob: holds R regions of interest, each is a 5-tuple 110 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 111 | # rectangle (x1, y1, x2, y2) 112 | top[idx].reshape(1, 5) 113 | self._name_to_top_map['rois'] = idx 114 | idx += 1 115 | 116 | # labels 
blob: R categorical labels in [0, ..., K] for K foreground 117 | # classes plus background 118 | top[idx].reshape(1) 119 | self._name_to_top_map['labels'] = idx 120 | idx += 1 121 | 122 | if cfg.TRAIN.BBOX_REG: 123 | # bbox_targets blob: R bounding-box regression targets with 4 124 | # targets per class 125 | top[idx].reshape(1, self._num_classes * 4) 126 | self._name_to_top_map['bbox_targets'] = idx 127 | idx += 1 128 | 129 | # bbox_inside_weights blob: At most 4 targets per roi are active; 130 | # this binary vector specifies the subset of active targets 131 | top[idx].reshape(1, self._num_classes * 4) 132 | self._name_to_top_map['bbox_inside_weights'] = idx 133 | idx += 1 134 | 135 | top[idx].reshape(1, self._num_classes * 4) 136 | self._name_to_top_map['bbox_outside_weights'] = idx 137 | idx += 1 138 | 139 | print 'RoiDataLayer: name_to_top:', self._name_to_top_map 140 | assert len(top) == len(self._name_to_top_map) 141 | 142 | def forward(self, bottom, top): 143 | """Get blobs and copy them into this layer's top blob vector.""" 144 | blobs = self._get_next_minibatch() 145 | 146 | for blob_name, blob in blobs.iteritems(): 147 | top_ind = self._name_to_top_map[blob_name] 148 | # Reshape net's input blobs 149 | top[top_ind].reshape(*(blob.shape)) 150 | # Copy data into net's input blobs 151 | top[top_ind].data[...] = blob.astype(np.float32, copy=False) 152 | 153 | def backward(self, top, propagate_down, bottom): 154 | """This layer does not propagate gradients.""" 155 | pass 156 | 157 | def reshape(self, bottom, top): 158 | """Reshaping happens during the call to forward.""" 159 | pass 160 | 161 | class BlobFetcher(Process): 162 | """Experimental class for prefetching blobs in a separate process.""" 163 | def __init__(self, queue, roidb, num_classes): 164 | super(BlobFetcher, self).__init__() 165 | self._queue = queue 166 | self._roidb = roidb 167 | self._num_classes = num_classes 168 | self._perm = None 169 | self._cur = 0 170 | self._shuffle_roidb_inds() 171 | # fix the random seed for reproducibility 172 | np.random.seed(cfg.RNG_SEED) 173 | 174 | def _shuffle_roidb_inds(self): 175 | """Randomly permute the training roidb.""" 176 | # TODO(rbg): remove duplicated code 177 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 178 | self._cur = 0 179 | 180 | def _get_next_minibatch_inds(self): 181 | """Return the roidb indices for the next minibatch.""" 182 | # TODO(rbg): remove duplicated code 183 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 184 | self._shuffle_roidb_inds() 185 | 186 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 187 | self._cur += cfg.TRAIN.IMS_PER_BATCH 188 | return db_inds 189 | 190 | def run(self): 191 | print 'BlobFetcher started' 192 | while True: 193 | db_inds = self._get_next_minibatch_inds() 194 | minibatch_db = [self._roidb[i] for i in db_inds] 195 | blobs = get_minibatch(minibatch_db, self._num_classes) 196 | self._queue.put(blobs) 197 | -------------------------------------------------------------------------------- /lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | 10 | import numpy as np 11 | import
numpy.random as npr 12 | import cv2 13 | from fast_rcnn.config import cfg 14 | from utils.blob import prep_im_for_blob, im_list_to_blob 15 | 16 | def get_minibatch(roidb, num_classes): 17 | """Given a roidb, construct a minibatch sampled from it.""" 18 | num_images = len(roidb) 19 | # Sample random scales to use for each image in this batch 20 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 21 | size=num_images) 22 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 23 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 24 | format(num_images, cfg.TRAIN.BATCH_SIZE) 25 | rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 26 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) 27 | 28 | # Get the input image blob, formatted for caffe 29 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 30 | 31 | blobs = {'data': im_blob} 32 | 33 | if cfg.TRAIN.HAS_RPN: 34 | assert len(im_scales) == 1, "Single batch only" 35 | assert len(roidb) == 1, "Single batch only" 36 | # gt boxes: (x1, y1, x2, y2, cls) 37 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 38 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 39 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 40 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 41 | blobs['gt_boxes'] = gt_boxes 42 | blobs['im_info'] = np.array( 43 | [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], 44 | dtype=np.float32) 45 | else: # not using RPN 46 | # Now, build the region of interest and label blobs 47 | rois_blob = np.zeros((0, 5), dtype=np.float32) 48 | labels_blob = np.zeros((0), dtype=np.float32) 49 | bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) 50 | bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) 51 | # all_overlaps = [] 52 | for im_i in xrange(num_images): 53 | labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \ 54 | = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, 55 | num_classes) 56 | 57 | # Add to RoIs blob 58 | rois = _project_im_rois(im_rois, im_scales[im_i]) 59 | batch_ind = im_i * np.ones((rois.shape[0], 1)) 60 | rois_blob_this_image = np.hstack((batch_ind, rois)) 61 | rois_blob = np.vstack((rois_blob, rois_blob_this_image)) 62 | 63 | # Add to labels, bbox targets, and bbox loss blobs 64 | labels_blob = np.hstack((labels_blob, labels)) 65 | bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets)) 66 | bbox_inside_blob = np.vstack((bbox_inside_blob, bbox_inside_weights)) 67 | # all_overlaps = np.hstack((all_overlaps, overlaps)) 68 | 69 | # For debug visualizations 70 | # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps) 71 | 72 | blobs['rois'] = rois_blob 73 | blobs['labels'] = labels_blob 74 | 75 | if cfg.TRAIN.BBOX_REG: 76 | blobs['bbox_targets'] = bbox_targets_blob 77 | blobs['bbox_inside_weights'] = bbox_inside_blob 78 | blobs['bbox_outside_weights'] = \ 79 | np.array(bbox_inside_blob > 0).astype(np.float32) 80 | 81 | return blobs 82 | 83 | def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): 84 | """Generate a random sample of RoIs comprising foreground and background 85 | examples. 
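For example, under the default config (TRAIN.BATCH_SIZE = 128, TRAIN.IMS_PER_BATCH = 2, TRAIN.FG_FRACTION = 0.25), each image contributes rois_per_image = 128 / 2 = 64 RoIs, of which at most round(0.25 * 64) = 16 may be foreground; the remainder is filled with background RoIs.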
86 | """ 87 | # label = class RoI has max overlap with 88 | labels = roidb['max_classes'] 89 | overlaps = roidb['max_overlaps'] 90 | rois = roidb['boxes'] 91 | 92 | # Select foreground RoIs as those with >= FG_THRESH overlap 93 | fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] 94 | # Guard against the case when an image has fewer than fg_rois_per_image 95 | # foreground RoIs 96 | fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) 97 | # Sample foreground regions without replacement 98 | if fg_inds.size > 0: 99 | fg_inds = npr.choice( 100 | fg_inds, size=fg_rois_per_this_image, replace=False) 101 | 102 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 103 | bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & 104 | (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 105 | # Compute number of background RoIs to take from this image (guarding 106 | # against there being fewer than desired) 107 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 108 | bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, 109 | bg_inds.size) 110 | # Sample foreground regions without replacement 111 | if bg_inds.size > 0: 112 | bg_inds = npr.choice( 113 | bg_inds, size=bg_rois_per_this_image, replace=False) 114 | 115 | # The indices that we're selecting (both fg and bg) 116 | keep_inds = np.append(fg_inds, bg_inds) 117 | # Select sampled values from various arrays: 118 | labels = labels[keep_inds] 119 | # Clamp labels for the background RoIs to 0 120 | labels[fg_rois_per_this_image:] = 0 121 | overlaps = overlaps[keep_inds] 122 | rois = rois[keep_inds] 123 | 124 | bbox_targets, bbox_inside_weights = _get_bbox_regression_labels( 125 | roidb['bbox_targets'][keep_inds, :], num_classes) 126 | 127 | return labels, overlaps, rois, bbox_targets, bbox_inside_weights 128 | 129 | def _get_image_blob(roidb, scale_inds): 130 | """Builds an input blob from the images in the roidb at the specified 131 | scales. 132 | """ 133 | num_images = len(roidb) 134 | processed_ims = [] 135 | im_scales = [] 136 | for i in xrange(num_images): 137 | im = cv2.imread(roidb[i]['image']) 138 | if roidb[i]['flipped']: 139 | im = im[:, ::-1, :] 140 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 141 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 142 | cfg.TRAIN.MAX_SIZE) 143 | im_scales.append(im_scale) 144 | processed_ims.append(im) 145 | 146 | # Create a blob to hold the input images 147 | blob = im_list_to_blob(processed_ims) 148 | 149 | return blob, im_scales 150 | 151 | def _project_im_rois(im_rois, im_scale_factor): 152 | """Project image RoIs into the rescaled training image.""" 153 | rois = im_rois * im_scale_factor 154 | return rois 155 | 156 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 157 | """Bounding-box regression targets are stored in a compact form in the 158 | roidb. 159 | 160 | This function expands those targets into the 4-of-4*K representation used 161 | by the network (i.e. only one class has non-zero targets). The loss weights 162 | are similarly expanded. 
163 | 164 | Returns: 165 | bbox_target_data (ndarray): N x 4K blob of regression targets 166 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 167 | """ 168 | clss = bbox_target_data[:, 0] 169 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 170 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 171 | inds = np.where(clss > 0)[0] 172 | for ind in inds: 173 | cls = clss[ind] 174 | start = 4 * cls 175 | end = start + 4 176 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 177 | bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS 178 | return bbox_targets, bbox_inside_weights 179 | 180 | def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): 181 | """Visualize a mini-batch for debugging.""" 182 | import matplotlib.pyplot as plt 183 | for i in xrange(rois_blob.shape[0]): 184 | rois = rois_blob[i, :] 185 | im_ind = rois[0] 186 | roi = rois[1:] 187 | im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() 188 | im += cfg.PIXEL_MEANS 189 | im = im[:, :, (2, 1, 0)] 190 | im = im.astype(np.uint8) 191 | cls = labels_blob[i] 192 | plt.imshow(im) 193 | print 'class: ', cls, ' overlap: ', overlaps[i] 194 | plt.gca().add_patch( 195 | plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], 196 | roi[3] - roi[1], fill=False, 197 | edgecolor='r', linewidth=3) 198 | ) 199 | plt.show() 200 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | from fast_rcnn.config import cfg 12 | from fast_rcnn.bbox_transform import bbox_transform 13 | from utils.cython_bbox import bbox_overlaps 14 | import PIL 15 | 16 | def prepare_roidb(imdb): 17 | """Enrich the imdb's roidb by adding some derived quantities that 18 | are useful for training. This function precomputes the maximum 19 | overlap, taken over ground-truth boxes, between each ROI and 20 | each ground-truth box. The class with maximum overlap is also 21 | recorded. 
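For example, a RoI whose IoU with ground-truth boxes of classes 7 and 12 is 0.4 and 0.6 respectively gets max_overlaps = 0.6 and max_classes = 12; a RoI overlapping no ground truth gets max_overlaps = 0 and max_classes = 0, i.e. background (the sanity checks below assert exactly this).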
22 | """ 23 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 24 | for i in xrange(imdb.num_images)] 25 | roidb = imdb.roidb 26 | for i in xrange(len(imdb.image_index)): 27 | roidb[i]['image'] = imdb.image_path_at(i) 28 | roidb[i]['width'] = sizes[i][0] 29 | roidb[i]['height'] = sizes[i][1] 30 | # need gt_overlaps as a dense array for argmax 31 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 32 | # max overlap with gt over classes (columns) 33 | max_overlaps = gt_overlaps.max(axis=1) 34 | # gt class that had the max overlap 35 | max_classes = gt_overlaps.argmax(axis=1) 36 | roidb[i]['max_classes'] = max_classes 37 | roidb[i]['max_overlaps'] = max_overlaps 38 | # sanity checks 39 | # max overlap of 0 => class should be zero (background) 40 | zero_inds = np.where(max_overlaps == 0)[0] 41 | assert all(max_classes[zero_inds] == 0) 42 | # max overlap > 0 => class should not be zero (must be a fg class) 43 | nonzero_inds = np.where(max_overlaps > 0)[0] 44 | assert all(max_classes[nonzero_inds] != 0) 45 | 46 | def add_bbox_regression_targets(roidb): 47 | """Add information needed to train bounding-box regressors.""" 48 | assert len(roidb) > 0 49 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 50 | 51 | num_images = len(roidb) 52 | # Infer number of classes from the number of columns in gt_overlaps 53 | num_classes = roidb[0]['gt_overlaps'].shape[1] 54 | for im_i in xrange(num_images): 55 | rois = roidb[im_i]['boxes'] 56 | max_overlaps = roidb[im_i]['max_overlaps'] 57 | max_classes = roidb[im_i]['max_classes'] 58 | roidb[im_i]['bbox_targets'] = \ 59 | _compute_targets(rois, max_overlaps, max_classes) 60 | 61 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 62 | # Use fixed / precomputed "means" and "stds" instead of empirical values 63 | means = np.tile( 64 | np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)) 65 | stds = np.tile( 66 | np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)) 67 | else: 68 | # Compute values needed for means and stds 69 | # var(x) = E(x^2) - E(x)^2 70 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 71 | sums = np.zeros((num_classes, 4)) 72 | squared_sums = np.zeros((num_classes, 4)) 73 | for im_i in xrange(num_images): 74 | targets = roidb[im_i]['bbox_targets'] 75 | for cls in xrange(1, num_classes): 76 | cls_inds = np.where(targets[:, 0] == cls)[0] 77 | if cls_inds.size > 0: 78 | class_counts[cls] += cls_inds.size 79 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 80 | squared_sums[cls, :] += \ 81 | (targets[cls_inds, 1:] ** 2).sum(axis=0) 82 | 83 | means = sums / class_counts 84 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 85 | 86 | print 'bbox target means:' 87 | print means 88 | print means[1:, :].mean(axis=0) # ignore bg class 89 | print 'bbox target stdevs:' 90 | print stds 91 | print stds[1:, :].mean(axis=0) # ignore bg class 92 | 93 | # Normalize targets 94 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: 95 | print "Normalizing targets" 96 | for im_i in xrange(num_images): 97 | targets = roidb[im_i]['bbox_targets'] 98 | for cls in xrange(1, num_classes): 99 | cls_inds = np.where(targets[:, 0] == cls)[0] 100 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 101 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 102 | else: 103 | print "NOT normalizing targets" 104 | 105 | # These values will be needed for making predictions 106 | # (the predicts will need to be unnormalized and uncentered) 107 | return means.ravel(), stds.ravel() 108 | 109 | def _compute_targets(rois, overlaps, 
labels): 110 | """Compute bounding-box regression targets for an image.""" 111 | # Indices of ground-truth ROIs 112 | gt_inds = np.where(overlaps == 1)[0] 113 | if len(gt_inds) == 0: 114 | # Bail if the image has no ground-truth ROIs 115 | return np.zeros((rois.shape[0], 5), dtype=np.float32) 116 | # Indices of examples for which we try to make predictions 117 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] 118 | 119 | # Get IoU overlap between each ex ROI and gt ROI 120 | ex_gt_overlaps = bbox_overlaps( 121 | np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), 122 | np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) 123 | 124 | # Find which gt ROI each ex ROI has max overlap with: 125 | # this will be the ex ROI's gt target 126 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 127 | gt_rois = rois[gt_inds[gt_assignment], :] 128 | ex_rois = rois[ex_inds, :] 129 | 130 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 131 | targets[ex_inds, 0] = labels[ex_inds] 132 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 133 | return targets 134 | -------------------------------------------------------------------------------- /lib/rpn/README.md: -------------------------------------------------------------------------------- 1 | ### `rpn` module overview 2 | 3 | ##### `generate_anchors.py` 4 | 5 | Generates a regular grid of multi-scale, multi-aspect anchor boxes. 6 | 7 | ##### `proposal_layer.py` 8 | 9 | Converts RPN outputs (per-anchor scores and bbox regression estimates) into object proposals. 10 | 11 | ##### `anchor_target_layer.py` 12 | 13 | Generates training targets/labels for each anchor. Classification labels are 1 (object), 0 (not object) or -1 (ignore). 14 | Bbox regression targets are specified when the classification label is > 0. 15 | 16 | ##### `proposal_target_layer.py` 17 | 18 | Generates training targets/labels for each object proposal: classification labels 0 - K (bg or object class 1, ... , K) 19 | and bbox regression targets when the label is > 0. 20 | 21 | ##### `generate.py` 22 | 23 | Generates object detection proposals from an imdb using an RPN. 24 | -------------------------------------------------------------------------------- /lib/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/rpn/anchor_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import caffe 10 | import yaml 11 | from fast_rcnn.config import cfg 12 | import numpy as np 13 | import numpy.random as npr 14 | from generate_anchors import generate_anchors 15 | from utils.cython_bbox import bbox_overlaps 16 | from fast_rcnn.bbox_transform import bbox_transform 17 | 18 | DEBUG = False 19 | 20 | class AnchorTargetLayer(caffe.Layer): 21 | """ 22 | Assign anchors to ground-truth targets.
Produces anchor classification 23 | labels and bounding-box regression targets. 24 | """ 25 | 26 | def setup(self, bottom, top): 27 | layer_params = yaml.load(self.param_str_) 28 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 29 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 30 | self._num_anchors = self._anchors.shape[0] 31 | self._feat_stride = layer_params['feat_stride'] 32 | 33 | if DEBUG: 34 | print 'anchors:' 35 | print self._anchors 36 | print 'anchor shapes:' 37 | print np.hstack(( 38 | self._anchors[:, 2::4] - self._anchors[:, 0::4], 39 | self._anchors[:, 3::4] - self._anchors[:, 1::4], 40 | )) 41 | self._counts = cfg.EPS 42 | self._sums = np.zeros((1, 4)) 43 | self._squared_sums = np.zeros((1, 4)) 44 | self._fg_sum = 0 45 | self._bg_sum = 0 46 | self._count = 0 47 | 48 | # allow boxes to sit over the edge by a small amount 49 | self._allowed_border = layer_params.get('allowed_border', 0) 50 | 51 | height, width = bottom[0].data.shape[-2:] 52 | if DEBUG: 53 | print 'AnchorTargetLayer: height', height, 'width', width 54 | 55 | A = self._num_anchors 56 | # labels 57 | top[0].reshape(1, 1, A * height, width) 58 | # bbox_targets 59 | top[1].reshape(1, A * 4, height, width) 60 | # bbox_inside_weights 61 | top[2].reshape(1, A * 4, height, width) 62 | # bbox_outside_weights 63 | top[3].reshape(1, A * 4, height, width) 64 | 65 | def forward(self, bottom, top): 66 | # Algorithm: 67 | # 68 | # for each (H, W) location i 69 | # generate 9 anchor boxes centered on cell i 70 | # apply predicted bbox deltas at cell i to each of the 9 anchors 71 | # filter out-of-image anchors 72 | # measure GT overlap 73 | 74 | assert bottom[0].data.shape[0] == 1, \ 75 | 'Only single item batches are supported' 76 | 77 | # map of shape (..., H, W) 78 | height, width = bottom[0].data.shape[-2:] 79 | # GT boxes (x1, y1, x2, y2, label) 80 | gt_boxes = bottom[1].data 81 | # im_info 82 | im_info = bottom[2].data[0, :] 83 | 84 | if DEBUG: 85 | print '' 86 | print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 87 | print 'scale: {}'.format(im_info[2]) 88 | print 'height, width: ({}, {})'.format(height, width) 89 | print 'rpn: gt_boxes.shape', gt_boxes.shape 90 | print 'rpn: gt_boxes', gt_boxes 91 | 92 | # 1. 
Generate proposals from bbox deltas and shifted anchors 93 | shift_x = np.arange(0, width) * self._feat_stride 94 | shift_y = np.arange(0, height) * self._feat_stride 95 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 96 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 97 | shift_x.ravel(), shift_y.ravel())).transpose() 98 | # add A anchors (1, A, 4) to 99 | # cell K shifts (K, 1, 4) to get 100 | # shift anchors (K, A, 4) 101 | # reshape to (K*A, 4) shifted anchors 102 | A = self._num_anchors 103 | K = shifts.shape[0] 104 | all_anchors = (self._anchors.reshape((1, A, 4)) + 105 | shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 106 | all_anchors = all_anchors.reshape((K * A, 4)) 107 | total_anchors = int(K * A) 108 | 109 | # only keep anchors inside the image 110 | inds_inside = np.where( 111 | (all_anchors[:, 0] >= -self._allowed_border) & 112 | (all_anchors[:, 1] >= -self._allowed_border) & 113 | (all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width 114 | (all_anchors[:, 3] < im_info[0] + self._allowed_border) # height 115 | )[0] 116 | 117 | if DEBUG: 118 | print 'total_anchors', total_anchors 119 | print 'inds_inside', len(inds_inside) 120 | 121 | # keep only inside anchors 122 | anchors = all_anchors[inds_inside, :] 123 | if DEBUG: 124 | print 'anchors.shape', anchors.shape 125 | 126 | # label: 1 is positive, 0 is negative, -1 is dont care 127 | labels = np.empty((len(inds_inside), ), dtype=np.float32) 128 | labels.fill(-1) 129 | 130 | # overlaps between the anchors and the gt boxes 131 | # overlaps (ex, gt) 132 | overlaps = bbox_overlaps( 133 | np.ascontiguousarray(anchors, dtype=np.float), 134 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 135 | argmax_overlaps = overlaps.argmax(axis=1) 136 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 137 | gt_argmax_overlaps = overlaps.argmax(axis=0) 138 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 139 | np.arange(overlaps.shape[1])] 140 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 141 | 142 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: 143 | # assign bg labels first so that positive labels can clobber them 144 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 145 | 146 | # fg label: for each gt, anchor with highest overlap 147 | labels[gt_argmax_overlaps] = 1 148 | 149 | # fg label: above threshold IOU 150 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 151 | 152 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES: 153 | # assign bg labels last so that negative labels can clobber positives 154 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 155 | 156 | # subsample positive labels if we have too many 157 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) 158 | fg_inds = np.where(labels == 1)[0] 159 | if len(fg_inds) > num_fg: 160 | disable_inds = npr.choice( 161 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 162 | labels[disable_inds] = -1 163 | 164 | # subsample negative labels if we have too many 165 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) 166 | bg_inds = np.where(labels == 0)[0] 167 | if len(bg_inds) > num_bg: 168 | disable_inds = npr.choice( 169 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 170 | labels[disable_inds] = -1 171 | #print "was %s inds, disabling %s, now %s inds" % ( 172 | #len(bg_inds), len(disable_inds), np.sum(labels == 0)) 173 | 174 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) 175 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 176 | 177 | 
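# How the two weight blobs below are consumed (a sketch of the weighted
# SmoothL1 loss in this fork's Caffe; see its SmoothL1LossLayer): each
# coordinate contributes roughly
#   bbox_outside_weights * SmoothL1(bbox_inside_weights * (pred - target))
# so the inside weights gate which entries participate (positive anchors
# only), while the outside weights normalize the loss, uniformly over the
# sampled examples when RPN_POSITIVE_WEIGHT < 0 as handled below.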
bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 178 | bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) 179 | 180 | bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 181 | if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: 182 | # uniform weighting of examples (given non-uniform sampling) 183 | num_examples = np.sum(labels >= 0) 184 | positive_weights = np.ones((1, 4)) * 1.0 / num_examples 185 | negative_weights = np.ones((1, 4)) * 1.0 / num_examples 186 | else: 187 | assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & 188 | (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) 189 | positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / 190 | np.sum(labels == 1)) 191 | negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / 192 | np.sum(labels == 0)) 193 | bbox_outside_weights[labels == 1, :] = positive_weights 194 | bbox_outside_weights[labels == 0, :] = negative_weights 195 | 196 | if DEBUG: 197 | self._sums += bbox_targets[labels == 1, :].sum(axis=0) 198 | self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) 199 | self._counts += np.sum(labels == 1) 200 | means = self._sums / self._counts 201 | stds = np.sqrt(self._squared_sums / self._counts - means ** 2) 202 | print 'means:' 203 | print means 204 | print 'stdevs:' 205 | print stds 206 | 207 | # map up to original set of anchors 208 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 209 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 210 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 211 | bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) 212 | 213 | if DEBUG: 214 | print 'rpn: max max_overlap', np.max(max_overlaps) 215 | print 'rpn: num_positive', np.sum(labels == 1) 216 | print 'rpn: num_negative', np.sum(labels == 0) 217 | self._fg_sum += np.sum(labels == 1) 218 | self._bg_sum += np.sum(labels == 0) 219 | self._count += 1 220 | print 'rpn: num_positive avg', self._fg_sum / self._count 221 | print 'rpn: num_negative avg', self._bg_sum / self._count 222 | 223 | # labels 224 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) 225 | labels = labels.reshape((1, 1, A * height, width)) 226 | top[0].reshape(*labels.shape) 227 | top[0].data[...] = labels 228 | 229 | # bbox_targets 230 | bbox_targets = bbox_targets \ 231 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 232 | top[1].reshape(*bbox_targets.shape) 233 | top[1].data[...] = bbox_targets 234 | 235 | # bbox_inside_weights 236 | bbox_inside_weights = bbox_inside_weights \ 237 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 238 | assert bbox_inside_weights.shape[2] == height 239 | assert bbox_inside_weights.shape[3] == width 240 | top[2].reshape(*bbox_inside_weights.shape) 241 | top[2].data[...] = bbox_inside_weights 242 | 243 | # bbox_outside_weights 244 | bbox_outside_weights = bbox_outside_weights \ 245 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 246 | assert bbox_outside_weights.shape[2] == height 247 | assert bbox_outside_weights.shape[3] == width 248 | top[3].reshape(*bbox_outside_weights.shape) 249 | top[3].data[...] 
= bbox_outside_weights 250 | 251 | def backward(self, top, propagate_down, bottom): 252 | """This layer does not propagate gradients.""" 253 | pass 254 | 255 | def reshape(self, bottom, top): 256 | """Reshaping happens during the call to forward.""" 257 | pass 258 | 259 | 260 | def _unmap(data, count, inds, fill=0): 261 | """ Unmap a subset of item (data) back to the original set of items (of 262 | size count) """ 263 | if len(data.shape) == 1: 264 | ret = np.empty((count, ), dtype=np.float32) 265 | ret.fill(fill) 266 | ret[inds] = data 267 | else: 268 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) 269 | ret.fill(fill) 270 | ret[inds, :] = data 271 | return ret 272 | 273 | 274 | def _compute_targets(ex_rois, gt_rois): 275 | """Compute bounding-box regression targets for an image.""" 276 | 277 | assert ex_rois.shape[0] == gt_rois.shape[0] 278 | assert ex_rois.shape[1] == 4 279 | assert gt_rois.shape[1] == 5 280 | 281 | return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) 282 | -------------------------------------------------------------------------------- /lib/rpn/generate.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from fast_rcnn.config import cfg 9 | from utils.blob import im_list_to_blob 10 | from utils.timer import Timer 11 | import numpy as np 12 | import cv2 13 | import matplotlib.pyplot as plt # needed by _vis_proposals and imdb_proposals below; missing from the original imports 14 | def _vis_proposals(im, dets, thresh=0.5): 15 | """Draw detected bounding boxes.""" 16 | inds = np.where(dets[:, -1] >= thresh)[0] 17 | if len(inds) == 0: 18 | return 19 | 20 | class_name = 'obj' 21 | im = im[:, :, (2, 1, 0)] 22 | fig, ax = plt.subplots(figsize=(12, 12)) 23 | ax.imshow(im, aspect='equal') 24 | for i in inds: 25 | bbox = dets[i, :4] 26 | score = dets[i, -1] 27 | 28 | ax.add_patch( 29 | plt.Rectangle((bbox[0], bbox[1]), 30 | bbox[2] - bbox[0], 31 | bbox[3] - bbox[1], fill=False, 32 | edgecolor='red', linewidth=3.5) 33 | ) 34 | ax.text(bbox[0], bbox[1] - 2, 35 | '{:s} {:.3f}'.format(class_name, score), 36 | bbox=dict(facecolor='blue', alpha=0.5), 37 | fontsize=14, color='white') 38 | 39 | ax.set_title(('{} detections with ' 40 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 41 | thresh), 42 | fontsize=14) 43 | plt.axis('off') 44 | plt.tight_layout() 45 | plt.draw() 46 | 47 | def _get_image_blob(im): 48 | """Converts an image into a network input.
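For example, under the default test config (cfg.TEST.SCALES = (600,), cfg.TEST.MAX_SIZE = 1000), a 375x500 image is scaled by 600 / 375 = 1.6 to 600x800, while a 500x1500 image would instead be capped at scale 1000 / 1500 ~= 0.667 so its longer side stays within MAX_SIZE.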
49 | 50 | Arguments: 51 | im (ndarray): a color image in BGR order 52 | 53 | Returns: 54 | blob (ndarray): a data blob holding an image pyramid 55 | im_scale_factors (list): list of image scales (relative to im) used 56 | in the image pyramid 57 | """ 58 | im_orig = im.astype(np.float32, copy=True) 59 | im_orig -= cfg.PIXEL_MEANS 60 | 61 | im_shape = im_orig.shape 62 | im_size_min = np.min(im_shape[0:2]) 63 | im_size_max = np.max(im_shape[0:2]) 64 | 65 | processed_ims = [] 66 | 67 | assert len(cfg.TEST.SCALES) == 1 68 | target_size = cfg.TEST.SCALES[0] 69 | 70 | im_scale = float(target_size) / float(im_size_min) 71 | # Prevent the biggest axis from being more than MAX_SIZE 72 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 73 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 74 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 75 | interpolation=cv2.INTER_LINEAR) 76 | im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :] 77 | processed_ims.append(im) 78 | 79 | # Create a blob to hold the input images 80 | blob = im_list_to_blob(processed_ims) 81 | 82 | return blob, im_info 83 | 84 | def im_proposals(net, im): 85 | """Generate RPN proposals on a single image.""" 86 | blobs = {} 87 | blobs['data'], blobs['im_info'] = _get_image_blob(im) 88 | net.blobs['data'].reshape(*(blobs['data'].shape)) 89 | net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) 90 | blobs_out = net.forward( 91 | data=blobs['data'].astype(np.float32, copy=False), 92 | im_info=blobs['im_info'].astype(np.float32, copy=False)) 93 | 94 | scale = blobs['im_info'][0, 2] 95 | boxes = blobs_out['rois'][:, 1:].copy() / scale 96 | scores = blobs_out['scores'].copy() 97 | return boxes, scores 98 | 99 | def imdb_proposals(net, imdb): 100 | """Generate RPN proposals on all images in an imdb.""" 101 | 102 | _t = Timer() 103 | imdb_boxes = [[] for _ in xrange(imdb.num_images)] 104 | for i in xrange(imdb.num_images): 105 | im = cv2.imread(imdb.image_path_at(i)) 106 | _t.tic() 107 | imdb_boxes[i], scores = im_proposals(net, im) 108 | _t.toc() 109 | print 'im_proposals: {:d}/{:d} {:.3f}s' \ 110 | .format(i + 1, imdb.num_images, _t.average_time) 111 | if 0: 112 | dets = np.hstack((imdb_boxes[i], scores)) 113 | # from IPython import embed; embed() 114 | _vis_proposals(im, dets[:3, :], thresh=0.9) 115 | plt.show() 116 | 117 | return imdb_boxes 118 | -------------------------------------------------------------------------------- /lib/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 11 | # 12 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 13 | # >> anchors 14 | # 15 | # anchors = 16 | # 17 | # -83 -39 100 56 18 | # -175 -87 192 104 19 | # -359 -183 376 200 20 | # -55 -55 72 72 21 | # -119 -119 136 136 22 | # -247 -247 264 264 23 | # -35 -79 52 96 24 | # -79 -167 96 184 25 | # -167 -343 184 360 26 | 27 | #array([[ -83., -39., 100., 56.], 28 | # [-175., -87., 192., 104.], 29 | # [-359., -183., 376., 200.], 30 | # [ -55., -55., 72., 72.], 31 | # [-119., -119., 136., 136.], 32 | # [-247., -247., 264., 264.], 33 | # [ -35., 
-79., 52., 96.], 34 | # [ -79., -167., 96., 184.], 35 | # [-167., -343., 184., 360.]]) 36 | 37 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 38 | scales=2**np.arange(3, 6)): 39 | """ 40 | Generate anchor (reference) windows by enumerating aspect ratios X 41 | scales wrt a reference (0, 0, 15, 15) window. 42 | """ 43 | 44 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 45 | ratio_anchors = _ratio_enum(base_anchor, ratios) 46 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 47 | for i in xrange(ratio_anchors.shape[0])]) 48 | return anchors 49 | 50 | def _whctrs(anchor): 51 | """ 52 | Return width, height, x center, and y center for an anchor (window). 53 | """ 54 | 55 | w = anchor[2] - anchor[0] + 1 56 | h = anchor[3] - anchor[1] + 1 57 | x_ctr = anchor[0] + 0.5 * (w - 1) 58 | y_ctr = anchor[1] + 0.5 * (h - 1) 59 | return w, h, x_ctr, y_ctr 60 | 61 | def _mkanchors(ws, hs, x_ctr, y_ctr): 62 | """ 63 | Given a vector of widths (ws) and heights (hs) around a center 64 | (x_ctr, y_ctr), output a set of anchors (windows). 65 | """ 66 | 67 | ws = ws[:, np.newaxis] 68 | hs = hs[:, np.newaxis] 69 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 70 | y_ctr - 0.5 * (hs - 1), 71 | x_ctr + 0.5 * (ws - 1), 72 | y_ctr + 0.5 * (hs - 1))) 73 | return anchors 74 | 75 | def _ratio_enum(anchor, ratios): 76 | """ 77 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 78 | """ 79 | 80 | w, h, x_ctr, y_ctr = _whctrs(anchor) 81 | size = w * h 82 | size_ratios = size / ratios 83 | ws = np.round(np.sqrt(size_ratios)) 84 | hs = np.round(ws * ratios) 85 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 86 | return anchors 87 | 88 | def _scale_enum(anchor, scales): 89 | """ 90 | Enumerate a set of anchors for each scale wrt an anchor. 91 | """ 92 | 93 | w, h, x_ctr, y_ctr = _whctrs(anchor) 94 | ws = w * scales 95 | hs = h * scales 96 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 97 | return anchors 98 | 99 | if __name__ == '__main__': 100 | import time 101 | t = time.time() 102 | a = generate_anchors() 103 | print time.time() - t 104 | print a 105 | from IPython import embed; embed() 106 | -------------------------------------------------------------------------------- /lib/rpn/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import caffe 9 | import numpy as np 10 | import yaml 11 | from fast_rcnn.config import cfg 12 | from generate_anchors import generate_anchors 13 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes 14 | from fast_rcnn.nms_wrapper import nms 15 | 16 | DEBUG = False 17 | 18 | class ProposalLayer(caffe.Layer): 19 | """ 20 | Outputs object detection proposals by applying estimated bounding-box 21 | transformations to a set of regular boxes (called "anchors"). 
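Concretely, an anchor with center (x_a, y_a) and size (w_a, h_a), combined with predicted deltas (dx, dy, dw, dh), decodes via bbox_transform_inv to x = dx * w_a + x_a, y = dy * h_a + y_a, w = w_a * exp(dw), h = h_a * exp(dh), after which forward() clips the boxes to the image and filters them with NMS.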
22 | """ 23 | 24 | def setup(self, bottom, top): 25 | # parse the layer parameter string, which must be valid YAML 26 | layer_params = yaml.load(self.param_str_) 27 | 28 | self._feat_stride = layer_params['feat_stride'] 29 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 30 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 31 | self._num_anchors = self._anchors.shape[0] 32 | 33 | if DEBUG: 34 | print 'feat_stride: {}'.format(self._feat_stride) 35 | print 'anchors:' 36 | print self._anchors 37 | 38 | # rois blob: holds R regions of interest, each is a 5-tuple 39 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 40 | # rectangle (x1, y1, x2, y2) 41 | top[0].reshape(1, 5) 42 | 43 | # scores blob: holds scores for R regions of interest 44 | if len(top) > 1: 45 | top[1].reshape(1, 1, 1, 1) 46 | 47 | def forward(self, bottom, top): 48 | # Algorithm: 49 | # 50 | # for each (H, W) location i 51 | # generate A anchor boxes centered on cell i 52 | # apply predicted bbox deltas at cell i to each of the A anchors 53 | # clip predicted boxes to image 54 | # remove predicted boxes with either height or width < threshold 55 | # sort all (proposal, score) pairs by score from highest to lowest 56 | # take top pre_nms_topN proposals before NMS 57 | # apply NMS with threshold 0.7 to remaining proposals 58 | # take after_nms_topN proposals after NMS 59 | # return the top proposals (-> RoIs top, scores top) 60 | 61 | assert bottom[0].data.shape[0] == 1, \ 62 | 'Only single item batches are supported' 63 | 64 | cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' 65 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 66 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 67 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 68 | min_size = cfg[cfg_key].RPN_MIN_SIZE 69 | 70 | # the first set of _num_anchors channels are bg probs 71 | # the second set are the fg probs, which we want 72 | scores = bottom[0].data[:, self._num_anchors:, :, :] 73 | bbox_deltas = bottom[1].data 74 | im_info = bottom[2].data[0, :] 75 | 76 | if DEBUG: 77 | print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 78 | print 'scale: {}'.format(im_info[2]) 79 | 80 | # 1. 
Generate proposals from bbox deltas and shifted anchors 81 | height, width = scores.shape[-2:] 82 | 83 | if DEBUG: 84 | print 'score map size: {}'.format(scores.shape) 85 | 86 | # Enumerate all shifts 87 | shift_x = np.arange(0, width) * self._feat_stride 88 | shift_y = np.arange(0, height) * self._feat_stride 89 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 90 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 91 | shift_x.ravel(), shift_y.ravel())).transpose() 92 | 93 | # Enumerate all shifted anchors: 94 | # 95 | # add A anchors (1, A, 4) to 96 | # cell K shifts (K, 1, 4) to get 97 | # shift anchors (K, A, 4) 98 | # reshape to (K*A, 4) shifted anchors 99 | A = self._num_anchors 100 | K = shifts.shape[0] 101 | anchors = self._anchors.reshape((1, A, 4)) + \ 102 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 103 | anchors = anchors.reshape((K * A, 4)) 104 | 105 | # Transpose and reshape predicted bbox transformations to get them 106 | # into the same order as the anchors: 107 | # 108 | # bbox deltas will be (1, 4 * A, H, W) format 109 | # transpose to (1, H, W, 4 * A) 110 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 111 | # in slowest to fastest order 112 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) 113 | 114 | # Same story for the scores: 115 | # 116 | # scores are (1, A, H, W) format 117 | # transpose to (1, H, W, A) 118 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 119 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 120 | 121 | # Convert anchors into proposals via bbox transformations 122 | proposals = bbox_transform_inv(anchors, bbox_deltas) 123 | 124 | # 2. clip predicted boxes to image 125 | proposals = clip_boxes(proposals, im_info[:2]) 126 | 127 | # 3. remove predicted boxes with either height or width < threshold 128 | # (NOTE: convert min_size to input image scale stored in im_info[2]) 129 | keep = _filter_boxes(proposals, min_size * im_info[2]) 130 | proposals = proposals[keep, :] 131 | scores = scores[keep] 132 | 133 | # 4. sort all (proposal, score) pairs by score from highest to lowest 134 | # 5. take top pre_nms_topN (e.g. 6000) 135 | order = scores.ravel().argsort()[::-1] 136 | if pre_nms_topN > 0: 137 | order = order[:pre_nms_topN] 138 | proposals = proposals[order, :] 139 | scores = scores[order] 140 | 141 | # 6. apply nms (e.g. threshold = 0.7) 142 | # 7. take after_nms_topN (e.g. 300) 143 | # 8. return the top proposals (-> RoIs top) 144 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 145 | if post_nms_topN > 0: 146 | keep = keep[:post_nms_topN] 147 | proposals = proposals[keep, :] 148 | scores = scores[keep] 149 | 150 | # Output rois blob 151 | # Our RPN implementation only supports a single input image, so all 152 | # batch inds are 0 153 | batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) 154 | blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) 155 | top[0].reshape(*(blob.shape)) 156 | top[0].data[...] = blob 157 | 158 | # [Optional] output scores blob 159 | if len(top) > 1: 160 | top[1].reshape(*(scores.shape)) 161 | top[1].data[...] 
= scores 162 | 163 | def backward(self, top, propagate_down, bottom): 164 | """This layer does not propagate gradients.""" 165 | pass 166 | 167 | def reshape(self, bottom, top): 168 | """Reshaping happens during the call to forward.""" 169 | pass 170 | 171 | def _filter_boxes(boxes, min_size): 172 | """Remove all boxes with any side smaller than min_size.""" 173 | ws = boxes[:, 2] - boxes[:, 0] + 1 174 | hs = boxes[:, 3] - boxes[:, 1] + 1 175 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 176 | return keep 177 | -------------------------------------------------------------------------------- /lib/rpn/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import caffe 9 | import yaml 10 | import numpy as np 11 | import numpy.random as npr 12 | from fast_rcnn.config import cfg 13 | from fast_rcnn.bbox_transform import bbox_transform 14 | from utils.cython_bbox import bbox_overlaps 15 | 16 | DEBUG = False 17 | 18 | class ProposalTargetLayer(caffe.Layer): 19 | """ 20 | Assign object detection proposals to ground-truth targets. Produces proposal 21 | classification labels and bounding-box regression targets. 22 | """ 23 | 24 | def setup(self, bottom, top): 25 | layer_params = yaml.load(self.param_str_) 26 | self._num_classes = layer_params['num_classes'] 27 | 28 | # sampled rois (0, x1, y1, x2, y2) 29 | top[0].reshape(1, 5) 30 | # labels 31 | top[1].reshape(1, 1) 32 | # bbox_targets 33 | top[2].reshape(1, self._num_classes * 4) 34 | # bbox_inside_weights 35 | top[3].reshape(1, self._num_classes * 4) 36 | # bbox_outside_weights 37 | top[4].reshape(1, self._num_classes * 4) 38 | 39 | def forward(self, bottom, top): 40 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 41 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 42 | all_rois = bottom[0].data 43 | # GT boxes (x1, y1, x2, y2, label) 44 | # TODO(rbg): it's annoying that sometimes I have extra info before 45 | # and other times after box coordinates -- normalize to one format 46 | gt_boxes = bottom[1].data 47 | 48 | # Include ground-truth boxes in the set of candidate rois 49 | zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) 50 | all_rois = np.vstack( 51 | (all_rois, np.hstack((zeros, gt_boxes[:, :-1]))) 52 | ) 53 | 54 | # Sanity check: single batch only 55 | assert np.all(all_rois[:, 0] == 0), \ 56 | 'Only single item batches are supported' 57 | 58 | num_images = 1 59 | rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 60 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) 61 | 62 | # Sample rois with classification labels and bounding box regression 63 | # targets 64 | labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( 65 | all_rois, gt_boxes, fg_rois_per_image, 66 | rois_per_image, self._num_classes) 67 | 68 | if DEBUG: 69 | print 'num fg: {}'.format((labels > 0).sum()) 70 | print 'num bg: {}'.format((labels == 0).sum()) 71 | self._count += 1 72 | self._fg_num += (labels > 0).sum() 73 | self._bg_num += (labels == 0).sum() 74 | print 'num fg avg: {}'.format(self._fg_num / self._count) 75 | print 'num bg avg: {}'.format(self._bg_num / self._count) 76 | print 'ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num)) 
77 | 78 | # sampled rois 79 | top[0].reshape(*rois.shape) 80 | top[0].data[...] = rois 81 | 82 | # classification labels 83 | top[1].reshape(*labels.shape) 84 | top[1].data[...] = labels 85 | 86 | # bbox_targets 87 | top[2].reshape(*bbox_targets.shape) 88 | top[2].data[...] = bbox_targets 89 | 90 | # bbox_inside_weights 91 | top[3].reshape(*bbox_inside_weights.shape) 92 | top[3].data[...] = bbox_inside_weights 93 | 94 | # bbox_outside_weights 95 | top[4].reshape(*bbox_inside_weights.shape) 96 | top[4].data[...] = np.array(bbox_inside_weights > 0).astype(np.float32) 97 | 98 | def backward(self, top, propagate_down, bottom): 99 | """This layer does not propagate gradients.""" 100 | pass 101 | 102 | def reshape(self, bottom, top): 103 | """Reshaping happens during the call to forward.""" 104 | pass 105 | 106 | 107 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 108 | """Bounding-box regression targets (bbox_target_data) are stored in a 109 | compact form N x (class, tx, ty, tw, th) 110 | 111 | This function expands those targets into the 4-of-4*K representation used 112 | by the network (i.e. only one class has non-zero targets). 113 | 114 | Returns: 115 | bbox_target (ndarray): N x 4K blob of regression targets 116 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 117 | """ 118 | 119 | clss = bbox_target_data[:, 0] 120 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 121 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 122 | inds = np.where(clss > 0)[0] 123 | for ind in inds: 124 | cls = clss[ind] 125 | start = 4 * cls 126 | end = start + 4 127 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 128 | bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS 129 | return bbox_targets, bbox_inside_weights 130 | 131 | 132 | def _compute_targets(ex_rois, gt_rois, labels): 133 | """Compute bounding-box regression targets for an image.""" 134 | 135 | assert ex_rois.shape[0] == gt_rois.shape[0] 136 | assert ex_rois.shape[1] == 4 137 | assert gt_rois.shape[1] == 4 138 | 139 | targets = bbox_transform(ex_rois, gt_rois) 140 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 141 | # Optionally normalize targets by a precomputed mean and stdev 142 | targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 143 | / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 144 | return np.hstack( 145 | (labels[:, np.newaxis], targets)).astype(np.float32, copy=False) 146 | 147 | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): 148 | """Generate a random sample of RoIs comprising foreground and background 149 | examples. 
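Note that forward() appended the ground-truth boxes to the candidate RoIs, so any annotated image yields at least one candidate whose max overlap is 1.0 and the foreground set is non-empty whenever ground truth exists.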
150 | """
151 | # overlaps: (rois x gt_boxes)
152 | overlaps = bbox_overlaps(
153 | np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
154 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
155 | gt_assignment = overlaps.argmax(axis=1)
156 | max_overlaps = overlaps.max(axis=1)
157 | labels = gt_boxes[gt_assignment, 4]
158 | 
159 | # Select foreground RoIs as those with >= FG_THRESH overlap
160 | fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
161 | # Guard against the case when an image has fewer than fg_rois_per_image
162 | # foreground RoIs
163 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
164 | # Sample foreground regions without replacement
165 | if fg_inds.size > 0:
166 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
167 | 
168 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
169 | bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
170 | (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
171 | # Compute number of background RoIs to take from this image (guarding
172 | # against there being fewer than desired)
173 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
174 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
175 | # Sample background regions without replacement
176 | if bg_inds.size > 0:
177 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
178 | 
179 | # The indices that we're selecting (both fg and bg)
180 | keep_inds = np.append(fg_inds, bg_inds)
181 | # Select sampled values from various arrays:
182 | labels = labels[keep_inds]
183 | # Clamp labels for the background RoIs to 0
184 | labels[fg_rois_per_this_image:] = 0
185 | rois = all_rois[keep_inds]
186 | 
187 | bbox_target_data = _compute_targets(
188 | rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
189 | 
190 | bbox_targets, bbox_inside_weights = \
191 | _get_bbox_regression_labels(bbox_target_data, num_classes)
192 | 
193 | return labels, rois, bbox_targets, bbox_inside_weights
194 | 
--------------------------------------------------------------------------------
/lib/setup.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | import os
10 | from os.path import join as pjoin
11 | #from distutils.core import setup
12 | from setuptools import setup
13 | from distutils.extension import Extension
14 | from Cython.Distutils import build_ext
15 | import subprocess
16 | 
17 | #change for windows, by MrX
18 | nvcc_bin = 'nvcc.exe'
19 | lib_dir = 'lib/x64'
20 | 
21 | def find_in_path(name, path):
22 | "Find a file in a search path"
23 | # Adapted from
24 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
25 | for dir in path.split(os.pathsep):
26 | binpath = pjoin(dir, name)
27 | if os.path.exists(binpath):
28 | return os.path.abspath(binpath)
29 | return None
30 | 
31 | 
32 | def locate_cuda():
33 | """Locate the CUDA environment on the system
34 | 
35 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
36 | and values giving the absolute path to each directory.
37 | 
38 | Starts by looking for the CUDA_PATH env variable. If not found, everything
39 | is based on finding 'nvcc' in the PATH.
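A typical return value on Windows looks like this (hypothetical install
location; 'lib64' holds the Windows lib\x64 directory despite its name):

{'home': 'C:\\CUDA\\v7.5',
'nvcc': 'C:\\CUDA\\v7.5\\bin\\nvcc.exe',
'include': 'C:\\CUDA\\v7.5\\include',
'lib64': 'C:\\CUDA\\v7.5\\lib/x64'}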
40 | """
41 | 
42 | # first check if the CUDA_PATH env variable is in use
43 | if 'CUDA_PATH' in os.environ:
44 | home = os.environ['CUDA_PATH']
45 | print("home = %s\n" % home)
46 | nvcc = pjoin(home, 'bin', nvcc_bin)
47 | else:
48 | # otherwise, search the PATH for NVCC
49 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
50 | nvcc = find_in_path(nvcc_bin, os.environ['PATH'] + os.pathsep + default_path)
51 | if nvcc is None:
52 | raise EnvironmentError('The nvcc binary could not be '
53 | 'located in your $PATH. Either add it to your path, or set $CUDA_PATH')
54 | home = os.path.dirname(os.path.dirname(nvcc))
55 | print("home = %s, nvcc = %s\n" % (home, nvcc))
56 | 
57 | 
58 | cudaconfig = {'home':home, 'nvcc':nvcc,
59 | 'include': pjoin(home, 'include'),
60 | 'lib64': pjoin(home, lib_dir)}
61 | for k, v in cudaconfig.iteritems():
62 | if not os.path.exists(v):
63 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
64 | 
65 | return cudaconfig
66 | CUDA = locate_cuda()
67 | 
68 | 
69 | # Obtain the numpy include directory. This logic works across numpy versions.
70 | try:
71 | numpy_include = np.get_include()
72 | except AttributeError:
73 | numpy_include = np.get_numpy_include()
74 | 
75 | 
76 | def customize_compiler_for_nvcc(self):
77 | """inject deep into distutils to customize how the dispatch
78 | to gcc/nvcc works.
79 | 
80 | If you subclass UnixCCompiler, it's not trivial to get your subclass
81 | injected in, and still have the right customizations (i.e.
82 | distutils.sysconfig.customize_compiler) run on it. So instead of going
83 | the OO route, I have this. Note, it's kind of like a weird functional
84 | subclassing going on."""
85 | 
86 | # tell the compiler it can process .cu
87 | #self.src_extensions.append('.cu')
88 | 
89 | 
90 | # save references to the default compiler_so and _compile methods
91 | #default_compiler_so = self.spawn
92 | #default_compiler_so = self.rc
93 | super = self.compile
94 | 
95 | # now redefine the _compile method. This gets executed for each
96 | # object but distutils doesn't have the ability to change compilers
97 | # based on source extension: we add it.
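# The redefinition below assumes extra_compile_args was passed as a dict
# keyed by compiler instead of the usual flat list, e.g. (hypothetical
# flags, mirroring the Extension entries further down in this file):
#
#   Extension('nms.gpu_nms',
#             sources=['nms\\gpu_nms.pyx', 'nms\\nms_kernel.cu'],
#             extra_compile_args={'gcc': [], 'nvcc': ['-arch=sm_35']})
#
# compile() then picks the 'nvcc' flag list for .cu sources and the 'gcc'
# list for everything else; distutils itself never inspects the dict.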
98 | def compile(sources, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None): 99 | postfix=os.path.splitext(sources[0])[1] 100 | 101 | if postfix == '.cu': 102 | # use the cuda for .cu files 103 | #self.set_executable('compiler_so', CUDA['nvcc']) 104 | # use only a subset of the extra_postargs, which are 1-1 translated 105 | # from the extra_compile_args in the Extension class 106 | postargs = extra_postargs['nvcc'] 107 | else: 108 | postargs = extra_postargs['gcc'] 109 | 110 | 111 | return super(sources, output_dir, macros, include_dirs, debug, extra_preargs, postargs, depends) 112 | # reset the default compiler_so, which we might have changed for cuda 113 | #self.rc = default_compiler_so 114 | 115 | # inject our redefined _compile method into the class 116 | self.compile = compile 117 | 118 | 119 | # run the customize_compiler 120 | class custom_build_ext(build_ext): 121 | def build_extensions(self): 122 | customize_compiler_for_nvcc(self.compiler) 123 | build_ext.build_extensions(self) 124 | 125 | 126 | ext_modules = [ 127 | # unix _compile: obj, src, ext, cc_args, extra_postargs, pp_opts 128 | Extension( 129 | "utils.cython_bbox", 130 | sources=["utils\\bbox.pyx"], 131 | #define_macros={'/LD'}, 132 | #extra_compile_args={'gcc': ['/link', '/DLL', '/OUT:cython_bbox.dll']}, 133 | #extra_compile_args={'gcc': ['/LD']}, 134 | extra_compile_args={'gcc': []}, 135 | include_dirs = [numpy_include] 136 | ), 137 | Extension( 138 | "nms.cpu_nms", 139 | sources=["nms\\cpu_nms.pyx"], 140 | extra_compile_args={'gcc': []}, 141 | include_dirs = [numpy_include], 142 | ), 143 | Extension( 144 | "pycocotools._mask", 145 | sources=['pycocotools\\maskApi.c', 'pycocotools\\_mask.pyx'], 146 | include_dirs = [numpy_include, 'pycocotools'], 147 | extra_compile_args={ 148 | 'gcc': ['/Qstd=c99']}, 149 | ), 150 | #Extension( # just used to get nms\gpu_nms.obj 151 | # "nms.gpu_nms", 152 | # sources=['nms\\gpu_nms.pyx'], 153 | # language='c++', 154 | # extra_compile_args={'gcc': []}, 155 | # include_dirs = [numpy_include] 156 | #), 157 | ] 158 | 159 | setup( 160 | name='fast_rcnn', 161 | ext_modules=ext_modules, 162 | # inject our custom trigger 163 | cmdclass={'build_ext': custom_build_ext}, 164 | ) 165 | -------------------------------------------------------------------------------- /lib/setup_cuda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import os 5 | # on Windows, we need the original PATH without Anaconda's compiler in it: 6 | PATH = os.environ.get('PATH') 7 | from distutils.spawn import spawn, find_executable 8 | from setuptools import setup, find_packages, Extension 9 | from setuptools.command.build_ext import build_ext 10 | import sys 11 | 12 | # CUDA specific config 13 | # nvcc is assumed to be in user's PATH 14 | nvcc_compile_args = ['-O', '--ptxas-options=-v', '-arch=sm_35', '-c', '--compiler-options=-fPIC'] 15 | nvcc_compile_args = os.environ.get('NVCCFLAGS', '').split() + nvcc_compile_args 16 | cuda_libs = ['cublas'] 17 | 18 | 19 | # Obtain the numpy include directory. This logic works across numpy versions. 
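# (np.get_numpy_include() is the long-deprecated pre-NumPy-1.0 spelling;
# modern releases only provide np.get_include(), which is tried first below.)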
20 | try: 21 | numpy_include = np.get_include() 22 | except AttributeError: 23 | numpy_include = np.get_numpy_include() 24 | 25 | 26 | cudamat_ext = Extension('nms.gpu_nms', 27 | sources=[ 28 | 'nms\\gpu_nms.cu' 29 | ], 30 | language='c++', 31 | libraries=cuda_libs, 32 | extra_compile_args=nvcc_compile_args, 33 | include_dirs = [numpy_include, 'C:\\Programming\\CUDA\\v7.5\\include']) 34 | 35 | 36 | class CUDA_build_ext(build_ext): 37 | """ 38 | Custom build_ext command that compiles CUDA files. 39 | Note that all extension source files will be processed with this compiler. 40 | """ 41 | def build_extensions(self): 42 | self.compiler.src_extensions.append('.cu') 43 | self.compiler.set_executable('compiler_so', 'nvcc') 44 | self.compiler.set_executable('linker_so', 'nvcc --shared') 45 | if hasattr(self.compiler, '_c_extensions'): 46 | self.compiler._c_extensions.append('.cu') # needed for Windows 47 | self.compiler.spawn = self.spawn 48 | build_ext.build_extensions(self) 49 | 50 | def spawn(self, cmd, search_path=1, verbose=0, dry_run=0): 51 | """ 52 | Perform any CUDA specific customizations before actually launching 53 | compile/link etc. commands. 54 | """ 55 | if (sys.platform == 'darwin' and len(cmd) >= 2 and cmd[0] == 'nvcc' and 56 | cmd[1] == '--shared' and cmd.count('-arch') > 0): 57 | # Versions of distutils on OSX earlier than 2.7.9 inject 58 | # '-arch x86_64' which we need to strip while using nvcc for 59 | # linking 60 | while True: 61 | try: 62 | index = cmd.index('-arch') 63 | del cmd[index:index+2] 64 | except ValueError: 65 | break 66 | elif self.compiler.compiler_type == 'msvc': 67 | # There are several things we need to do to change the commands 68 | # issued by MSVCCompiler into one that works with nvcc. In the end, 69 | # it might have been easier to write our own CCompiler class for 70 | # nvcc, as we're only interested in creating a shared library to 71 | # load with ctypes, not in creating an importable Python extension. 72 | # - First, we replace the cl.exe or link.exe call with an nvcc 73 | # call. In case we're running Anaconda, we search cl.exe in the 74 | # original search path we captured further above -- Anaconda 75 | # inserts a MSVC version into PATH that is too old for nvcc. 76 | cmd[:1] = ['nvcc', '--compiler-bindir', 77 | os.path.dirname(find_executable("cl.exe", PATH)) 78 | or cmd[0]] 79 | # - Secondly, we fix a bunch of command line arguments. 80 | for idx, c in enumerate(cmd): 81 | # create .dll instead of .pyd files 82 | #if '.pyd' in c: cmd[idx] = c = c.replace('.pyd', '.dll') #20160601, by MrX 83 | # replace /c by -c 84 | if c == '/c': cmd[idx] = '-c' 85 | # replace /DLL by --shared 86 | elif c == '/DLL': cmd[idx] = '--shared' 87 | # remove --compiler-options=-fPIC 88 | elif '-fPIC' in c: del cmd[idx] 89 | # replace /Tc... by ... 90 | elif c.startswith('/Tc'): cmd[idx] = c[3:] 91 | # replace /Fo... by -o ... 92 | elif c.startswith('/Fo'): cmd[idx:idx+1] = ['-o', c[3:]] 93 | # replace /LIBPATH:... by -L... 94 | elif c.startswith('/LIBPATH:'): cmd[idx] = '-L' + c[9:] 95 | # replace /OUT:... by -o ... 
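# As an end-to-end illustration of the rewrites in this loop (hypothetical
# file names), an MSVC link line such as
#   link.exe /DLL /OUT:nms\gpu_nms.pyd gpu_nms.obj cublas.lib
# ends up re-issued as the equivalent nvcc invocation
#   nvcc --shared -o nms\gpu_nms.pyd gpu_nms.obj -lcublas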
96 | elif c.startswith('/OUT:'): cmd[idx:idx+1] = ['-o', c[5:]]
97 | # remove /EXPORT:initlibcudamat or /EXPORT:initlibcudalearn
98 | elif c.startswith('/EXPORT:'): del cmd[idx]
99 | # replace cublas.lib by -lcublas
100 | elif c == 'cublas.lib': cmd[idx] = '-lcublas'
101 | # - Finally, we pass on all arguments starting with a '/' to the
102 | # compiler or linker, and have nvcc handle all other arguments
103 | if '--shared' in cmd:
104 | pass_on = '--linker-options='
105 | # we only need MSVCRT for a .dll, remove CMT if it sneaks in:
106 | cmd.append('/NODEFAULTLIB:libcmt.lib')
107 | else:
108 | pass_on = '--compiler-options='
109 | cmd = ([c for c in cmd if c[0] != '/'] +
110 | [pass_on + ','.join(c for c in cmd if c[0] == '/')])
111 | # For the future: Apart from the wrongly set PATH by Anaconda, it
112 | # would suffice to run the following for compilation on Windows:
113 | # nvcc -c -O -o <file>.obj <file>.cu
114 | # And the following for linking:
115 | # nvcc --shared -o <file>.dll <file>.obj <file>.obj -lcublas
116 | # This could be done by a NVCCCompiler class for all platforms.
117 | spawn(cmd, search_path, verbose, dry_run)
118 | 
119 | setup(name="py_fast_rcnn_gpu",
120 | description="Builds the CUDA GPU NMS extension (nms.gpu_nms) for Faster R-CNN",
121 | ext_modules=[cudamat_ext],
122 | cmdclass={'build_ext': CUDA_build_ext},
123 | )
124 | 
--------------------------------------------------------------------------------
/lib/transform/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrGF/py-faster-rcnn-windows/12e11924217e79fd7124d05a18baa49b9908340d/lib/transform/__init__.py
--------------------------------------------------------------------------------
/lib/transform/torch_image_transform_layer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast/er R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # --------------------------------------------------------
5 | 
6 | """ Transform images for compatibility with models trained with
7 | https://github.com/facebook/fb.resnet.torch.
8 | 
9 | Usage in model prototxt:
10 | 
11 | layer {
12 | name: 'data_xform'
13 | type: 'Python'
14 | bottom: 'data_caffe'
15 | top: 'data'
16 | python_param {
17 | module: 'transform.torch_image_transform_layer'
18 | layer: 'TorchImageTransformLayer'
19 | }
20 | }
21 | """
22 | 
23 | import caffe
24 | from fast_rcnn.config import cfg
25 | import numpy as np
26 | 
27 | class TorchImageTransformLayer(caffe.Layer):
28 | def setup(self, bottom, top):
29 | # (1, 3, 1, 1) shaped arrays
30 | self.PIXEL_MEANS = \
31 | np.array([[[[0.48462227599918]],
32 | [[0.45624044862054]],
33 | [[0.40588363755159]]]])
34 | self.PIXEL_STDS = \
35 | np.array([[[[0.22889466674951]],
36 | [[0.22446679341259]],
37 | [[0.22495548344775]]]])
38 | # The default ("old") pixel means that were already subtracted
39 | channel_swap = (0, 3, 1, 2)
40 | self.OLD_PIXEL_MEANS = \
41 | cfg.PIXEL_MEANS[np.newaxis, :, :, :].transpose(channel_swap)
42 | 
43 | top[0].reshape(*(bottom[0].shape))
44 | 
45 | def forward(self, bottom, top):
46 | ims = bottom[0].data
47 | # Invert the channel means that were already subtracted
48 | ims += self.OLD_PIXEL_MEANS
49 | # 1. Permute BGR to RGB and normalize to [0, 1]
50 | ims = ims[:, [2, 1, 0], :, :] / 255.0
51 | # 2. Remove channel means
52 | ims -= self.PIXEL_MEANS
53 | # 3.
Standardize channels 54 | ims /= self.PIXEL_STDS 55 | top[0].reshape(*(ims.shape)) 56 | top[0].data[...] = ims 57 | 58 | def backward(self, top, propagate_down, bottom): 59 | """This layer does not propagate gradients.""" 60 | pass 61 | 62 | def reshape(self, bottom, top): 63 | """Reshaping happens during the call to forward.""" 64 | pass 65 | -------------------------------------------------------------------------------- /lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.so 3 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import cv2 12 | 13 | def im_list_to_blob(ims): 14 | """Convert a list of images into a network input. 15 | 16 | Assumes images are already prepared (means subtracted, BGR order, ...). 
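For example (illustrative shapes): two prepared images of shapes
(480, 640, 3) and (600, 500, 3) yield a (2, 3, 600, 640) blob; each image
is copied into the top-left corner of its slice and the unused border
stays zero.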
17 | """
18 | max_shape = np.array([im.shape for im in ims]).max(axis=0)
19 | num_images = len(ims)
20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
21 | dtype=np.float32)
22 | for i in xrange(num_images):
23 | im = ims[i]
24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
25 | # Move channels (axis 3) to axis 1
26 | # Axis order will become: (batch elem, channel, height, width)
27 | channel_swap = (0, 3, 1, 2)
28 | blob = blob.transpose(channel_swap)
29 | return blob
30 | 
31 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
32 | """Mean subtract and scale an image for use in a blob."""
33 | im = im.astype(np.float32, copy=False)
34 | im -= pixel_means
35 | im_shape = im.shape
36 | im_size_min = np.min(im_shape[0:2])
37 | im_size_max = np.max(im_shape[0:2])
38 | im_scale = float(target_size) / float(im_size_min)
39 | # Prevent the biggest axis from being more than MAX_SIZE
40 | if np.round(im_scale * im_size_max) > max_size:
41 | im_scale = float(max_size) / float(im_size_max)
42 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
43 | interpolation=cv2.INTER_LINEAR)
44 | 
45 | return im, im_scale
46 | 
--------------------------------------------------------------------------------
/lib/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import time
9 | 
10 | class Timer(object):
11 | """A simple timer."""
12 | def __init__(self):
13 | self.total_time = 0.
14 | self.calls = 0
15 | self.start_time = 0.
16 | self.diff = 0.
17 | self.average_time = 0.
18 | 
19 | def tic(self):
20 | # using time.time instead of time.clock because time.clock
21 | # does not normalize for multithreading
22 | self.start_time = time.time()
23 | 
24 | def toc(self, average=True):
25 | self.diff = time.time() - self.start_time
26 | self.total_time += self.diff
27 | self.calls += 1
28 | self.average_time = self.total_time / self.calls
29 | if average:
30 | return self.average_time
31 | else:
32 | return self.diff
33 | 
--------------------------------------------------------------------------------
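For reference, a minimal sketch of how the Timer above is typically driven
from a training or detection loop (the loop body is a hypothetical stand-in;
Timer itself is exactly the class defined in lib/utils/timer.py):

import time
from utils.timer import Timer

t = Timer()
for _ in range(5):
    t.tic()
    time.sleep(0.01)  # stand-in for one forward pass
    avg = t.toc()     # toc() returns the running average by default
print 'average over {} calls: {:.4f}s'.format(t.calls, avg)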