├── .gitattributes
├── LICENSE
├── README.md
├── demo
    ├── Agent+Carter+Season+2+gun+rifle+scenes_85.png
    ├── Division+The+gun+rifle+scenes_2.png
    ├── Downfall+movie+gun+rifle+scenes_67.png
    ├── Salvador+movie+gun+rifle+scenes_40.png
    └── north+korea+army_38.png
├── fast_rcnn
    ├── config.py
    └── test.py
├── faster_rcnn_test.pt
├── ftMap_Warp_2.py
├── ftmap_transform.py
├── images
    ├── flow_diagram_web.jpg
    └── more_results_web.jpg
├── makebboxproposals.py
└── tools
    └── demo_firearms.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 makhtar17004
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Orientation Aware Object Detection with Applications to Firearms
 2 | 
 3 | Automatic detection of firearms is important for enhancing security and safety of people, however, it is a challenging task owing to the wide variations in shape, size and appearance of firearms. Viewing angle variations and occlusions by the weapon’s carrier and the surrounding people, further increases the difficulty of the task. Moreover, the existing object detectors process rectangular areas, though a thin and long rifle may actually cover only a small percentage of that area and the rest may contain irrelevant details suppressing the required object signatures. To handle these challenges we propose an Orientation Aware Object Detector (OAOD) which has achieved improved firearm detection and localization performance.
 4 | 
 5 | ![alt text](https://github.com/makhtar17004/orientation-aware-firearm-detection/blob/master/images/flow_diagram_web.jpg)
 6 | 
 7 | 
 8 | 
 9 | # Instructions
10 | 
11 | This code is modified using Faster RCNN. We made the two phases in Faster RCNN by adopting cascade approach. Please see the setup details of Faster RCNN [here](https://github.com/rbgirshick/py-faster-rcnn). This will assist in runnig our model.
12 | 
13 | We provide necessaery files to run the test script only using our model. Download our model from this [link](https://drive.google.com/file/d/1ShZoCTfoBga9j0y-GPINOFgdf1x8Ti9t/view?usp=sharing). Put it into
14 | 
15 | ```.../data/faster_rcnn_models directory```
16 | 
17 | Replace the cfg, test in fast_rcnn folder. Also replace the prototxt file for test with the provided one. Also put images in 
18 | 
19 | ```.../data/demo folder```
20 | 
21 | 
22 | After installation and setup, to run the test file. Place it into .../tools directory:
23 | 
24 | ```python demo_firearms.py```
25 | 
26 | 
27 | 
28 | 
29 | # Results:
30 | 
31 | ![alt text](https://github.com/makhtar17004/orientation-aware-firearm-detection/blob/master/images/more_results_web.jpg)
32 | 
33 | 
34 | # Paper and Model Link
35 | 
36 | Here is the arXiv link: https://arxiv.org/abs/1904.10032
37 | 
38 | Here is the web-link: http://im.itu.edu.pk/orientation-aware-firearms-detection/
39 | 
40 | Trained model: [link](https://drive.google.com/file/d/1ShZoCTfoBga9j0y-GPINOFgdf1x8Ti9t/view?usp=sharing)
41 | 
42 | # DATASET
43 | 
44 | [DATASET is available upon request [Google Form]](https://forms.gle/t3dS5g5JQdfPoSvn9)
45 | 
46 | 
47 | 
48 | BIBTEX:
49 | 
50 | ```
51 | @article{oaod2021neuro,
52 |   title={Leveraging orientation for weakly supervised object detection with application to firearm localization},
53 |   author={Iqbal, Javed and Munir, Muhammad Akhtar and Mahmood, Arif and Ali, Afsheen Rafaqat and Ali, Mohsen},
54 |   journal={Neurocomputing},
55 |   volume={440},
56 |   pages={310--320},
57 |   year={2021},
58 |   publisher={Elsevier}
59 | }
60 | ```
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/demo/Agent+Carter+Season+2+gun+rifle+scenes_85.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/demo/Agent+Carter+Season+2+gun+rifle+scenes_85.png


--------------------------------------------------------------------------------
/demo/Division+The+gun+rifle+scenes_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/demo/Division+The+gun+rifle+scenes_2.png


--------------------------------------------------------------------------------
/demo/Downfall+movie+gun+rifle+scenes_67.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/demo/Downfall+movie+gun+rifle+scenes_67.png


--------------------------------------------------------------------------------
/demo/Salvador+movie+gun+rifle+scenes_40.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/demo/Salvador+movie+gun+rifle+scenes_40.png


--------------------------------------------------------------------------------
/demo/north+korea+army_38.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/demo/north+korea+army_38.png


--------------------------------------------------------------------------------
/fast_rcnn/config.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """Fast R-CNN config system.
  9 | 
 10 | This file specifies default config options for Fast R-CNN. You should not
 11 | change values in this file. Instead, you should write a config file (in yaml)
 12 | and use cfg_from_file(yaml_file) to load it and override the default options.
 13 | 
 14 | Most tools in $ROOT/tools take a --cfg option to specify an override file.
 15 |     - See tools/{train,test}_net.py for example code that uses cfg_from_file()
 16 |     - See experiments/cfgs/*.yml for example YAML config override files
 17 | """
 18 | 
 19 | import os
 20 | import os.path as osp
 21 | import numpy as np
 22 | # `pip install easydict` if you don't have it
 23 | from easydict import EasyDict as edict
 24 | 
 25 | __C = edict()
 26 | # Consumers can get config by:
 27 | #   from fast_rcnn_config import cfg
 28 | cfg = __C
 29 | 
 30 | #
 31 | # Training options
 32 | #
 33 | 
 34 | __C.TRAIN = edict()
 35 | 
 36 | # Scales to use during training (can list multiple scales)
 37 | # Each scale is the pixel size of an image's shortest side
 38 | #__C.TRAIN.SCALES = (600,)
 39 | __C.TRAIN.SCALES = (480,)
 40 | 
 41 | # Max pixel size of the longest side of a scaled input image
 42 | #__C.TRAIN.MAX_SIZE = 1000
 43 | __C.TRAIN.MAX_SIZE = 800
 44 | 
 45 | # Images to use per minibatch
 46 | __C.TRAIN.IMS_PER_BATCH = 1#2
 47 | 
 48 | # Minibatch size (number of regions of interest [ROIs])
 49 | __C.TRAIN.BATCH_SIZE = 48#128
 50 | 
 51 | # Fraction of minibatch that is labeled foreground (i.e. class > 0)
 52 | __C.TRAIN.FG_FRACTION = 0.25
 53 | 
 54 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
 55 | __C.TRAIN.FG_THRESH = 0.5
 56 | 
 57 | # Overlap threshold for a ROI to be considered background (class = 0 if
 58 | # overlap in [LO, HI))
 59 | __C.TRAIN.BG_THRESH_HI = 0.5
 60 | __C.TRAIN.BG_THRESH_LO = 0.1
 61 | 
 62 | # Use horizontally-flipped images during training?
 63 | __C.TRAIN.USE_FLIPPED = True
 64 | 
 65 | # Train bounding-box regressors
 66 | __C.TRAIN.BBOX_REG = True
 67 | 
 68 | # Overlap required between a ROI and ground-truth box in order for that ROI to
 69 | # be used as a bounding-box regression training example
 70 | __C.TRAIN.BBOX_THRESH = 0.5
 71 | 
 72 | # Iterations between snapshots
 73 | #__C.TRAIN.SNAPSHOT_ITERS = 10000
 74 | __C.TRAIN.SNAPSHOT_ITERS = 20000
 75 | 
 76 | # solver.prototxt specifies the snapshot path prefix, this adds an optional
 77 | # infix to yield the path: <prefix>[_<infix>]_iters_XYZ.caffemodel
 78 | __C.TRAIN.SNAPSHOT_INFIX = ''
 79 | 
 80 | # Use a prefetch thread in roi_data_layer.layer
 81 | # So far I haven't found this useful; likely more engineering work is required
 82 | __C.TRAIN.USE_PREFETCH = False
 83 | 
 84 | # Normalize the targets (subtract empirical mean, divide by empirical stddev)
 85 | #__C.TRAIN.BBOX_NORMALIZE_TARGETS = True
 86 | __C.TRAIN.BBOX_NORMALIZE_TARGETS = False
 87 | # Deprecated (inside weights)
 88 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
 89 | # Normalize the targets using "precomputed" (or made up) means and stdevs
 90 | # (BBOX_NORMALIZE_TARGETS must also be True)
 91 | __C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False
 92 | __C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
 93 | __C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
 94 | 
 95 | # Train using these proposals
 96 | __C.TRAIN.PROPOSAL_METHOD = 'selective_search'
 97 | 
 98 | # Make minibatches from images that have similar aspect ratios (i.e. both
 99 | # tall and thin or both short and wide) in order to avoid wasting computation
100 | # on zero-padding.
101 | __C.TRAIN.ASPECT_GROUPING = True
102 | 
103 | # Use RPN to detect objects
104 | __C.TRAIN.HAS_RPN = False
105 | # IOU >= thresh: positive example
106 | __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
107 | # IOU < thresh: negative example
108 | __C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
109 | # If an anchor statisfied by positive and negative conditions set to negative
110 | __C.TRAIN.RPN_CLOBBER_POSITIVES = False
111 | # Max number of foreground examples
112 | __C.TRAIN.RPN_FG_FRACTION = 0.5
113 | # Total number of examples
114 | __C.TRAIN.RPN_BATCHSIZE = 256
115 | # NMS threshold used on RPN proposals
116 | __C.TRAIN.RPN_NMS_THRESH = 0.7
117 | # Number of top scoring boxes to keep before apply NMS to RPN proposals
118 | __C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
119 | # Number of top scoring boxes to keep after applying NMS to RPN proposals
120 | __C.TRAIN.RPN_POST_NMS_TOP_N = 2000
121 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
122 | __C.TRAIN.RPN_MIN_SIZE = 16
123 | # Deprecated (outside weights)
124 | __C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
125 | # Give the positive RPN examples weight of p * 1 / {num positives}
126 | # and give negatives a weight of (1 - p)
127 | # Set to -1.0 to use uniform example weighting
128 | __C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
129 | 
130 | 
131 | #
132 | # Testing options
133 | #
134 | 
135 | __C.TEST = edict()
136 | 
137 | # Scales to use during testing (can list multiple scales)
138 | # Each scale is the pixel size of an image's shortest side
139 | #__C.TEST.SCALES = (600,)
140 | __C.TEST.SCALES = (480,)
141 | 
142 | # Max pixel size of the longest side of a scaled input image
143 | __C.TEST.MAX_SIZE = 800#1000
144 | #__C.TEST.MAX_SIZE = 1000
145 | 
146 | # Overlap threshold used for non-maximum suppression (suppress boxes with
147 | # IoU >= this threshold)
148 | __C.TEST.NMS = 0.3 #0.3
149 | 
150 | # Experimental: treat the (K+1) units in the cls_score layer as linear
151 | # predictors (trained, eg, with one-vs-rest SVMs).
152 | __C.TEST.SVM = False
153 | 
154 | # Test using bounding-box regressors
155 | __C.TEST.BBOX_REG = True
156 | 
157 | # Propose boxes
158 | __C.TEST.HAS_RPN = False
159 | 
160 | # Test using these proposals
161 | __C.TEST.PROPOSAL_METHOD = 'selective_search'
162 | 
163 | ## NMS threshold used on RPN proposals
164 | __C.TEST.RPN_NMS_THRESH = 0.7 #0.7
165 | ## Number of top scoring boxes to keep before apply NMS to RPN proposals
166 | __C.TEST.RPN_PRE_NMS_TOP_N = 6000
167 | ## Number of top scoring boxes to keep after applying NMS to RPN proposals
168 | __C.TEST.RPN_POST_NMS_TOP_N = 200
169 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
170 | __C.TEST.RPN_MIN_SIZE = 16
171 | 
172 | 
173 | #
174 | # MISC
175 | #
176 | 
177 | # The mapping from image coordinates to feature map coordinates might cause
178 | # some boxes that are distinct in image space to become identical in feature
179 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
180 | # for identifying duplicate boxes.
181 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
182 | __C.DEDUP_BOXES = 1./16.
183 | 
184 | # Pixel mean values (BGR order) as a (1, 1, 3) array
185 | # We use the same pixel mean for all networks even though it's not exactly what
186 | # they were trained with
187 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
188 | 
189 | # For reproducibility
190 | __C.RNG_SEED = 3
191 | 
192 | # A small number that's used many times
193 | __C.EPS = 1e-14
194 | 
195 | # Root directory of project
196 | __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
197 | 
198 | # Data directory
199 | __C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
200 | 
201 | # Model directory
202 | __C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc'))
203 | 
204 | # Name (or path to) the matlab executable
205 | __C.MATLAB = 'matlab'
206 | 
207 | # Place outputs under an experiments directory
208 | __C.EXP_DIR = 'default'
209 | 
210 | # Use GPU implementation of non-maximum suppression
211 | __C.USE_GPU_NMS = False
212 | 
213 | # Default GPU device id
214 | __C.GPU_ID = 0
215 | 
216 | 
217 | def get_output_dir(imdb, net=None):
218 |     """Return the directory where experimental artifacts are placed.
219 |     If the directory does not exist, it is created.
220 | 
221 |     A canonical path is built using the name from an imdb and a network
222 |     (if not None).
223 |     """
224 |     outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
225 |     if net is not None:
226 |         outdir = osp.join(outdir, net.name)
227 |     if not os.path.exists(outdir):
228 |         os.makedirs(outdir)
229 |     return outdir
230 | 
231 | def _merge_a_into_b(a, b):
232 |     """Merge config dictionary a into config dictionary b, clobbering the
233 |     options in b whenever they are also specified in a.
234 |     """
235 |     if type(a) is not edict:
236 |         return
237 | 
238 |     for k, v in a.iteritems():
239 |         # a must specify keys that are in b
240 |         if not b.has_key(k):
241 |             raise KeyError('{} is not a valid config key'.format(k))
242 | 
243 |         # the types must match, too
244 |         old_type = type(b[k])
245 |         if old_type is not type(v):
246 |             if isinstance(b[k], np.ndarray):
247 |                 v = np.array(v, dtype=b[k].dtype)
248 |             else:
249 |                 raise ValueError(('Type mismatch ({} vs. {}) '
250 |                                 'for config key: {}').format(type(b[k]),
251 |                                                             type(v), k))
252 | 
253 |         # recursively merge dicts
254 |         if type(v) is edict:
255 |             try:
256 |                 _merge_a_into_b(a[k], b[k])
257 |             except:
258 |                 print('Error under config key: {}'.format(k))
259 |                 raise
260 |         else:
261 |             b[k] = v
262 | 
263 | def cfg_from_file(filename):
264 |     """Load a config file and merge it into the default options."""
265 |     import yaml
266 |     with open(filename, 'r') as f:
267 |         yaml_cfg = edict(yaml.load(f))
268 | 
269 |     _merge_a_into_b(yaml_cfg, __C)
270 | 
271 | def cfg_from_list(cfg_list):
272 |     """Set config keys via list (e.g., from command line)."""
273 |     from ast import literal_eval
274 |     assert len(cfg_list) % 2 == 0
275 |     for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
276 |         key_list = k.split('.')
277 |         d = __C
278 |         for subkey in key_list[:-1]:
279 |             assert d.has_key(subkey)
280 |             d = d[subkey]
281 |         subkey = key_list[-1]
282 |         assert d.has_key(subkey)
283 |         try:
284 |             value = literal_eval(v)
285 |         except:
286 |             # handle the case when v is a string literal
287 |             value = v
288 |         assert type(value) == type(d[subkey]), \
289 |             'type {} does not match original type {}'.format(
290 |             type(value), type(d[subkey]))
291 |         d[subkey] = value
292 | 


--------------------------------------------------------------------------------
/fast_rcnn/test.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | # Modified by CVML group @ITU- Punjab
  9 | 
 10 | """Test a Fast R-CNN network on an imdb (image database)."""
 11 | 
 12 | from fast_rcnn.config import cfg, get_output_dir
 13 | from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv
 14 | #from fast_rcnn.ftmap_transform import transformer_layer as trans_layer
 15 | import argparse
 16 | from utils.timer import Timer
 17 | import numpy as np
 18 | import cv2
 19 | from numpy.linalg import inv
 20 | import caffe
 21 | from fast_rcnn.nms_wrapper import nms
 22 | import cPickle
 23 | from utils.blob import im_list_to_blob
 24 | import os
 25 | import matplotlib.pyplot as plt
 26 | import xml.etree.ElementTree as ET
 27 | import gc
 28 | 
 29 | #from nms.py_cpu_nms_rotated import py_cpu_nms
 30 | 
 31 | def _get_image_blob(im):
 32 |     """Converts an image into a network input.
 33 | 
 34 |     Arguments:
 35 |         im (ndarray): a color image in BGR order
 36 | 
 37 |     Returns:
 38 |         blob (ndarray): a data blob holding an image pyramid
 39 |         im_scale_factors (list): list of image scales (relative to im) used
 40 |             in the image pyramid
 41 |     """
 42 |     im_orig = im.astype(np.float32, copy=True)
 43 |     im_orig -= cfg.PIXEL_MEANS
 44 | 
 45 |     im_shape = im_orig.shape
 46 |     im_size_min = np.min(im_shape[0:2])
 47 |     im_size_max = np.max(im_shape[0:2])
 48 | 
 49 |     processed_ims = []
 50 |     im_scale_factors = []
 51 | 
 52 |     for target_size in cfg.TEST.SCALES:
 53 |         im_scale = float(target_size) / float(im_size_min)
 54 |         # Prevent the biggest axis from being more than MAX_SIZE
 55 |         if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
 56 |             im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
 57 |         im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
 58 |                         interpolation=cv2.INTER_LINEAR)
 59 |         im_scale_factors.append(im_scale)
 60 |         processed_ims.append(im)
 61 | 
 62 |     # Create a blob to hold the input images
 63 |     blob = im_list_to_blob(processed_ims)
 64 | 
 65 |     return blob, np.array(im_scale_factors)
 66 | 
 67 | def _get_rois_blob(im_rois, im_scale_factors):
 68 |     """Converts RoIs into network inputs.
 69 | 
 70 |     Arguments:
 71 |         im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
 72 |         im_scale_factors (list): scale factors as returned by _get_image_blob
 73 | 
 74 |     Returns:
 75 |         blob (ndarray): R x 5 matrix of RoIs in the image pyramid
 76 |     """
 77 |     rois, levels = _project_im_rois(im_rois, im_scale_factors)
 78 |     rois_blob = np.hstack((levels, rois))
 79 |     return rois_blob.astype(np.float32, copy=False)
 80 | 
 81 | def _project_im_rois(im_rois, scales):
 82 |     """Project image RoIs into the image pyramid built by _get_image_blob.
 83 | 
 84 |     Arguments:
 85 |         im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
 86 |         scales (list): scale factors as returned by _get_image_blob
 87 | 
 88 |     Returns:
 89 |         rois (ndarray): R x 4 matrix of projected RoI coordinates
 90 |         levels (list): image pyramid levels used by each projected RoI
 91 |     """
 92 |     im_rois = im_rois.astype(np.float, copy=False)
 93 | 
 94 |     if len(scales) > 1:
 95 |         widths = im_rois[:, 2] - im_rois[:, 0] + 1
 96 |         heights = im_rois[:, 3] - im_rois[:, 1] + 1
 97 | 
 98 |         areas = widths * heights
 99 |         scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2)
100 |         diff_areas = np.abs(scaled_areas - 224 * 224)
101 |         levels = diff_areas.argmin(axis=1)[:, np.newaxis]
102 |     else:
103 |         levels = np.zeros((im_rois.shape[0], 1), dtype=np.int)
104 | 
105 |     rois = im_rois * scales[levels]
106 | 
107 |     return rois, levels
108 | 
109 | def _get_blobs(im, rois):
110 |     """Convert an image and RoIs within that image into network inputs."""
111 |     blobs = {'data' : None, 'rois' : None}
112 |     blobs['data'], im_scale_factors = _get_image_blob(im)
113 |     if not cfg.TEST.HAS_RPN:
114 |         blobs['rois'] = _get_rois_blob(rois, im_scale_factors)
115 | 	#print ('lll: ', blobs['rois'])
116 |     return blobs, im_scale_factors
117 | 
118 | def im_detect(net, im, boxes=None, extract_feat=False):
119 |     """Detect object classes in an image given object proposals.
120 | 
121 |     Arguments:
122 |         net (caffe.Net): Fast R-CNN network to use
123 |         im (ndarray): color image to test (in BGR order)
124 |         boxes (ndarray): R x 4 array of object proposals or None (for RPN)
125 | 
126 |     Returns:
127 |         scores (ndarray): R x K array of object class scores (K includes
128 |             background as object category 0)
129 |         boxes (ndarray): R x (4*K) array of predicted bounding boxes
130 |     """
131 |     blobs, im_scales = _get_blobs(im, boxes)
132 |     #print 'blobs: ', blobs
133 | 
134 |     # When mapping from image ROIs to feature map ROIs, there's some aliasing
135 |     # (some distinct image ROIs get mapped to the same feature ROI).
136 |     # Here, we identify duplicate feature ROIs, so we only compute features
137 |     # on the unique subset.
138 |     if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
139 |         v = np.array([1, 1e3, 1e6, 1e9, 1e12])
140 |         hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
141 |         _, index, inv_index = np.unique(hashes, return_index=True,
142 |                                         return_inverse=True)
143 |         blobs['rois'] = blobs['rois'][index, :]
144 |         boxes = boxes[index, :]
145 | 	#print ('lll: ', not cfg.TEST.HAS_RPN)
146 | 
147 |     if cfg.TEST.HAS_RPN:
148 |         im_blob = blobs['data']
149 |         permanent_shape = im_blob.shape
150 | 	#print ('lll: ', permanent_shape)
151 |         blobs['im_info'] = np.array(
152 |             [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
153 |             dtype=np.float32)
154 |         print blobs['im_info']
155 | 
156 | 
157 |     # reshape network inputs
158 | 
159 |     net.blobs['data'].reshape(*(blobs['data'].shape))
160 | 
161 |     if cfg.TEST.HAS_RPN:
162 |         net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
163 |     else:
164 |         net.blobs['rois'].reshape(*(blobs['rois'].shape))
165 | 
166 |     # do forward
167 |     forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
168 |     if cfg.TEST.HAS_RPN:
169 |         forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
170 |     else:
171 |         forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
172 | 
173 |         
174 |     #print('going to net forward')
175 |     blobs_out = net.forward(**forward_kwargs)
176 |     #print('blobs[rois]  : ', blobs_out)
177 | 
178 |     #print ('start_ind: ', list(net._layer_names))
179 |     li = list(net._layer_names).index('roi_pool5')
180 |     tops = [(net._blob_names[bi], net.blobs[net._blob_names[bi]].data.shape) for bi in list(net._top_ids(li))]
181 |     #for bi in range(len(list(net._layer_names))):
182 | 	#print ('hello: ', net._blob_names[bi] ,net.blobs[net._blob_names[bi]].data.shape)
183 |     '''print li
184 |     print list(net._top_ids(li)), net._blob_names[28]
185 |     print net.blobs['rois'].data.shape
186 |     for ip in range (35):
187 | 	print ip, net._blob_names[ip]'''
188 | 
189 |     if cfg.TEST.HAS_RPN:
190 |         assert len(im_scales) == 1, "Only single-image batch implemented"
191 |         rois = net.blobs['rois'].data.copy()
192 | 	
193 | 	#print('rois :', rois[0,:])
194 | 
195 | 	# unscale back to raw image space
196 |         rpn_boxes = rois[:, 1:5] / im_scales[0]
197 | 	#print ('shape: ', rpn_boxes.shape)
198 | 	rpn_scores = net.blobs['scores'].data.copy()
199 | 
200 | 	pred_scores = net.blobs['cls_prob'].data.copy()
201 | 	box_deltas = net.blobs['bbox_pred'].data.copy()
202 | 	#box_deltas = blobs_out['bbox_pred1']
203 |         pred_boxes = bbox_transform_inv(rpn_boxes, box_deltas)
204 | 	#pred_boxes = np.hstack((rpn_boxes, rpn_boxes, rpn_boxes))
205 |         pred_boxes = clip_boxes(pred_boxes, im.shape)
206 | 
207 | 	#print('comp :', rpn_boxes[:50,:], pred_boxes[:50,:])
208 | 	orient_prob = net.blobs['orient_prob'].data.copy()
209 | 
210 | 	warpedrois = net.blobs['warpedrois'].data.copy()
211 | 	transApplied = net.blobs['transApplied'].data.copy()
212 | 	rpn_boxes1 = warpedrois[:, 1:5] / im_scales[0]
213 | 
214 |         # use softmax estimated probabilities
215 |         pred_scores1 = blobs_out['cls_prob1']
216 | 	#print 'im_detect'
217 | 	#print (pred_scores.shape)
218 |         # Apply bounding-box regression deltas by accessing blob with name bbox_pred
219 |         box_deltas1 = blobs_out['bbox_pred1']
220 |         pred_boxes1 = bbox_transform_inv(rpn_boxes1, box_deltas1)
221 | 	#pred_boxes1 = np.hstack((rpn_boxes1, rpn_boxes1, rpn_boxes1))
222 |         #pred_boxes1 = clip_boxes(pred_boxes1, im.shape)
223 | 
224 |         #orient_prob = blobs_out['orient_prob']
225 | 	#orient_prob = np.zeros((len(pred_scores),4), dtype='float')
226 |         
227 | 	# unscale back to raw image space
228 |         '''rpn_boxes = rois[:, 1:5] / im_scales[0]
229 | 	#print ('shape: ', rpn_boxes[45:60,:])
230 | 	rpn_scores = net.blobs['scores'].data.copy()
231 | 
232 |         # use softmax estimated probabilities
233 |         pred_scores = blobs_out['cls_prob']
234 | 	#print 'im_detect'
235 | 	#print (pred_scores.shape)
236 |         # Apply bounding-box regression deltas by accessing blob with name bbox_pred
237 |         box_deltas = blobs_out['bbox_pred']
238 |         pred_boxes = bbox_transform_inv(rpn_boxes, box_deltas)
239 |         pred_boxes = clip_boxes(pred_boxes, im.shape)
240 | 
241 |         orient_prob = blobs_out['orient_prob']
242 | 	#orient_prob = np.zeros((len(pred_scores),4), dtype='float')
243 | 
244 |         if extract_feat == True:
245 |         	conv_feat = net.blobs['conv5_3'].data.copy()
246 |                 #print conv_feat.shape
247 | 		return rpn_boxes, rpn_scores, pred_boxes, pred_scores, orient_prob, conv_feat, permanent_shape'''
248 | 
249 | 
250 | 
251 |     return rpn_boxes, rpn_scores, pred_boxes, pred_scores, orient_prob, pred_boxes1, pred_scores1, transApplied
252 | 
253 | 
254 | def im_detect_new(net, im,  perm_shape, blobs, ross,im_scales, boxes=None, extract_feat=False):
255 |     """Detect object classes in an image given object proposals.
256 | 
257 |     Arguments:
258 |         net (caffe.Net): Fast R-CNN network to use
259 |         im (ndarray): color image to test (in BGR order)
260 |         boxes (ndarray): R x 4 array of object proposals or None (for RPN)
261 | 
262 |     Returns:
263 |         scores (ndarray): R x K array of object class scores (K includes
264 |             background as object category 0)
265 |         boxes (ndarray): R x (4*K) array of predicted bounding boxes
266 |     """
267 | 
268 |     #blobs, im_scales = _get_blobs(im, boxes)
269 |     # When mapping from image ROIs to feature map ROIs, there's some aliasing
270 |     # (some distinct image ROIs get mapped to the same feature ROI).
271 |     # Here, we identify duplicate feature ROIs, so we only compute features
272 |     # on the unique subset.
273 |     if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
274 |         v = np.array([1, 1e3, 1e6, 1e9, 1e12])
275 |         hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
276 |         _, index, inv_index = np.unique(hashes, return_index=True,
277 |                                         return_inverse=True)
278 |         blobs['rois'] = blobs['rois'][index, :]
279 |         boxes = boxes[index, :]
280 | 
281 |     if cfg.TEST.HAS_RPN:
282 |         im_blob = blobs['data']
283 |         #print 'change me'
284 |         #print im_blob.shape
285 |         blobs['im_info'] = np.array(
286 |             [[perm_shape[2], perm_shape[3], im_scales[0]]],
287 |             dtype=np.float32)
288 | 
289 | 	blobs['rois'] = np.array(ross)
290 | 
291 | 
292 |     # reshape network inputs
293 |     net.blobs['data'].reshape(*(blobs['data'].shape))
294 |     if cfg.TEST.HAS_RPN:
295 |         net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
296 | 	#print 'rois shape', net.blobs['rois']
297 |         net.blobs['rois'].reshape(*(blobs['rois'].shape))
298 | 	#print 'rois shape', net.blobs['rois'].shape
299 |     else:
300 |         net.blobs['rois'].reshape(*(blobs['rois'].shape))
301 | 
302 |     # do forward
303 |     forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
304 | 
305 |     if cfg.TEST.HAS_RPN:
306 |         forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
307 | 	forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
308 |     else:
309 |         forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
310 | 
311 | 
312 |     blobs_out = net.forward(**forward_kwargs)
313 | 
314 | 
315 |     if cfg.TEST.HAS_RPN:
316 |         assert len(im_scales) == 1, "Only single-image batch implemented"
317 |         rois = net.blobs['rois'].data.copy()
318 | 	# unscale back to raw image space
319 | 	#print 'in'
320 |         rpn_boxes = rois[:, 1:5] / im_scales[0]
321 | 	#rpn_boxes = rois[:, 1:5]    
322 | 	#print ('shape: ', rpn_boxes)          
323 |         #rpn_boxes = rois / im_scales[0]         
324 | 	#rpn_scores = net.blobs['scores'].data.copy()
325 | 	rpn_scores = np.array([[0.7]])
326 | 	#print ('shape1: ', rpn_scores)          
327 | 
328 |         # use softmax estimated probabilities
329 |         pred_scores = blobs_out['cls_prob']
330 | 
331 | 	#print ('shape2: ', pred_scores)          
332 | 
333 |         # Apply bounding-box regression deltas by accessing blob with name bbox_pred
334 |         box_deltas = blobs_out['bbox_pred']
335 | 	#print 'box_deltas: ', (box_deltas.max())*16
336 |         pred_boxes = bbox_transform_inv(rpn_boxes, box_deltas)
337 |         pred_boxes = clip_boxes(pred_boxes, im.shape)
338 | 	#print ('pred_boxes: ', pred_boxes)          
339 | 
340 |         orient_prob = blobs_out['orient_prob']
341 | 	#orient_prob = np.zeros((len(pred_scores),4), dtype='float')
342 | 
343 |     	if extract_feat == True:
344 |         	conv_feat = net.blobs['conv5_3'].data.copy()
345 |                 #print conv_feat.shape
346 | 		return rpn_boxes, rpn_scores, pred_boxes, pred_scores, orient_prob, conv_feat
347 |         
348 | 
349 |     return rpn_boxes, rpn_scores, pred_boxes, pred_scores, orient_prob
350 |     #return rpn_boxes, rpn_scores, pred_boxes
351 | 
352 | def vis_detections(im, class_name, dets, thresh=0.3):
353 |     """Visual debugging of detections."""
354 |     import matplotlib.pyplot as plt
355 |     im = im[:, :, (2, 1, 0)]
356 |     for i in xrange(np.minimum(10, dets.shape[0])):
357 |         bbox = dets[i, :4]
358 |         score = dets[i, -1]
359 |         if score > thresh:
360 |             plt.cla()
361 |             plt.imshow(im)
362 |             plt.gca().add_patch(
363 |                 plt.Rectangle((bbox[0], bbox[1]),
364 |                               bbox[2] - bbox[0],
365 |                               bbox[3] - bbox[1], fill=False,
366 |                               edgecolor='g', linewidth=3)
367 |                 )
368 |             plt.title('{}  {:.3f}'.format(class_name, score))
369 |             plt.show()
370 | 
371 | def apply_nms(all_boxes, thresh):
372 |     """Apply non-maximum suppression to all predicted boxes output by the
373 |     test_net method.
374 |     """
375 |     num_classes = len(all_boxes)
376 |     num_images = len(all_boxes[0])
377 |     nms_boxes = [[[] for _ in xrange(num_images)]
378 |                  for _ in xrange(num_classes)]
379 |     for cls_ind in xrange(num_classes):
380 |         for im_ind in xrange(num_images):
381 |             dets = all_boxes[cls_ind][im_ind]
382 |             if dets == []:
383 |                 continue
384 |             # CPU NMS is much faster than GPU NMS when the number of boxes
385 |             # is relative small (e.g., < 10k)
386 |             # TODO(rbg): autotune NMS dispatch
387 |             keep = nms(dets, thresh, force_cpu=True)
388 |             if len(keep) == 0:
389 |                 continue
390 |             nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
391 |     return nms_boxes
392 | 
393 | def vis_detections_rpn(fname, class_name, dets, scores, im_name):
394 |     """Visual debugging of detections."""
395 | 
396 |     for i in xrange(np.minimum(len(scores), dets.shape[0])):
397 |         #print im_name
398 | 	im = cv2.imread(fname)
399 |         bbox = map(int, dets[i, :])
400 | 	score = scores[i]
401 | 
402 |     	txt = str(score)
403 |           
404 |         cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), [0,0,255], 2, 16)
405 |     	ret, baseline = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
406 |     	cv2.rectangle(im, (bbox[0], bbox[1] - ret[1] - baseline),(bbox[0] + ret[0], bbox[1]), (255, 0, 0), -1)
407 |         
408 |     	cv2.putText(im, txt, (bbox[0], bbox[1] - baseline),
409 |                	cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, 16)
410 | 
411 | 
412 | 	foldername = '/home/javed/1070/Projects/py-faster-rcnn-full_shift/data/output_rpn/' + im_name + '/' 
413 | 	if not os.path.isdir(foldername): 
414 |     		os.makedirs(foldername)
415 | 	filename = foldername + str(i) + '.jpg'
416 |         #print filename
417 |         cv2.imwrite(filename, im)
418 | 
419 | #def vis_detections_final(im, class_name, all_final_boxes, im_name,thresh):
420 | def vis_detections_final(im, class_name, all_final_boxes, im_name,thresh, cntG,cntR, cG, cR, rpn_sscores, rpn_bo, all_final_boxes_rotated):
421 |     """Visual debugging of detections."""
422 |     #print 'i am in visualizer'
423 |     #print len(all_final_boxes)
424 |     boxes = all_final_boxes[:,:4]
425 |     scores = all_final_boxes[:,4]
426 |     scor = all_final_boxes[:,10]  
427 |     rpnns =   all_final_boxes[:,6:10] 
428 | 
429 |     xAll = all_final_boxes_rotated[:,:4]
430 |     yAll = all_final_boxes_rotated[:,4:8]
431 | 
432 |     orient_class = all_final_boxes[:,5]
433 |     s=[]
434 |     for i in xrange(len(scores)):
435 |     	
436 | 	bbox = map(int, boxes[i,:])
437 | 	#rpn_bo = map(int, rpnns[i,:])
438 | 	score = scores[i]
439 | 	orient_cls = orient_class[i]
440 | 	rpn_s = scor[i]
441 | 
442 | 
443 | 	if score > thresh:
444 |                 #print 'greater than thresh'
445 |                 #print bbox
446 |     		txt = class_name + ': ' + str(orient_cls) + ': ' + str(score)
447 | 		#txt = class_name + ': ' + str(orient_cls) + ': ' + str(rpn_s)
448 | 		#print rpn_sscores
449 | 		s.append(score)
450 |     		#cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), [0,0,255], 2, 16)
451 |     		#cv2.rectangle(im, (rpn_bo[0], rpn_bo[1]), (rpn_bo[2], rpn_bo[3]), [255,0,255], 2, 16)
452 | 		#print('writing done')
453 |     		#ret, baseline = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
454 |     		#cv2.rectangle(im, (bbox[0], bbox[1] + ret[1] + baseline),
455 |                 # 		(bbox[0] + ret[0], bbox[1]), (255, 0, 0), -1)
456 |         
457 |     		#cv2.putText(im, txt, (bbox[0], bbox[1] + ret[1]+ baseline),
458 |                	#	cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, 16)
459 | 
460 | 		pts = np.array([[xAll[i,0],yAll[i,0]],[xAll[i,1],yAll[i,1]],[xAll[i,3],yAll[i,3]],[xAll[i,2],yAll[i,2]]], np.int32)
461 | 		#cv2.polylines(im, [pts],True,(0,255,255), 2)
462 | 		#cv2.polylines(im, [pts],True,(128,0,255), 2) #voilet like
463 | 		cv2.polylines(im, [pts],True,(147, 20,255), 6) # pink like
464 | 
465 |     if s:
466 | #	print type(scores)
467 | #	print 'max: ', max(scores)
468 | 
469 | 	    if (class_name == 'Gun'):
470 | 		cntG = max(s)+cntG
471 | 		cG=cG+1
472 | 
473 | 	    if (class_name == 'Riffle'):
474 | 		cntR = max(s)+cntR
475 | 		cR=cR+1
476 | 
477 |     #print (cntG,cntR)
478 |     #print (cG,cR)
479 | 
480 |     return im,cntG,cntR, cG, cR
481 |     #return im
482 | 
483 | def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
484 |     """Test a Fast R-CNN network on an image database."""
485 |     num_images = len(imdb.image_index)
486 |     '''foldername = '/home/javed/1070/Projects/py-faster-rcnn-master/data/output_images_detected/' 
487 |     foldername_all = '/home/javed/1070/Projects/py-faster-rcnn-master/data/output_images_all/'
488 | 
489 |     net2 = caffe.Net('/home/javed/1070/Projects/py-faster-rcnn-master/models/pascal_voc/VGG16/faster_rcnn_alt_opt/faster_rcnn_test_2.pt', '/home/javed/1070/Projects/py-faster-rcnn-master/output/faster_rcnn_alt_opt/voc_2007_trainval/VGG16_faster_rcnn_final.caffemodel', caffe.TEST) '''
490 | 
491 |     foldername = '/media/akhtar/6D2C8F896B2F79E0/Projects/py-faster-rcnn-master/data/output_images_detected/' 
492 |     foldername_all = '/home/itu/faster-rcnn-1070/data/output_images_all/'
493 | 
494 |     '''net2 = caffe.Net('/home/itu/faster-rcnn-1070/models/pascal_voc/VGG16/faster_rcnn_alt_opt/faster_rcnn_test_3.pt', '/home/itu/faster-rcnn-1070/output/faster_rcnn_alt_opt/voc_2007_trainval/VGG16_faster_rcnn_final.caffemodel', caffe.TEST) '''
495 | 
496 |     all_boxes = [[] for _ in xrange(num_images)]
497 | 
498 |     ntopProp = [1,4,50,100,300]
499 |     ntopProp = [300]
500 |     #ntopProp = [50]
501 |     theta = [0, 90, 135, 45, 157.5, 112.5, 67.5, 22.5]
502 |     #theta = [45,90,135,45, 157.5, 112.5,67.5, 22.5]
503 | 
504 |     for t in xrange(0,len(ntopProp)):
505 |     	output_dir = get_output_dir(imdb, net)
506 | 
507 |     	# timers
508 |     	_t = {'im_detect' : Timer(), 'misc' : Timer()}
509 | 
510 |     	if not cfg.TEST.HAS_RPN:
511 |         	roidb = imdb.roidb
512 | 
513 |     	all_final_boxes = [[[] for _ in xrange(num_images)]
514 |                  	for _ in xrange(imdb.num_classes)]
515 | 
516 | 	all_final_boxes_rotated = [[[] for _ in xrange(num_images)]
517 |                  	for _ in xrange(imdb.num_classes)]
518 | 
519 |     	all_rpn_boxes = [[[] for _ in xrange(num_images)]
520 |                  	for _ in xrange(1)]
521 | 
522 | 	#print('all_final_boxes_rotated :', all_final_boxes_rotated)
523 | 	cntG = 0
524 | 	cntR = 0
525 | 	cG = 0
526 | 	cR = 0
527 | 
528 |     	for i in xrange(num_images):
529 |         	# filter out any ground truth boxes
530 |         	if cfg.TEST.HAS_RPN:
531 |             		box_proposals = None
532 |         	else:
533 |             	# The roidb may contain ground-truth rois (for example, if the roidb
534 |             	# comes from the training or val split). We only want to evaluate
535 |             	# detection on the *non*-ground-truth rois. We select those the rois
536 |             	# that have the gt_classes field set to 0, which means there's no
537 |             	# ground truth.
538 |             		box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
539 |  
540 | 		fname = imdb.image_path_at(i)
541 | 		ind = fname.rindex('/')
542 | 		ind_end = fname.rindex('.')
543 | 		filename = fname[ind+1:ind_end]
544 |                 print filename
545 | 
546 |         	im = cv2.imread(imdb.image_path_at(i))
547 | 
548 | 		fname = foldername + filename + '.jpg'
549 | 
550 | 		'''rpn_boxes_rot = np.zeros((1,4))
551 |                 rpn_scores_rot = np.zeros((1))
552 |                 final_boxes_rot = np.zeros((1,12))
553 |                 final_scores_rot = np.zeros((1,3))
554 |                 orient_prob_rot = np.zeros((1,8))'''
555 | 
556 |         	_t['im_detect'].tic()
557 |                 #print 'first pass'
558 |         	
559 |                 #rpn_boxes, rpn_scores, final_boxes, final_scores, orient_score, conv_feat, f_shape = im_detect(net, im, box_proposals, True)
560 |                 rpn_boxes, rpn_scores, final_boxes, final_scores, orient_score, final_boxes1, final_scores1, transApplied = im_detect(net, im, box_proposals, True)
561 |                 
562 | 
563 | 		#print('orient_scores:  ', orient_score.shape)
564 |                 #print conv_feat.shape
565 |                 #in_feat = np.rollaxis(conv_feat, 1, 4)  
566 |                 #print in_feat.sum()
567 | 
568 |                 if ntopProp[t] == 300:
569 | 			if len(rpn_scores) > 299:
570 | 				rpn_boxes = rpn_boxes[0:ntopProp[t],:]
571 | 				rpn_scores = rpn_scores[0:ntopProp[t],:]
572 | 				final_boxes = final_boxes[0:ntopProp[t],:]
573 | 				final_scores = final_scores[0:ntopProp[t],:]
574 |                                 orient_scores = orient_score[0:ntopProp[t],:]
575 | 				final_boxes1 = final_boxes1[0:ntopProp[t],:]
576 | 				final_scores1 = final_scores1[0:ntopProp[t],:]
577 | 				transApplied = transApplied[0:ntopProp[t],:,:,:]
578 | 		else:	
579 | 			rpn_boxes = rpn_boxes[0:ntopProp[t],:]
580 | 			rpn_scores = rpn_scores[0:ntopProp[t],:]
581 | 			final_boxes = final_boxes[0:ntopProp[t],:]
582 | 			final_scores = final_scores[0:ntopProp[t],:]
583 |                         orient_scores = orient_score[0:ntopProp[t],:]
584 | 			final_boxes1 = final_boxes1[0:ntopProp[t],:]
585 | 			final_scores1 = final_scores1[0:ntopProp[t],:]
586 | 			transApplied = transApplied[0:ntopProp[t],:,:,:]
587 | 
588 | 		#print('orient_scores:  ', orient_scores.shape)
589 | 		#top_proposals_pass_2 = 50
590 |                 temp_boxes = None
591 |                 blobs, im_scales = _get_blobs(im, temp_boxes)
592 |                 #print len(rpn_boxes)
593 | 
594 | 		rotatedBoxesAll = np.zeros((len(rpn_boxes), 3,2,4))
595 | 		for iii in range(0, len(rpn_boxes)):
596 | 			final_boxes_tr = final_boxes1[iii,:]
597 | 			#print('final_boxes_tr :', final_boxes_tr)
598 | 			final_boxes_tr = ((final_boxes_tr * im_scales[0]) / 16)
599 | 
600 | 			final_boxes_tr = trans_box1(final_boxes_tr,transApplied[iii,0,:,:],transApplied[iii,1,:,:])
601 | 			
602 | 			final_boxes_tr = ((final_boxes_tr * 16) / im_scales[0])
603 | 
604 | 			rotatedBoxesAll[iii, :,:,:] = final_boxes_tr[0,:,:,:]
605 | 			
606 | 
607 | 		
608 | 
609 |                 #print('rotatedBoxesAll :', rotatedBoxesAll.shape)  
610 |         	#vis_detections_rpn(fname, 'fireArm', rpn_boxes, rpn_scores, filename)
611 |                 #print hi
612 | 
613 |    		rpn_dets = np.hstack((rpn_boxes, rpn_scores)) \
614 |                 	.astype(np.float32, copy=False)
615 | 		all_rpn_boxes[0][i] = rpn_dets
616 | 
617 | 
618 |         	_t['misc'].tic()
619 | 
620 |         	# skip j = 0, because it's the background class
621 | 		#maxScore = np.maximum(final_scores1, final_scores)
622 | 		maxScore = final_scores1
623 |         	for j in xrange(1, imdb.num_classes):
624 | 			#inds = np.where(final_scores1[:, j] > thresh)[0]
625 | 			#cls_scores = final_scores1[inds, j]
626 | 			inds = np.where(maxScore[:, j] > thresh)[0]
627 | 			cls_scores = maxScore[inds, j]
628 | 			cls_boxes = final_boxes[inds, j*4:(j+1)*4]
629 | 			cls_orient = np.argmax(orient_score[inds, :], axis = 1)
630 | 			rpn_bboxes = rpn_boxes[inds,:]
631 | 			rpn_sscores = rpn_scores[inds]
632 | 			
633 | 			cls_scores1 = final_scores[inds, j]
634 | 
635 | 			rotatedBoxesClass = np.hstack((rotatedBoxesAll[inds,j,0,:], rotatedBoxesAll[inds,j,1,:])).astype(np.float32, copy=False)
636 | 			#print('rotatedBoxesClass :', rotatedBoxesClass.shape)
637 | 
638 | 			cls_dets_temp_rotated = np.hstack((rotatedBoxesAll[inds,j,0,:], rotatedBoxesAll[inds,j,1,:], cls_scores[:, np.newaxis])) \
639 |                 		.astype(np.float32, copy=False)
640 | 
641 | 
642 |             		cls_dets_temp = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
643 |                 		.astype(np.float32, copy=False)
644 | 
645 |                         #print('cls_dets_temp', cls_dets_temp.shape)
646 | 
647 |            		cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis], cls_orient[:, np.newaxis], rpn_bboxes, rpn_sscores)) \
648 |                 		.astype(np.float32, copy=False)
649 | 
650 | 			'''keep = py_cpu_nms(cls_dets_temp_rotated, 0.4)
651 | 			cls_dets = cls_dets[keep, :]
652 | 			rotatedBoxesClass = rotatedBoxesClass[keep, :]
653 | 			cls_dets_temp = cls_dets_temp[keep,:]'''
654 | 
655 |             		keep = nms(cls_dets_temp, cfg.TEST.NMS)
656 | 			#keep = nms(cls_dets_temp, 0.3)
657 | 			
658 | 			cls_dets = cls_dets[keep, :]
659 | 			rotatedBoxesClass = rotatedBoxesClass[keep, :]
660 | 			
661 | 			'''cls_dets_temp_rotated = cls_dets_temp_rotated[keep,:]
662 | 			keep = py_cpu_nms(cls_dets_temp_rotated, 0.4)
663 | 			#print('keep :', keep)
664 |             		cls_dets = cls_dets[keep, :]
665 | 			rotatedBoxesClass = rotatedBoxesClass[keep, :]'''
666 | 
667 |             		all_final_boxes[j][i] = cls_dets
668 | 			all_final_boxes_rotated[j][i] = rotatedBoxesClass
669 | 			#print('rotatedBoxesClass :', rotatedBoxesClass.shape, cls_dets.shape)
670 | 
671 | 		#print('all_final_boxes_rotated :', all_final_boxes_rotated)
672 | 		#print('all_final_boxes :', all_final_boxes)
673 |         	# Limit to max_per_image detections *over all classes*
674 | 
675 |         	if max_per_image > 0:
676 |             		image_scores = np.hstack([all_final_boxes[j][i][:, 4]
677 |                           		            for j in xrange(1, imdb.num_classes)])
678 | 
679 |             		if len(image_scores) > max_per_image:
680 |                 		image_thresh = np.sort(image_scores)[-max_per_image]
681 |                 		for j in xrange(1, imdb.num_classes):
682 |                    			keep = np.where(all_final_boxes[j][i][:, -1] >= image_thresh)[0]
683 |                     			all_final_boxes[j][i] = all_final_boxes[j][i][keep, :]
684 | 					all_final_boxes_rotated[j][i] = all_final_boxes_rotated[j][i][keep, :]
685 | 
686 |                 
687 |                 for j in xrange(1, imdb.num_classes):
688 | 			#rpn_bo = np.array([616, 405, 825, 556])
689 | 			#rpn_bo = np.array([231,129,621,939])
690 | 			rpn_bo = np.array([208, 58, 2243, 1094])
691 | 			#im = vis_detections_final(im, imdb.classes[j], all_final_boxes[j][i], filename, 0.65)
692 | 			im,cntG,cntR, cG, cR = vis_detections_final(im, imdb.classes[j], all_final_boxes[j][i], filename, 0.75, cntG,cntR, cG, cR, rpn_sscores, rpn_bo, all_final_boxes_rotated[j][i])
693 |                         #print hi
694 | 
695 | 		fname = foldername_all + filename + '.jpg'
696 |                 print fname
697 | 		cv2.imwrite(fname, im)
698 |                     
699 | 
700 |         	_t['misc'].toc()
701 |         	print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
702 |               	.format(i + 1, num_images, _t['im_detect'].average_time,
703 |                       	_t['misc'].average_time)
704 | 
705 | 
706 | 		#print('all_rpn_boxes', len(all_rpn_boxes), len(all_final_boxes), output_dir)
707 |     	#print ('Evaluating RPN detections for top Proposals: ' + str(ntopProp[t]) )
708 |     	#imdb.evaluate_rpn(all_rpn_boxes, output_dir, ntopProp[t])
709 | 
710 |     	#print ('Evaluating detections for top Proposals: ' + str(ntopProp[t]) )
711 |     	#imdb.evaluate_detections(all_final_boxes, output_dir,ntopProp[t])
712 | 
713 | 
714 | def trans_box1(final_boxes,T_final, T11):
715 |     final_boxes = final_boxes.reshape(1,12)
716 |     final_boxes_final = np.zeros((len(final_boxes),3, 2,4))
717 |     #print('final_boxes :', final_boxes.shape, final_boxes_final.shape)
718 | 
719 |     for k in range(0, len(final_boxes)):
720 |         #print('k :', k)
721 | 	
722 |         class1 = final_boxes[k,0:4]
723 |         class2 = final_boxes[k,4:8]
724 |         class3 = final_boxes[k,8:12]
725 | 
726 |         box1 = [ class1[0] , class1[1] , class1[2] , class1[3] ]
727 |         box2 = [ class2[0] , class2[1] , class2[2] , class2[3] ] 
728 |         box3 = [ class3[0] , class3[1] , class3[2] , class3[3] ]
729 | 
730 |         class1_out = trans_layer1(T_final, T11, box1)
731 |         class2_out = trans_layer1(T_final, T11, box2)
732 |         class3_out = trans_layer1(T_final, T11, box3)
733 | 
734 | 	final_boxes_final[k,0,:,:] = class1_out
735 | 	final_boxes_final[k,1,:,:] = class2_out
736 | 	final_boxes_final[k,2,:,:] = class2_out
737 |         #final_boxes_final[k,:] = [ class1_out[0], class1_out[1], class1_out[2], class1_out[3], class2_out[0], class2_out[1], class2_out[2], class2_out[3], class3_out[0], class3_out[1], class3_out[2], class3_out[3]]
738 | 
739 |     return final_boxes_final
740 | 
741 | def trans_layer1(T_final,T11, final_b):
742 | 
743 | 	nT0 = inv(T11)
744 | 	ncorner_pts = [[final_b[0],final_b[2],final_b[0],final_b[2]],[final_b[1],final_b[1],final_b[3],final_b[3]],[1,1,1,1]]
745 | 	nboxx = np.dot(nT0[0:2,:],ncorner_pts)
746 | 	rxymin_nb = nboxx.min(1)
747 | 	rxymax_nb = nboxx.max(1)
748 | 
749 | 	T2 = inv(T_final)
750 | 	boxx2 = np.dot(T2[0:2,:],[nboxx[0], nboxx[1],[1,1,1,1]])
751 | 
752 | 	#print('nboxx', nboxx.shape, boxx2.shape)
753 | 	
754 | 	#fin_cropped_box =  [rxymin_nb[0], rxymin_nb[1], rxymin_nb[0], rxymin_nb[1]]
755 | 
756 | 
757 |     	
758 | 	return boxx2
759 | 


--------------------------------------------------------------------------------
/faster_rcnn_test.pt:
--------------------------------------------------------------------------------
  1 | name: "VGG_ILSVRC_16_layers"
  2 | 
  3 | input: "data"
  4 | input_shape {
  5 |   dim: 1
  6 |   dim: 3
  7 |   dim: 224
  8 |   dim: 224
  9 | }
 10 | 
 11 | input: "im_info"
 12 | input_shape {
 13 |   dim: 1
 14 |   dim: 3
 15 | }
 16 | 
 17 | 
 18 | layer {
 19 |   name: "conv1_1"
 20 |   type: "Convolution"
 21 |   bottom: "data"
 22 |   top: "conv1_1"
 23 |   convolution_param {
 24 |     num_output: 64
 25 |     pad: 1 kernel_size: 3
 26 |   }
 27 | }
 28 | layer {
 29 |   name: "relu1_1"
 30 |   type: "ReLU"
 31 |   bottom: "conv1_1"
 32 |   top: "conv1_1"
 33 | }
 34 | layer {
 35 |   name: "conv1_2"
 36 |   type: "Convolution"
 37 |   bottom: "conv1_1"
 38 |   top: "conv1_2"
 39 |   convolution_param {
 40 |     num_output: 64
 41 |     pad: 1 kernel_size: 3
 42 |   }
 43 | }
 44 | layer {
 45 |   name: "relu1_2"
 46 |   type: "ReLU"
 47 |   bottom: "conv1_2"
 48 |   top: "conv1_2"
 49 | }
 50 | layer {
 51 |   name: "pool1"
 52 |   type: "Pooling"
 53 |   bottom: "conv1_2"
 54 |   top: "pool1"
 55 |   pooling_param {
 56 |     pool: MAX
 57 |     kernel_size: 2 stride: 2
 58 |   }
 59 | }
 60 | layer {
 61 |   name: "conv2_1"
 62 |   type: "Convolution"
 63 |   bottom: "pool1"
 64 |   top: "conv2_1"
 65 |   convolution_param {
 66 |     num_output: 128
 67 |     pad: 1 kernel_size: 3
 68 |   }
 69 | }
 70 | layer {
 71 |   name: "relu2_1"
 72 |   type: "ReLU"
 73 |   bottom: "conv2_1"
 74 |   top: "conv2_1"
 75 | }
 76 | layer {
 77 |   name: "conv2_2"
 78 |   type: "Convolution"
 79 |   bottom: "conv2_1"
 80 |   top: "conv2_2"
 81 |   convolution_param {
 82 |     num_output: 128
 83 |     pad: 1 kernel_size: 3
 84 |   }
 85 | }
 86 | layer {
 87 |   name: "relu2_2"
 88 |   type: "ReLU"
 89 |   bottom: "conv2_2"
 90 |   top: "conv2_2"
 91 | }
 92 | layer {
 93 |   name: "pool2"
 94 |   type: "Pooling"
 95 |   bottom: "conv2_2"
 96 |   top: "pool2"
 97 |   pooling_param {
 98 |     pool: MAX
 99 |     kernel_size: 2 stride: 2
100 |   }
101 | }
102 | layer {
103 |   name: "conv3_1"
104 |   type: "Convolution"
105 |   bottom: "pool2"
106 |   top: "conv3_1"
107 |   convolution_param {
108 |     num_output: 256
109 |     pad: 1 kernel_size: 3
110 |   }
111 | }
112 | layer {
113 |   name: "relu3_1"
114 |   type: "ReLU"
115 |   bottom: "conv3_1"
116 |   top: "conv3_1"
117 | }
118 | layer {
119 |   name: "conv3_2"
120 |   type: "Convolution"
121 |   bottom: "conv3_1"
122 |   top: "conv3_2"
123 |   convolution_param {
124 |     num_output: 256
125 |     pad: 1 kernel_size: 3
126 |   }
127 | }
128 | layer {
129 |   name: "relu3_2"
130 |   type: "ReLU"
131 |   bottom: "conv3_2"
132 |   top: "conv3_2"
133 | }
134 | layer {
135 |   name: "conv3_3"
136 |   type: "Convolution"
137 |   bottom: "conv3_2"
138 |   top: "conv3_3"
139 |   convolution_param {
140 |     num_output: 256
141 |     pad: 1 kernel_size: 3
142 |   }
143 | }
144 | layer {
145 |   name: "relu3_3"
146 |   type: "ReLU"
147 |   bottom: "conv3_3"
148 |   top: "conv3_3"
149 | }
150 | layer {
151 |   name: "pool3"
152 |   type: "Pooling"
153 |   bottom: "conv3_3"
154 |   top: "pool3"
155 |   pooling_param {
156 |     pool: MAX
157 |     kernel_size: 2 stride: 2
158 |   }
159 | }
160 | layer {
161 |   name: "conv4_1"
162 |   type: "Convolution"
163 |   bottom: "pool3"
164 |   top: "conv4_1"
165 |   convolution_param {
166 |     num_output: 512
167 |     pad: 1 kernel_size: 3
168 |   }
169 | }
170 | layer {
171 |   name: "relu4_1"
172 |   type: "ReLU"
173 |   bottom: "conv4_1"
174 |   top: "conv4_1"
175 | }
176 | layer {
177 |   name: "conv4_2"
178 |   type: "Convolution"
179 |   bottom: "conv4_1"
180 |   top: "conv4_2"
181 |   convolution_param {
182 |     num_output: 512
183 |     pad: 1 kernel_size: 3
184 |   }
185 | }
186 | layer {
187 |   name: "relu4_2"
188 |   type: "ReLU"
189 |   bottom: "conv4_2"
190 |   top: "conv4_2"
191 | }
192 | layer {
193 |   name: "conv4_3"
194 |   type: "Convolution"
195 |   bottom: "conv4_2"
196 |   top: "conv4_3"
197 |   convolution_param {
198 |     num_output: 512
199 |     pad: 1 kernel_size: 3
200 |   }
201 | }
202 | layer {
203 |   name: "relu4_3"
204 |   type: "ReLU"
205 |   bottom: "conv4_3"
206 |   top: "conv4_3"
207 | }
208 | layer {
209 |   name: "pool4"
210 |   type: "Pooling"
211 |   bottom: "conv4_3"
212 |   top: "pool4"
213 |   pooling_param {
214 |     pool: MAX
215 |     kernel_size: 2 stride: 2
216 |   }
217 | }
218 | layer {
219 |   name: "conv5_1"
220 |   type: "Convolution"
221 |   bottom: "pool4"
222 |   top: "conv5_1"
223 |   convolution_param {
224 |     num_output: 512
225 |     pad: 1 kernel_size: 3
226 |   }
227 | }
228 | layer {
229 |   name: "relu5_1"
230 |   type: "ReLU"
231 |   bottom: "conv5_1"
232 |   top: "conv5_1"
233 | }
234 | layer {
235 |   name: "conv5_2"
236 |   type: "Convolution"
237 |   bottom: "conv5_1"
238 |   top: "conv5_2"
239 |   convolution_param {
240 |     num_output: 512
241 |     pad: 1 kernel_size: 3
242 |   }
243 | }
244 | layer {
245 |   name: "relu5_2"
246 |   type: "ReLU"
247 |   bottom: "conv5_2"
248 |   top: "conv5_2"
249 | }
250 | layer {
251 |   name: "conv5_3"
252 |   type: "Convolution"
253 |   bottom: "conv5_2"
254 |   top: "conv5_3"
255 |   param {
256 |   name: "conv5_3_w"
257 |   }
258 |   param {
259 |   name: "conv5_3_b"
260 |   }
261 |   convolution_param {
262 |     num_output: 512
263 |     pad: 1 kernel_size: 3
264 |   }
265 | }
266 | layer {
267 |   name: "relu5_3"
268 |   type: "ReLU"
269 |   bottom: "conv5_3"
270 |   top: "conv5_3"
271 | }
272 | 
273 | #========= RPN ============
274 | 
275 | layer {
276 |   name: "rpn_conv/3x3"
277 |   type: "Convolution"
278 |   bottom: "conv5_3"
279 |   top: "rpn/output"
280 |   param {
281 |   name: "rpn/output_w"
282 |   }
283 |   param {
284 |   name: "rpn/output_b"
285 |   }
286 |   convolution_param {
287 |     num_output: 512
288 |     kernel_size: 3 pad: 1 stride: 1
289 |   }
290 | }
291 | layer {
292 |   name: "rpn_relu/3x3"
293 |   type: "ReLU"
294 |   bottom: "rpn/output"
295 |   top: "rpn/output"
296 | }
297 | 
298 | layer {
299 |   name: "rpn_cls_score"
300 |   type: "Convolution"
301 |   bottom: "rpn/output"
302 |   top: "rpn_cls_score"
303 |   param {
304 |   name: "rpn_cls_score_w"
305 |   }
306 |   param {
307 |   name: "rpn_cls_score_b"
308 |   }
309 |   convolution_param {
310 |     num_output: 18  
311 |     kernel_size: 1 pad: 0 stride: 1
312 |   }
313 | }
314 | 
315 | layer {
316 |   name: "rpn_bbox_pred"
317 |   type: "Convolution"
318 |   bottom: "rpn/output"
319 |   top: "rpn_bbox_pred"
320 |   param {
321 |   name: "rpn_bbox_pred_w"
322 |   }
323 |   param {
324 |   name: "rpn_bbox_pred_b"
325 |   }
326 |   convolution_param {
327 |     num_output: 36   # 4 * 9(anchors)
328 |     kernel_size: 1 pad: 0 stride: 1
329 |   }
330 | }
331 | 
332 | layer {
333 |    bottom: "rpn_cls_score"
334 |    top: "rpn_cls_score_reshape"
335 |    name: "rpn_cls_score_reshape"
336 |    type: "Reshape"
337 |    reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
338 | }
339 | 
340 | #========= RoI Proposal ============
341 | 
342 | layer {
343 |   name: "rpn_cls_prob"
344 |   type: "Softmax"
345 |   bottom: "rpn_cls_score_reshape"
346 |   top: "rpn_cls_prob"
347 | }
348 | layer {
349 |   name: 'rpn_cls_prob_reshape'
350 |   type: 'Reshape'
351 |   bottom: 'rpn_cls_prob'
352 |   top: 'rpn_cls_prob_reshape'
353 |   reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
354 | }
355 | layer {
356 |   name: 'proposal'
357 |   type: 'Python'
358 |   bottom: 'rpn_cls_prob_reshape'
359 |   bottom: 'rpn_bbox_pred'
360 |   bottom: 'im_info'
361 |   top: 'rois'
362 |   top: 'scores'
363 |   python_param {
364 |     module: 'rpn.proposal_layer'
365 |     layer: 'ProposalLayer'
366 |     param_str: "'feat_stride': 16"
367 |   }
368 | }
369 | 
370 | #========= RCNN ============
371 | 
372 | layer {
373 |   name: "roi_pool5"
374 |   type: "ROIPooling"
375 |   bottom: "conv5_3"
376 |   bottom: "rois"
377 |   top: "pool5"
378 |   roi_pooling_param {
379 |     pooled_w: 7
380 |     pooled_h: 7
381 |     spatial_scale: 0.0625 # 1/16
382 |   }
383 | }
384 | layer {
385 |   name: "fc6"
386 |   type: "InnerProduct"
387 |   bottom: "pool5"
388 |   top: "fc6"
389 |   param {
390 |   name: "fc6_w"
391 |   }
392 |   param {
393 |   name: "fc6_b"
394 |   }
395 |   inner_product_param {
396 |     num_output: 4096
397 |   }
398 | }
399 | layer {
400 |   name: "relu6"
401 |   type: "ReLU"
402 |   bottom: "fc6"
403 |   top: "fc6"
404 | }
405 | layer {
406 |   name: "fc7"
407 |   type: "InnerProduct"
408 |   bottom: "fc6"
409 |   top: "fc7"
410 |   param {
411 |   name: "fc7_w"
412 |   }
413 |   param {
414 |   name: "fc7_b"
415 |   }
416 |   inner_product_param {
417 |     num_output: 4096
418 |   }
419 | }
420 | layer {
421 |   name: "relu7"
422 |   type: "ReLU"
423 |   bottom: "fc7"
424 |   top: "fc7"
425 | }
426 | layer {
427 |   name: "cls_score"
428 |   type: "InnerProduct"
429 |   bottom: "fc7"
430 |   top: "cls_score"
431 |   param {
432 |   name: "cls_score_w"
433 |   }
434 |   param {
435 |   name: "cls_score_b"
436 |   }
437 |   inner_product_param {
438 |     num_output: 3
439 |   }
440 | }
441 | layer {
442 |   name: "orient_pred"
443 |   type: "InnerProduct"
444 |   bottom: "fc7"
445 |   top: "orient_score"
446 |   param {
447 |   name: "orient_pred_w"
448 |   }
449 |   param {
450 |   name: "orient_pred_b"
451 |   }
452 |   inner_product_param {
453 |     num_output: 8
454 |   }
455 | }
456 | layer {
457 |   name: "bbox_pred"
458 |   type: "InnerProduct"
459 |   bottom: "fc7"
460 |   top: "bbox_pred"
461 |   param {
462 |   name: "bbox_pred_w"
463 |   }
464 |   param {
465 |   name: "bbox_pred_b"
466 |   }
467 |   inner_product_param {
468 |     num_output: 12
469 |   }
470 | }
471 | 
472 | layer {
473 |   name: "cls_prob"
474 |   type: "Softmax"
475 |   bottom: "cls_score"
476 |   top: "cls_prob"
477 | }
478 | layer {
479 |   name: "orient_prob"
480 |   type: "Softmax"
481 |   bottom: "orient_score"
482 |   top: "orient_prob"
483 | }
484 | layer {
485 |   name: "silense"
486 |   type: "Silence"
487 |   bottom: "scores"
488 | }
489 | 
490 | 
491 | 
492 | 
493 | 
494 | 
495 | 
496 | layer {
497 |   name: "rois_making"
498 |   type: "Python"
499 |   bottom: "rois"
500 |   bottom: "bbox_pred"
501 |   bottom: "im_info"
502 |   bottom: "cls_prob"
503 |   top: "rois1"
504 |   python_param {
505 |     module: 'rpn.makebboxproposals'
506 |     layer: 'makebBox_regionProposal'
507 |     #param_str: "'feat_stride': 16"
508 |   }
509 | }
510 | 
511 | 
512 | 
513 | 
514 | 
515 | layer {
516 |   name: "roi_warping"
517 |   type: "Python"
518 |   bottom: "conv5_3"
519 |   bottom: "rois1"
520 |   bottom: "orient_prob"
521 |   top: "warpedMap"
522 |   top: "warpedrois"
523 |   top: "transApplied"
524 |   python_param {
525 |     module: 'rpn.ftMap_Warp_2'
526 |     layer: 'fMapWarpLayerSep'
527 |     #param_str: "'feat_stride': 16"
528 |   }
529 | }
530 | 
531 | layer {
532 |   name: "roi_pool51"
533 |   type: "ROIPooling"
534 |   bottom: "warpedMap"
535 |   bottom: "warpedrois"
536 |   top: "pool51"
537 |   roi_pooling_param {
538 |     pooled_w: 7
539 |     pooled_h: 7
540 |     spatial_scale: 0.0625 # 1/16
541 |   }
542 | }
543 | 
544 | 
545 | 
546 | 
547 | 
548 | 
549 | layer {
550 |   name: "fc61"
551 |   type: "InnerProduct"
552 |   bottom: "pool51"
553 |   top: "fc61"
554 |   param {
555 |     lr_mult: 1
556 |   }
557 |   param {
558 |     lr_mult: 2
559 |   }
560 |   inner_product_param {
561 |     num_output: 4096
562 |   }
563 | }
564 | layer {
565 |   name: "relu61"
566 |   type: "ReLU"
567 |   bottom: "fc61"
568 |   top: "fc61"
569 | }
570 | 
571 | 
572 | layer {
573 |   name: "fc71"
574 |   type: "InnerProduct"
575 |   bottom: "fc61"
576 |   top: "fc71"
577 |   param {
578 |     lr_mult: 1
579 |   }
580 |   param {
581 |     lr_mult: 2
582 |   }
583 |   inner_product_param {
584 |     num_output: 4096
585 |   }
586 | }
587 | layer {
588 |   name: "relu71"
589 |   type: "ReLU"
590 |   bottom: "fc71"
591 |   top: "fc71"
592 | }
593 | 
594 | layer {
595 |   name: "cls_score1"
596 |   type: "InnerProduct"
597 |   bottom: "fc71"
598 |   top: "cls_score1"
599 |   param {
600 |     lr_mult: 1
601 |   }
602 |   param {
603 |     lr_mult: 2
604 |   }
605 |   inner_product_param {
606 |     num_output: 3
607 |     weight_filler {
608 |       type: "gaussian"
609 |       std: 0.01
610 |     }
611 |     bias_filler {
612 |       type: "constant"
613 |       value: 0
614 |     }
615 |   }
616 | }
617 | 
618 | layer {
619 |   name: "bbox_pred1"
620 |   type: "InnerProduct"
621 |   bottom: "fc71"
622 |   top: "bbox_pred1"
623 |   param {
624 |     lr_mult: 1
625 |   }
626 |   param {
627 |     lr_mult: 2
628 |   }
629 |   inner_product_param {
630 |     num_output: 12
631 |     weight_filler {
632 |       type: "gaussian"
633 |       std: 0.001
634 |     }
635 |     bias_filler {
636 |       type: "constant"
637 |       value: 0
638 |     }
639 |   }
640 | }
641 | 
642 | 
643 | 
644 | layer {
645 |   name: "cls_prob1"
646 |   type: "Softmax"
647 |   bottom: "cls_score1"
648 |   top: "cls_prob1"
649 | }
650 | 
651 | #layer {
652 | #  name: "bbox_prob1"
653 | #  type: "Softmax"
654 | #  bottom: "bbox_pred1"
655 | #  top: "bbox_prob1"
656 | #}
657 | 
658 | 
659 | 
660 | 


--------------------------------------------------------------------------------
/ftMap_Warp_2.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Written by CVML
  3 | # --------------------------------------------------------
  4 | 
  5 | 
  6 | 
  7 | from fast_rcnn.config import cfg
  8 | from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv
  9 | from fast_rcnn.ftmap_transform import transformer_layer_fMapSep as fMap_trans_layer
 10 | from fast_rcnn.ftmap_transform import transformer_layer_fMapSep_backward as fMap_trans_layer_backward
 11 | import argparse
 12 | from utils.timer import Timer
 13 | import numpy as np
 14 | import cv2
 15 | from numpy.linalg import inv
 16 | import caffe
 17 | from fast_rcnn.nms_wrapper import nms
 18 | import cPickle
 19 | from utils.blob import im_list_to_blob
 20 | import os
 21 | import matplotlib.pyplot as plt
 22 | import xml.etree.ElementTree as ET
 23 | import gc
 24 | import yaml
 25 | 
 26 | 
 27 | DEBUG = False
 28 | 
 29 | class fMapWarpLayerSep(caffe.Layer):
 30 |     """
 31 |     transforms feature map and corresponding bouning boxes with respect to angle
 32 |     """
 33 | 
 34 |     def setup(self, bottom, top):
 35 | 
 36 | 	pass
 37 | 
 38 |     def forward(self, bottom, top):
 39 | 
 40 |         assert bottom[0].data.shape[0] == 1, \
 41 |             'Only single item batches are supported'
 42 | 
 43 | 
 44 |         # the first set of _num_anchors channels are bg probs
 45 |         # the second set are the fg probs, which we want
 46 |         conv_feat = bottom[0].data
 47 |         rpn_boxes = bottom[1].data
 48 | 	angle = bottom[2].data
 49 | 
 50 | 	out_feat, ross, transApplied, T_final = warp_fMap(conv_feat, rpn_boxes, angle) 
 51 | 
 52 |         blob = np.rollaxis(out_feat, 3, 1) 
 53 | 
 54 | 	top[0].reshape(*(blob.shape))
 55 |         top[0].data[...] = blob
 56 | 	top[1].reshape(*(ross.shape))
 57 | 	top[1].data[...] = ross
 58 | 	
 59 | 	top[2].reshape(*(transApplied.shape))
 60 | 	top[2].data[...] = transApplied
 61 | 	
 62 | 	#top[3].reshape(*(T_final.shape))
 63 | 	#top[3].data[...] = T_final
 64 | 
 65 | 	#print('blob', blob.shape, ross.shape)
 66 | 
 67 |     def backward(self, top, propagate_down, bottom):
 68 |         """This layer does not propagate gradients."""
 69 | 
 70 |         grad_warpMap = top[0].diff
 71 |         #rpn_boxes = bottom[1].data
 72 |         rpn_boxes_gwm = top[1].data
 73 | 	angle = bottom[2].data
 74 | 
 75 | 	in_gwm = np.rollaxis(grad_warpMap, 1, 4)
 76 | 
 77 | 	out_gwm, rotated_gwm, transApplied_gwm = fMap_trans_layer_backward(in_gwm, angle, rpn_boxes_gwm)
 78 | 
 79 | 
 80 |     def reshape(self, bottom, top):
 81 |         """Reshaping happens during the call to forward."""
 82 | 	top[0].reshape(*bottom[0].shape)
 83 | 	top[1].reshape(*bottom[1].shape)
 84 | 	top[2].reshape(*bottom[2].shape)
 85 |         #pass
 86 | 
 87 | 
 88 | 
 89 | def warp_fMap(conv_feat, rpn_boxes, angle):
 90 | 
 91 | 	
 92 | 	#angle = 22.5
 93 |         
 94 |         #print conv_feat.shape
 95 |         in_feat = np.rollaxis(conv_feat, 1, 4)  
 96 |         #print in_feat.sum()
 97 | 
 98 | 
 99 | 	top_proposals_pass_2 = 50
100 |         temp_boxes = None
101 | 		
102 | 	out_feat, rotated_rpns, transApplied, T_final = fMap_trans_layer(in_feat, angle, rpn_boxes)
103 | 
104 | 	ross = rotated_rpns
105 | 
106 | 	ross = np.array(ross)
107 | 	#print('ross :', ross.shape)
108 | 	
109 | 	transApplied = np.array(transApplied)
110 | 
111 | 	return out_feat, ross, transApplied, T_final
112 | 
113 | 
114 | 
115 | 


--------------------------------------------------------------------------------
/ftmap_transform.py:
--------------------------------------------------------------------------------
  1 | # Modified & Written by CVML
  2 | 
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | import cv2
  6 | import re
  7 | import math
  8 | from numpy.linalg import inv
  9 | from numpy import linalg
 10 | from numpy import matrix
 11 | import xml.etree.ElementTree as ET
 12 | 
 13 | def transformer_layer(input_fmap, angle, box, output_tr_flag, out_dims=None,  **kwargs):
 14 |     
 15 |   
 16 |     B = np.shape(input_fmap)[0]
 17 |     H = np.shape(input_fmap)[1]
 18 |     W = np.shape(input_fmap)[2]
 19 |     C = np.shape(input_fmap)[3]
 20 |     
 21 |    
 22 |     cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2])
 23 | 
 24 |     T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]]
 25 |     T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]])
 26 |     T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]]
 27 | 
 28 |     T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1)))
 29 |   
 30 |     corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]]
 31 | 
 32 |     trans_cpoints = np.dot(T[0:2,:],corner_pts)
 33 |     
 34 |     xymin = trans_cpoints.min(1)
 35 |     xymax = trans_cpoints.max(1)
 36 | 
 37 |     out_H = np.int32(xymax[1] - xymin[1] + 1)
 38 |     out_W = np.int32(xymax[0] - xymin[0] + 1)
 39 | 
 40 |     out_fmap_size = [0,0,out_W, out_H]
 41 | 
 42 |     T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]];
 43 |    
 44 |     T_final = np.dot(np.transpose(T4),T);
 45 | 
 46 |     rect_pts = [ [box[0],box[2], box[0], box[2]] ,[box[1],box[1],box[3],box[3]],[1,1,1,1]]
 47 | 
 48 |     trans_rpoints = np.dot(T_final[0:2,:],rect_pts)
 49 | 
 50 |     rxymin = trans_rpoints.min(1)
 51 |     rxymax = trans_rpoints.max(1)
 52 | 
 53 |     cropped_box = [np.int32(np.floor(rxymin[0])), np.int32(np.floor(rxymin[1])), np.int32(np.floor(rxymax[0])), np.int32(np.floor(rxymax[1]))]
 54 | 
 55 |     intSec1, intSec2 = LinesIntersectionForLargestBox(trans_rpoints, np.array(rect_pts), angle)
 56 |     height_deltas = [intSec1[1]-cropped_box[1], cropped_box[3]-intSec2[1]]
 57 | 
 58 |     if output_tr_flag == True:
 59 |         
 60 |         return cropped_box
 61 |     
 62 |     else:
 63 |         
 64 |     	batch_grids = affine_grid_generator(out_H, out_W, T_final)
 65 |     
 66 |     	x_s = batch_grids[:,0, :, :]
 67 |     	y_s = batch_grids[:,1, :, :]
 68 | 
 69 | 	out_fmap = bilinear_sampler_Interpol(input_fmap, x_s, y_s) # Interpolation with in bbox and extend outside using Ia
 70 |     
 71 |     	negative_flag = False
 72 |     	if cropped_box[0] < 0:
 73 | 
 74 |        		cropped_box[2] = int(cropped_box[2]) - int(cropped_box[0])
 75 |        		cropped_box[0] = int(cropped_box[0]) - int(cropped_box[0])
 76 | 
 77 |     	if cropped_box[1] < 0:
 78 | 
 79 |        		cropped_box[3] = int(cropped_box[3]) - int(cropped_box[1])
 80 |        		cropped_box[1] = int(cropped_box[1]) - int(cropped_box[1])
 81 | 
 82 | 
 83 |     	f_map = out_fmap[:, cropped_box[1]:cropped_box[3] , cropped_box[0]:cropped_box[2] ,:]
 84 | 
 85 |     	return f_map, T_final, cropped_box, negative_flag, trans_rpoints, trans_cpoints, out_fmap_size, height_deltas
 86 | 
 87 | 
 88 | def affine_grid_generator(H, W, theta):
 89 |  
 90 | 
 91 |     # create normalized 2D grid
 92 |     x = np.arange(W)
 93 |     y = np.arange(H)
 94 | 
 95 |     x_t, y_t = np.meshgrid(x, y)
 96 |     
 97 |     # flatten
 98 |     x_t_flat = np.reshape(x_t, (-1))
 99 |     y_t_flat = np.reshape(y_t, (-1))
100 |          
101 |     # reshape to [x_t, y_t , 1] - (homogeneous form)
102 |     ones = np.ones((np.shape(x_t_flat)[0])) 
103 | 
104 |     sampling_grid = np.stack([x_t_flat, y_t_flat, ones])
105 | 
106 |     # transform the sampling grid - batch multiply
107 |     theta_inv = np.linalg.inv(theta)
108 | 
109 |     out_sampGrid = np.dot(theta_inv[0:2,:], sampling_grid)    
110 |     
111 |     # batch grid has shape (num_batch, 2, H*W)
112 |     # reshape to (num_batch, 2, H, W)
113 |     batch_grids = out_sampGrid.reshape((1, 2, H, W))
114 |         
115 |     return batch_grids
116 | 
117 | def bilinear_sampler(input_fmap, x, y):
118 |     
119 |     # prepare useful params
120 |     B = np.shape(input_fmap)[0]
121 |     H = np.shape(input_fmap)[1]
122 |     W = np.shape(input_fmap)[2]
123 |     C = np.shape(input_fmap)[3]
124 | 
125 |     max_y = np.int32(H - 1)
126 |     max_x = np.int32(W - 1)
127 |     
128 |     zero = np.zeros([], dtype='int32')
129 |     
130 |     # grab 4 nearest corner points for each (x_i, y_i)
131 |     # i.e. we need a rectangle around the point of interest
132 |     x0 = np.int32(np.floor(x))
133 |     x1 = np.int32(x0 + 1)
134 |     y0 = np.int32(np.floor(y))
135 |     y1 = np.int32(y0 + 1)
136 |     
137 | 
138 |     # clip to range [0, H/W] to not violate img boundaries
139 |     x0 = np.clip(x0, zero, max_x)
140 |     x1 = np.clip(x1, zero, max_x)
141 |     y0 = np.clip(y0, zero, max_y)
142 |     y1 = np.clip(y1, zero, max_y) 
143 |     
144 |     # get pixel value at corner coords
145 |     Ia = input_fmap[0,y0,x0,:]
146 |     Ib = input_fmap[0,y1,x0,:]
147 |     Ic = input_fmap[0,y0,x1,:]
148 |     Id = input_fmap[0,y1,x1,:]
149 | 
150 |     # recast as float for delta calculation
151 |     x0 = np.float32(x0)
152 |     x1 = np.float32(x1)
153 |     y0 = np.float32(y0)
154 |     y1 = np.float32(y1)
155 | 
156 |     # calculate deltas
157 |     wa = (x1-x) * (y1-y)
158 |     wb = (x1-x) * (y-y0)
159 |     wc = (x-x0) * (y1-y)
160 |     wd = (x-x0) * (y-y0)
161 | 
162 | 
163 |     # add dimension for addition
164 |     wa = np.expand_dims(wa,axis=3)
165 |     wb = np.expand_dims(wb,axis=3)
166 |     wc = np.expand_dims(wc,axis=3)
167 |     wd = np.expand_dims(wd,axis=3)
168 |     
169 |     wa[np.where(wa<0)]=0
170 |     wb[np.where(wb<0)]=0
171 |     wc[np.where(wc<0)]=0
172 |     wd[np.where(wd<0)]=0
173 |     
174 |     output_fmap = Ib
175 | 
176 |     return output_fmap
177 | 
178 | def bilinear_sampler_Interpol(input_fmap, x, y):
179 |     
180 |     # prepare useful params
181 |     B = np.shape(input_fmap)[0]
182 |     H = np.shape(input_fmap)[1]
183 |     W = np.shape(input_fmap)[2]
184 |     C = np.shape(input_fmap)[3]
185 | 
186 |     max_y = np.int32(H - 1)
187 |     max_x = np.int32(W - 1)
188 |     
189 |     zero = np.zeros([], dtype='int32')
190 |     
191 |     # grab 4 nearest corner points for each (x_i, y_i)
192 |     # i.e. we need a rectangle around the point of interest
193 |     x0 = np.int32(np.floor(x))
194 |     x1 = np.int32(x0 + 1)
195 |     y0 = np.int32(np.floor(y))
196 |     y1 = np.int32(y0 + 1)
197 |     
198 | 
199 |     ad1 = 1*(x0>=0)
200 |     ad2 = 1*(x0<=max_x)
201 |     ad3 = 1*(y0>=0)
202 |     ad4 = 1*(y0<=max_y)
203 | 
204 |     maskx = 1*(ad1[0,:,:] * ad2[0,:,:])
205 |     masky = 1*(ad3[0,:,:] * ad4[0,:,:])
206 |     mask = maskx*masky
207 |     mask = mask.reshape(1,mask.shape[0], mask.shape[1], 1)
208 | 
209 | 
210 |     # clip to range [0, H/W] to not violate img boundaries
211 |     x0 = np.clip(x0, zero, max_x)
212 |     x1 = np.clip(x1, zero, max_x)
213 |     y0 = np.clip(y0, zero, max_y)
214 |     y1 = np.clip(y1, zero, max_y) 
215 |     
216 |     # get pixel value at corner coords
217 |     Ia = input_fmap[0,y0,x0,:]
218 |     Ib = input_fmap[0,y1,x0,:]
219 |     Ic = input_fmap[0,y0,x1,:]
220 |     Id = input_fmap[0,y1,x1,:]
221 | 
222 |     # recast as float for delta calculation
223 |     x0 = np.float32(x0)
224 |     x1 = np.float32(x1)
225 |     y0 = np.float32(y0)
226 |     y1 = np.float32(y1)
227 | 
228 |     # calculate deltas
229 |     wa = (x1-x) * (y1-y)
230 |     wb = (x1-x) * (y-y0)
231 |     wc = (x-x0) * (y1-y)
232 |     wd = (x-x0) * (y-y0)
233 | 
234 | 
235 |     # add dimension for addition
236 |     wa = np.expand_dims(wa,axis=3)
237 |     wb = np.expand_dims(wb,axis=3)
238 |     wc = np.expand_dims(wc,axis=3)
239 |     wd = np.expand_dims(wd,axis=3)
240 |     
241 |     wa[np.where(wa<0)]=0
242 |     wb[np.where(wb<0)]=0
243 |     wc[np.where(wc<0)]=0
244 |     wd[np.where(wd<0)]=0
245 |     
246 |     # compute output
247 |     output_fmap = (wa*Ia + wb*Ib + wc*Ic + wd*Id)
248 |     #output_fmap = Ib
249 |     outM = output_fmap.copy()
250 |     outM = outM * mask
251 | 
252 |     mask0 = 1*(mask==0)
253 |     conVals = Ia * mask0
254 |     output_fmap = conVals + outM
255 | 
256 |     output_fmap = Ia
257 |     return output_fmap
258 | 
259 | 
260 | 
261 | def LinesIntersectionForLargestBox(trans_rpoints, rect_pts, theta):
262 | 
263 |     def line(p1, p2):
264 |         A = (p1[1] - p2[1])
265 |         B = (p2[0] - p1[0])
266 |         C = (p1[0]*p2[1] - p2[0]*p1[1])
267 |         return A, B, -C
268 |     
269 |     def intersection(L1, L2):
270 |         D  = L1[0] * L2[1] - L1[1] * L2[0]
271 |         Dx = L1[2] * L2[1] - L1[1] * L2[2]
272 |         Dy = L1[0] * L2[2] - L1[2] * L2[0]
273 |         if D != 0:
274 |             x = Dx / D
275 |             y = Dy / D
276 |             return x,y
277 |         else:
278 |             return False
279 | 
280 |     rxymin = trans_rpoints.min(1)
281 |     rxymax = trans_rpoints.max(1)
282 | 
283 |     widN = rxymax[0] - rxymin[0]
284 |     higN = rxymax[1] - rxymin[1]
285 |     
286 |     rxyminOrig = rect_pts.min(1)
287 |     rxymaxOrig = rect_pts.max(1)
288 |     wid = rxymaxOrig[0] - rxyminOrig[0]
289 |     hig = rxymaxOrig[1] - rxyminOrig[1]
290 | 
291 | 
292 | # positive angle smaller width
293 |     if wid<=hig and theta>=0:
294 |         L1 = line([trans_rpoints[0][0], trans_rpoints[1][0]], [trans_rpoints[0][2], trans_rpoints[1][2]])
295 |         L2 = line([trans_rpoints[0][1], trans_rpoints[1][1]], [trans_rpoints[0][3], trans_rpoints[1][3]])
296 |         L3 = line([rxymin[0], rxymin[1]], [rxymax[0], rxymax[1]])
297 |         #print('Condition 01 executed')
298 | # positive angle greater width
299 |     elif wid>hig and theta>=0:
300 |         L1 = line([trans_rpoints[0][0], trans_rpoints[1][0]], [trans_rpoints[0][1], trans_rpoints[1][1]])
301 |         L2 = line([trans_rpoints[0][2], trans_rpoints[1][2]], [trans_rpoints[0][3], trans_rpoints[1][3]])
302 |         L3 = line([rxymax[0], rxymin[1]], [rxymin[0], rxymax[1]])
303 |         #print('Condition 02 executed')
304 | 
305 | # negative angle greater width
306 |     elif wid>hig and theta<0:# and (widOrig>higOrig):
307 |         L1 = line([trans_rpoints[0][0], trans_rpoints[1][0]], [trans_rpoints[0][1], trans_rpoints[1][1]])
308 |         L2 = line([trans_rpoints[0][2], trans_rpoints[1][2]], [trans_rpoints[0][3], trans_rpoints[1][3]])
309 |         L3 = line([rxymin[0], rxymin[1]], [rxymax[0], rxymax[1]])
310 |         #print('Condition 03 executed')
311 |     
312 | # negative angle smaller width
313 |     elif (wid<=hig and theta<0): #or (widOrig<=higOrig):
314 |         L1 = line([trans_rpoints[0][0], trans_rpoints[1][0]], [trans_rpoints[0][2], trans_rpoints[1][2]])
315 |         L2 = line([trans_rpoints[0][1], trans_rpoints[1][1]], [trans_rpoints[0][3], trans_rpoints[1][3]])
316 |         L3 = line([rxymax[0], rxymin[1]], [rxymin[0], rxymax[1]])
317 |         #print('Condition 04 executed')
318 |     
319 | 
320 |     if L1 and L3:
321 | 	intSec1 = intersection(L1, L3)
322 |     if L2 and L3:
323 | 	intSec2 = intersection(L2, L3)
324 | 
325 |     if not intSec1 or not intSec2:
326 | 	intSec1 = [0, 0]
327 | 	intSec2 = [widN, higN]
328 |     
329 |     return intSec1, intSec2
330 | 
331 | def transformer_layer_fMap(input_fmap, angle, rpn_boxes, out_dims=None,  **kwargs):
332 |     
333 |   
334 |     B = np.shape(input_fmap)[0]
335 |     H = np.shape(input_fmap)[1]
336 |     W = np.shape(input_fmap)[2]
337 |     C = np.shape(input_fmap)[3]
338 |     
339 |    
340 |     #cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2])
341 |     cntr = np.asarray([H/2, W/2])
342 |     #print('Original RPN :', box)
343 | 
344 |     T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]]
345 |     T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]])
346 |     T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]]
347 |     
348 | 
349 |     T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1)))
350 |     #T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),T1))
351 | 
352 |     
353 |       
354 |     corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]]
355 |     #print('corner_pts', corner_pts)
356 | 
357 |     trans_cpoints = np.dot(T[0:2,:],corner_pts)
358 |     
359 |     xymin = trans_cpoints.min(1)
360 |     xymax = trans_cpoints.max(1)
361 | 
362 |     out_H = np.int32(xymax[1] - xymin[1] + 1)
363 |     out_W = np.int32(xymax[0] - xymin[0] + 1)
364 |     #print('out_W', out_W, out_H, W,H)
365 | 
366 |     out_fmap_size = [0,0,out_W, out_H]
367 | 
368 |     T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]];
369 |     #print T
370 |     #print T4
371 |    
372 |     T_final = np.dot(np.transpose(T4),T);
373 |     #print T_final
374 |     
375 | 
376 |     tr_rotated_box_all = []
377 | 
378 |     for idx in range(0, len(rpn_boxes)):
379 | 
380 | 	box = rpn_boxes[idx,1:]/16
381 | 	#print('box', box)
382 | 
383 | 	rect_pts = [ [box[0],box[2], box[0], box[2]] ,[box[1],box[1],box[3],box[3]],[1,1,1,1]]
384 | 
385 | 	trans_rpoints = np.dot(T_final[0:2,:],rect_pts)
386 | 
387 | 	rxymin = trans_rpoints.min(1)
388 | 	rxymax = trans_rpoints.max(1)
389 | 
390 | 	cropped_box = [np.int32(np.floor(rxymin[0])), np.int32(np.floor(rxymin[1])), np.int32(np.floor(rxymax[0])), np.int32(np.floor(rxymax[1]))]
391 | 	#cropped_box = [rxymin[0],rxymin[1],rxymax[0],rxymax[1]]
392 | 
393 | 	# find coordinates for maximum area inscribed rectangle 
394 | 	intSec1, intSec2 = LinesIntersectionForLargestBox(trans_rpoints, np.array(rect_pts), angle)
395 | 	height_deltas = [intSec1[1]-cropped_box[1], cropped_box[3]-intSec2[1]]
396 |         #print('height_deltas: ', height_deltas)
397 | 
398 | 	tr_rotated_box = [rxymin[0], rxymin[1]+height_deltas[0], rxymax[0], rxymax[1]-height_deltas[1]]
399 | 	#print 'tr_cropped_box: ', tr_cropped_box
400 | 	tr_rotated_box = [ik * 16 for ik in tr_rotated_box]
401 | 
402 | 	tr_rotated_box_all.append(tr_rotated_box)
403 | 
404 |    
405 | 
406 |     batch_grids = affine_grid_generator(out_H, out_W, T_final)
407 |     
408 |     x_s = batch_grids[:,0, :, :]
409 |     y_s = batch_grids[:,1, :, :]
410 | 
411 |     out_fmap = bilinear_sampler(input_fmap, x_s, y_s)
412 |     #print 'out_fmap ', out_fmap.shape
413 |     #print 'input_fmap ', input_fmap.shape
414 |     
415 |     #tr_rotated_box_all = np.array(tr_rotated_box_all)
416 |     return out_fmap, T_final, tr_rotated_box_all
417 | 
418 | 
419 | def transformer_layer_fMapSep(input_fmap, orient_scores, rpn_boxes, out_dims=None,  **kwargs):
420 |     
421 |     theta = [0, 90, 135, 45, 157.5, 112.5, 67.5, 22.5]
422 | 
423 |     B = np.shape(input_fmap)[0]
424 |     H = np.shape(input_fmap)[1]
425 |     W = np.shape(input_fmap)[2]
426 |     C = np.shape(input_fmap)[3]
427 |     print('widHig', B, H, W, C)
428 |     
429 | 
430 |     outMap = np.zeros((len(rpn_boxes), 72,72, input_fmap.shape[3]), dtype = float)
431 |     #outMap = np.zeros((len(rpn_boxes), 50,50, input_fmap.shape[3]))
432 | 
433 | 
434 |     tr_rotated_box_all = []
435 |     transApplied = []
436 |     #ang1 = np.array(np.argmax(orient_scores, axis = 1))
437 |     #print('Im here :', 1*(ang1==0), 1*(ang1==1))
438 |     #idx0 = np.where(np.logical_or((ang1 == 0)*1,(ang1 == 1)*1))[0]
439 |     #print(idx0)
440 | 
441 |     for idx in range(0, len(rpn_boxes)):
442 | 	transCurrent = []
443 | 
444 | 	angle = theta[np.argmax(orient_scores[idx, :], axis = 0)]
445 | 
446 | 	if angle==0 or angle==90 :
447 | 
448 | 	    #print ("input_fmap.shape",input_fmap.shape)
449 | 	    outMap[idx, 0:input_fmap.shape[1], 0:input_fmap.shape[2], 0:input_fmap.shape[3]] = input_fmap
450 | 	    #print rpn_boxes[idx,1:5], [idx]+[rpn_boxes[idx,1],rpn_boxes[idx,2], rpn_boxes[idx,3], rpn_boxes[idx,4]]
451 | 	    tr_rotated_box_all.append([idx]+[rpn_boxes[idx,1],rpn_boxes[idx,2], rpn_boxes[idx,3], rpn_boxes[idx,4]])
452 | 
453 | 	    T11 = [[1,0,0],[0,1,0],[0,0,1]]
454 | 	    transCurrent.append(T11)
455 | 	    transCurrent.append(T11)
456 | 	    transApplied.append(transCurrent)
457 | 
458 | 	    box = rpn_boxes[idx,1:5]/16		
459 | 
460 | 	    sz = [box[3]-box[1], box[2]-box[0]]
461 | 
462 | 	    cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2])
463 | 
464 | 	    T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]]
465 | 	    T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]])
466 | 	    T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]]
467 | 
468 | 	    T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1)))
469 | 
470 | 	    corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]]
471 | 
472 | 	    trans_cpoints = np.dot(T[0:2,:],corner_pts)
473 | 
474 | 	    xymin = trans_cpoints.min(1)
475 | 	    xymax = trans_cpoints.max(1)
476 | 
477 | 	    out_H = np.int32(xymax[1] - xymin[1] + 1)
478 | 	    out_W = np.int32(xymax[0] - xymin[0] + 1)
479 | 
480 | 	    out_fmap_size = [0,0,out_W, out_H]
481 | 
482 | 	    T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]];
483 | 
484 | 	    T_final = np.dot(np.transpose(T4),T);
485 | 
486 | 	else:
487 | 		if angle>90:
488 | 		    angle = angle-180
489 | 
490 | 		box = rpn_boxes[idx,1:5]/16		
491 | 		#print('angle', angle)
492 | 
493 | 
494 | 		sz = [box[3]-box[1], box[2]-box[0]]
495 | 	    
496 | 	   
497 | 	    	cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2])
498 | 
499 | 	    #print('Original RPN :', box)
500 | 
501 | 		T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]]
502 | 		T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]])
503 | 		T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]]
504 | 
505 | 
506 | 		T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1)))
507 | 		#T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),T1))
508 | 
509 | 	    
510 | 	      
511 | 		corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]]
512 | 		#print('corner_pts', corner_pts)
513 | 
514 | 		trans_cpoints = np.dot(T[0:2,:],corner_pts)
515 | 
516 | 		xymin = trans_cpoints.min(1)
517 | 		xymax = trans_cpoints.max(1)
518 | 
519 | 		out_H = np.int32(xymax[1] - xymin[1] + 1)
520 | 		out_W = np.int32(xymax[0] - xymin[0] + 1)
521 | 		#print('out_W', out_W, out_H, W,H)
522 | 
523 | 		out_fmap_size = [0,0,out_W, out_H]
524 | 
525 | 		T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]];
526 | 		#print T
527 | 		#print T4
528 | 
529 | 		T_final = np.dot(np.transpose(T4),T);
530 | 
531 | 		#print T_final
532 | 
533 | 		rect_pts = [ [box[0],box[2], box[0], box[2]] ,[box[1],box[1],box[3],box[3]],[1,1,1,1]]
534 | 
535 | 		trans_rpoints = np.dot(T_final[0:2,:],rect_pts)
536 | 
537 | 		rxymin = trans_rpoints.min(1)
538 | 		rxymax = trans_rpoints.max(1)
539 | 
540 | 		cropped_box = [np.int32(np.floor(rxymin[0])), np.int32(np.floor(rxymin[1])), np.int32(np.floor(rxymax[0])), np.int32(np.floor(rxymax[1]))]
541 | 		#print('cropped_box :', cropped_box)
542 | 		#print('trans_rpoints :', trans_rpoints)
543 | 	
544 | 
545 | 		# find coordinates for maximum area inscribed rectangle 
546 | 		intSec1, intSec2 = LinesIntersectionForLargestBox(trans_rpoints, np.array(rect_pts), angle)
547 | 		height_deltas = [intSec1[1]-cropped_box[1], cropped_box[3]-intSec2[1]]
548 | 		#print('height_deltas: ', height_deltas)
549 | 
550 | 		T11 = [[1,0,0],[0,1,0],[-rxymin[0],-rxymin[1],1]]
551 | 		T11 = np.transpose(T11)
552 | 
553 | 		rect_pts1 = [trans_rpoints[0], trans_rpoints[1],[1,1,1,1]]
554 | 		trans_rpoints = np.dot(T11[0:2,:],rect_pts1)
555 | 		rxymin1 = trans_rpoints.min(1)
556 | 		rxymax1 = trans_rpoints.max(1)
557 | 
558 | 		#print ('trans_rpoints00 : ', trans_rpoints)
559 | 		tr_rotated_box = [rxymin1[0], rxymin1[1]+height_deltas[0], rxymax1[0], rxymax1[1]-height_deltas[1]]
560 | 		#print ('box : ', box)
561 | 		#print ('tr_rotated_box : ', tr_rotated_box)
562 | 
563 | 		tr_rotated_box = [ik * 16 for ik in tr_rotated_box]
564 | 		#ross = [[0]+ il for il in rotated_rpns]
565 | 		tr_rotated_box_all.append([idx]+tr_rotated_box)
566 | 	
567 | 		transCurrent.append(T_final)
568 | 		transCurrent.append(T11)
569 | 	
570 | 		batch_grids = affine_grid_generator(out_H, out_W, T_final)
571 | 
572 | 		x_s = batch_grids[:,0, :, :]
573 | 		y_s = batch_grids[:,1, :, :]
574 | 
575 | 		out_fmap = bilinear_sampler_Interpol(input_fmap.copy(), x_s, y_s)
576 | 
577 | 	    	if cropped_box[0] < 0:
578 | 
579 | 	       		cropped_box[2] = int(cropped_box[2] - cropped_box[0])
580 | 	       		cropped_box[0] = int(cropped_box[0] - cropped_box[0])
581 | 
582 | 	    	if cropped_box[1] < 0:
583 | 
584 | 	       		cropped_box[3] = int(cropped_box[3] - cropped_box[1])
585 | 	       		cropped_box[1] = int(cropped_box[1] - cropped_box[1])
586 | 	    
587 | 		f_map = out_fmap[:, cropped_box[1]:cropped_box[3] , cropped_box[0]:cropped_box[2] ,:]
588 | 		#print('output_fmap', (f_map[0,:,:,:]).sum())
589 | 		outMap[idx, 0:f_map.shape[1], 0:f_map.shape[2], 0:f_map.shape[3]] = f_map
590 | 		#print('output_fmap1', (outMap[idx,:,:,:]).sum())
591 | 	    
592 | 	    #tr_rotated_box_all = np.array(tr_rotated_box_all)
593 | 	    #print('featureMap size : ', outMap.shape)
594 | 
595 | 		transApplied.append(transCurrent)
596 | 
597 |     return outMap, tr_rotated_box_all, transApplied, T_final
598 | 
599 | 
600 | 
601 | 
602 | 
603 | ###### backward ######
604 | 
605 | #def transformer_layer_fMapSep_backward(input_fmap, orient_scores, rpn_boxes, out_dims=None,  **kwargs):
606 | def transformer_layer_fMapSep_backward(input_grad, orient_scores, in_rpn_boxes, out_dims=None,  **kwargs):
607 |     
608 |     theta = [0, 90, 135, 45, 157.5, 112.5, 67.5, 22.5]
609 | 
610 |     B = np.shape(input_grad)[0]
611 |     H = np.shape(input_grad)[1]
612 |     W = np.shape(input_grad)[2]
613 |     C = np.shape(input_grad)[3]
614 |     print('widHig', B, H, W, C)
615 |     
616 | 
617 |     outMap = np.zeros((len(in_rpn_boxes), 102,102, input_grad.shape[3]), dtype = float)
618 |     #outMap = np.zeros((len(rpn_boxes), 50,50, input_fmap.shape[3]))
619 | 
620 | 
621 |     tr_rotated_box_all = []
622 |     transApplied = []
623 |     #ang1 = np.array(np.argmax(orient_scores, axis = 1))
624 |     #print('Im here :', 1*(ang1==0), 1*(ang1==1))
625 |     #idx0 = np.where(np.logical_or((ang1 == 0)*1,(ang1 == 1)*1))[0]
626 |     #print("len",len(in_rpn_boxes))
627 | 
628 |     for idx in range(0, len(in_rpn_boxes)):
629 | 	transCurrent = []
630 | 
631 | 	angle = theta[np.argmax(orient_scores[idx, :], axis = 0)]
632 | 	#print ("angle", angle)
633 | 
634 | 
635 | 
636 | 	if angle==0 or angle==90 :
637 | 	    #print ("input_grad.shape",input_grad.shape)
638 | 	    outMap[idx, 0:input_grad.shape[1], 0:input_grad.shape[2], 0:input_grad.shape[3]] = input_grad[idx, :, :, :]
639 | 	    #print rpn_boxes[idx,1:5], [idx]+[rpn_boxes[idx,1],rpn_boxes[idx,2], rpn_boxes[idx,3], rpn_boxes[idx,4]]
640 | 	    tr_rotated_box_all.append([idx]+[in_rpn_boxes[idx,1],in_rpn_boxes[idx,2], in_rpn_boxes[idx,3], in_rpn_boxes[idx,4]])
641 | 
642 | 	    T11 = [[1,0,0],[0,1,0],[0,0,1]]
643 | 	    transCurrent.append(T11)
644 | 	    transCurrent.append(T11)
645 | 	    transApplied.append(transCurrent)
646 | 
647 | 
648 | 
649 | 	    box = in_rpn_boxes[idx,1:5]/16		
650 | 
651 | 
652 | 	    sz = [box[3]-box[1], box[2]-box[0]]
653 | 
654 | 
655 | 	    cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2])
656 | 
657 | 
658 | 	    T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]]
659 | 	    T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]])
660 | 	    T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]]
661 | 
662 | 	    T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1)))
663 | 
664 | 	    corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]]
665 | 
666 | 	    trans_cpoints = np.dot(T[0:2,:],corner_pts)
667 | 
668 | 	    xymin = trans_cpoints.min(1)
669 | 	    xymax = trans_cpoints.max(1)
670 | 
671 | 	    out_H = np.int32(xymax[1] - xymin[1] + 1)
672 | 	    out_W = np.int32(xymax[0] - xymin[0] + 1)
673 | 
674 | 	    out_fmap_size = [0,0,out_W, out_H]
675 | 
676 | 	    T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]];
677 | 
678 | 	    T_final = np.dot(np.transpose(T4),T);
679 | 	    T_final_inv = inv(T_final)
680 | 
681 | 
682 | 	else:
683 | 		if angle>90:
684 | 		    angle = angle-180
685 | 
686 | 		box = in_rpn_boxes[idx,1:5]/16		
687 | 		#print('angle', angle)
688 | 
689 | 
690 | 		sz = [box[3]-box[1], box[2]-box[0]]
691 | 	    
692 | 	   
693 | 	    	cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2])
694 | 
695 | 	    #print('Original RPN :', box)
696 | 
697 | 		T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]]
698 | 		T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]])
699 | 		T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]]
700 | 
701 | 
702 | 		T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1)))
703 | 		#T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),T1))
704 | 
705 | 		corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]]
706 | 		#print('corner_pts', corner_pts)
707 | 
708 | 		trans_cpoints = np.dot(T[0:2,:],corner_pts)
709 | 
710 | 		xymin = trans_cpoints.min(1)
711 | 		xymax = trans_cpoints.max(1)
712 | 
713 | 		out_H = np.int32(xymax[1] - xymin[1] + 1)
714 | 		out_W = np.int32(xymax[0] - xymin[0] + 1)
715 | 		#print('out_W', out_W, out_H, W,H)
716 | 
717 | 		out_fmap_size = [0,0,out_W, out_H]
718 | 
719 | 		T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]];
720 | 		#print T
721 | 		#print T4
722 | 
723 | 		T_final = np.dot(np.transpose(T4),T);
724 | 		T_final_inv = inv(T_final)
725 | 
726 | 		#print T_final
727 | 
728 | 		rect_pts = [ [box[0],box[2], box[0], box[2]] ,[box[1],box[1],box[3],box[3]],[1,1,1,1]]
729 | 
730 | 		trans_rpoints = np.dot(T_final_inv[0:2,:],rect_pts)
731 | 
732 | 		rxymin = trans_rpoints.min(1)
733 | 		rxymax = trans_rpoints.max(1)
734 | 
735 | 		cropped_box = [np.int32(np.floor(rxymin[0])), np.int32(np.floor(rxymin[1])), np.int32(np.floor(rxymax[0])), np.int32(np.floor(rxymax[1]))]
736 | 		#print('cropped_box :', cropped_box)
737 | 		#print('trans_rpoints :', trans_rpoints)
738 | 	
739 | 
740 | 		# find coordinates for maximum area inscribed rectangle 
741 | 		#intSec1, intSec2 = LinesIntersectionForLargestBox(trans_rpoints, np.array(rect_pts), angle)
742 | 		#height_deltas = [intSec1[1]-cropped_box[1], cropped_box[3]-intSec2[1]]
743 | 		#print('height_deltas: ', height_deltas)
744 | 
745 | 		T11 = [[1,0,0],[0,1,0],[-rxymin[0],-rxymin[1],1]]
746 | 		T11 = np.transpose(T11)
747 | 
748 | 		rect_pts1 = [trans_rpoints[0], trans_rpoints[1],[1,1,1,1]]
749 | 		trans_rpoints = np.dot(T11[0:2,:],rect_pts1)
750 | 		rxymin1 = trans_rpoints.min(1)
751 | 		rxymax1 = trans_rpoints.max(1)
752 | 
753 | 		#print ('trans_rpoints00 : ', trans_rpoints)
754 | 		#tr_rotated_box = [rxymin1[0], rxymin1[1]+height_deltas[0], rxymax1[0], rxymax1[1]-height_deltas[1]]
755 | 		tr_rotated_box = [rxymin1[0], rxymin1[1], rxymax1[0], rxymax1[1]]
756 | 		#print ('box : ', box)
757 | 		#print ('tr_rotated_box : ', tr_rotated_box)
758 | 
759 | 
760 | 		tr_rotated_box = [ik * 16 for ik in tr_rotated_box]
761 | 		#ross = [[0]+ il for il in rotated_rpns]
762 | 		tr_rotated_box_all.append([idx]+tr_rotated_box)
763 | 	
764 | 		transCurrent.append(T_final_inv)
765 | 		transCurrent.append(T11)
766 | 	
767 | 		batch_grids = affine_grid_generator(out_H, out_W, T_final_inv)
768 | 
769 | 		x_s = batch_grids[:,0, :, :]
770 | 		y_s = batch_grids[:,1, :, :]
771 | 		tup = np.reshape(input_grad[idx, :, :, :], (1,np.shape(input_grad)[1],np.shape(input_grad)[2],np.shape(input_grad)[3]))
772 | 		out_fmap = bilinear_sampler_Interpol(tup.copy(), x_s, y_s)
773 | 		#print ("tup.shape",tup.shape)
774 | 		#print (xyz)
775 | 
776 | 	    	if cropped_box[0] < 0:
777 | 
778 | 	       		cropped_box[2] = int(cropped_box[2] - cropped_box[0])
779 | 	       		cropped_box[0] = int(cropped_box[0] - cropped_box[0])
780 | 
781 | 	    	if cropped_box[1] < 0:
782 | 
783 | 	       		cropped_box[3] = int(cropped_box[3] - cropped_box[1])
784 | 	       		cropped_box[1] = int(cropped_box[1] - cropped_box[1])
785 | 	    
786 | 		f_map = out_fmap[:, cropped_box[1]:cropped_box[3] , cropped_box[0]:cropped_box[2] ,:]
787 | 		#print('output_fmap', (f_map[0,:,:,:]).sum())
788 | 		outMap[idx, 0:f_map.shape[1], 0:f_map.shape[2], 0:f_map.shape[3]] = f_map
789 | 		#print('output_fmap1', (outMap[idx,:,:,:]).sum())
790 | 	    
791 | 	    #tr_rotated_box_all = np.array(tr_rotated_box_all)
792 | 	    #print('featureMap size : ', outMap.shape)
793 | 
794 | 		transApplied.append(transCurrent)
795 | 
796 |     return outMap, tr_rotated_box_all, transApplied
797 | 


--------------------------------------------------------------------------------
/images/flow_diagram_web.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/images/flow_diagram_web.jpg


--------------------------------------------------------------------------------
/images/more_results_web.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/images/more_results_web.jpg


--------------------------------------------------------------------------------
/makebboxproposals.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Written by CVML
  3 | # --------------------------------------------------------
  4 | 
  5 | 
  6 | 
  7 | from fast_rcnn.config import cfg
  8 | from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv
  9 | import argparse
 10 | from utils.timer import Timer
 11 | import numpy as np
 12 | import cv2
 13 | from numpy.linalg import inv
 14 | import caffe
 15 | from utils.blob import im_list_to_blob
 16 | import os
 17 | import matplotlib.pyplot as plt
 18 | import gc
 19 | import yaml
 20 | 
 21 | 
 22 | DEBUG = False
 23 | 
 24 | class makebBox_regionProposal(caffe.Layer):
 25 |     """
 26 |     transforms feature map and corresponding bouning boxes with respect to angle
 27 |     """
 28 | 
 29 |     def setup(self, bottom, top):
 30 | 
 31 | 	pass
 32 | 
 33 |     def forward(self, bottom, top):
 34 | 
 35 | 
 36 |         #assert bottom[0].data.shape[0] == 1, \
 37 |         #    'Only single item batches are supported'
 38 | 
 39 | 
 40 |         # the first set of _num_anchors channels are bg probs
 41 |         # the second set are the fg probs, which we want
 42 |         rpn_boxes_Actual = bottom[0].data
 43 |         box_deltas = bottom[1].data
 44 | 	im_info = bottom[2].data
 45 | 	scores = bottom[3].data
 46 | 
 47 | 
 48 | 	#print('im_info : ', im_info)
 49 | 
 50 | 	im_scales = im_info[0][2]
 51 | 
 52 | 	im_shape = np.array([im_info[0][0], im_info[0][1]]) / im_scales
 53 | 	#print('conv_feat : ', conv_feat.shape)
 54 | 	#print('rpnBoxes : ', rpn_boxes.shape)
 55 | 	
 56 | 
 57 | 	#for idx in range(len(cls_idx)):
 58 | 		#cls_boxes = final_boxes[inds, j*4:(j+1)*4]
 59 | 
 60 | 
 61 | 	rpn_boxes = rpn_boxes_Actual[:, 1:5] / im_scales
 62 | 	pred_boxes = bbox_transform_inv(rpn_boxes, box_deltas)
 63 |         pred_boxes = clip_boxes(pred_boxes, im_shape)
 64 | 
 65 | 	cls_idx = np.argmax(scores, axis = 1)
 66 | 
 67 | 	#print('cls_idx', cls_idx.shape, cls_idx )
 68 | 
 69 | 	#cls_idx = cls_idx.reshape(len(cls_idx), 1)
 70 | 	#print('cls_idx', cls_idx.shape)
 71 | 	#pred_boxes = pred_boxes[:, cls_idx*4:(cls_idx+1)*4]
 72 | 	temp = np.zeros((len(cls_idx), 5))
 73 | 
 74 | 	for idx in range(len(cls_idx)):
 75 | 		#print(cls_idx[idx])
 76 | 		temp[idx,1:] = pred_boxes[idx, cls_idx[idx]*4:(cls_idx[idx]+1)*4]
 77 | 
 78 | 	
 79 | 	pred_boxes = temp * im_scales
 80 | 	#addd = cls_idx >0
 81 | 	#print('Compare :', rpn_boxes[cls_idx>0,:], temp[cls_idx>0,:])
 82 | 
 83 | 	#rpn_boxes_Actual[:,1:5] = pred_boxes
 84 | 
 85 | 	top[0].reshape(*(pred_boxes.shape))
 86 |         top[0].data[...] = pred_boxes
 87 | 
 88 |     def backward(self, top, propagate_down, bottom):
 89 |         """This layer does not propagate gradients."""
 90 |         pass
 91 | 
 92 |     def reshape(self, bottom, top):
 93 |         """Reshaping happens during the call to forward."""
 94 | 	top[0].reshape(*bottom[0].shape)
 95 | 	#top[1].reshape(*bottom[1].shape)
 96 |         #pass
 97 | 
 98 | 
 99 | 
100 | 
101 | 


--------------------------------------------------------------------------------
/tools/demo_firearms.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # Faster R-CNN
  5 | # Copyright (c) 2015 Microsoft
  6 | # Licensed under The MIT License [see LICENSE for details]
  7 | # Written by Ross Girshick
  8 | # --------------------------------------------------------
  9 | 
 10 | 
 11 | # Modified by CVML group @ITU- Punjab
 12 | """
 13 | Demo script showing detections in sample images.
 14 | 
 15 | See README.md for installation instructions before running.
 16 | """
 17 | 
 18 | import _init_paths
 19 | from fast_rcnn.config import cfg
 20 | from fast_rcnn.test import im_detect
 21 | from fast_rcnn.nms_wrapper import nms
 22 | from utils.timer import Timer
 23 | import matplotlib.pyplot as plt
 24 | import numpy as np
 25 | import scipy.io as sio
 26 | import caffe, os, sys, cv2
 27 | import argparse
 28 | from utils.blob import im_list_to_blob
 29 | from numpy.linalg import inv
 30 | 
 31 | CLASSES = ('__background__',
 32 |            'Gun','Riffle')
 33 | 
 34 | NETS = {'vgg16': ('VGG16',
 35 |                   'vgg16_fast_rcnn_cascade_firearms_iter_60000.caffemodel'),
 36 |         'zf': ('ZF',
 37 |                   'ZF_faster_rcnn_final.caffemodel')}
 38 | 
 39 | 
 40 | 
 41 | def vis_detections_final(im, class_name, all_final_boxes,thresh, cntG,cntR, cG, cR, rpn_sscores, rpn_bo, all_final_boxes_rotated):
 42 |     """Visual debugging of detections."""
 43 |     #print 'i am in visualizer'
 44 |     #print len(all_final_boxes)
 45 | 
 46 |     #fig, ax = plt.subplots(figsize=(12, 12))
 47 |     #ax.imshow(im, aspect='equal')
 48 | 
 49 | 
 50 |     boxes = all_final_boxes[:,:4]
 51 |     scores = all_final_boxes[:,4]
 52 |     scor = all_final_boxes[:,10]  
 53 |     rpnns =   all_final_boxes[:,6:10] 
 54 | 
 55 |     xAll = all_final_boxes_rotated[:,:4]
 56 |     yAll = all_final_boxes_rotated[:,4:8]
 57 | 
 58 |     orient_class = all_final_boxes[:,5]
 59 |     s=[]
 60 |     for i in xrange(len(scores)):
 61 |     	
 62 | 	bbox = map(int, boxes[i,:])
 63 | 	#rpn_bo = map(int, rpnns[i,:])
 64 | 	score = scores[i]
 65 | 	orient_cls = orient_class[i]
 66 | 	rpn_s = scor[i]
 67 | 
 68 | 
 69 | 	if score > thresh:
 70 | 
 71 |     		txt = class_name + ': ' + str(orient_cls) + ': ' + str(score)
 72 | 
 73 | 		s.append(score)
 74 | 
 75 | 		pts = np.array([[xAll[i,0],yAll[i,0]],[xAll[i,1],yAll[i,1]],[xAll[i,3],yAll[i,3]],[xAll[i,2],yAll[i,2]]], np.int32)
 76 | 		#cv2.polylines(im, [pts],True,(0,255,255), 2)
 77 | 		#cv2.polylines(im, [pts],True,(128,0,255), 2) #voilet like
 78 | 		cv2.polylines(im, [pts],True,(147, 20,255), 6) # pink like
 79 | 
 80 |     if s:
 81 | 
 82 | 
 83 | 	    if (class_name == 'Gun'):
 84 | 		cntG = max(s)+cntG
 85 | 		cG=cG+1
 86 | 
 87 | 	    if (class_name == 'Riffle'):
 88 | 		cntR = max(s)+cntR
 89 | 		cR=cR+1
 90 | 
 91 |     #print (cntG,cntR)
 92 |     #print (cG,cR)
 93 | 
 94 |     return im,cntG,cntR, cG, cR
 95 |     #return im
 96 | 
 97 | 
 98 | 
 99 | def _get_image_blob(im):
100 |     """Converts an image into a network input.
101 | 
102 |     Arguments:
103 |         im (ndarray): a color image in BGR order
104 | 
105 |     Returns:
106 |         blob (ndarray): a data blob holding an image pyramid
107 |         im_scale_factors (list): list of image scales (relative to im) used
108 |             in the image pyramid
109 |     """
110 |     im_orig = im.astype(np.float32, copy=True)
111 |     im_orig -= cfg.PIXEL_MEANS
112 | 
113 |     im_shape = im_orig.shape
114 |     im_size_min = np.min(im_shape[0:2])
115 |     im_size_max = np.max(im_shape[0:2])
116 | 
117 |     processed_ims = []
118 |     im_scale_factors = []
119 | 
120 |     for target_size in cfg.TEST.SCALES:
121 |         im_scale = float(target_size) / float(im_size_min)
122 |         # Prevent the biggest axis from being more than MAX_SIZE
123 |         if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
124 |             im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
125 |         im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
126 |                         interpolation=cv2.INTER_LINEAR)
127 |         im_scale_factors.append(im_scale)
128 |         processed_ims.append(im)
129 | 
130 |     # Create a blob to hold the input images
131 |     blob = im_list_to_blob(processed_ims)
132 | 
133 |     return blob, np.array(im_scale_factors)
134 | 
135 | def _get_blobs(im, rois):
136 |     """Convert an image and RoIs within that image into network inputs."""
137 |     blobs = {'data' : None, 'rois' : None}
138 |     blobs['data'], im_scale_factors = _get_image_blob(im)
139 |     #if not cfg.TEST.HAS_RPN:
140 |         #blobs['rois'] = _get_rois_blob(rois, im_scale_factors)
141 | 	#print ('lll: ', blobs['rois'])
142 |     return blobs, im_scale_factors
143 | 
144 | def demo(net, image_name):
145 |     """Detect object classes in an image using pre-computed object proposals."""
146 | 
147 |     num_images = 1
148 |     foldername = '/media/akhtar/6D2C8F896B2F79E0/Projects/py-faster-rcnn-master/data/output_images_detected/' 
149 |     foldername_all = '/home/itu/faster-rcnn-1070/data/output_images_all/'
150 |     thresh=0.05
151 |     max_per_image=100
152 | 
153 |     all_boxes = [[] for _ in xrange(num_images)]
154 | 
155 |     ntopProp = [300]
156 | 
157 | 
158 |     theta = [0, 90, 135, 45, 157.5, 112.5, 67.5, 22.5]
159 | 
160 |     for t in xrange(0,len(ntopProp)):
161 |     	#output_dir = get_output_dir(imdb, net)
162 | 
163 | 
164 | 
165 |     	if not cfg.TEST.HAS_RPN:
166 |         	roidb = imdb.roidb
167 | 
168 |     	all_final_boxes = [[[] for _ in xrange(num_images)]
169 |                  	for _ in xrange(3)]
170 | 
171 | 	all_final_boxes_rotated = [[[] for _ in xrange(num_images)]
172 |                  	for _ in xrange(3)]
173 | 
174 |     	all_rpn_boxes = [[[] for _ in xrange(num_images)]
175 |                  	for _ in xrange(1)]
176 | 
177 | 	#print('all_final_boxes_rotated :', all_final_boxes_rotated)
178 | 	cntG = 0
179 | 	cntR = 0
180 | 	cG = 0
181 | 	cR = 0
182 | 
183 |     	for i in xrange(num_images):
184 | 		# filter out any ground truth boxes
185 | 		if cfg.TEST.HAS_RPN:
186 | 	    		box_proposals = None
187 | 		else:
188 | 	    	# The roidb may contain ground-truth rois (for example, if the roidb
189 | 	    	# comes from the training or val split). We only want to evaluate
190 | 	    	# detection on the *non*-ground-truth rois. We select those the rois
191 | 	    	# that have the gt_classes field set to 0, which means there's no
192 | 	    	# ground truth.
193 | 	    		box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
194 | 
195 | 
196 | 
197 | 	    # Load the demo image
198 | 		im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
199 | 	
200 | 		im = cv2.imread(im_file)
201 | 
202 | 
203 | 
204 | 
205 | 		rpn_boxes, rpn_scores, final_boxes, final_scores, orient_score, final_boxes1, final_scores1, transApplied = im_detect(net, im, box_proposals, True)
206 | 
207 | 		if ntopProp[t] == 300:
208 | 			if len(rpn_scores) > 299:
209 | 				rpn_boxes = rpn_boxes[0:ntopProp[t],:]
210 | 				rpn_scores = rpn_scores[0:ntopProp[t],:]
211 | 				final_boxes = final_boxes[0:ntopProp[t],:]
212 | 				final_scores = final_scores[0:ntopProp[t],:]
213 | 				orient_scores = orient_score[0:ntopProp[t],:]
214 | 				final_boxes1 = final_boxes1[0:ntopProp[t],:]
215 | 				final_scores1 = final_scores1[0:ntopProp[t],:]
216 | 				transApplied = transApplied[0:ntopProp[t],:,:,:]
217 | 		else:	
218 | 			rpn_boxes = rpn_boxes[0:ntopProp[t],:]
219 | 			rpn_scores = rpn_scores[0:ntopProp[t],:]
220 | 			final_boxes = final_boxes[0:ntopProp[t],:]
221 | 			final_scores = final_scores[0:ntopProp[t],:]
222 | 			orient_scores = orient_score[0:ntopProp[t],:]
223 | 			final_boxes1 = final_boxes1[0:ntopProp[t],:]
224 | 			final_scores1 = final_scores1[0:ntopProp[t],:]
225 | 			transApplied = transApplied[0:ntopProp[t],:,:,:]
226 | 
227 | 		temp_boxes = None
228 | 		blobs, im_scales = _get_blobs(im, temp_boxes)
229 | 
230 | 		rotatedBoxesAll = np.zeros((len(rpn_boxes), 3,2,4))
231 | 
232 | 		for iii in range(0, len(rpn_boxes)):
233 | 			final_boxes_tr = final_boxes1[iii,:]
234 | 			#print('final_boxes_tr :', final_boxes_tr)
235 | 			final_boxes_tr = ((final_boxes_tr * im_scales[0]) / 16)
236 | 
237 | 			final_boxes_tr = trans_box1(final_boxes_tr,transApplied[iii,0,:,:],transApplied[iii,1,:,:])
238 | 	
239 | 			final_boxes_tr = ((final_boxes_tr * 16) / im_scales[0])
240 | 
241 | 			rotatedBoxesAll[iii, :,:,:] = final_boxes_tr[0,:,:,:]
242 | 	
243 | 
244 | 		rpn_dets = np.hstack((rpn_boxes, rpn_scores)) \
245 | 			.astype(np.float32, copy=False)
246 | 		#all_rpn_boxes[0][i] = rpn_dets
247 | 
248 | 
249 | 		#_t['misc'].tic()
250 | 
251 | 		maxScore = final_scores1
252 | 		for j in xrange(1, 3):
253 | 
254 | 			inds = np.where(maxScore[:, j] > thresh)[0]
255 | 			cls_scores = maxScore[inds, j]
256 | 			cls_boxes = final_boxes[inds, j*4:(j+1)*4]
257 | 			cls_orient = np.argmax(orient_score[inds, :], axis = 1)
258 | 			rpn_bboxes = rpn_boxes[inds,:]
259 | 			rpn_sscores = rpn_scores[inds]
260 | 	
261 | 			cls_scores1 = final_scores[inds, j]
262 | 
263 | 			rotatedBoxesClass = np.hstack((rotatedBoxesAll[inds,j,0,:], rotatedBoxesAll[inds,j,1,:])).astype(np.float32, copy=False)
264 | 			#print('rotatedBoxesClass :', rotatedBoxesClass.shape)
265 | 
266 | 			cls_dets_temp_rotated = np.hstack((rotatedBoxesAll[inds,j,0,:], rotatedBoxesAll[inds,j,1,:], cls_scores[:, np.newaxis])) \
267 | 				.astype(np.float32, copy=False)
268 | 
269 | 
270 | 	    		cls_dets_temp = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
271 | 				.astype(np.float32, copy=False)
272 | 
273 | 			#print('cls_dets_temp', cls_dets_temp.shape)
274 | 
275 | 	   		cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis], cls_orient[:, np.newaxis], rpn_bboxes, rpn_sscores)) \
276 | 				.astype(np.float32, copy=False)
277 | 
278 | 
279 | 	    		keep = nms(cls_dets_temp, cfg.TEST.NMS)
280 | 			#keep = nms(cls_dets_temp, 0.3)
281 | 	
282 | 			cls_dets = cls_dets[keep, :]
283 | 			rotatedBoxesClass = rotatedBoxesClass[keep, :]
284 | 	
285 | 
286 | 	    		all_final_boxes[j][i] = cls_dets
287 | 			all_final_boxes_rotated[j][i] = rotatedBoxesClass
288 | 
289 | 		if max_per_image > 0:
290 | 	    		image_scores = np.hstack([all_final_boxes[j][i][:, 4]
291 | 			  		            for j in xrange(1, 3)])
292 | 
293 | 	    		if len(image_scores) > max_per_image:
294 | 				image_thresh = np.sort(image_scores)[-max_per_image]
295 | 				for j in xrange(1, 3):
296 | 		   			keep = np.where(all_final_boxes[j][i][:, -1] >= image_thresh)[0]
297 | 		    			all_final_boxes[j][i] = all_final_boxes[j][i][keep, :]
298 | 					all_final_boxes_rotated[j][i] = all_final_boxes_rotated[j][i][keep, :]
299 | 
300 | 	
301 | 		for j in xrange(1, 3):
302 | 
303 | 			rpn_bo = np.array([208, 58, 2243, 1094])
304 | 
305 | 
306 | 			im,cntG,cntR, cG, cR = vis_detections_final(im, CLASSES[j], all_final_boxes[j][i], 0.75, cntG,cntR, cG, cR, rpn_sscores, rpn_bo, all_final_boxes_rotated[j][i])
307 | 
308 | 
309 | 		print ('check: ',os.path.join(cfg.DATA_DIR, 'demo', 're_'+image_name))
310 | 		cv2.imwrite(os.path.join(cfg.DATA_DIR, 'demo', 're_'+image_name), im)
311 | 		    
312 | 
313 | 
314 | def trans_box1(final_boxes,T_final, T11):
315 | 	final_boxes = final_boxes.reshape(1,12)
316 | 	final_boxes_final = np.zeros((len(final_boxes),3, 2,4))
317 | 
318 | 	for k in range(0, len(final_boxes)):
319 | 
320 | 		class1 = final_boxes[k,0:4]
321 | 		class2 = final_boxes[k,4:8]
322 | 		class3 = final_boxes[k,8:12]
323 | 
324 | 		box1 = [ class1[0] , class1[1] , class1[2] , class1[3] ]
325 | 		box2 = [ class2[0] , class2[1] , class2[2] , class2[3] ] 
326 | 		box3 = [ class3[0] , class3[1] , class3[2] , class3[3] ]
327 | 
328 | 		class1_out = trans_layer1(T_final, T11, box1)
329 | 		class2_out = trans_layer1(T_final, T11, box2)
330 | 		class3_out = trans_layer1(T_final, T11, box3)
331 | 
332 | 		final_boxes_final[k,0,:,:] = class1_out
333 | 		final_boxes_final[k,1,:,:] = class2_out
334 | 		final_boxes_final[k,2,:,:] = class2_out
335 | 
336 | 		return final_boxes_final
337 | 
338 | def trans_layer1(T_final,T11, final_b):
339 | 
340 | 	nT0 = inv(T11)
341 | 	ncorner_pts = [[final_b[0],final_b[2],final_b[0],final_b[2]],[final_b[1],final_b[1],final_b[3],final_b[3]],[1,1,1,1]]
342 | 	nboxx = np.dot(nT0[0:2,:],ncorner_pts)
343 | 	rxymin_nb = nboxx.min(1)
344 | 	rxymax_nb = nboxx.max(1)
345 | 
346 | 	T2 = inv(T_final)
347 | 	boxx2 = np.dot(T2[0:2,:],[nboxx[0], nboxx[1],[1,1,1,1]])
348 | 
349 | 	return boxx2
350 | 
351 | 
352 | def parse_args():
353 |     """Parse input arguments."""
354 |     parser = argparse.ArgumentParser(description='Faster R-CNN demo')
355 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
356 |                         default=0, type=int)
357 |     parser.add_argument('--cpu', dest='cpu_mode',
358 |                         help='Use CPU mode (overrides --gpu)',
359 |                         action='store_true')
360 |     parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
361 |                         choices=NETS.keys(), default='vgg16')
362 | 
363 |     args = parser.parse_args()
364 | 
365 |     return args
366 | 
367 | if __name__ == '__main__':
368 |     cfg.TEST.HAS_RPN = True  # Use RPN for proposals
369 | 
370 |     args = parse_args()
371 | 
372 |     prototxt = os.path.join(cfg.MODELS_DIR, NETS[args.demo_net][0],
373 |                             'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
374 |     print ('prototxt: ',prototxt)
375 |     caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models',
376 |                               NETS[args.demo_net][1])
377 |     print ('caffemodel: ',caffemodel)
378 | 
379 |     if not os.path.isfile(caffemodel):
380 |         raise IOError(('{:s} not found.\nDid you run ./data/script/'
381 |                        'fetch_faster_rcnn_models.sh?').format(caffemodel))
382 | 
383 |     if args.cpu_mode:
384 |         caffe.set_mode_cpu()
385 |     else:
386 |         caffe.set_mode_gpu()
387 |         caffe.set_device(args.gpu_id)
388 |         cfg.GPU_ID = args.gpu_id
389 |     net = caffe.Net(prototxt, caffemodel, caffe.TEST)
390 | 
391 |     print '\n\nLoaded network {:s}'.format(caffemodel)
392 | 
393 | 
394 |     im_names = ['north+korea+army_38.png']
395 |     for im_name in im_names:
396 |         print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
397 |         print 'Demo for data/demo/{}'.format(im_name)
398 |         demo(net, im_name)
399 | 


--------------------------------------------------------------------------------