├── .gitattributes
├── LICENSE
├── README.md
├── demo
│   ├── Agent+Carter+Season+2+gun+rifle+scenes_85.png
│   ├── Division+The+gun+rifle+scenes_2.png
│   ├── Downfall+movie+gun+rifle+scenes_67.png
│   ├── Salvador+movie+gun+rifle+scenes_40.png
│   └── north+korea+army_38.png
├── fast_rcnn
│   ├── config.py
│   └── test.py
├── faster_rcnn_test.pt
├── ftMap_Warp_2.py
├── ftmap_transform.py
├── images
│   ├── flow_diagram_web.jpg
│   └── more_results_web.jpg
├── makebboxproposals.py
└── tools
    └── demo_firearms.py

/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 makhtar17004
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Orientation Aware Object Detection with Applications to Firearms
2 | 
3 | Automatic detection of firearms is important for enhancing the security and safety of people; however, it is a challenging task owing to the wide variations in the shape, size, and appearance of firearms. Viewing-angle variations and occlusions by the weapon’s carrier and surrounding people further increase the difficulty of the task. Moreover, existing object detectors process rectangular areas, even though a thin and long rifle may actually cover only a small percentage of such an area, while the rest may contain irrelevant details that suppress the required object signatures. To handle these challenges, we propose an Orientation Aware Object Detector (OAOD), which achieves improved firearm detection and localization performance.
4 | 
5 | ![alt text](https://github.com/makhtar17004/orientation-aware-firearm-detection/blob/master/images/flow_diagram_web.jpg)
6 | 
7 | 
8 | 
9 | # Instructions
10 | 
11 | This code is built on Faster R-CNN; we cast its two phases as a cascade. Please see the setup details of Faster R-CNN [here](https://github.com/rbgirshick/py-faster-rcnn), as they will assist in running our model.
12 | 
13 | We provide the necessary files to run the test script with our model.
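For reference, the provided files map onto a standard py-faster-rcnn checkout roughly as shown below. This layout is only a sketch: `$FRCN_ROOT` and the exact destinations are inferred from the imports in the scripts (the `fast_rcnn.*` modules live under `lib/fast_rcnn`, and the prototxt loads `rpn.makebboxproposals` and `rpn.ftMap_Warp_2` from the `rpn` package); they are not pinned down by this repository.

```
$FRCN_ROOT
├── lib
│   ├── fast_rcnn
│   │   ├── config.py             # replace with the provided fast_rcnn/config.py
│   │   ├── test.py               # replace with the provided fast_rcnn/test.py
│   │   └── ftmap_transform.py    # provided transform helpers
│   └── rpn
│       ├── makebboxproposals.py  # provided proposal-refinement layer
│       └── ftMap_Warp_2.py       # provided feature-map warping layer
├── models/...                    # the provided faster_rcnn_test.pt replaces the test prototxt
├── data
│   ├── faster_rcnn_models        # the downloaded model goes here (next step)
│   └── demo                      # test images go here
└── tools
    └── demo_firearms.py          # provided test script
```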
Download our model from this [link](https://drive.google.com/file/d/1ShZoCTfoBga9j0y-GPINOFgdf1x8Ti9t/view?usp=sharing) and put it into the following directory:
14 | 
15 | ```.../data/faster_rcnn_models```
16 | 
17 | Replace `config.py` and `test.py` in the `fast_rcnn` folder with the provided versions, and replace the test prototxt with the provided `faster_rcnn_test.pt`. Then put the test images into:
18 | 
19 | ```.../data/demo```
20 | 
21 | 
22 | After installation and setup, place the test script into the .../tools directory and run:
23 | 
24 | ```python demo_firearms.py```
25 | 
26 | 
27 | 
28 | 
29 | # Results:
30 | 
31 | ![alt text](https://github.com/makhtar17004/orientation-aware-firearm-detection/blob/master/images/more_results_web.jpg)
32 | 
33 | 
34 | # Paper and Model Link
35 | 
36 | Here is the arXiv link: https://arxiv.org/abs/1904.10032
37 | 
38 | Here is the web link: http://im.itu.edu.pk/orientation-aware-firearms-detection/
39 | 
40 | Trained model: [link](https://drive.google.com/file/d/1ShZoCTfoBga9j0y-GPINOFgdf1x8Ti9t/view?usp=sharing)
41 | 
42 | # DATASET
43 | 
44 | [The dataset is available upon request via this Google Form](https://forms.gle/t3dS5g5JQdfPoSvn9)
45 | 
46 | 
47 | 
48 | BIBTEX:
49 | 
50 | ```
51 | @article{oaod2021neuro,
52 |   title={Leveraging orientation for weakly supervised object detection with application to firearm localization},
53 |   author={Iqbal, Javed and Munir, Muhammad Akhtar and Mahmood, Arif and Ali, Afsheen Rafaqat and Ali, Mohsen},
54 |   journal={Neurocomputing},
55 |   volume={440},
56 |   pages={310--320},
57 |   year={2021},
58 |   publisher={Elsevier}
59 | }
60 | ```
61 | 
62 | 
63 | 
--------------------------------------------------------------------------------
/demo/Agent+Carter+Season+2+gun+rifle+scenes_85.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/demo/Agent+Carter+Season+2+gun+rifle+scenes_85.png
--------------------------------------------------------------------------------
/demo/Division+The+gun+rifle+scenes_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/demo/Division+The+gun+rifle+scenes_2.png
--------------------------------------------------------------------------------
/demo/Downfall+movie+gun+rifle+scenes_67.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/demo/Downfall+movie+gun+rifle+scenes_67.png
--------------------------------------------------------------------------------
/demo/Salvador+movie+gun+rifle+scenes_40.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/demo/Salvador+movie+gun+rifle+scenes_40.png
--------------------------------------------------------------------------------
/demo/north+korea+army_38.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/demo/north+korea+army_38.png
--------------------------------------------------------------------------------
/fast_rcnn/config.py:
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Fast R-CNN config system. 9 | 10 | This file specifies default config options for Fast R-CNN. You should not 11 | change values in this file. Instead, you should write a config file (in yaml) 12 | and use cfg_from_file(yaml_file) to load it and override the default options. 13 | 14 | Most tools in $ROOT/tools take a --cfg option to specify an override file. 15 | - See tools/{train,test}_net.py for example code that uses cfg_from_file() 16 | - See experiments/cfgs/*.yml for example YAML config override files 17 | """ 18 | 19 | import os 20 | import os.path as osp 21 | import numpy as np 22 | # `pip install easydict` if you don't have it 23 | from easydict import EasyDict as edict 24 | 25 | __C = edict() 26 | # Consumers can get config by: 27 | # from fast_rcnn_config import cfg 28 | cfg = __C 29 | 30 | # 31 | # Training options 32 | # 33 | 34 | __C.TRAIN = edict() 35 | 36 | # Scales to use during training (can list multiple scales) 37 | # Each scale is the pixel size of an image's shortest side 38 | #__C.TRAIN.SCALES = (600,) 39 | __C.TRAIN.SCALES = (480,) 40 | 41 | # Max pixel size of the longest side of a scaled input image 42 | #__C.TRAIN.MAX_SIZE = 1000 43 | __C.TRAIN.MAX_SIZE = 800 44 | 45 | # Images to use per minibatch 46 | __C.TRAIN.IMS_PER_BATCH = 1#2 47 | 48 | # Minibatch size (number of regions of interest [ROIs]) 49 | __C.TRAIN.BATCH_SIZE = 48#128 50 | 51 | # Fraction of minibatch that is labeled foreground (i.e. class > 0) 52 | __C.TRAIN.FG_FRACTION = 0.25 53 | 54 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 55 | __C.TRAIN.FG_THRESH = 0.5 56 | 57 | # Overlap threshold for a ROI to be considered background (class = 0 if 58 | # overlap in [LO, HI)) 59 | __C.TRAIN.BG_THRESH_HI = 0.5 60 | __C.TRAIN.BG_THRESH_LO = 0.1 61 | 62 | # Use horizontally-flipped images during training? 
63 | __C.TRAIN.USE_FLIPPED = True
64 | 
65 | # Train bounding-box regressors
66 | __C.TRAIN.BBOX_REG = True
67 | 
68 | # Overlap required between a ROI and ground-truth box in order for that ROI to
69 | # be used as a bounding-box regression training example
70 | __C.TRAIN.BBOX_THRESH = 0.5
71 | 
72 | # Iterations between snapshots
73 | #__C.TRAIN.SNAPSHOT_ITERS = 10000
74 | __C.TRAIN.SNAPSHOT_ITERS = 20000
75 | 
76 | # solver.prototxt specifies the snapshot path prefix; this adds an optional
77 | # infix to yield the path: <prefix>[_<infix>]_iters_XYZ.caffemodel
78 | __C.TRAIN.SNAPSHOT_INFIX = ''
79 | 
80 | # Use a prefetch thread in roi_data_layer.layer
81 | # So far I haven't found this useful; likely more engineering work is required
82 | __C.TRAIN.USE_PREFETCH = False
83 | 
84 | # Normalize the targets (subtract empirical mean, divide by empirical stddev)
85 | #__C.TRAIN.BBOX_NORMALIZE_TARGETS = True
86 | __C.TRAIN.BBOX_NORMALIZE_TARGETS = False
87 | # Deprecated (inside weights)
88 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
89 | # Normalize the targets using "precomputed" (or made up) means and stdevs
90 | # (BBOX_NORMALIZE_TARGETS must also be True)
91 | __C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False
92 | __C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
93 | __C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
94 | 
95 | # Train using these proposals
96 | __C.TRAIN.PROPOSAL_METHOD = 'selective_search'
97 | 
98 | # Make minibatches from images that have similar aspect ratios (i.e. both
99 | # tall and thin or both short and wide) in order to avoid wasting computation
100 | # on zero-padding.
101 | __C.TRAIN.ASPECT_GROUPING = True
102 | 
103 | # Use RPN to detect objects
104 | __C.TRAIN.HAS_RPN = False
105 | # IOU >= thresh: positive example
106 | __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
107 | # IOU < thresh: negative example
108 | __C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
109 | # If an anchor satisfies both the positive and negative conditions, set it to negative
110 | __C.TRAIN.RPN_CLOBBER_POSITIVES = False
111 | # Max number of foreground examples
112 | __C.TRAIN.RPN_FG_FRACTION = 0.5
113 | # Total number of examples
114 | __C.TRAIN.RPN_BATCHSIZE = 256
115 | # NMS threshold used on RPN proposals
116 | __C.TRAIN.RPN_NMS_THRESH = 0.7
117 | # Number of top scoring boxes to keep before applying NMS to RPN proposals
118 | __C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
119 | # Number of top scoring boxes to keep after applying NMS to RPN proposals
120 | __C.TRAIN.RPN_POST_NMS_TOP_N = 2000
121 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
122 | __C.TRAIN.RPN_MIN_SIZE = 16
123 | # Deprecated (outside weights)
124 | __C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
125 | # Give the positive RPN examples weight of p * 1 / {num positives}
126 | # and give negatives a weight of (1 - p)
127 | # Set to -1.0 to use uniform example weighting
128 | __C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
129 | 
130 | 
131 | #
132 | # Testing options
133 | #
134 | 
135 | __C.TEST = edict()
136 | 
137 | # Scales to use during testing (can list multiple scales)
138 | # Each scale is the pixel size of an image's shortest side
139 | #__C.TEST.SCALES = (600,)
140 | __C.TEST.SCALES = (480,)
141 | 
142 | # Max pixel size of the longest side of a scaled input image
143 | __C.TEST.MAX_SIZE = 800 #1000
144 | #__C.TEST.MAX_SIZE = 1000
145 | 
146 | # Overlap threshold used for non-maximum suppression (suppress boxes with
147 | # IoU >= this threshold)
148 | __C.TEST.NMS = 0.3 #0.3
149 | 
150 | # Experimental: treat the (K+1) units in the cls_score layer as linear
151 | # predictors (trained, e.g., with one-vs-rest SVMs).
152 | __C.TEST.SVM = False
153 | 
154 | # Test using bounding-box regressors
155 | __C.TEST.BBOX_REG = True
156 | 
157 | # Propose boxes
158 | __C.TEST.HAS_RPN = False
159 | 
160 | # Test using these proposals
161 | __C.TEST.PROPOSAL_METHOD = 'selective_search'
162 | 
163 | ## NMS threshold used on RPN proposals
164 | __C.TEST.RPN_NMS_THRESH = 0.7 #0.7
165 | ## Number of top scoring boxes to keep before applying NMS to RPN proposals
166 | __C.TEST.RPN_PRE_NMS_TOP_N = 6000
167 | ## Number of top scoring boxes to keep after applying NMS to RPN proposals
168 | __C.TEST.RPN_POST_NMS_TOP_N = 200
169 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
170 | __C.TEST.RPN_MIN_SIZE = 16
171 | 
172 | 
173 | #
174 | # MISC
175 | #
176 | 
177 | # The mapping from image coordinates to feature map coordinates might cause
178 | # some boxes that are distinct in image space to become identical in feature
179 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
180 | # for identifying duplicate boxes.
181 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
182 | __C.DEDUP_BOXES = 1./16.
183 | 
184 | # Pixel mean values (BGR order) as a (1, 1, 3) array
185 | # We use the same pixel mean for all networks even though it's not exactly what
186 | # they were trained with
187 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
188 | 
189 | # For reproducibility
190 | __C.RNG_SEED = 3
191 | 
192 | # A small number that's used many times
193 | __C.EPS = 1e-14
194 | 
195 | # Root directory of project
196 | __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
197 | 
198 | # Data directory
199 | __C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
200 | 
201 | # Model directory
202 | __C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc'))
203 | 
204 | # Name of (or path to) the matlab executable
205 | __C.MATLAB = 'matlab'
206 | 
207 | # Place outputs under an experiments directory
208 | __C.EXP_DIR = 'default'
209 | 
210 | # Use GPU implementation of non-maximum suppression
211 | __C.USE_GPU_NMS = False
212 | 
213 | # Default GPU device id
214 | __C.GPU_ID = 0
215 | 
216 | 
217 | def get_output_dir(imdb, net=None):
218 |     """Return the directory where experimental artifacts are placed.
219 |     If the directory does not exist, it is created.
220 | 
221 |     A canonical path is built using the name from an imdb and a network
222 |     (if not None).
223 |     """
224 |     outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
225 |     if net is not None:
226 |         outdir = osp.join(outdir, net.name)
227 |     if not os.path.exists(outdir):
228 |         os.makedirs(outdir)
229 |     return outdir
230 | 
231 | def _merge_a_into_b(a, b):
232 |     """Merge config dictionary a into config dictionary b, clobbering the
233 |     options in b whenever they are also specified in a.
234 |     """
235 |     if type(a) is not edict:
236 |         return
237 | 
238 |     for k, v in a.iteritems():
239 |         # a must specify keys that are in b
240 |         if not b.has_key(k):
241 |             raise KeyError('{} is not a valid config key'.format(k))
242 | 
243 |         # the types must match, too
244 |         old_type = type(b[k])
245 |         if old_type is not type(v):
246 |             if isinstance(b[k], np.ndarray):
247 |                 v = np.array(v, dtype=b[k].dtype)
248 |             else:
249 |                 raise ValueError(('Type mismatch ({} vs.
{}) ' 250 | 'for config key: {}').format(type(b[k]), 251 | type(v), k)) 252 | 253 | # recursively merge dicts 254 | if type(v) is edict: 255 | try: 256 | _merge_a_into_b(a[k], b[k]) 257 | except: 258 | print('Error under config key: {}'.format(k)) 259 | raise 260 | else: 261 | b[k] = v 262 | 263 | def cfg_from_file(filename): 264 | """Load a config file and merge it into the default options.""" 265 | import yaml 266 | with open(filename, 'r') as f: 267 | yaml_cfg = edict(yaml.load(f)) 268 | 269 | _merge_a_into_b(yaml_cfg, __C) 270 | 271 | def cfg_from_list(cfg_list): 272 | """Set config keys via list (e.g., from command line).""" 273 | from ast import literal_eval 274 | assert len(cfg_list) % 2 == 0 275 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 276 | key_list = k.split('.') 277 | d = __C 278 | for subkey in key_list[:-1]: 279 | assert d.has_key(subkey) 280 | d = d[subkey] 281 | subkey = key_list[-1] 282 | assert d.has_key(subkey) 283 | try: 284 | value = literal_eval(v) 285 | except: 286 | # handle the case when v is a string literal 287 | value = v 288 | assert type(value) == type(d[subkey]), \ 289 | 'type {} does not match original type {}'.format( 290 | type(value), type(d[subkey])) 291 | d[subkey] = value 292 | -------------------------------------------------------------------------------- /fast_rcnn/test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # Modified by CVML group @ITU- Punjab 9 | 10 | """Test a Fast R-CNN network on an imdb (image database).""" 11 | 12 | from fast_rcnn.config import cfg, get_output_dir 13 | from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv 14 | #from fast_rcnn.ftmap_transform import transformer_layer as trans_layer 15 | import argparse 16 | from utils.timer import Timer 17 | import numpy as np 18 | import cv2 19 | from numpy.linalg import inv 20 | import caffe 21 | from fast_rcnn.nms_wrapper import nms 22 | import cPickle 23 | from utils.blob import im_list_to_blob 24 | import os 25 | import matplotlib.pyplot as plt 26 | import xml.etree.ElementTree as ET 27 | import gc 28 | 29 | #from nms.py_cpu_nms_rotated import py_cpu_nms 30 | 31 | def _get_image_blob(im): 32 | """Converts an image into a network input. 
33 | 34 | Arguments: 35 | im (ndarray): a color image in BGR order 36 | 37 | Returns: 38 | blob (ndarray): a data blob holding an image pyramid 39 | im_scale_factors (list): list of image scales (relative to im) used 40 | in the image pyramid 41 | """ 42 | im_orig = im.astype(np.float32, copy=True) 43 | im_orig -= cfg.PIXEL_MEANS 44 | 45 | im_shape = im_orig.shape 46 | im_size_min = np.min(im_shape[0:2]) 47 | im_size_max = np.max(im_shape[0:2]) 48 | 49 | processed_ims = [] 50 | im_scale_factors = [] 51 | 52 | for target_size in cfg.TEST.SCALES: 53 | im_scale = float(target_size) / float(im_size_min) 54 | # Prevent the biggest axis from being more than MAX_SIZE 55 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 56 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 57 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 58 | interpolation=cv2.INTER_LINEAR) 59 | im_scale_factors.append(im_scale) 60 | processed_ims.append(im) 61 | 62 | # Create a blob to hold the input images 63 | blob = im_list_to_blob(processed_ims) 64 | 65 | return blob, np.array(im_scale_factors) 66 | 67 | def _get_rois_blob(im_rois, im_scale_factors): 68 | """Converts RoIs into network inputs. 69 | 70 | Arguments: 71 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 72 | im_scale_factors (list): scale factors as returned by _get_image_blob 73 | 74 | Returns: 75 | blob (ndarray): R x 5 matrix of RoIs in the image pyramid 76 | """ 77 | rois, levels = _project_im_rois(im_rois, im_scale_factors) 78 | rois_blob = np.hstack((levels, rois)) 79 | return rois_blob.astype(np.float32, copy=False) 80 | 81 | def _project_im_rois(im_rois, scales): 82 | """Project image RoIs into the image pyramid built by _get_image_blob. 83 | 84 | Arguments: 85 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 86 | scales (list): scale factors as returned by _get_image_blob 87 | 88 | Returns: 89 | rois (ndarray): R x 4 matrix of projected RoI coordinates 90 | levels (list): image pyramid levels used by each projected RoI 91 | """ 92 | im_rois = im_rois.astype(np.float, copy=False) 93 | 94 | if len(scales) > 1: 95 | widths = im_rois[:, 2] - im_rois[:, 0] + 1 96 | heights = im_rois[:, 3] - im_rois[:, 1] + 1 97 | 98 | areas = widths * heights 99 | scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2) 100 | diff_areas = np.abs(scaled_areas - 224 * 224) 101 | levels = diff_areas.argmin(axis=1)[:, np.newaxis] 102 | else: 103 | levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) 104 | 105 | rois = im_rois * scales[levels] 106 | 107 | return rois, levels 108 | 109 | def _get_blobs(im, rois): 110 | """Convert an image and RoIs within that image into network inputs.""" 111 | blobs = {'data' : None, 'rois' : None} 112 | blobs['data'], im_scale_factors = _get_image_blob(im) 113 | if not cfg.TEST.HAS_RPN: 114 | blobs['rois'] = _get_rois_blob(rois, im_scale_factors) 115 | #print ('lll: ', blobs['rois']) 116 | return blobs, im_scale_factors 117 | 118 | def im_detect(net, im, boxes=None, extract_feat=False): 119 | """Detect object classes in an image given object proposals. 
120 | 121 | Arguments: 122 | net (caffe.Net): Fast R-CNN network to use 123 | im (ndarray): color image to test (in BGR order) 124 | boxes (ndarray): R x 4 array of object proposals or None (for RPN) 125 | 126 | Returns: 127 | scores (ndarray): R x K array of object class scores (K includes 128 | background as object category 0) 129 | boxes (ndarray): R x (4*K) array of predicted bounding boxes 130 | """ 131 | blobs, im_scales = _get_blobs(im, boxes) 132 | #print 'blobs: ', blobs 133 | 134 | # When mapping from image ROIs to feature map ROIs, there's some aliasing 135 | # (some distinct image ROIs get mapped to the same feature ROI). 136 | # Here, we identify duplicate feature ROIs, so we only compute features 137 | # on the unique subset. 138 | if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN: 139 | v = np.array([1, 1e3, 1e6, 1e9, 1e12]) 140 | hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v) 141 | _, index, inv_index = np.unique(hashes, return_index=True, 142 | return_inverse=True) 143 | blobs['rois'] = blobs['rois'][index, :] 144 | boxes = boxes[index, :] 145 | #print ('lll: ', not cfg.TEST.HAS_RPN) 146 | 147 | if cfg.TEST.HAS_RPN: 148 | im_blob = blobs['data'] 149 | permanent_shape = im_blob.shape 150 | #print ('lll: ', permanent_shape) 151 | blobs['im_info'] = np.array( 152 | [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], 153 | dtype=np.float32) 154 | print blobs['im_info'] 155 | 156 | 157 | # reshape network inputs 158 | 159 | net.blobs['data'].reshape(*(blobs['data'].shape)) 160 | 161 | if cfg.TEST.HAS_RPN: 162 | net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) 163 | else: 164 | net.blobs['rois'].reshape(*(blobs['rois'].shape)) 165 | 166 | # do forward 167 | forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)} 168 | if cfg.TEST.HAS_RPN: 169 | forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False) 170 | else: 171 | forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False) 172 | 173 | 174 | #print('going to net forward') 175 | blobs_out = net.forward(**forward_kwargs) 176 | #print('blobs[rois] : ', blobs_out) 177 | 178 | #print ('start_ind: ', list(net._layer_names)) 179 | li = list(net._layer_names).index('roi_pool5') 180 | tops = [(net._blob_names[bi], net.blobs[net._blob_names[bi]].data.shape) for bi in list(net._top_ids(li))] 181 | #for bi in range(len(list(net._layer_names))): 182 | #print ('hello: ', net._blob_names[bi] ,net.blobs[net._blob_names[bi]].data.shape) 183 | '''print li 184 | print list(net._top_ids(li)), net._blob_names[28] 185 | print net.blobs['rois'].data.shape 186 | for ip in range (35): 187 | print ip, net._blob_names[ip]''' 188 | 189 | if cfg.TEST.HAS_RPN: 190 | assert len(im_scales) == 1, "Only single-image batch implemented" 191 | rois = net.blobs['rois'].data.copy() 192 | 193 | #print('rois :', rois[0,:]) 194 | 195 | # unscale back to raw image space 196 | rpn_boxes = rois[:, 1:5] / im_scales[0] 197 | #print ('shape: ', rpn_boxes.shape) 198 | rpn_scores = net.blobs['scores'].data.copy() 199 | 200 | pred_scores = net.blobs['cls_prob'].data.copy() 201 | box_deltas = net.blobs['bbox_pred'].data.copy() 202 | #box_deltas = blobs_out['bbox_pred1'] 203 | pred_boxes = bbox_transform_inv(rpn_boxes, box_deltas) 204 | #pred_boxes = np.hstack((rpn_boxes, rpn_boxes, rpn_boxes)) 205 | pred_boxes = clip_boxes(pred_boxes, im.shape) 206 | 207 | #print('comp :', rpn_boxes[:50,:], pred_boxes[:50,:]) 208 | orient_prob = net.blobs['orient_prob'].data.copy() 209 | 210 | warpedrois = 
net.blobs['warpedrois'].data.copy() 211 | transApplied = net.blobs['transApplied'].data.copy() 212 | rpn_boxes1 = warpedrois[:, 1:5] / im_scales[0] 213 | 214 | # use softmax estimated probabilities 215 | pred_scores1 = blobs_out['cls_prob1'] 216 | #print 'im_detect' 217 | #print (pred_scores.shape) 218 | # Apply bounding-box regression deltas by accessing blob with name bbox_pred 219 | box_deltas1 = blobs_out['bbox_pred1'] 220 | pred_boxes1 = bbox_transform_inv(rpn_boxes1, box_deltas1) 221 | #pred_boxes1 = np.hstack((rpn_boxes1, rpn_boxes1, rpn_boxes1)) 222 | #pred_boxes1 = clip_boxes(pred_boxes1, im.shape) 223 | 224 | #orient_prob = blobs_out['orient_prob'] 225 | #orient_prob = np.zeros((len(pred_scores),4), dtype='float') 226 | 227 | # unscale back to raw image space 228 | '''rpn_boxes = rois[:, 1:5] / im_scales[0] 229 | #print ('shape: ', rpn_boxes[45:60,:]) 230 | rpn_scores = net.blobs['scores'].data.copy() 231 | 232 | # use softmax estimated probabilities 233 | pred_scores = blobs_out['cls_prob'] 234 | #print 'im_detect' 235 | #print (pred_scores.shape) 236 | # Apply bounding-box regression deltas by accessing blob with name bbox_pred 237 | box_deltas = blobs_out['bbox_pred'] 238 | pred_boxes = bbox_transform_inv(rpn_boxes, box_deltas) 239 | pred_boxes = clip_boxes(pred_boxes, im.shape) 240 | 241 | orient_prob = blobs_out['orient_prob'] 242 | #orient_prob = np.zeros((len(pred_scores),4), dtype='float') 243 | 244 | if extract_feat == True: 245 | conv_feat = net.blobs['conv5_3'].data.copy() 246 | #print conv_feat.shape 247 | return rpn_boxes, rpn_scores, pred_boxes, pred_scores, orient_prob, conv_feat, permanent_shape''' 248 | 249 | 250 | 251 | return rpn_boxes, rpn_scores, pred_boxes, pred_scores, orient_prob, pred_boxes1, pred_scores1, transApplied 252 | 253 | 254 | def im_detect_new(net, im, perm_shape, blobs, ross,im_scales, boxes=None, extract_feat=False): 255 | """Detect object classes in an image given object proposals. 256 | 257 | Arguments: 258 | net (caffe.Net): Fast R-CNN network to use 259 | im (ndarray): color image to test (in BGR order) 260 | boxes (ndarray): R x 4 array of object proposals or None (for RPN) 261 | 262 | Returns: 263 | scores (ndarray): R x K array of object class scores (K includes 264 | background as object category 0) 265 | boxes (ndarray): R x (4*K) array of predicted bounding boxes 266 | """ 267 | 268 | #blobs, im_scales = _get_blobs(im, boxes) 269 | # When mapping from image ROIs to feature map ROIs, there's some aliasing 270 | # (some distinct image ROIs get mapped to the same feature ROI). 271 | # Here, we identify duplicate feature ROIs, so we only compute features 272 | # on the unique subset. 
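    # Note on the hashing trick below: each ROI row (level, x1, y1, x2, y2)
    # is scaled by cfg.DEDUP_BOXES (1/16, the conv5 feature stride), rounded
    # onto the feature grid, and dotted with v = [1, 1e3, 1e6, 1e9, 1e12],
    # collapsing every ROI to one scalar. Boxes that fall on the same
    # feature-map cell collide -- e.g. [0, 16, 16, 64, 64] and
    # [0, 17, 17, 65, 65] both hash to 1e3 + 1e6 + 4e9 + 4e12 -- and
    # np.unique keeps a single representative (inv_index can map results
    # back to the original ROI order).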
273 | if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN: 274 | v = np.array([1, 1e3, 1e6, 1e9, 1e12]) 275 | hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v) 276 | _, index, inv_index = np.unique(hashes, return_index=True, 277 | return_inverse=True) 278 | blobs['rois'] = blobs['rois'][index, :] 279 | boxes = boxes[index, :] 280 | 281 | if cfg.TEST.HAS_RPN: 282 | im_blob = blobs['data'] 283 | #print 'change me' 284 | #print im_blob.shape 285 | blobs['im_info'] = np.array( 286 | [[perm_shape[2], perm_shape[3], im_scales[0]]], 287 | dtype=np.float32) 288 | 289 | blobs['rois'] = np.array(ross) 290 | 291 | 292 | # reshape network inputs 293 | net.blobs['data'].reshape(*(blobs['data'].shape)) 294 | if cfg.TEST.HAS_RPN: 295 | net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) 296 | #print 'rois shape', net.blobs['rois'] 297 | net.blobs['rois'].reshape(*(blobs['rois'].shape)) 298 | #print 'rois shape', net.blobs['rois'].shape 299 | else: 300 | net.blobs['rois'].reshape(*(blobs['rois'].shape)) 301 | 302 | # do forward 303 | forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)} 304 | 305 | if cfg.TEST.HAS_RPN: 306 | forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False) 307 | forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False) 308 | else: 309 | forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False) 310 | 311 | 312 | blobs_out = net.forward(**forward_kwargs) 313 | 314 | 315 | if cfg.TEST.HAS_RPN: 316 | assert len(im_scales) == 1, "Only single-image batch implemented" 317 | rois = net.blobs['rois'].data.copy() 318 | # unscale back to raw image space 319 | #print 'in' 320 | rpn_boxes = rois[:, 1:5] / im_scales[0] 321 | #rpn_boxes = rois[:, 1:5] 322 | #print ('shape: ', rpn_boxes) 323 | #rpn_boxes = rois / im_scales[0] 324 | #rpn_scores = net.blobs['scores'].data.copy() 325 | rpn_scores = np.array([[0.7]]) 326 | #print ('shape1: ', rpn_scores) 327 | 328 | # use softmax estimated probabilities 329 | pred_scores = blobs_out['cls_prob'] 330 | 331 | #print ('shape2: ', pred_scores) 332 | 333 | # Apply bounding-box regression deltas by accessing blob with name bbox_pred 334 | box_deltas = blobs_out['bbox_pred'] 335 | #print 'box_deltas: ', (box_deltas.max())*16 336 | pred_boxes = bbox_transform_inv(rpn_boxes, box_deltas) 337 | pred_boxes = clip_boxes(pred_boxes, im.shape) 338 | #print ('pred_boxes: ', pred_boxes) 339 | 340 | orient_prob = blobs_out['orient_prob'] 341 | #orient_prob = np.zeros((len(pred_scores),4), dtype='float') 342 | 343 | if extract_feat == True: 344 | conv_feat = net.blobs['conv5_3'].data.copy() 345 | #print conv_feat.shape 346 | return rpn_boxes, rpn_scores, pred_boxes, pred_scores, orient_prob, conv_feat 347 | 348 | 349 | return rpn_boxes, rpn_scores, pred_boxes, pred_scores, orient_prob 350 | #return rpn_boxes, rpn_scores, pred_boxes 351 | 352 | def vis_detections(im, class_name, dets, thresh=0.3): 353 | """Visual debugging of detections.""" 354 | import matplotlib.pyplot as plt 355 | im = im[:, :, (2, 1, 0)] 356 | for i in xrange(np.minimum(10, dets.shape[0])): 357 | bbox = dets[i, :4] 358 | score = dets[i, -1] 359 | if score > thresh: 360 | plt.cla() 361 | plt.imshow(im) 362 | plt.gca().add_patch( 363 | plt.Rectangle((bbox[0], bbox[1]), 364 | bbox[2] - bbox[0], 365 | bbox[3] - bbox[1], fill=False, 366 | edgecolor='g', linewidth=3) 367 | ) 368 | plt.title('{} {:.3f}'.format(class_name, score)) 369 | plt.show() 370 | 371 | def apply_nms(all_boxes, thresh): 372 | """Apply non-maximum suppression to all 
predicted boxes output by the 373 | test_net method. 374 | """ 375 | num_classes = len(all_boxes) 376 | num_images = len(all_boxes[0]) 377 | nms_boxes = [[[] for _ in xrange(num_images)] 378 | for _ in xrange(num_classes)] 379 | for cls_ind in xrange(num_classes): 380 | for im_ind in xrange(num_images): 381 | dets = all_boxes[cls_ind][im_ind] 382 | if dets == []: 383 | continue 384 | # CPU NMS is much faster than GPU NMS when the number of boxes 385 | # is relative small (e.g., < 10k) 386 | # TODO(rbg): autotune NMS dispatch 387 | keep = nms(dets, thresh, force_cpu=True) 388 | if len(keep) == 0: 389 | continue 390 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 391 | return nms_boxes 392 | 393 | def vis_detections_rpn(fname, class_name, dets, scores, im_name): 394 | """Visual debugging of detections.""" 395 | 396 | for i in xrange(np.minimum(len(scores), dets.shape[0])): 397 | #print im_name 398 | im = cv2.imread(fname) 399 | bbox = map(int, dets[i, :]) 400 | score = scores[i] 401 | 402 | txt = str(score) 403 | 404 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), [0,0,255], 2, 16) 405 | ret, baseline = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1) 406 | cv2.rectangle(im, (bbox[0], bbox[1] - ret[1] - baseline),(bbox[0] + ret[0], bbox[1]), (255, 0, 0), -1) 407 | 408 | cv2.putText(im, txt, (bbox[0], bbox[1] - baseline), 409 | cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, 16) 410 | 411 | 412 | foldername = '/home/javed/1070/Projects/py-faster-rcnn-full_shift/data/output_rpn/' + im_name + '/' 413 | if not os.path.isdir(foldername): 414 | os.makedirs(foldername) 415 | filename = foldername + str(i) + '.jpg' 416 | #print filename 417 | cv2.imwrite(filename, im) 418 | 419 | #def vis_detections_final(im, class_name, all_final_boxes, im_name,thresh): 420 | def vis_detections_final(im, class_name, all_final_boxes, im_name,thresh, cntG,cntR, cG, cR, rpn_sscores, rpn_bo, all_final_boxes_rotated): 421 | """Visual debugging of detections.""" 422 | #print 'i am in visualizer' 423 | #print len(all_final_boxes) 424 | boxes = all_final_boxes[:,:4] 425 | scores = all_final_boxes[:,4] 426 | scor = all_final_boxes[:,10] 427 | rpnns = all_final_boxes[:,6:10] 428 | 429 | xAll = all_final_boxes_rotated[:,:4] 430 | yAll = all_final_boxes_rotated[:,4:8] 431 | 432 | orient_class = all_final_boxes[:,5] 433 | s=[] 434 | for i in xrange(len(scores)): 435 | 436 | bbox = map(int, boxes[i,:]) 437 | #rpn_bo = map(int, rpnns[i,:]) 438 | score = scores[i] 439 | orient_cls = orient_class[i] 440 | rpn_s = scor[i] 441 | 442 | 443 | if score > thresh: 444 | #print 'greater than thresh' 445 | #print bbox 446 | txt = class_name + ': ' + str(orient_cls) + ': ' + str(score) 447 | #txt = class_name + ': ' + str(orient_cls) + ': ' + str(rpn_s) 448 | #print rpn_sscores 449 | s.append(score) 450 | #cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), [0,0,255], 2, 16) 451 | #cv2.rectangle(im, (rpn_bo[0], rpn_bo[1]), (rpn_bo[2], rpn_bo[3]), [255,0,255], 2, 16) 452 | #print('writing done') 453 | #ret, baseline = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1) 454 | #cv2.rectangle(im, (bbox[0], bbox[1] + ret[1] + baseline), 455 | # (bbox[0] + ret[0], bbox[1]), (255, 0, 0), -1) 456 | 457 | #cv2.putText(im, txt, (bbox[0], bbox[1] + ret[1]+ baseline), 458 | # cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, 16) 459 | 460 | pts = np.array([[xAll[i,0],yAll[i,0]],[xAll[i,1],yAll[i,1]],[xAll[i,3],yAll[i,3]],[xAll[i,2],yAll[i,2]]], np.int32) 461 | #cv2.polylines(im, [pts],True,(0,255,255), 2) 462 | 
#cv2.polylines(im, [pts],True,(128,0,255), 2) #voilet like 463 | cv2.polylines(im, [pts],True,(147, 20,255), 6) # pink like 464 | 465 | if s: 466 | # print type(scores) 467 | # print 'max: ', max(scores) 468 | 469 | if (class_name == 'Gun'): 470 | cntG = max(s)+cntG 471 | cG=cG+1 472 | 473 | if (class_name == 'Riffle'): 474 | cntR = max(s)+cntR 475 | cR=cR+1 476 | 477 | #print (cntG,cntR) 478 | #print (cG,cR) 479 | 480 | return im,cntG,cntR, cG, cR 481 | #return im 482 | 483 | def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): 484 | """Test a Fast R-CNN network on an image database.""" 485 | num_images = len(imdb.image_index) 486 | '''foldername = '/home/javed/1070/Projects/py-faster-rcnn-master/data/output_images_detected/' 487 | foldername_all = '/home/javed/1070/Projects/py-faster-rcnn-master/data/output_images_all/' 488 | 489 | net2 = caffe.Net('/home/javed/1070/Projects/py-faster-rcnn-master/models/pascal_voc/VGG16/faster_rcnn_alt_opt/faster_rcnn_test_2.pt', '/home/javed/1070/Projects/py-faster-rcnn-master/output/faster_rcnn_alt_opt/voc_2007_trainval/VGG16_faster_rcnn_final.caffemodel', caffe.TEST) ''' 490 | 491 | foldername = '/media/akhtar/6D2C8F896B2F79E0/Projects/py-faster-rcnn-master/data/output_images_detected/' 492 | foldername_all = '/home/itu/faster-rcnn-1070/data/output_images_all/' 493 | 494 | '''net2 = caffe.Net('/home/itu/faster-rcnn-1070/models/pascal_voc/VGG16/faster_rcnn_alt_opt/faster_rcnn_test_3.pt', '/home/itu/faster-rcnn-1070/output/faster_rcnn_alt_opt/voc_2007_trainval/VGG16_faster_rcnn_final.caffemodel', caffe.TEST) ''' 495 | 496 | all_boxes = [[] for _ in xrange(num_images)] 497 | 498 | ntopProp = [1,4,50,100,300] 499 | ntopProp = [300] 500 | #ntopProp = [50] 501 | theta = [0, 90, 135, 45, 157.5, 112.5, 67.5, 22.5] 502 | #theta = [45,90,135,45, 157.5, 112.5,67.5, 22.5] 503 | 504 | for t in xrange(0,len(ntopProp)): 505 | output_dir = get_output_dir(imdb, net) 506 | 507 | # timers 508 | _t = {'im_detect' : Timer(), 'misc' : Timer()} 509 | 510 | if not cfg.TEST.HAS_RPN: 511 | roidb = imdb.roidb 512 | 513 | all_final_boxes = [[[] for _ in xrange(num_images)] 514 | for _ in xrange(imdb.num_classes)] 515 | 516 | all_final_boxes_rotated = [[[] for _ in xrange(num_images)] 517 | for _ in xrange(imdb.num_classes)] 518 | 519 | all_rpn_boxes = [[[] for _ in xrange(num_images)] 520 | for _ in xrange(1)] 521 | 522 | #print('all_final_boxes_rotated :', all_final_boxes_rotated) 523 | cntG = 0 524 | cntR = 0 525 | cG = 0 526 | cR = 0 527 | 528 | for i in xrange(num_images): 529 | # filter out any ground truth boxes 530 | if cfg.TEST.HAS_RPN: 531 | box_proposals = None 532 | else: 533 | # The roidb may contain ground-truth rois (for example, if the roidb 534 | # comes from the training or val split). We only want to evaluate 535 | # detection on the *non*-ground-truth rois. We select those the rois 536 | # that have the gt_classes field set to 0, which means there's no 537 | # ground truth. 
538 | box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] 539 | 540 | fname = imdb.image_path_at(i) 541 | ind = fname.rindex('/') 542 | ind_end = fname.rindex('.') 543 | filename = fname[ind+1:ind_end] 544 | print filename 545 | 546 | im = cv2.imread(imdb.image_path_at(i)) 547 | 548 | fname = foldername + filename + '.jpg' 549 | 550 | '''rpn_boxes_rot = np.zeros((1,4)) 551 | rpn_scores_rot = np.zeros((1)) 552 | final_boxes_rot = np.zeros((1,12)) 553 | final_scores_rot = np.zeros((1,3)) 554 | orient_prob_rot = np.zeros((1,8))''' 555 | 556 | _t['im_detect'].tic() 557 | #print 'first pass' 558 | 559 | #rpn_boxes, rpn_scores, final_boxes, final_scores, orient_score, conv_feat, f_shape = im_detect(net, im, box_proposals, True) 560 | rpn_boxes, rpn_scores, final_boxes, final_scores, orient_score, final_boxes1, final_scores1, transApplied = im_detect(net, im, box_proposals, True) 561 | 562 | 563 | #print('orient_scores: ', orient_score.shape) 564 | #print conv_feat.shape 565 | #in_feat = np.rollaxis(conv_feat, 1, 4) 566 | #print in_feat.sum() 567 | 568 | if ntopProp[t] == 300: 569 | if len(rpn_scores) > 299: 570 | rpn_boxes = rpn_boxes[0:ntopProp[t],:] 571 | rpn_scores = rpn_scores[0:ntopProp[t],:] 572 | final_boxes = final_boxes[0:ntopProp[t],:] 573 | final_scores = final_scores[0:ntopProp[t],:] 574 | orient_scores = orient_score[0:ntopProp[t],:] 575 | final_boxes1 = final_boxes1[0:ntopProp[t],:] 576 | final_scores1 = final_scores1[0:ntopProp[t],:] 577 | transApplied = transApplied[0:ntopProp[t],:,:,:] 578 | else: 579 | rpn_boxes = rpn_boxes[0:ntopProp[t],:] 580 | rpn_scores = rpn_scores[0:ntopProp[t],:] 581 | final_boxes = final_boxes[0:ntopProp[t],:] 582 | final_scores = final_scores[0:ntopProp[t],:] 583 | orient_scores = orient_score[0:ntopProp[t],:] 584 | final_boxes1 = final_boxes1[0:ntopProp[t],:] 585 | final_scores1 = final_scores1[0:ntopProp[t],:] 586 | transApplied = transApplied[0:ntopProp[t],:,:,:] 587 | 588 | #print('orient_scores: ', orient_scores.shape) 589 | #top_proposals_pass_2 = 50 590 | temp_boxes = None 591 | blobs, im_scales = _get_blobs(im, temp_boxes) 592 | #print len(rpn_boxes) 593 | 594 | rotatedBoxesAll = np.zeros((len(rpn_boxes), 3,2,4)) 595 | for iii in range(0, len(rpn_boxes)): 596 | final_boxes_tr = final_boxes1[iii,:] 597 | #print('final_boxes_tr :', final_boxes_tr) 598 | final_boxes_tr = ((final_boxes_tr * im_scales[0]) / 16) 599 | 600 | final_boxes_tr = trans_box1(final_boxes_tr,transApplied[iii,0,:,:],transApplied[iii,1,:,:]) 601 | 602 | final_boxes_tr = ((final_boxes_tr * 16) / im_scales[0]) 603 | 604 | rotatedBoxesAll[iii, :,:,:] = final_boxes_tr[0,:,:,:] 605 | 606 | 607 | 608 | 609 | #print('rotatedBoxesAll :', rotatedBoxesAll.shape) 610 | #vis_detections_rpn(fname, 'fireArm', rpn_boxes, rpn_scores, filename) 611 | #print hi 612 | 613 | rpn_dets = np.hstack((rpn_boxes, rpn_scores)) \ 614 | .astype(np.float32, copy=False) 615 | all_rpn_boxes[0][i] = rpn_dets 616 | 617 | 618 | _t['misc'].tic() 619 | 620 | # skip j = 0, because it's the background class 621 | #maxScore = np.maximum(final_scores1, final_scores) 622 | maxScore = final_scores1 623 | for j in xrange(1, imdb.num_classes): 624 | #inds = np.where(final_scores1[:, j] > thresh)[0] 625 | #cls_scores = final_scores1[inds, j] 626 | inds = np.where(maxScore[:, j] > thresh)[0] 627 | cls_scores = maxScore[inds, j] 628 | cls_boxes = final_boxes[inds, j*4:(j+1)*4] 629 | cls_orient = np.argmax(orient_score[inds, :], axis = 1) 630 | rpn_bboxes = rpn_boxes[inds,:] 631 | rpn_sscores = rpn_scores[inds] 632 
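                # Column layout note: cls_dets assembled below stacks
                # [x1, y1, x2, y2, cls_score, orient_cls, rpn_x1, rpn_y1,
                #  rpn_x2, rpn_y2, rpn_score] into an 11-column row;
                # vis_detections_final indexes these positions directly
                # (boxes [:4], score [4], orientation [5], RPN box [6:10],
                # RPN score [10]).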
| 633 | cls_scores1 = final_scores[inds, j] 634 | 635 | rotatedBoxesClass = np.hstack((rotatedBoxesAll[inds,j,0,:], rotatedBoxesAll[inds,j,1,:])).astype(np.float32, copy=False) 636 | #print('rotatedBoxesClass :', rotatedBoxesClass.shape) 637 | 638 | cls_dets_temp_rotated = np.hstack((rotatedBoxesAll[inds,j,0,:], rotatedBoxesAll[inds,j,1,:], cls_scores[:, np.newaxis])) \ 639 | .astype(np.float32, copy=False) 640 | 641 | 642 | cls_dets_temp = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ 643 | .astype(np.float32, copy=False) 644 | 645 | #print('cls_dets_temp', cls_dets_temp.shape) 646 | 647 | cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis], cls_orient[:, np.newaxis], rpn_bboxes, rpn_sscores)) \ 648 | .astype(np.float32, copy=False) 649 | 650 | '''keep = py_cpu_nms(cls_dets_temp_rotated, 0.4) 651 | cls_dets = cls_dets[keep, :] 652 | rotatedBoxesClass = rotatedBoxesClass[keep, :] 653 | cls_dets_temp = cls_dets_temp[keep,:]''' 654 | 655 | keep = nms(cls_dets_temp, cfg.TEST.NMS) 656 | #keep = nms(cls_dets_temp, 0.3) 657 | 658 | cls_dets = cls_dets[keep, :] 659 | rotatedBoxesClass = rotatedBoxesClass[keep, :] 660 | 661 | '''cls_dets_temp_rotated = cls_dets_temp_rotated[keep,:] 662 | keep = py_cpu_nms(cls_dets_temp_rotated, 0.4) 663 | #print('keep :', keep) 664 | cls_dets = cls_dets[keep, :] 665 | rotatedBoxesClass = rotatedBoxesClass[keep, :]''' 666 | 667 | all_final_boxes[j][i] = cls_dets 668 | all_final_boxes_rotated[j][i] = rotatedBoxesClass 669 | #print('rotatedBoxesClass :', rotatedBoxesClass.shape, cls_dets.shape) 670 | 671 | #print('all_final_boxes_rotated :', all_final_boxes_rotated) 672 | #print('all_final_boxes :', all_final_boxes) 673 | # Limit to max_per_image detections *over all classes* 674 | 675 | if max_per_image > 0: 676 | image_scores = np.hstack([all_final_boxes[j][i][:, 4] 677 | for j in xrange(1, imdb.num_classes)]) 678 | 679 | if len(image_scores) > max_per_image: 680 | image_thresh = np.sort(image_scores)[-max_per_image] 681 | for j in xrange(1, imdb.num_classes): 682 | keep = np.where(all_final_boxes[j][i][:, -1] >= image_thresh)[0] 683 | all_final_boxes[j][i] = all_final_boxes[j][i][keep, :] 684 | all_final_boxes_rotated[j][i] = all_final_boxes_rotated[j][i][keep, :] 685 | 686 | 687 | for j in xrange(1, imdb.num_classes): 688 | #rpn_bo = np.array([616, 405, 825, 556]) 689 | #rpn_bo = np.array([231,129,621,939]) 690 | rpn_bo = np.array([208, 58, 2243, 1094]) 691 | #im = vis_detections_final(im, imdb.classes[j], all_final_boxes[j][i], filename, 0.65) 692 | im,cntG,cntR, cG, cR = vis_detections_final(im, imdb.classes[j], all_final_boxes[j][i], filename, 0.75, cntG,cntR, cG, cR, rpn_sscores, rpn_bo, all_final_boxes_rotated[j][i]) 693 | #print hi 694 | 695 | fname = foldername_all + filename + '.jpg' 696 | print fname 697 | cv2.imwrite(fname, im) 698 | 699 | 700 | _t['misc'].toc() 701 | print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ 702 | .format(i + 1, num_images, _t['im_detect'].average_time, 703 | _t['misc'].average_time) 704 | 705 | 706 | #print('all_rpn_boxes', len(all_rpn_boxes), len(all_final_boxes), output_dir) 707 | #print ('Evaluating RPN detections for top Proposals: ' + str(ntopProp[t]) ) 708 | #imdb.evaluate_rpn(all_rpn_boxes, output_dir, ntopProp[t]) 709 | 710 | #print ('Evaluating detections for top Proposals: ' + str(ntopProp[t]) ) 711 | #imdb.evaluate_detections(all_final_boxes, output_dir,ntopProp[t]) 712 | 713 | 714 | def trans_box1(final_boxes,T_final, T11): 715 | final_boxes = final_boxes.reshape(1,12) 716 | final_boxes_final = 
np.zeros((len(final_boxes), 3, 2, 4))
717 |     #print('final_boxes :', final_boxes.shape, final_boxes_final.shape)
718 | 
719 |     for k in range(0, len(final_boxes)):
720 |         #print('k :', k)
721 | 
722 |         class1 = final_boxes[k,0:4]
723 |         class2 = final_boxes[k,4:8]
724 |         class3 = final_boxes[k,8:12]
725 | 
726 |         box1 = [ class1[0] , class1[1] , class1[2] , class1[3] ]
727 |         box2 = [ class2[0] , class2[1] , class2[2] , class2[3] ]
728 |         box3 = [ class3[0] , class3[1] , class3[2] , class3[3] ]
729 | 
730 |         class1_out = trans_layer1(T_final, T11, box1)
731 |         class2_out = trans_layer1(T_final, T11, box2)
732 |         class3_out = trans_layer1(T_final, T11, box3)
733 | 
734 |         final_boxes_final[k,0,:,:] = class1_out
735 |         final_boxes_final[k,1,:,:] = class2_out
736 |         final_boxes_final[k,2,:,:] = class3_out
737 |         #final_boxes_final[k,:] = [ class1_out[0], class1_out[1], class1_out[2], class1_out[3], class2_out[0], class2_out[1], class2_out[2], class2_out[3], class3_out[0], class3_out[1], class3_out[2], class3_out[3]]
738 | 
739 |     return final_boxes_final
740 | 
741 | def trans_layer1(T_final, T11, final_b):
742 |     # undo T11, then T_final; returns the box's transformed 2x4 corner coordinates
743 |     nT0 = inv(T11)
744 |     ncorner_pts = [[final_b[0],final_b[2],final_b[0],final_b[2]],[final_b[1],final_b[1],final_b[3],final_b[3]],[1,1,1,1]]
745 |     nboxx = np.dot(nT0[0:2,:], ncorner_pts)
746 |     rxymin_nb = nboxx.min(1)
747 |     rxymax_nb = nboxx.max(1)
748 | 
749 |     T2 = inv(T_final)
750 |     boxx2 = np.dot(T2[0:2,:], [nboxx[0], nboxx[1], [1,1,1,1]])
751 | 
752 |     #print('nboxx', nboxx.shape, boxx2.shape)
753 | 
754 |     #fin_cropped_box = [rxymin_nb[0], rxymin_nb[1], rxymin_nb[0], rxymin_nb[1]]
755 | 
756 | 
757 | 
758 |     return boxx2
759 | 
--------------------------------------------------------------------------------
/faster_rcnn_test.pt:
--------------------------------------------------------------------------------
1 | name: "VGG_ILSVRC_16_layers"
2 | 
3 | input: "data"
4 | input_shape {
5 |   dim: 1
6 |   dim: 3
7 |   dim: 224
8 |   dim: 224
9 | }
10 | 
11 | input: "im_info"
12 | input_shape {
13 |   dim: 1
14 |   dim: 3
15 | }
16 | 
17 | 
18 | layer {
19 |   name: "conv1_1"
20 |   type: "Convolution"
21 |   bottom: "data"
22 |   top: "conv1_1"
23 |   convolution_param {
24 |     num_output: 64
25 |     pad: 1 kernel_size: 3
26 |   }
27 | }
28 | layer {
29 |   name: "relu1_1"
30 |   type: "ReLU"
31 |   bottom: "conv1_1"
32 |   top: "conv1_1"
33 | }
34 | layer {
35 |   name: "conv1_2"
36 |   type: "Convolution"
37 |   bottom: "conv1_1"
38 |   top: "conv1_2"
39 |   convolution_param {
40 |     num_output: 64
41 |     pad: 1 kernel_size: 3
42 |   }
43 | }
44 | layer {
45 |   name: "relu1_2"
46 |   type: "ReLU"
47 |   bottom: "conv1_2"
48 |   top: "conv1_2"
49 | }
50 | layer {
51 |   name: "pool1"
52 |   type: "Pooling"
53 |   bottom: "conv1_2"
54 |   top: "pool1"
55 |   pooling_param {
56 |     pool: MAX
57 |     kernel_size: 2 stride: 2
58 |   }
59 | }
60 | layer {
61 |   name: "conv2_1"
62 |   type: "Convolution"
63 |   bottom: "pool1"
64 |   top: "conv2_1"
65 |   convolution_param {
66 |     num_output: 128
67 |     pad: 1 kernel_size: 3
68 |   }
69 | }
70 | layer {
71 |   name: "relu2_1"
72 |   type: "ReLU"
73 |   bottom: "conv2_1"
74 |   top: "conv2_1"
75 | }
76 | layer {
77 |   name: "conv2_2"
78 |   type: "Convolution"
79 |   bottom: "conv2_1"
80 |   top: "conv2_2"
81 |   convolution_param {
82 |     num_output: 128
83 |     pad: 1 kernel_size: 3
84 |   }
85 | }
86 | layer {
87 |   name: "relu2_2"
88 |   type: "ReLU"
89 |   bottom: "conv2_2"
90 |   top: "conv2_2"
91 | }
92 | layer {
93 |   name: "pool2"
94 |   type: "Pooling"
95 |   bottom: "conv2_2"
96 |   top: "pool2"
97 |   pooling_param {
98 |     pool: MAX
99 |     kernel_size: 2 stride: 2
100 |   }
101 | }
102 | layer {
103
| name: "conv3_1" 104 | type: "Convolution" 105 | bottom: "pool2" 106 | top: "conv3_1" 107 | convolution_param { 108 | num_output: 256 109 | pad: 1 kernel_size: 3 110 | } 111 | } 112 | layer { 113 | name: "relu3_1" 114 | type: "ReLU" 115 | bottom: "conv3_1" 116 | top: "conv3_1" 117 | } 118 | layer { 119 | name: "conv3_2" 120 | type: "Convolution" 121 | bottom: "conv3_1" 122 | top: "conv3_2" 123 | convolution_param { 124 | num_output: 256 125 | pad: 1 kernel_size: 3 126 | } 127 | } 128 | layer { 129 | name: "relu3_2" 130 | type: "ReLU" 131 | bottom: "conv3_2" 132 | top: "conv3_2" 133 | } 134 | layer { 135 | name: "conv3_3" 136 | type: "Convolution" 137 | bottom: "conv3_2" 138 | top: "conv3_3" 139 | convolution_param { 140 | num_output: 256 141 | pad: 1 kernel_size: 3 142 | } 143 | } 144 | layer { 145 | name: "relu3_3" 146 | type: "ReLU" 147 | bottom: "conv3_3" 148 | top: "conv3_3" 149 | } 150 | layer { 151 | name: "pool3" 152 | type: "Pooling" 153 | bottom: "conv3_3" 154 | top: "pool3" 155 | pooling_param { 156 | pool: MAX 157 | kernel_size: 2 stride: 2 158 | } 159 | } 160 | layer { 161 | name: "conv4_1" 162 | type: "Convolution" 163 | bottom: "pool3" 164 | top: "conv4_1" 165 | convolution_param { 166 | num_output: 512 167 | pad: 1 kernel_size: 3 168 | } 169 | } 170 | layer { 171 | name: "relu4_1" 172 | type: "ReLU" 173 | bottom: "conv4_1" 174 | top: "conv4_1" 175 | } 176 | layer { 177 | name: "conv4_2" 178 | type: "Convolution" 179 | bottom: "conv4_1" 180 | top: "conv4_2" 181 | convolution_param { 182 | num_output: 512 183 | pad: 1 kernel_size: 3 184 | } 185 | } 186 | layer { 187 | name: "relu4_2" 188 | type: "ReLU" 189 | bottom: "conv4_2" 190 | top: "conv4_2" 191 | } 192 | layer { 193 | name: "conv4_3" 194 | type: "Convolution" 195 | bottom: "conv4_2" 196 | top: "conv4_3" 197 | convolution_param { 198 | num_output: 512 199 | pad: 1 kernel_size: 3 200 | } 201 | } 202 | layer { 203 | name: "relu4_3" 204 | type: "ReLU" 205 | bottom: "conv4_3" 206 | top: "conv4_3" 207 | } 208 | layer { 209 | name: "pool4" 210 | type: "Pooling" 211 | bottom: "conv4_3" 212 | top: "pool4" 213 | pooling_param { 214 | pool: MAX 215 | kernel_size: 2 stride: 2 216 | } 217 | } 218 | layer { 219 | name: "conv5_1" 220 | type: "Convolution" 221 | bottom: "pool4" 222 | top: "conv5_1" 223 | convolution_param { 224 | num_output: 512 225 | pad: 1 kernel_size: 3 226 | } 227 | } 228 | layer { 229 | name: "relu5_1" 230 | type: "ReLU" 231 | bottom: "conv5_1" 232 | top: "conv5_1" 233 | } 234 | layer { 235 | name: "conv5_2" 236 | type: "Convolution" 237 | bottom: "conv5_1" 238 | top: "conv5_2" 239 | convolution_param { 240 | num_output: 512 241 | pad: 1 kernel_size: 3 242 | } 243 | } 244 | layer { 245 | name: "relu5_2" 246 | type: "ReLU" 247 | bottom: "conv5_2" 248 | top: "conv5_2" 249 | } 250 | layer { 251 | name: "conv5_3" 252 | type: "Convolution" 253 | bottom: "conv5_2" 254 | top: "conv5_3" 255 | param { 256 | name: "conv5_3_w" 257 | } 258 | param { 259 | name: "conv5_3_b" 260 | } 261 | convolution_param { 262 | num_output: 512 263 | pad: 1 kernel_size: 3 264 | } 265 | } 266 | layer { 267 | name: "relu5_3" 268 | type: "ReLU" 269 | bottom: "conv5_3" 270 | top: "conv5_3" 271 | } 272 | 273 | #========= RPN ============ 274 | 275 | layer { 276 | name: "rpn_conv/3x3" 277 | type: "Convolution" 278 | bottom: "conv5_3" 279 | top: "rpn/output" 280 | param { 281 | name: "rpn/output_w" 282 | } 283 | param { 284 | name: "rpn/output_b" 285 | } 286 | convolution_param { 287 | num_output: 512 288 | kernel_size: 3 pad: 1 stride: 1 289 | } 
290 | } 291 | layer { 292 | name: "rpn_relu/3x3" 293 | type: "ReLU" 294 | bottom: "rpn/output" 295 | top: "rpn/output" 296 | } 297 | 298 | layer { 299 | name: "rpn_cls_score" 300 | type: "Convolution" 301 | bottom: "rpn/output" 302 | top: "rpn_cls_score" 303 | param { 304 | name: "rpn_cls_score_w" 305 | } 306 | param { 307 | name: "rpn_cls_score_b" 308 | } 309 | convolution_param { 310 | num_output: 18 311 | kernel_size: 1 pad: 0 stride: 1 312 | } 313 | } 314 | 315 | layer { 316 | name: "rpn_bbox_pred" 317 | type: "Convolution" 318 | bottom: "rpn/output" 319 | top: "rpn_bbox_pred" 320 | param { 321 | name: "rpn_bbox_pred_w" 322 | } 323 | param { 324 | name: "rpn_bbox_pred_b" 325 | } 326 | convolution_param { 327 | num_output: 36 # 4 * 9(anchors) 328 | kernel_size: 1 pad: 0 stride: 1 329 | } 330 | } 331 | 332 | layer { 333 | bottom: "rpn_cls_score" 334 | top: "rpn_cls_score_reshape" 335 | name: "rpn_cls_score_reshape" 336 | type: "Reshape" 337 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 338 | } 339 | 340 | #========= RoI Proposal ============ 341 | 342 | layer { 343 | name: "rpn_cls_prob" 344 | type: "Softmax" 345 | bottom: "rpn_cls_score_reshape" 346 | top: "rpn_cls_prob" 347 | } 348 | layer { 349 | name: 'rpn_cls_prob_reshape' 350 | type: 'Reshape' 351 | bottom: 'rpn_cls_prob' 352 | top: 'rpn_cls_prob_reshape' 353 | reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } 354 | } 355 | layer { 356 | name: 'proposal' 357 | type: 'Python' 358 | bottom: 'rpn_cls_prob_reshape' 359 | bottom: 'rpn_bbox_pred' 360 | bottom: 'im_info' 361 | top: 'rois' 362 | top: 'scores' 363 | python_param { 364 | module: 'rpn.proposal_layer' 365 | layer: 'ProposalLayer' 366 | param_str: "'feat_stride': 16" 367 | } 368 | } 369 | 370 | #========= RCNN ============ 371 | 372 | layer { 373 | name: "roi_pool5" 374 | type: "ROIPooling" 375 | bottom: "conv5_3" 376 | bottom: "rois" 377 | top: "pool5" 378 | roi_pooling_param { 379 | pooled_w: 7 380 | pooled_h: 7 381 | spatial_scale: 0.0625 # 1/16 382 | } 383 | } 384 | layer { 385 | name: "fc6" 386 | type: "InnerProduct" 387 | bottom: "pool5" 388 | top: "fc6" 389 | param { 390 | name: "fc6_w" 391 | } 392 | param { 393 | name: "fc6_b" 394 | } 395 | inner_product_param { 396 | num_output: 4096 397 | } 398 | } 399 | layer { 400 | name: "relu6" 401 | type: "ReLU" 402 | bottom: "fc6" 403 | top: "fc6" 404 | } 405 | layer { 406 | name: "fc7" 407 | type: "InnerProduct" 408 | bottom: "fc6" 409 | top: "fc7" 410 | param { 411 | name: "fc7_w" 412 | } 413 | param { 414 | name: "fc7_b" 415 | } 416 | inner_product_param { 417 | num_output: 4096 418 | } 419 | } 420 | layer { 421 | name: "relu7" 422 | type: "ReLU" 423 | bottom: "fc7" 424 | top: "fc7" 425 | } 426 | layer { 427 | name: "cls_score" 428 | type: "InnerProduct" 429 | bottom: "fc7" 430 | top: "cls_score" 431 | param { 432 | name: "cls_score_w" 433 | } 434 | param { 435 | name: "cls_score_b" 436 | } 437 | inner_product_param { 438 | num_output: 3 439 | } 440 | } 441 | layer { 442 | name: "orient_pred" 443 | type: "InnerProduct" 444 | bottom: "fc7" 445 | top: "orient_score" 446 | param { 447 | name: "orient_pred_w" 448 | } 449 | param { 450 | name: "orient_pred_b" 451 | } 452 | inner_product_param { 453 | num_output: 8 454 | } 455 | } 456 | layer { 457 | name: "bbox_pred" 458 | type: "InnerProduct" 459 | bottom: "fc7" 460 | top: "bbox_pred" 461 | param { 462 | name: "bbox_pred_w" 463 | } 464 | param { 465 | name: "bbox_pred_b" 466 | } 467 | inner_product_param { 468 | num_output: 12 469 | } 470 | } 471 | 472 | 
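# Phase-1 heads above: cls_score (num_output 3: background / Gun / Riffle, the
# class set used in fast_rcnn/test.py), orient_score (8 orientation bins,
# matching the 8 theta values listed in test.py), and bbox_pred (12 = 4 box
# coordinates x 3 classes). The layers below convert these scores to
# probabilities and drive the orientation-aware second pass: rois_making
# refines the proposals, roi_warping rotates conv5_3 per predicted
# orientation, and the fc61/fc71/cls_score1/bbox_pred1 head re-scores the
# warped RoIs.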
layer { 473 | name: "cls_prob" 474 | type: "Softmax" 475 | bottom: "cls_score" 476 | top: "cls_prob" 477 | } 478 | layer { 479 | name: "orient_prob" 480 | type: "Softmax" 481 | bottom: "orient_score" 482 | top: "orient_prob" 483 | } 484 | layer { 485 | name: "silense" 486 | type: "Silence" 487 | bottom: "scores" 488 | } 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | layer { 497 | name: "rois_making" 498 | type: "Python" 499 | bottom: "rois" 500 | bottom: "bbox_pred" 501 | bottom: "im_info" 502 | bottom: "cls_prob" 503 | top: "rois1" 504 | python_param { 505 | module: 'rpn.makebboxproposals' 506 | layer: 'makebBox_regionProposal' 507 | #param_str: "'feat_stride': 16" 508 | } 509 | } 510 | 511 | 512 | 513 | 514 | 515 | layer { 516 | name: "roi_warping" 517 | type: "Python" 518 | bottom: "conv5_3" 519 | bottom: "rois1" 520 | bottom: "orient_prob" 521 | top: "warpedMap" 522 | top: "warpedrois" 523 | top: "transApplied" 524 | python_param { 525 | module: 'rpn.ftMap_Warp_2' 526 | layer: 'fMapWarpLayerSep' 527 | #param_str: "'feat_stride': 16" 528 | } 529 | } 530 | 531 | layer { 532 | name: "roi_pool51" 533 | type: "ROIPooling" 534 | bottom: "warpedMap" 535 | bottom: "warpedrois" 536 | top: "pool51" 537 | roi_pooling_param { 538 | pooled_w: 7 539 | pooled_h: 7 540 | spatial_scale: 0.0625 # 1/16 541 | } 542 | } 543 | 544 | 545 | 546 | 547 | 548 | 549 | layer { 550 | name: "fc61" 551 | type: "InnerProduct" 552 | bottom: "pool51" 553 | top: "fc61" 554 | param { 555 | lr_mult: 1 556 | } 557 | param { 558 | lr_mult: 2 559 | } 560 | inner_product_param { 561 | num_output: 4096 562 | } 563 | } 564 | layer { 565 | name: "relu61" 566 | type: "ReLU" 567 | bottom: "fc61" 568 | top: "fc61" 569 | } 570 | 571 | 572 | layer { 573 | name: "fc71" 574 | type: "InnerProduct" 575 | bottom: "fc61" 576 | top: "fc71" 577 | param { 578 | lr_mult: 1 579 | } 580 | param { 581 | lr_mult: 2 582 | } 583 | inner_product_param { 584 | num_output: 4096 585 | } 586 | } 587 | layer { 588 | name: "relu71" 589 | type: "ReLU" 590 | bottom: "fc71" 591 | top: "fc71" 592 | } 593 | 594 | layer { 595 | name: "cls_score1" 596 | type: "InnerProduct" 597 | bottom: "fc71" 598 | top: "cls_score1" 599 | param { 600 | lr_mult: 1 601 | } 602 | param { 603 | lr_mult: 2 604 | } 605 | inner_product_param { 606 | num_output: 3 607 | weight_filler { 608 | type: "gaussian" 609 | std: 0.01 610 | } 611 | bias_filler { 612 | type: "constant" 613 | value: 0 614 | } 615 | } 616 | } 617 | 618 | layer { 619 | name: "bbox_pred1" 620 | type: "InnerProduct" 621 | bottom: "fc71" 622 | top: "bbox_pred1" 623 | param { 624 | lr_mult: 1 625 | } 626 | param { 627 | lr_mult: 2 628 | } 629 | inner_product_param { 630 | num_output: 12 631 | weight_filler { 632 | type: "gaussian" 633 | std: 0.001 634 | } 635 | bias_filler { 636 | type: "constant" 637 | value: 0 638 | } 639 | } 640 | } 641 | 642 | 643 | 644 | layer { 645 | name: "cls_prob1" 646 | type: "Softmax" 647 | bottom: "cls_score1" 648 | top: "cls_prob1" 649 | } 650 | 651 | #layer { 652 | # name: "bbox_prob1" 653 | # type: "Softmax" 654 | # bottom: "bbox_pred1" 655 | # top: "bbox_prob1" 656 | #} 657 | 658 | 659 | 660 | -------------------------------------------------------------------------------- /ftMap_Warp_2.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Written by CVML 3 | # -------------------------------------------------------- 4 | 5 | 6 | 7 | from fast_rcnn.config import cfg 8 | from 
fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv 9 | from fast_rcnn.ftmap_transform import transformer_layer_fMapSep as fMap_trans_layer 10 | from fast_rcnn.ftmap_transform import transformer_layer_fMapSep_backward as fMap_trans_layer_backward 11 | import argparse 12 | from utils.timer import Timer 13 | import numpy as np 14 | import cv2 15 | from numpy.linalg import inv 16 | import caffe 17 | from fast_rcnn.nms_wrapper import nms 18 | import cPickle 19 | from utils.blob import im_list_to_blob 20 | import os 21 | import matplotlib.pyplot as plt 22 | import xml.etree.ElementTree as ET 23 | import gc 24 | import yaml 25 | 26 | 27 | DEBUG = False 28 | 29 | class fMapWarpLayerSep(caffe.Layer): 30 | """ 31 | Transforms the feature map and the corresponding bounding boxes with respect to the predicted orientation angle. 32 | """ 33 | 34 | def setup(self, bottom, top): 35 | 36 | pass 37 | 38 | def forward(self, bottom, top): 39 | 40 | assert bottom[0].data.shape[0] == 1, \ 41 | 'Only single item batches are supported' 42 | 43 | 44 | # bottoms: conv5_3 feature map, refined proposals (rois1) and 45 | # the per-proposal orientation probabilities 46 | conv_feat = bottom[0].data 47 | rpn_boxes = bottom[1].data 48 | angle = bottom[2].data 49 | 50 | out_feat, ross, transApplied, T_final = warp_fMap(conv_feat, rpn_boxes, angle) 51 | 52 | blob = np.rollaxis(out_feat, 3, 1) 53 | 54 | top[0].reshape(*(blob.shape)) 55 | top[0].data[...] = blob 56 | top[1].reshape(*(ross.shape)) 57 | top[1].data[...] = ross 58 | 59 | top[2].reshape(*(transApplied.shape)) 60 | top[2].data[...] = transApplied 61 | 62 | #top[3].reshape(*(T_final.shape)) 63 | #top[3].data[...] = T_final 64 | 65 | #print('blob', blob.shape, ross.shape) 66 | 67 | def backward(self, top, propagate_down, bottom): 68 | """Warps the incoming gradient map back; the result is never written to bottom, so no gradients are actually propagated.""" 69 | 70 | grad_warpMap = top[0].diff 71 | #rpn_boxes = bottom[1].data 72 | rpn_boxes_gwm = top[1].data 73 | angle = bottom[2].data 74 | 75 | in_gwm = np.rollaxis(grad_warpMap, 1, 4) 76 | 77 | out_gwm, rotated_gwm, transApplied_gwm = fMap_trans_layer_backward(in_gwm, angle, rpn_boxes_gwm) 78 | 79 | 80 | def reshape(self, bottom, top): 81 | """Reshaping happens during the call to forward.""" 82 | top[0].reshape(*bottom[0].shape) 83 | top[1].reshape(*bottom[1].shape) 84 | top[2].reshape(*bottom[2].shape) 85 | #pass 86 | 87 | 88 | 89 | def warp_fMap(conv_feat, rpn_boxes, angle): 90 | 91 | 92 | #angle = 22.5 93 | 94 | #print conv_feat.shape 95 | in_feat = np.rollaxis(conv_feat, 1, 4) 96 | #print in_feat.sum() 97 | 98 | 99 | top_proposals_pass_2 = 50 100 | temp_boxes = None 101 | 102 | out_feat, rotated_rpns, transApplied, T_final = fMap_trans_layer(in_feat, angle, rpn_boxes) 103 | 104 | ross = rotated_rpns 105 | 106 | ross = np.array(ross) 107 | #print('ross :', ross.shape) 108 | 109 | transApplied = np.array(transApplied) 110 | 111 | return out_feat, ross, transApplied, T_final 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /ftmap_transform.py: -------------------------------------------------------------------------------- 1 | # Modified & Written by CVML 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | import re 7 | import math 8 | from numpy.linalg import inv 9 | from numpy import linalg 10 | from numpy import matrix 11 | import xml.etree.ElementTree as ET 12 | 13 | def transformer_layer(input_fmap, angle, box, output_tr_flag, out_dims=None, **kwargs): 14 | 15 | 16 | B = np.shape(input_fmap)[0] 17 | H = np.shape(input_fmap)[1]
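# All warps in this file follow the same recipe: with points as column
# vectors [x, y, 1]^T, a homogeneous transform T = T3^T . T2^T . T1^T is
# built, where T1 shifts the rotation centre to the origin, T2 rotates by
# `angle` degrees (T2^T is the standard counter-clockwise rotation, e.g.
# 90 degrees maps (2, 0) to (0, 2)), and T3 shifts the centre back; a final
# translation T4 then moves the rotated corners to non-negative coordinates.
# Output pixels are sampled by mapping them back through inv(T_final) in
# affine_grid_generator below.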
18 | W = np.shape(input_fmap)[2] 19 | C = np.shape(input_fmap)[3] 20 | 21 | 22 | cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2]) 23 | 24 | T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]] 25 | T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]]) 26 | T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]] 27 | 28 | T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1))) 29 | 30 | corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]] 31 | 32 | trans_cpoints = np.dot(T[0:2,:],corner_pts) 33 | 34 | xymin = trans_cpoints.min(1) 35 | xymax = trans_cpoints.max(1) 36 | 37 | out_H = np.int32(xymax[1] - xymin[1] + 1) 38 | out_W = np.int32(xymax[0] - xymin[0] + 1) 39 | 40 | out_fmap_size = [0,0,out_W, out_H] 41 | 42 | T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]]; 43 | 44 | T_final = np.dot(np.transpose(T4),T); 45 | 46 | rect_pts = [ [box[0],box[2], box[0], box[2]] ,[box[1],box[1],box[3],box[3]],[1,1,1,1]] 47 | 48 | trans_rpoints = np.dot(T_final[0:2,:],rect_pts) 49 | 50 | rxymin = trans_rpoints.min(1) 51 | rxymax = trans_rpoints.max(1) 52 | 53 | cropped_box = [np.int32(np.floor(rxymin[0])), np.int32(np.floor(rxymin[1])), np.int32(np.floor(rxymax[0])), np.int32(np.floor(rxymax[1]))] 54 | 55 | intSec1, intSec2 = LinesIntersectionForLargestBox(trans_rpoints, np.array(rect_pts), angle) 56 | height_deltas = [intSec1[1]-cropped_box[1], cropped_box[3]-intSec2[1]] 57 | 58 | if output_tr_flag == True: 59 | 60 | return cropped_box 61 | 62 | else: 63 | 64 | batch_grids = affine_grid_generator(out_H, out_W, T_final) 65 | 66 | x_s = batch_grids[:,0, :, :] 67 | y_s = batch_grids[:,1, :, :] 68 | 69 | out_fmap = bilinear_sampler_Interpol(input_fmap, x_s, y_s) # Interpolation with in bbox and extend outside using Ia 70 | 71 | negative_flag = False 72 | if cropped_box[0] < 0: 73 | 74 | cropped_box[2] = int(cropped_box[2]) - int(cropped_box[0]) 75 | cropped_box[0] = int(cropped_box[0]) - int(cropped_box[0]) 76 | 77 | if cropped_box[1] < 0: 78 | 79 | cropped_box[3] = int(cropped_box[3]) - int(cropped_box[1]) 80 | cropped_box[1] = int(cropped_box[1]) - int(cropped_box[1]) 81 | 82 | 83 | f_map = out_fmap[:, cropped_box[1]:cropped_box[3] , cropped_box[0]:cropped_box[2] ,:] 84 | 85 | return f_map, T_final, cropped_box, negative_flag, trans_rpoints, trans_cpoints, out_fmap_size, height_deltas 86 | 87 | 88 | def affine_grid_generator(H, W, theta): 89 | 90 | 91 | # create normalized 2D grid 92 | x = np.arange(W) 93 | y = np.arange(H) 94 | 95 | x_t, y_t = np.meshgrid(x, y) 96 | 97 | # flatten 98 | x_t_flat = np.reshape(x_t, (-1)) 99 | y_t_flat = np.reshape(y_t, (-1)) 100 | 101 | # reshape to [x_t, y_t , 1] - (homogeneous form) 102 | ones = np.ones((np.shape(x_t_flat)[0])) 103 | 104 | sampling_grid = np.stack([x_t_flat, y_t_flat, ones]) 105 | 106 | # transform the sampling grid - batch multiply 107 | theta_inv = np.linalg.inv(theta) 108 | 109 | out_sampGrid = np.dot(theta_inv[0:2,:], sampling_grid) 110 | 111 | # batch grid has shape (num_batch, 2, H*W) 112 | # reshape to (num_batch, 2, H, W) 113 | batch_grids = out_sampGrid.reshape((1, 2, H, W)) 114 | 115 | return batch_grids 116 | 117 | def bilinear_sampler(input_fmap, x, y): 118 | 119 | # prepare useful params 120 | B = np.shape(input_fmap)[0] 121 | H = np.shape(input_fmap)[1] 122 | W = np.shape(input_fmap)[2] 123 | C = np.shape(input_fmap)[3] 124 | 125 | max_y = np.int32(H - 1) 126 | max_x = np.int32(W - 1) 127 | 128 | zero = np.zeros([], dtype='int32') 129 
| 130 | # grab 4 nearest corner points for each (x_i, y_i) 131 | # i.e. we need a rectangle around the point of interest 132 | x0 = np.int32(np.floor(x)) 133 | x1 = np.int32(x0 + 1) 134 | y0 = np.int32(np.floor(y)) 135 | y1 = np.int32(y0 + 1) 136 | 137 | 138 | # clip to range [0, H/W] to not violate img boundaries 139 | x0 = np.clip(x0, zero, max_x) 140 | x1 = np.clip(x1, zero, max_x) 141 | y0 = np.clip(y0, zero, max_y) 142 | y1 = np.clip(y1, zero, max_y) 143 | 144 | # get pixel value at corner coords 145 | Ia = input_fmap[0,y0,x0,:] 146 | Ib = input_fmap[0,y1,x0,:] 147 | Ic = input_fmap[0,y0,x1,:] 148 | Id = input_fmap[0,y1,x1,:] 149 | 150 | # recast as float for delta calculation 151 | x0 = np.float32(x0) 152 | x1 = np.float32(x1) 153 | y0 = np.float32(y0) 154 | y1 = np.float32(y1) 155 | 156 | # calculate deltas 157 | wa = (x1-x) * (y1-y) 158 | wb = (x1-x) * (y-y0) 159 | wc = (x-x0) * (y1-y) 160 | wd = (x-x0) * (y-y0) 161 | 162 | 163 | # add channel dimension for broadcasting 164 | wa = np.expand_dims(wa,axis=3) 165 | wb = np.expand_dims(wb,axis=3) 166 | wc = np.expand_dims(wc,axis=3) 167 | wd = np.expand_dims(wd,axis=3) 168 | 169 | wa[np.where(wa<0)]=0 170 | wb[np.where(wb<0)]=0 171 | wc[np.where(wc<0)]=0 172 | wd[np.where(wd<0)]=0 173 | 174 | # weighted sum of the four corner values gives the bilinear estimate 175 | output_fmap = wa*Ia + wb*Ib + wc*Ic + wd*Id 176 | return output_fmap 177 | 178 | def bilinear_sampler_Interpol(input_fmap, x, y): 179 | 180 | # prepare useful params 181 | B = np.shape(input_fmap)[0] 182 | H = np.shape(input_fmap)[1] 183 | W = np.shape(input_fmap)[2] 184 | C = np.shape(input_fmap)[3] 185 | 186 | max_y = np.int32(H - 1) 187 | max_x = np.int32(W - 1) 188 | 189 | zero = np.zeros([], dtype='int32') 190 | 191 | # grab 4 nearest corner points for each (x_i, y_i) 192 | # i.e. we need a rectangle around the point of interest 193 | x0 = np.int32(np.floor(x)) 194 | x1 = np.int32(x0 + 1) 195 | y0 = np.int32(np.floor(y)) 196 | y1 = np.int32(y0 + 1) 197 | 198 | # mask marks samples whose un-clipped floor coordinates fall inside the map 199 | ad1 = 1*(x0>=0) 200 | ad2 = 1*(x0<=max_x) 201 | ad3 = 1*(y0>=0) 202 | ad4 = 1*(y0<=max_y) 203 | 204 | maskx = 1*(ad1[0,:,:] * ad2[0,:,:]) 205 | masky = 1*(ad3[0,:,:] * ad4[0,:,:]) 206 | mask = maskx*masky 207 | mask = mask.reshape(1,mask.shape[0], mask.shape[1], 1) 208 | 209 | 210 | # clip to range [0, H/W] to not violate img boundaries 211 | x0 = np.clip(x0, zero, max_x) 212 | x1 = np.clip(x1, zero, max_x) 213 | y0 = np.clip(y0, zero, max_y) 214 | y1 = np.clip(y1, zero, max_y) 215 | 216 | # get pixel value at corner coords 217 | Ia = input_fmap[0,y0,x0,:] 218 | Ib = input_fmap[0,y1,x0,:] 219 | Ic = input_fmap[0,y0,x1,:] 220 | Id = input_fmap[0,y1,x1,:] 221 | 222 | # recast as float for delta calculation 223 | x0 = np.float32(x0) 224 | x1 = np.float32(x1) 225 | y0 = np.float32(y0) 226 | y1 = np.float32(y1) 227 | 228 | # calculate deltas 229 | wa = (x1-x) * (y1-y) 230 | wb = (x1-x) * (y-y0) 231 | wc = (x-x0) * (y1-y) 232 | wd = (x-x0) * (y-y0) 233 | 234 | 235 | # add channel dimension for broadcasting 236 | wa = np.expand_dims(wa,axis=3) 237 | wb = np.expand_dims(wb,axis=3) 238 | wc = np.expand_dims(wc,axis=3) 239 | wd = np.expand_dims(wd,axis=3) 240 | 241 | wa[np.where(wa<0)]=0 242 | wb[np.where(wb<0)]=0 243 | wc[np.where(wc<0)]=0 244 | wd[np.where(wd<0)]=0 245 | 246 | # compute output: bilinear interpolation inside the valid (masked) region 247 | output_fmap = (wa*Ia + wb*Ib + wc*Ic + wd*Id) 248 | outM = output_fmap.copy() 249 | outM = outM * mask 250 | 251 | # outside the valid region extend with the clipped corner value Ia 252 | mask0 = 1*(mask==0) 253 | conVals = Ia * mask0 254 | output_fmap = conVals + outM 255 | 256 | return output_fmap 257 | 258 | 259 | 260 | 261 | def 
LinesIntersectionForLargestBox(trans_rpoints, rect_pts, theta): 262 | 263 | def line(p1, p2): 264 | A = (p1[1] - p2[1]) 265 | B = (p2[0] - p1[0]) 266 | C = (p1[0]*p2[1] - p2[0]*p1[1]) 267 | return A, B, -C 268 | 269 | def intersection(L1, L2): 270 | D = L1[0] * L2[1] - L1[1] * L2[0] 271 | Dx = L1[2] * L2[1] - L1[1] * L2[2] 272 | Dy = L1[0] * L2[2] - L1[2] * L2[0] 273 | if D != 0: 274 | x = Dx / D 275 | y = Dy / D 276 | return x,y 277 | else: 278 | return False 279 | 280 | rxymin = trans_rpoints.min(1) 281 | rxymax = trans_rpoints.max(1) 282 | 283 | widN = rxymax[0] - rxymin[0] 284 | higN = rxymax[1] - rxymin[1] 285 | 286 | rxyminOrig = rect_pts.min(1) 287 | rxymaxOrig = rect_pts.max(1) 288 | wid = rxymaxOrig[0] - rxyminOrig[0] 289 | hig = rxymaxOrig[1] - rxyminOrig[1] 290 | 291 | 292 | # positive angle smaller width 293 | if wid<=hig and theta>=0: 294 | L1 = line([trans_rpoints[0][0], trans_rpoints[1][0]], [trans_rpoints[0][2], trans_rpoints[1][2]]) 295 | L2 = line([trans_rpoints[0][1], trans_rpoints[1][1]], [trans_rpoints[0][3], trans_rpoints[1][3]]) 296 | L3 = line([rxymin[0], rxymin[1]], [rxymax[0], rxymax[1]]) 297 | #print('Condition 01 executed') 298 | # positive angle greater width 299 | elif wid>hig and theta>=0: 300 | L1 = line([trans_rpoints[0][0], trans_rpoints[1][0]], [trans_rpoints[0][1], trans_rpoints[1][1]]) 301 | L2 = line([trans_rpoints[0][2], trans_rpoints[1][2]], [trans_rpoints[0][3], trans_rpoints[1][3]]) 302 | L3 = line([rxymax[0], rxymin[1]], [rxymin[0], rxymax[1]]) 303 | #print('Condition 02 executed') 304 | 305 | # negative angle greater width 306 | elif wid>hig and theta<0:# and (widOrig>higOrig): 307 | L1 = line([trans_rpoints[0][0], trans_rpoints[1][0]], [trans_rpoints[0][1], trans_rpoints[1][1]]) 308 | L2 = line([trans_rpoints[0][2], trans_rpoints[1][2]], [trans_rpoints[0][3], trans_rpoints[1][3]]) 309 | L3 = line([rxymin[0], rxymin[1]], [rxymax[0], rxymax[1]]) 310 | #print('Condition 03 executed') 311 | 312 | # negative angle smaller width 313 | elif (wid<=hig and theta<0): #or (widOrig<=higOrig): 314 | L1 = line([trans_rpoints[0][0], trans_rpoints[1][0]], [trans_rpoints[0][2], trans_rpoints[1][2]]) 315 | L2 = line([trans_rpoints[0][1], trans_rpoints[1][1]], [trans_rpoints[0][3], trans_rpoints[1][3]]) 316 | L3 = line([rxymax[0], rxymin[1]], [rxymin[0], rxymax[1]]) 317 | #print('Condition 04 executed') 318 | 319 | 320 | if L1 and L3: 321 | intSec1 = intersection(L1, L3) 322 | if L2 and L3: 323 | intSec2 = intersection(L2, L3) 324 | 325 | if not intSec1 or not intSec2: 326 | intSec1 = [0, 0] 327 | intSec2 = [widN, higN] 328 | 329 | return intSec1, intSec2 330 | 331 | def transformer_layer_fMap(input_fmap, angle, rpn_boxes, out_dims=None, **kwargs): 332 | 333 | 334 | B = np.shape(input_fmap)[0] 335 | H = np.shape(input_fmap)[1] 336 | W = np.shape(input_fmap)[2] 337 | C = np.shape(input_fmap)[3] 338 | 339 | 340 | #cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2]) 341 | cntr = np.asarray([H/2, W/2]) 342 | #print('Original RPN :', box) 343 | 344 | T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]] 345 | T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]]) 346 | T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]] 347 | 348 | 349 | T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1))) 350 | #T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),T1)) 351 | 352 | 353 | 354 | corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]] 355 | #print('corner_pts', 
corner_pts) 356 | 357 | trans_cpoints = np.dot(T[0:2,:],corner_pts) 358 | 359 | xymin = trans_cpoints.min(1) 360 | xymax = trans_cpoints.max(1) 361 | 362 | out_H = np.int32(xymax[1] - xymin[1] + 1) 363 | out_W = np.int32(xymax[0] - xymin[0] + 1) 364 | #print('out_W', out_W, out_H, W,H) 365 | 366 | out_fmap_size = [0,0,out_W, out_H] 367 | 368 | T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]]; 369 | #print T 370 | #print T4 371 | 372 | T_final = np.dot(np.transpose(T4),T); 373 | #print T_final 374 | 375 | 376 | tr_rotated_box_all = [] 377 | 378 | for idx in range(0, len(rpn_boxes)): 379 | 380 | box = rpn_boxes[idx,1:]/16 381 | #print('box', box) 382 | 383 | rect_pts = [ [box[0],box[2], box[0], box[2]] ,[box[1],box[1],box[3],box[3]],[1,1,1,1]] 384 | 385 | trans_rpoints = np.dot(T_final[0:2,:],rect_pts) 386 | 387 | rxymin = trans_rpoints.min(1) 388 | rxymax = trans_rpoints.max(1) 389 | 390 | cropped_box = [np.int32(np.floor(rxymin[0])), np.int32(np.floor(rxymin[1])), np.int32(np.floor(rxymax[0])), np.int32(np.floor(rxymax[1]))] 391 | #cropped_box = [rxymin[0],rxymin[1],rxymax[0],rxymax[1]] 392 | 393 | # find coordinates for maximum area inscribed rectangle 394 | intSec1, intSec2 = LinesIntersectionForLargestBox(trans_rpoints, np.array(rect_pts), angle) 395 | height_deltas = [intSec1[1]-cropped_box[1], cropped_box[3]-intSec2[1]] 396 | #print('height_deltas: ', height_deltas) 397 | 398 | tr_rotated_box = [rxymin[0], rxymin[1]+height_deltas[0], rxymax[0], rxymax[1]-height_deltas[1]] 399 | #print 'tr_cropped_box: ', tr_cropped_box 400 | tr_rotated_box = [ik * 16 for ik in tr_rotated_box] 401 | 402 | tr_rotated_box_all.append(tr_rotated_box) 403 | 404 | 405 | 406 | batch_grids = affine_grid_generator(out_H, out_W, T_final) 407 | 408 | x_s = batch_grids[:,0, :, :] 409 | y_s = batch_grids[:,1, :, :] 410 | 411 | out_fmap = bilinear_sampler(input_fmap, x_s, y_s) 412 | #print 'out_fmap ', out_fmap.shape 413 | #print 'input_fmap ', input_fmap.shape 414 | 415 | #tr_rotated_box_all = np.array(tr_rotated_box_all) 416 | return out_fmap, T_final, tr_rotated_box_all 417 | 418 | 419 | def transformer_layer_fMapSep(input_fmap, orient_scores, rpn_boxes, out_dims=None, **kwargs): 420 | 421 | theta = [0, 90, 135, 45, 157.5, 112.5, 67.5, 22.5] 422 | 423 | B = np.shape(input_fmap)[0] 424 | H = np.shape(input_fmap)[1] 425 | W = np.shape(input_fmap)[2] 426 | C = np.shape(input_fmap)[3] 427 | print('widHig', B, H, W, C) 428 | 429 | 430 | outMap = np.zeros((len(rpn_boxes), 72,72, input_fmap.shape[3]), dtype = float) 431 | #outMap = np.zeros((len(rpn_boxes), 50,50, input_fmap.shape[3])) 432 | 433 | 434 | tr_rotated_box_all = [] 435 | transApplied = [] 436 | #ang1 = np.array(np.argmax(orient_scores, axis = 1)) 437 | #print('Im here :', 1*(ang1==0), 1*(ang1==1)) 438 | #idx0 = np.where(np.logical_or((ang1 == 0)*1,(ang1 == 1)*1))[0] 439 | #print(idx0) 440 | 441 | for idx in range(0, len(rpn_boxes)): 442 | transCurrent = [] 443 | 444 | angle = theta[np.argmax(orient_scores[idx, :], axis = 0)] 445 | 446 | if angle==0 or angle==90 : 447 | 448 | #print ("input_fmap.shape",input_fmap.shape) 449 | outMap[idx, 0:input_fmap.shape[1], 0:input_fmap.shape[2], 0:input_fmap.shape[3]] = input_fmap 450 | #print rpn_boxes[idx,1:5], [idx]+[rpn_boxes[idx,1],rpn_boxes[idx,2], rpn_boxes[idx,3], rpn_boxes[idx,4]] 451 | tr_rotated_box_all.append([idx]+[rpn_boxes[idx,1],rpn_boxes[idx,2], rpn_boxes[idx,3], rpn_boxes[idx,4]]) 452 | 453 | T11 = [[1,0,0],[0,1,0],[0,0,1]] 454 | transCurrent.append(T11) 455 | transCurrent.append(T11) 456 | 
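# The 0/90-degree branch above applies no warp: the feature map is copied
# through unchanged, the proposal box is kept as-is, and two identity
# transforms are recorded so that the inverse mapping applied later
# (trans_box1 in tools/demo_firearms.py) is effectively a no-op for these
# proposals.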
transApplied.append(transCurrent) 457 | 458 | box = rpn_boxes[idx,1:5]/16 459 | 460 | sz = [box[3]-box[1], box[2]-box[0]] 461 | 462 | cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2]) 463 | 464 | T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]] 465 | T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]]) 466 | T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]] 467 | 468 | T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1))) 469 | 470 | corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]] 471 | 472 | trans_cpoints = np.dot(T[0:2,:],corner_pts) 473 | 474 | xymin = trans_cpoints.min(1) 475 | xymax = trans_cpoints.max(1) 476 | 477 | out_H = np.int32(xymax[1] - xymin[1] + 1) 478 | out_W = np.int32(xymax[0] - xymin[0] + 1) 479 | 480 | out_fmap_size = [0,0,out_W, out_H] 481 | 482 | T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]]; 483 | 484 | T_final = np.dot(np.transpose(T4),T); 485 | 486 | else: 487 | if angle>90: 488 | angle = angle-180 489 | 490 | box = rpn_boxes[idx,1:5]/16 491 | #print('angle', angle) 492 | 493 | 494 | sz = [box[3]-box[1], box[2]-box[0]] 495 | 496 | 497 | cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2]) 498 | 499 | #print('Original RPN :', box) 500 | 501 | T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]] 502 | T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]]) 503 | T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]] 504 | 505 | 506 | T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1))) 507 | #T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),T1)) 508 | 509 | 510 | 511 | corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]] 512 | #print('corner_pts', corner_pts) 513 | 514 | trans_cpoints = np.dot(T[0:2,:],corner_pts) 515 | 516 | xymin = trans_cpoints.min(1) 517 | xymax = trans_cpoints.max(1) 518 | 519 | out_H = np.int32(xymax[1] - xymin[1] + 1) 520 | out_W = np.int32(xymax[0] - xymin[0] + 1) 521 | #print('out_W', out_W, out_H, W,H) 522 | 523 | out_fmap_size = [0,0,out_W, out_H] 524 | 525 | T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]]; 526 | #print T 527 | #print T4 528 | 529 | T_final = np.dot(np.transpose(T4),T); 530 | 531 | #print T_final 532 | 533 | rect_pts = [ [box[0],box[2], box[0], box[2]] ,[box[1],box[1],box[3],box[3]],[1,1,1,1]] 534 | 535 | trans_rpoints = np.dot(T_final[0:2,:],rect_pts) 536 | 537 | rxymin = trans_rpoints.min(1) 538 | rxymax = trans_rpoints.max(1) 539 | 540 | cropped_box = [np.int32(np.floor(rxymin[0])), np.int32(np.floor(rxymin[1])), np.int32(np.floor(rxymax[0])), np.int32(np.floor(rxymax[1]))] 541 | #print('cropped_box :', cropped_box) 542 | #print('trans_rpoints :', trans_rpoints) 543 | 544 | 545 | # find coordinates for maximum area inscribed rectangle 546 | intSec1, intSec2 = LinesIntersectionForLargestBox(trans_rpoints, np.array(rect_pts), angle) 547 | height_deltas = [intSec1[1]-cropped_box[1], cropped_box[3]-intSec2[1]] 548 | #print('height_deltas: ', height_deltas) 549 | 550 | T11 = [[1,0,0],[0,1,0],[-rxymin[0],-rxymin[1],1]] 551 | T11 = np.transpose(T11) 552 | 553 | rect_pts1 = [trans_rpoints[0], trans_rpoints[1],[1,1,1,1]] 554 | trans_rpoints = np.dot(T11[0:2,:],rect_pts1) 555 | rxymin1 = trans_rpoints.min(1) 556 | rxymax1 = trans_rpoints.max(1) 557 | 558 | #print ('trans_rpoints00 : ', trans_rpoints) 559 | tr_rotated_box = [rxymin1[0], rxymin1[1]+height_deltas[0], rxymax1[0], 
rxymax1[1]-height_deltas[1]] 560 | #print ('box : ', box) 561 | #print ('tr_rotated_box : ', tr_rotated_box) 562 | 563 | tr_rotated_box = [ik * 16 for ik in tr_rotated_box] 564 | #ross = [[0]+ il for il in rotated_rpns] 565 | tr_rotated_box_all.append([idx]+tr_rotated_box) 566 | 567 | transCurrent.append(T_final) 568 | transCurrent.append(T11) 569 | 570 | batch_grids = affine_grid_generator(out_H, out_W, T_final) 571 | 572 | x_s = batch_grids[:,0, :, :] 573 | y_s = batch_grids[:,1, :, :] 574 | 575 | out_fmap = bilinear_sampler_Interpol(input_fmap.copy(), x_s, y_s) 576 | 577 | if cropped_box[0] < 0: 578 | 579 | cropped_box[2] = int(cropped_box[2] - cropped_box[0]) 580 | cropped_box[0] = int(cropped_box[0] - cropped_box[0]) 581 | 582 | if cropped_box[1] < 0: 583 | 584 | cropped_box[3] = int(cropped_box[3] - cropped_box[1]) 585 | cropped_box[1] = int(cropped_box[1] - cropped_box[1]) 586 | 587 | f_map = out_fmap[:, cropped_box[1]:cropped_box[3] , cropped_box[0]:cropped_box[2] ,:] 588 | #print('output_fmap', (f_map[0,:,:,:]).sum()) 589 | outMap[idx, 0:f_map.shape[1], 0:f_map.shape[2], 0:f_map.shape[3]] = f_map 590 | #print('output_fmap1', (outMap[idx,:,:,:]).sum()) 591 | 592 | #tr_rotated_box_all = np.array(tr_rotated_box_all) 593 | #print('featureMap size : ', outMap.shape) 594 | 595 | transApplied.append(transCurrent) 596 | 597 | return outMap, tr_rotated_box_all, transApplied, T_final 598 | 599 | 600 | 601 | 602 | 603 | ###### backward ###### 604 | 605 | #def transformer_layer_fMapSep_backward(input_fmap, orient_scores, rpn_boxes, out_dims=None, **kwargs): 606 | def transformer_layer_fMapSep_backward(input_grad, orient_scores, in_rpn_boxes, out_dims=None, **kwargs): 607 | 608 | theta = [0, 90, 135, 45, 157.5, 112.5, 67.5, 22.5] 609 | 610 | B = np.shape(input_grad)[0] 611 | H = np.shape(input_grad)[1] 612 | W = np.shape(input_grad)[2] 613 | C = np.shape(input_grad)[3] 614 | print('widHig', B, H, W, C) 615 | 616 | 617 | outMap = np.zeros((len(in_rpn_boxes), 102,102, input_grad.shape[3]), dtype = float) 618 | #outMap = np.zeros((len(rpn_boxes), 50,50, input_fmap.shape[3])) 619 | 620 | 621 | tr_rotated_box_all = [] 622 | transApplied = [] 623 | #ang1 = np.array(np.argmax(orient_scores, axis = 1)) 624 | #print('Im here :', 1*(ang1==0), 1*(ang1==1)) 625 | #idx0 = np.where(np.logical_or((ang1 == 0)*1,(ang1 == 1)*1))[0] 626 | #print("len",len(in_rpn_boxes)) 627 | 628 | for idx in range(0, len(in_rpn_boxes)): 629 | transCurrent = [] 630 | 631 | angle = theta[np.argmax(orient_scores[idx, :], axis = 0)] 632 | #print ("angle", angle) 633 | 634 | 635 | 636 | if angle==0 or angle==90 : 637 | #print ("input_grad.shape",input_grad.shape) 638 | outMap[idx, 0:input_grad.shape[1], 0:input_grad.shape[2], 0:input_grad.shape[3]] = input_grad[idx, :, :, :] 639 | #print rpn_boxes[idx,1:5], [idx]+[rpn_boxes[idx,1],rpn_boxes[idx,2], rpn_boxes[idx,3], rpn_boxes[idx,4]] 640 | tr_rotated_box_all.append([idx]+[in_rpn_boxes[idx,1],in_rpn_boxes[idx,2], in_rpn_boxes[idx,3], in_rpn_boxes[idx,4]]) 641 | 642 | T11 = [[1,0,0],[0,1,0],[0,0,1]] 643 | transCurrent.append(T11) 644 | transCurrent.append(T11) 645 | transApplied.append(transCurrent) 646 | 647 | 648 | 649 | box = in_rpn_boxes[idx,1:5]/16 650 | 651 | 652 | sz = [box[3]-box[1], box[2]-box[0]] 653 | 654 | 655 | cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2]) 656 | 657 | 658 | T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]] 659 | T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 
0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]]) 660 | T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]] 661 | 662 | T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1))) 663 | 664 | corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]] 665 | 666 | trans_cpoints = np.dot(T[0:2,:],corner_pts) 667 | 668 | xymin = trans_cpoints.min(1) 669 | xymax = trans_cpoints.max(1) 670 | 671 | out_H = np.int32(xymax[1] - xymin[1] + 1) 672 | out_W = np.int32(xymax[0] - xymin[0] + 1) 673 | 674 | out_fmap_size = [0,0,out_W, out_H] 675 | 676 | T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]]; 677 | 678 | T_final = np.dot(np.transpose(T4),T); 679 | T_final_inv = inv(T_final) 680 | 681 | 682 | else: 683 | if angle>90: 684 | angle = angle-180 685 | 686 | box = in_rpn_boxes[idx,1:5]/16 687 | #print('angle', angle) 688 | 689 | 690 | sz = [box[3]-box[1], box[2]-box[0]] 691 | 692 | 693 | cntr = np.asarray([(box[1]+box[3])/2, (box[0]+box[2])/2]) 694 | 695 | #print('Original RPN :', box) 696 | 697 | T1 = [[1,0,0],[0,1,0],[-cntr[1],-cntr[0],1]] 698 | T2 = np.asarray([[np.cos(np.deg2rad(int(angle))), np.sin(np.deg2rad(int(angle))), 0],[-np.sin(np.deg2rad(int(angle))),np.cos(np.deg2rad(int(angle))),0],[0,0,1]]) 699 | T3 = [[1,0,0],[0,1,0],[cntr[1],cntr[0],1]] 700 | 701 | 702 | T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),np.transpose(T1))) 703 | #T = np.dot(np.transpose(T3),np.dot(np.transpose(T2),T1)) 704 | 705 | corner_pts = [[0,W-1,0,W-1],[0,0,H-1,H-1],[1,1,1,1]] 706 | #print('corner_pts', corner_pts) 707 | 708 | trans_cpoints = np.dot(T[0:2,:],corner_pts) 709 | 710 | xymin = trans_cpoints.min(1) 711 | xymax = trans_cpoints.max(1) 712 | 713 | out_H = np.int32(xymax[1] - xymin[1] + 1) 714 | out_W = np.int32(xymax[0] - xymin[0] + 1) 715 | #print('out_W', out_W, out_H, W,H) 716 | 717 | out_fmap_size = [0,0,out_W, out_H] 718 | 719 | T4 = [[1,0,0],[0,1,0],[-xymin[0], -xymin[1],1]]; 720 | #print T 721 | #print T4 722 | 723 | T_final = np.dot(np.transpose(T4),T); 724 | T_final_inv = inv(T_final) 725 | 726 | #print T_final 727 | 728 | rect_pts = [ [box[0],box[2], box[0], box[2]] ,[box[1],box[1],box[3],box[3]],[1,1,1,1]] 729 | 730 | trans_rpoints = np.dot(T_final_inv[0:2,:],rect_pts) 731 | 732 | rxymin = trans_rpoints.min(1) 733 | rxymax = trans_rpoints.max(1) 734 | 735 | cropped_box = [np.int32(np.floor(rxymin[0])), np.int32(np.floor(rxymin[1])), np.int32(np.floor(rxymax[0])), np.int32(np.floor(rxymax[1]))] 736 | #print('cropped_box :', cropped_box) 737 | #print('trans_rpoints :', trans_rpoints) 738 | 739 | 740 | # find coordinates for maximum area inscribed rectangle 741 | #intSec1, intSec2 = LinesIntersectionForLargestBox(trans_rpoints, np.array(rect_pts), angle) 742 | #height_deltas = [intSec1[1]-cropped_box[1], cropped_box[3]-intSec2[1]] 743 | #print('height_deltas: ', height_deltas) 744 | 745 | T11 = [[1,0,0],[0,1,0],[-rxymin[0],-rxymin[1],1]] 746 | T11 = np.transpose(T11) 747 | 748 | rect_pts1 = [trans_rpoints[0], trans_rpoints[1],[1,1,1,1]] 749 | trans_rpoints = np.dot(T11[0:2,:],rect_pts1) 750 | rxymin1 = trans_rpoints.min(1) 751 | rxymax1 = trans_rpoints.max(1) 752 | 753 | #print ('trans_rpoints00 : ', trans_rpoints) 754 | #tr_rotated_box = [rxymin1[0], rxymin1[1]+height_deltas[0], rxymax1[0], rxymax1[1]-height_deltas[1]] 755 | tr_rotated_box = [rxymin1[0], rxymin1[1], rxymax1[0], rxymax1[1]] 756 | #print ('box : ', box) 757 | #print ('tr_rotated_box : ', tr_rotated_box) 758 | 759 | 760 | tr_rotated_box = [ik * 16 for ik in tr_rotated_box] 761 | #ross = [[0]+ il for il in 
rotated_rpns] 762 | tr_rotated_box_all.append([idx]+tr_rotated_box) 763 | 764 | transCurrent.append(T_final_inv) 765 | transCurrent.append(T11) 766 | 767 | batch_grids = affine_grid_generator(out_H, out_W, T_final_inv) 768 | 769 | x_s = batch_grids[:,0, :, :] 770 | y_s = batch_grids[:,1, :, :] 771 | tup = np.reshape(input_grad[idx, :, :, :], (1,np.shape(input_grad)[1],np.shape(input_grad)[2],np.shape(input_grad)[3])) 772 | out_fmap = bilinear_sampler_Interpol(tup.copy(), x_s, y_s) 773 | #print ("tup.shape",tup.shape) 774 | #print (xyz) 775 | 776 | if cropped_box[0] < 0: 777 | 778 | cropped_box[2] = int(cropped_box[2] - cropped_box[0]) 779 | cropped_box[0] = int(cropped_box[0] - cropped_box[0]) 780 | 781 | if cropped_box[1] < 0: 782 | 783 | cropped_box[3] = int(cropped_box[3] - cropped_box[1]) 784 | cropped_box[1] = int(cropped_box[1] - cropped_box[1]) 785 | 786 | f_map = out_fmap[:, cropped_box[1]:cropped_box[3] , cropped_box[0]:cropped_box[2] ,:] 787 | #print('output_fmap', (f_map[0,:,:,:]).sum()) 788 | outMap[idx, 0:f_map.shape[1], 0:f_map.shape[2], 0:f_map.shape[3]] = f_map 789 | #print('output_fmap1', (outMap[idx,:,:,:]).sum()) 790 | 791 | #tr_rotated_box_all = np.array(tr_rotated_box_all) 792 | #print('featureMap size : ', outMap.shape) 793 | 794 | transApplied.append(transCurrent) 795 | 796 | return outMap, tr_rotated_box_all, transApplied 797 | -------------------------------------------------------------------------------- /images/flow_diagram_web.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/images/flow_diagram_web.jpg -------------------------------------------------------------------------------- /images/more_results_web.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akhtarvision/orientation-aware-firearm-detection/2ebf283bc4baf993feb31f6fcb01cdd5bf2bce9e/images/more_results_web.jpg -------------------------------------------------------------------------------- /makebboxproposals.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Written by CVML 3 | # -------------------------------------------------------- 4 | 5 | 6 | 7 | from fast_rcnn.config import cfg 8 | from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv 9 | import argparse 10 | from utils.timer import Timer 11 | import numpy as np 12 | import cv2 13 | from numpy.linalg import inv 14 | import caffe 15 | from utils.blob import im_list_to_blob 16 | import os 17 | import matplotlib.pyplot as plt 18 | import gc 19 | import yaml 20 | 21 | 22 | DEBUG = False 23 | 24 | class makebBox_regionProposal(caffe.Layer): 25 | """ 26 | Applies the predicted bounding-box regression deltas to the first-stage proposals and outputs the refined boxes (rois1) used by the second stage. 27 | """ 28 | 29 | def setup(self, bottom, top): 30 | 31 | pass 32 | 33 | def forward(self, bottom, top): 34 | 35 | 36 | #assert bottom[0].data.shape[0] == 1, \ 37 | # 'Only single item batches are supported' 38 | 39 | 40 | # bottoms: first-stage rois, bbox regression deltas, im_info and 41 | # the first-stage class probabilities 42 | rpn_boxes_Actual = bottom[0].data 43 | box_deltas = bottom[1].data 44 | im_info = bottom[2].data 45 | scores = bottom[3].data 46 | 47 | 48 | #print('im_info : ', im_info) 49 | 50 | im_scales = im_info[0][2] 51 | 52 | im_shape = np.array([im_info[0][0],
im_info[0][1]]) / im_scales 53 | #print('conv_feat : ', conv_feat.shape) 54 | #print('rpnBoxes : ', rpn_boxes.shape) 55 | 56 | 57 | #for idx in range(len(cls_idx)): 58 | #cls_boxes = final_boxes[inds, j*4:(j+1)*4] 59 | 60 | 61 | rpn_boxes = rpn_boxes_Actual[:, 1:5] / im_scales 62 | pred_boxes = bbox_transform_inv(rpn_boxes, box_deltas) 63 | pred_boxes = clip_boxes(pred_boxes, im_shape) 64 | 65 | cls_idx = np.argmax(scores, axis = 1) 66 | 67 | #print('cls_idx', cls_idx.shape, cls_idx ) 68 | 69 | #cls_idx = cls_idx.reshape(len(cls_idx), 1) 70 | #print('cls_idx', cls_idx.shape) 71 | #pred_boxes = pred_boxes[:, cls_idx*4:(cls_idx+1)*4] 72 | temp = np.zeros((len(cls_idx), 5)) 73 | 74 | for idx in range(len(cls_idx)): 75 | #print(cls_idx[idx]) 76 | temp[idx,1:] = pred_boxes[idx, cls_idx[idx]*4:(cls_idx[idx]+1)*4] 77 | 78 | 79 | pred_boxes = temp * im_scales 80 | #addd = cls_idx >0 81 | #print('Compare :', rpn_boxes[cls_idx>0,:], temp[cls_idx>0,:]) 82 | 83 | #rpn_boxes_Actual[:,1:5] = pred_boxes 84 | 85 | top[0].reshape(*(pred_boxes.shape)) 86 | top[0].data[...] = pred_boxes 87 | 88 | def backward(self, top, propagate_down, bottom): 89 | """This layer does not propagate gradients.""" 90 | pass 91 | 92 | def reshape(self, bottom, top): 93 | """Reshaping happens during the call to forward.""" 94 | top[0].reshape(*bottom[0].shape) 95 | #top[1].reshape(*bottom[1].shape) 96 | #pass 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /tools/demo_firearms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Faster R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | 11 | # Modified by CVML group @ITU- Punjab 12 | """ 13 | Demo script showing detections in sample images. 14 | 15 | See README.md for installation instructions before running. 
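Each image is passed through the two-stage network: im_detect returns the
RPN proposals together with first-stage class, orientation and box outputs
plus the warped second-stage predictions; after NMS the surviving detections
are drawn as rotated quadrilaterals and written back to data/demo with a
're_' prefix.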
16 | """ 17 | 18 | import _init_paths 19 | from fast_rcnn.config import cfg 20 | from fast_rcnn.test import im_detect 21 | from fast_rcnn.nms_wrapper import nms 22 | from utils.timer import Timer 23 | import matplotlib.pyplot as plt 24 | import numpy as np 25 | import scipy.io as sio 26 | import caffe, os, sys, cv2 27 | import argparse 28 | from utils.blob import im_list_to_blob 29 | from numpy.linalg import inv 30 | 31 | CLASSES = ('__background__', 32 | 'Gun','Rifle') 33 | 34 | NETS = {'vgg16': ('VGG16', 35 | 'vgg16_fast_rcnn_cascade_firearms_iter_60000.caffemodel'), 36 | 'zf': ('ZF', 37 | 'ZF_faster_rcnn_final.caffemodel')} 38 | 39 | 40 | 41 | def vis_detections_final(im, class_name, all_final_boxes,thresh, cntG,cntR, cG, cR, rpn_sscores, rpn_bo, all_final_boxes_rotated): 42 | """Draws the final rotated detections on the image and accumulates per-class score counts.""" 43 | #print 'i am in visualizer' 44 | #print len(all_final_boxes) 45 | 46 | #fig, ax = plt.subplots(figsize=(12, 12)) 47 | #ax.imshow(im, aspect='equal') 48 | 49 | 50 | boxes = all_final_boxes[:,:4] 51 | scores = all_final_boxes[:,4] 52 | scor = all_final_boxes[:,10] 53 | rpnns = all_final_boxes[:,6:10] 54 | 55 | xAll = all_final_boxes_rotated[:,:4] 56 | yAll = all_final_boxes_rotated[:,4:8] 57 | 58 | orient_class = all_final_boxes[:,5] 59 | s=[] 60 | for i in xrange(len(scores)): 61 | 62 | bbox = map(int, boxes[i,:]) 63 | #rpn_bo = map(int, rpnns[i,:]) 64 | score = scores[i] 65 | orient_cls = orient_class[i] 66 | rpn_s = scor[i] 67 | 68 | 69 | if score > thresh: 70 | 71 | txt = class_name + ': ' + str(orient_cls) + ': ' + str(score) 72 | 73 | s.append(score) 74 | 75 | pts = np.array([[xAll[i,0],yAll[i,0]],[xAll[i,1],yAll[i,1]],[xAll[i,3],yAll[i,3]],[xAll[i,2],yAll[i,2]]], np.int32) 76 | #cv2.polylines(im, [pts],True,(0,255,255), 2) 77 | #cv2.polylines(im, [pts],True,(128,0,255), 2) # violet like 78 | cv2.polylines(im, [pts],True,(147, 20,255), 6) # pink like 79 | 80 | if s: 81 | 82 | 83 | if (class_name == 'Gun'): 84 | cntG = max(s)+cntG 85 | cG=cG+1 86 | 87 | if (class_name == 'Rifle'): 88 | cntR = max(s)+cntR 89 | cR=cR+1 90 | 91 | #print (cntG,cntR) 92 | #print (cG,cR) 93 | 94 | return im,cntG,cntR, cG, cR 95 | #return im 96 | 97 | 98 | 99 | def _get_image_blob(im): 100 | """Converts an image into a network input.
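The image is mean-subtracted (cfg.PIXEL_MEANS) and rescaled once per entry
in cfg.TEST.SCALES so its short side matches the target scale, capped so
the long side stays within cfg.TEST.MAX_SIZE; the rescaled copies are
packed into a single blob.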
101 | 102 | Arguments: 103 | im (ndarray): a color image in BGR order 104 | 105 | Returns: 106 | blob (ndarray): a data blob holding an image pyramid 107 | im_scale_factors (list): list of image scales (relative to im) used 108 | in the image pyramid 109 | """ 110 | im_orig = im.astype(np.float32, copy=True) 111 | im_orig -= cfg.PIXEL_MEANS 112 | 113 | im_shape = im_orig.shape 114 | im_size_min = np.min(im_shape[0:2]) 115 | im_size_max = np.max(im_shape[0:2]) 116 | 117 | processed_ims = [] 118 | im_scale_factors = [] 119 | 120 | for target_size in cfg.TEST.SCALES: 121 | im_scale = float(target_size) / float(im_size_min) 122 | # Prevent the biggest axis from being more than MAX_SIZE 123 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 124 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 125 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 126 | interpolation=cv2.INTER_LINEAR) 127 | im_scale_factors.append(im_scale) 128 | processed_ims.append(im) 129 | 130 | # Create a blob to hold the input images 131 | blob = im_list_to_blob(processed_ims) 132 | 133 | return blob, np.array(im_scale_factors) 134 | 135 | def _get_blobs(im, rois): 136 | """Convert an image and RoIs within that image into network inputs.""" 137 | blobs = {'data' : None, 'rois' : None} 138 | blobs['data'], im_scale_factors = _get_image_blob(im) 139 | #if not cfg.TEST.HAS_RPN: 140 | #blobs['rois'] = _get_rois_blob(rois, im_scale_factors) 141 | #print ('lll: ', blobs['rois']) 142 | return blobs, im_scale_factors 143 | 144 | def demo(net, image_name): 145 | """Detect object classes in an image using pre-computed object proposals.""" 146 | 147 | num_images = 1 148 | foldername = '/media/akhtar/6D2C8F896B2F79E0/Projects/py-faster-rcnn-master/data/output_images_detected/' 149 | foldername_all = '/home/itu/faster-rcnn-1070/data/output_images_all/' 150 | thresh=0.05 151 | max_per_image=100 152 | 153 | all_boxes = [[] for _ in xrange(num_images)] 154 | 155 | ntopProp = [300] 156 | 157 | 158 | theta = [0, 90, 135, 45, 157.5, 112.5, 67.5, 22.5] 159 | 160 | for t in xrange(0,len(ntopProp)): 161 | #output_dir = get_output_dir(imdb, net) 162 | 163 | 164 | 165 | if not cfg.TEST.HAS_RPN: 166 | roidb = imdb.roidb 167 | 168 | all_final_boxes = [[[] for _ in xrange(num_images)] 169 | for _ in xrange(3)] 170 | 171 | all_final_boxes_rotated = [[[] for _ in xrange(num_images)] 172 | for _ in xrange(3)] 173 | 174 | all_rpn_boxes = [[[] for _ in xrange(num_images)] 175 | for _ in xrange(1)] 176 | 177 | #print('all_final_boxes_rotated :', all_final_boxes_rotated) 178 | cntG = 0 179 | cntR = 0 180 | cG = 0 181 | cR = 0 182 | 183 | for i in xrange(num_images): 184 | # filter out any ground truth boxes 185 | if cfg.TEST.HAS_RPN: 186 | box_proposals = None 187 | else: 188 | # The roidb may contain ground-truth rois (for example, if the roidb 189 | # comes from the training or val split). We only want to evaluate 190 | # detection on the *non*-ground-truth rois. We select those the rois 191 | # that have the gt_classes field set to 0, which means there's no 192 | # ground truth. 
193 | box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] 194 | 195 | 196 | 197 | # Load the demo image 198 | im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) 199 | 200 | im = cv2.imread(im_file) 201 | 202 | 203 | 204 | 205 | rpn_boxes, rpn_scores, final_boxes, final_scores, orient_score, final_boxes1, final_scores1, transApplied = im_detect(net, im, box_proposals, True) 206 | 207 | # keep only the top ntopProp[t] proposals and their associated outputs 208 | rpn_boxes = rpn_boxes[0:ntopProp[t],:] 209 | rpn_scores = rpn_scores[0:ntopProp[t],:] 210 | final_boxes = final_boxes[0:ntopProp[t],:] 211 | final_scores = final_scores[0:ntopProp[t],:] 212 | orient_score = orient_score[0:ntopProp[t],:] 213 | final_boxes1 = final_boxes1[0:ntopProp[t],:] 214 | final_scores1 = final_scores1[0:ntopProp[t],:] 215 | transApplied = transApplied[0:ntopProp[t],:,:,:] 216 | 227 | temp_boxes = None 228 | blobs, im_scales = _get_blobs(im, temp_boxes) 229 | 230 | rotatedBoxesAll = np.zeros((len(rpn_boxes), 3,2,4)) 231 | 232 | for iii in range(0, len(rpn_boxes)): 233 | final_boxes_tr = final_boxes1[iii,:] 234 | #print('final_boxes_tr :', final_boxes_tr) 235 | final_boxes_tr = ((final_boxes_tr * im_scales[0]) / 16) 236 | 237 | final_boxes_tr = trans_box1(final_boxes_tr,transApplied[iii,0,:,:],transApplied[iii,1,:,:]) 238 | 239 | final_boxes_tr = ((final_boxes_tr * 16) / im_scales[0]) 240 | 241 | rotatedBoxesAll[iii, :,:,:] = final_boxes_tr[0,:,:,:] 242 | 243 | 244 | rpn_dets = np.hstack((rpn_boxes, rpn_scores)) \ 245 | .astype(np.float32, copy=False) 246 | #all_rpn_boxes[0][i] = rpn_dets 247 | 248 | 249 | #_t['misc'].tic() 250 | 251 | maxScore = final_scores1 252 | for j in xrange(1, 3): 253 | 254 | inds = np.where(maxScore[:, j] > thresh)[0] 255 | cls_scores = maxScore[inds, j] 256 | cls_boxes = final_boxes[inds, j*4:(j+1)*4] 257 | cls_orient = np.argmax(orient_score[inds, :], axis = 1) 258 | rpn_bboxes = rpn_boxes[inds,:] 259 | rpn_sscores = rpn_scores[inds] 260 | 261 | cls_scores1 = final_scores[inds, j] 262 | 263 | rotatedBoxesClass = np.hstack((rotatedBoxesAll[inds,j,0,:], rotatedBoxesAll[inds,j,1,:])).astype(np.float32, copy=False) 264 | #print('rotatedBoxesClass :', rotatedBoxesClass.shape) 265 | 266 | cls_dets_temp_rotated = np.hstack((rotatedBoxesAll[inds,j,0,:], rotatedBoxesAll[inds,j,1,:], cls_scores[:, np.newaxis])) \ 267 | .astype(np.float32, copy=False) 268 | 269 | 270 | cls_dets_temp = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ 271 | .astype(np.float32, copy=False) 272 | 273 | #print('cls_dets_temp', cls_dets_temp.shape) 274 | 275 | cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis], cls_orient[:, np.newaxis], rpn_bboxes, rpn_sscores)) \ 276 | .astype(np.float32, copy=False) 277 | 278 | 279 | keep = nms(cls_dets_temp, cfg.TEST.NMS) 280 | #keep = nms(cls_dets_temp, 0.3) 281 | 282 | cls_dets = cls_dets[keep, :] 283 | rotatedBoxesClass = rotatedBoxesClass[keep, :] 284 | 285 | 286 | all_final_boxes[j][i] = cls_dets 287 | all_final_boxes_rotated[j][i] = rotatedBoxesClass 288 | 289 | if max_per_image > 0: 290 | image_scores = np.hstack([all_final_boxes[j][i][:, 4] 291 | for j in 
xrange(1, 3)]) 292 | 293 | if len(image_scores) > max_per_image: 294 | image_thresh = np.sort(image_scores)[-max_per_image] 295 | for j in xrange(1, 3): 296 | keep = np.where(all_final_boxes[j][i][:, -1] >= image_thresh)[0] 297 | all_final_boxes[j][i] = all_final_boxes[j][i][keep, :] 298 | all_final_boxes_rotated[j][i] = all_final_boxes_rotated[j][i][keep, :] 299 | 300 | 301 | for j in xrange(1, 3): 302 | 303 | rpn_bo = np.array([208, 58, 2243, 1094]) 304 | 305 | 306 | im,cntG,cntR, cG, cR = vis_detections_final(im, CLASSES[j], all_final_boxes[j][i], 0.75, cntG,cntR, cG, cR, rpn_sscores, rpn_bo, all_final_boxes_rotated[j][i]) 307 | 308 | 309 | print ('check: ',os.path.join(cfg.DATA_DIR, 'demo', 're_'+image_name)) 310 | cv2.imwrite(os.path.join(cfg.DATA_DIR, 'demo', 're_'+image_name), im) 311 | 312 | 313 | 314 | def trans_box1(final_boxes,T_final, T11): 315 | final_boxes = final_boxes.reshape(1,12) 316 | final_boxes_final = np.zeros((len(final_boxes),3, 2,4)) 317 | 318 | for k in range(0, len(final_boxes)): 319 | 320 | class1 = final_boxes[k,0:4] 321 | class2 = final_boxes[k,4:8] 322 | class3 = final_boxes[k,8:12] 323 | 324 | box1 = [ class1[0] , class1[1] , class1[2] , class1[3] ] 325 | box2 = [ class2[0] , class2[1] , class2[2] , class2[3] ] 326 | box3 = [ class3[0] , class3[1] , class3[2] , class3[3] ] 327 | 328 | class1_out = trans_layer1(T_final, T11, box1) 329 | class2_out = trans_layer1(T_final, T11, box2) 330 | class3_out = trans_layer1(T_final, T11, box3) 331 | 332 | final_boxes_final[k,0,:,:] = class1_out 333 | final_boxes_final[k,1,:,:] = class2_out 334 | final_boxes_final[k,2,:,:] = class3_out 335 | 336 | return final_boxes_final 337 | 338 | def trans_layer1(T_final,T11, final_b): 339 | 340 | nT0 = inv(T11) 341 | ncorner_pts = [[final_b[0],final_b[2],final_b[0],final_b[2]],[final_b[1],final_b[1],final_b[3],final_b[3]],[1,1,1,1]] 342 | nboxx = np.dot(nT0[0:2,:],ncorner_pts) 343 | rxymin_nb = nboxx.min(1) 344 | rxymax_nb = nboxx.max(1) 345 | 346 | T2 = inv(T_final) 347 | boxx2 = np.dot(T2[0:2,:],[nboxx[0], nboxx[1],[1,1,1,1]]) 348 | 349 | return boxx2 350 | 351 | 352 | def parse_args(): 353 | """Parse input arguments.""" 354 | parser = argparse.ArgumentParser(description='Faster R-CNN demo') 355 | parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', 356 | default=0, type=int) 357 | parser.add_argument('--cpu', dest='cpu_mode', 358 | help='Use CPU mode (overrides --gpu)', 359 | action='store_true') 360 | parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]', 361 | choices=NETS.keys(), default='vgg16') 362 | 363 | args = parser.parse_args() 364 | 365 | return args 366 | 367 | if __name__ == '__main__': 368 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 369 | 370 | args = parse_args() 371 | 372 | prototxt = os.path.join(cfg.MODELS_DIR, NETS[args.demo_net][0], 373 | 'faster_rcnn_alt_opt', 'faster_rcnn_test.pt') 374 | print ('prototxt: ',prototxt) 375 | caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models', 376 | NETS[args.demo_net][1]) 377 | print ('caffemodel: ',caffemodel) 378 | 379 | if not os.path.isfile(caffemodel): 380 | raise IOError(('{:s} not found.\nDid you run ./data/script/' 381 | 'fetch_faster_rcnn_models.sh?').format(caffemodel)) 382 | 383 | if args.cpu_mode: 384 | caffe.set_mode_cpu() 385 | else: 386 | caffe.set_mode_gpu() 387 | caffe.set_device(args.gpu_id) 388 | cfg.GPU_ID = args.gpu_id 389 | net = caffe.Net(prototxt, caffemodel, caffe.TEST) 390 | 391 | print '\n\nLoaded network {:s}'.format(caffemodel) 392 | 393
| 394 | im_names = ['north+korea+army_38.png'] 395 | for im_name in im_names: 396 | print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' 397 | print 'Demo for data/demo/{}'.format(im_name) 398 | demo(net, im_name) 399 | --------------------------------------------------------------------------------