├── ILSVRC2014_train_00010391.JPEG
├── RCNN.py
├── README.md
├── RPN.py
└── utils.py

/ILSVRC2014_train_00010391.JPEG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongjk/faster_rcnn_keras/4c01554ba2bf494badd50e9e22a7e7e65046f5b8/ILSVRC2014_train_00010391.JPEG
--------------------------------------------------------------------------------
/RCNN.py:
--------------------------------------------------------------------------------
import os
import traceback
import numpy as np
import numpy.random as npr
import keras.backend as K
from keras.models import Model, load_model
from keras.layers import Input, Layer, Flatten, Dense, BatchNormalization
from keras.applications import InceptionResNetV2
from keras.preprocessing.image import load_img, img_to_array
from utils import generate_anchors, draw_anchors, bbox_overlaps, bbox_transform,\
     loss_cls, smoothL1, parse_label, unmap, filter_boxes, \
     clip_boxes, py_cpu_nms, bbox_transform_inv


################## R-CNN Model #######################
# RoI Pooling layer
class RoIPooling(Layer):
    def __init__(self, size=(7, 7)):
        self.size = size
        super(RoIPooling, self).__init__()

    def build(self, input_shape):
        self.shape = input_shape
        super(RoIPooling, self).build(input_shape)

    def call(self, inputs, **kwargs):
        # inputs: [feature_map, rois, roi_batch_indices]
        # NOTE: tf.image.crop_and_resize expects boxes as normalized
        # [y1, x1, y2, x2] coordinates in [0, 1].
        ind = K.reshape(inputs[2], (-1,))
        x = K.tf.image.crop_and_resize(inputs[0], inputs[1], ind, self.size)
        return x

    def compute_output_shape(self, input_shape):
        a = input_shape[1][0]
        b = self.size[0]
        c = self.size[1]
        d = input_shape[0][3]
        return (a, b, c, d)


BATCH = 256

feature_map = Input(batch_shape=(None, None, None, 1536))
rois = Input(batch_shape=(None, 4))
ind = Input(batch_shape=(None, 1), dtype='int32')

p1 = RoIPooling()([feature_map, rois, ind])

flat1 = Flatten()(p1)

fc1 = Dense(
    units=1024,
    activation="relu",
    name="fc2"
)(flat1)
fc1 = BatchNormalization()(fc1)
# ILSVRC DET has 200 object categories, hence 4*200 box deltas
# and 200 class scores.
output_deltas = Dense(
    units=4 * 200,
    activation="linear",
    kernel_initializer="uniform",
    name="deltas2"
)(fc1)

output_scores = Dense(
    units=1 * 200,
    activation="softmax",
    kernel_initializer="uniform",
    name="scores2"
)(fc1)

model = Model(inputs=[feature_map, rois, ind], outputs=[output_scores, output_deltas])
model.summary()
model.compile(optimizer='rmsprop',
              loss={'deltas2': smoothL1, 'scores2': 'categorical_crossentropy'})

################## prepare batch #######################

FG_FRAC = .25
FG_THRESH = .5
BG_THRESH_HI = .5
BG_THRESH_LO = .1
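# Quick shape sanity check for the classification head (illustrative only;
# the feature map and RoI below are random placeholders, not real data):
#   fake_fmap = np.random.rand(1, 10, 10, 1536).astype('float32')
#   fake_rois = np.array([[0.1, 0.1, 0.5, 0.5]], dtype='float32')  # normalized
#   fake_ind = np.array([[0]], dtype='int32')
#   scores, deltas = model.predict([fake_fmap, fake_rois, fake_ind])
#   # scores.shape == (1, 200), deltas.shape == (1, 800)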
# load an example image to avoid Keras/TF graph initialisation problems
# TODO fix this properly.
pretrained_model = InceptionResNetV2(include_top=False)
img = load_img("./ILSVRC2014_train_00010391.JPEG")
x = img_to_array(img)
x = np.expand_dims(x, axis=0)
not_used = pretrained_model.predict(x)

rpn_model = load_model('weights.hdf5',
                       custom_objects={'loss_cls': loss_cls, 'smoothL1': smoothL1})
# warm up the RPN model with a saved feature map as well
not_used = rpn_model.predict(np.load('n02676566_6914')['fc'])

def produce_batch(filepath, gt_boxes, scale, category):
    # scale: (h_scale, w_scale) as returned by parse_label
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))
    # feed image to pretrained model and get feature map
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height
    # calculate output w, h stride
    w_stride = img_width / width
    h_stride = img_height / height
    # generate base anchors according to the output stride.
    # base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    # slice tiles according to image size and stride.
    # each 1x1x1536 feature-map cell maps to a tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # apply base anchors to all tiles, to get num_feature_map*9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # feed feature map to pretrained RPN model, get proposal scores and deltas.
    res = rpn_model.predict(feature_map)
    scores = res[0]
    scores = scores.reshape(-1, 1)
    deltas = res[1]
    deltas = np.reshape(deltas, (-1, 4))
    # transform anchors + deltas to proposal bbox values (x1, y1, x2, y2)
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (img_height, img_width))
    # remove small boxes, here the threshold is 40 pixels
    keep = filter_boxes(proposals, 40)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # sort scores and only keep the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # apply NMS to the top 6000, and then keep the top 300
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # add gt_boxes to proposals.
    proposals = np.vstack((proposals, gt_boxes))
    # calculate overlaps of proposals and gt_boxes
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment] #?
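    # Worked example of the sampling budget below (illustrative): with
    # BATCH=256 and FG_FRAC=0.25, at most 64 proposals with IoU >= 0.5
    # become foreground samples, and the remaining slots (up to 192) are
    # filled with background proposals whose IoU lies in [0.1, 0.5).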

    # sub-sample proposals into foreground and background
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
    bg_inds = np.where((max_overlaps < BG_THRESH_HI) &
                       (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = BATCH - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
    # the indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # select sampled values from various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    # regression targets from sampled rois to their assigned gt boxes
    targets = bbox_transform(rois, gt_rois)
    rois_num = targets.shape[0]
    # categories in the ILSVRC file names are 1-based, so shift to 0-based
    # before indexing the 200 per-class slots.
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category - 1] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))
    # get gt category as a one-hot vector
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category - 1] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    # also return the feature map, which the generator needs to feed the model
    return rois, batch_box, batch_categories, feature_map
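# Usage sketch (illustrative; the annotation file name is hypothetical):
#   _, gt_boxes, scale = parse_label(anno_path + 'n01234567_42.xml')
#   rois, deltas, cats, fmap = produce_batch(img_path + 'n01234567_42.JPEG',
#                                            gt_boxes, scale, category=17)
#   # rois: (N, 4) sampled proposals, deltas: (N, 800) per-class box targets,
#   # cats: (N, 200) one-hot labels, fmap: (1, h, w, 1536) feature map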

################## generate data #######################
ILSVRC_dataset_path = '/home/jk/faster_rcnn/'
img_path = ILSVRC_dataset_path + 'Data/DET/train/'
anno_path = ILSVRC_dataset_path + 'Annotations/DET/train/'
import glob
from multiprocessing import Process, Queue
from random import randint

def worker(path):
    print('worker start ' + path)
    batch_rois = []
    batch_featuremap_inds = []
    batch_categories = []
    batch_bboxes = []
    fc_index = 0
    dataset = {}
    # e.g. '/ImageSets/DET/train_*'
    for fname in glob.glob(ILSVRC_dataset_path + path):
        print(fname)
        with open(fname, 'r') as f:
            basename = os.path.basename(fname)
            category = int(basename.split('_')[1].split('.')[0])
            content = []
            for line in f:
                if 'extra' not in line:
                    content.append(line)
            dataset[category] = content
    print(len(dataset))
    while 1:
        try:
            category = randint(1, 200)
            content = dataset[category]
            n = randint(0, len(content) - 1)
            line = content[n]
            _, gt_boxes, scale = parse_label(anno_path + line.split()[0] + '.xml')
            if len(gt_boxes) == 0:
                continue
            rois, bboxes, categories, fmap = produce_batch(
                img_path + line.split()[0] + '.JPEG', gt_boxes, scale, category)
        except Exception:
            # print('parse label or produce batch failed: for: ' + line.split()[0])
            # traceback.print_exc()
            continue
        if len(rois) <= 0:
            continue

        for i in range(len(rois)):
            batch_rois.append(rois[i])
            batch_featuremap_inds.append(fc_index)
            batch_categories.append(categories[i])
            batch_bboxes.append(bboxes[i])
        b = np.asarray(batch_rois)
        c = np.asarray(batch_featuremap_inds)
        d = np.asarray(batch_categories)
        e = np.asarray(batch_bboxes)
        # pad the feature-map batch so every model input has the same leading
        # dimension; all RoI indices point at entry 0.
        f = np.zeros((len(b), fmap.shape[1], fmap.shape[2], fmap.shape[3]))
        f[0] = fmap[0]
        yield [f, b, c], [d, e]
        batch_rois = []
        batch_featuremap_inds = []
        batch_categories = []
        batch_bboxes = []
        fc_index = 0

################## start train #######################
# model.load_weights('./rcnn_weights_1.hdf5')
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='./rcnn_weights_2.hdf5', monitor='loss',
                               verbose=1, save_best_only=True)
model.fit_generator(worker('/ImageSets/DET/train_*.txt'), steps_per_epoch=1000,
                    epochs=100, callbacks=[checkpointer])
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
This is the code base for my post [Faster R-CNN step by step](https://dongjk.github.io/code/object+detection/keras/2018/05/21/Faster_R-CNN_step_by_step,_Part_I.html)

In the post, I implement Faster R-CNN step by step in Keras, build a trainable model, and dive into the details of all the tricky parts.
--------------------------------------------------------------------------------
/RPN.py:
--------------------------------------------------------------------------------
import traceback
import numpy as np
import numpy.random as npr
from keras.layers import Input, Conv2D
from keras.models import Model
from keras.applications import InceptionResNetV2
from keras.preprocessing.image import load_img, img_to_array
from utils import generate_anchors, draw_anchors, bbox_overlaps, bbox_transform,\
     loss_cls, smoothL1, parse_label, unmap

k = 9  # number of anchors per feature-map location
################## RPN Model #######################
feature_map_tile = Input(shape=(None, None, 1536))
convolution_3x3 = Conv2D(
    filters=512,
    kernel_size=(3, 3),
    padding='same',
    name="3x3"
)(feature_map_tile)

output_deltas = Conv2D(
    filters=4 * k,
    kernel_size=(1, 1),
    activation="linear",
    kernel_initializer="uniform",
    name="deltas1"
)(convolution_3x3)

output_scores = Conv2D(
    filters=1 * k,
    kernel_size=(1, 1),
    activation="sigmoid",
    kernel_initializer="uniform",
    name="scores1"
)(convolution_3x3)

model = Model(inputs=[feature_map_tile], outputs=[output_scores, output_deltas])
model.compile(optimizer='adam', loss={'scores1': loss_cls, 'deltas1': smoothL1})

################## prepare batch #######################
BG_FG_FRAC = 2

# load an example image to avoid Keras/TF graph initialisation problems
# TODO fix this properly.
pretrained_model = InceptionResNetV2(include_top=False)
img = load_img("./ILSVRC2014_train_00010391.JPEG")
x = img_to_array(img)
x = np.expand_dims(x, axis=0)
not_used = pretrained_model.predict(x)
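# Shape sanity check for the RPN head (illustrative; random input tile):
#   tile = np.random.rand(1, 3, 3, 1536).astype('float32')
#   scores, deltas = model.predict(tile)
#   # scores.shape == (1, 3, 3, 9), deltas.shape == (1, 3, 3, 36)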

def produce_batch(filepath, gt_boxes, scale):
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))
    # feed image to pretrained model and get feature map
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height
    # calculate output w, h stride
    w_stride = img_width / width
    h_stride = img_height / height
    # generate base anchors according to the output stride.
    # base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    # slice tiles according to image size and stride.
    # each 1x1x1536 feature-map cell maps to a tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # apply base anchors to all tiles, to get num_feature_map*9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # only keep anchors inside image+border.
    border = 0
    inds_inside = np.where(
        (all_anchors[:, 0] >= -border) &
        (all_anchors[:, 1] >= -border) &
        (all_anchors[:, 2] < img_width + border) &   # width
        (all_anchors[:, 3] < img_height + border)    # height
    )[0]
    anchors = all_anchors[inds_inside]
    # calculate the overlap of each anchor with each gt box,
    # a matrix with shape [len(anchors) x len(gt_boxes)]
    overlaps = bbox_overlaps(anchors, gt_boxes)
    # find the gt box with the biggest overlap for each anchor,
    # and the overlap ratio. result (len(anchors),)
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # find the anchor with the biggest overlap for each gt box,
    # and the overlap ratio. result (len(gt_boxes),)
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    # labels: 1=fg / 0=bg / -1=ignore
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)
    # set positive labels, as defined in the paper, section 3.1.2:
    # We assign a positive label to two kinds of anchors: (i) the
    # anchor/anchors with the highest Intersection-over-Union
    # (IoU) overlap with a ground-truth box, or (ii) an
    # anchor that has an IoU overlap higher than 0.7 with any gt box
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= .7] = 1
    # set negative labels
    labels[max_overlaps <= .3] = 0
    # subsample positive labels if we have too many
    # num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    # if len(fg_inds) > num_fg:
    #     disable_inds = npr.choice(
    #         fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    #     labels[disable_inds] = -1
    # subsample negative labels if we have too many
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
    # map each kept anchor back to the feature-map tile it came from
    batch_inds = inds_inside[labels != -1]
    batch_inds = (batch_inds // k).astype(int)
    full_labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    batch_label_targets = full_labels.reshape(-1, 1, 1, 1 * k)[batch_inds]
    # regression targets only for positive anchors; everything else stays 0
    # bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :])
    pos_anchors = all_anchors[inds_inside[labels == 1]]
    bbox_targets = bbox_transform(pos_anchors, gt_boxes[argmax_overlaps, :][labels == 1])
    bbox_targets = unmap(bbox_targets, total_anchors, inds_inside[labels == 1], fill=0)
    batch_bbox_targets = bbox_targets.reshape(-1, 1, 1, 4 * k)[batch_inds]
    # pad the feature map so a 3x3 tile can be cut around every location
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)), mode='constant')
    padded_fcmap = np.squeeze(padded_fcmap)
    batch_tiles = []
    for ind in batch_inds:
        x = ind % width
        y = int(ind / width)
        fc_3x3 = padded_fcmap[y:y + 3, x:x + 3, :]
        batch_tiles.append(fc_3x3)
    return np.asarray(batch_tiles), batch_label_targets.tolist(), batch_bbox_targets.tolist()
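# Returned batch format (per image), with k=9 anchors per tile:
#   batch_tiles:         (N, 3, 3, 1536) feature-map crops
#   batch_label_targets: (N, 1, 1, 9)    1=fg / 0=bg / -1=ignore
#   batch_bbox_targets:  (N, 1, 1, 36)   4 regression deltas per anchor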

################## generate data #######################
ILSVRC_dataset_path = '/home/jk/wi/ILSVRC/'
img_path = ILSVRC_dataset_path + 'Data/DET/train/'
anno_path = ILSVRC_dataset_path + 'Annotations/DET/train/'
import glob

BATCH_SIZE = 512
def input_generator():
    batch_tiles = []
    batch_labels = []
    batch_bboxes = []
    while 1:
        for fname in glob.glob(ILSVRC_dataset_path + 'ImageSets/DET/train_*'):
            with open(fname, 'r') as f:
                for line in f:
                    if 'extra' not in line:
                        try:
                            category, gt_boxes, scale = parse_label(anno_path + line.split()[0] + '.xml')
                            if len(gt_boxes) == 0:
                                continue
                            tiles, labels, bboxes = produce_batch(img_path + line.split()[0] + '.JPEG', gt_boxes, scale)
                        except Exception:
                            print('parse label or produce batch failed: for: ' + line.split()[0])
                            traceback.print_exc()
                            continue
                        for i in range(len(tiles)):
                            batch_tiles.append(tiles[i])
                            batch_labels.append(labels[i])
                            batch_bboxes.append(bboxes[i])
                            if len(batch_tiles) == BATCH_SIZE:
                                a = np.asarray(batch_tiles)
                                b = np.asarray(batch_labels)
                                c = np.asarray(batch_bboxes)
                                if not a.any() or not b.any() or not c.any():
                                    print("empty array found.")

                                yield a, [b, c]
                                batch_tiles = []
                                batch_labels = []
                                batch_bboxes = []


################## start train #######################
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='./weights.hdf5', monitor='loss',
                               verbose=1, save_best_only=True)
model.fit_generator(input_generator(), steps_per_epoch=1000, epochs=800, callbacks=[checkpointer])
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import sys
import numpy as np
import keras.backend as K
import xml.etree.ElementTree as ET
from PIL import Image, ImageDraw

def parse_label(xml_file):
    try:
        tree = ET.parse(xml_file)
    except Exception:
        print('Failed to parse: ' + xml_file, file=sys.stderr)
        return None
    root = tree.getroot()
    w_scale = 1
    h_scale = 1
    width = 0
    height = 0
    # upscale images whose sides are below 333 px
    for x in root.iter('width'):
        width = int(x.text)
        if width < 333:
            width = 333
            w_scale = 333 / float(x.text)
    for x in root.iter('height'):
        height = int(x.text)
        if height < 333:
            height = 333
            h_scale = 333 / float(x.text)
    category = []
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for x in root.iter('name'):
        category.append(x.text)
    for x in root.iter('xmin'):
        xmin.append(int(x.text) * w_scale)
    for x in root.iter('ymin'):
        ymin.append(int(x.text) * h_scale)
    for x in root.iter('xmax'):
        xmax.append(int(x.text) * w_scale)
    for x in root.iter('ymax'):
        ymax.append(int(x.text) * h_scale)
    gt_boxes = [list(box) for box in zip(xmin, ymin, xmax, ymax)]
    return category, np.asarray(gt_boxes, dtype=float), (h_scale, w_scale)
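# Example (hypothetical numbers): a 300x200 image is upscaled so both sides
# reach 333 px, i.e. parse_label returns scale factors
# (h_scale, w_scale) = (333/200, 333/300) ~= (1.665, 1.11), and the ground
# truth boxes come back already multiplied by these factors.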


def loss_cls(y_true, y_pred):
    # ignore anchors labelled -1; the loss is computed over fg/bg labels only
    condition = K.not_equal(y_true, -1)
    indices = K.tf.where(condition)

    target = K.tf.gather_nd(y_true, indices)
    output = K.tf.gather_nd(y_pred, indices)
    loss = K.binary_crossentropy(target, output)
    return K.mean(loss)


def smoothL1(y_true, y_pred):
    # only positions with non-zero targets (positive anchors) contribute
    nd = K.tf.where(K.tf.not_equal(y_true, 0))
    y_true = K.tf.gather_nd(y_true, nd)
    y_pred = K.tf.gather_nd(y_pred, nd)
    x = K.tf.losses.huber_loss(y_true, y_pred)
    # x = K.switch(x < HUBER_DELTA, 0.5 * x ** 2, HUBER_DELTA * (x - 0.5 * HUBER_DELTA))
    return x


def draw_anchors(img_path, anchors, pad_size=50):
    im = Image.open(img_path)
    w, h = im.size
    a4im = Image.new('RGB',
                     (w + 2 * pad_size, h + 2 * pad_size),  # padded canvas
                     (255, 255, 255))                       # white
    a4im.paste(im, (pad_size, pad_size))  # top-left corner, not centered
    for a in anchors:
        a = (a + pad_size).astype(int).tolist()
        draw = ImageDraw.Draw(a4im)
        draw.rectangle(a, outline=(255, 0, 0), fill=None)
    return a4im

def generate_anchors(base_width=16, base_height=16, ratios=[0.5, 1, 2],
                     scales=np.asarray([3, 6, 12])):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, w_stride-1, h_stride-1) window.
    """

    base_anchor = np.array([1, 1, base_width, base_height]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
                         for i in range(ratio_anchors.shape[0])])
    return anchors

def _whctrs(anchor):
    """
    Return width, height, x center, and y center for an anchor (window).
    """

    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr

def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """

    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
                         y_ctr - 0.5 * (hs - 1),
                         x_ctr + 0.5 * (ws - 1),
                         y_ctr + 0.5 * (hs - 1)))
    return anchors

def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors

def _scale_enum(anchor, scales):
    """
    Enumerate a set of anchors for each scale wrt an anchor.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors
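# Worked example: generate_anchors(16, 16) returns 9 anchors (3 ratios x
# 3 scales) centred on the (0, 0, 15, 15) reference window; the ratio-1
# anchors at scales 3, 6 and 12 are 48x48, 96x96 and 192x192 pixels.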

def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries.
    """

    # x1 >= 0
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    # y1 >= 0
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    # x2 < im_shape[1]
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    # y2 < im_shape[0]
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes

def filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep

def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


def bbox_transform(ex_rois, gt_rois):
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)

    targets = np.stack((targets_dx, targets_dy, targets_dw, targets_dh))

    targets = np.transpose(targets)

    return targets


def bbox_transform_inv(boxes, deltas):
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    # x1
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    # x2
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    # y2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h

    return pred_boxes
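# Worked example: for an anchor (0, 0, 15, 15) and a ground-truth box
# (8, 8, 23, 23), both are 16 px wide and tall, so dw = dh = log(16/16) = 0
# and the centre offsets are dx = dy = 8/16 = 0.5, i.e. the regression
# targets are [0.5, 0.5, 0.0, 0.0].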

def bbox_overlaps(boxes, query_boxes):
    """
    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    # work in integer pixel coordinates
    boxes = boxes.astype(int)
    N = boxes.shape[0]
    K = query_boxes.shape[0]

    overlaps = np.zeros((N, K), dtype=float)

    for k in range(K):
        box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                    (query_boxes[k, 3] - query_boxes[k, 1] + 1))
        for n in range(N):
            iw = (min(boxes[n, 2], query_boxes[k, 2]) -
                  max(boxes[n, 0], query_boxes[k, 0]) + 1)
            if iw > 0:
                ih = (min(boxes[n, 3], query_boxes[k, 3]) -
                      max(boxes[n, 1], query_boxes[k, 1]) + 1)

                if ih > 0:
                    ua = float((boxes[n, 2] - boxes[n, 0] + 1) *
                               (boxes[n, 3] - boxes[n, 1] + 1) +
                               box_area - iw * ih)
                    overlaps[n, k] = iw * ih / ua

    return overlaps

def unmap(data, count, inds, fill=0):
    """ Unmap a subset of items (data) back to the original set of items (of
    size count) """
    if len(data.shape) == 1:
        ret = np.empty((count, ), dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
    else:
        ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds, :] = data
    return ret
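
# A minimal smoke test (illustrative), runnable as `python utils.py`:
if __name__ == '__main__':
    anchors = generate_anchors(16, 16)
    assert anchors.shape == (9, 4)        # 3 ratios x 3 scales
    gt = np.array([[8., 8., 23., 23.]])
    ious = bbox_overlaps(anchors, gt)     # (9, 1) IoU matrix
    targets = bbox_transform(anchors, gt[ious.argmax(axis=1)])
    print('best anchor:', anchors[ious.argmax()],
          'IoU:', ious.max(), 'targets:', targets[ious.argmax()])
--------------------------------------------------------------------------------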