├── ILSVRC2014_train_00010391.JPEG
├── RCNN.py
├── README.md
├── RPN.py
└── utils.py

/ILSVRC2014_train_00010391.JPEG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongjk/faster_rcnn_keras/4c01554ba2bf494badd50e9e22a7e7e65046f5b8/ILSVRC2014_train_00010391.JPEG
--------------------------------------------------------------------------------
/RCNN.py:
--------------------------------------------------------------------------------
import os
import traceback
import numpy as np
import numpy.random as npr
import keras.backend as K
from keras.models import Model, load_model
from keras.layers import Input, Layer, Flatten, Dense, BatchNormalization
from keras.applications import InceptionResNetV2
from keras.preprocessing.image import load_img, img_to_array
from utils import generate_anchors, draw_anchors, bbox_overlaps, bbox_transform,\
     loss_cls, smoothL1, parse_label, unmap, filter_boxes, \
     clip_boxes, py_cpu_nms, bbox_transform_inv


################## R-CNN Model #######################
# RoI Pooling layer
class RoIPooling(Layer):
    def __init__(self, size=(7, 7)):
        self.size = size
        super(RoIPooling, self).__init__()

    def build(self, input_shape):
        self.shape = input_shape
        super(RoIPooling, self).build(input_shape)

    def call(self, inputs, **kwargs):
        # inputs: [feature_map, rois, roi_batch_indices]
        # NOTE: tf.image.crop_and_resize expects boxes as normalized
        # [y1, x1, y2, x2] coordinates in [0, 1].
        ind = K.reshape(inputs[2], (-1,))
        x = K.tf.image.crop_and_resize(inputs[0], inputs[1], ind, self.size)
        return x

    def compute_output_shape(self, input_shape):
        a = input_shape[1][0]
        b = self.size[0]
        c = self.size[1]
        d = input_shape[0][3]
        return (a, b, c, d)


BATCH = 256

feature_map = Input(batch_shape=(None, None, None, 1536))
rois = Input(batch_shape=(None, 4))
ind = Input(batch_shape=(None, 1), dtype='int32')

p1 = RoIPooling()([feature_map, rois, ind])

flat1 = Flatten()(p1)

fc1 = Dense(
    units=1024,
    activation="relu",
    name="fc2"
)(flat1)
fc1 = BatchNormalization()(fc1)
# ILSVRC DET has 200 object categories, hence 4*200 box deltas
# and 200 class scores.
output_deltas = Dense(
    units=4 * 200,
    activation="linear",
    kernel_initializer="uniform",
    name="deltas2"
)(fc1)

output_scores = Dense(
    units=1 * 200,
    activation="softmax",
    kernel_initializer="uniform",
    name="scores2"
)(fc1)

model = Model(inputs=[feature_map, rois, ind], outputs=[output_scores, output_deltas])
model.summary()
model.compile(optimizer='rmsprop',
              loss={'deltas2': smoothL1, 'scores2': 'categorical_crossentropy'})

################## prepare batch #######################

FG_FRAC = .25
FG_THRESH = .5
BG_THRESH_HI = .5
BG_THRESH_LO = .1
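# Quick shape sanity check for the classification head (illustrative only;
# the feature map and RoI below are random placeholders, not real data):
#   fake_fmap = np.random.rand(1, 10, 10, 1536).astype('float32')
#   fake_rois = np.array([[0.1, 0.1, 0.5, 0.5]], dtype='float32')  # normalized
#   fake_ind = np.array([[0]], dtype='int32')
#   scores, deltas = model.predict([fake_fmap, fake_rois, fake_ind])
#   # scores.shape == (1, 200), deltas.shape == (1, 800)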
# load an example image to avoid Keras/TF graph initialisation problems
# TODO fix this properly.
pretrained_model = InceptionResNetV2(include_top=False)
img = load_img("./ILSVRC2014_train_00010391.JPEG")
x = img_to_array(img)
x = np.expand_dims(x, axis=0)
not_used = pretrained_model.predict(x)

rpn_model = load_model('weights.hdf5',
                       custom_objects={'loss_cls': loss_cls, 'smoothL1': smoothL1})
# warm up the RPN model with a saved feature map as well
not_used = rpn_model.predict(np.load('n02676566_6914')['fc'])

def produce_batch(filepath, gt_boxes, scale, category):
    # scale: (h_scale, w_scale) as returned by parse_label
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))
    # feed image to pretrained model and get feature map
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height
    # calculate output w, h stride
    w_stride = img_width / width
    h_stride = img_height / height
    # generate base anchors according to the output stride.
    # base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    # slice tiles according to image size and stride.
    # each 1x1x1536 feature-map cell maps to a tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # apply base anchors to all tiles, to get num_feature_map*9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # feed feature map to pretrained RPN model, get proposal scores and deltas.
    res = rpn_model.predict(feature_map)
    scores = res[0]
    scores = scores.reshape(-1, 1)
    deltas = res[1]
    deltas = np.reshape(deltas, (-1, 4))
    # transform anchors + deltas to proposal bbox values (x1, y1, x2, y2)
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (img_height, img_width))
    # remove small boxes, here the threshold is 40 pixels
    keep = filter_boxes(proposals, 40)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # sort scores and only keep the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # apply NMS to the top 6000, and then keep the top 300
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # add gt_boxes to proposals.
    proposals = np.vstack((proposals, gt_boxes))
    # calculate overlaps of proposals and gt_boxes
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment] #?
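    # Worked example of the sampling budget below (illustrative): with
    # BATCH=256 and FG_FRAC=0.25, at most 64 proposals with IoU >= 0.5
    # become foreground samples, and the remaining slots (up to 192) are
    # filled with background proposals whose IoU lies in [0.1, 0.5).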

    # sub-sample proposals into foreground and background
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
    bg_inds = np.where((max_overlaps < BG_THRESH_HI) &
                       (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = BATCH - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
    # the indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # select sampled values from various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    # regression targets from sampled rois to their assigned gt boxes
    targets = bbox_transform(rois, gt_rois)
    rois_num = targets.shape[0]
    # categories in the ILSVRC file names are 1-based, so shift to 0-based
    # before indexing the 200 per-class slots.
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category - 1] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))
    # get gt category as a one-hot vector
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category - 1] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    # also return the feature map, which the generator needs to feed the model
    return rois, batch_box, batch_categories, feature_map
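# Usage sketch (illustrative; the annotation file name is hypothetical):
#   _, gt_boxes, scale = parse_label(anno_path + 'n01234567_42.xml')
#   rois, deltas, cats, fmap = produce_batch(img_path + 'n01234567_42.JPEG',
#                                            gt_boxes, scale, category=17)
#   # rois: (N, 4) sampled proposals, deltas: (N, 800) per-class box targets,
#   # cats: (N, 200) one-hot labels, fmap: (1, h, w, 1536) feature map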

################## generate data #######################
ILSVRC_dataset_path = '/home/jk/faster_rcnn/'
img_path = ILSVRC_dataset_path + 'Data/DET/train/'
anno_path = ILSVRC_dataset_path + 'Annotations/DET/train/'
import glob
from multiprocessing import Process, Queue
from random import randint

def worker(path):
    print('worker start ' + path)
    batch_rois = []
    batch_featuremap_inds = []
    batch_categories = []
    batch_bboxes = []
    fc_index = 0
    dataset = {}
    # e.g. '/ImageSets/DET/train_*'
    for fname in glob.glob(ILSVRC_dataset_path + path):
        print(fname)
        with open(fname, 'r') as f:
            basename = os.path.basename(fname)
            category = int(basename.split('_')[1].split('.')[0])
            content = []
            for line in f:
                if 'extra' not in line:
                    content.append(line)
            dataset[category] = content
    print(len(dataset))
    while 1:
        try:
            category = randint(1, 200)
            content = dataset[category]
            n = randint(0, len(content) - 1)
            line = content[n]
            _, gt_boxes, scale = parse_label(anno_path + line.split()[0] + '.xml')
            if len(gt_boxes) == 0:
                continue
            rois, bboxes, categories, fmap = produce_batch(
                img_path + line.split()[0] + '.JPEG', gt_boxes, scale, category)
        except Exception:
            # print('parse label or produce batch failed: for: ' + line.split()[0])
            # traceback.print_exc()
            continue
        if len(rois) <= 0:
            continue

        for i in range(len(rois)):
            batch_rois.append(rois[i])
            batch_featuremap_inds.append(fc_index)
            batch_categories.append(categories[i])
            batch_bboxes.append(bboxes[i])
        b = np.asarray(batch_rois)
        c = np.asarray(batch_featuremap_inds)
        d = np.asarray(batch_categories)
        e = np.asarray(batch_bboxes)
        # pad the feature-map batch so every model input has the same leading
        # dimension; all RoI indices point at entry 0.
        f = np.zeros((len(b), fmap.shape[1], fmap.shape[2], fmap.shape[3]))
        f[0] = fmap[0]
        yield [f, b, c], [d, e]
        batch_rois = []
        batch_featuremap_inds = []
        batch_categories = []
        batch_bboxes = []
        fc_index = 0

################## start train #######################
# model.load_weights('./rcnn_weights_1.hdf5')
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='./rcnn_weights_2.hdf5', monitor='loss',
                               verbose=1, save_best_only=True)
model.fit_generator(worker('/ImageSets/DET/train_*.txt'), steps_per_epoch=1000,
                    epochs=100, callbacks=[checkpointer])
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
This is the code base for my post [Faster R-CNN step by step](https://dongjk.github.io/code/object+detection/keras/2018/05/21/Faster_R-CNN_step_by_step,_Part_I.html)

In the post, I implement Faster R-CNN step by step in Keras, build a trainable model, and dive into the details of all the tricky parts.
--------------------------------------------------------------------------------
/RPN.py:
--------------------------------------------------------------------------------
import traceback
import numpy as np
import numpy.random as npr
from keras.layers import Input, Conv2D
from keras.models import Model
from keras.applications import InceptionResNetV2
from keras.preprocessing.image import load_img, img_to_array
from utils import generate_anchors, draw_anchors, bbox_overlaps, bbox_transform,\
     loss_cls, smoothL1, parse_label, unmap

k = 9  # number of anchors per feature-map location
################## RPN Model #######################
feature_map_tile = Input(shape=(None, None, 1536))
convolution_3x3 = Conv2D(
    filters=512,
    kernel_size=(3, 3),
    padding='same',
    name="3x3"
)(feature_map_tile)

output_deltas = Conv2D(
    filters=4 * k,
    kernel_size=(1, 1),
    activation="linear",
    kernel_initializer="uniform",
    name="deltas1"
)(convolution_3x3)

output_scores = Conv2D(
    filters=1 * k,
    kernel_size=(1, 1),
    activation="sigmoid",
    kernel_initializer="uniform",
    name="scores1"
)(convolution_3x3)

model = Model(inputs=[feature_map_tile], outputs=[output_scores, output_deltas])
model.compile(optimizer='adam', loss={'scores1': loss_cls, 'deltas1': smoothL1})

################## prepare batch #######################
BG_FG_FRAC = 2

# load an example image to avoid Keras/TF graph initialisation problems
# TODO fix this properly.
pretrained_model = InceptionResNetV2(include_top=False)
img = load_img("./ILSVRC2014_train_00010391.JPEG")
x = img_to_array(img)
x = np.expand_dims(x, axis=0)
not_used = pretrained_model.predict(x)
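# Shape sanity check for the RPN head (illustrative; random input tile):
#   tile = np.random.rand(1, 3, 3, 1536).astype('float32')
#   scores, deltas = model.predict(tile)
#   # scores.shape == (1, 3, 3, 9), deltas.shape == (1, 3, 3, 36)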

def produce_batch(filepath, gt_boxes, scale):
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))
    # feed image to pretrained model and get feature map
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height
    # calculate output w, h stride
    w_stride = img_width / width
    h_stride = img_height / height
    # generate base anchors according to the output stride.
    # base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    # slice tiles according to image size and stride.
    # each 1x1x1536 feature-map cell maps to a tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # apply base anchors to all tiles, to get num_feature_map*9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # only keep anchors inside image+border.
    border = 0
    inds_inside = np.where(
        (all_anchors[:, 0] >= -border) &
        (all_anchors[:, 1] >= -border) &
        (all_anchors[:, 2] < img_width + border) &   # width
        (all_anchors[:, 3] < img_height + border)    # height
    )[0]
    anchors = all_anchors[inds_inside]
    # calculate the overlap of each anchor with each gt box,
    # a matrix with shape [len(anchors) x len(gt_boxes)]
    overlaps = bbox_overlaps(anchors, gt_boxes)
    # find the gt box with the biggest overlap for each anchor,
    # and the overlap ratio. result (len(anchors),)
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # find the anchor with the biggest overlap for each gt box,
    # and the overlap ratio. result (len(gt_boxes),)
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    # labels: 1=fg / 0=bg / -1=ignore
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)
    # set positive labels, as defined in the paper, section 3.1.2:
    # We assign a positive label to two kinds of anchors: (i) the
    # anchor/anchors with the highest Intersection-over-Union
    # (IoU) overlap with a ground-truth box, or (ii) an
    # anchor that has an IoU overlap higher than 0.7 with any gt box
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= .7] = 1
    # set negative labels
    labels[max_overlaps <= .3] = 0
    # subsample positive labels if we have too many
    # num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    # if len(fg_inds) > num_fg:
    #     disable_inds = npr.choice(
    #         fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    #     labels[disable_inds] = -1
    # subsample negative labels if we have too many
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
    # map each kept anchor back to the feature-map tile it came from
    batch_inds = inds_inside[labels != -1]
    batch_inds = (batch_inds // k).astype(int)
    full_labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    batch_label_targets = full_labels.reshape(-1, 1, 1, 1 * k)[batch_inds]
    # regression targets only for positive anchors; everything else stays 0
    # bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :])
    pos_anchors = all_anchors[inds_inside[labels == 1]]
    bbox_targets = bbox_transform(pos_anchors, gt_boxes[argmax_overlaps, :][labels == 1])
    bbox_targets = unmap(bbox_targets, total_anchors, inds_inside[labels == 1], fill=0)
    batch_bbox_targets = bbox_targets.reshape(-1, 1, 1, 4 * k)[batch_inds]
    # pad the feature map so a 3x3 tile can be cut around every location
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)), mode='constant')
    padded_fcmap = np.squeeze(padded_fcmap)
    batch_tiles = []
    for ind in batch_inds:
        x = ind % width
        y = int(ind / width)
        fc_3x3 = padded_fcmap[y:y + 3, x:x + 3, :]
        batch_tiles.append(fc_3x3)
    return np.asarray(batch_tiles), batch_label_targets.tolist(), batch_bbox_targets.tolist()
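# Returned batch format (per image), with k=9 anchors per tile:
#   batch_tiles:         (N, 3, 3, 1536) feature-map crops
#   batch_label_targets: (N, 1, 1, 9)    1=fg / 0=bg / -1=ignore
#   batch_bbox_targets:  (N, 1, 1, 36)   4 regression deltas per anchor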

################## generate data #######################
ILSVRC_dataset_path = '/home/jk/wi/ILSVRC/'
img_path = ILSVRC_dataset_path + 'Data/DET/train/'
anno_path = ILSVRC_dataset_path + 'Annotations/DET/train/'
import glob

BATCH_SIZE = 512
def input_generator():
    batch_tiles = []
    batch_labels = []
    batch_bboxes = []
    while 1:
        for fname in glob.glob(ILSVRC_dataset_path + 'ImageSets/DET/train_*'):
            with open(fname, 'r') as f:
                for line in f:
                    if 'extra' not in line:
                        try:
                            category, gt_boxes, scale = parse_label(anno_path + line.split()[0] + '.xml')
                            if len(gt_boxes) == 0:
                                continue
                            tiles, labels, bboxes = produce_batch(img_path + line.split()[0] + '.JPEG', gt_boxes, scale)
                        except Exception:
                            print('parse label or produce batch failed: for: ' + line.split()[0])
                            traceback.print_exc()
                            continue
                        for i in range(len(tiles)):
                            batch_tiles.append(tiles[i])
                            batch_labels.append(labels[i])
                            batch_bboxes.append(bboxes[i])
                            if len(batch_tiles) == BATCH_SIZE:
                                a = np.asarray(batch_tiles)
                                b = np.asarray(batch_labels)
                                c = np.asarray(batch_bboxes)
                                if not a.any() or not b.any() or not c.any():
                                    print("empty array found.")

                                yield a, [b, c]
                                batch_tiles = []
                                batch_labels = []
                                batch_bboxes = []


################## start train #######################
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='./weights.hdf5', monitor='loss',
                               verbose=1, save_best_only=True)
model.fit_generator(input_generator(), steps_per_epoch=1000, epochs=800, callbacks=[checkpointer])
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import sys
import numpy as np
import keras.backend as K
import xml.etree.ElementTree as ET
from PIL import Image, ImageDraw

def parse_label(xml_file):
    try:
        tree = ET.parse(xml_file)
    except Exception:
        print('Failed to parse: ' + xml_file, file=sys.stderr)
        return None
    root = tree.getroot()
    w_scale = 1
    h_scale = 1
    width = 0
    height = 0
    # upscale images whose sides are below 333 px
    for x in root.iter('width'):
        width = int(x.text)
        if width < 333:
            width = 333
            w_scale = 333 / float(x.text)
    for x in root.iter('height'):
        height = int(x.text)
        if height < 333:
            height = 333
            h_scale = 333 / float(x.text)
    category = []
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for x in root.iter('name'):
        category.append(x.text)
    for x in root.iter('xmin'):
        xmin.append(int(x.text) * w_scale)
    for x in root.iter('ymin'):
        ymin.append(int(x.text) * h_scale)
    for x in root.iter('xmax'):
        xmax.append(int(x.text) * w_scale)
    for x in root.iter('ymax'):
        ymax.append(int(x.text) * h_scale)
    gt_boxes = [list(box) for box in zip(xmin, ymin, xmax, ymax)]
    return category, np.asarray(gt_boxes, dtype=float), (h_scale, w_scale)
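# Example (hypothetical numbers): a 300x200 image is upscaled so both sides
# reach 333 px, i.e. parse_label returns scale factors
# (h_scale, w_scale) = (333/200, 333/300) ~= (1.665, 1.11), and the ground
# truth boxes come back already multiplied by these factors.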


def loss_cls(y_true, y_pred):
    # ignore anchors labelled -1; the loss is computed over fg/bg labels only
    condition = K.not_equal(y_true, -1)
    indices = K.tf.where(condition)

    target = K.tf.gather_nd(y_true, indices)
    output = K.tf.gather_nd(y_pred, indices)
    loss = K.binary_crossentropy(target, output)
    return K.mean(loss)


def smoothL1(y_true, y_pred):
    # only positions with non-zero targets (positive anchors) contribute
    nd = K.tf.where(K.tf.not_equal(y_true, 0))
    y_true = K.tf.gather_nd(y_true, nd)
    y_pred = K.tf.gather_nd(y_pred, nd)
    x = K.tf.losses.huber_loss(y_true, y_pred)
    # x = K.switch(x < HUBER_DELTA, 0.5 * x ** 2, HUBER_DELTA * (x - 0.5 * HUBER_DELTA))
    return x


def draw_anchors(img_path, anchors, pad_size=50):
    im = Image.open(img_path)
    w, h = im.size
    a4im = Image.new('RGB',
                     (w + 2 * pad_size, h + 2 * pad_size),  # padded canvas
                     (255, 255, 255))                       # white
    a4im.paste(im, (pad_size, pad_size))  # top-left corner, not centered
    for a in anchors:
        a = (a + pad_size).astype(int).tolist()
        draw = ImageDraw.Draw(a4im)
        draw.rectangle(a, outline=(255, 0, 0), fill=None)
    return a4im

def generate_anchors(base_width=16, base_height=16, ratios=[0.5, 1, 2],
                     scales=np.asarray([3, 6, 12])):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, w_stride-1, h_stride-1) window.
    """

    base_anchor = np.array([1, 1, base_width, base_height]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
                         for i in range(ratio_anchors.shape[0])])
    return anchors

def _whctrs(anchor):
    """
    Return width, height, x center, and y center for an anchor (window).
    """

    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr

def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """

    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
                         y_ctr - 0.5 * (hs - 1),
                         x_ctr + 0.5 * (ws - 1),
                         y_ctr + 0.5 * (hs - 1)))
    return anchors

def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors

def _scale_enum(anchor, scales):
    """
    Enumerate a set of anchors for each scale wrt an anchor.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors
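# Worked example: generate_anchors(16, 16) returns 9 anchors (3 ratios x
# 3 scales) centred on the (0, 0, 15, 15) reference window; the ratio-1
# anchors at scales 3, 6 and 12 are 48x48, 96x96 and 192x192 pixels.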

def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries.
    """

    # x1 >= 0
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    # y1 >= 0
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    # x2 < im_shape[1]
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    # y2 < im_shape[0]
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes

def filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep

def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


def bbox_transform(ex_rois, gt_rois):
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)

    targets = np.stack((targets_dx, targets_dy, targets_dw, targets_dh))

    targets = np.transpose(targets)

    return targets


def bbox_transform_inv(boxes, deltas):
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    # x1
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    # x2
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    # y2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h

    return pred_boxes
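# Worked example: for an anchor (0, 0, 15, 15) and a ground-truth box
# (8, 8, 23, 23), both are 16 px wide and tall, so dw = dh = log(16/16) = 0
# and the centre offsets are dx = dy = 8/16 = 0.5, i.e. the regression
# targets are [0.5, 0.5, 0.0, 0.0].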

def bbox_overlaps(boxes, query_boxes):
    """
    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    # work in integer pixel coordinates
    boxes = boxes.astype(int)
    N = boxes.shape[0]
    K = query_boxes.shape[0]

    overlaps = np.zeros((N, K), dtype=float)

    for k in range(K):
        box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                    (query_boxes[k, 3] - query_boxes[k, 1] + 1))
        for n in range(N):
            iw = (min(boxes[n, 2], query_boxes[k, 2]) -
                  max(boxes[n, 0], query_boxes[k, 0]) + 1)
            if iw > 0:
                ih = (min(boxes[n, 3], query_boxes[k, 3]) -
                      max(boxes[n, 1], query_boxes[k, 1]) + 1)

                if ih > 0:
                    ua = float((boxes[n, 2] - boxes[n, 0] + 1) *
                               (boxes[n, 3] - boxes[n, 1] + 1) +
                               box_area - iw * ih)
                    overlaps[n, k] = iw * ih / ua

    return overlaps

def unmap(data, count, inds, fill=0):
    """ Unmap a subset of items (data) back to the original set of items (of
    size count) """
    if len(data.shape) == 1:
        ret = np.empty((count, ), dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
    else:
        ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds, :] = data
    return ret
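
# A minimal smoke test (illustrative), runnable as `python utils.py`:
if __name__ == '__main__':
    anchors = generate_anchors(16, 16)
    assert anchors.shape == (9, 4)        # 3 ratios x 3 scales
    gt = np.array([[8., 8., 23., 23.]])
    ious = bbox_overlaps(anchors, gt)     # (9, 1) IoU matrix
    targets = bbox_transform(anchors, gt[ious.argmax(axis=1)])
    print('best anchor:', anchors[ious.argmax()],
          'IoU:', ious.max(), 'targets:', targets[ious.argmax()])
--------------------------------------------------------------------------------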