├── .gitignore
├── README.md
├── __init__.py
├── config.py
├── core
│   ├── MtcnnDetector.py
│   ├── __init__.py
│   ├── detector.py
│   ├── fcn_detector.py
│   ├── imdb.py
│   ├── loader.py
│   ├── metric.py
│   ├── minibatch.py
│   ├── negativemining.py
│   └── symbol.py
├── demo.py
├── example
│   ├── __init__.py
│   ├── train.py
│   ├── train_O_net.py
│   ├── train_P_net.py
│   └── train_R_net.py
├── fddb_result.png
├── model
│   ├── onet-0016.params
│   ├── pnet-0016.params
│   └── rnet-0016.params
├── mxnet_diff.patch
├── prepare_data
│   ├── __init__.py
│   ├── gen_hard_example.py
│   ├── gen_imglist.py
│   ├── gen_pnet_data.py
│   ├── utils.py
│   └── wider_annotations
│       ├── __init__.py
│       ├── readme.txt
│       ├── transform.m
│       ├── transform.py
│       ├── wider_face_test.mat
│       ├── wider_face_train.mat
│       ├── wider_face_val.mat
│       ├── wider_loader.py
│       └── writeLabel.m
├── test01.jpg
├── test_fddb.py
└── tools
    ├── __init__.py
    ├── image_processing.py
    ├── load_model.py
    └── nms.py

/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Introduction
2 | This repository is an implementation of MTCNN in MXNet.
3 | * `core`: core routines for MTCNN training and testing.
4 | * `tools`: utilities for training and testing.
5 | * `data`: see `Data Folder Structure` below for the expected layout. A dataset usually contains `images` and `imglists`.
6 | * `model`: folder for saving training symbols and models.
7 | * `prepare_data`: scripts for generating training data for pnet, rnet and onet.
8 | 
9 | ## Useful information
10 | You're required to modify mxnet/src/operator/regression_output-inl.h according to mxnet_diff.patch before using the code for training.
11 | 
12 | * Dataset format
13 | The images used for training are stored in ./data/dataset_name/images/
14 | The annotation file is placed in ./data/dataset_name/imglists/
15 | 
16 | * For training:
17 | Each line of the annotation file describes one training sample.
18 | The format is:
19 | [path to image] [cls_label] [bbox_label]
20 | cls_label: 1 for positive, 0 for negative, -1 for part face.
21 | bbox_label holds the offsets of x1, y1, x2, y2, calculated as (xgt(ygt) - x(y)) / width(height).
22 | An example would be `12/positive/28 1 -0.05 0.11 -0.05 -0.11`.
23 | Note that all the fields are separated by spaces.
24 | 
25 | * For testing:
26 | Similar to training, but only the path to the image is needed.
27 | 
28 | * Data Folder Structure (suppose root is `data`)
29 | ```
30 | cache (created by imdb)
31 | -- name + image set + gt_roidb
32 | -- results (created by detection and evaluation)
33 | mtcnn # contains images and anno for training mtcnn
34 | -- images
35 | ---- 12 (images of size 12 x 12, used by pnet)
36 | ---- 24 (images of size 24 x 24, used by rnet)
37 | ---- 48 (images of size 48 x 48, used by onet)
38 | -- imglists
39 | ---- train_12.txt
40 | ---- train_24.txt
41 | ---- train_48.txt
42 | custom (datasets for testing)
43 | -- images
44 | -- imglists
45 | ---- image_set.txt
46 | ```
47 | 
48 | * Scripts to generate training data (from the WIDER FACE dataset); see the sketch after this list for the annotation format these scripts emit:
49 |   * run wider_annotations/transform.m (or transform.py) to get the annotation file in the format we need.
50 |   * gen_pnet_data.py: obtain training samples for pnet.
51 |   * gen_hard_example.py: prepare hard examples.
52 |     You can set test_mode to "pnet" to get training data for rnet,
53 |     or set test_mode to "rnet" to get training data for onet.
54 |   * gen_imglist.py: randomly sample the images generated by gen_pnet_data.py or gen_hard_example.py to form the training set.
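
A minimal sketch of how one annotation line in the format above can be produced (illustrative only; `bbox_offsets`, `crop_box` and `gt_box` are hypothetical names, not part of the repository):

```
# Offsets follow the convention above: (gt - crop) normalized by the
# crop's width/height, for each of x1, y1, x2, y2.
def bbox_offsets(crop_box, gt_box):
    x1, y1, x2, y2 = crop_box
    w = x2 - x1 + 1
    h = y2 - y1 + 1
    gx1, gy1, gx2, gy2 = gt_box
    return ((gx1 - x1) / float(w), (gy1 - y1) / float(h),
            (gx2 - x2) / float(w), (gy2 - y2) / float(h))

# a positive (cls_label = 1) sample line for imglists/train_12.txt:
offsets = bbox_offsets((10, 10, 21, 21), (9, 11, 20, 20))
line = '12/positive/28 1 ' + ' '.join('%.2f' % o for o in offsets)
```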
55 | 
56 | ## Results
57 | 
58 | ![image](https://github.com/Seanlinx/mtcnn/blob/master/fddb_result.png)
59 | 
60 | ## License
61 | MIT LICENSE
62 | 
63 | ## Reference
64 | Kaipeng Zhang, Zhanpeng Zhang, Zhifeng Li, Yu Qiao, "Joint Face Detection and Alignment Using Multi-task Cascaded Convolutional Networks," IEEE Signal Processing Letters
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/__init__.py
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from easydict import EasyDict as edict
3 | 
4 | config = edict()
5 | 
6 | config.BATCH_SIZE = 128
7 | 
8 | config.CLS_OHEM = True
9 | config.CLS_OHEM_RATIO = 0.7
10 | config.BBOX_OHEM = False
11 | config.BBOX_OHEM_RATIO = 0.7
12 | 
13 | config.EPS = 1e-14
14 | config.LR_EPOCH = [8, 14]
--------------------------------------------------------------------------------
/core/MtcnnDetector.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import mxnet as mx
3 | import time
4 | from tools import image_processing
5 | #from mx.model import FeedForward
6 | import numpy as np
7 | from config import config
8 | from tools.nms import py_nms
9 | 
10 | class MtcnnDetector(object):
11 |     """
12 |         Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks
13 |         see https://github.com/kpzhang93/MTCNN_face_detection_alignment
14 |         this is an MXNet version
15 |     """
16 |     def __init__(self,
17 |                  detectors,
18 |                  min_face_size=24,
19 |                  stride=2,
20 |                  threshold=[0.6, 0.7, 0.7],
21 |                  scale_factor=0.709,
22 |                  ctx=mx.cpu(),
23 |                  slide_window=False):
24 | 
25 |         self.pnet_detector = detectors[0]
26 |         self.rnet_detector = detectors[1]
27 |         self.onet_detector = detectors[2]
28 |         self.min_face_size = min_face_size
29 |         self.stride = stride
30 |         self.thresh = threshold
31 |         self.ctx = ctx
32 |         self.scale_factor = scale_factor
33 |         self.slide_window = slide_window
34 | 
35 | 
36 |     def convert_to_square(self, bbox):
37 |         """
38 |             convert bbox to square
39 |             Parameters:
40 |             ----------
41 |                 bbox: numpy array, shape n x 5
42 |                     input bbox
43 |             Returns:
44 |             -------
45 |                 square bbox
46 |         """
47 |         square_bbox = bbox.copy()
48 | 
49 |         h = bbox[:, 3] - bbox[:, 1] + 1
50 |         w = bbox[:, 2] - bbox[:, 0] + 1
51 |         max_side = np.maximum(h, w)
52 |         square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5
53 |         square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5
54 |         square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
55 |         square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
56 |         return square_bbox
57 | 
58 |     def calibrate_box(self, bbox, reg):
59 |         """
60 |             calibrate bboxes
61 |             Parameters:
62 |             ----------
63 |                 bbox: numpy array, shape n x 5
64 |                     input bboxes
65 |                 reg: numpy array, shape n x 4
66 |                     bbox adjustments
67 |             Returns:
68 |             -------
69 |                 bboxes after refinement
70 |         """
71 | 
72 |         bbox_c = bbox.copy()
73 |         w = bbox[:, 2] - bbox[:, 0] + 1
74 |         w = np.expand_dims(w, 1)
75 |         h = bbox[:, 3] - bbox[:, 1] + 1
76 |         h = np.expand_dims(h, 1)
77 |         reg_m = np.hstack([w, h, w, h])
78 |         aug = reg_m * reg
79 |         bbox_c[:, 0:4] = bbox_c[:, 0:4] + aug
80 |         return bbox_c
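    # Note on calibrate_box (worked example, not from the original source):
    # reg holds offsets normalized by box width/height -- the same
    # convention as the training targets -- so the absolute adjustment is
    # reg * [w, h, w, h]. A box (0, 0, 9, 9) (w = h = 10) with
    # reg = (0.1, 0.1, -0.1, -0.1) is refined to (1, 1, 8, 8); the score
    # column is left untouched.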
81 | 
82 |     def generate_bbox(self, map, reg, scale, threshold):
83 |         """
84 |             generate bbox from feature map
85 |             Parameters:
86 |             ----------
87 |                 map: numpy array, n x m x 1
88 |                     detect score for each position
89 |                 reg: numpy array, n x m x 4
90 |                     bbox
91 |                 scale: float number
92 |                     scale of this detection
93 |                 threshold: float number
94 |                     detect threshold
95 |             Returns:
96 |             -------
97 |                 bbox array
98 |         """
99 |         stride = 2
100 |         cellsize = 12
101 | 
102 |         t_index = np.where(map > threshold)
103 | 
104 |         # find nothing
105 |         if t_index[0].size == 0:
106 |             return np.array([])
107 | 
108 |         dx1, dy1, dx2, dy2 = [reg[0, i, t_index[0], t_index[1]] for i in range(4)]
109 | 
110 |         reg = np.array([dx1, dy1, dx2, dy2])
111 |         score = map[t_index[0], t_index[1]]
112 |         boundingbox = np.vstack([np.round((stride*t_index[1])/scale),
113 |                                  np.round((stride*t_index[0])/scale),
114 |                                  np.round((stride*t_index[1]+cellsize)/scale),
115 |                                  np.round((stride*t_index[0]+cellsize)/scale),
116 |                                  score,
117 |                                  reg])
118 | 
119 |         return boundingbox.T
120 | 
121 | 
122 |     def resize_image(self, img, scale):
123 |         """
124 |             resize image and transform dimension to [batchsize, channel, height, width]
125 |             Parameters:
126 |             ----------
127 |                 img: numpy array, height x width x channel
128 |                     input image, channels in BGR order here
129 |                 scale: float number
130 |                     scale factor of resize operation
131 |             Returns:
132 |             -------
133 |                 transformed image tensor, 1 x channel x height x width
134 |         """
135 |         height, width, channels = img.shape
136 |         new_height = int(height * scale)  # resized new height
137 |         new_width = int(width * scale)  # resized new width
138 |         new_dim = (new_width, new_height)
139 |         img_resized = cv2.resize(img, new_dim, interpolation=cv2.INTER_LINEAR)  # resized image
140 |         img_resized = image_processing.transform(img_resized)
141 |         return img_resized  # (batch_size, c, h, w)
142 | 
143 | 
144 |     def pad(self, bboxes, w, h):
145 |         """
146 |             pad the bboxes, and also restrict their size
147 |             Parameters:
148 |             ----------
149 |                 bboxes: numpy array, n x 5
150 |                     input bboxes
151 |                 w: float number
152 |                     width of the input image
153 |                 h: float number
154 |                     height of the input image
155 |             Returns :
156 |             ------
157 |                 dy, dx : numpy array, n x 1
158 |                     start point of the bbox in target image
159 |                 edy, edx : numpy array, n x 1
160 |                     end point of the bbox in target image
161 |                 y, x : numpy array, n x 1
162 |                     start point of the bbox in original image
163 |                 ey, ex : numpy array, n x 1
164 |                     end point of the bbox in original image
165 |                 tmph, tmpw: numpy array, n x 1
166 |                     height and width of the bbox
167 |         """
168 |         tmpw, tmph = bboxes[:, 2] - bboxes[:, 0] + 1, bboxes[:, 3] - bboxes[:, 1] + 1
169 |         num_box = bboxes.shape[0]
170 | 
171 |         dx, dy = np.zeros((num_box, )), np.zeros((num_box, ))
172 |         edx, edy = tmpw.copy()-1, tmph.copy()-1
173 | 
174 |         x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]
175 | 
176 |         tmp_index = np.where(ex > w-1)
177 |         edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index]
178 |         ex[tmp_index] = w - 1
179 | 
180 |         tmp_index = np.where(ey > h-1)
181 |         edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index]
182 |         ey[tmp_index] = h - 1
183 | 
184 |         tmp_index = np.where(x < 0)
185 |         dx[tmp_index] = 0 - x[tmp_index]
186 |         x[tmp_index] = 0
187 | 
188 |         tmp_index = np.where(y < 0)
189 |         dy[tmp_index] = 0 - y[tmp_index]
190 |         y[tmp_index] = 0
191 | 
192 |         return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
193 |         return_list = [item.astype(np.int32) for item in return_list]
194 | 
195 |         return return_list
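    # Note on pad (worked example, not from the original source): for each
    # box it returns where to read in the source image (y:ey+1, x:ex+1) and
    # where to write inside a zero-initialized tmph x tmpw patch
    # (dy:edy+1, dx:edx+1). A 10 x 10 box (-3, -3, 6, 6) on a sufficiently
    # large image gives x = y = 0 and dx = dy = 3, so the crop fills
    # tmp[3:10, 3:10] and the top-left border stays zero-padded.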
196 | 
197 | 
198 |     def detect_pnet(self, im):
199 |         """Get face candidates through pnet
200 | 
201 |         Parameters:
202 |         ----------
203 |         im: numpy array
204 |             input image array
205 | 
206 |         Returns:
207 |         -------
208 |         boxes: numpy array
209 |             detected boxes before calibration
210 |         boxes_c: numpy array
211 |             boxes after calibration
212 |         """
213 |         h, w, c = im.shape
214 |         net_size = 12
215 | 
216 |         current_scale = float(net_size) / self.min_face_size  # find initial scale
217 |         im_resized = self.resize_image(im, current_scale)
218 |         _, _, current_height, current_width = im_resized.shape
219 | 
220 |         if self.slide_window:
221 |             # sliding window
222 |             temp_rectangles = list()
223 |             rectangles = list()  # list of rectangles [x11, y11, x12, y12, confidence] (corresponding to original image)
224 |             all_cropped_ims = list()
225 |             while min(current_height, current_width) > net_size:
226 |                 current_y_list = range(0, current_height - net_size + 1, self.stride) if (current_height - net_size) % self.stride == 0 \
227 |                     else range(0, current_height - net_size + 1, self.stride) + [current_height - net_size]
228 |                 current_x_list = range(0, current_width - net_size + 1, self.stride) if (current_width - net_size) % self.stride == 0 \
229 |                     else range(0, current_width - net_size + 1, self.stride) + [current_width - net_size]
230 | 
231 |                 for current_y in current_y_list:
232 |                     for current_x in current_x_list:
233 |                         cropped_im = im_resized[:, :, current_y:current_y + net_size, current_x:current_x + net_size]
234 | 
235 |                         current_rectangle = [int(w * float(current_x) / current_width), int(h * float(current_y) / current_height),
236 |                                              int(w * float(current_x) / current_width) + int(w * float(net_size) / current_width),
237 |                                              int(h * float(current_y) / current_height) + int(h * float(net_size) / current_height),
238 |                                              0.0]
239 |                         temp_rectangles.append(current_rectangle)
240 |                         all_cropped_ims.append(cropped_im)
241 | 
242 |                 current_scale *= self.scale_factor
243 |                 im_resized = self.resize_image(im, current_scale)
244 |                 _, _, current_height, current_width = im_resized.shape
245 | 
246 |             '''
247 |             # helper for setting PNet batch size
248 |             num_boxes = len(all_cropped_ims)
249 |             batch_size = self.pnet_detector.batch_size
250 |             ratio = float(num_boxes) / batch_size
251 |             if ratio > 3 or ratio < 0.3:
252 |                 print "You may need to reset PNet batch size if this info appears frequently, \
253 |             face candidates:%d, current batch_size:%d"%(num_boxes, batch_size)
254 |             '''
255 |             all_cropped_ims = np.vstack(all_cropped_ims)
256 |             cls_scores, reg = self.pnet_detector.predict(all_cropped_ims)
257 | 
258 |             cls_scores = cls_scores[:, 1].flatten()
259 |             keep_inds = np.where(cls_scores > self.thresh[0])[0]
260 | 
261 |             if len(keep_inds) > 0:
262 |                 boxes = np.vstack([temp_rectangles[ind] for ind in keep_inds])
263 |                 boxes[:, 4] = cls_scores[keep_inds]
264 |                 reg = reg[keep_inds].reshape(-1, 4)
265 |             else:
266 |                 return None, None
267 | 
268 | 
269 |             keep = py_nms(boxes, 0.7, 'Union')
270 |             boxes = boxes[keep]
271 | 
272 |             boxes_c = self.calibrate_box(boxes, reg[keep])
273 | 
274 |         else:
275 |             # fcn
276 |             all_boxes = list()
277 |             while min(current_height, current_width) > net_size:
278 |                 cls_map, reg = self.pnet_detector.predict(im_resized)
279 |                 cls_map = cls_map.asnumpy()
280 |                 reg = reg.asnumpy()
281 |                 boxes = self.generate_bbox(cls_map[0, 1, :, :], reg, current_scale, self.thresh[0])
282 | 
283 |                 current_scale *= self.scale_factor
284 |                 im_resized = self.resize_image(im, 
current_scale) 285 | _, _, current_height, current_width = im_resized.shape 286 | 287 | if boxes.size == 0: 288 | continue 289 | keep = py_nms(boxes[:, :5], 0.5, 'Union') 290 | boxes = boxes[keep] 291 | all_boxes.append(boxes) 292 | 293 | if len(all_boxes) == 0: 294 | return None, None 295 | 296 | all_boxes = np.vstack(all_boxes) 297 | 298 | # merge the detection from first stage 299 | keep = py_nms(all_boxes[:, 0:5], 0.7, 'Union') 300 | all_boxes = all_boxes[keep] 301 | boxes = all_boxes[:, :5] 302 | 303 | bbw = all_boxes[:, 2] - all_boxes[:, 0] + 1 304 | bbh = all_boxes[:, 3] - all_boxes[:, 1] + 1 305 | 306 | # refine the boxes 307 | boxes_c = np.vstack([all_boxes[:, 0] + all_boxes[:, 5] * bbw, 308 | all_boxes[:, 1] + all_boxes[:, 6] * bbh, 309 | all_boxes[:, 2] + all_boxes[:, 7] * bbw, 310 | all_boxes[:, 3] + all_boxes[:, 8] * bbh, 311 | all_boxes[:, 4]]) 312 | boxes_c = boxes_c.T 313 | 314 | return boxes, boxes_c 315 | 316 | def detect_rnet(self, im, dets): 317 | """Get face candidates using rnet 318 | 319 | Parameters: 320 | ---------- 321 | im: numpy array 322 | input image array 323 | dets: numpy array 324 | detection results of pnet 325 | 326 | Returns: 327 | ------- 328 | boxes: numpy array 329 | detected boxes before calibration 330 | boxes_c: numpy array 331 | boxes after calibration 332 | """ 333 | h, w, c = im.shape 334 | dets = self.convert_to_square(dets) 335 | dets[:, 0:4] = np.round(dets[:, 0:4]) 336 | 337 | [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h) 338 | num_boxes = dets.shape[0] 339 | 340 | ''' 341 | # helper for setting RNet batch size 342 | batch_size = self.rnet_detector.batch_size 343 | ratio = float(num_boxes) / batch_size 344 | if ratio > 3 or ratio < 0.3: 345 | print "You may need to reset RNet batch size if this info appears frequently, \ 346 | face candidates:%d, current batch_size:%d"%(num_boxes, batch_size) 347 | ''' 348 | 349 | cropped_ims = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32) 350 | for i in range(num_boxes): 351 | tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) 352 | tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = im[y[i]:ey[i]+1, x[i]:ex[i]+1, :] 353 | cropped_ims[i, :, :, :] = image_processing.transform(cv2.resize(tmp, (24, 24))) 354 | 355 | cls_scores, reg = self.rnet_detector.predict(cropped_ims) 356 | cls_scores = cls_scores[:, 1].flatten() 357 | keep_inds = np.where(cls_scores > self.thresh[1])[0] 358 | 359 | if len(keep_inds) > 0: 360 | boxes = dets[keep_inds] 361 | boxes[:, 4] = cls_scores[keep_inds] 362 | reg = reg[keep_inds] 363 | else: 364 | return None, None 365 | 366 | keep = py_nms(boxes, 0.7) 367 | boxes = boxes[keep] 368 | 369 | boxes_c = self.calibrate_box(boxes, reg[keep]) 370 | 371 | return boxes, boxes_c 372 | 373 | def detect_onet(self, im, dets): 374 | """Get face candidates using onet 375 | 376 | Parameters: 377 | ---------- 378 | im: numpy array 379 | input image array 380 | dets: numpy array 381 | detection results of rnet 382 | 383 | Returns: 384 | ------- 385 | boxes: numpy array 386 | detected boxes before calibration 387 | boxes_c: numpy array 388 | boxes after calibration 389 | """ 390 | h, w, c = im.shape 391 | dets = self.convert_to_square(dets) 392 | dets[:, 0:4] = np.round(dets[:, 0:4]) 393 | 394 | [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h) 395 | num_boxes = dets.shape[0] 396 | 397 | ''' 398 | # helper for setting ONet batch size 399 | batch_size = self.onet_detector.batch_size 400 | ratio = float(num_boxes) / batch_size 401 | if ratio > 3 or ratio < 0.3: 402 | 
print "You may need to reset ONet batch size if this info appears frequently, \ 403 | face candidates:%d, current batch_size:%d"%(num_boxes, batch_size) 404 | ''' 405 | 406 | cropped_ims = np.zeros((num_boxes, 3, 48, 48), dtype=np.float32) 407 | for i in range(num_boxes): 408 | tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) 409 | tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = im[y[i]:ey[i]+1, x[i]:ex[i]+1, :] 410 | cropped_ims[i, :, :, :] = image_processing.transform(cv2.resize(tmp, (48, 48))) 411 | cls_scores, reg = self.onet_detector.predict(cropped_ims) 412 | 413 | cls_scores = cls_scores[:, 1].flatten() 414 | keep_inds = np.where(cls_scores > self.thresh[2])[0] 415 | 416 | if len(keep_inds) > 0: 417 | boxes = dets[keep_inds] 418 | boxes[:, 4] = cls_scores[keep_inds] 419 | reg = reg[keep_inds] 420 | else: 421 | return None, None 422 | 423 | boxes_c = self.calibrate_box(boxes, reg) 424 | 425 | keep = py_nms(boxes_c, 0.7, "Minimum") 426 | boxes_c = boxes_c[keep] 427 | 428 | return boxes, boxes_c 429 | 430 | 431 | def detect_face(self, imdb, test_data, vis): 432 | """Detect face over image 433 | 434 | Parameters: 435 | ---------- 436 | imdb: imdb 437 | image database 438 | test_data: data iter 439 | test data iterator 440 | vis: bool 441 | whether to visualize detection results 442 | 443 | Returns: 444 | ------- 445 | """ 446 | all_boxes = list() 447 | batch_idx = 0 448 | for databatch in test_data: 449 | if batch_idx % 100 == 0: 450 | print "%d images done"%batch_idx 451 | im = databatch.data[0].asnumpy().astype(np.uint8) 452 | t = time.time() 453 | 454 | # pnet 455 | if self.pnet_detector: 456 | boxes, boxes_c = self.detect_pnet(im) 457 | if boxes_c is None: 458 | all_boxes.append(np.array([])) 459 | batch_idx += 1 460 | continue 461 | if vis: 462 | rgb_im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) 463 | self.vis_two(rgb_im, boxes, boxes_c) 464 | 465 | t1 = time.time() - t 466 | t = time.time() 467 | 468 | # rnet 469 | if self.rnet_detector: 470 | boxes, boxes_c = self.detect_rnet(im, boxes_c) 471 | if boxes_c is None: 472 | all_boxes.append(np.array([])) 473 | batch_idx += 1 474 | continue 475 | if vis: 476 | self.vis_two(rgb_im, boxes, boxes_c) 477 | 478 | t2 = time.time() - t 479 | t = time.time() 480 | 481 | # onet 482 | if self.onet_detector: 483 | boxes, boxes_c = self.detect_onet(im, boxes_c) 484 | if boxes_c is None: 485 | all_boxes.append(np.array([])) 486 | batch_idx += 1 487 | continue 488 | # all_boxes.append(boxes_c) 489 | if vis: 490 | self.vis_two(rgb_im, boxes, boxes_c) 491 | 492 | t3 = time.time() - t 493 | t = time.time() 494 | print "time cost " + '{:.3f}'.format(t1+t2+t3) + ' pnet {:.3f} rnet {:.3f} onet {:.3f}'.format(t1, t2, t3) 495 | 496 | all_boxes.append(boxes_c) 497 | batch_idx += 1 498 | # save detections into fddb format 499 | # imdb.write_results(all_boxes) 500 | return all_boxes 501 | 502 | 503 | def vis_two(self, im_array, dets1, dets2, thresh=0.9): 504 | """Visualize detection results before and after calibration 505 | 506 | Parameters: 507 | ---------- 508 | im_array: numpy.ndarray, shape(1, c, h, w) 509 | test image in rgb 510 | dets1: numpy.ndarray([[x1 y1 x2 y2 score]]) 511 | detection results before calibration 512 | dets2: numpy.ndarray([[x1 y1 x2 y2 score]]) 513 | detection results after calibration 514 | thresh: float 515 | boxes with scores > thresh will be drawn in red otherwise yellow 516 | 517 | Returns: 518 | ------- 519 | """ 520 | import matplotlib.pyplot as plt 521 | import random 522 | 523 | figure = plt.figure() 524 | plt.subplot(121) 525 | 
plt.imshow(im_array) 526 | color = 'yellow' 527 | 528 | for i in range(dets1.shape[0]): 529 | bbox = dets1[i, :4] 530 | score = dets1[i, 4] 531 | if score > thresh: 532 | rect = plt.Rectangle((bbox[0], bbox[1]), 533 | bbox[2] - bbox[0], 534 | bbox[3] - bbox[1], fill=False, 535 | edgecolor='red', linewidth=0.7) 536 | plt.gca().add_patch(rect) 537 | plt.gca().text(bbox[0], bbox[1] - 2, 538 | '{:.3f}'.format(score), 539 | bbox=dict(facecolor='blue', alpha=0.5), fontsize=12, color='white') 540 | else: 541 | rect = plt.Rectangle((bbox[0], bbox[1]), 542 | bbox[2] - bbox[0], 543 | bbox[3] - bbox[1], fill=False, 544 | edgecolor=color, linewidth=0.5) 545 | plt.gca().add_patch(rect) 546 | 547 | plt.subplot(122) 548 | plt.imshow(im_array) 549 | color = 'yellow' 550 | 551 | for i in range(dets2.shape[0]): 552 | bbox = dets2[i, :4] 553 | score = dets2[i, 4] 554 | if score > thresh: 555 | rect = plt.Rectangle((bbox[0], bbox[1]), 556 | bbox[2] - bbox[0], 557 | bbox[3] - bbox[1], fill=False, 558 | edgecolor='red', linewidth=0.7) 559 | plt.gca().add_patch(rect) 560 | plt.gca().text(bbox[0], bbox[1] - 2, 561 | '{:.3f}'.format(score), 562 | bbox=dict(facecolor='blue', alpha=0.5), fontsize=12, color='white') 563 | else: 564 | rect = plt.Rectangle((bbox[0], bbox[1]), 565 | bbox[2] - bbox[0], 566 | bbox[3] - bbox[1], fill=False, 567 | edgecolor=color, linewidth=0.5) 568 | plt.gca().add_patch(rect) 569 | plt.show() 570 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/core/__init__.py -------------------------------------------------------------------------------- /core/detector.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | 4 | from config import config 5 | 6 | class Detector(object): 7 | def __init__(self, symbol, data_size, batch_size, ctx=None, 8 | arg_params=None, aux_params=None): 9 | self.symbol = symbol 10 | self.data_size = data_size 11 | self.ctx = ctx 12 | if self.ctx is None: 13 | self.ctx = mx.cpu() 14 | self.arg_params = arg_params 15 | self.aux_params = aux_params 16 | 17 | self.batch_size = batch_size 18 | data_shapes = {'data': (self.batch_size, 3, self.data_size, self.data_size)} 19 | executor = self.symbol.simple_bind(self.ctx, grad_req='null', **dict(data_shapes)) 20 | executor.copy_params_from(self.arg_params, self.aux_params) 21 | self.executor = executor 22 | 23 | self.output_dict = None 24 | self.data_shape = data_shapes 25 | self.t = 0 26 | 27 | 28 | def predict(self, databatch): 29 | # access data 30 | # databatch: N x 3 x data_size x data_size 31 | scores = [] 32 | batch_size = self.batch_size 33 | 34 | minibatch = [] 35 | cur = 0 36 | n = databatch.shape[0] 37 | while cur < n: 38 | minibatch.append(databatch[cur:min(cur+batch_size, n), :, :, :]) 39 | cur += batch_size 40 | 41 | data_arrays = self.executor.arg_dict['data'] 42 | out_list = [[] for _ in range(len(self.executor.outputs))] 43 | 44 | for idx, data in enumerate(minibatch): 45 | m = data.shape[0] 46 | real_size = self.batch_size 47 | if m < batch_size: 48 | keep_inds = np.arange(m) 49 | gap = self.batch_size - m 50 | while gap >= len(keep_inds): 51 | gap -= len(keep_inds) 52 | keep_inds = np.concatenate((keep_inds, keep_inds)) 53 | if gap != 0: 54 | keep_inds = np.concatenate((keep_inds, keep_inds[:gap])) 55 | data = 
data[keep_inds] 56 | real_size = m 57 | 58 | data_arrays[:] = data 59 | self.executor.forward(is_train=False) 60 | 61 | for o_list, o_nd in zip(out_list, self.executor.outputs): 62 | o_list.append(o_nd[0:real_size].asnumpy()) 63 | 64 | out = list() 65 | 66 | for o in out_list: 67 | out.append(np.vstack(o)) 68 | 69 | return out 70 | -------------------------------------------------------------------------------- /core/fcn_detector.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | 4 | from config import config 5 | 6 | class FcnDetector(object): 7 | def __init__(self, symbol, ctx=None, 8 | arg_params=None, aux_params=None): 9 | self.symbol = symbol 10 | self.ctx = ctx 11 | if self.ctx is None: 12 | self.ctx = mx.cpu() 13 | self.arg_params = arg_params 14 | self.aux_params = aux_params 15 | self.output_dict = None 16 | 17 | def predict(self, databatch): 18 | data_shape = {'data': databatch.shape} 19 | self.arg_params['data'] = mx.nd.array(databatch, self.ctx) 20 | 21 | arg_shapes, out_shape, aux_shapes = self.symbol.infer_shape(**data_shape) 22 | arg_shapes_dict = dict(zip(self.symbol.list_arguments(), arg_shapes)) 23 | 24 | self.executor = self.symbol.bind(self.ctx, self.arg_params, args_grad=None, 25 | grad_req='null', aux_states=self.aux_params) 26 | 27 | self.executor.forward(is_train=False) 28 | outputs = self.executor.outputs 29 | 30 | return outputs 31 | -------------------------------------------------------------------------------- /core/imdb.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import os 3 | import cPickle 4 | import numpy as np 5 | from config import config 6 | 7 | class IMDB(object): 8 | def __init__(self, name, image_set, root_path, dataset_path, mode='train'): 9 | self.name = name + '_' + image_set 10 | self.image_set = image_set 11 | self.root_path = root_path 12 | self.data_path = dataset_path 13 | self.mode = mode 14 | 15 | self.classes = ['__background__', 'face'] 16 | self.num_classes = 2 17 | self.image_set_index = self.load_image_set_index() 18 | self.num_images = len(self.image_set_index) 19 | 20 | 21 | @property 22 | def cache_path(self): 23 | """Make a directory to store all caches 24 | 25 | Parameters: 26 | ---------- 27 | Returns: 28 | ------- 29 | cache_path: str 30 | directory to store caches 31 | """ 32 | cache_path = os.path.join(self.root_path, 'cache') 33 | if not os.path.exists(cache_path): 34 | os.mkdir(cache_path) 35 | return cache_path 36 | 37 | 38 | def load_image_set_index(self): 39 | """Get image index 40 | 41 | Parameters: 42 | ---------- 43 | Returns: 44 | ------- 45 | image_set_index: str 46 | relative path of image 47 | """ 48 | image_set_index_file = os.path.join(self.data_path, 'imglists', self.image_set + '.txt') 49 | assert os.path.exists(image_set_index_file), 'Path does not exist: {}'.format(image_set_index_file) 50 | with open(image_set_index_file, 'r') as f: 51 | image_set_index = [x.strip().split(' ')[0] for x in f.readlines()] 52 | return image_set_index 53 | 54 | 55 | def gt_imdb(self): 56 | """Get and save ground truth image database 57 | 58 | Parameters: 59 | ---------- 60 | Returns: 61 | ------- 62 | gt_imdb: dict 63 | image database with annotations 64 | """ 65 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 66 | if os.path.exists(cache_file): 67 | with open(cache_file, 'rb') as f: 68 | imdb = cPickle.load(f) 69 | print '{} gt imdb loaded from 
{}'.format(self.name, cache_file) 70 | return imdb 71 | gt_imdb = self.load_annotations() 72 | with open(cache_file, 'wb') as f: 73 | cPickle.dump(gt_imdb, f, cPickle.HIGHEST_PROTOCOL) 74 | return gt_imdb 75 | 76 | 77 | def image_path_from_index(self, index): 78 | """Given image index, return full path 79 | 80 | Parameters: 81 | ---------- 82 | index: str 83 | relative path of image 84 | Returns: 85 | ------- 86 | image_file: str 87 | full path of image 88 | """ 89 | image_file = os.path.join(self.data_path, 'images', index) 90 | if "." not in image_file: 91 | image_file = image_file + '.jpg' 92 | assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file) 93 | return image_file 94 | 95 | 96 | def load_annotations(self): 97 | """Load annotations 98 | 99 | Parameters: 100 | ---------- 101 | Returns: 102 | ------- 103 | imdb: dict 104 | image database with annotations 105 | """ 106 | annotation_file = os.path.join(self.data_path, 'imglists', self.image_set + '.txt') 107 | assert os.path.exists(annotation_file), 'annotations not found at {}'.format(annotation_file) 108 | with open(annotation_file, 'r') as f: 109 | annotations = f.readlines() 110 | 111 | imdb = [] 112 | for i in range(self.num_images): 113 | annotation = annotations[i].strip().split(' ') 114 | index = annotation[0] 115 | im_path = self.image_path_from_index(index) 116 | imdb_ = dict() 117 | imdb_['image'] = im_path 118 | if self.mode == 'test': 119 | # gt_boxes = map(float, annotation[1:]) 120 | # boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4) 121 | # imdb_['gt_boxes'] = boxes 122 | pass 123 | else: 124 | label = annotation[1] 125 | imdb_['label'] = int(label) 126 | imdb_['flipped'] = False 127 | imdb_['bbox_target'] = np.zeros((4,)) 128 | if len(annotation[2:]) == 4: 129 | bbox_target = annotation[2:] 130 | imdb_['bbox_target'] = np.array(bbox_target).astype(float) 131 | 132 | imdb.append(imdb_) 133 | return imdb 134 | 135 | 136 | def append_flipped_images(self, imdb): 137 | """append flipped images to imdb 138 | 139 | Parameters: 140 | ---------- 141 | imdb: imdb 142 | image database 143 | Returns: 144 | ------- 145 | imdb: dict 146 | image database with flipped image annotations added 147 | """ 148 | print 'append flipped images to imdb', len(imdb) 149 | for i in range(len(imdb)): 150 | imdb_ = imdb[i] 151 | m_bbox = imdb_['bbox_target'].copy() 152 | m_bbox[0], m_bbox[2] = -m_bbox[2], -m_bbox[0] 153 | 154 | entry = {'image': imdb_['image'], 155 | 'label': imdb_['label'], 156 | 'bbox_target': m_bbox, 157 | 'flipped': True} 158 | 159 | imdb.append(entry) 160 | self.image_set_index *= 2 161 | return imdb 162 | 163 | def write_results(self, all_boxes): 164 | """write results 165 | 166 | Parameters: 167 | ---------- 168 | all_boxes: list of numpy.ndarray 169 | detection results 170 | Returns: 171 | ------- 172 | """ 173 | print 'Writing fddb results' 174 | res_folder = os.path.join(self.cache_path, 'results') 175 | if not os.path.exists(res_folder): 176 | os.makedirs(res_folder) 177 | 178 | # save results to fddb format 179 | filename = os.path.join(res_folder, self.image_set + '-out.txt') 180 | with open(filename, 'w') as f: 181 | for im_ind, index in enumerate(self.image_set_index): 182 | f.write('%s\n'%index) 183 | dets = all_boxes[im_ind] 184 | f.write('%d\n'%dets.shape[0]) 185 | if len(dets) == 0: 186 | continue 187 | for k in range(dets.shape[0]): 188 | f.write('{:.2f} {:.2f} {:.2f} {:.2f} {:.5f}\n'. 
189 | format(dets[k, 0], dets[k, 1], dets[k, 2]-dets[k, 0], dets[k, 3]-dets[k, 1], dets[k, 4])) 190 | -------------------------------------------------------------------------------- /core/loader.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | import minibatch 4 | from config import config 5 | 6 | class TestLoader(mx.io.DataIter): 7 | def __init__(self, imdb, batch_size=1, shuffle=False): 8 | self.imdb = imdb 9 | self.batch_size = batch_size 10 | self.shuffle = shuffle 11 | self.size = len(imdb) 12 | self.index = np.arange(self.size) 13 | 14 | self.cur = 0 15 | self.data = None 16 | self.label = None 17 | 18 | self.data_names = ['data'] 19 | self.label_names = [] 20 | 21 | self.reset() 22 | self.get_batch() 23 | 24 | @property 25 | def provide_data(self): 26 | return [(k, v.shape) for k, v in zip(self.data_names, self.data)] 27 | 28 | @property 29 | def provide_label(self): 30 | return [(k, v.shape) for k, v in zip(self.label_names, self.label)] 31 | 32 | def reset(self): 33 | self.cur = 0 34 | if self.shuffle: 35 | np.random.shuffle(self.index) 36 | 37 | def iter_next(self): 38 | return self.cur + self.batch_size <= self.size 39 | 40 | def next(self): 41 | if self.iter_next(): 42 | self.get_batch() 43 | self.cur += self.batch_size 44 | return mx.io.DataBatch(data=self.data, label=self.label, 45 | pad=self.getpad(), index=self.getindex(), 46 | provide_data=self.provide_data, provide_label=self.provide_label) 47 | else: 48 | raise StopIteration 49 | 50 | def getindex(self): 51 | return self.cur / self.batch_size 52 | 53 | def getpad(self): 54 | if self.cur + self.batch_size > self.size: 55 | return self.cur + self.batch_size - self.size 56 | else: 57 | return 0 58 | 59 | def get_batch(self): 60 | cur_from = self.cur 61 | cur_to = min(cur_from + self.batch_size, self.size) 62 | imdb = [self.imdb[self.index[i]] for i in range(cur_from, cur_to)] 63 | data, label = minibatch.get_testbatch(imdb) 64 | self.data = [mx.nd.array(data[name]) for name in self.data_names] 65 | self.label = [mx.nd.array(label[name]) for name in self.label_names] 66 | 67 | class ImageLoader(mx.io.DataIter): 68 | def __init__(self, imdb, im_size, batch_size=config.BATCH_SIZE, shuffle=False, ctx=None, work_load_list=None): 69 | 70 | super(ImageLoader, self).__init__() 71 | 72 | self.imdb = imdb 73 | self.batch_size = batch_size 74 | self.im_size = im_size 75 | self.shuffle = shuffle 76 | self.ctx = ctx 77 | if self.ctx is None: 78 | self.ctx = [mx.cpu()] 79 | self.work_load_list = work_load_list 80 | 81 | self.cur = 0 82 | self.size = len(imdb) 83 | self.index = np.arange(self.size) 84 | self.num_classes = 2 85 | 86 | self.batch = None 87 | self.data = None 88 | self.label = None 89 | 90 | self.label_names= ['label', 'bbox_target'] 91 | self.reset() 92 | self.get_batch() 93 | 94 | @property 95 | def provide_data(self): 96 | return [('data', self.data[0].shape)] 97 | # return [(k, v.shape) for k, v in zip(self.data_name, self.data)] 98 | 99 | 100 | @property 101 | def provide_label(self): 102 | return [(k, v.shape) for k, v in zip(self.label_names, self.label)] 103 | 104 | 105 | def reset(self): 106 | self.cur = 0 107 | if self.shuffle: 108 | np.random.shuffle(self.index) 109 | 110 | def iter_next(self): 111 | return self.cur + self.batch_size <= self.size 112 | 113 | def next(self): 114 | if self.iter_next(): 115 | self.get_batch() 116 | self.cur += self.batch_size 117 | return mx.io.DataBatch(data=self.data, label=self.label, 118 | 
pad=self.getpad(), index=self.getindex(), 119 | provide_data=self.provide_data, provide_label=self.provide_label) 120 | else: 121 | raise StopIteration 122 | 123 | def getindex(self): 124 | return self.cur / self.batch_size 125 | 126 | def getpad(self): 127 | if self.cur + self.batch_size > self.size: 128 | return self.cur + self.batch_size - self.size 129 | else: 130 | return 0 131 | 132 | def get_batch(self): 133 | cur_from = self.cur 134 | cur_to = min(cur_from + self.batch_size, self.size) 135 | imdb = [self.imdb[self.index[i]] for i in range(cur_from, cur_to)] 136 | data, label = minibatch.get_minibatch(imdb, self.num_classes, self.im_size) 137 | self.data = [mx.nd.array(data['data'])] 138 | self.label = [mx.nd.array(label[name]) for name in self.label_names] 139 | -------------------------------------------------------------------------------- /core/metric.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | from config import config 4 | 5 | 6 | class Accuracy(mx.metric.EvalMetric): 7 | def __init__(self): 8 | super(Accuracy, self).__init__('Accuracy') 9 | 10 | def update(self, labels, preds): 11 | # output: cls_prob_output, bbox_pred_output, cls_keep_inds, bbox_keep_inds 12 | # label: label, bbox_target 13 | pred_label = mx.ndarray.argmax_channel(preds[0]).asnumpy().astype('int32') 14 | label = labels[0].asnumpy() 15 | 16 | # negative mining 17 | cls_keep = preds[2].asnumpy() 18 | keep = np.where(cls_keep == 1)[0] 19 | 20 | pred_label = pred_label[keep] 21 | label = label[keep] 22 | 23 | self.sum_metric += (pred_label.flat == label.flat).sum() 24 | self.num_inst += len(pred_label.flat) 25 | 26 | 27 | class LogLoss(mx.metric.EvalMetric): 28 | def __init__(self): 29 | super(LogLoss, self).__init__('LogLoss') 30 | 31 | def update(self, labels, preds): 32 | # output: cls_prob_output, bbox_pred_output, cls_keep_inds, bbox_keep_inds 33 | # label: label, bbox_target 34 | pred_cls = preds[0].asnumpy() 35 | label = labels[0].asnumpy().astype('int32') 36 | 37 | cls_keep = preds[2].asnumpy() 38 | keep = np.where(cls_keep == 1)[0] 39 | 40 | pred_cls = pred_cls[keep].reshape(-1, 2) 41 | label = label[keep] 42 | 43 | cls = pred_cls[np.arange(label.shape[0]), label.flat] 44 | 45 | cls += config.EPS 46 | cls_loss = -1 * np.log(cls) 47 | 48 | cls_loss = np.sum(cls_loss) 49 | self.sum_metric += cls_loss 50 | self.num_inst += label.shape[0] 51 | 52 | 53 | class BBOX_MSE(mx.metric.EvalMetric): 54 | def __init__(self): 55 | super(BBOX_MSE, self).__init__('BBOX_MSE') 56 | 57 | def update(self,labels, preds): 58 | pred_delta = preds[1].asnumpy() 59 | bbox_target = labels[1].asnumpy() 60 | 61 | bbox_keep = preds[3].asnumpy() 62 | keep = np.where(bbox_keep == 1)[0] 63 | 64 | pred_delta = pred_delta[keep] 65 | bbox_target = bbox_target[keep] 66 | 67 | e = (pred_delta - bbox_target)**2 68 | error = np.sum(e) 69 | self.sum_metric += error 70 | self.num_inst += e.size 71 | -------------------------------------------------------------------------------- /core/minibatch.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from tools import image_processing 3 | from config import config 4 | import numpy as np 5 | 6 | def get_minibatch(imdb, num_classes, im_size): 7 | # im_size: 12, 24 or 48 8 | num_images = len(imdb) 9 | processed_ims = list() 10 | cls_label = list() 11 | bbox_reg_target = list() 12 | for i in range(num_images): 13 | im = cv2.imread(imdb[i]['image']) 14 | h, w, c = 
im.shape 15 | cls = imdb[i]['label'] 16 | bbox_target = imdb[i]['bbox_target'] 17 | 18 | assert h == w == im_size, "image size wrong" 19 | if imdb[i]['flipped']: 20 | im = im[:, ::-1, :] 21 | 22 | im_tensor = image_processing.transform(im) 23 | processed_ims.append(im_tensor) 24 | cls_label.append(cls) 25 | bbox_reg_target.append(bbox_target) 26 | 27 | im_array = np.vstack(processed_ims) 28 | label_array = np.array(cls_label) 29 | bbox_target_array = np.vstack(bbox_reg_target) 30 | ''' 31 | bbox_reg_weight = np.ones(label_array.shape) 32 | invalid = np.where(label_array == 0)[0] 33 | bbox_reg_weight[invalid] = 0 34 | bbox_reg_weight = np.repeat(bbox_reg_weight, 4, axis=1) 35 | ''' 36 | if im_size == 12: 37 | label_array = label_array.reshape(-1, 1) 38 | 39 | data = {'data': im_array} 40 | label = {'label': label_array, 41 | 'bbox_target': bbox_target_array} 42 | 43 | return data, label 44 | 45 | def get_testbatch(imdb): 46 | assert len(imdb) == 1, "Single batch only" 47 | im = cv2.imread(imdb[0]['image']) 48 | im_array = im 49 | data = {'data': im_array} 50 | label = {} 51 | return data, label 52 | -------------------------------------------------------------------------------- /core/negativemining.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | from config import config 4 | 5 | class NegativeMiningOperator(mx.operator.CustomOp): 6 | def __init__(self, cls_ohem=config.CLS_OHEM, cls_ohem_ratio=config.CLS_OHEM_RATIO, 7 | bbox_ohem=config.BBOX_OHEM, bbox_ohem_ratio=config.BBOX_OHEM_RATIO): 8 | super(NegativeMiningOperator, self).__init__() 9 | self.cls_ohem = cls_ohem 10 | self.cls_ohem_ratio = cls_ohem_ratio 11 | self.bbox_ohem = bbox_ohem 12 | self.bbox_ohem_ratio = bbox_ohem_ratio 13 | 14 | def forward(self, is_train, req, in_data, out_data, aux): 15 | cls_prob = in_data[0].asnumpy() # batchsize x 2 x 1 x 1 16 | bbox_pred = in_data[1].asnumpy() # batchsize x 4 17 | label = in_data[2].asnumpy().astype(int) # batchsize x 1 18 | bbox_target = in_data[3].asnumpy() # batchsize x 4 19 | 20 | self.assign(out_data[0], req[0], in_data[0]) 21 | self.assign(out_data[1], req[1], in_data[1]) 22 | 23 | # cls 24 | cls_prob = cls_prob.reshape(-1, 2) 25 | valid_inds = np.where(label > -1)[0] 26 | cls_keep = np.zeros(cls_prob.shape[0]) 27 | 28 | if self.cls_ohem: 29 | keep_num = int(len(valid_inds) * self.cls_ohem_ratio) 30 | cls_valid = cls_prob[valid_inds, :] 31 | label_valid = label.flatten()[valid_inds] 32 | 33 | cls = cls_valid[np.arange(len(valid_inds)), label_valid] + config.EPS 34 | log_loss = - np.log(cls) 35 | keep = np.argsort(log_loss)[::-1][:keep_num] 36 | cls_keep[valid_inds[keep]] = 1 37 | else: 38 | cls_keep[valid_inds] = 1 39 | self.assign(out_data[2], req[2], mx.nd.array(cls_keep)) 40 | 41 | # bbox 42 | valid_inds = np.where(abs(label) == 1)[0] 43 | bbox_keep = np.zeros(cls_prob.shape[0]) 44 | 45 | if self.bbox_ohem: 46 | keep_num = int(len(valid_inds) * self.bbox_ohem_ratio) 47 | bbox_valid = bbox_pred[valid_inds, :] 48 | bbox_target_valid = bbox_target[valid_inds, :] 49 | square_error = np.sum((bbox_valid - bbox_target_valid)**2, axis=1) 50 | keep = np.argsort(square_error)[::-1][:keep_num] 51 | bbox_keep[valid_inds[keep]] = 1 52 | else: 53 | bbox_keep[valid_inds] = 1 54 | self.assign(out_data[3], req[3], mx.nd.array(bbox_keep)) 55 | 56 | 57 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 58 | cls_keep = out_data[2].asnumpy().reshape(-1, 1) 59 | bbox_keep = 
out_data[3].asnumpy().reshape(-1, 1) 60 | 61 | cls_grad = np.repeat(cls_keep, 2, axis=1) 62 | bbox_grad = np.repeat(bbox_keep, 4, axis=1) 63 | 64 | cls_grad /= len(np.where(cls_keep == 1)[0]) 65 | bbox_grad /= len(np.where(bbox_keep == 1)[0]) 66 | 67 | cls_grad = cls_grad.reshape(in_data[0].shape) 68 | self.assign(in_grad[0], req[0], mx.nd.array(cls_grad)) 69 | self.assign(in_grad[1], req[1], mx.nd.array(bbox_grad)) 70 | 71 | 72 | @mx.operator.register("negativemining") 73 | class NegativeMiningProp(mx.operator.CustomOpProp): 74 | def __init__(self): 75 | super(NegativeMiningProp, self).__init__(need_top_grad=False) 76 | 77 | def list_arguments(self): 78 | return ['cls_prob', 'bbox_pred', 'label', 'bbox_target'] 79 | 80 | def list_outputs(self): 81 | return ['cls_out', 'bbox_out', 'cls_keep', 'bbox_keep'] 82 | 83 | def infer_shape(self, in_shape): 84 | keep_shape = (in_shape[0][0], ) 85 | return in_shape, [in_shape[0], in_shape[1], keep_shape, keep_shape] 86 | 87 | def create_operator(self, ctx, shapes, dtypes): 88 | return NegativeMiningOperator() 89 | -------------------------------------------------------------------------------- /core/symbol.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import negativemining 3 | from config import config 4 | 5 | def P_Net(mode='train'): 6 | """ 7 | Proposal Network 8 | input shape 3 x 12 x 12 9 | """ 10 | data = mx.symbol.Variable(name="data") 11 | bbox_target = mx.symbol.Variable(name="bbox_target") 12 | label = mx.symbol.Variable(name="label") 13 | 14 | conv1 = mx.symbol.Convolution(data=data, kernel=(3, 3), num_filter=10, name="conv1") 15 | prelu1 = mx.symbol.LeakyReLU(data=conv1, act_type="prelu", name="prelu1") 16 | pool1 = mx.symbol.Pooling(data=prelu1, pool_type="max", pooling_convention="full", kernel=(2, 2), stride=(2, 2), name="pool1") 17 | 18 | conv2 = mx.symbol.Convolution(data=pool1, kernel=(3, 3), num_filter=16, name="conv2") 19 | prelu2 = mx.symbol.LeakyReLU(data=conv2, act_type="prelu", name="prelu2") 20 | 21 | conv3 = mx.symbol.Convolution(data=prelu2, kernel=(3, 3), num_filter=32, name="conv3") 22 | prelu3 = mx.symbol.LeakyReLU(data=conv3, act_type="prelu", name="prelu3") 23 | 24 | conv4_1 = mx.symbol.Convolution(data=prelu3, kernel=(1, 1), num_filter=2, name="conv4_1") 25 | conv4_2 = mx.symbol.Convolution(data=prelu3, kernel=(1, 1), num_filter=4, name="conv4_2") 26 | 27 | if mode == 'test': 28 | cls_prob = mx.symbol.SoftmaxActivation(data=conv4_1, mode="channel", name="cls_prob") 29 | bbox_pred = conv4_2 30 | group = mx.symbol.Group([cls_prob, bbox_pred]) 31 | 32 | else: 33 | cls_prob = mx.symbol.SoftmaxOutput(data=conv4_1, label=label, 34 | multi_output=True, use_ignore=True, 35 | out_grad=True, name="cls_prob") 36 | conv4_2_reshape = mx.symbol.Reshape(data = conv4_2, shape=(-1, 4), name="conv4_2_reshape") 37 | bbox_pred = mx.symbol.LinearRegressionOutput(data=conv4_2_reshape, label=bbox_target, 38 | grad_scale=1, out_grad=True, name="bbox_pred") 39 | 40 | out = mx.symbol.Custom(cls_prob=cls_prob, bbox_pred=bbox_pred, 41 | label=label, bbox_target=bbox_target, 42 | op_type='negativemining', name="negative_mining") 43 | group = mx.symbol.Group([out]) 44 | return group 45 | 46 | 47 | def R_Net(mode='train'): 48 | """ 49 | Refine Network 50 | input shape 3 x 24 x 24 51 | """ 52 | data = mx.symbol.Variable(name="data") 53 | bbox_target = mx.symbol.Variable(name="bbox_target") 54 | label = mx.symbol.Variable(name="label") 55 | 56 | conv1 = mx.symbol.Convolution(data=data, 
kernel=(3, 3), num_filter=28, name="conv1") 57 | prelu1 = mx.symbol.LeakyReLU(data=conv1, act_type="prelu", name="prelu1") 58 | pool1 = mx.symbol.Pooling(data=prelu1, pool_type="max", pooling_convention="full", kernel=(3, 3), stride=(2, 2), name="pool1") 59 | 60 | conv2 = mx.symbol.Convolution(data=pool1, kernel=(3, 3), num_filter=48, name="conv2") 61 | prelu2 = mx.symbol.LeakyReLU(data=conv2, act_type="prelu", name="prelu2") 62 | pool2 = mx.symbol.Pooling(data=prelu2, pool_type="max", pooling_convention="full", kernel=(3, 3), stride=(2, 2), name="pool2") 63 | 64 | conv3 = mx.symbol.Convolution(data=pool2, kernel=(2, 2), num_filter=64, name="conv3") 65 | prelu3 = mx.symbol.LeakyReLU(data=conv3, act_type="prelu", name="prelu3") 66 | 67 | fc1 = mx.symbol.FullyConnected(data=prelu3, num_hidden=128, name="fc1") 68 | prelu4 = mx.symbol.LeakyReLU(data=fc1, act_type="prelu", name="prelu4") 69 | 70 | fc2 = mx.symbol.FullyConnected(data=prelu4, num_hidden=2, name="fc2") 71 | fc3 = mx.symbol.FullyConnected(data=prelu4, num_hidden=4, name="fc3") 72 | 73 | cls_prob = mx.symbol.SoftmaxOutput(data=fc2, label=label, use_ignore=True, 74 | out_grad=True, name="cls_prob") 75 | if mode == 'test': 76 | cls_prob = mx.symbol.SoftmaxOutput(data=fc2, label=label, use_ignore=True, name="cls_prob") 77 | bbox_pred = fc3 78 | group = mx.symbol.Group([cls_prob, bbox_pred]) 79 | else: 80 | bbox_pred = mx.symbol.LinearRegressionOutput(data=fc3, label=bbox_target, 81 | out_grad=True, grad_scale=1, name="bbox_pred") 82 | 83 | out = mx.symbol.Custom(cls_prob=cls_prob, bbox_pred=bbox_pred, label=label, 84 | bbox_target=bbox_target, op_type='negativemining', name="negative_mining") 85 | 86 | group = mx.symbol.Group([out]) 87 | return group 88 | 89 | 90 | def O_Net(mode="train"): 91 | """ 92 | Refine Network 93 | input shape 3 x 48 x 48 94 | """ 95 | data = mx.symbol.Variable(name="data") 96 | bbox_target = mx.symbol.Variable(name="bbox_target") 97 | label = mx.symbol.Variable(name="label") 98 | 99 | conv1 = mx.symbol.Convolution(data=data, kernel=(3, 3), num_filter=32, name="conv1") 100 | prelu1 = mx.symbol.LeakyReLU(data=conv1, act_type="prelu", name="prelu1") 101 | pool1 = mx.symbol.Pooling(data=prelu1, pool_type="max", pooling_convention="full", kernel=(3, 3), stride=(2, 2), name="pool1") 102 | 103 | conv2 = mx.symbol.Convolution(data=pool1, kernel=(3, 3), num_filter=64, name="conv2") 104 | prelu2 = mx.symbol.LeakyReLU(data=conv2, act_type="prelu", name="prelu2") 105 | pool2 = mx.symbol.Pooling(data=prelu2, pool_type="max", pooling_convention="full", kernel=(3, 3), stride=(2, 2), name="pool2") 106 | 107 | conv3 = mx.symbol.Convolution(data=pool2, kernel=(3, 3), num_filter=64, name="conv3") 108 | prelu3 = mx.symbol.LeakyReLU(data=conv3, act_type="prelu", name="prelu3") 109 | pool3 = mx.symbol.Pooling(data=prelu3, pool_type="max", pooling_convention="full", kernel=(2, 2), stride=(2, 2), name="pool3") 110 | 111 | conv4 = mx.symbol.Convolution(data=pool3, kernel=(2, 2), num_filter=128, name="conv4") 112 | prelu4 = mx.symbol.LeakyReLU(data=conv4, act_type="prelu", name="prelu4") 113 | 114 | fc1 = mx.symbol.FullyConnected(data=prelu4, num_hidden=256, name="fc1") 115 | prelu5 = mx.symbol.LeakyReLU(data=fc1, act_type="prelu", name="prelu5") 116 | 117 | fc2 = mx.symbol.FullyConnected(data=prelu5, num_hidden=2, name="fc2") 118 | fc3 = mx.symbol.FullyConnected(data=prelu5, num_hidden=4, name="fc3") 119 | 120 | cls_prob = mx.symbol.SoftmaxOutput(data=fc2, label=label, use_ignore=True, out_grad=True, name="cls_prob") 121 | if mode == 
"test": 122 | bbox_pred = fc3 123 | group = mx.symbol.Group([cls_prob, bbox_pred]) 124 | else: 125 | bbox_pred = mx.symbol.LinearRegressionOutput(data=fc3, label=bbox_target, 126 | grad_scale=1, out_grad=True, name="bbox_pred") 127 | out = mx.symbol.Custom(cls_prob=cls_prob, bbox_pred=bbox_pred, label=label, 128 | bbox_target=bbox_target, op_type='negativemining', name="negative_mining") 129 | group = mx.symbol.Group([out]) 130 | return group 131 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | import argparse 4 | import cv2 5 | import time 6 | from core.symbol import P_Net, R_Net, O_Net 7 | from core.imdb import IMDB 8 | from config import config 9 | from core.loader import TestLoader 10 | from core.detector import Detector 11 | from core.fcn_detector import FcnDetector 12 | from tools.load_model import load_param 13 | from core.MtcnnDetector import MtcnnDetector 14 | 15 | 16 | def test_net(prefix, epoch, batch_size, ctx, 17 | thresh=[0.6, 0.6, 0.7], min_face_size=24, 18 | stride=2, slide_window=False): 19 | 20 | detectors = [None, None, None] 21 | 22 | # load pnet model 23 | args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx) 24 | if slide_window: 25 | PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs) 26 | else: 27 | PNet = FcnDetector(P_Net("test"), ctx, args, auxs) 28 | detectors[0] = PNet 29 | 30 | # load rnet model 31 | args, auxs = load_param(prefix[1], epoch[0], convert=True, ctx=ctx) 32 | RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs) 33 | detectors[1] = RNet 34 | 35 | # load onet model 36 | args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx) 37 | ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs) 38 | detectors[2] = ONet 39 | 40 | mtcnn_detector = MtcnnDetector(detectors=detectors, ctx=ctx, min_face_size=min_face_size, 41 | stride=stride, threshold=thresh, slide_window=slide_window) 42 | 43 | img = cv2.imread('test01.jpg') 44 | t1 = time.time() 45 | 46 | boxes, boxes_c = mtcnn_detector.detect_pnet(img) 47 | boxes, boxes_c = mtcnn_detector.detect_rnet(img, boxes_c) 48 | boxes, boxes_c = mtcnn_detector.detect_onet(img, boxes_c) 49 | 50 | print 'time: ',time.time() - t1 51 | 52 | if boxes_c is not None: 53 | draw = img.copy() 54 | font = cv2.FONT_HERSHEY_SIMPLEX 55 | for b in boxes_c: 56 | cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 255, 255), 1) 57 | cv2.putText(draw, '%.3f'%b[4], (int(b[0]), int(b[1])), font, 0.4, (255, 255, 255), 1) 58 | 59 | cv2.imshow("detection result", draw) 60 | cv2.waitKey(0) 61 | 62 | 63 | 64 | def parse_args(): 65 | parser = argparse.ArgumentParser(description='Test mtcnn', 66 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 67 | parser.add_argument('--prefix', dest='prefix', help='prefix of model name', nargs="+", 68 | default=['model/pnet', 'model/rnet', 'model/onet'], type=str) 69 | parser.add_argument('--epoch', dest='epoch', help='epoch number of model to load', nargs="+", 70 | default=[16, 16, 16], type=int) 71 | parser.add_argument('--batch_size', dest='batch_size', help='list of batch size used in prediction', nargs="+", 72 | default=[2048, 256, 16], type=int) 73 | parser.add_argument('--thresh', dest='thresh', help='list of thresh for pnet, rnet, onet', nargs="+", 74 | default=[0.5, 0.5, 0.7], type=float) 75 | parser.add_argument('--min_face', 
63 | 
64 | def parse_args():
65 |     parser = argparse.ArgumentParser(description='Test mtcnn',
66 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
67 |     parser.add_argument('--prefix', dest='prefix', help='prefix of model name', nargs="+",
68 |                         default=['model/pnet', 'model/rnet', 'model/onet'], type=str)
69 |     parser.add_argument('--epoch', dest='epoch', help='epoch number of model to load', nargs="+",
70 |                         default=[16, 16, 16], type=int)
71 |     parser.add_argument('--batch_size', dest='batch_size', help='list of batch size used in prediction', nargs="+",
72 |                         default=[2048, 256, 16], type=int)
73 |     parser.add_argument('--thresh', dest='thresh', help='list of thresh for pnet, rnet, onet', nargs="+",
74 |                         default=[0.5, 0.5, 0.7], type=float)
75 |     parser.add_argument('--min_face', dest='min_face', help='minimum face size for detection',
76 |                         default=40, type=int)
77 |     parser.add_argument('--stride', dest='stride', help='stride of sliding window',
78 |                         default=2, type=int)
79 |     parser.add_argument('--sw', dest='slide_window', help='use sliding window in pnet', action='store_true')
80 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU device to train with',
81 |                         default=0, type=int)
82 |     args = parser.parse_args()
83 |     return args
84 | 
85 | if __name__ == '__main__':
86 |     args = parse_args()
87 |     print 'Called with argument:'
88 |     print args
89 |     ctx = mx.gpu(args.gpu_id)
90 |     if args.gpu_id == -1:
91 |         ctx = mx.cpu(0)
92 |     test_net(args.prefix, args.epoch, args.batch_size,
93 |              ctx, args.thresh, args.min_face,
94 |              args.stride, args.slide_window)
95 | 
--------------------------------------------------------------------------------
/example/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/example/__init__.py
--------------------------------------------------------------------------------
/example/train.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import mxnet as mx
3 | import core.metric as metric
4 | from mxnet.module.module import Module
5 | from core.loader import ImageLoader
6 | from core.imdb import IMDB
7 | from config import config
8 | from tools.load_model import load_param
9 | 
10 | def train_net(sym, prefix, ctx, pretrained, epoch, begin_epoch, end_epoch, imdb,
11 |               net=12, frequent=50, initialize=True, base_lr=0.01):
12 |     logger = logging.getLogger()
13 |     logger.setLevel(logging.INFO)
14 | 
15 |     train_data = ImageLoader(imdb, net, config.BATCH_SIZE, shuffle=True, ctx=ctx)
16 | 
17 |     if not initialize:
18 |         args, auxs = load_param(pretrained, epoch, convert=True)
19 | 
20 |     if initialize:
21 |         print "init weights and bias:"
22 |         data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
23 |         arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
24 |         arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
25 |         aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
26 |         init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
27 |         args = dict()
28 |         auxs = dict()
29 | 
30 |         for k in sym.list_arguments():
31 |             if k in data_shape_dict:
32 |                 continue
33 | 
34 |             print 'init', k
35 | 
36 |             args[k] = mx.nd.zeros(arg_shape_dict[k])
37 |             init(k, args[k])
38 |             if k.startswith('fc'):
39 |                 args[k][:] /= 10
40 | 
41 |             '''
42 |             if k.endswith('weight'):
43 |                 if k.startswith('conv'):
44 |                     args[k] = mx.random.normal(loc=0, scale=0.001, shape=arg_shape_dict[k])
45 |                 else:
46 |                     args[k] = mx.random.normal(loc=0, scale=0.01, shape=arg_shape_dict[k])
47 |             else:  # bias
48 |                 args[k] = mx.nd.zeros(shape=arg_shape_dict[k])
49 |             '''
50 | 
51 |         for k in sym.list_auxiliary_states():
52 |             auxs[k] = mx.nd.zeros(aux_shape_dict[k])
53 |             init(k, auxs[k])
54 | 
55 |     lr_factor = 0.1
56 |     lr_epoch = config.LR_EPOCH
57 |     lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
58 |     lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
59 |     lr_iters = [int(epoch * len(imdb) / config.BATCH_SIZE) for epoch in lr_epoch_diff]
60 |     print 'lr', lr, 'lr_epoch', lr_epoch, 'lr_epoch_diff', lr_epoch_diff
61 |     lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
62 | 
63 |     data_names = [k[0] for k in 
train_data.provide_data] 64 | label_names = [k[0] for k in train_data.provide_label] 65 | 66 | batch_end_callback = mx.callback.Speedometer(train_data.batch_size, frequent=frequent) 67 | epoch_end_callback = mx.callback.do_checkpoint(prefix) 68 | eval_metrics = mx.metric.CompositeEvalMetric() 69 | metric1 = metric.Accuracy() 70 | metric2 = metric.LogLoss() 71 | metric3 = metric.BBOX_MSE() 72 | for child_metric in [metric1, metric2, metric3]: 73 | eval_metrics.add(child_metric) 74 | optimizer_params = {'momentum': 0.9, 75 | 'wd': 0.00001, 76 | 'learning_rate': lr, 77 | 'lr_scheduler': lr_scheduler, 78 | 'rescale_grad': 1.0} 79 | 80 | mod = Module(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx) 81 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 82 | batch_end_callback=batch_end_callback, 83 | optimizer='sgd', optimizer_params=optimizer_params, 84 | arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=end_epoch) 85 | 86 | -------------------------------------------------------------------------------- /example/train_O_net.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import mxnet as mx 3 | from core.imdb import IMDB 4 | from train import train_net 5 | from core.symbol import O_Net 6 | 7 | def train_O_net(image_set, root_path, dataset_path, prefix, ctx, 8 | pretrained, epoch, begin_epoch, 9 | end_epoch, frequent, lr, resume): 10 | imdb = IMDB("mtcnn", image_set, root_path, dataset_path) 11 | gt_imdb = imdb.gt_imdb() 12 | gt_imdb = imdb.append_flipped_images(gt_imdb) 13 | sym = O_Net() 14 | 15 | train_net(sym, prefix, ctx, pretrained, epoch, begin_epoch, end_epoch, gt_imdb, 16 | 48, frequent, not resume, lr) 17 | 18 | def parse_args(): 19 | parser = argparse.ArgumentParser(description='Train O_net(48-net)', 20 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 21 | parser.add_argument('--image_set', dest='image_set', help='training set', 22 | default='train_48', type=str) 23 | parser.add_argument('--root_path', dest='root_path', help='output data folder', 24 | default='data', type=str) 25 | parser.add_argument('--dataset_path', dest='dataset_path', help='dataset folder', 26 | default='data/mtcnn', type=str) 27 | parser.add_argument('--prefix', dest='prefix', help='new model prefix', 28 | default='model/onet', type=str) 29 | parser.add_argument('--gpus', dest='gpu_ids', help='GPU device to train with', 30 | default='0', type=str) 31 | parser.add_argument('--pretrained', dest='pretrained', help='pretrained prefix', 32 | default='model/onet', type=str) 33 | parser.add_argument('--epoch', dest='epoch', help='load epoch', 34 | default=0, type=int) 35 | parser.add_argument('--begin_epoch', dest='begin_epoch', help='begin epoch of training', 36 | default=0, type=int) 37 | parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training', 38 | default=16, type=int) 39 | parser.add_argument('--frequent', dest='frequent', help='frequency of logging', 40 | default=200, type=int) 41 | parser.add_argument('--lr', dest='lr', help='learning rate', 42 | default=0.01, type=float) 43 | parser.add_argument('--resume', dest='resume', help='continue training', action='store_true') 44 | args = parser.parse_args() 45 | return args 46 | 47 | if __name__ == '__main__': 48 | args = parse_args() 49 | print 'Called with argument:' 50 | print args 51 | ctx = [mx.gpu(int(i)) for i in args.gpu_ids.split(',')] 52 | train_O_net(args.image_set, 
53 |                 ctx, args.pretrained, args.epoch, args.begin_epoch,
54 |                 args.end_epoch, args.frequent, args.lr, args.resume)
55 | 
--------------------------------------------------------------------------------
/example/train_P_net.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import mxnet as mx
3 | from core.imdb import IMDB
4 | from train import train_net
5 | from core.symbol import P_Net
6 | 
7 | def train_P_net(image_set, root_path, dataset_path, prefix, ctx,
8 |                 pretrained, epoch, begin_epoch,
9 |                 end_epoch, frequent, lr, resume):
10 |     imdb = IMDB("mtcnn", image_set, root_path, dataset_path)
11 |     gt_imdb = imdb.gt_imdb()
12 |     gt_imdb = imdb.append_flipped_images(gt_imdb)
13 |     sym = P_Net()
14 | 
15 |     train_net(sym, prefix, ctx, pretrained, epoch, begin_epoch, end_epoch, gt_imdb,
16 |               12, frequent, not resume, lr)
17 | 
18 | def parse_args():
19 |     parser = argparse.ArgumentParser(description='Train proposal net(12-net)',
20 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
21 |     parser.add_argument('--image_set', dest='image_set', help='training set',
22 |                         default='train_12', type=str)
23 |     parser.add_argument('--root_path', dest='root_path', help='output data folder',
24 |                         default='data', type=str)
25 |     parser.add_argument('--dataset_path', dest='dataset_path', help='dataset folder',
26 |                         default='data/mtcnn', type=str)
27 |     parser.add_argument('--prefix', dest='prefix', help='new model prefix',
28 |                         default='model/pnet', type=str)
29 |     parser.add_argument('--gpus', dest='gpu_ids', help='GPU devices to train with, e.g. 0 or 0,1',
30 |                         default='0', type=str)
31 |     parser.add_argument('--pretrained', dest='pretrained', help='pretrained prefix',
32 |                         default='model/pnet', type=str)
33 |     parser.add_argument('--epoch', dest='epoch', help='load epoch',
34 |                         default=0, type=int)
35 |     parser.add_argument('--begin_epoch', dest='begin_epoch', help='begin epoch of training',
36 |                         default=0, type=int)
37 |     parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
38 |                         default=16, type=int)
39 |     parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
40 |                         default=200, type=int)
41 |     parser.add_argument('--lr', dest='lr', help='learning rate',
42 |                         default=0.01, type=float)
43 |     parser.add_argument('--resume', dest='resume', help='continue training', action='store_true')
44 |     args = parser.parse_args()
45 |     return args
46 | 
47 | if __name__ == '__main__':
48 |     args = parse_args()
49 |     print 'Called with arguments:'
50 |     print args
51 |     ctx = [mx.gpu(int(i)) for i in args.gpu_ids.split(',')]
52 |     train_P_net(args.image_set, args.root_path, args.dataset_path, args.prefix, ctx,
53 |                 args.pretrained, args.epoch,
54 |                 args.begin_epoch, args.end_epoch, args.frequent, args.lr, args.resume)
55 | 
--------------------------------------------------------------------------------
/example/train_R_net.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import mxnet as mx
3 | from core.imdb import IMDB
4 | from train import train_net
5 | from core.symbol import R_Net
6 | 
7 | def train_R_net(image_set, root_path, dataset_path, prefix, ctx,
8 |                 pretrained, epoch, begin_epoch,
9 |                 end_epoch, frequent, lr, resume):
10 |     imdb = IMDB("mtcnn", image_set, root_path, dataset_path)
11 |     gt_imdb = imdb.gt_imdb()
12 |     gt_imdb = imdb.append_flipped_images(gt_imdb)
13 |     sym = R_Net()
14 | 
15 |     train_net(sym, prefix, ctx, pretrained, epoch, begin_epoch, end_epoch, gt_imdb,
16 |               24, frequent, not resume, lr)
17 | 
18 | def parse_args():
19 |     parser = argparse.ArgumentParser(description='Train refine net(24-net)',
20 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
21 |     parser.add_argument('--image_set', dest='image_set', help='training set',
22 |                         default='train_24', type=str)
23 |     parser.add_argument('--root_path', dest='root_path', help='output data folder',
24 |                         default='data', type=str)
25 |     parser.add_argument('--dataset_path', dest='dataset_path', help='dataset folder',
26 |                         default='data/mtcnn', type=str)
27 |     parser.add_argument('--prefix', dest='prefix', help='new model prefix',
28 |                         default='model/rnet', type=str)
29 |     parser.add_argument('--gpus', dest='gpu_ids', help='GPU devices to train with, e.g. 0 or 0,1',
30 |                         default='0', type=str)
31 |     parser.add_argument('--pretrained', dest='pretrained', help='pretrained prefix',
32 |                         default='model/rnet', type=str)
33 |     parser.add_argument('--epoch', dest='epoch', help='load epoch',
34 |                         default=0, type=int)
35 |     parser.add_argument('--begin_epoch', dest='begin_epoch', help='begin epoch of training',
36 |                         default=0, type=int)
37 |     parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
38 |                         default=16, type=int)
39 |     parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
40 |                         default=200, type=int)
41 |     parser.add_argument('--lr', dest='lr', help='learning rate',
42 |                         default=0.01, type=float)
43 |     parser.add_argument('--resume', dest='resume', help='continue training', action='store_true')
44 |     args = parser.parse_args()
45 |     return args
46 | 
47 | if __name__ == '__main__':
48 |     args = parse_args()
49 |     print 'Called with arguments:'
50 |     print args
51 |     ctx = [mx.gpu(int(i)) for i in args.gpu_ids.split(',')]
52 |     train_R_net(args.image_set, args.root_path, args.dataset_path, args.prefix,
53 |                 ctx, args.pretrained, args.epoch, args.begin_epoch,
54 |                 args.end_epoch, args.frequent, args.lr, args.resume)
55 | 
--------------------------------------------------------------------------------
/fddb_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/fddb_result.png
--------------------------------------------------------------------------------
/model/onet-0016.params:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/model/onet-0016.params
--------------------------------------------------------------------------------
/model/pnet-0016.params:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/model/pnet-0016.params
--------------------------------------------------------------------------------
/model/rnet-0016.params:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/model/rnet-0016.params
--------------------------------------------------------------------------------
/mxnet_diff.patch:
--------------------------------------------------------------------------------
1 | diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h
2 | index d70066d..acebc2c 100644
3 | --- a/src/operator/regression_output-inl.h
4 | +++ b/src/operator/regression_output-inl.h
5 | @@ -25,9 +25,12 @@ enum RegressionOutputType {kLinear, kLogistic, kMAE};
6 | 
7 |  struct RegressionOutputParam : public dmlc::Parameter<RegressionOutputParam> {
8 |    float grad_scale;
9 | +  bool out_grad;
10 |    DMLC_DECLARE_PARAMETER(RegressionOutputParam) {
11 |      DMLC_DECLARE_FIELD(grad_scale).set_default(1.0f)
12 |      .describe("Scale the gradient by a float factor");
13 | +    DMLC_DECLARE_FIELD(out_grad).set_default(false)
14 | +    .describe("Apply weighting from output gradient");
15 |    };
16 |  };
17 | 
18 | @@ -75,6 +78,10 @@ class RegressionOutputOp : public Operator {
19 |          .get_with_shape<xpu, 2, real_t>(out.shape_, s);
20 |      Assign(grad, req[reg_enum::kData], param_.grad_scale/num_output*
21 |             F<BackwardOp>(out, reshape(label, grad.shape_)));
22 | +    if (param_.out_grad) {
23 | +      Tensor<xpu, 2> ograd = out_grad[reg_enum::kOut].FlatTo2D<xpu, real_t>(s);
24 | +      grad *= ograd;
25 | +    }
26 |    }
27 | 
28 |   private:
29 | @@ -148,7 +155,12 @@ class RegressionOutputProp : public OperatorProperty {
30 |                                               const std::vector<int> &out_grad,
31 |                                               const std::vector<int> &in_data,
32 |                                               const std::vector<int> &out_data) const override {
33 | -    return {in_data[reg_enum::kLabel], out_data[reg_enum::kOut]};
34 | +    if (param_.out_grad) {
35 | +      return {in_data[reg_enum::kLabel], out_data[reg_enum::kOut],
36 | +              out_grad[reg_enum::kOut]};
37 | +    } else {
38 | +      return {in_data[reg_enum::kLabel], out_data[reg_enum::kOut]};
39 | +    }
40 |    }
41 | 
42 |    std::vector<std::pair<int, void*> > BackwardInplaceOption(
43 | 
--------------------------------------------------------------------------------
/prepare_data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/prepare_data/__init__.py
--------------------------------------------------------------------------------
/prepare_data/gen_hard_example.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import mxnet as mx
3 | import argparse
4 | import os
5 | import cPickle
6 | import cv2
7 | from core.symbol import P_Net, R_Net, O_Net
8 | from core.imdb import IMDB
9 | from config import config
10 | from core.loader import TestLoader
11 | from core.detector import Detector
12 | from core.fcn_detector import FcnDetector
13 | from tools.load_model import load_param
14 | from core.MtcnnDetector import MtcnnDetector
15 | from utils import *
16 | 
17 | def save_hard_example(net):
18 | 
19 |     image_dir = "./data/wider/images"
20 |     if net == "rnet":
21 |         image_size = 24
22 |     elif net == "onet":
23 |         image_size = 48
24 |     # hard-coded output dirs from the author's setup, sized to match the target net
25 |     neg_save_dir = "/data3/seanlx/mtcnn1/%d/negative"%image_size
26 |     pos_save_dir = "/data3/seanlx/mtcnn1/%d/positive"%image_size
27 |     part_save_dir = "/data3/seanlx/mtcnn1/%d/part"%image_size
28 | 
29 |     # load ground truth from annotation file
30 |     # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image
31 |     anno_file = './prepare_data/wider_annotations/anno.txt'
32 |     with open(anno_file, 'r') as f:
33 |         annotations = f.readlines()
34 | 
35 |     im_idx_list = list()
36 |     gt_boxes_list = list()
37 |     num_of_images = len(annotations)
38 |     print "processing %d images in total"%num_of_images
39 | 
40 |     for annotation in annotations:
41 |         annotation = annotation.strip().split(' ')
42 |         im_idx = annotation[0]
43 | 
44 |         boxes = map(float, annotation[1:])
45 |         boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
46 |         im_idx_list.append(im_idx)
47 |         gt_boxes_list.append(boxes)
48 | 
49 |     save_path = "./prepare_data/%s"%net
50 |     f1 = open(os.path.join(save_path, 'pos_%d.txt'%image_size), 'w')
51 |     f2 = open(os.path.join(save_path, 'neg_%d.txt'%image_size), 'w')
52 |     f3 = open(os.path.join(save_path, 'part_%d.txt'%image_size), 'w')
53 | 
54 |     det_boxes = cPickle.load(open(os.path.join(save_path, 'detections.pkl'), 'rb'))
55 |     assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"
56 | 
57 |     # index of neg, pos and part face, used as their image names
58 |     n_idx = 0
59 |     p_idx = 0
60 |     d_idx = 0
61 |     image_done = 0
62 |     for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
63 |         if image_done % 100 == 0:
64 |             print "%d images done"%image_done
65 |         image_done += 1
66 | 
67 |         if dets.shape[0] == 0:
68 |             continue
69 |         img = cv2.imread(os.path.join(image_dir, im_idx+'.jpg'))
70 |         dets = convert_to_square(dets)
71 |         dets[:, 0:4] = np.round(dets[:, 0:4])
72 | 
73 |         for box in dets:
74 |             x_left, y_top, x_right, y_bottom, _ = box.astype(int)
75 |             width = x_right - x_left + 1
76 |             height = y_bottom - y_top + 1
77 | 
78 |             # ignore box that is too small or beyond image border
79 |             if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
80 |                 continue
81 | 
82 |             # compute intersection over union(IoU) between current box and all gt boxes
83 |             Iou = IoU(box, gts)
84 |             cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
85 |             resized_im = cv2.resize(cropped_im, (image_size, image_size),
86 |                                     interpolation=cv2.INTER_LINEAR)
87 | 
88 |             # save negative images and write label
89 |             if np.max(Iou) < 0.3:
90 |                 # IoU with all gts must be below 0.3
91 |                 save_file = os.path.join(neg_save_dir, "%s.jpg"%n_idx)
92 |                 f2.write("%s/negative/%s"%(image_size, n_idx) + ' 0\n')
93 |                 cv2.imwrite(save_file, resized_im)
94 |                 n_idx += 1
95 |             else:
96 |                 # find gt_box with the highest iou
97 |                 idx = np.argmax(Iou)
98 |                 assigned_gt = gts[idx]
99 |                 x1, y1, x2, y2 = assigned_gt
100 | 
101 |                 # compute bbox reg label
102 |                 offset_x1 = (x1 - x_left) / float(width)
103 |                 offset_y1 = (y1 - y_top) / float(height)
104 |                 offset_x2 = (x2 - x_right) / float(width)
105 |                 offset_y2 = (y2 - y_bottom) / float(height)
106 | 
107 |                 # save positive and part-face images and write labels
108 |                 if np.max(Iou) >= 0.65:
109 |                     save_file = os.path.join(pos_save_dir, "%s.jpg"%p_idx)
110 |                     f1.write("%s/positive/%s"%(image_size, p_idx) + ' 1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2))
111 |                     cv2.imwrite(save_file, resized_im)
112 |                     p_idx += 1
113 | 
114 |                 elif np.max(Iou) >= 0.4:
115 |                     save_file = os.path.join(part_save_dir, "%s.jpg"%d_idx)
116 |                     f3.write("%s/part/%s"%(image_size, d_idx) + ' -1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2))
117 |                     cv2.imwrite(save_file, resized_im)
118 |                     d_idx += 1
119 |     f1.close()
120 |     f2.close()
121 |     f3.close()
122 | 
123 | def test_net(root_path, dataset_path, image_set, prefix, epoch,
124 |              batch_size, ctx, test_mode="rnet",
125 |              thresh=[0.6, 0.6, 0.7], min_face_size=24,
126 |              stride=2, slide_window=False, shuffle=False, vis=False):
127 | 
128 |     detectors = [None, None, None]
129 | 
130 |     # load pnet model
131 |     args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
132 |     if slide_window:
133 |         PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
134 |     else:
135 |         PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
136 |     detectors[0] = PNet
137 | 
138 |     # load rnet model
139 |     if test_mode in ["rnet", "onet"]:
140 |         args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)  # epoch[1] is the rnet checkpoint epoch
141 |         RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
142 |         detectors[1] = RNet
143 | 
144 |     # load onet model
145 |     if test_mode == "onet":
146 |         args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
147 |         ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
148 |         detectors[2] = ONet
149 | 
150 |     mtcnn_detector = MtcnnDetector(detectors=detectors, ctx=ctx, min_face_size=min_face_size,
151 |                                    stride=stride, threshold=thresh, slide_window=slide_window)
152 | 
153 | 
154 |     imdb = IMDB("wider", image_set, root_path, dataset_path, 'test')
155 |     gt_imdb = imdb.gt_imdb()
156 | 
157 |     test_data = TestLoader(gt_imdb)
158 |     detections = mtcnn_detector.detect_face(imdb, test_data, vis=vis)
159 | 
160 |     if test_mode == "pnet":
161 |         net = "rnet"
162 |     elif test_mode == "rnet":
163 |         net = "onet"
164 |     else:
165 |         raise ValueError("test_mode should be 'pnet' or 'rnet' when mining hard examples")
166 |     save_path = "./prepare_data/%s"%net
167 |     if not os.path.exists(save_path):
168 |         os.mkdir(save_path)
169 |     save_file = os.path.join(save_path, "detections.pkl")
170 |     with open(save_file, 'wb') as f:
171 |         cPickle.dump(detections, f, cPickle.HIGHEST_PROTOCOL)
172 | 
173 |     save_hard_example(net)
174 | 
175 | def parse_args():
176 |     parser = argparse.ArgumentParser(description='Test mtcnn',
177 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
178 |     parser.add_argument('--root_path', dest='root_path', help='output data folder',
179 |                         default='data', type=str)
180 |     parser.add_argument('--dataset_path', dest='dataset_path', help='dataset folder',
181 |                         default='data/wider', type=str)
182 |     parser.add_argument('--image_set', dest='image_set', help='image set',
183 |                         default='train', type=str)
184 |     parser.add_argument('--test_mode', dest='test_mode', help='net to run: pnet or rnet (training data is mined for the next net)',
185 |                         default='pnet', type=str)
186 |     parser.add_argument('--prefix', dest='prefix', help='prefix of model name', nargs="+",
187 |                         default=['model/pnet', 'model/rnet', 'model/onet'], type=str)
188 |     parser.add_argument('--epoch', dest='epoch', help='epoch number of model to load', nargs="+",
189 |                         default=[16, 16, 16], type=int)
190 |     parser.add_argument('--batch_size', dest='batch_size', help='list of batch size used in prediction', nargs="+",
191 |                         default=[2048, 256, 16], type=int)
192 |     parser.add_argument('--thresh', dest='thresh', help='list of thresh for pnet, rnet, onet', nargs="+",
193 |                         default=[0.6, 0.7, 0.7], type=float)
194 |     parser.add_argument('--min_face', dest='min_face', help='minimum face size for detection',
195 |                         default=24, type=int)
196 |     parser.add_argument('--stride', dest='stride', help='stride of sliding window',
197 |                         default=2, type=int)
198 |     parser.add_argument('--sw', dest='slide_window', help='use sliding window in pnet', action='store_true')
199 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU device to use, -1 for CPU',
200 |                         default=0, type=int)
201 |     parser.add_argument('--shuffle', dest='shuffle', help='shuffle data on visualization', action='store_true')
202 |     parser.add_argument('--vis', dest='vis', help='turn on visualization', action='store_true')
203 |     args = parser.parse_args()
204 |     return args
205 | 
206 | if __name__ == '__main__':
207 |     args = parse_args()
208 |     print 'Called with arguments:'
209 |     print args
210 |     ctx = mx.gpu(args.gpu_id)
211 |     if args.gpu_id == -1:
212 |         ctx = mx.cpu(0)
213 |     test_net(args.root_path, args.dataset_path, args.image_set, args.prefix,
214 |              args.epoch, args.batch_size, ctx, args.test_mode,
215 |              args.thresh, args.min_face, args.stride,
216 |              args.slide_window, args.shuffle, args.vis)
217 | 
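218 | # Usage sketch (illustrative, not part of the original script; flags shown are the
219 | # argparse defaults above -- point --prefix/--epoch at your own checkpoints):
220 | #   python prepare_data/gen_hard_example.py --test_mode pnet   # mines training data for rnet
221 | #   python prepare_data/gen_hard_example.py --test_mode rnet   # mines training data for onet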
--------------------------------------------------------------------------------
/prepare_data/gen_imglist.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numpy.random as npr
3 | 
4 | size = 12  # set to 12/24/48 to build the imglist for pnet/rnet/onet
5 | 
6 | if size == 12:
7 |     net = "pnet"
8 | elif size == 24:
9 |     net = "rnet"
10 | elif size == 48:
11 |     net = "onet"
12 | 
13 | with open('%s/pos_%s.txt'%(net, size), 'r') as f:
14 |     pos = f.readlines()
15 | 
16 | with open('%s/neg_%s.txt'%(net, size), 'r') as f:
17 |     neg = f.readlines()
18 | 
19 | with open('%s/part_%s.txt'%(net, size), 'r') as f:
20 |     part = f.readlines()
21 | 
22 | 
23 | with open("%s/train_%s.txt"%(net, size), "w") as f:
24 |     f.writelines(pos)
25 |     neg_keep = npr.choice(len(neg), size=min(600000, len(neg)), replace=False)  # clamp so choice() cannot oversample
26 |     part_keep = npr.choice(len(part), size=min(300000, len(part)), replace=False)
27 |     for i in neg_keep:
28 |         f.write(neg[i])
29 |     for i in part_keep:
30 |         f.write(part[i])
31 | 
--------------------------------------------------------------------------------
/prepare_data/gen_pnet_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import numpy.random as npr
5 | from utils import IoU
6 | 
7 | anno_file = "./wider_annotations/anno.txt"
8 | im_dir = "/home/seanlx/Dataset/wider_face/WIDER_train/images"
9 | neg_save_dir = "/data3/seanlx/mtcnn1/12/negative"
10 | pos_save_dir = "/data3/seanlx/mtcnn1/12/positive"
11 | part_save_dir = "/data3/seanlx/mtcnn1/12/part"
12 | 
13 | save_dir = "./pnet"
14 | if not os.path.exists(save_dir):
15 |     os.mkdir(save_dir)
16 | f1 = open(os.path.join(save_dir, 'pos_12.txt'), 'w')
17 | f2 = open(os.path.join(save_dir, 'neg_12.txt'), 'w')
18 | f3 = open(os.path.join(save_dir, 'part_12.txt'), 'w')
19 | 
20 | with open(anno_file, 'r') as f:
21 |     annotations = f.readlines()
22 | 
23 | num = len(annotations)
24 | print "%d images in total" % num
25 | p_idx = 0 # positive
26 | n_idx = 0 # negative
27 | d_idx = 0 # don't care (part faces)
28 | idx = 0
29 | box_idx = 0
30 | for annotation in annotations:
31 |     annotation = annotation.strip().split(' ')
32 |     im_path = annotation[0]
33 |     bbox = map(float, annotation[1:])
34 |     boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
35 |     img = cv2.imread(os.path.join(im_dir, im_path + '.jpg'))
36 |     idx += 1
37 |     if idx % 100 == 0:
38 |         print idx, "images done"
39 | 
40 |     height, width, channel = img.shape
41 | 
42 |     neg_num = 0
43 |     while neg_num < 50:
44 |         size = npr.randint(12, min(width, height) / 2)
45 |         nx = npr.randint(0, width - size)
46 |         ny = npr.randint(0, height - size)
47 |         crop_box = np.array([nx, ny, nx + size, ny + size])
48 | 
49 |         Iou = IoU(crop_box, boxes)
50 | 
51 |         cropped_im = img[ny : ny + size, nx : nx + size, :]
52 |         resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
53 | 
54 |         if np.max(Iou) < 0.3:
55 |             # IoU with all gts must be below 0.3
56 |             save_file = os.path.join(neg_save_dir, "%s.jpg"%n_idx)
57 |             f2.write("12/negative/%s"%n_idx + ' 0\n')
58 |             cv2.imwrite(save_file, resized_im)
59 |             n_idx += 1
60 |             neg_num += 1
61 | 
62 | 
63 |     for box in boxes:
64 |         # box (x_left, y_top, x_right, y_bottom)
65 |         x1, y1, x2, y2 = box
66 |         w = x2 - x1 + 1
67 |         h = y2 - y1 + 1
68 | 
69 |         # ignore small faces
70 |         # in case the ground truth boxes of small faces are not accurate
71 |         if max(w, h) < 40 or x1 < 0 or y1 < 0:
72 |             continue
73 | 
74 |         # generate negative examples that have overlap with gt
75 |         for i in range(5):
76 |             size = npr.randint(12, min(width, height) / 2)
77 |             # delta_x and delta_y are offsets of (x1, y1)
78 |             delta_x = npr.randint(int(max(-size, -x1)), int(w))
79 |             delta_y = npr.randint(int(max(-size, -y1)), int(h))
80 |             nx1 = int(max(0, x1 + delta_x))  # cast to int so the slice below is valid
81 |             ny1 = int(max(0, y1 + delta_y))
82 |             if nx1 + size > width or ny1 + size > height:
83 |                 continue
84 |             crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
85 |             Iou = IoU(crop_box, boxes)
86 | 
87 |             cropped_im = img[ny1 : ny1 + size, nx1 : nx1 + size, :]
88 |             resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
89 | 
90 |             if np.max(Iou) < 0.3:
91 |                 # IoU with all gts must be below 0.3
92 |                 save_file = os.path.join(neg_save_dir, "%s.jpg"%n_idx)
93 |                 f2.write("12/negative/%s"%n_idx + ' 0\n')
94 |                 cv2.imwrite(save_file, resized_im)
95 |                 n_idx += 1
96 | 
97 |         # generate positive examples and part faces
98 |         for i in range(20):
99 |             size = npr.randint(int(min(w, h) * 0.8), int(np.ceil(1.25 * max(w, h))))
100 | 
101 |             # delta here is the offset of box center; +1 keeps the range non-empty for tiny boxes
102 |             delta_x = npr.randint(int(-w * 0.2), int(w * 0.2) + 1)
103 |             delta_y = npr.randint(int(-h * 0.2), int(h * 0.2) + 1)
104 | 
105 |             nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
106 |             ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
107 |             nx2 = nx1 + size
108 |             ny2 = ny1 + size
109 | 
110 |             if nx2 > width or ny2 > height:
111 |                 continue
112 |             crop_box = np.array([nx1, ny1, nx2, ny2])
113 | 
114 |             offset_x1 = (x1 - nx1) / float(size)
115 |             offset_y1 = (y1 - ny1) / float(size)
116 |             offset_x2 = (x2 - nx2) / float(size)
117 |             offset_y2 = (y2 - ny2) / float(size)
118 | 
119 |             cropped_im = img[ny1 : ny2, nx1 : nx2, :]
120 |             resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
121 | 
122 |             box_ = box.reshape(1, -1)
123 |             if IoU(crop_box, box_) >= 0.65:
124 |                 save_file = os.path.join(pos_save_dir, "%s.jpg"%p_idx)
125 |                 f1.write("12/positive/%s"%p_idx + ' 1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2))
126 |                 cv2.imwrite(save_file, resized_im)
127 |                 p_idx += 1
128 |             elif IoU(crop_box, box_) >= 0.4:
129 |                 save_file = os.path.join(part_save_dir, "%s.jpg"%d_idx)
130 |                 f3.write("12/part/%s"%d_idx + ' -1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2))
131 |                 cv2.imwrite(save_file, resized_im)
132 |                 d_idx += 1
133 |         box_idx += 1
134 |     print "%s images done, pos: %s part: %s neg: %s"%(idx, p_idx, d_idx, n_idx)
135 | 
136 | f1.close()
137 | f2.close()
138 | f3.close()
139 | 
--------------------------------------------------------------------------------
/prepare_data/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def IoU(box, boxes):
4 |     """Compute IoU between detect box and gt boxes
5 | 
6 |     Parameters:
7 |     ----------
8 |     box: numpy array, shape (5, ): x1, y1, x2, y2, score
9 |         input box
10 |     boxes: numpy array, shape (n, 4): x1, y1, x2, y2
11 |         input ground truth boxes
12 | 
13 |     Returns:
14 |     -------
15 |     ovr: numpy.array, shape (n, )
16 |         IoU
17 |     """
18 |     box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
19 |     area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
20 |     xx1 = np.maximum(box[0], boxes[:, 0])
21 |     yy1 = np.maximum(box[1], boxes[:, 1])
22 |     xx2 = np.minimum(box[2], boxes[:, 2])
23 |     yy2 = np.minimum(box[3], boxes[:, 3])
24 | 
25 |     # compute the width and height of the bounding box
26 |     w = np.maximum(0, xx2 - xx1 + 1)
27 |     h = np.maximum(0, yy2 - yy1 + 1)
28 | 
29 |     inter = w * h
30 |     ovr = inter / (box_area + area - inter)
31 |     return ovr
32 | 
33 | 
34 | def convert_to_square(bbox):
35 |     """Convert bbox to square
36 | 
37 |     Parameters:
38 |     ----------
39 |     bbox: numpy array, shape n x 5
40 |         input bbox
41 | 
42 |     Returns:
43 |     -------
44 |     square bbox
45 |     """
46 |     square_bbox = bbox.copy()
47 | 
48 |     h = bbox[:, 3] - bbox[:, 1] + 1
49 |     w = bbox[:, 2] - bbox[:, 0] + 1
50 |     max_side = np.maximum(h, w)
51 |     square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5
52 |     square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5
53 |     square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
54 |     square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
55 |     return square_bbox
56 | 
--------------------------------------------------------------------------------
/prepare_data/wider_annotations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/prepare_data/wider_annotations/__init__.py
--------------------------------------------------------------------------------
/prepare_data/wider_annotations/readme.txt:
--------------------------------------------------------------------------------
1 | Attached are the mappings between attribute names and label values.
2 | 
3 | blur:
4 | clear->0
5 | normal blur->1
6 | heavy blur->2
7 | 
8 | expression:
9 | typical expression->0
10 | exaggerate expression->1
11 | 
12 | illumination:
13 | normal illumination->0
14 | extreme illumination->1
15 | 
16 | occlusion:
17 | no occlusion->0
18 | partial occlusion->1
19 | heavy occlusion->2
20 | 
21 | pose:
22 | typical pose->0
23 | atypical pose->1
24 | 
25 | invalid:
26 | false->0(valid image)
27 | true->1(invalid image)
--------------------------------------------------------------------------------
/prepare_data/wider_annotations/transform.m:
--------------------------------------------------------------------------------
1 | writeLabel('train');
2 | 
--------------------------------------------------------------------------------
/prepare_data/wider_annotations/transform.py:
--------------------------------------------------------------------------------
1 | from wider_loader import WIDER
2 | import cv2
3 | import time
4 | 
5 | # WIDER face original images path
6 | path_to_image = '/home/seanlx/Dataset/wider_face/WIDER_train/images'
7 | 
8 | # matlab annotation file path
9 | file_to_label = './wider_face_train.mat'
10 | 
11 | # target file path; each output line: image_path x1 y1 x2 y2 [x1 y1 x2 y2 ...]
12 | target_file = './anno.txt'
13 | 
14 | wider = WIDER(file_to_label, path_to_image)
15 | 
16 | 
17 | line_count = 0
18 | box_count = 0
19 | 
20 | print 'start transforming....'
21 | t = time.time()
22 | 
23 | with open(target_file, 'w') as f:
24 |     # press ctrl-C to stop the process
25 |     for data in wider.next():
26 |         line = []
27 |         line.append(str(data.image_name))
28 |         line_count += 1
29 |         for i, box in enumerate(data.bboxes):
30 |             box_count += 1
31 |             for j, bvalue in enumerate(box):
32 |                 line.append(str(bvalue))
33 | 
34 |         line.append('\n')
35 | 
36 |         line_str = ' '.join(line)
37 |         f.write(line_str)
38 | 
39 | st = time.time() - t
40 | print 'end transforming'
41 | 
42 | print 'time spent: %.2fs' % st
43 | print 'total lines(images): %d' % line_count
44 | print 'total boxes(faces): %d' % box_count
45 | 
46 | 
47 | 
--------------------------------------------------------------------------------
/prepare_data/wider_annotations/wider_face_test.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/prepare_data/wider_annotations/wider_face_test.mat
--------------------------------------------------------------------------------
/prepare_data/wider_annotations/wider_face_train.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/prepare_data/wider_annotations/wider_face_train.mat
--------------------------------------------------------------------------------
/prepare_data/wider_annotations/wider_face_val.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/prepare_data/wider_annotations/wider_face_val.mat
--------------------------------------------------------------------------------
/prepare_data/wider_annotations/wider_loader.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import os
3 | 
4 | 
5 | class DATA(object):
6 |     def __init__(self, image_name, bboxes):
7 |         self.image_name = image_name
8 |         self.bboxes = bboxes
9 | 
10 | 
11 | class WIDER(object):
12 |     def __init__(self, file_to_label, path_to_image):
13 |         self.file_to_label = file_to_label
14 |         self.path_to_image = path_to_image
15 | 
16 |         self.f = h5py.File(file_to_label, 'r')
17 |         self.event_list = self.f.get('event_list')
18 |         self.file_list = self.f.get('file_list')
19 |         self.face_bbx_list = self.f.get('face_bbx_list')
20 | 
21 |     def next(self):
22 |         # generator over (image path, bboxes) pairs stored in the .mat file
23 |         for event_idx, event in enumerate(self.event_list.value[0]):
24 |             directory = self.f[event].value.tostring().decode('utf-16')
25 |             for im_idx, im in enumerate(
26 |                     self.f[self.file_list.value[0][event_idx]].value[0]):
27 | 
28 |                 im_name = self.f[im].value.tostring().decode('utf-16')
29 |                 face_bbx = self.f[self.f[self.face_bbx_list.value
30 |                                          [0][event_idx]].value[0][im_idx]].value
31 | 
32 |                 bboxes = []
33 | 
34 |                 for i in range(face_bbx.shape[1]):
35 |                     xmin = int(face_bbx[0][i])
36 |                     ymin = int(face_bbx[1][i])
37 |                     xmax = int(face_bbx[0][i] + face_bbx[2][i])
38 |                     ymax = int(face_bbx[1][i] + face_bbx[3][i])
39 |                     bboxes.append((xmin, ymin, xmax, ymax))
40 | 
41 |                 yield DATA(os.path.join(self.path_to_image, directory,
42 |                                         im_name + '.jpg'), bboxes)
--------------------------------------------------------------------------------
/prepare_data/wider_annotations/writeLabel.m:
--------------------------------------------------------------------------------
1 | function writeLabel(image_set)
2 | 
3 | f = load(sprintf('wider_face_%s.mat', image_set));
4 | fid = fopen(sprintf('%s.txt', image_set), 'a');
5 | for i = 1 : length(f.event_list)
6 |     for j = 1 : length(f.file_list{i})
7 |         folder_name = f.event_list{i};
8 |         file_name = f.file_list{i}{j};
9 |         face_bboxes = f.face_bbx_list{i}{j};
10 |         fprintf(fid, '%s/%s ', folder_name, file_name);
11 |         for k = 1 : size(face_bboxes, 1)
12 |             bbox = face_bboxes(k, :);
13 |             bbox(3) = bbox(1) + bbox(3);  % convert width/height to x2/y2
14 |             bbox(4) = bbox(2) + bbox(4);
15 |             for id = 1:4
16 |                 fprintf(fid, '%.2f ', bbox(id));
17 |             end
18 |         end
19 |         fprintf(fid, '\n');
20 |     end
21 | end
22 | fclose(fid);
23 | 
--------------------------------------------------------------------------------
/test01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/test01.jpg
--------------------------------------------------------------------------------
/test_fddb.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import mxnet as mx
3 | import argparse
4 | from core.symbol import P_Net, R_Net, O_Net
5 | from core.imdb import IMDB
6 | from config import config
7 | from core.loader import TestLoader
8 | from core.detector import Detector
9 | from core.fcn_detector import FcnDetector
10 | from tools.load_model import load_param
11 | from core.MtcnnDetector import MtcnnDetector
12 | 
13 | 
14 | def test_net(root_path, dataset_path, prefix, epoch,
15 |              batch_size, ctx, test_mode="onet",
16 |              thresh=[0.6, 0.6, 0.7], min_face_size=24,
17 |              stride=2, slide_window=False, shuffle=False, vis=False):
18 | 
19 |     detectors = [None, None, None]
20 | 
21 |     # load pnet model
22 |     args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
23 |     if slide_window:
24 |         PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
25 |     else:
26 |         PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
27 |     detectors[0] = PNet
28 | 
29 |     # load rnet model
30 |     if test_mode in ["rnet", "onet"]:
31 |         args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)  # epoch[1] is the rnet checkpoint epoch
32 |         RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
33 |         detectors[1] = RNet
34 | 
35 |     # load onet model
36 |     if test_mode == "onet":
37 |         args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
38 |         ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
39 |         detectors[2] = ONet
40 | 
41 |     mtcnn_detector = MtcnnDetector(detectors=detectors, ctx=ctx, min_face_size=min_face_size,
42 |                                    stride=stride, threshold=thresh, slide_window=slide_window)
43 | 
44 |     for i in range(1, 11):
45 |         image_set = "fold-" + str(i).zfill(2)
46 |         imdb = IMDB("fddb", image_set, root_path, dataset_path, 'test')
47 |         gt_imdb = imdb.gt_imdb()
48 | 
49 |         test_data = TestLoader(gt_imdb)
50 |         all_boxes = mtcnn_detector.detect_face(imdb, test_data, vis=vis)
51 |         imdb.write_results(all_boxes)
52 | 
53 | 
54 | 
55 | def parse_args():
56 |     parser = argparse.ArgumentParser(description='Test mtcnn',
57 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
58 |     parser.add_argument('--root_path', dest='root_path', help='output data folder',
59 |                         default='data', type=str)
60 |     parser.add_argument('--dataset_path', dest='dataset_path', help='dataset folder',
61 |                         default='data/fddb', type=str)
62 |     parser.add_argument('--test_mode', dest='test_mode', help='test net type, can be pnet, rnet or onet',
63 |                         default='onet', type=str)
64 |     parser.add_argument('--prefix', dest='prefix', help='prefix of model name', nargs="+",
65 |                         default=['model/pnet', 'model/rnet', 'model/onet'], type=str)
66 |     parser.add_argument('--epoch', dest='epoch', help='epoch number of model to load', nargs="+",
67 |                         default=[16, 16, 16], type=int)
68 |     parser.add_argument('--batch_size', dest='batch_size', help='list of batch size used in prediction', nargs="+",
69 |                         default=[2048, 256, 16], type=int)
70 |     parser.add_argument('--thresh', dest='thresh', help='list of thresh for pnet, rnet, onet', nargs="+",
71 |                         default=[0.6, 0.7, 0.7], type=float)
72 |     parser.add_argument('--min_face', dest='min_face', help='minimum face size for detection',
73 |                         default=24, type=int)
74 |     parser.add_argument('--stride', dest='stride', help='stride of sliding window',
75 |                         default=2, type=int)
76 |     parser.add_argument('--sw', dest='slide_window', help='use sliding window in pnet', action='store_true')
77 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU device to use, -1 for CPU',
78 |                         default=0, type=int)
79 |     parser.add_argument('--shuffle', dest='shuffle', help='shuffle data on visualization', action='store_true')
80 |     parser.add_argument('--vis', dest='vis', help='turn on visualization', action='store_true')
81 |     args = parser.parse_args()
82 |     return args
83 | 
84 | if __name__ == '__main__':
85 |     args = parse_args()
86 |     print 'Called with arguments:'
87 |     print args
88 |     ctx = mx.gpu(args.gpu_id)
89 |     if args.gpu_id == -1:
90 |         ctx = mx.cpu(0)
91 |     test_net(args.root_path, args.dataset_path, args.prefix,
92 |              args.epoch, args.batch_size, ctx, args.test_mode,
93 |              args.thresh, args.min_face, args.stride,
94 |              args.slide_window, args.shuffle, args.vis)
95 | 
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seanlinx/mtcnn/feacb3b6ae1bf177664f2a0b676ed4cfd3f5ca55/tools/__init__.py
--------------------------------------------------------------------------------
/tools/image_processing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def transform(im):
4 |     """
5 |     transform into mxnet tensor
6 |     subtract the pixel mean (127.5) and scale by 1/128
7 |     :param im: [height, width, channel] in BGR
8 |     :return: [batch, channel, height, width]
9 |     """
10 |     im_tensor = im.transpose(2, 0, 1)
11 |     im_tensor = im_tensor[np.newaxis, :]
12 |     im_tensor = (im_tensor - 127.5) * 0.0078125
13 |     return im_tensor
14 | 
--------------------------------------------------------------------------------
/tools/load_model.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | 
3 | 
4 | def load_checkpoint(prefix, epoch):
5 |     """
6 |     Load model checkpoint from file.
7 |     :param prefix: Prefix of model name.
8 |     :param epoch: Epoch number of model we would like to load.
9 |     :return: (arg_params, aux_params)
10 |     arg_params : dict of str to NDArray
11 |         Model parameter, dict of name to NDArray of net's weights.
12 |     aux_params : dict of str to NDArray
13 |         Model parameter, dict of name to NDArray of net's auxiliary states.
14 | """ 15 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 16 | arg_params = {} 17 | aux_params = {} 18 | for k, v in save_dict.items(): 19 | tp, name = k.split(':', 1) 20 | if tp == 'arg': 21 | arg_params[name] = v 22 | if tp == 'aux': 23 | aux_params[name] = v 24 | return arg_params, aux_params 25 | 26 | 27 | def convert_context(params, ctx): 28 | """ 29 | :param params: dict of str to NDArray 30 | :param ctx: the context to convert to 31 | :return: dict of str of NDArray with context ctx 32 | """ 33 | new_params = dict() 34 | for k, v in params.items(): 35 | new_params[k] = v.as_in_context(ctx) 36 | return new_params 37 | 38 | 39 | def load_param(prefix, epoch, convert=False, ctx=None): 40 | """ 41 | wrapper for load checkpoint 42 | :param prefix: Prefix of model name. 43 | :param epoch: Epoch number of model we would like to load. 44 | :param convert: reference model should be converted to GPU NDArray first 45 | :param ctx: if convert then ctx must be designated. 46 | :return: (arg_params, aux_params) 47 | """ 48 | arg_params, aux_params = load_checkpoint(prefix, epoch) 49 | if convert: 50 | if ctx is None: 51 | ctx = mx.cpu() 52 | arg_params = convert_context(arg_params, ctx) 53 | aux_params = convert_context(aux_params, ctx) 54 | return arg_params, aux_params 55 | -------------------------------------------------------------------------------- /tools/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def py_nms(dets, thresh, mode="Union"): 5 | """ 6 | greedily select boxes with high confidence 7 | keep boxes overlap <= thresh 8 | rule out overlap > thresh 9 | :param dets: [[x1, y1, x2, y2 score]] 10 | :param thresh: retain overlap <= thresh 11 | :return: indexes to keep 12 | """ 13 | x1 = dets[:, 0] 14 | y1 = dets[:, 1] 15 | x2 = dets[:, 2] 16 | y2 = dets[:, 3] 17 | scores = dets[:, 4] 18 | 19 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 20 | order = scores.argsort()[::-1] 21 | 22 | keep = [] 23 | while order.size > 0: 24 | i = order[0] 25 | keep.append(i) 26 | xx1 = np.maximum(x1[i], x1[order[1:]]) 27 | yy1 = np.maximum(y1[i], y1[order[1:]]) 28 | xx2 = np.minimum(x2[i], x2[order[1:]]) 29 | yy2 = np.minimum(y2[i], y2[order[1:]]) 30 | 31 | w = np.maximum(0.0, xx2 - xx1 + 1) 32 | h = np.maximum(0.0, yy2 - yy1 + 1) 33 | inter = w * h 34 | if mode == "Union": 35 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 36 | elif mode == "Minimum": 37 | ovr = inter / np.minimum(areas[i], areas[order[1:]]) 38 | 39 | inds = np.where(ovr <= thresh)[0] 40 | order = order[inds + 1] 41 | 42 | return keep 43 | --------------------------------------------------------------------------------