├── README.md
├── config.py
├── detect_realtime.py
├── detect_realtime_instruction.ipynb
├── images
│   └── img_test.jpg
├── outputs
│   ├── 0.gif
│   ├── image_c0.png
│   ├── image_c1.png
│   └── yolo_shufflenet2_infer.png
├── utils.py
└── weights
    └── shufflenetv2.h5
/README.md:
--------------------------------------------------------------------------------
## Face Detection in Realtime
This repository implements real-time face detection with the YOLOv3 framework in Keras (TensorFlow backend). To make it usable on embedded devices, I chose a computation-efficient CNN architecture, ShuffleNet V2, and trained it from scratch (about 50 epochs) on the [FDDB](http://vis-www.cs.umass.edu/fddb/index.html) dataset.

For various reasons, I only release the pre-trained weights, the inference code, and the network architecture. If you want to know more, please feel free to drop a comment or [contact me](gao.gzhou#gmail.com).







### 1. Requirements
- tensorflow
- keras
- cv2 (OpenCV)
- dlib (optional)
- basic packages, e.g. numpy, matplotlib, etc.

### 2. Usage
- Only one parameter needs attention: the path to the pre-trained model. Run
`python detect_realtime.py -m path_to_pretrained_model` (default `./weights/shufflenetv2.h5` for this repo).
A second parameter, `-v`/`--video`, is the path to a video file; if it is missing or invalid, the webcam is used instead. *Script for video.*
- Or follow [detect_realtime_instruction.ipynb](./detect_realtime_instruction.ipynb) for more detail; *notebook for pictures* (see the sketch below).
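
A minimal sketch of single-image inference, assuming only the helpers in `utils.py` and the constants in `config.py` of this repo; the exact notebook code may differ, and the input/output paths here are illustrative:

```python
import cv2
from keras.models import load_model

from config import net_h, net_w, anchors, obj_thresh, nms_thresh, weights_name
from utils import get_yolo_boxes, draw_boxes

model = load_model(weights_name)             # pre-trained ShuffleNetV2 + YOLO head
image = cv2.imread('./images/img_test.jpg')  # BGR image of any size

# Letterbox to 320x320, run the network, decode the three output scales,
# and apply (soft) non-maximum suppression.
boxes = get_yolo_boxes(model, [image], net_h, net_w, anchors, obj_thresh, nms_thresh)[0]

result = draw_boxes(image, boxes, obj_thresh, rect=True)  # rect=False draws ellipses
cv2.imwrite('./outputs/result.png', result)
```

Note that `get_yolo_boxes` takes a list of images, so several frames can be processed in one batch.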

### 3. References
- [(repo) YOLOv3](https://github.com/experiencor/keras-yolo3)
- [(repo) ShuffleNetV2](https://github.com/opconty/keras-shufflenetV2)
- [(paper) YOLOv3: An Incremental Improvement](https://arxiv.org/pdf/1804.02767.pdf)
- [(paper) ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164)

### 4. Appendix
- ShuffleNetV2 face-detection architecture


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
#-*- coding:utf-8 -*-
#'''
# Created on 18-8-20 6:58 PM
#
# @Author: Greg Gao(laygin)
#'''

net_h = 320  # network input height, fixed
net_w = 320  # network input width, fixed
# 9 anchor (width, height) pairs in network-input pixels, 3 per output scale
anchors = [26,42, 51,85, 67,138, 93,107, 93,193, 128,149, 142,289, 192,212, 272,306]
obj_thresh = 0.9  # objectness score threshold
nms_thresh = 0.3  # IoU threshold for non-maximum suppression
weights_name = r'./weights/shufflenetv2.h5'
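
# Note (illustrative): get_yolo_boxes in utils.py gives output scale j the slice
# anchors[(2 - j) * 6:(3 - j) * 6]. Assuming the model's outputs are ordered
# coarsest grid first, as in the upstream keras-yolo3 code, that means:
#   output 0 -> (142, 289), (192, 212), (272, 306)  # largest faces
#   output 1 -> (93, 107), (93, 193), (128, 149)
#   output 2 -> (26, 42), (51, 85), (67, 138)       # smallest faces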
--------------------------------------------------------------------------------
/detect_realtime.py:
--------------------------------------------------------------------------------
#-*- coding: utf-8 -*-
#'''
# Created on 2018/8/7 14:25
#
# @Author: Greg Gao (laygin)
#'''
import cv2
import os
from keras.models import load_model
from config import *
from utils import get_yolo_boxes, draw_boxes
import argparse


ap = argparse.ArgumentParser('face detection in realtime.')
ap.add_argument('-m', '--model', default=weights_name,
                help='path to pre-trained weights.')
ap.add_argument('-v', '--video', default=None, help='path to video.')
args = vars(ap.parse_args())

os.environ['CUDA_VISIBLE_DEVICES'] = ''  # run inference on CPU
model = load_model(args['model'])

# fall back to the webcam if no (valid) video path is given
if args['video'] and os.path.exists(args['video']):
    camera = cv2.VideoCapture(args['video'])
else:
    camera = cv2.VideoCapture(0)

while camera.isOpened():
    ret, frame = camera.read()
    if not ret:  # end of stream or read failure
        break
    boxes = get_yolo_boxes(model, [frame], net_h, net_w, anchors, obj_thresh, nms_thresh)[0]

    print('[INFO] number of boxes: {}'.format(len(boxes)))
    frame = draw_boxes(frame, boxes, obj_thresh, rect=False)

    cv2.imshow('Face Detection', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

camera.release()
cv2.destroyAllWindows()
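
# Usage (from the repo root; the video path is illustrative):
#   python detect_realtime.py                        # webcam
#   python detect_realtime.py -v path/to/video.mp4   # video file
# Press 'q' in the display window to quit.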
--------------------------------------------------------------------------------
/images/img_test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opconty/face_detection_in_realtime/ef57421a379f87171bf172fa3d0dc01759a81f48/images/img_test.jpg
--------------------------------------------------------------------------------
/outputs/0.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opconty/face_detection_in_realtime/ef57421a379f87171bf172fa3d0dc01759a81f48/outputs/0.gif
--------------------------------------------------------------------------------
/outputs/image_c0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opconty/face_detection_in_realtime/ef57421a379f87171bf172fa3d0dc01759a81f48/outputs/image_c0.png
--------------------------------------------------------------------------------
/outputs/image_c1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opconty/face_detection_in_realtime/ef57421a379f87171bf172fa3d0dc01759a81f48/outputs/image_c1.png
--------------------------------------------------------------------------------
/outputs/yolo_shufflenet2_infer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opconty/face_detection_in_realtime/ef57421a379f87171bf172fa3d0dc01759a81f48/outputs/yolo_shufflenet2_infer.png
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
#-*- coding:utf-8 -*-
#'''
# Created on 18-8-20 6:48 PM
#
# @Author: Greg Gao(laygin)
#'''
'''
Mostly adapted from https://github.com/experiencor/keras-yolo3
'''
import numpy as np
import cv2
from scipy.special import expit


class BoundBox:
    def __init__(self, xmin, ymin, xmax, ymax, c=None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.c = c  # objectness / confidence score

    def get_score(self):
        return self.c


def normalize(image):
    return image / 255.


def preprocess_input(image, net_h, net_w, normalized=True):
    new_h, new_w, _ = image.shape

    # determine the new size of the image, preserving the aspect ratio
    if (float(net_w) / new_w) < (float(net_h) / new_h):
        new_h = (new_h * net_w) // new_w
        new_w = net_w
    else:
        new_w = (new_w * net_h) // new_h
        new_h = net_h

    # convert BGR to RGB, scale to [0, 1], and resize to the new size
    if normalized:
        image = normalize(image[:, :, ::-1])
        resized = cv2.resize(image, (new_w, new_h))
    else:
        resized = cv2.resize(image[:, :, ::-1] / 255., (new_w, new_h))

    # embed the image into the standard letterbox (gray borders)
    new_image = np.ones((net_h, net_w, 3)) * 0.5
    new_image[(net_h - new_h) // 2:(net_h + new_h) // 2, (net_w - new_w) // 2:(net_w + new_w) // 2, :] = resized
    new_image = np.expand_dims(new_image, 0)

    return new_image


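# Letterboxing example (illustrative shapes): a 480x640 BGR frame is resized to
# 240x320 (aspect ratio preserved), centered on a 320x320 gray (0.5) canvas with
# 40-pixel bands top and bottom, and returned as a (1, 320, 320, 3) float batch
# in RGB order with values in [0, 1].
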
def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))

    boxes = []

    netout[..., :2] = _sigmoid(netout[..., :2])
    netout[..., 4] = _sigmoid(netout[..., 4])

    for i in range(grid_h * grid_w):
        row = i // grid_w
        col = i % grid_w

        for b in range(nb_box):
            # 4th element is the objectness score
            objectness = netout[row, col, b, 4]

            if objectness <= obj_thresh:
                continue

            # first 4 elements are x, y, w, and h
            x, y, w, h = netout[row, col, b, :4]

            x = (col + x) / grid_w  # center position, unit: image width
            y = (row + y) / grid_h  # center position, unit: image height
            w = anchors[2 * b + 0] * np.exp(w) / net_w  # unit: image width
            h = anchors[2 * b + 1] * np.exp(h) / net_h  # unit: image height

            box = BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, objectness)

            boxes.append(box)

    return boxes


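# Decoding example (illustrative numbers): on a 10x10 grid, a prediction in cell
# (row=4, col=7) with sigmoid(t_x) = 0.5 has center x = (7 + 0.5) / 10 = 0.75 of
# the network input width; width is anchor_w * exp(t_w) / net_w, so anchor width
# 93 with t_w = 0 gives w = 93 / 320 ≈ 0.29.
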
def draw_boxes(image, boxes, obj_thresh, rect=True, quiet=True):
    image_c = image.copy()
    for box in boxes:
        label_str = ''
        label = -1

        if box.c > obj_thresh:
            if label_str != '':
                label_str += ', '
            label_str += (str(round(box.c * 100, 2)) + '%')
            label = 0
            if not quiet:
                print(label_str)

        if label >= 0:
            if rect:
                # draw a rectangle around the face
                cv2.rectangle(img=image_c, pt1=(box.xmin, box.ymin), pt2=(box.xmax, box.ymax),
                              color=[255, 0, 0], thickness=2)
            else:
                # draw an ellipse inscribed in the bounding box
                ra, rb, theta = int((box.xmax - box.xmin) / 2), int((box.ymax - box.ymin) / 2), 0
                cx, cy = box.xmin + ra, box.ymin + rb
                cv2.ellipse(image_c, (cx, cy), (ra, rb), theta, 0, 360, (0, 0, 255), 2)

            # draw the confidence score below the box
            cv2.putText(img=image_c,
                        text=label_str,
                        org=(box.xmin + 13, box.ymax + 13),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=1e-3 * image_c.shape[0],
                        color=(13, 200, 13),
                        thickness=2)

    return image_c


def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    # recover the letterboxed size used in preprocess_input
    if (float(net_w) / image_w) < (float(net_h) / image_h):
        new_w = net_w
        new_h = (image_h * net_w) / image_w
    else:
        new_h = net_h  # was net_w upstream; equivalent here since net_h == net_w == 320
        new_w = (image_w * net_h) / image_h

    for i in range(len(boxes)):
        x_offset, x_scale = (net_w - new_w) / 2. / net_w, float(new_w) / net_w
        y_offset, y_scale = (net_h - new_h) / 2. / net_h, float(new_h) / net_h

        # map box coordinates from the letterboxed network input back to image pixels
        boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
        boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
        boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
        boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)


def _interval_overlap(interval_a, interval_b):
    x1, x2 = interval_a
    x3, x4 = interval_b

    if x3 < x1:
        if x4 < x1:
            return 0
        else:
            return min(x2, x4) - x1
    else:
        if x2 < x3:
            return 0
        else:
            return min(x2, x4) - x3


def bbox_iou(box1, box2):
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])

    intersect = intersect_w * intersect_h

    w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin
    w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin

    union = w1 * h1 + w2 * h2 - intersect

    return float(intersect) / union


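# Worked example: box1 = (0, 0, 10, 10) and box2 = (5, 5, 15, 15) overlap in a
# 5x5 region, so intersect = 25, union = 100 + 100 - 25 = 175, IoU = 25/175 ≈ 0.14.
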
def do_nms(boxes, nms_thresh):
    # nothing to suppress if there are no boxes
    if len(boxes) > 0:
        sorted_indices = np.argsort([box.c for box in boxes])[::-1]
        for i in range(len(boxes)):
            index_i = sorted_indices[i]  # box with the i-th largest score

            if boxes[index_i].c == 0:
                continue

            for j in range(i + 1, len(boxes)):
                index_j = sorted_indices[j]  # lower-scoring box

                iou = bbox_iou(boxes[index_i], boxes[index_j])

                if iou >= nms_thresh:
                    # soft NMS (18-7-18): decay the score instead of zeroing it
                    boxes[index_j].c = boxes[index_j].c * (1 - iou)
    else:
        return


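# Soft-NMS example: if a lower-scoring box overlaps a kept box with IoU 0.6
# (>= nms_thresh), its score becomes c * (1 - 0.6); boxes whose score falls to
# obj_thresh or below are then skipped by draw_boxes.
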
def get_yolo_boxes(model, images, net_h, net_w, anchors, obj_thresh, nms_thresh):
    image_h, image_w, _ = images[0].shape
    nb_images = len(images)
    batch_input = np.zeros((nb_images, net_h, net_w, 3))

    # preprocess the input
    for i in range(nb_images):
        batch_input[i] = preprocess_input(images[i], net_h, net_w)

    # run the prediction
    batch_output = model.predict_on_batch(batch_input)
    batch_boxes = [None] * nb_images

    for i in range(nb_images):
        yolos = [batch_output[0][i], batch_output[1][i], batch_output[2][i]]
        boxes = []

        # decode the output of the network; scale j uses its own 3 anchors
        for j in range(len(yolos)):
            yolo_anchors = anchors[(2 - j) * 6:(3 - j) * 6]
            boxes += decode_netout(yolos[j], yolo_anchors, obj_thresh, net_h, net_w)

        # correct the sizes of the bounding boxes (undo the letterboxing)
        correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)

        # suppress non-maximal boxes
        do_nms(boxes, nms_thresh)

        batch_boxes[i] = boxes

    return batch_boxes


def _sigmoid(x):
    return expit(x)
--------------------------------------------------------------------------------
/weights/shufflenetv2.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opconty/face_detection_in_realtime/ef57421a379f87171bf172fa3d0dc01759a81f48/weights/shufflenetv2.h5
--------------------------------------------------------------------------------