├── README.md ├── detect_face.py ├── detect_face.pyc ├── facenet.py ├── facenet.pyc ├── images ├── real time face detection and recognition.jpg ├── video_guai_20.jpg └── video_guai_2192.jpg ├── knn_classifier.model ├── model_check_point ├── det1.npy ├── det2.npy ├── det3.npy └── knn_classifier.model ├── nn4.py ├── nn4.pyc ├── real time face detection and recognition.ipynb ├── save_video frame.ipynb └── train your classifier.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # real_time_face_detection and recognition 2 | This is a real-time face detection and recognition project based on OpenCV/TensorFlow/MTCNN/FaceNet. A Chinese version of the description is available [here](https://zhuanlan.zhihu.com/p/25025596). Face detection is based on [MTCNN](https://kpzhang93.github.io/MTCNN_face_detection_alignment/index.html). Face embedding is based on [FaceNet](https://arxiv.org/abs/1503.03832). 3 | ## Workflow 4 | ![](https://github.com/shanren7/real_time_face_recognition/blob/master/images/real%20time%20face%20detection%20and%20recognition.jpg) 5 | 6 | ## Inspiration 7 | The code was inspired by the following projects: 8 | 9 | 1. [OpenFace](https://github.com/cmusatyalab/openface). The main idea was inspired by OpenFace; however, I prefer Python and TensorFlow, so this project came about. 10 | 11 | 2. [davidsandberg/facenet](https://github.com/davidsandberg/facenet). 12 | 13 | facenet.py was taken from https://github.com/davidsandberg/facenet/blob/master/facenet/src/facenet.py 14 | 15 | nn4.py was taken from https://github.com/davidsandberg/facenet/blob/master/src/models/nn4.py 16 | 17 | detect_face.py was taken from https://github.com/davidsandberg/facenet/blob/master/src/align/detect_face.py 18 | 19 | 3. [yobibyte/yobiface](https://github.com/yobibyte/yobiface). 20 | 21 | ## Dependencies 22 | 1. TensorFlow 23 | 2. OpenCV with Python bindings (cv2) 24 | 3. Jupyter Notebook for running the .ipynb examples 25 | 26 | ## Running 27 | 1. Download the pre-trained FaceNet checkpoint from https://github.com/yobibyte/yobiface/blob/master/model/model-20160506.ckpt-500000 and put it in the model_check_point folder. 28 | 29 | 2. Run [real time face detection and recognition.ipynb](https://github.com/shanren7/real_time_face_recognition/blob/master/real%20time%20face%20detection%20and%20%20recognition.ipynb) with Jupyter Notebook. 30 | 31 | ## Results 32 | ![](https://github.com/shanren7/real_time_face_recognition/blob/master/images/video_guai_20.jpg) 33 | ![](https://github.com/shanren7/real_time_face_recognition/blob/master/images/video_guai_2192.jpg) 34 |
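To make the workflow above concrete, here is a minimal sketch (not part of the repository) of driving the bundled MTCNN detector on a single still image. The parameter values (`minsize=20`, `threshold=[0.6, 0.7, 0.7]`, `factor=0.709`) and the `model_check_point` path are the ones used in the notebooks below; the test image name is taken from the `images` folder, and the output filename `detected.jpg` is arbitrary.

```python
import cv2
import tensorflow as tf
import detect_face

# Detection parameters as used in the notebooks
minsize = 20                  # minimum face size in pixels
threshold = [0.6, 0.7, 0.7]   # P-Net / R-Net / O-Net score thresholds
factor = 0.709                # image-pyramid scale factor

with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        # Loads det1.npy / det2.npy / det3.npy from model_check_point/
        pnet, rnet, onet = detect_face.create_mtcnn(sess, './model_check_point/')

img = cv2.imread('images/video_guai_20.jpg')
bounding_boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet,
                                                 threshold, factor)
# bounding_boxes: one row per detected face -> [x1, y1, x2, y2, score]
# points: 10 x n_faces array; rows 0-4 are landmark x-coordinates, rows 5-9 are y-coordinates
for box in bounding_boxes.astype(int):
    cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
cv2.imwrite('detected.jpg', img)
```

The detector is the same three-stage P-Net / R-Net / O-Net cascade defined in detect_face.py below; the real-time notebook runs it on webcam frames (every few frames, controlled by `frame_interval`) rather than on a still image.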
-------------------------------------------------------------------------------- /detect_face.py: -------------------------------------------------------------------------------- 1 | """ Tensorflow implementation of the face detection / alignment algorithm found at 2 | https://github.com/kpzhang93/MTCNN_face_detection_alignment 3 | """ 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | import numpy as np 31 | import tensorflow as tf 32 | #from math import floor 33 | import cv2 34 | import os 35 | 36 | def layer(op): 37 | '''Decorator for composable network layers.''' 38 | 39 | def layer_decorated(self, *args, **kwargs): 40 | # Automatically set a name if not provided. 41 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 42 | # Figure out the layer inputs. 43 | if len(self.terminals) == 0: 44 | raise RuntimeError('No input variables found for layer %s.' % name) 45 | elif len(self.terminals) == 1: 46 | layer_input = self.terminals[0] 47 | else: 48 | layer_input = list(self.terminals) 49 | # Perform the operation and get the output. 50 | layer_output = op(self, layer_input, *args, **kwargs) 51 | # Add to layer LUT. 52 | self.layers[name] = layer_output 53 | # This output is now the input for the next layer. 54 | self.feed(layer_output) 55 | # Return self for chained calls.
56 | return self 57 | 58 | return layer_decorated 59 | 60 | class Network(object): 61 | 62 | def __init__(self, inputs, trainable=True): 63 | # The input nodes for this network 64 | self.inputs = inputs 65 | # The current list of terminal nodes 66 | self.terminals = [] 67 | # Mapping from layer names to layers 68 | self.layers = dict(inputs) 69 | # If true, the resulting variables are set as trainable 70 | self.trainable = trainable 71 | 72 | self.setup() 73 | 74 | def setup(self): 75 | '''Construct the network. ''' 76 | raise NotImplementedError('Must be implemented by the subclass.') 77 | 78 | def load(self, data_path, session, ignore_missing=False): 79 | '''Load network weights. 80 | data_path: The path to the numpy-serialized network weights 81 | session: The current TensorFlow session 82 | ignore_missing: If true, serialized weights for missing layers are ignored. 83 | ''' 84 | data_dict = np.load(data_path).item() #pylint: disable=no-member 85 | for op_name in data_dict: 86 | with tf.variable_scope(op_name, reuse=True): 87 | for param_name, data in data_dict[op_name].iteritems(): 88 | try: 89 | var = tf.get_variable(param_name) 90 | session.run(var.assign(data)) 91 | except ValueError: 92 | if not ignore_missing: 93 | raise 94 | 95 | def feed(self, *args): 96 | '''Set the input(s) for the next operation by replacing the terminal nodes. 97 | The arguments can be either layer names or the actual layers. 98 | ''' 99 | assert len(args) != 0 100 | self.terminals = [] 101 | for fed_layer in args: 102 | if isinstance(fed_layer, basestring): 103 | try: 104 | fed_layer = self.layers[fed_layer] 105 | except KeyError: 106 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 107 | self.terminals.append(fed_layer) 108 | return self 109 | 110 | def get_output(self): 111 | '''Returns the current network output.''' 112 | return self.terminals[-1] 113 | 114 | def get_unique_name(self, prefix): 115 | '''Returns an index-suffixed unique name for the given prefix. 116 | This is used for auto-generating layer names based on the type-prefix. 117 | ''' 118 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 119 | return '%s_%d' % (prefix, ident) 120 | 121 | def make_var(self, name, shape): 122 | '''Creates a new TensorFlow variable.''' 123 | return tf.get_variable(name, shape, trainable=self.trainable) 124 | 125 | def validate_padding(self, padding): 126 | '''Verifies that the padding is one of the supported ones.''' 127 | assert padding in ('SAME', 'VALID') 128 | 129 | @layer 130 | def conv(self, 131 | inp, 132 | k_h, 133 | k_w, 134 | c_o, 135 | s_h, 136 | s_w, 137 | name, 138 | relu=True, 139 | padding='SAME', 140 | group=1, 141 | biased=True): 142 | # Verify that the padding is acceptable 143 | self.validate_padding(padding) 144 | # Get the number of channels in the input 145 | c_i = inp.get_shape()[-1] 146 | # Verify that the grouping parameter is valid 147 | assert c_i % group == 0 148 | assert c_o % group == 0 149 | # Convolution for a given input and kernel 150 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 151 | with tf.variable_scope(name) as scope: 152 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 153 | # This is the common-case. Convolve the input without any further complications. 
154 | output = convolve(inp, kernel) 155 | # Add the biases 156 | if biased: 157 | biases = self.make_var('biases', [c_o]) 158 | output = tf.nn.bias_add(output, biases) 159 | if relu: 160 | # ReLU non-linearity 161 | output = tf.nn.relu(output, name=scope.name) 162 | return output 163 | 164 | @layer 165 | def prelu(self, inp, name): 166 | with tf.variable_scope(name): 167 | i = inp.get_shape().as_list() 168 | alpha = self.make_var('alpha', shape=(i[-1])) 169 | output = tf.nn.relu(inp) + tf.mul(alpha, -tf.nn.relu(-inp)) 170 | return output 171 | 172 | @layer 173 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 174 | self.validate_padding(padding) 175 | return tf.nn.max_pool(inp, 176 | ksize=[1, k_h, k_w, 1], 177 | strides=[1, s_h, s_w, 1], 178 | padding=padding, 179 | name=name) 180 | 181 | @layer 182 | def fc(self, inp, num_out, name, relu=True): 183 | with tf.variable_scope(name): 184 | input_shape = inp.get_shape() 185 | if input_shape.ndims == 4: 186 | # The input is spatial. Vectorize it first. 187 | dim = 1 188 | for d in input_shape[1:].as_list(): 189 | dim *= d 190 | feed_in = tf.reshape(inp, [-1, dim]) 191 | else: 192 | feed_in, dim = (inp, input_shape[-1].value) 193 | weights = self.make_var('weights', shape=[dim, num_out]) 194 | biases = self.make_var('biases', [num_out]) 195 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 196 | fc = op(feed_in, weights, biases, name=name) 197 | return fc 198 | 199 | 200 | """ 201 | Multi dimensional softmax, 202 | refer to https://github.com/tensorflow/tensorflow/issues/210 203 | compute softmax along the dimension of target 204 | the native softmax only supports batch_size x dimension 205 | """ 206 | @layer 207 | def softmax(self, target, axis, name=None): 208 | max_axis = tf.reduce_max(target, axis, keep_dims=True) 209 | target_exp = tf.exp(target-max_axis) 210 | normalize = tf.reduce_sum(target_exp, axis, keep_dims=True) 211 | softmax = tf.div(target_exp, normalize, name) 212 | return softmax 213 | 214 | class PNet(Network): 215 | def setup(self): 216 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 217 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 218 | .prelu(name='PReLU1') 219 | .max_pool(2, 2, 2, 2, name='pool1') 220 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 221 | .prelu(name='PReLU2') 222 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 223 | .prelu(name='PReLU3') 224 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 225 | .softmax(3,name='prob1')) 226 | 227 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 228 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 229 | 230 | class RNet(Network): 231 | def setup(self): 232 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 233 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 234 | .prelu(name='prelu1') 235 | .max_pool(3, 3, 2, 2, name='pool1') 236 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 237 | .prelu(name='prelu2') 238 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 239 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 240 | .prelu(name='prelu3') 241 | .fc(128, relu=False, name='conv4') 242 | .prelu(name='prelu4') 243 | .fc(2, relu=False, name='conv5-1') 244 | .softmax(1,name='prob1')) 245 | 246 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 247 | .fc(4, relu=False, name='conv5-2')) 248 | 249 | class ONet(Network): 250 | def setup(self): 251 | (self.feed('data') 
#pylint: disable=no-value-for-parameter, no-member 252 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 253 | .prelu(name='prelu1') 254 | .max_pool(3, 3, 2, 2, name='pool1') 255 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 256 | .prelu(name='prelu2') 257 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 258 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 259 | .prelu(name='prelu3') 260 | .max_pool(2, 2, 2, 2, name='pool3') 261 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 262 | .prelu(name='prelu4') 263 | .fc(256, relu=False, name='conv5') 264 | .prelu(name='prelu5') 265 | .fc(2, relu=False, name='conv6-1') 266 | .softmax(1, name='prob1')) 267 | 268 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 269 | .fc(4, relu=False, name='conv6-2')) 270 | 271 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 272 | .fc(10, relu=False, name='conv6-3')) 273 | 274 | def create_mtcnn(sess, model_path): 275 | with tf.variable_scope('pnet'): 276 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 277 | pnet = PNet({'data':data}) 278 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 279 | with tf.variable_scope('rnet'): 280 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 281 | rnet = RNet({'data':data}) 282 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 283 | with tf.variable_scope('onet'): 284 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 285 | onet = ONet({'data':data}) 286 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 287 | 288 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 289 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 290 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 291 | return pnet_fun, rnet_fun, onet_fun 292 | 293 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 294 | # im: input image 295 | # minsize: minimum of faces' size 296 | # pnet, rnet, onet: caffemodel 297 | # threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold 298 | # fastresize: resize img from last scale (using in high-resolution images) if fastresize==true 299 | factor_count=0 300 | total_boxes=np.empty((0,9)) 301 | points=[] 302 | h=img.shape[0] 303 | w=img.shape[1] 304 | minl=np.amin([h, w]) 305 | m=12.0/minsize 306 | minl=minl*m 307 | # creat scale pyramid 308 | scales=[] 309 | while minl>=12: 310 | scales += [m*np.power(factor, factor_count)] 311 | minl = minl*factor 312 | factor_count += 1 313 | 314 | # first stage 315 | for j in range(len(scales)): 316 | scale=scales[j] 317 | hs=int(np.ceil(h*scale)) 318 | ws=int(np.ceil(w*scale)) 319 | im_data = imresample(img, (hs, ws)) 320 | im_data = (im_data-127.5)*0.0078125 321 | img_x = np.expand_dims(im_data, 0) 322 | img_y = np.transpose(img_x, (0,2,1,3)) 323 | out = pnet(img_y) 324 | out0 = np.transpose(out[0], (0,2,1,3)) 325 | out1 = np.transpose(out[1], (0,2,1,3)) 326 | 327 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 328 | 329 | # inter-scale nms 330 | pick = nms(boxes.copy(), 0.5, 'Union') 331 | if boxes.size>0 and pick.size>0: 332 | boxes = boxes[pick,:] 333 | total_boxes = np.append(total_boxes, boxes, axis=0) 334 | 335 | numbox = total_boxes.shape[0] 336 | if numbox>0: 337 | pick = 
nms(total_boxes.copy(), 0.7, 'Union') 338 | total_boxes = total_boxes[pick,:] 339 | regw = total_boxes[:,2]-total_boxes[:,0] 340 | regh = total_boxes[:,3]-total_boxes[:,1] 341 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 342 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 343 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 344 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 345 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 346 | total_boxes = rerec(total_boxes.copy()) 347 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 348 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 349 | 350 | numbox = total_boxes.shape[0] 351 | if numbox>0: 352 | # second stage 353 | tempimg = np.zeros((24,24,3,numbox)) 354 | for k in range(0,numbox): 355 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 356 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 357 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 358 | tempimg[:,:,:,k] = imresample(tmp, (24, 24)) 359 | else: 360 | return np.empty() 361 | tempimg = (tempimg-127.5)*0.0078125 362 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 363 | out = rnet(tempimg1) 364 | out0 = np.transpose(out[0]) 365 | out1 = np.transpose(out[1]) 366 | score = out1[1,:] 367 | ipass = np.where(score>threshold[1]) 368 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 369 | mv = out0[:,ipass[0]] 370 | if total_boxes.shape[0]>0: 371 | pick = nms(total_boxes, 0.7, 'Union') 372 | total_boxes = total_boxes[pick,:] 373 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) 374 | total_boxes = rerec(total_boxes.copy()) 375 | 376 | numbox = total_boxes.shape[0] 377 | if numbox>0: 378 | # third stage 379 | total_boxes = np.fix(total_boxes).astype(np.int32) 380 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 381 | tempimg = np.zeros((48,48,3,numbox)) 382 | for k in range(0,numbox): 383 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 384 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 385 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 386 | tempimg[:,:,:,k] = imresample(tmp, (48, 48)) 387 | else: 388 | return np.empty() 389 | tempimg = (tempimg-127.5)*0.0078125 390 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 391 | out = onet(tempimg1) 392 | out0 = np.transpose(out[0]) 393 | out1 = np.transpose(out[1]) 394 | out2 = np.transpose(out[2]) 395 | score = out2[1,:] 396 | points = out1 397 | ipass = np.where(score>threshold[2]) 398 | points = points[:,ipass[0]] 399 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 400 | mv = out0[:,ipass[0]] 401 | 402 | w = total_boxes[:,2]-total_boxes[:,0]+1 403 | h = total_boxes[:,3]-total_boxes[:,1]+1 404 | points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 405 | points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 406 | if total_boxes.shape[0]>0: 407 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) 408 | pick = nms(total_boxes.copy(), 0.7, 'Min') 409 | total_boxes = total_boxes[pick,:] 410 | points = points[:,pick] 411 | 412 | return total_boxes, points 413 | 414 | 415 | # function [boundingbox] = bbreg(boundingbox,reg) 416 | def bbreg(boundingbox,reg): 417 | # calibrate bounding boxes 418 | if reg.shape[1]==1: 419 | reg = np.reshape(reg, (reg.shape[2], 
reg.shape[3])) 420 | 421 | w = boundingbox[:,2]-boundingbox[:,0]+1 422 | h = boundingbox[:,3]-boundingbox[:,1]+1 423 | b1 = boundingbox[:,0]+reg[:,0]*w 424 | b2 = boundingbox[:,1]+reg[:,1]*h 425 | b3 = boundingbox[:,2]+reg[:,2]*w 426 | b4 = boundingbox[:,3]+reg[:,3]*h 427 | boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) 428 | return boundingbox 429 | 430 | def generateBoundingBox(imap, reg, scale, t): 431 | # use heatmap to generate bounding boxes 432 | stride=2 433 | cellsize=12 434 | 435 | imap = np.transpose(imap) 436 | dx1 = np.transpose(reg[:,:,0]) 437 | dy1 = np.transpose(reg[:,:,1]) 438 | dx2 = np.transpose(reg[:,:,2]) 439 | dy2 = np.transpose(reg[:,:,3]) 440 | y, x = np.where(imap >= t) 441 | if y.shape[0]==1: 442 | dx1 = np.flipud(dx1) 443 | dy1 = np.flipud(dy1) 444 | dx2 = np.flipud(dx2) 445 | dy2 = np.flipud(dy2) 446 | score = imap[(y,x)] 447 | reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) 448 | if reg.size==0: 449 | reg = np.empty((0,3)) 450 | bb = np.transpose(np.vstack([y,x])) 451 | q1 = np.fix((stride*bb+1)/scale) 452 | q2 = np.fix((stride*bb+cellsize-1+1)/scale) 453 | boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) 454 | return boundingbox, reg 455 | 456 | # function pick = nms(boxes,threshold,type) 457 | def nms(boxes, threshold, method): 458 | if boxes.size==0: 459 | return np.empty((0,3)) 460 | x1 = boxes[:,0] 461 | y1 = boxes[:,1] 462 | x2 = boxes[:,2] 463 | y2 = boxes[:,3] 464 | s = boxes[:,4] 465 | area = (x2-x1+1) * (y2-y1+1) 466 | I = np.argsort(s) 467 | pick = np.zeros_like(s, dtype=np.int16) 468 | counter = 0 469 | while I.size>0: 470 | i = I[-1] 471 | pick[counter] = i 472 | counter += 1 473 | idx = I[0:-1] 474 | xx1 = np.maximum(x1[i], x1[idx]) 475 | yy1 = np.maximum(y1[i], y1[idx]) 476 | xx2 = np.minimum(x2[i], x2[idx]) 477 | yy2 = np.minimum(y2[i], y2[idx]) 478 | w = np.maximum(0.0, xx2-xx1+1) 479 | h = np.maximum(0.0, yy2-yy1+1) 480 | inter = w * h 481 | if method is 'Min': 482 | o = inter / np.minimum(area[i], area[idx]) 483 | else: 484 | o = inter / (area[i] + area[idx] - inter) 485 | I = I[np.where(o<=threshold)] 486 | pick = pick[0:counter] 487 | return pick 488 | 489 | # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) 490 | def pad(total_boxes, w, h): 491 | # compute the padding coordinates (pad the bounding boxes to square) 492 | tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) 493 | tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) 494 | numbox = total_boxes.shape[0] 495 | 496 | dx = np.ones((numbox), dtype=np.int32) 497 | dy = np.ones((numbox), dtype=np.int32) 498 | edx = tmpw.copy().astype(np.int32) 499 | edy = tmph.copy().astype(np.int32) 500 | 501 | x = total_boxes[:,0].copy().astype(np.int32) 502 | y = total_boxes[:,1].copy().astype(np.int32) 503 | ex = total_boxes[:,2].copy().astype(np.int32) 504 | ey = total_boxes[:,3].copy().astype(np.int32) 505 | 506 | tmp = np.where(ex>w) 507 | edx[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) 508 | ex[tmp] = w 509 | 510 | tmp = np.where(ey>h) 511 | edy[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) 512 | ey[tmp] = h 513 | 514 | tmp = np.where(x<1) 515 | dx[tmp] = np.expand_dims(2-x[tmp],1) 516 | x[tmp] = 1 517 | 518 | tmp = np.where(y<1) 519 | dy[tmp] = np.expand_dims(2-y[tmp],1) 520 | y[tmp] = 1 521 | 522 | return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph 523 | 524 | # function [bboxA] = rerec(bboxA) 525 | def rerec(bboxA): 526 | # convert bboxA to square 527 | h = bboxA[:,3]-bboxA[:,1] 528 | w 
= bboxA[:,2]-bboxA[:,0] 529 | l = np.maximum(w, h) 530 | bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 531 | bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 532 | bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) 533 | return bboxA 534 | 535 | def imresample(img, sz): 536 | im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #pylint: disable=no-member 537 | return im_data 538 | 539 | # This method is kept for debugging purpose 540 | # h=img.shape[0] 541 | # w=img.shape[1] 542 | # hs, ws = sz 543 | # dx = float(w) / ws 544 | # dy = float(h) / hs 545 | # im_data = np.zeros((hs,ws,3)) 546 | # for a1 in range(0,hs): 547 | # for a2 in range(0,ws): 548 | # for a3 in range(0,3): 549 | # im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3] 550 | # return im_data 551 | 552 | -------------------------------------------------------------------------------- /detect_face.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shanren7/real_time_face_recognition/6e6764607c92f9fa852ced0e1faa9561ef024857/detect_face.pyc -------------------------------------------------------------------------------- /facenet.py: -------------------------------------------------------------------------------- 1 | # The whole file was taken from @davidsandberg implementation 2 | # https://github.com/davidsandberg/facenet/blob/master/facenet/src/facenet.py 3 | 4 | """Functions for building the face recognition network. 5 | """ 6 | # pylint: disable=missing-docstring 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from os import path 13 | from six.moves import xrange 14 | import tensorflow as tf 15 | from tensorflow.python.ops import array_ops 16 | from tensorflow.python.ops import control_flow_ops 17 | import numpy as np 18 | from scipy import misc 19 | import matplotlib.pyplot as plt 20 | from sklearn.cross_validation import KFold 21 | 22 | parameters = [] 23 | conv_counter = 1 24 | pool_counter = 1 25 | affine_counter = 1 26 | 27 | def conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, prefix, phase_train=True, use_batch_norm=True): 28 | global conv_counter 29 | global parameters 30 | name = prefix + '_' + str(conv_counter) 31 | conv_counter += 1 32 | with tf.name_scope(name) as scope: 33 | kernel = tf.Variable(tf.truncated_normal([kH, kW, nIn, nOut], 34 | dtype=tf.float32, 35 | stddev=1e-1), name='weights') 36 | conv = tf.nn.conv2d(inpOp, kernel, [1, dH, dW, 1], padding=padType) 37 | 38 | if use_batch_norm: 39 | conv_bn = batch_norm(conv, nOut, phase_train, 'batch_norm') 40 | else: 41 | conv_bn = conv 42 | biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32), 43 | trainable=True, name='biases') 44 | bias = tf.nn.bias_add(conv_bn, biases) 45 | conv1 = tf.nn.relu(bias, name=scope) 46 | parameters += [kernel, biases] 47 | return conv1 48 | 49 | def affine(inpOp, nIn, nOut): 50 | global affine_counter 51 | global parameters 52 | name = 'affine' + str(affine_counter) 53 | affine_counter += 1 54 | with tf.name_scope(name): 55 | kernel = tf.Variable(tf.truncated_normal([nIn, nOut], 56 | dtype=tf.float32, 57 | stddev=1e-1), name='weights') 58 | biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32), 59 | trainable=True, name='biases') 60 | affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) 61 | parameters += [kernel, biases] 62 | return affine1 63 | 64 | def lppool(inpOp, pnorm, kH, kW, dH, dW, padding): 65 | 
global pool_counter 66 | global parameters 67 | name = 'pool' + str(pool_counter) 68 | pool_counter += 1 69 | 70 | with tf.name_scope('lppool'): 71 | if pnorm == 2: 72 | pwr = tf.square(inpOp) 73 | else: 74 | pwr = tf.pow(inpOp, pnorm) 75 | 76 | subsamp = tf.nn.avg_pool(pwr, 77 | ksize=[1, kH, kW, 1], 78 | strides=[1, dH, dW, 1], 79 | padding=padding, 80 | name=name) 81 | subsamp_sum = tf.mul(subsamp, kH*kW) 82 | 83 | if pnorm == 2: 84 | out = tf.sqrt(subsamp_sum) 85 | else: 86 | out = tf.pow(subsamp_sum, 1/pnorm) 87 | 88 | return out 89 | 90 | def mpool(inpOp, kH, kW, dH, dW, padding): 91 | global pool_counter 92 | global parameters 93 | name = 'pool' + str(pool_counter) 94 | pool_counter += 1 95 | with tf.name_scope('maxpool'): 96 | maxpool = tf.nn.max_pool(inpOp, 97 | ksize=[1, kH, kW, 1], 98 | strides=[1, dH, dW, 1], 99 | padding=padding, 100 | name=name) 101 | return maxpool 102 | 103 | def apool(inpOp, kH, kW, dH, dW, padding): 104 | global pool_counter 105 | global parameters 106 | name = 'pool' + str(pool_counter) 107 | pool_counter += 1 108 | return tf.nn.avg_pool(inpOp, 109 | ksize=[1, kH, kW, 1], 110 | strides=[1, dH, dW, 1], 111 | padding=padding, 112 | name=name) 113 | 114 | def batch_norm(x, n_out, phase_train, name, affine=True): 115 | """ 116 | Batch normalization on convolutional maps. 117 | Args: 118 | x: Tensor, 4D BHWD input maps 119 | n_out: integer, depth of input maps 120 | phase_train: boolean tf.Variable, true indicates training phase 121 | scope: string, variable scope 122 | affine: whether to affine-transform outputs 123 | Return: 124 | normed: batch-normalized maps 125 | Ref: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow/33950177 126 | """ 127 | global parameters 128 | 129 | with tf.name_scope(name): 130 | 131 | beta = tf.Variable(tf.constant(0.0, shape=[n_out]), 132 | name=name+'/beta', trainable=True) 133 | gamma = tf.Variable(tf.constant(1.0, shape=[n_out]), 134 | name=name+'/gamma', trainable=affine) 135 | 136 | batch_mean, batch_var = tf.nn.moments(x, [0,1,2], name='moments') 137 | ema = tf.train.ExponentialMovingAverage(decay=0.9) 138 | def mean_var_with_update(): 139 | ema_apply_op = ema.apply([batch_mean, batch_var]) 140 | with tf.control_dependencies([ema_apply_op]): 141 | return tf.identity(batch_mean), tf.identity(batch_var) 142 | mean, var = control_flow_ops.cond(phase_train, 143 | mean_var_with_update, 144 | lambda: (ema.average(batch_mean), ema.average(batch_var))) 145 | normed = tf.nn.batch_norm_with_global_normalization(x, mean, var, 146 | beta, gamma, 1e-3, affine, name=name) 147 | parameters += [beta, gamma] 148 | return normed 149 | 150 | def inception(inp, inSize, ks, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2, o4s3, poolType, name, phase_train=True, use_batch_norm=True): 151 | 152 | print('name = ', name) 153 | print('inputSize = ', inSize) 154 | print('kernelSize = {3,5}') 155 | print('kernelStride = {%d,%d}' % (ks,ks)) 156 | print('outputSize = {%d,%d}' % (o2s2,o3s2)) 157 | print('reduceSize = {%d,%d,%d,%d}' % (o2s1,o3s1,o4s2,o1s)) 158 | print('pooling = {%s, %d, %d, %d, %d}' % (poolType, o4s1, o4s1, o4s3, o4s3)) 159 | if (o4s2>0): 160 | o4 = o4s2 161 | else: 162 | o4 = inSize 163 | print('outputSize = ', o1s+o2s2+o3s2+o4) 164 | print() 165 | 166 | net = [] 167 | 168 | with tf.name_scope(name): 169 | if o1s>0: 170 | conv1 = conv(inp, inSize, o1s, 1, 1, 1, 1, 'SAME', 'in1_conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm) 171 | net.append(conv1) 172 | 173 | if o2s1>0: 174 | conv3a 
= conv(inp, inSize, o2s1, 1, 1, 1, 1, 'SAME', 'in2_conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm) 175 | conv3 = conv(conv3a, o2s1, o2s2, 3, 3, ks, ks, 'SAME', 'in2_conv3x3', phase_train=phase_train, use_batch_norm=use_batch_norm) 176 | net.append(conv3) 177 | 178 | if o3s1>0: 179 | conv5a = conv(inp, inSize, o3s1, 1, 1, 1, 1, 'SAME', 'in3_conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm) 180 | conv5 = conv(conv5a, o3s1, o3s2, 5, 5, ks, ks, 'SAME', 'in3_conv5x5', phase_train=phase_train, use_batch_norm=use_batch_norm) 181 | net.append(conv5) 182 | 183 | if poolType=='MAX': 184 | pool = mpool(inp, o4s1, o4s1, o4s3, o4s3, 'SAME') 185 | elif poolType=='L2': 186 | pool = lppool(inp, 2, o4s1, o4s1, o4s3, o4s3, 'SAME') 187 | else: 188 | raise ValueError('Invalid pooling type "%s"' % poolType) 189 | 190 | if o4s2>0: 191 | pool_conv = conv(pool, inSize, o4s2, 1, 1, 1, 1, 'SAME', 'in4_conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm) 192 | else: 193 | pool_conv = pool 194 | net.append(pool_conv) 195 | 196 | incept = array_ops.concat(3, net, name=name) 197 | return incept 198 | 199 | def triplet_loss(anchor, positive, negative, alpha): 200 | """Calculate the triplet loss according to the FaceNet paper 201 | 202 | Args: 203 | anchor: the embeddings for the anchor images. 204 | positive: the embeddings for the positive images. 205 | positive: the embeddings for the negative images. 206 | 207 | Returns: 208 | the triplet loss according to the FaceNet paper as a float tensor. 209 | """ 210 | with tf.name_scope('triplet_loss'): 211 | pos_dist = tf.reduce_sum(tf.square(tf.sub(anchor, positive)), 1) # Summing over distances in each batch 212 | neg_dist = tf.reduce_sum(tf.square(tf.sub(anchor, negative)), 1) 213 | 214 | basic_loss = tf.add(tf.sub(pos_dist,neg_dist), alpha) 215 | loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0, name='tripletloss') 216 | 217 | return loss 218 | 219 | def _add_loss_summaries(total_loss): 220 | """Add summaries for losses in CIFAR-10 model. 221 | 222 | Generates moving average for all losses and associated summaries for 223 | visualizing the performance of the network. 224 | 225 | Args: 226 | total_loss: Total loss from loss(). 227 | Returns: 228 | loss_averages_op: op for generating moving averages of losses. 229 | """ 230 | # Compute the moving average of all individual losses and the total loss. 231 | loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') 232 | losses = tf.get_collection('losses') 233 | loss_averages_op = loss_averages.apply(losses + [total_loss]) 234 | 235 | # Attach a scalar summmary to all individual losses and the total loss; do the 236 | # same for the averaged version of the losses. 237 | for l in losses + [total_loss]: 238 | # Name each loss as '(raw)' and name the moving average version of the loss 239 | # as the original loss name. 240 | tf.scalar_summary(l.op.name +' (raw)', l) 241 | tf.scalar_summary(l.op.name, loss_averages.average(l)) 242 | 243 | return loss_averages_op 244 | 245 | def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay): 246 | """Setup training for the FaceNet model. 247 | 248 | Create an optimizer and apply to all trainable variables. Add moving 249 | average for all trainable variables. 250 | 251 | Args: 252 | total_loss: Total loss from loss(). 253 | global_step: Integer Variable counting the number of training steps 254 | processed. 255 | Returns: 256 | train_op: op for training. 
257 | """ 258 | # Generate moving averages of all losses and associated summaries. 259 | loss_averages_op = _add_loss_summaries(total_loss) 260 | 261 | # Compute gradients. 262 | with tf.control_dependencies([loss_averages_op]): 263 | if optimizer=='ADAGRAD': 264 | opt = tf.train.AdagradOptimizer(learning_rate) 265 | elif optimizer=='ADADELTA': 266 | opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6) 267 | elif optimizer=='ADAM': 268 | opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8) 269 | else: 270 | raise ValueError('Invalid optimization algorithm') 271 | 272 | grads = opt.compute_gradients(total_loss) 273 | 274 | # Apply gradients. 275 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) 276 | 277 | # Add histograms for trainable variables. 278 | for var in tf.trainable_variables(): 279 | tf.histogram_summary(var.op.name, var) 280 | 281 | # Add histograms for gradients. 282 | for grad, var in grads: 283 | if grad is not None: 284 | tf.histogram_summary(var.op.name + '/gradients', grad) 285 | 286 | # Track the moving averages of all trainable variables. 287 | variable_averages = tf.train.ExponentialMovingAverage( 288 | moving_average_decay, global_step) 289 | variables_averages_op = variable_averages.apply(tf.trainable_variables()) 290 | 291 | with tf.control_dependencies([apply_gradient_op, variables_averages_op]): 292 | train_op = tf.no_op(name='train') 293 | 294 | return train_op, grads 295 | 296 | def prewhiten(x): 297 | mean = np.mean(x) 298 | std = np.std(x) 299 | std_adj = np.max(std, 1.0/np.sqrt(x.size)) 300 | y = np.multiply(np.subtract(x, mean), 1/std_adj) 301 | return y 302 | 303 | def crop(image, random_crop, image_size): 304 | if image.shape[1]>image_size: 305 | sz1 = image.shape[1]/2 306 | sz2 = image_size/2 307 | if random_crop: 308 | diff = sz1-sz2 309 | (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) 310 | else: 311 | (h, v) = (0,0) 312 | image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] 313 | return image 314 | 315 | def flip(image, random_flip): 316 | if random_flip and np.random.choice([True, False]): 317 | image = np.fliplr(image) 318 | return image 319 | 320 | def to_rgb(img): 321 | w, h = img.shape 322 | ret = np.empty((w, h, 3), dtype=np.uint8) 323 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 324 | return ret 325 | 326 | def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): 327 | nrof_samples = len(image_paths) 328 | img_list = [None] * nrof_samples 329 | for i in xrange(nrof_samples): 330 | img = misc.imread(image_paths[i]) 331 | if img.ndim == 2: 332 | img = to_rgb(img) 333 | if do_prewhiten: 334 | img = prewhiten(img) 335 | img = crop(img, do_random_crop, image_size) 336 | img = flip(img, do_random_flip) 337 | img_list[i] = img 338 | images = np.stack(img_list) 339 | return images 340 | 341 | def get_batch(image_data, batch_size, batch_index): 342 | nrof_examples = np.size(image_data, 0) 343 | j = batch_index*batch_size % nrof_examples 344 | if j+batch_size<=nrof_examples: 345 | batch = image_data[j:j+batch_size,:,:,:] 346 | else: 347 | x1 = image_data[j:nrof_examples,:,:,:] 348 | x2 = image_data[0:nrof_examples-j,:,:,:] 349 | batch = np.vstack([x1,x2]) 350 | batch_float = batch.astype(np.float32) 351 | return batch_float 352 | 353 | def get_triplet_batch(triplets, batch_index, batch_size): 354 | ax, px, nx = triplets 355 | a = get_batch(ax, int(batch_size/3), batch_index) 356 | p = get_batch(px, 
int(batch_size/3), batch_index) 357 | n = get_batch(nx, int(batch_size/3), batch_index) 358 | batch = np.vstack([a, p, n]) 359 | return batch 360 | 361 | def select_training_triplets(embeddings, num_per_class, image_data, people_per_batch, alpha): 362 | 363 | def dist(emb1, emb2): 364 | x = np.square(np.subtract(emb1, emb2)) 365 | return np.sum(x, 0) 366 | 367 | nrof_images = image_data.shape[0] 368 | nrof_triplets = nrof_images - people_per_batch 369 | shp = [nrof_triplets, image_data.shape[1], image_data.shape[2], image_data.shape[3]] 370 | as_arr = np.zeros(shp) 371 | ps_arr = np.zeros(shp) 372 | ns_arr = np.zeros(shp) 373 | 374 | trip_idx = 0 375 | shuffle = np.arange(nrof_triplets) 376 | np.random.shuffle(shuffle) 377 | emb_start_idx = 0 378 | nrof_random_negs = 0 379 | for i in xrange(people_per_batch): 380 | n = num_per_class[i] 381 | for j in range(1,n): 382 | a_idx = emb_start_idx 383 | p_idx = emb_start_idx + j 384 | as_arr[shuffle[trip_idx]] = image_data[a_idx] 385 | ps_arr[shuffle[trip_idx]] = image_data[p_idx] 386 | 387 | # Select a semi-hard negative that has a distance 388 | # further away from the positive exemplar. 389 | pos_dist = dist(embeddings[a_idx][:], embeddings[p_idx][:]) 390 | sel_neg_idx = emb_start_idx 391 | while sel_neg_idx>=emb_start_idx and sel_neg_idx<=emb_start_idx+n-1: 392 | sel_neg_idx = (np.random.randint(1, 2**32) % nrof_images) -1 # Seems to give the same result as the lua implementation 393 | #sel_neg_idx = np.random.random_integers(0, nrof_images-1) 394 | sel_neg_dist = dist(embeddings[a_idx][:], embeddings[sel_neg_idx][:]) 395 | 396 | random_neg = True 397 | for k in range(nrof_images): 398 | if kemb_start_idx+n-1: 399 | neg_dist = dist(embeddings[a_idx][:], embeddings[k][:]) 400 | if pos_dist=emb_start_idx and sel_neg_idx<=emb_start_idx+n-1: 443 | sel_neg_idx = (np.random.randint(1, 2**32) % nrof_images) -1 444 | 445 | ns_arr[shuffle[trip_idx]] = image_data[sel_neg_idx] 446 | trip_idx += 1 447 | 448 | emb_start_idx += n 449 | 450 | nrof_triplets = trip_idx // batch_size * batch_size 451 | triplets = (as_arr[0:nrof_triplets,:,:,:], ps_arr[0:nrof_triplets,:,:,:], ns_arr[0:nrof_triplets,:,:,:]) 452 | 453 | return triplets, nrof_triplets 454 | 455 | 456 | class ImageClass(): 457 | "Stores the paths to images for a given class" 458 | def __init__(self, name, image_paths): 459 | self.name = name 460 | self.image_paths = image_paths 461 | 462 | def __str__(self): 463 | return self.name + ', ' + str(len(self.image_paths)) + ' images' 464 | 465 | def __len__(self): 466 | return len(self.image_paths) 467 | 468 | def get_dataset(paths): 469 | dataset = [] 470 | for path in paths.split(':'): 471 | path_exp = os.path.expanduser(path) 472 | classes = os.listdir(path_exp) 473 | classes.sort() 474 | nrof_classes = len(classes) 475 | for i in range(nrof_classes): 476 | class_name = classes[i] 477 | facedir = os.path.join(path_exp, class_name) 478 | if os.path.isdir(facedir): 479 | images = os.listdir(facedir) 480 | image_paths = map(lambda x: os.path.join(facedir,x), images) 481 | dataset.append(ImageClass(class_name, image_paths)) 482 | 483 | return dataset 484 | 485 | def split_dataset(dataset, split_ratio, mode): 486 | if mode=='SPLIT_CLASSES': 487 | nrof_classes = len(dataset) 488 | class_indices = np.arange(nrof_classes) 489 | np.random.shuffle(class_indices) 490 | split = int(round(nrof_classes*split_ratio)) 491 | train_set = [dataset[i] for i in class_indices[0:split]] 492 | test_set = [dataset[i] for i in class_indices[split:-1]] 493 | elif 
mode=='SPLIT_IMAGES': 494 | train_set = [] 495 | test_set = [] 496 | min_nrof_images = 2 497 | for cls in dataset: 498 | paths = cls.image_paths 499 | np.random.shuffle(paths) 500 | split = int(round(len(paths)*split_ratio)) 501 | if splitconverting to gray--->converting to rgb\n", 305 | "#--->detecting faces---->croping faces--->embedding--->classifying--->print\n", 306 | "\n", 307 | "\n", 308 | "video_capture = cv2.VideoCapture(0)\n", 309 | "c=0\n", 310 | " \n", 311 | "while True:\n", 312 | " # Capture frame-by-frame\n", 313 | "\n", 314 | " ret, frame = video_capture.read()\n", 315 | " #gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n", 316 | " #print(frame.shape)\n", 317 | " \n", 318 | " timeF = frame_interval\n", 319 | " \n", 320 | " \n", 321 | " if(c%timeF == 0): #frame_interval==3, face detection every 3 frames\n", 322 | " \n", 323 | " find_results=[]\n", 324 | " gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n", 325 | " \n", 326 | " \n", 327 | " if gray.ndim == 2:\n", 328 | " img = to_rgb(gray)\n", 329 | " \n", 330 | " \n", 331 | "\n", 332 | " bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)\n", 333 | "\n", 334 | " \n", 335 | " \n", 336 | " nrof_faces = bounding_boxes.shape[0]#number of faces\n", 337 | " #print('找到人脸数目为:{}'.format(nrof_faces))\n", 338 | " \n", 339 | "\n", 340 | " for face_position in bounding_boxes:\n", 341 | " \n", 342 | " face_position=face_position.astype(int)\n", 343 | " \n", 344 | " #print((int(face_position[0]), int( face_position[1])))\n", 345 | " #word_position.append((int(face_position[0]), int( face_position[1])))\n", 346 | " \n", 347 | " cv2.rectangle(frame, (face_position[0], \n", 348 | " face_position[1]), \n", 349 | " (face_position[2], face_position[3]), \n", 350 | " (0, 255, 0), 2)\n", 351 | " \n", 352 | " crop=img[face_position[1]:face_position[3],face_position[0]:face_position[2],]\n", 353 | " \n", 354 | " crop = cv2.resize(crop, (96, 96), interpolation=cv2.INTER_CUBIC )\n", 355 | " \n", 356 | " data=crop.reshape(-1,96,96,3)\n", 357 | " \n", 358 | " emb_data = sess.run([embeddings], \n", 359 | " feed_dict={images_placeholder: np.array(data), \n", 360 | " phase_train_placeholder: False })[0]\n", 361 | " \n", 362 | " predict = model.predict(emb_data) \n", 363 | " \n", 364 | " \n", 365 | " if predict==1:\n", 366 | " find_results.append('me')\n", 367 | " elif predict==2:\n", 368 | " find_results.append('others')\n", 369 | "\n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " cv2.putText(frame,'detected:{}'.format(find_results), (50,100), \n", 374 | " cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0 ,0), \n", 375 | " thickness = 2, lineType = 2)\n", 376 | " \n", 377 | " \n", 378 | " #print(faces)\n", 379 | " c+=1\n", 380 | " # Draw a rectangle around the faces\n", 381 | " \n", 382 | "\n", 383 | "\n", 384 | " # Display the resulting frame\n", 385 | "\n", 386 | " cv2.imshow('Video', frame)\n", 387 | "\n", 388 | " if cv2.waitKey(1) & 0xFF == ord('q'):\n", 389 | " break\n", 390 | " \n", 391 | "\n", 392 | "\n", 393 | "# When everything is done, release the capture\n", 394 | "\n", 395 | "video_capture.release()\n", 396 | "cv2.destroyAllWindows()\n" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": { 403 | "collapsed": true 404 | }, 405 | "outputs": [], 406 | "source": [] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": { 412 | "collapsed": true 413 | }, 414 | "outputs": [], 415 | "source": [] 416 | } 417 | ], 418 | "metadata": 
{ 419 | "kernelspec": { 420 | "display_name": "Python 2", 421 | "language": "python", 422 | "name": "python2" 423 | }, 424 | "language_info": { 425 | "codemirror_mode": { 426 | "name": "ipython", 427 | "version": 2 428 | }, 429 | "file_extension": ".py", 430 | "mimetype": "text/x-python", 431 | "name": "python", 432 | "nbconvert_exporter": "python", 433 | "pygments_lexer": "ipython2", 434 | "version": "2.7.12" 435 | } 436 | }, 437 | "nbformat": 4, 438 | "nbformat_minor": 0 439 | } 440 | -------------------------------------------------------------------------------- /save_video frame.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# This script obtains frames from the camera and saves them as jpg files." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "\n", 19 | "import cv2\n", 20 | "video_capture = cv2.VideoCapture(0)\n", 21 | "c=0\n", 22 | "while True:\n", 23 | " # Capture frame-by-frame\n", 24 | "\n", 25 | " ret, frame = video_capture.read()\n", 26 | "\n", 27 | " \n", 28 | " timeF = 10\n", 29 | " \n", 30 | " if(c%timeF == 0): #save as jpg every 10 frames \n", 31 | " cv2.imwrite('~/train_dir/me'+str(c) + '.jpg',frame) #save as jpg\n", 32 | "\n", 33 | " c+=1\n", 34 | " \n", 35 | "\n", 36 | " if cv2.waitKey(1) & 0xFF == ord('q'):\n", 37 | " break\n", 38 | "\n", 39 | "# When everything is done, release the capture\n", 40 | "\n", 41 | "video_capture.release()\n", 42 | "cv2.destroyAllWindows()" 43 | ] 44 | } 45 | ], 46 | "metadata": { 47 | "kernelspec": { 48 | "display_name": "Python 2", 49 | "language": "python", 50 | "name": "python2" 51 | }, 52 | "language_info": { 53 | "codemirror_mode": { 54 | "name": "ipython", 55 | "version": 2 56 | }, 57 | "file_extension": ".py", 58 | "mimetype": "text/x-python", 59 | "name": "python", 60 | "nbconvert_exporter": "python", 61 | "pygments_lexer": "ipython2", 62 | "version": "2.7.12" 63 | } 64 | }, 65 | "nbformat": 4, 66 | "nbformat_minor": 0 67 | } 68 | -------------------------------------------------------------------------------- /train your classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# This script processes images and trains your own face classifier." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [ 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | "text": [ 21 | "/usr/local/lib/python2.7/dist-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n", 22 | " warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n" 23 | ] 24 | }, 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "Populating the interactive namespace from numpy and matplotlib\n" 30 | ] 31 | }, 32 | { 33 | "name": "stderr", 34 | "output_type": "stream", 35 | "text": [ 36 | "/usr/local/lib/python2.7/dist-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved.
Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 37 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n", 38 | "/usr/local/lib/python2.7/dist-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['copy']\n", 39 | "`%matplotlib` prevents importing * from pylab and numpy\n", 40 | " \"\\n`%matplotlib` prevents importing * from pylab and numpy\"\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "\n", 46 | "import tensorflow as tf\n", 47 | "import numpy as np\n", 48 | "import cv2\n", 49 | "\n", 50 | "import os\n", 51 | "from os.path import join as pjoin\n", 52 | "import sys\n", 53 | "import copy\n", 54 | "import detect_face\n", 55 | "import nn4 as network\n", 56 | "import matplotlib.pyplot as plt\n", 57 | "\n", 58 | "\n", 59 | "import sklearn\n", 60 | "from sklearn.preprocessing import StandardScaler\n", 61 | "from sklearn.model_selection import train_test_split\n", 62 | "from sklearn import metrics \n", 63 | "from sklearn.externals import joblib\n", 64 | "\n", 65 | "%pylab inline\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 2, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "#face detection parameters\n", 77 | "minsize = 20 # minimum size of face\n", 78 | "threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold\n", 79 | "factor = 0.709 # scale factor\n", 80 | "\n", 81 | "#facenet embedding parameters\n", 82 | "\n", 83 | "model_dir='./model_check_point/model.ckpt-500000'#\"Directory containing the graph definition and checkpoint files.\")\n", 84 | "model_def= 'models.nn4' # \"Points to a module containing the definition of the inference graph.\")\n", 85 | "image_size=96 #\"Image size (height, width) in pixels.\"\n", 86 | "pool_type='MAX' #\"The type of pooling to use for some of the inception layers {'MAX', 'L2'}.\n", 87 | "use_lrn=False #\"Enables Local Response Normalization after the first layers of the inception network.\"\n", 88 | "seed=42,# \"Random seed.\"\n", 89 | "batch_size= None # \"Number of images to process in a batch.\"\n", 90 | "\n" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 6, 96 | "metadata": { 97 | "collapsed": true 98 | }, 99 | "outputs": [], 100 | "source": [] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 3, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "Creating networks and loading parameters\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "#建立人脸检测模型,加载参数\n", 119 | "print('Creating networks and loading parameters')\n", 120 | "gpu_memory_fraction=1.0\n", 121 | "with tf.Graph().as_default():\n", 122 | " gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)\n", 123 | " sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))\n", 124 | " with sess.as_default():\n", 125 | " pnet, rnet, onet = detect_face.create_mtcnn(sess, './model_check_point/')\n", 126 | " \n", 127 | " " 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 4, 133 | "metadata": { 134 | "collapsed": false 135 | }, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "建立facenet embedding模型\n", 142 | "name = incept3a\n", 143 | "inputSize = 192\n", 144 | "kernelSize = {3,5}\n", 145 | "kernelStride 
= {1,1}\n", 146 | "outputSize = {128,32}\n", 147 | "reduceSize = {96,16,32,64}\n", 148 | "pooling = {MAX, 3, 3, 1, 1}\n", 149 | "outputSize = 256\n", 150 | "\n", 151 | "name = incept3b\n", 152 | "inputSize = 256\n", 153 | "kernelSize = {3,5}\n", 154 | "kernelStride = {1,1}\n", 155 | "outputSize = {128,64}\n", 156 | "reduceSize = {96,32,64,64}\n", 157 | "pooling = {MAX, 3, 3, 1, 1}\n", 158 | "outputSize = 320\n", 159 | "\n", 160 | "name = incept3c\n", 161 | "inputSize = 320\n", 162 | "kernelSize = {3,5}\n", 163 | "kernelStride = {2,2}\n", 164 | "outputSize = {256,64}\n", 165 | "reduceSize = {128,32,0,0}\n", 166 | "pooling = {MAX, 3, 3, 2, 2}\n", 167 | "outputSize = 640\n", 168 | "\n", 169 | "name = incept4a\n", 170 | "inputSize = 640\n", 171 | "kernelSize = {3,5}\n", 172 | "kernelStride = {1,1}\n", 173 | "outputSize = {192,64}\n", 174 | "reduceSize = {96,32,128,256}\n", 175 | "pooling = {MAX, 3, 3, 1, 1}\n", 176 | "outputSize = 640\n", 177 | "\n", 178 | "name = incept4b\n", 179 | "inputSize = 640\n", 180 | "kernelSize = {3,5}\n", 181 | "kernelStride = {1,1}\n", 182 | "outputSize = {224,64}\n", 183 | "reduceSize = {112,32,128,224}\n", 184 | "pooling = {MAX, 3, 3, 1, 1}\n", 185 | "outputSize = 640\n", 186 | "\n", 187 | "name = incept4c\n", 188 | "inputSize = 640\n", 189 | "kernelSize = {3,5}\n", 190 | "kernelStride = {1,1}\n", 191 | "outputSize = {256,64}\n", 192 | "reduceSize = {128,32,128,192}\n", 193 | "pooling = {MAX, 3, 3, 1, 1}\n", 194 | "outputSize = 640\n", 195 | "\n", 196 | "name = incept4d\n", 197 | "inputSize = 640\n", 198 | "kernelSize = {3,5}\n", 199 | "kernelStride = {1,1}\n", 200 | "outputSize = {288,64}\n", 201 | "reduceSize = {144,32,128,160}\n", 202 | "pooling = {MAX, 3, 3, 1, 1}\n", 203 | "outputSize = 640\n", 204 | "\n", 205 | "name = incept4e\n", 206 | "inputSize = 640\n", 207 | "kernelSize = {3,5}\n", 208 | "kernelStride = {2,2}\n", 209 | "outputSize = {256,128}\n", 210 | "reduceSize = {160,64,0,0}\n", 211 | "pooling = {MAX, 3, 3, 2, 2}\n", 212 | "outputSize = 1024\n", 213 | "\n", 214 | "name = incept5a\n", 215 | "inputSize = 1024\n", 216 | "kernelSize = {3,5}\n", 217 | "kernelStride = {1,1}\n", 218 | "outputSize = {384,0}\n", 219 | "reduceSize = {192,0,128,384}\n", 220 | "pooling = {MAX, 3, 3, 1, 1}\n", 221 | "outputSize = 896\n", 222 | "\n", 223 | "name = incept5b\n", 224 | "inputSize = 896\n", 225 | "kernelSize = {3,5}\n", 226 | "kernelStride = {1,1}\n", 227 | "outputSize = {384,0}\n", 228 | "reduceSize = {192,0,128,384}\n", 229 | "pooling = {MAX, 3, 3, 1, 1}\n", 230 | "outputSize = 896\n", 231 | "\n", 232 | "facenet embedding模型建立完毕\n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "#建立facenet embedding模型\n", 238 | "print('建立facenet embedding模型')\n", 239 | "tf.Graph().as_default()\n", 240 | "sess = tf.Session()\n", 241 | "images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, \n", 242 | " image_size, \n", 243 | " image_size, 3), name='input')\n", 244 | "\n", 245 | "phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')\n", 246 | "\n", 247 | "\n", 248 | "\n", 249 | "embeddings = network.inference(images_placeholder, pool_type, \n", 250 | " use_lrn, \n", 251 | " 1.0, \n", 252 | " phase_train=phase_train_placeholder)\n", 253 | "\n", 254 | "\n", 255 | "\n", 256 | "ema = tf.train.ExponentialMovingAverage(1.0)\n", 257 | "saver = tf.train.Saver(ema.variables_to_restore())\n", 258 | "\n", 259 | "model_checkpoint_path='./model_check_point/model-20160506.ckpt-500000'\n", 260 | "\n", 261 | "saver.restore(sess, model_checkpoint_path)\n", 262 | 
"print('facenet embedding模型建立完毕')" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": { 268 | "collapsed": true 269 | }, 270 | "source": [ 271 | "# reading training images from train folder\n" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 5, 277 | "metadata": { 278 | "collapsed": true 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "###### train_dir containing one subdirectory per image class \n", 283 | "#should like this:\n", 284 | "#-->train_dir:\n", 285 | "# --->pic_me:\n", 286 | "# me1.jpg\n", 287 | "# me2.jpg\n", 288 | "# ...\n", 289 | "# --->pic_others:\n", 290 | "# other1.jpg\n", 291 | "# other2.jpg\n", 292 | "# ...\n", 293 | "data_dir='~/train_dir/'#your own train folder" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 6, 299 | "metadata": { 300 | "collapsed": true 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "def to_rgb(img):\n", 305 | " w, h = img.shape\n", 306 | " ret = np.empty((w, h, 3), dtype=np.uint8)\n", 307 | " ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img\n", 308 | " return ret\n", 309 | "\n", 310 | "def read_img(person_dir,f):\n", 311 | " img=cv2.imread(pjoin(person_dir, f))\n", 312 | " \n", 313 | " gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n", 314 | " \n", 315 | " if gray.ndim == 2:\n", 316 | " img = to_rgb(gray)\n", 317 | " return img\n", 318 | "\n", 319 | "def load_data(data_dir):\n", 320 | " data = {}\n", 321 | " pics_ctr = 0\n", 322 | " for guy in os.listdir(data_dir):\n", 323 | " person_dir = pjoin(data_dir, guy)\n", 324 | " \n", 325 | " curr_pics = [read_img(person_dir, f) for f in os.listdir(person_dir)]\n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " data[guy] = curr_pics\n", 330 | " \n", 331 | " return data" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 7, 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "outputs": [ 341 | { 342 | "name": "stdout", 343 | "output_type": "stream", 344 | "text": [ 345 | "foler:other,image numbers:70\n", 346 | "foler:video_guai,image numbers:137\n", 347 | "foler:video_me,image numbers:115\n" 348 | ] 349 | } 350 | ], 351 | "source": [ 352 | "data=load_data(data_dir)\n", 353 | "keys=[]\n", 354 | "for key in data.iterkeys():\n", 355 | " keys.append(key)\n", 356 | " print('foler:{},image numbers:{}'.format(key,len(data[key])))\n", 357 | " " 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "outputs": [], 367 | "source": [] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 8, 372 | "metadata": { 373 | "collapsed": false 374 | }, 375 | "outputs": [ 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "68\n", 381 | "145\n", 382 | "搞完了,样本数为:145\n" 383 | ] 384 | } 385 | ], 386 | "source": [ 387 | "train_x=[]\n", 388 | "train_y=[]\n", 389 | "\n", 390 | "for x in data[keys[0]]:\n", 391 | " bounding_boxes, _ = detect_face.detect_face(x, minsize, pnet, rnet, onet, threshold, factor)\n", 392 | " nrof_faces = bounding_boxes.shape[0]#number of faces\n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " for face_position in bounding_boxes:\n", 397 | " face_position=face_position.astype(int)\n", 398 | " #print(face_position[0:4])\n", 399 | " cv2.rectangle(x, (face_position[0], face_position[1]), (face_position[2], face_position[3]), (0, 255, 0), 2)\n", 400 | " crop=x[face_position[1]:face_position[3],\n", 401 | " face_position[0]:face_position[2],]\n", 402 | " \n", 
403 | " crop = cv2.resize(crop, (96, 96), interpolation=cv2.INTER_CUBIC )\n", 404 | "\n", 405 | " #print(crop.shape)\n", 406 | " \n", 407 | " crop_data=crop.reshape(-1,96,96,3)\n", 408 | " #print(crop_data.shape)\n", 409 | " \n", 410 | " emb_data = sess.run([embeddings], \n", 411 | " feed_dict={images_placeholder: np.array(crop_data), phase_train_placeholder: False })[0]\n", 412 | " \n", 413 | " \n", 414 | " train_x.append(emb_data)\n", 415 | " train_y.append(0)\n", 416 | "print(len(train_x))\n", 417 | "\n", 418 | "\n", 419 | "\n", 420 | "for y in data[keys[1]]:\n", 421 | " bounding_boxes, _ = detect_face.detect_face(y, minsize, pnet, rnet, \n", 422 | " onet, threshold, factor)\n", 423 | " nrof_faces = bounding_boxes.shape[0]#number of faces\n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " for face_position in bounding_boxes:\n", 428 | " face_position=face_position.astype(int)\n", 429 | " #print(face_position[0:4])\n", 430 | " cv2.rectangle(y, (face_position[0], face_position[1]), (face_position[2], face_position[3]), (0, 255, 0), 2)\n", 431 | " crop=y[face_position[1]:face_position[3],\n", 432 | " face_position[0]:face_position[2],]\n", 433 | " \n", 434 | " crop = cv2.resize(crop, (96, 96), interpolation=cv2.INTER_CUBIC )\n", 435 | "\n", 436 | " crop_data=crop.reshape(-1,96,96,3)\n", 437 | " #print(crop_data.shape)\n", 438 | " \n", 439 | " emb_data = sess.run([embeddings], \n", 440 | " feed_dict={images_placeholder: np.array(crop_data), phase_train_placeholder: False })[0]\n", 441 | " \n", 442 | " \n", 443 | " train_x.append(emb_data)\n", 444 | " train_y.append(1)\n", 445 | " \n", 446 | "\n", 447 | "print(len(train_x))\n", 448 | "print('搞完了,样本数为:{}'.format(len(train_x)))" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 10, 454 | "metadata": { 455 | "collapsed": false 456 | }, 457 | "outputs": [ 458 | { 459 | "name": "stdout", 460 | "output_type": "stream", 461 | "text": [ 462 | "(145, 128)\n", 463 | "(145,)\n", 464 | "((101, 128), (101,), (44, 128), (44,))\n" 465 | ] 466 | } 467 | ], 468 | "source": [ 469 | "#train/test split\n", 470 | "train_x=np.array(train_x)\n", 471 | "train_x=train_x.reshape(-1,128)\n", 472 | "train_y=np.array(train_y)\n", 473 | "print(train_x.shape)\n", 474 | "print(train_y.shape)\n", 475 | "\n", 476 | "\n", 477 | "X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, test_size=.3, random_state=42)\n", 478 | "print(X_train.shape,y_train.shape,X_test.shape,y_test.shape)" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 11, 484 | "metadata": { 485 | "collapsed": false 486 | }, 487 | "outputs": [ 488 | { 489 | "name": "stdout", 490 | "output_type": "stream", 491 | "text": [ 492 | "accuracy: 100.00%\n" 493 | ] 494 | }, 495 | { 496 | "data": { 497 | "text/plain": [ 498 | "['./model_check_point/knn_classifier.model']" 499 | ] 500 | }, 501 | "execution_count": 11, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "\n", 508 | "# KNN Classifier \n", 509 | "def knn_classifier(train_x, train_y): \n", 510 | " from sklearn.neighbors import KNeighborsClassifier \n", 511 | " model = KNeighborsClassifier() \n", 512 | " model.fit(train_x, train_y) \n", 513 | " return model \n", 514 | "\n", 515 | "classifiers = knn_classifier \n", 516 | "\n", 517 | "model = classifiers(X_train,y_train) \n", 518 | "predict = model.predict(X_test) \n", 519 | "\n", 520 | "accuracy = metrics.accuracy_score(y_test, predict) \n", 521 | "print ('accuracy: %.2f%%' % (100 * accuracy) ) 
\n", 522 | " \n", 523 | " \n", 524 | "#save model\n", 525 | "joblib.dump(model, './model_check_point/knn_classifier.model')\n", 526 | "#model = joblib.load('_2017_1_24_knn.model')" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": 12, 532 | "metadata": { 533 | "collapsed": false 534 | }, 535 | "outputs": [ 536 | { 537 | "name": "stdout", 538 | "output_type": "stream", 539 | "text": [ 540 | "accuracy: 100.00%\n" 541 | ] 542 | } 543 | ], 544 | "source": [ 545 | "model = joblib.load('./model_check_point/knn_classifier.model')\n", 546 | "predict = model.predict(X_test) \n", 547 | "accuracy = metrics.accuracy_score(y_test, predict) \n", 548 | "print ('accuracy: %.2f%%' % (100 * accuracy) ) " 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": null, 554 | "metadata": { 555 | "collapsed": true 556 | }, 557 | "outputs": [], 558 | "source": [] 559 | } 560 | ], 561 | "metadata": { 562 | "kernelspec": { 563 | "display_name": "Python 2", 564 | "language": "python", 565 | "name": "python2" 566 | }, 567 | "language_info": { 568 | "codemirror_mode": { 569 | "name": "ipython", 570 | "version": 2 571 | }, 572 | "file_extension": ".py", 573 | "mimetype": "text/x-python", 574 | "name": "python", 575 | "nbconvert_exporter": "python", 576 | "pygments_lexer": "ipython2", 577 | "version": "2.7.12" 578 | } 579 | }, 580 | "nbformat": 4, 581 | "nbformat_minor": 0 582 | } 583 | --------------------------------------------------------------------------------
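For completeness, here is a hedged sketch (not part of the original notebooks) of reusing the trained classifier outside Jupyter. It rebuilds the nn4 embedding graph the same way "train your classifier.ipynb" does (MAX pooling, no LRN), restores the same model-20160506.ckpt-500000 checkpoint, loads knn_classifier.model with joblib, and classifies a 96x96 face crop produced by the MTCNN detection and cv2.resize steps shown above. The helper name `recognize` is illustrative, not from the repository.

```python
import tensorflow as tf
from sklearn.externals import joblib
import nn4 as network

image_size = 96
sess = tf.Session()
images_placeholder = tf.placeholder(tf.float32,
                                    shape=(None, image_size, image_size, 3),
                                    name='input')
phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

# Same graph construction as the notebooks: nn4 inference, MAX pooling, no LRN.
embeddings = network.inference(images_placeholder, 'MAX', False, 1.0,
                               phase_train=phase_train_placeholder)

# Restore the pre-trained FaceNet weights via their moving averages, as in the notebooks.
ema = tf.train.ExponentialMovingAverage(1.0)
saver = tf.train.Saver(ema.variables_to_restore())
saver.restore(sess, './model_check_point/model-20160506.ckpt-500000')

# The KNN model saved by "train your classifier.ipynb"
model = joblib.load('./model_check_point/knn_classifier.model')

def recognize(crop):
    """crop: a 96x96x3 face crop from MTCNN + cv2.resize, as in the notebooks."""
    emb = sess.run(embeddings,
                   feed_dict={images_placeholder: crop.reshape(-1, 96, 96, 3),
                              phase_train_placeholder: False})
    # Labels follow the order assigned when building train_y in the training notebook.
    return model.predict(emb)
```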