├── README.md ├── detect_face.py ├── detect_face.pyc ├── facenet.py ├── facenet.pyc ├── images ├── real time face detection and recognition.jpg ├── video_guai_20.jpg └── video_guai_2192.jpg ├── knn_classifier.model ├── model_check_point ├── det1.npy ├── det2.npy ├── det3.npy └── knn_classifier.model ├── nn4.py ├── nn4.pyc ├── real time face detection and recognition.ipynb ├── save_video frame.ipynb └── train your classifier.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # real_time_face_detection and recognition 2 | This is a real-time face detection and recognition project based on OpenCV/TensorFlow/MTCNN/FaceNet. A Chinese version of the description is available [here](https://zhuanlan.zhihu.com/p/25025596). Face detection is based on [MTCNN](https://kpzhang93.github.io/MTCNN_face_detection_alignment/index.html). Face embedding is based on [FaceNet](https://arxiv.org/abs/1503.03832). 3 | ## Workflow 4 | ![](https://github.com/shanren7/real_time_face_recognition/blob/master/images/real%20time%20face%20detection%20and%20recognition.jpg) 5 | 6 | ## Inspiration 7 | The code was inspired by the following projects: 8 | 9 | 1. [OpenFace](https://github.com/cmusatyalab/openface). The main idea was inspired by OpenFace; however, I prefer Python and TensorFlow, so this project came about. 10 | 11 | 2. [davidsandberg/facenet](https://github.com/davidsandberg/facenet). 12 | 13 | facenet.py was taken from https://github.com/davidsandberg/facenet/blob/master/facenet/src/facenet.py 14 | 15 | nn4.py was taken from https://github.com/davidsandberg/facenet/blob/master/src/models/nn4.py 16 | 17 | detect_face.py was taken from https://github.com/davidsandberg/facenet/blob/master/src/align/detect_face.py 18 | 19 | 3. [yobibyte/yobiface](https://github.com/yobibyte/yobiface). 20 | 21 | ## Dependencies 22 | 1. TensorFlow 23 | 2. OpenCV with Python bindings (cv2) 24 | 3. Jupyter Notebook for running the .ipynb examples 25 | 26 | ## Running 27 | 1. Download the pre-trained FaceNet checkpoint from https://github.com/yobibyte/yobiface/blob/master/model/model-20160506.ckpt-500000 and put it in the model_check_point folder. 28 | 29 | 2. Run [real time face detection and recognition.ipynb](https://github.com/shanren7/real_time_face_recognition/blob/master/real%20time%20face%20detection%20and%20%20recognition.ipynb) with Jupyter Notebook. 30 | 31 | ## Results 32 | ![](https://github.com/shanren7/real_time_face_recognition/blob/master/images/video_guai_20.jpg) 33 | ![](https://github.com/shanren7/real_time_face_recognition/blob/master/images/video_guai_2192.jpg) 34 |
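To make the workflow above concrete, here is a minimal sketch (not part of the repository) of driving the bundled MTCNN detector on a single still image. The parameter values (`minsize=20`, `threshold=[0.6, 0.7, 0.7]`, `factor=0.709`) and the `model_check_point` path are the ones used in the notebooks below; the test image name is taken from the `images` folder, and the output filename `detected.jpg` is arbitrary.

```python
import cv2
import tensorflow as tf
import detect_face

# Detection parameters as used in the notebooks
minsize = 20                  # minimum face size in pixels
threshold = [0.6, 0.7, 0.7]   # P-Net / R-Net / O-Net score thresholds
factor = 0.709                # image-pyramid scale factor

with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        # Loads det1.npy / det2.npy / det3.npy from model_check_point/
        pnet, rnet, onet = detect_face.create_mtcnn(sess, './model_check_point/')

img = cv2.imread('images/video_guai_20.jpg')
bounding_boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet,
                                                 threshold, factor)
# bounding_boxes: one row per detected face -> [x1, y1, x2, y2, score]
# points: 10 x n_faces array; rows 0-4 are landmark x-coordinates, rows 5-9 are y-coordinates
for box in bounding_boxes.astype(int):
    cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
cv2.imwrite('detected.jpg', img)
```

The detector is the same three-stage P-Net / R-Net / O-Net cascade defined in detect_face.py below; the real-time notebook runs it on webcam frames (every few frames, controlled by `frame_interval`) rather than on a still image.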
-------------------------------------------------------------------------------- /detect_face.py: -------------------------------------------------------------------------------- 1 | """ Tensorflow implementation of the face detection / alignment algorithm found at 2 | https://github.com/kpzhang93/MTCNN_face_detection_alignment 3 | """ 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | import numpy as np 31 | import tensorflow as tf 32 | #from math import floor 33 | import cv2 34 | import os 35 | 36 | def layer(op): 37 | '''Decorator for composable network layers.''' 38 | 39 | def layer_decorated(self, *args, **kwargs): 40 | # Automatically set a name if not provided. 41 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 42 | # Figure out the layer inputs. 43 | if len(self.terminals) == 0: 44 | raise RuntimeError('No input variables found for layer %s.' % name) 45 | elif len(self.terminals) == 1: 46 | layer_input = self.terminals[0] 47 | else: 48 | layer_input = list(self.terminals) 49 | # Perform the operation and get the output. 50 | layer_output = op(self, layer_input, *args, **kwargs) 51 | # Add to layer LUT. 52 | self.layers[name] = layer_output 53 | # This output is now the input for the next layer. 54 | self.feed(layer_output) 55 | # Return self for chained calls.
56 | return self 57 | 58 | return layer_decorated 59 | 60 | class Network(object): 61 | 62 | def __init__(self, inputs, trainable=True): 63 | # The input nodes for this network 64 | self.inputs = inputs 65 | # The current list of terminal nodes 66 | self.terminals = [] 67 | # Mapping from layer names to layers 68 | self.layers = dict(inputs) 69 | # If true, the resulting variables are set as trainable 70 | self.trainable = trainable 71 | 72 | self.setup() 73 | 74 | def setup(self): 75 | '''Construct the network. ''' 76 | raise NotImplementedError('Must be implemented by the subclass.') 77 | 78 | def load(self, data_path, session, ignore_missing=False): 79 | '''Load network weights. 80 | data_path: The path to the numpy-serialized network weights 81 | session: The current TensorFlow session 82 | ignore_missing: If true, serialized weights for missing layers are ignored. 83 | ''' 84 | data_dict = np.load(data_path).item() #pylint: disable=no-member 85 | for op_name in data_dict: 86 | with tf.variable_scope(op_name, reuse=True): 87 | for param_name, data in data_dict[op_name].iteritems(): 88 | try: 89 | var = tf.get_variable(param_name) 90 | session.run(var.assign(data)) 91 | except ValueError: 92 | if not ignore_missing: 93 | raise 94 | 95 | def feed(self, *args): 96 | '''Set the input(s) for the next operation by replacing the terminal nodes. 97 | The arguments can be either layer names or the actual layers. 98 | ''' 99 | assert len(args) != 0 100 | self.terminals = [] 101 | for fed_layer in args: 102 | if isinstance(fed_layer, basestring): 103 | try: 104 | fed_layer = self.layers[fed_layer] 105 | except KeyError: 106 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 107 | self.terminals.append(fed_layer) 108 | return self 109 | 110 | def get_output(self): 111 | '''Returns the current network output.''' 112 | return self.terminals[-1] 113 | 114 | def get_unique_name(self, prefix): 115 | '''Returns an index-suffixed unique name for the given prefix. 116 | This is used for auto-generating layer names based on the type-prefix. 117 | ''' 118 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 119 | return '%s_%d' % (prefix, ident) 120 | 121 | def make_var(self, name, shape): 122 | '''Creates a new TensorFlow variable.''' 123 | return tf.get_variable(name, shape, trainable=self.trainable) 124 | 125 | def validate_padding(self, padding): 126 | '''Verifies that the padding is one of the supported ones.''' 127 | assert padding in ('SAME', 'VALID') 128 | 129 | @layer 130 | def conv(self, 131 | inp, 132 | k_h, 133 | k_w, 134 | c_o, 135 | s_h, 136 | s_w, 137 | name, 138 | relu=True, 139 | padding='SAME', 140 | group=1, 141 | biased=True): 142 | # Verify that the padding is acceptable 143 | self.validate_padding(padding) 144 | # Get the number of channels in the input 145 | c_i = inp.get_shape()[-1] 146 | # Verify that the grouping parameter is valid 147 | assert c_i % group == 0 148 | assert c_o % group == 0 149 | # Convolution for a given input and kernel 150 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 151 | with tf.variable_scope(name) as scope: 152 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 153 | # This is the common-case. Convolve the input without any further complications. 
154 | output = convolve(inp, kernel) 155 | # Add the biases 156 | if biased: 157 | biases = self.make_var('biases', [c_o]) 158 | output = tf.nn.bias_add(output, biases) 159 | if relu: 160 | # ReLU non-linearity 161 | output = tf.nn.relu(output, name=scope.name) 162 | return output 163 | 164 | @layer 165 | def prelu(self, inp, name): 166 | with tf.variable_scope(name): 167 | i = inp.get_shape().as_list() 168 | alpha = self.make_var('alpha', shape=(i[-1])) 169 | output = tf.nn.relu(inp) + tf.mul(alpha, -tf.nn.relu(-inp)) 170 | return output 171 | 172 | @layer 173 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 174 | self.validate_padding(padding) 175 | return tf.nn.max_pool(inp, 176 | ksize=[1, k_h, k_w, 1], 177 | strides=[1, s_h, s_w, 1], 178 | padding=padding, 179 | name=name) 180 | 181 | @layer 182 | def fc(self, inp, num_out, name, relu=True): 183 | with tf.variable_scope(name): 184 | input_shape = inp.get_shape() 185 | if input_shape.ndims == 4: 186 | # The input is spatial. Vectorize it first. 187 | dim = 1 188 | for d in input_shape[1:].as_list(): 189 | dim *= d 190 | feed_in = tf.reshape(inp, [-1, dim]) 191 | else: 192 | feed_in, dim = (inp, input_shape[-1].value) 193 | weights = self.make_var('weights', shape=[dim, num_out]) 194 | biases = self.make_var('biases', [num_out]) 195 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 196 | fc = op(feed_in, weights, biases, name=name) 197 | return fc 198 | 199 | 200 | """ 201 | Multi dimensional softmax, 202 | refer to https://github.com/tensorflow/tensorflow/issues/210 203 | compute softmax along the dimension of target 204 | the native softmax only supports batch_size x dimension 205 | """ 206 | @layer 207 | def softmax(self, target, axis, name=None): 208 | max_axis = tf.reduce_max(target, axis, keep_dims=True) 209 | target_exp = tf.exp(target-max_axis) 210 | normalize = tf.reduce_sum(target_exp, axis, keep_dims=True) 211 | softmax = tf.div(target_exp, normalize, name) 212 | return softmax 213 | 214 | class PNet(Network): 215 | def setup(self): 216 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 217 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 218 | .prelu(name='PReLU1') 219 | .max_pool(2, 2, 2, 2, name='pool1') 220 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 221 | .prelu(name='PReLU2') 222 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 223 | .prelu(name='PReLU3') 224 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 225 | .softmax(3,name='prob1')) 226 | 227 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 228 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 229 | 230 | class RNet(Network): 231 | def setup(self): 232 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 233 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 234 | .prelu(name='prelu1') 235 | .max_pool(3, 3, 2, 2, name='pool1') 236 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 237 | .prelu(name='prelu2') 238 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 239 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 240 | .prelu(name='prelu3') 241 | .fc(128, relu=False, name='conv4') 242 | .prelu(name='prelu4') 243 | .fc(2, relu=False, name='conv5-1') 244 | .softmax(1,name='prob1')) 245 | 246 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 247 | .fc(4, relu=False, name='conv5-2')) 248 | 249 | class ONet(Network): 250 | def setup(self): 251 | (self.feed('data') 
#pylint: disable=no-value-for-parameter, no-member 252 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 253 | .prelu(name='prelu1') 254 | .max_pool(3, 3, 2, 2, name='pool1') 255 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 256 | .prelu(name='prelu2') 257 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 258 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 259 | .prelu(name='prelu3') 260 | .max_pool(2, 2, 2, 2, name='pool3') 261 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 262 | .prelu(name='prelu4') 263 | .fc(256, relu=False, name='conv5') 264 | .prelu(name='prelu5') 265 | .fc(2, relu=False, name='conv6-1') 266 | .softmax(1, name='prob1')) 267 | 268 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 269 | .fc(4, relu=False, name='conv6-2')) 270 | 271 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 272 | .fc(10, relu=False, name='conv6-3')) 273 | 274 | def create_mtcnn(sess, model_path): 275 | with tf.variable_scope('pnet'): 276 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 277 | pnet = PNet({'data':data}) 278 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 279 | with tf.variable_scope('rnet'): 280 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 281 | rnet = RNet({'data':data}) 282 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 283 | with tf.variable_scope('onet'): 284 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 285 | onet = ONet({'data':data}) 286 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 287 | 288 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 289 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 290 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 291 | return pnet_fun, rnet_fun, onet_fun 292 | 293 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 294 | # im: input image 295 | # minsize: minimum of faces' size 296 | # pnet, rnet, onet: caffemodel 297 | # threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold 298 | # fastresize: resize img from last scale (using in high-resolution images) if fastresize==true 299 | factor_count=0 300 | total_boxes=np.empty((0,9)) 301 | points=[] 302 | h=img.shape[0] 303 | w=img.shape[1] 304 | minl=np.amin([h, w]) 305 | m=12.0/minsize 306 | minl=minl*m 307 | # creat scale pyramid 308 | scales=[] 309 | while minl>=12: 310 | scales += [m*np.power(factor, factor_count)] 311 | minl = minl*factor 312 | factor_count += 1 313 | 314 | # first stage 315 | for j in range(len(scales)): 316 | scale=scales[j] 317 | hs=int(np.ceil(h*scale)) 318 | ws=int(np.ceil(w*scale)) 319 | im_data = imresample(img, (hs, ws)) 320 | im_data = (im_data-127.5)*0.0078125 321 | img_x = np.expand_dims(im_data, 0) 322 | img_y = np.transpose(img_x, (0,2,1,3)) 323 | out = pnet(img_y) 324 | out0 = np.transpose(out[0], (0,2,1,3)) 325 | out1 = np.transpose(out[1], (0,2,1,3)) 326 | 327 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 328 | 329 | # inter-scale nms 330 | pick = nms(boxes.copy(), 0.5, 'Union') 331 | if boxes.size>0 and pick.size>0: 332 | boxes = boxes[pick,:] 333 | total_boxes = np.append(total_boxes, boxes, axis=0) 334 | 335 | numbox = total_boxes.shape[0] 336 | if numbox>0: 337 | pick = 
nms(total_boxes.copy(), 0.7, 'Union') 338 | total_boxes = total_boxes[pick,:] 339 | regw = total_boxes[:,2]-total_boxes[:,0] 340 | regh = total_boxes[:,3]-total_boxes[:,1] 341 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 342 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 343 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 344 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 345 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 346 | total_boxes = rerec(total_boxes.copy()) 347 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 348 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 349 | 350 | numbox = total_boxes.shape[0] 351 | if numbox>0: 352 | # second stage 353 | tempimg = np.zeros((24,24,3,numbox)) 354 | for k in range(0,numbox): 355 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 356 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 357 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 358 | tempimg[:,:,:,k] = imresample(tmp, (24, 24)) 359 | else: 360 | return np.empty() 361 | tempimg = (tempimg-127.5)*0.0078125 362 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 363 | out = rnet(tempimg1) 364 | out0 = np.transpose(out[0]) 365 | out1 = np.transpose(out[1]) 366 | score = out1[1,:] 367 | ipass = np.where(score>threshold[1]) 368 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 369 | mv = out0[:,ipass[0]] 370 | if total_boxes.shape[0]>0: 371 | pick = nms(total_boxes, 0.7, 'Union') 372 | total_boxes = total_boxes[pick,:] 373 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) 374 | total_boxes = rerec(total_boxes.copy()) 375 | 376 | numbox = total_boxes.shape[0] 377 | if numbox>0: 378 | # third stage 379 | total_boxes = np.fix(total_boxes).astype(np.int32) 380 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 381 | tempimg = np.zeros((48,48,3,numbox)) 382 | for k in range(0,numbox): 383 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 384 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 385 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 386 | tempimg[:,:,:,k] = imresample(tmp, (48, 48)) 387 | else: 388 | return np.empty() 389 | tempimg = (tempimg-127.5)*0.0078125 390 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 391 | out = onet(tempimg1) 392 | out0 = np.transpose(out[0]) 393 | out1 = np.transpose(out[1]) 394 | out2 = np.transpose(out[2]) 395 | score = out2[1,:] 396 | points = out1 397 | ipass = np.where(score>threshold[2]) 398 | points = points[:,ipass[0]] 399 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 400 | mv = out0[:,ipass[0]] 401 | 402 | w = total_boxes[:,2]-total_boxes[:,0]+1 403 | h = total_boxes[:,3]-total_boxes[:,1]+1 404 | points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 405 | points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 406 | if total_boxes.shape[0]>0: 407 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) 408 | pick = nms(total_boxes.copy(), 0.7, 'Min') 409 | total_boxes = total_boxes[pick,:] 410 | points = points[:,pick] 411 | 412 | return total_boxes, points 413 | 414 | 415 | # function [boundingbox] = bbreg(boundingbox,reg) 416 | def bbreg(boundingbox,reg): 417 | # calibrate bounding boxes 418 | if reg.shape[1]==1: 419 | reg = np.reshape(reg, (reg.shape[2], 
reg.shape[3])) 420 | 421 | w = boundingbox[:,2]-boundingbox[:,0]+1 422 | h = boundingbox[:,3]-boundingbox[:,1]+1 423 | b1 = boundingbox[:,0]+reg[:,0]*w 424 | b2 = boundingbox[:,1]+reg[:,1]*h 425 | b3 = boundingbox[:,2]+reg[:,2]*w 426 | b4 = boundingbox[:,3]+reg[:,3]*h 427 | boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) 428 | return boundingbox 429 | 430 | def generateBoundingBox(imap, reg, scale, t): 431 | # use heatmap to generate bounding boxes 432 | stride=2 433 | cellsize=12 434 | 435 | imap = np.transpose(imap) 436 | dx1 = np.transpose(reg[:,:,0]) 437 | dy1 = np.transpose(reg[:,:,1]) 438 | dx2 = np.transpose(reg[:,:,2]) 439 | dy2 = np.transpose(reg[:,:,3]) 440 | y, x = np.where(imap >= t) 441 | if y.shape[0]==1: 442 | dx1 = np.flipud(dx1) 443 | dy1 = np.flipud(dy1) 444 | dx2 = np.flipud(dx2) 445 | dy2 = np.flipud(dy2) 446 | score = imap[(y,x)] 447 | reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) 448 | if reg.size==0: 449 | reg = np.empty((0,3)) 450 | bb = np.transpose(np.vstack([y,x])) 451 | q1 = np.fix((stride*bb+1)/scale) 452 | q2 = np.fix((stride*bb+cellsize-1+1)/scale) 453 | boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) 454 | return boundingbox, reg 455 | 456 | # function pick = nms(boxes,threshold,type) 457 | def nms(boxes, threshold, method): 458 | if boxes.size==0: 459 | return np.empty((0,3)) 460 | x1 = boxes[:,0] 461 | y1 = boxes[:,1] 462 | x2 = boxes[:,2] 463 | y2 = boxes[:,3] 464 | s = boxes[:,4] 465 | area = (x2-x1+1) * (y2-y1+1) 466 | I = np.argsort(s) 467 | pick = np.zeros_like(s, dtype=np.int16) 468 | counter = 0 469 | while I.size>0: 470 | i = I[-1] 471 | pick[counter] = i 472 | counter += 1 473 | idx = I[0:-1] 474 | xx1 = np.maximum(x1[i], x1[idx]) 475 | yy1 = np.maximum(y1[i], y1[idx]) 476 | xx2 = np.minimum(x2[i], x2[idx]) 477 | yy2 = np.minimum(y2[i], y2[idx]) 478 | w = np.maximum(0.0, xx2-xx1+1) 479 | h = np.maximum(0.0, yy2-yy1+1) 480 | inter = w * h 481 | if method is 'Min': 482 | o = inter / np.minimum(area[i], area[idx]) 483 | else: 484 | o = inter / (area[i] + area[idx] - inter) 485 | I = I[np.where(o<=threshold)] 486 | pick = pick[0:counter] 487 | return pick 488 | 489 | # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) 490 | def pad(total_boxes, w, h): 491 | # compute the padding coordinates (pad the bounding boxes to square) 492 | tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) 493 | tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) 494 | numbox = total_boxes.shape[0] 495 | 496 | dx = np.ones((numbox), dtype=np.int32) 497 | dy = np.ones((numbox), dtype=np.int32) 498 | edx = tmpw.copy().astype(np.int32) 499 | edy = tmph.copy().astype(np.int32) 500 | 501 | x = total_boxes[:,0].copy().astype(np.int32) 502 | y = total_boxes[:,1].copy().astype(np.int32) 503 | ex = total_boxes[:,2].copy().astype(np.int32) 504 | ey = total_boxes[:,3].copy().astype(np.int32) 505 | 506 | tmp = np.where(ex>w) 507 | edx[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) 508 | ex[tmp] = w 509 | 510 | tmp = np.where(ey>h) 511 | edy[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) 512 | ey[tmp] = h 513 | 514 | tmp = np.where(x<1) 515 | dx[tmp] = np.expand_dims(2-x[tmp],1) 516 | x[tmp] = 1 517 | 518 | tmp = np.where(y<1) 519 | dy[tmp] = np.expand_dims(2-y[tmp],1) 520 | y[tmp] = 1 521 | 522 | return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph 523 | 524 | # function [bboxA] = rerec(bboxA) 525 | def rerec(bboxA): 526 | # convert bboxA to square 527 | h = bboxA[:,3]-bboxA[:,1] 528 | w 
= bboxA[:,2]-bboxA[:,0] 529 | l = np.maximum(w, h) 530 | bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 531 | bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 532 | bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) 533 | return bboxA 534 | 535 | def imresample(img, sz): 536 | im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #pylint: disable=no-member 537 | return im_data 538 | 539 | # This method is kept for debugging purpose 540 | # h=img.shape[0] 541 | # w=img.shape[1] 542 | # hs, ws = sz 543 | # dx = float(w) / ws 544 | # dy = float(h) / hs 545 | # im_data = np.zeros((hs,ws,3)) 546 | # for a1 in range(0,hs): 547 | # for a2 in range(0,ws): 548 | # for a3 in range(0,3): 549 | # im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3] 550 | # return im_data 551 | 552 | -------------------------------------------------------------------------------- /detect_face.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shanren7/real_time_face_recognition/6e6764607c92f9fa852ced0e1faa9561ef024857/detect_face.pyc -------------------------------------------------------------------------------- /facenet.py: -------------------------------------------------------------------------------- 1 | # The whole file was taken from @davidsandberg implementation 2 | # https://github.com/davidsandberg/facenet/blob/master/facenet/src/facenet.py 3 | 4 | """Functions for building the face recognition network. 5 | """ 6 | # pylint: disable=missing-docstring 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from os import path 13 | from six.moves import xrange 14 | import tensorflow as tf 15 | from tensorflow.python.ops import array_ops 16 | from tensorflow.python.ops import control_flow_ops 17 | import numpy as np 18 | from scipy import misc 19 | import matplotlib.pyplot as plt 20 | from sklearn.cross_validation import KFold 21 | 22 | parameters = [] 23 | conv_counter = 1 24 | pool_counter = 1 25 | affine_counter = 1 26 | 27 | def conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, prefix, phase_train=True, use_batch_norm=True): 28 | global conv_counter 29 | global parameters 30 | name = prefix + '_' + str(conv_counter) 31 | conv_counter += 1 32 | with tf.name_scope(name) as scope: 33 | kernel = tf.Variable(tf.truncated_normal([kH, kW, nIn, nOut], 34 | dtype=tf.float32, 35 | stddev=1e-1), name='weights') 36 | conv = tf.nn.conv2d(inpOp, kernel, [1, dH, dW, 1], padding=padType) 37 | 38 | if use_batch_norm: 39 | conv_bn = batch_norm(conv, nOut, phase_train, 'batch_norm') 40 | else: 41 | conv_bn = conv 42 | biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32), 43 | trainable=True, name='biases') 44 | bias = tf.nn.bias_add(conv_bn, biases) 45 | conv1 = tf.nn.relu(bias, name=scope) 46 | parameters += [kernel, biases] 47 | return conv1 48 | 49 | def affine(inpOp, nIn, nOut): 50 | global affine_counter 51 | global parameters 52 | name = 'affine' + str(affine_counter) 53 | affine_counter += 1 54 | with tf.name_scope(name): 55 | kernel = tf.Variable(tf.truncated_normal([nIn, nOut], 56 | dtype=tf.float32, 57 | stddev=1e-1), name='weights') 58 | biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32), 59 | trainable=True, name='biases') 60 | affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) 61 | parameters += [kernel, biases] 62 | return affine1 63 | 64 | def lppool(inpOp, pnorm, kH, kW, dH, dW, padding): 65 | 
global pool_counter 66 | global parameters 67 | name = 'pool' + str(pool_counter) 68 | pool_counter += 1 69 | 70 | with tf.name_scope('lppool'): 71 | if pnorm == 2: 72 | pwr = tf.square(inpOp) 73 | else: 74 | pwr = tf.pow(inpOp, pnorm) 75 | 76 | subsamp = tf.nn.avg_pool(pwr, 77 | ksize=[1, kH, kW, 1], 78 | strides=[1, dH, dW, 1], 79 | padding=padding, 80 | name=name) 81 | subsamp_sum = tf.mul(subsamp, kH*kW) 82 | 83 | if pnorm == 2: 84 | out = tf.sqrt(subsamp_sum) 85 | else: 86 | out = tf.pow(subsamp_sum, 1/pnorm) 87 | 88 | return out 89 | 90 | def mpool(inpOp, kH, kW, dH, dW, padding): 91 | global pool_counter 92 | global parameters 93 | name = 'pool' + str(pool_counter) 94 | pool_counter += 1 95 | with tf.name_scope('maxpool'): 96 | maxpool = tf.nn.max_pool(inpOp, 97 | ksize=[1, kH, kW, 1], 98 | strides=[1, dH, dW, 1], 99 | padding=padding, 100 | name=name) 101 | return maxpool 102 | 103 | def apool(inpOp, kH, kW, dH, dW, padding): 104 | global pool_counter 105 | global parameters 106 | name = 'pool' + str(pool_counter) 107 | pool_counter += 1 108 | return tf.nn.avg_pool(inpOp, 109 | ksize=[1, kH, kW, 1], 110 | strides=[1, dH, dW, 1], 111 | padding=padding, 112 | name=name) 113 | 114 | def batch_norm(x, n_out, phase_train, name, affine=True): 115 | """ 116 | Batch normalization on convolutional maps. 117 | Args: 118 | x: Tensor, 4D BHWD input maps 119 | n_out: integer, depth of input maps 120 | phase_train: boolean tf.Variable, true indicates training phase 121 | scope: string, variable scope 122 | affine: whether to affine-transform outputs 123 | Return: 124 | normed: batch-normalized maps 125 | Ref: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow/33950177 126 | """ 127 | global parameters 128 | 129 | with tf.name_scope(name): 130 | 131 | beta = tf.Variable(tf.constant(0.0, shape=[n_out]), 132 | name=name+'/beta', trainable=True) 133 | gamma = tf.Variable(tf.constant(1.0, shape=[n_out]), 134 | name=name+'/gamma', trainable=affine) 135 | 136 | batch_mean, batch_var = tf.nn.moments(x, [0,1,2], name='moments') 137 | ema = tf.train.ExponentialMovingAverage(decay=0.9) 138 | def mean_var_with_update(): 139 | ema_apply_op = ema.apply([batch_mean, batch_var]) 140 | with tf.control_dependencies([ema_apply_op]): 141 | return tf.identity(batch_mean), tf.identity(batch_var) 142 | mean, var = control_flow_ops.cond(phase_train, 143 | mean_var_with_update, 144 | lambda: (ema.average(batch_mean), ema.average(batch_var))) 145 | normed = tf.nn.batch_norm_with_global_normalization(x, mean, var, 146 | beta, gamma, 1e-3, affine, name=name) 147 | parameters += [beta, gamma] 148 | return normed 149 | 150 | def inception(inp, inSize, ks, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2, o4s3, poolType, name, phase_train=True, use_batch_norm=True): 151 | 152 | print('name = ', name) 153 | print('inputSize = ', inSize) 154 | print('kernelSize = {3,5}') 155 | print('kernelStride = {%d,%d}' % (ks,ks)) 156 | print('outputSize = {%d,%d}' % (o2s2,o3s2)) 157 | print('reduceSize = {%d,%d,%d,%d}' % (o2s1,o3s1,o4s2,o1s)) 158 | print('pooling = {%s, %d, %d, %d, %d}' % (poolType, o4s1, o4s1, o4s3, o4s3)) 159 | if (o4s2>0): 160 | o4 = o4s2 161 | else: 162 | o4 = inSize 163 | print('outputSize = ', o1s+o2s2+o3s2+o4) 164 | print() 165 | 166 | net = [] 167 | 168 | with tf.name_scope(name): 169 | if o1s>0: 170 | conv1 = conv(inp, inSize, o1s, 1, 1, 1, 1, 'SAME', 'in1_conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm) 171 | net.append(conv1) 172 | 173 | if o2s1>0: 174 | conv3a 
= conv(inp, inSize, o2s1, 1, 1, 1, 1, 'SAME', 'in2_conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm) 175 | conv3 = conv(conv3a, o2s1, o2s2, 3, 3, ks, ks, 'SAME', 'in2_conv3x3', phase_train=phase_train, use_batch_norm=use_batch_norm) 176 | net.append(conv3) 177 | 178 | if o3s1>0: 179 | conv5a = conv(inp, inSize, o3s1, 1, 1, 1, 1, 'SAME', 'in3_conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm) 180 | conv5 = conv(conv5a, o3s1, o3s2, 5, 5, ks, ks, 'SAME', 'in3_conv5x5', phase_train=phase_train, use_batch_norm=use_batch_norm) 181 | net.append(conv5) 182 | 183 | if poolType=='MAX': 184 | pool = mpool(inp, o4s1, o4s1, o4s3, o4s3, 'SAME') 185 | elif poolType=='L2': 186 | pool = lppool(inp, 2, o4s1, o4s1, o4s3, o4s3, 'SAME') 187 | else: 188 | raise ValueError('Invalid pooling type "%s"' % poolType) 189 | 190 | if o4s2>0: 191 | pool_conv = conv(pool, inSize, o4s2, 1, 1, 1, 1, 'SAME', 'in4_conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm) 192 | else: 193 | pool_conv = pool 194 | net.append(pool_conv) 195 | 196 | incept = array_ops.concat(3, net, name=name) 197 | return incept 198 | 199 | def triplet_loss(anchor, positive, negative, alpha): 200 | """Calculate the triplet loss according to the FaceNet paper 201 | 202 | Args: 203 | anchor: the embeddings for the anchor images. 204 | positive: the embeddings for the positive images. 205 | positive: the embeddings for the negative images. 206 | 207 | Returns: 208 | the triplet loss according to the FaceNet paper as a float tensor. 209 | """ 210 | with tf.name_scope('triplet_loss'): 211 | pos_dist = tf.reduce_sum(tf.square(tf.sub(anchor, positive)), 1) # Summing over distances in each batch 212 | neg_dist = tf.reduce_sum(tf.square(tf.sub(anchor, negative)), 1) 213 | 214 | basic_loss = tf.add(tf.sub(pos_dist,neg_dist), alpha) 215 | loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0, name='tripletloss') 216 | 217 | return loss 218 | 219 | def _add_loss_summaries(total_loss): 220 | """Add summaries for losses in CIFAR-10 model. 221 | 222 | Generates moving average for all losses and associated summaries for 223 | visualizing the performance of the network. 224 | 225 | Args: 226 | total_loss: Total loss from loss(). 227 | Returns: 228 | loss_averages_op: op for generating moving averages of losses. 229 | """ 230 | # Compute the moving average of all individual losses and the total loss. 231 | loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') 232 | losses = tf.get_collection('losses') 233 | loss_averages_op = loss_averages.apply(losses + [total_loss]) 234 | 235 | # Attach a scalar summmary to all individual losses and the total loss; do the 236 | # same for the averaged version of the losses. 237 | for l in losses + [total_loss]: 238 | # Name each loss as '(raw)' and name the moving average version of the loss 239 | # as the original loss name. 240 | tf.scalar_summary(l.op.name +' (raw)', l) 241 | tf.scalar_summary(l.op.name, loss_averages.average(l)) 242 | 243 | return loss_averages_op 244 | 245 | def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay): 246 | """Setup training for the FaceNet model. 247 | 248 | Create an optimizer and apply to all trainable variables. Add moving 249 | average for all trainable variables. 250 | 251 | Args: 252 | total_loss: Total loss from loss(). 253 | global_step: Integer Variable counting the number of training steps 254 | processed. 255 | Returns: 256 | train_op: op for training. 
257 | """ 258 | # Generate moving averages of all losses and associated summaries. 259 | loss_averages_op = _add_loss_summaries(total_loss) 260 | 261 | # Compute gradients. 262 | with tf.control_dependencies([loss_averages_op]): 263 | if optimizer=='ADAGRAD': 264 | opt = tf.train.AdagradOptimizer(learning_rate) 265 | elif optimizer=='ADADELTA': 266 | opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6) 267 | elif optimizer=='ADAM': 268 | opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8) 269 | else: 270 | raise ValueError('Invalid optimization algorithm') 271 | 272 | grads = opt.compute_gradients(total_loss) 273 | 274 | # Apply gradients. 275 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) 276 | 277 | # Add histograms for trainable variables. 278 | for var in tf.trainable_variables(): 279 | tf.histogram_summary(var.op.name, var) 280 | 281 | # Add histograms for gradients. 282 | for grad, var in grads: 283 | if grad is not None: 284 | tf.histogram_summary(var.op.name + '/gradients', grad) 285 | 286 | # Track the moving averages of all trainable variables. 287 | variable_averages = tf.train.ExponentialMovingAverage( 288 | moving_average_decay, global_step) 289 | variables_averages_op = variable_averages.apply(tf.trainable_variables()) 290 | 291 | with tf.control_dependencies([apply_gradient_op, variables_averages_op]): 292 | train_op = tf.no_op(name='train') 293 | 294 | return train_op, grads 295 | 296 | def prewhiten(x): 297 | mean = np.mean(x) 298 | std = np.std(x) 299 | std_adj = np.max(std, 1.0/np.sqrt(x.size)) 300 | y = np.multiply(np.subtract(x, mean), 1/std_adj) 301 | return y 302 | 303 | def crop(image, random_crop, image_size): 304 | if image.shape[1]>image_size: 305 | sz1 = image.shape[1]/2 306 | sz2 = image_size/2 307 | if random_crop: 308 | diff = sz1-sz2 309 | (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) 310 | else: 311 | (h, v) = (0,0) 312 | image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] 313 | return image 314 | 315 | def flip(image, random_flip): 316 | if random_flip and np.random.choice([True, False]): 317 | image = np.fliplr(image) 318 | return image 319 | 320 | def to_rgb(img): 321 | w, h = img.shape 322 | ret = np.empty((w, h, 3), dtype=np.uint8) 323 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 324 | return ret 325 | 326 | def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): 327 | nrof_samples = len(image_paths) 328 | img_list = [None] * nrof_samples 329 | for i in xrange(nrof_samples): 330 | img = misc.imread(image_paths[i]) 331 | if img.ndim == 2: 332 | img = to_rgb(img) 333 | if do_prewhiten: 334 | img = prewhiten(img) 335 | img = crop(img, do_random_crop, image_size) 336 | img = flip(img, do_random_flip) 337 | img_list[i] = img 338 | images = np.stack(img_list) 339 | return images 340 | 341 | def get_batch(image_data, batch_size, batch_index): 342 | nrof_examples = np.size(image_data, 0) 343 | j = batch_index*batch_size % nrof_examples 344 | if j+batch_size<=nrof_examples: 345 | batch = image_data[j:j+batch_size,:,:,:] 346 | else: 347 | x1 = image_data[j:nrof_examples,:,:,:] 348 | x2 = image_data[0:nrof_examples-j,:,:,:] 349 | batch = np.vstack([x1,x2]) 350 | batch_float = batch.astype(np.float32) 351 | return batch_float 352 | 353 | def get_triplet_batch(triplets, batch_index, batch_size): 354 | ax, px, nx = triplets 355 | a = get_batch(ax, int(batch_size/3), batch_index) 356 | p = get_batch(px, 
int(batch_size/3), batch_index) 357 | n = get_batch(nx, int(batch_size/3), batch_index) 358 | batch = np.vstack([a, p, n]) 359 | return batch 360 | 361 | def select_training_triplets(embeddings, num_per_class, image_data, people_per_batch, alpha): 362 | 363 | def dist(emb1, emb2): 364 | x = np.square(np.subtract(emb1, emb2)) 365 | return np.sum(x, 0) 366 | 367 | nrof_images = image_data.shape[0] 368 | nrof_triplets = nrof_images - people_per_batch 369 | shp = [nrof_triplets, image_data.shape[1], image_data.shape[2], image_data.shape[3]] 370 | as_arr = np.zeros(shp) 371 | ps_arr = np.zeros(shp) 372 | ns_arr = np.zeros(shp) 373 | 374 | trip_idx = 0 375 | shuffle = np.arange(nrof_triplets) 376 | np.random.shuffle(shuffle) 377 | emb_start_idx = 0 378 | nrof_random_negs = 0 379 | for i in xrange(people_per_batch): 380 | n = num_per_class[i] 381 | for j in range(1,n): 382 | a_idx = emb_start_idx 383 | p_idx = emb_start_idx + j 384 | as_arr[shuffle[trip_idx]] = image_data[a_idx] 385 | ps_arr[shuffle[trip_idx]] = image_data[p_idx] 386 | 387 | # Select a semi-hard negative that has a distance 388 | # further away from the positive exemplar. 389 | pos_dist = dist(embeddings[a_idx][:], embeddings[p_idx][:]) 390 | sel_neg_idx = emb_start_idx 391 | while sel_neg_idx>=emb_start_idx and sel_neg_idx<=emb_start_idx+n-1: 392 | sel_neg_idx = (np.random.randint(1, 2**32) % nrof_images) -1 # Seems to give the same result as the lua implementation 393 | #sel_neg_idx = np.random.random_integers(0, nrof_images-1) 394 | sel_neg_dist = dist(embeddings[a_idx][:], embeddings[sel_neg_idx][:]) 395 | 396 | random_neg = True 397 | for k in range(nrof_images): 398 | if kemb_start_idx+n-1: 399 | neg_dist = dist(embeddings[a_idx][:], embeddings[k][:]) 400 | if pos_dist=emb_start_idx and sel_neg_idx<=emb_start_idx+n-1: 443 | sel_neg_idx = (np.random.randint(1, 2**32) % nrof_images) -1 444 | 445 | ns_arr[shuffle[trip_idx]] = image_data[sel_neg_idx] 446 | trip_idx += 1 447 | 448 | emb_start_idx += n 449 | 450 | nrof_triplets = trip_idx // batch_size * batch_size 451 | triplets = (as_arr[0:nrof_triplets,:,:,:], ps_arr[0:nrof_triplets,:,:,:], ns_arr[0:nrof_triplets,:,:,:]) 452 | 453 | return triplets, nrof_triplets 454 | 455 | 456 | class ImageClass(): 457 | "Stores the paths to images for a given class" 458 | def __init__(self, name, image_paths): 459 | self.name = name 460 | self.image_paths = image_paths 461 | 462 | def __str__(self): 463 | return self.name + ', ' + str(len(self.image_paths)) + ' images' 464 | 465 | def __len__(self): 466 | return len(self.image_paths) 467 | 468 | def get_dataset(paths): 469 | dataset = [] 470 | for path in paths.split(':'): 471 | path_exp = os.path.expanduser(path) 472 | classes = os.listdir(path_exp) 473 | classes.sort() 474 | nrof_classes = len(classes) 475 | for i in range(nrof_classes): 476 | class_name = classes[i] 477 | facedir = os.path.join(path_exp, class_name) 478 | if os.path.isdir(facedir): 479 | images = os.listdir(facedir) 480 | image_paths = map(lambda x: os.path.join(facedir,x), images) 481 | dataset.append(ImageClass(class_name, image_paths)) 482 | 483 | return dataset 484 | 485 | def split_dataset(dataset, split_ratio, mode): 486 | if mode=='SPLIT_CLASSES': 487 | nrof_classes = len(dataset) 488 | class_indices = np.arange(nrof_classes) 489 | np.random.shuffle(class_indices) 490 | split = int(round(nrof_classes*split_ratio)) 491 | train_set = [dataset[i] for i in class_indices[0:split]] 492 | test_set = [dataset[i] for i in class_indices[split:-1]] 493 | elif 
mode=='SPLIT_IMAGES': 494 | train_set = [] 495 | test_set = [] 496 | min_nrof_images = 2 497 | for cls in dataset: 498 | paths = cls.image_paths 499 | np.random.shuffle(paths) 500 | split = int(round(len(paths)*split_ratio)) 501 | if splitconverting to gray--->converting to rgb\n", 305 | "#--->detecting faces---->croping faces--->embedding--->classifying--->print\n", 306 | "\n", 307 | "\n", 308 | "video_capture = cv2.VideoCapture(0)\n", 309 | "c=0\n", 310 | " \n", 311 | "while True:\n", 312 | " # Capture frame-by-frame\n", 313 | "\n", 314 | " ret, frame = video_capture.read()\n", 315 | " #gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n", 316 | " #print(frame.shape)\n", 317 | " \n", 318 | " timeF = frame_interval\n", 319 | " \n", 320 | " \n", 321 | " if(c%timeF == 0): #frame_interval==3, face detection every 3 frames\n", 322 | " \n", 323 | " find_results=[]\n", 324 | " gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n", 325 | " \n", 326 | " \n", 327 | " if gray.ndim == 2:\n", 328 | " img = to_rgb(gray)\n", 329 | " \n", 330 | " \n", 331 | "\n", 332 | " bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)\n", 333 | "\n", 334 | " \n", 335 | " \n", 336 | " nrof_faces = bounding_boxes.shape[0]#number of faces\n", 337 | " #print('找到人脸数目为:{}'.format(nrof_faces))\n", 338 | " \n", 339 | "\n", 340 | " for face_position in bounding_boxes:\n", 341 | " \n", 342 | " face_position=face_position.astype(int)\n", 343 | " \n", 344 | " #print((int(face_position[0]), int( face_position[1])))\n", 345 | " #word_position.append((int(face_position[0]), int( face_position[1])))\n", 346 | " \n", 347 | " cv2.rectangle(frame, (face_position[0], \n", 348 | " face_position[1]), \n", 349 | " (face_position[2], face_position[3]), \n", 350 | " (0, 255, 0), 2)\n", 351 | " \n", 352 | " crop=img[face_position[1]:face_position[3],face_position[0]:face_position[2],]\n", 353 | " \n", 354 | " crop = cv2.resize(crop, (96, 96), interpolation=cv2.INTER_CUBIC )\n", 355 | " \n", 356 | " data=crop.reshape(-1,96,96,3)\n", 357 | " \n", 358 | " emb_data = sess.run([embeddings], \n", 359 | " feed_dict={images_placeholder: np.array(data), \n", 360 | " phase_train_placeholder: False })[0]\n", 361 | " \n", 362 | " predict = model.predict(emb_data) \n", 363 | " \n", 364 | " \n", 365 | " if predict==1:\n", 366 | " find_results.append('me')\n", 367 | " elif predict==2:\n", 368 | " find_results.append('others')\n", 369 | "\n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " cv2.putText(frame,'detected:{}'.format(find_results), (50,100), \n", 374 | " cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0 ,0), \n", 375 | " thickness = 2, lineType = 2)\n", 376 | " \n", 377 | " \n", 378 | " #print(faces)\n", 379 | " c+=1\n", 380 | " # Draw a rectangle around the faces\n", 381 | " \n", 382 | "\n", 383 | "\n", 384 | " # Display the resulting frame\n", 385 | "\n", 386 | " cv2.imshow('Video', frame)\n", 387 | "\n", 388 | " if cv2.waitKey(1) & 0xFF == ord('q'):\n", 389 | " break\n", 390 | " \n", 391 | "\n", 392 | "\n", 393 | "# When everything is done, release the capture\n", 394 | "\n", 395 | "video_capture.release()\n", 396 | "cv2.destroyAllWindows()\n" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": { 403 | "collapsed": true 404 | }, 405 | "outputs": [], 406 | "source": [] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": { 412 | "collapsed": true 413 | }, 414 | "outputs": [], 415 | "source": [] 416 | } 417 | ], 418 | "metadata": 
{ 419 | "kernelspec": { 420 | "display_name": "Python 2", 421 | "language": "python", 422 | "name": "python2" 423 | }, 424 | "language_info": { 425 | "codemirror_mode": { 426 | "name": "ipython", 427 | "version": 2 428 | }, 429 | "file_extension": ".py", 430 | "mimetype": "text/x-python", 431 | "name": "python", 432 | "nbconvert_exporter": "python", 433 | "pygments_lexer": "ipython2", 434 | "version": "2.7.12" 435 | } 436 | }, 437 | "nbformat": 4, 438 | "nbformat_minor": 0 439 | } 440 | -------------------------------------------------------------------------------- /save_video frame.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# This script obtains frames from the camera and saves them as jpg files." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "\n", 19 | "import cv2\n", 20 | "video_capture = cv2.VideoCapture(0)\n", 21 | "c=0\n", 22 | "while True:\n", 23 | " # Capture frame-by-frame\n", 24 | "\n", 25 | " ret, frame = video_capture.read()\n", 26 | "\n", 27 | " \n", 28 | " timeF = 10\n", 29 | " \n", 30 | " if(c%timeF == 0): #save as jpg every 10 frames \n", 31 | " cv2.imwrite('~/train_dir/me'+str(c) + '.jpg',frame) #save as jpg\n", 32 | "\n", 33 | " c+=1\n", 34 | " \n", 35 | "\n", 36 | " if cv2.waitKey(1) & 0xFF == ord('q'):\n", 37 | " break\n", 38 | "\n", 39 | "# When everything is done, release the capture\n", 40 | "\n", 41 | "video_capture.release()\n", 42 | "cv2.destroyAllWindows()" 43 | ] 44 | } 45 | ], 46 | "metadata": { 47 | "kernelspec": { 48 | "display_name": "Python 2", 49 | "language": "python", 50 | "name": "python2" 51 | }, 52 | "language_info": { 53 | "codemirror_mode": { 54 | "name": "ipython", 55 | "version": 2 56 | }, 57 | "file_extension": ".py", 58 | "mimetype": "text/x-python", 59 | "name": "python", 60 | "nbconvert_exporter": "python", 61 | "pygments_lexer": "ipython2", 62 | "version": "2.7.12" 63 | } 64 | }, 65 | "nbformat": 4, 66 | "nbformat_minor": 0 67 | } 68 | -------------------------------------------------------------------------------- /train your classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# This script processes images and trains your own face classifier." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [ 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | "text": [ 21 | "/usr/local/lib/python2.7/dist-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n", 22 | " warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n" 23 | ] 24 | }, 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "Populating the interactive namespace from numpy and matplotlib\n" 30 | ] 31 | }, 32 | { 33 | "name": "stderr", 34 | "output_type": "stream", 35 | "text": [ 36 | "/usr/local/lib/python2.7/dist-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved.
Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 37 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n", 38 | "/usr/local/lib/python2.7/dist-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['copy']\n", 39 | "`%matplotlib` prevents importing * from pylab and numpy\n", 40 | " \"\\n`%matplotlib` prevents importing * from pylab and numpy\"\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "\n", 46 | "import tensorflow as tf\n", 47 | "import numpy as np\n", 48 | "import cv2\n", 49 | "\n", 50 | "import os\n", 51 | "from os.path import join as pjoin\n", 52 | "import sys\n", 53 | "import copy\n", 54 | "import detect_face\n", 55 | "import nn4 as network\n", 56 | "import matplotlib.pyplot as plt\n", 57 | "\n", 58 | "\n", 59 | "import sklearn\n", 60 | "from sklearn.preprocessing import StandardScaler\n", 61 | "from sklearn.model_selection import train_test_split\n", 62 | "from sklearn import metrics \n", 63 | "from sklearn.externals import joblib\n", 64 | "\n", 65 | "%pylab inline\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 2, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "#face detection parameters\n", 77 | "minsize = 20 # minimum size of face\n", 78 | "threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold\n", 79 | "factor = 0.709 # scale factor\n", 80 | "\n", 81 | "#facenet embedding parameters\n", 82 | "\n", 83 | "model_dir='./model_check_point/model.ckpt-500000'#\"Directory containing the graph definition and checkpoint files.\")\n", 84 | "model_def= 'models.nn4' # \"Points to a module containing the definition of the inference graph.\")\n", 85 | "image_size=96 #\"Image size (height, width) in pixels.\"\n", 86 | "pool_type='MAX' #\"The type of pooling to use for some of the inception layers {'MAX', 'L2'}.\n", 87 | "use_lrn=False #\"Enables Local Response Normalization after the first layers of the inception network.\"\n", 88 | "seed=42,# \"Random seed.\"\n", 89 | "batch_size= None # \"Number of images to process in a batch.\"\n", 90 | "\n" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 6, 96 | "metadata": { 97 | "collapsed": true 98 | }, 99 | "outputs": [], 100 | "source": [] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 3, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "Creating networks and loading parameters\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "#建立人脸检测模型,加载参数\n", 119 | "print('Creating networks and loading parameters')\n", 120 | "gpu_memory_fraction=1.0\n", 121 | "with tf.Graph().as_default():\n", 122 | " gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)\n", 123 | " sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))\n", 124 | " with sess.as_default():\n", 125 | " pnet, rnet, onet = detect_face.create_mtcnn(sess, './model_check_point/')\n", 126 | " \n", 127 | " " 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 4, 133 | "metadata": { 134 | "collapsed": false 135 | }, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "建立facenet embedding模型\n", 142 | "name = incept3a\n", 143 | "inputSize = 192\n", 144 | "kernelSize = {3,5}\n", 145 | "kernelStride 
= {1,1}\n", 146 | "outputSize = {128,32}\n", 147 | "reduceSize = {96,16,32,64}\n", 148 | "pooling = {MAX, 3, 3, 1, 1}\n", 149 | "outputSize = 256\n", 150 | "\n", 151 | "name = incept3b\n", 152 | "inputSize = 256\n", 153 | "kernelSize = {3,5}\n", 154 | "kernelStride = {1,1}\n", 155 | "outputSize = {128,64}\n", 156 | "reduceSize = {96,32,64,64}\n", 157 | "pooling = {MAX, 3, 3, 1, 1}\n", 158 | "outputSize = 320\n", 159 | "\n", 160 | "name = incept3c\n", 161 | "inputSize = 320\n", 162 | "kernelSize = {3,5}\n", 163 | "kernelStride = {2,2}\n", 164 | "outputSize = {256,64}\n", 165 | "reduceSize = {128,32,0,0}\n", 166 | "pooling = {MAX, 3, 3, 2, 2}\n", 167 | "outputSize = 640\n", 168 | "\n", 169 | "name = incept4a\n", 170 | "inputSize = 640\n", 171 | "kernelSize = {3,5}\n", 172 | "kernelStride = {1,1}\n", 173 | "outputSize = {192,64}\n", 174 | "reduceSize = {96,32,128,256}\n", 175 | "pooling = {MAX, 3, 3, 1, 1}\n", 176 | "outputSize = 640\n", 177 | "\n", 178 | "name = incept4b\n", 179 | "inputSize = 640\n", 180 | "kernelSize = {3,5}\n", 181 | "kernelStride = {1,1}\n", 182 | "outputSize = {224,64}\n", 183 | "reduceSize = {112,32,128,224}\n", 184 | "pooling = {MAX, 3, 3, 1, 1}\n", 185 | "outputSize = 640\n", 186 | "\n", 187 | "name = incept4c\n", 188 | "inputSize = 640\n", 189 | "kernelSize = {3,5}\n", 190 | "kernelStride = {1,1}\n", 191 | "outputSize = {256,64}\n", 192 | "reduceSize = {128,32,128,192}\n", 193 | "pooling = {MAX, 3, 3, 1, 1}\n", 194 | "outputSize = 640\n", 195 | "\n", 196 | "name = incept4d\n", 197 | "inputSize = 640\n", 198 | "kernelSize = {3,5}\n", 199 | "kernelStride = {1,1}\n", 200 | "outputSize = {288,64}\n", 201 | "reduceSize = {144,32,128,160}\n", 202 | "pooling = {MAX, 3, 3, 1, 1}\n", 203 | "outputSize = 640\n", 204 | "\n", 205 | "name = incept4e\n", 206 | "inputSize = 640\n", 207 | "kernelSize = {3,5}\n", 208 | "kernelStride = {2,2}\n", 209 | "outputSize = {256,128}\n", 210 | "reduceSize = {160,64,0,0}\n", 211 | "pooling = {MAX, 3, 3, 2, 2}\n", 212 | "outputSize = 1024\n", 213 | "\n", 214 | "name = incept5a\n", 215 | "inputSize = 1024\n", 216 | "kernelSize = {3,5}\n", 217 | "kernelStride = {1,1}\n", 218 | "outputSize = {384,0}\n", 219 | "reduceSize = {192,0,128,384}\n", 220 | "pooling = {MAX, 3, 3, 1, 1}\n", 221 | "outputSize = 896\n", 222 | "\n", 223 | "name = incept5b\n", 224 | "inputSize = 896\n", 225 | "kernelSize = {3,5}\n", 226 | "kernelStride = {1,1}\n", 227 | "outputSize = {384,0}\n", 228 | "reduceSize = {192,0,128,384}\n", 229 | "pooling = {MAX, 3, 3, 1, 1}\n", 230 | "outputSize = 896\n", 231 | "\n", 232 | "facenet embedding模型建立完毕\n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "#建立facenet embedding模型\n", 238 | "print('建立facenet embedding模型')\n", 239 | "tf.Graph().as_default()\n", 240 | "sess = tf.Session()\n", 241 | "images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, \n", 242 | " image_size, \n", 243 | " image_size, 3), name='input')\n", 244 | "\n", 245 | "phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')\n", 246 | "\n", 247 | "\n", 248 | "\n", 249 | "embeddings = network.inference(images_placeholder, pool_type, \n", 250 | " use_lrn, \n", 251 | " 1.0, \n", 252 | " phase_train=phase_train_placeholder)\n", 253 | "\n", 254 | "\n", 255 | "\n", 256 | "ema = tf.train.ExponentialMovingAverage(1.0)\n", 257 | "saver = tf.train.Saver(ema.variables_to_restore())\n", 258 | "\n", 259 | "model_checkpoint_path='./model_check_point/model-20160506.ckpt-500000'\n", 260 | "\n", 261 | "saver.restore(sess, model_checkpoint_path)\n", 262 | 
"print('facenet embedding模型建立完毕')" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": { 268 | "collapsed": true 269 | }, 270 | "source": [ 271 | "# reading training images from train folder\n" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 5, 277 | "metadata": { 278 | "collapsed": true 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "###### train_dir containing one subdirectory per image class \n", 283 | "#should like this:\n", 284 | "#-->train_dir:\n", 285 | "# --->pic_me:\n", 286 | "# me1.jpg\n", 287 | "# me2.jpg\n", 288 | "# ...\n", 289 | "# --->pic_others:\n", 290 | "# other1.jpg\n", 291 | "# other2.jpg\n", 292 | "# ...\n", 293 | "data_dir='~/train_dir/'#your own train folder" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 6, 299 | "metadata": { 300 | "collapsed": true 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "def to_rgb(img):\n", 305 | " w, h = img.shape\n", 306 | " ret = np.empty((w, h, 3), dtype=np.uint8)\n", 307 | " ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img\n", 308 | " return ret\n", 309 | "\n", 310 | "def read_img(person_dir,f):\n", 311 | " img=cv2.imread(pjoin(person_dir, f))\n", 312 | " \n", 313 | " gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n", 314 | " \n", 315 | " if gray.ndim == 2:\n", 316 | " img = to_rgb(gray)\n", 317 | " return img\n", 318 | "\n", 319 | "def load_data(data_dir):\n", 320 | " data = {}\n", 321 | " pics_ctr = 0\n", 322 | " for guy in os.listdir(data_dir):\n", 323 | " person_dir = pjoin(data_dir, guy)\n", 324 | " \n", 325 | " curr_pics = [read_img(person_dir, f) for f in os.listdir(person_dir)]\n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " data[guy] = curr_pics\n", 330 | " \n", 331 | " return data" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 7, 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "outputs": [ 341 | { 342 | "name": "stdout", 343 | "output_type": "stream", 344 | "text": [ 345 | "foler:other,image numbers:70\n", 346 | "foler:video_guai,image numbers:137\n", 347 | "foler:video_me,image numbers:115\n" 348 | ] 349 | } 350 | ], 351 | "source": [ 352 | "data=load_data(data_dir)\n", 353 | "keys=[]\n", 354 | "for key in data.iterkeys():\n", 355 | " keys.append(key)\n", 356 | " print('foler:{},image numbers:{}'.format(key,len(data[key])))\n", 357 | " " 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "outputs": [], 367 | "source": [] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 8, 372 | "metadata": { 373 | "collapsed": false 374 | }, 375 | "outputs": [ 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "68\n", 381 | "145\n", 382 | "搞完了,样本数为:145\n" 383 | ] 384 | } 385 | ], 386 | "source": [ 387 | "train_x=[]\n", 388 | "train_y=[]\n", 389 | "\n", 390 | "for x in data[keys[0]]:\n", 391 | " bounding_boxes, _ = detect_face.detect_face(x, minsize, pnet, rnet, onet, threshold, factor)\n", 392 | " nrof_faces = bounding_boxes.shape[0]#number of faces\n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " for face_position in bounding_boxes:\n", 397 | " face_position=face_position.astype(int)\n", 398 | " #print(face_position[0:4])\n", 399 | " cv2.rectangle(x, (face_position[0], face_position[1]), (face_position[2], face_position[3]), (0, 255, 0), 2)\n", 400 | " crop=x[face_position[1]:face_position[3],\n", 401 | " face_position[0]:face_position[2],]\n", 402 | " \n", 
403 | " crop = cv2.resize(crop, (96, 96), interpolation=cv2.INTER_CUBIC )\n", 404 | "\n", 405 | " #print(crop.shape)\n", 406 | " \n", 407 | " crop_data=crop.reshape(-1,96,96,3)\n", 408 | " #print(crop_data.shape)\n", 409 | " \n", 410 | " emb_data = sess.run([embeddings], \n", 411 | " feed_dict={images_placeholder: np.array(crop_data), phase_train_placeholder: False })[0]\n", 412 | " \n", 413 | " \n", 414 | " train_x.append(emb_data)\n", 415 | " train_y.append(0)\n", 416 | "print(len(train_x))\n", 417 | "\n", 418 | "\n", 419 | "\n", 420 | "for y in data[keys[1]]:\n", 421 | " bounding_boxes, _ = detect_face.detect_face(y, minsize, pnet, rnet, \n", 422 | " onet, threshold, factor)\n", 423 | " nrof_faces = bounding_boxes.shape[0]#number of faces\n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " for face_position in bounding_boxes:\n", 428 | " face_position=face_position.astype(int)\n", 429 | " #print(face_position[0:4])\n", 430 | " cv2.rectangle(y, (face_position[0], face_position[1]), (face_position[2], face_position[3]), (0, 255, 0), 2)\n", 431 | " crop=y[face_position[1]:face_position[3],\n", 432 | " face_position[0]:face_position[2],]\n", 433 | " \n", 434 | " crop = cv2.resize(crop, (96, 96), interpolation=cv2.INTER_CUBIC )\n", 435 | "\n", 436 | " crop_data=crop.reshape(-1,96,96,3)\n", 437 | " #print(crop_data.shape)\n", 438 | " \n", 439 | " emb_data = sess.run([embeddings], \n", 440 | " feed_dict={images_placeholder: np.array(crop_data), phase_train_placeholder: False })[0]\n", 441 | " \n", 442 | " \n", 443 | " train_x.append(emb_data)\n", 444 | " train_y.append(1)\n", 445 | " \n", 446 | "\n", 447 | "print(len(train_x))\n", 448 | "print('搞完了,样本数为:{}'.format(len(train_x)))" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 10, 454 | "metadata": { 455 | "collapsed": false 456 | }, 457 | "outputs": [ 458 | { 459 | "name": "stdout", 460 | "output_type": "stream", 461 | "text": [ 462 | "(145, 128)\n", 463 | "(145,)\n", 464 | "((101, 128), (101,), (44, 128), (44,))\n" 465 | ] 466 | } 467 | ], 468 | "source": [ 469 | "#train/test split\n", 470 | "train_x=np.array(train_x)\n", 471 | "train_x=train_x.reshape(-1,128)\n", 472 | "train_y=np.array(train_y)\n", 473 | "print(train_x.shape)\n", 474 | "print(train_y.shape)\n", 475 | "\n", 476 | "\n", 477 | "X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, test_size=.3, random_state=42)\n", 478 | "print(X_train.shape,y_train.shape,X_test.shape,y_test.shape)" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 11, 484 | "metadata": { 485 | "collapsed": false 486 | }, 487 | "outputs": [ 488 | { 489 | "name": "stdout", 490 | "output_type": "stream", 491 | "text": [ 492 | "accuracy: 100.00%\n" 493 | ] 494 | }, 495 | { 496 | "data": { 497 | "text/plain": [ 498 | "['./model_check_point/knn_classifier.model']" 499 | ] 500 | }, 501 | "execution_count": 11, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "\n", 508 | "# KNN Classifier \n", 509 | "def knn_classifier(train_x, train_y): \n", 510 | " from sklearn.neighbors import KNeighborsClassifier \n", 511 | " model = KNeighborsClassifier() \n", 512 | " model.fit(train_x, train_y) \n", 513 | " return model \n", 514 | "\n", 515 | "classifiers = knn_classifier \n", 516 | "\n", 517 | "model = classifiers(X_train,y_train) \n", 518 | "predict = model.predict(X_test) \n", 519 | "\n", 520 | "accuracy = metrics.accuracy_score(y_test, predict) \n", 521 | "print ('accuracy: %.2f%%' % (100 * accuracy) ) 
\n", 522 | " \n", 523 | " \n", 524 | "#save model\n", 525 | "joblib.dump(model, './model_check_point/knn_classifier.model')\n", 526 | "#model = joblib.load('_2017_1_24_knn.model')" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": 12, 532 | "metadata": { 533 | "collapsed": false 534 | }, 535 | "outputs": [ 536 | { 537 | "name": "stdout", 538 | "output_type": "stream", 539 | "text": [ 540 | "accuracy: 100.00%\n" 541 | ] 542 | } 543 | ], 544 | "source": [ 545 | "model = joblib.load('./model_check_point/knn_classifier.model')\n", 546 | "predict = model.predict(X_test) \n", 547 | "accuracy = metrics.accuracy_score(y_test, predict) \n", 548 | "print ('accuracy: %.2f%%' % (100 * accuracy) ) " 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": null, 554 | "metadata": { 555 | "collapsed": true 556 | }, 557 | "outputs": [], 558 | "source": [] 559 | } 560 | ], 561 | "metadata": { 562 | "kernelspec": { 563 | "display_name": "Python 2", 564 | "language": "python", 565 | "name": "python2" 566 | }, 567 | "language_info": { 568 | "codemirror_mode": { 569 | "name": "ipython", 570 | "version": 2 571 | }, 572 | "file_extension": ".py", 573 | "mimetype": "text/x-python", 574 | "name": "python", 575 | "nbconvert_exporter": "python", 576 | "pygments_lexer": "ipython2", 577 | "version": "2.7.12" 578 | } 579 | }, 580 | "nbformat": 4, 581 | "nbformat_minor": 0 582 | } 583 | --------------------------------------------------------------------------------
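For completeness, here is a hedged sketch (not part of the original notebooks) of reusing the trained classifier outside Jupyter. It rebuilds the nn4 embedding graph the same way "train your classifier.ipynb" does (MAX pooling, no LRN), restores the same model-20160506.ckpt-500000 checkpoint, loads knn_classifier.model with joblib, and classifies a 96x96 face crop produced by the MTCNN detection and cv2.resize steps shown above. The helper name `recognize` is illustrative, not from the repository.

```python
import tensorflow as tf
from sklearn.externals import joblib
import nn4 as network

image_size = 96
sess = tf.Session()
images_placeholder = tf.placeholder(tf.float32,
                                    shape=(None, image_size, image_size, 3),
                                    name='input')
phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

# Same graph construction as the notebooks: nn4 inference, MAX pooling, no LRN.
embeddings = network.inference(images_placeholder, 'MAX', False, 1.0,
                               phase_train=phase_train_placeholder)

# Restore the pre-trained FaceNet weights via their moving averages, as in the notebooks.
ema = tf.train.ExponentialMovingAverage(1.0)
saver = tf.train.Saver(ema.variables_to_restore())
saver.restore(sess, './model_check_point/model-20160506.ckpt-500000')

# The KNN model saved by "train your classifier.ipynb"
model = joblib.load('./model_check_point/knn_classifier.model')

def recognize(crop):
    """crop: a 96x96x3 face crop from MTCNN + cv2.resize, as in the notebooks."""
    emb = sess.run(embeddings,
                   feed_dict={images_placeholder: crop.reshape(-1, 96, 96, 3),
                              phase_train_placeholder: False})
    # Labels follow the order assigned when building train_y in the training notebook.
    return model.predict(emb)
```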