├── Make_aligndata.py ├── Make_classifier.py ├── README.md ├── detect_face.py ├── facenet.py ├── getphoto.py ├── haarcascade_frontalface_alt2.xml ├── image ├── 1.png ├── 10.png ├── 11.png ├── 12.png ├── 13.png ├── 14.png ├── 15.png ├── 16.png ├── 17.png ├── 18.png ├── 19.png ├── 2.png ├── 3.png ├── 4.png ├── 5.png ├── 6.png ├── 7.png ├── 8.png ├── 9.png └── note ├── input └── readme.md ├── models ├── det1.npy ├── det2.npy ├── det3.npy └── readme.md ├── myclassifier └── readme.md ├── output └── readme.md ├── real_time.py └── realtime_facenet.py /Make_aligndata.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from scipy import misc 6 | import os 7 | import tensorflow as tf 8 | import numpy as np 9 | import facenet 10 | import detect_face 11 | 12 | 13 | output_dir_path = './output/' 14 | output_dir = os.path.expanduser(output_dir_path) 15 | if not os.path.exists(output_dir): 16 | os.makedirs(output_dir) 17 | 18 | datadir = './input/' 19 | dataset = facenet.get_dataset(datadir) 20 | 21 | print('Creating networks and loading parameters') 22 | with tf.Graph().as_default(): 23 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) 24 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 25 | with sess.as_default(): 26 | pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/') 27 | 28 | minsize = 20 # minimum size of face 29 | threshold = [0.6, 0.7, 0.7] # three steps's threshold 30 | factor = 0.709 # scale factor 31 | margin = 44 32 | image_size = 182 33 | 34 | # Add a random key to the filename to allow alignment using multiple processes 35 | random_key = np.random.randint(0, high=99999) 36 | bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) 37 | print('Goodluck') 38 | 39 | with open(bounding_boxes_filename, "w") as text_file: 40 | nrof_images_total = 0 41 | nrof_successfully_aligned = 0 42 | for cls in dataset: 43 | output_class_dir = os.path.join(output_dir, cls.name) 44 | if not os.path.exists(output_class_dir): 45 | os.makedirs(output_class_dir) 46 | for image_path in cls.image_paths: 47 | nrof_images_total += 1 48 | filename = os.path.splitext(os.path.split(image_path)[1])[0] 49 | output_filename = os.path.join(output_class_dir, filename + '.png') 50 | print(image_path) 51 | if not os.path.exists(output_filename): 52 | try: 53 | img = misc.imread(image_path) 54 | print('read data dimension: ', img.ndim) 55 | except (IOError, ValueError, IndexError) as e: 56 | errorMessage = '{}: {}'.format(image_path, e) 57 | print(errorMessage) 58 | else: 59 | if img.ndim < 2: 60 | print('Unable to align "%s"' % image_path) 61 | text_file.write('%s\n' % (output_filename)) 62 | continue 63 | if img.ndim == 2: 64 | img = facenet.to_rgb(img) 65 | print('to_rgb data dimension: ', img.ndim) 66 | img = img[:, :, 0:3] 67 | print('after data dimension: ', img.ndim) 68 | 69 | bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) 70 | nrof_faces = bounding_boxes.shape[0] 71 | print('detected_face: %d' % nrof_faces) 72 | if nrof_faces > 0: 73 | det = bounding_boxes[:, 0:4] 74 | img_size = np.asarray(img.shape)[0:2] 75 | if nrof_faces > 1: 76 | bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) 77 | img_center = img_size / 2 78 | offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1], 79 
| (det[:, 1] + det[:, 3]) / 2 - img_center[0]]) 80 | offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) 81 | index = np.argmax(bounding_box_size - offset_dist_squared * 2.0) # some extra weight on the centering 82 | det = det[index, :] 83 | det = np.squeeze(det) 84 | bb_temp = np.zeros(4, dtype=np.int32) 85 | 86 | bb_temp[0] = det[0] 87 | bb_temp[1] = det[1] 88 | bb_temp[2] = det[2] 89 | bb_temp[3] = det[3] 90 | 91 | cropped_temp = img[bb_temp[1]:bb_temp[3], bb_temp[0]:bb_temp[2], :] 92 | scaled_temp = misc.imresize(cropped_temp, (image_size, image_size), interp='bilinear') 93 | 94 | nrof_successfully_aligned += 1 95 | misc.imsave(output_filename, scaled_temp) 96 | text_file.write('%s %d %d %d %d\n' % (output_filename, bb_temp[0], bb_temp[1], bb_temp[2], bb_temp[3])) 97 | else: 98 | print('Unable to align "%s"' % image_path) 99 | text_file.write('%s\n' % (output_filename)) 100 | 101 | print('Total number of images: %d' % nrof_images_total) 102 | print('Number of successfully aligned images: %d' % nrof_successfully_aligned) 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /Make_classifier.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | import facenet 8 | import os 9 | import math 10 | import pickle 11 | from sklearn.svm import SVC 12 | 13 | 14 | with tf.Graph().as_default(): 15 | 16 | with tf.Session() as sess: 17 | 18 | datadir = './output/' 19 | dataset = facenet.get_dataset(datadir) 20 | paths, labels = facenet.get_image_paths_and_labels(dataset) 21 | print('Number of classes: %d' % len(dataset)) 22 | print('Number of images: %d' % len(paths)) 23 | 24 | print('Loading feature extraction model') 25 | modeldir = './models/' 26 | facenet.load_model(modeldir) 27 | 28 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 29 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 30 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 31 | embedding_size = embeddings.get_shape()[1] 32 | 33 | # Run forward pass to calculate embeddings 34 | print('Calculating features for images') 35 | batch_size = 1000 36 | image_size = 160 37 | nrof_images = len(paths) 38 | nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / batch_size)) 39 | emb_array = np.zeros((nrof_images, embedding_size)) 40 | for i in range(nrof_batches_per_epoch): 41 | start_index = i * batch_size 42 | end_index = min((i + 1) * batch_size, nrof_images) 43 | paths_batch = paths[start_index:end_index] 44 | images = facenet.load_data(paths_batch, False, False, image_size) 45 | feed_dict = {images_placeholder: images, phase_train_placeholder: False} 46 | emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict) 47 | 48 | classifier_filename = './myclassifier/my_classifier.pkl' 49 | classifier_filename_exp = os.path.expanduser(classifier_filename) 50 | 51 | # Train classifier 52 | print('Training classifier') 53 | model = SVC(kernel='linear', probability=True) 54 | model.fit(emb_array, labels) 55 | 56 | # Create a list of class names 57 | class_names = [cls.name.replace('_', ' ') for cls in dataset] 58 | 59 | # Saving classifier model 60 | with open(classifier_filename_exp, 'wb') as outfile: 61 | pickle.dump((model, class_names), outfile) 62 | print('Saved classifier 
model to file "%s"' % classifier_filename_exp)
63 |         print('Goodluck')
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # face_recognition
2 | Real-time face recognition with MTCNN and FaceNet
3 | 
4 | ## Before running the code
5 | 
6 | You need to do the following:
7 | 
8 | * I have already uploaded det1.npy, det2.npy and det3.npy for MTCNN, but you still need to download a FaceNet model from [davidsandberg's
9 | github](https://github.com/davidsandberg/facenet), e.g. 20170511-185253; extract the pb file and put it in the models directory.
10 | * tensorflow-gpu 1.1.0; later versions may also work.
11 | * python 3.x
12 | 
13 | 
14 | ## Inspiration
15 | 
16 | * OpenFace
17 | * [davidsandberg's github](https://github.com/davidsandberg/facenet)
18 | * the main code is adapted from bearsprogrammer
19 | 
20 | ## Notes
21 | 
22 | `Remember to change the code wherever names appear: use your own name and your friends' names instead of mine.`
23 | 
24 | ## Run the code
25 | 
26 | Follow these steps:
27 | 
28 | * To make it easy to capture your photos and place them in the right structure (described in the input and output directories' readme.md files), I
29 | provide getphoto.py, which takes photos with OpenCV and automatically saves them to the input directory in the expected format.
30 | * Next, run Make_aligndata.py to align your photos: it crops just the face region and automatically writes the results to the output directory in the expected format. These photos will be used to train our own classifier.
31 | * Run Make_classifier.py to train our own classifier with an SVM. Of course, you can use your own classifier if you want. Afterwards you should
32 | see a my_classifier.pkl file in the myclassifier directory.
33 | * Finally, run realtime_facenet.py or real_time.py.
34 | realtime_facenet.py is the MTCNN version; real_time.py is an alternative that uses OpenCV's Haar detector instead of MTCNN.
35 | 
36 | ## Result
37 | 
38 | If everything is OK, you will see a result like this:
39 | 
40 | ![](https://github.com/cryer/face_recognition/raw/master/image/1.png)
41 | 
42 | ## More
43 | 
44 | I wrote an introduction to MTCNN and FaceNet in Chinese. [See my blog for details](https://cryer.github.io/2018/01/facerecognition/)
45 | 
--------------------------------------------------------------------------------
/detect_face.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | from six import string_types, iteritems
7 | 
8 | import numpy as np
9 | import tensorflow as tf
10 | import cv2
11 | import os
12 | 
13 | def layer(op):
14 |     '''Decorator for composable network layers.'''
15 | 
16 |     def layer_decorated(self, *args, **kwargs):
17 |         # Automatically set a name if not provided.
18 |         name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
19 |         # Figure out the layer inputs.
20 |         if len(self.terminals) == 0:
21 |             raise RuntimeError('No input variables found for layer %s.' % name)
22 |         elif len(self.terminals) == 1:
23 |             layer_input = self.terminals[0]
24 |         else:
25 |             layer_input = list(self.terminals)
26 |         # Perform the operation and get the output.
27 |         layer_output = op(self, layer_input, *args, **kwargs)
28 |         # Add to layer LUT.
29 |         self.layers[name] = layer_output
30 |         # This output is now the input for the next layer.
31 |         self.feed(layer_output)
32 |         # Return self for chained calls.
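        # (Each layer op records its output and hands back the Network object,
        # so definitions can be chained: net.feed('data').conv(...).prelu(...))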
33 | return self 34 | 35 | return layer_decorated 36 | 37 | class Network(object): 38 | 39 | def __init__(self, inputs, trainable=True): 40 | # The input nodes for this network 41 | self.inputs = inputs 42 | # The current list of terminal nodes 43 | self.terminals = [] 44 | # Mapping from layer names to layers 45 | self.layers = dict(inputs) 46 | # If true, the resulting variables are set as trainable 47 | self.trainable = trainable 48 | 49 | self.setup() 50 | 51 | def setup(self): 52 | '''Construct the network. ''' 53 | raise NotImplementedError('Must be implemented by the subclass.') 54 | 55 | def load(self, data_path, session, ignore_missing=False): 56 | '''Load network weights. 57 | data_path: The path to the numpy-serialized network weights 58 | session: The current TensorFlow session 59 | ignore_missing: If true, serialized weights for missing layers are ignored. 60 | ''' 61 | data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member 62 | 63 | for op_name in data_dict: 64 | with tf.variable_scope(op_name, reuse=True): 65 | for param_name, data in iteritems(data_dict[op_name]): 66 | try: 67 | var = tf.get_variable(param_name) 68 | session.run(var.assign(data)) 69 | except ValueError: 70 | if not ignore_missing: 71 | raise 72 | 73 | def feed(self, *args): 74 | '''Set the input(s) for the next operation by replacing the terminal nodes. 75 | The arguments can be either layer names or the actual layers. 76 | ''' 77 | assert len(args) != 0 78 | self.terminals = [] 79 | for fed_layer in args: 80 | if isinstance(fed_layer, string_types): 81 | try: 82 | fed_layer = self.layers[fed_layer] 83 | except KeyError: 84 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 85 | self.terminals.append(fed_layer) 86 | return self 87 | 88 | def get_output(self): 89 | '''Returns the current network output.''' 90 | return self.terminals[-1] 91 | 92 | def get_unique_name(self, prefix): 93 | '''Returns an index-suffixed unique name for the given prefix. 94 | This is used for auto-generating layer names based on the type-prefix. 95 | ''' 96 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 97 | return '%s_%d' % (prefix, ident) 98 | 99 | def make_var(self, name, shape): 100 | '''Creates a new TensorFlow variable.''' 101 | return tf.get_variable(name, shape, trainable=self.trainable) 102 | 103 | def validate_padding(self, padding): 104 | '''Verifies that the padding is one of the supported ones.''' 105 | assert padding in ('SAME', 'VALID') 106 | 107 | @layer 108 | def conv(self, 109 | inp, 110 | k_h, 111 | k_w, 112 | c_o, 113 | s_h, 114 | s_w, 115 | name, 116 | relu=True, 117 | padding='SAME', 118 | group=1, 119 | biased=True): 120 | # Verify that the padding is acceptable 121 | self.validate_padding(padding) 122 | # Get the number of channels in the input 123 | c_i = int(inp.get_shape()[-1]) 124 | # Verify that the grouping parameter is valid 125 | assert c_i % group == 0 126 | assert c_o % group == 0 127 | # Convolution for a given input and kernel 128 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 129 | with tf.variable_scope(name) as scope: 130 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 131 | # This is the common-case. Convolve the input without any further complications. 
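            # (Only group=1 is actually handled by this plain conv2d; the
            # MTCNN nets defined below never use grouped convolutions.)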
132 | output = convolve(inp, kernel) 133 | # Add the biases 134 | if biased: 135 | biases = self.make_var('biases', [c_o]) 136 | output = tf.nn.bias_add(output, biases) 137 | if relu: 138 | # ReLU non-linearity 139 | output = tf.nn.relu(output, name=scope.name) 140 | return output 141 | 142 | @layer 143 | def prelu(self, inp, name): 144 | with tf.variable_scope(name): 145 | i = int(inp.get_shape()[-1]) 146 | alpha = self.make_var('alpha', shape=(i,)) 147 | output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) 148 | return output 149 | 150 | @layer 151 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 152 | self.validate_padding(padding) 153 | return tf.nn.max_pool(inp, 154 | ksize=[1, k_h, k_w, 1], 155 | strides=[1, s_h, s_w, 1], 156 | padding=padding, 157 | name=name) 158 | 159 | @layer 160 | def fc(self, inp, num_out, name, relu=True): 161 | with tf.variable_scope(name): 162 | input_shape = inp.get_shape() 163 | if input_shape.ndims == 4: 164 | # The input is spatial. Vectorize it first. 165 | dim = 1 166 | for d in input_shape[1:].as_list(): 167 | dim *= int(d) 168 | feed_in = tf.reshape(inp, [-1, dim]) 169 | else: 170 | feed_in, dim = (inp, input_shape[-1].value) 171 | weights = self.make_var('weights', shape=[dim, num_out]) 172 | biases = self.make_var('biases', [num_out]) 173 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 174 | fc = op(feed_in, weights, biases, name=name) 175 | return fc 176 | 177 | 178 | """ 179 | Multi dimensional softmax, 180 | refer to https://github.com/tensorflow/tensorflow/issues/210 181 | compute softmax along the dimension of target 182 | the native softmax only supports batch_size x dimension 183 | """ 184 | @layer 185 | def softmax(self, target, axis, name=None): 186 | max_axis = tf.reduce_max(target, axis, keep_dims=True) 187 | target_exp = tf.exp(target-max_axis) 188 | normalize = tf.reduce_sum(target_exp, axis, keep_dims=True) 189 | softmax = tf.div(target_exp, normalize, name) 190 | return softmax 191 | 192 | class PNet(Network): 193 | def setup(self): 194 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 195 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 196 | .prelu(name='PReLU1') 197 | .max_pool(2, 2, 2, 2, name='pool1') 198 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 199 | .prelu(name='PReLU2') 200 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 201 | .prelu(name='PReLU3') 202 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 203 | .softmax(3,name='prob1')) 204 | 205 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 206 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 207 | 208 | class RNet(Network): 209 | def setup(self): 210 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 211 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 212 | .prelu(name='prelu1') 213 | .max_pool(3, 3, 2, 2, name='pool1') 214 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 215 | .prelu(name='prelu2') 216 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 217 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 218 | .prelu(name='prelu3') 219 | .fc(128, relu=False, name='conv4') 220 | .prelu(name='prelu4') 221 | .fc(2, relu=False, name='conv5-1') 222 | .softmax(1,name='prob1')) 223 | 224 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 225 | .fc(4, relu=False, name='conv5-2')) 226 | 227 | class ONet(Network): 228 | def setup(self): 229 | 
(self.feed('data') #pylint: disable=no-value-for-parameter, no-member 230 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 231 | .prelu(name='prelu1') 232 | .max_pool(3, 3, 2, 2, name='pool1') 233 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 234 | .prelu(name='prelu2') 235 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 236 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 237 | .prelu(name='prelu3') 238 | .max_pool(2, 2, 2, 2, name='pool3') 239 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 240 | .prelu(name='prelu4') 241 | .fc(256, relu=False, name='conv5') 242 | .prelu(name='prelu5') 243 | .fc(2, relu=False, name='conv6-1') 244 | .softmax(1, name='prob1')) 245 | 246 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 247 | .fc(4, relu=False, name='conv6-2')) 248 | 249 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 250 | .fc(10, relu=False, name='conv6-3')) 251 | 252 | def create_mtcnn(sess, model_path): 253 | if not model_path: 254 | model_path,_ = os.path.split(os.path.realpath(__file__)) 255 | 256 | with tf.variable_scope('pnet'): 257 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 258 | pnet = PNet({'data':data}) 259 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 260 | with tf.variable_scope('rnet'): 261 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 262 | rnet = RNet({'data':data}) 263 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 264 | with tf.variable_scope('onet'): 265 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 266 | onet = ONet({'data':data}) 267 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 268 | 269 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 270 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 271 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 272 | return pnet_fun, rnet_fun, onet_fun 273 | 274 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 275 | # im: input image 276 | # minsize: minimum of faces' size 277 | # pnet, rnet, onet: caffemodel 278 | # threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold 279 | # fastresize: resize img from last scale (using in high-resolution images) if fastresize==true 280 | factor_count=0 281 | total_boxes=np.empty((0,9)) 282 | points=[] 283 | h=img.shape[0] 284 | w=img.shape[1] 285 | minl=np.amin([h, w]) 286 | m=12.0/minsize 287 | minl=minl*m 288 | # creat scale pyramid 289 | scales=[] 290 | while minl>=12: 291 | scales += [m*np.power(factor, factor_count)] 292 | minl = minl*factor 293 | factor_count += 1 294 | 295 | # first stage 296 | for j in range(len(scales)): 297 | scale=scales[j] 298 | hs=int(np.ceil(h*scale)) 299 | ws=int(np.ceil(w*scale)) 300 | im_data = imresample(img, (hs, ws)) 301 | im_data = (im_data-127.5)*0.0078125 302 | img_x = np.expand_dims(im_data, 0) 303 | img_y = np.transpose(img_x, (0,2,1,3)) 304 | out = pnet(img_y) 305 | out0 = np.transpose(out[0], (0,2,1,3)) 306 | out1 = np.transpose(out[1], (0,2,1,3)) 307 | 308 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 309 | 310 | # inter-scale nms 311 | pick = nms(boxes.copy(), 0.5, 'Union') 312 | if boxes.size>0 and pick.size>0: 313 | boxes = boxes[pick,:] 314 | total_boxes = 
np.append(total_boxes, boxes, axis=0) 315 | 316 | numbox = total_boxes.shape[0] 317 | if numbox>0: 318 | pick = nms(total_boxes.copy(), 0.7, 'Union') 319 | total_boxes = total_boxes[pick,:] 320 | regw = total_boxes[:,2]-total_boxes[:,0] 321 | regh = total_boxes[:,3]-total_boxes[:,1] 322 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 323 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 324 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 325 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 326 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 327 | total_boxes = rerec(total_boxes.copy()) 328 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 329 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 330 | 331 | numbox = total_boxes.shape[0] 332 | if numbox>0: 333 | # second stage 334 | tempimg = np.zeros((24,24,3,numbox)) 335 | for k in range(0,numbox): 336 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 337 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 338 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 339 | tempimg[:,:,:,k] = imresample(tmp, (24, 24)) 340 | else: 341 | return np.empty() 342 | tempimg = (tempimg-127.5)*0.0078125 343 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 344 | out = rnet(tempimg1) 345 | out0 = np.transpose(out[0]) 346 | out1 = np.transpose(out[1]) 347 | score = out1[1,:] 348 | ipass = np.where(score>threshold[1]) 349 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 350 | mv = out0[:,ipass[0]] 351 | if total_boxes.shape[0]>0: 352 | pick = nms(total_boxes, 0.7, 'Union') 353 | total_boxes = total_boxes[pick,:] 354 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) 355 | total_boxes = rerec(total_boxes.copy()) 356 | 357 | numbox = total_boxes.shape[0] 358 | if numbox>0: 359 | # third stage 360 | total_boxes = np.fix(total_boxes).astype(np.int32) 361 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 362 | tempimg = np.zeros((48,48,3,numbox)) 363 | for k in range(0,numbox): 364 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 365 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 366 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 367 | tempimg[:,:,:,k] = imresample(tmp, (48, 48)) 368 | else: 369 | return np.empty() 370 | tempimg = (tempimg-127.5)*0.0078125 371 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 372 | out = onet(tempimg1) 373 | out0 = np.transpose(out[0]) 374 | out1 = np.transpose(out[1]) 375 | out2 = np.transpose(out[2]) 376 | score = out2[1,:] 377 | points = out1 378 | ipass = np.where(score>threshold[2]) 379 | points = points[:,ipass[0]] 380 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 381 | mv = out0[:,ipass[0]] 382 | 383 | w = total_boxes[:,2]-total_boxes[:,0]+1 384 | h = total_boxes[:,3]-total_boxes[:,1]+1 385 | points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 386 | points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 387 | if total_boxes.shape[0]>0: 388 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) 389 | pick = nms(total_boxes.copy(), 0.7, 'Min') 390 | total_boxes = total_boxes[pick,:] 391 | points = points[:,pick] 392 | 393 | return total_boxes, points 394 | 395 | 396 | def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, 
threshold, factor): 397 | # im: input image 398 | # minsize: minimum of faces' size 399 | # pnet, rnet, onet: caffemodel 400 | # threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold [0-1] 401 | 402 | all_scales = [None] * len(images) 403 | images_with_boxes = [None] * len(images) 404 | 405 | for i in range(len(images)): 406 | images_with_boxes[i] = {'total_boxes': np.empty((0, 9))} 407 | 408 | # create scale pyramid 409 | for index, img in enumerate(images): 410 | all_scales[index] = [] 411 | h = img.shape[0] 412 | w = img.shape[1] 413 | minsize = int(detection_window_size_ratio * np.minimum(w, h)) 414 | factor_count = 0 415 | minl = np.amin([h, w]) 416 | if minsize <= 12: 417 | minsize = 12 418 | 419 | m = 12.0 / minsize 420 | minl = minl * m 421 | while minl >= 12: 422 | all_scales[index].append(m * np.power(factor, factor_count)) 423 | minl = minl * factor 424 | factor_count += 1 425 | 426 | # # # # # # # # # # # # # 427 | # first stage - fast proposal network (pnet) to obtain face candidates 428 | # # # # # # # # # # # # # 429 | 430 | images_obj_per_resolution = {} 431 | 432 | # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images 433 | 434 | for index, scales in enumerate(all_scales): 435 | h = images[index].shape[0] 436 | w = images[index].shape[1] 437 | 438 | for scale in scales: 439 | hs = int(np.ceil(h * scale)) 440 | ws = int(np.ceil(w * scale)) 441 | 442 | if (ws, hs) not in images_obj_per_resolution: 443 | images_obj_per_resolution[(ws, hs)] = [] 444 | 445 | im_data = imresample(images[index], (hs, ws)) 446 | im_data = (im_data - 127.5) * 0.0078125 447 | img_y = np.transpose(im_data, (1, 0, 2)) # caffe uses different dimensions ordering 448 | images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index}) 449 | 450 | for resolution in images_obj_per_resolution: 451 | images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]] 452 | outs = pnet(images_per_resolution) 453 | 454 | for index in range(len(outs[0])): 455 | scale = images_obj_per_resolution[resolution][index]['scale'] 456 | image_index = images_obj_per_resolution[resolution][index]['index'] 457 | out0 = np.transpose(outs[0][index], (1, 0, 2)) 458 | out1 = np.transpose(outs[1][index], (1, 0, 2)) 459 | 460 | boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0]) 461 | 462 | # inter-scale nms 463 | pick = nms(boxes.copy(), 0.5, 'Union') 464 | if boxes.size > 0 and pick.size > 0: 465 | boxes = boxes[pick, :] 466 | images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'], 467 | boxes, 468 | axis=0) 469 | 470 | for index, image_obj in enumerate(images_with_boxes): 471 | numbox = image_obj['total_boxes'].shape[0] 472 | if numbox > 0: 473 | h = images[index].shape[0] 474 | w = images[index].shape[1] 475 | pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union') 476 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 477 | regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] 478 | regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] 479 | qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw 480 | qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh 481 | qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw 482 | qq4 = image_obj['total_boxes'][:, 3] + 
image_obj['total_boxes'][:, 8] * regh 483 | image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]])) 484 | image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) 485 | image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32) 486 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) 487 | 488 | numbox = image_obj['total_boxes'].shape[0] 489 | tempimg = np.zeros((24, 24, 3, numbox)) 490 | 491 | if numbox > 0: 492 | for k in range(0, numbox): 493 | tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) 494 | tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] 495 | if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: 496 | tempimg[:, :, :, k] = imresample(tmp, (24, 24)) 497 | else: 498 | return np.empty() 499 | 500 | tempimg = (tempimg - 127.5) * 0.0078125 501 | image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) 502 | 503 | # # # # # # # # # # # # # 504 | # second stage - refinement of face candidates with rnet 505 | # # # # # # # # # # # # # 506 | 507 | bulk_rnet_input = np.empty((0, 24, 24, 3)) 508 | for index, image_obj in enumerate(images_with_boxes): 509 | if 'rnet_input' in image_obj: 510 | bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0) 511 | 512 | out = rnet(bulk_rnet_input) 513 | out0 = np.transpose(out[0]) 514 | out1 = np.transpose(out[1]) 515 | score = out1[1, :] 516 | 517 | i = 0 518 | for index, image_obj in enumerate(images_with_boxes): 519 | if 'rnet_input' not in image_obj: 520 | continue 521 | 522 | rnet_input_count = image_obj['rnet_input'].shape[0] 523 | score_per_image = score[i:i + rnet_input_count] 524 | out0_per_image = out0[:, i:i + rnet_input_count] 525 | 526 | ipass = np.where(score_per_image > threshold[1]) 527 | image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), 528 | np.expand_dims(score_per_image[ipass].copy(), 1)]) 529 | 530 | mv = out0_per_image[:, ipass[0]] 531 | 532 | if image_obj['total_boxes'].shape[0] > 0: 533 | h = images[index].shape[0] 534 | w = images[index].shape[1] 535 | pick = nms(image_obj['total_boxes'], 0.7, 'Union') 536 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 537 | image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick])) 538 | image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) 539 | 540 | numbox = image_obj['total_boxes'].shape[0] 541 | 542 | if numbox > 0: 543 | tempimg = np.zeros((48, 48, 3, numbox)) 544 | image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32) 545 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) 546 | 547 | for k in range(0, numbox): 548 | tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) 549 | tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] 550 | if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: 551 | tempimg[:, :, :, k] = imresample(tmp, (48, 48)) 552 | else: 553 | return np.empty() 554 | tempimg = (tempimg - 127.5) * 0.0078125 555 | image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) 556 | 557 | i += rnet_input_count 558 | 559 | # # # # # # # # # # # # # 560 | # third stage - further refinement and facial landmarks positions with onet 561 | # # # # # # # # # # # # # 562 | 563 | bulk_onet_input = np.empty((0, 48, 48, 3)) 564 | for 
index, image_obj in enumerate(images_with_boxes): 565 | if 'onet_input' in image_obj: 566 | bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0) 567 | 568 | out = onet(bulk_onet_input) 569 | 570 | out0 = np.transpose(out[0]) 571 | out1 = np.transpose(out[1]) 572 | out2 = np.transpose(out[2]) 573 | score = out2[1, :] 574 | points = out1 575 | 576 | i = 0 577 | ret = [] 578 | for index, image_obj in enumerate(images_with_boxes): 579 | if 'onet_input' not in image_obj: 580 | ret.append(None) 581 | continue 582 | 583 | onet_input_count = image_obj['onet_input'].shape[0] 584 | 585 | out0_per_image = out0[:, i:i + onet_input_count] 586 | score_per_image = score[i:i + onet_input_count] 587 | points_per_image = points[:, i:i + onet_input_count] 588 | 589 | ipass = np.where(score_per_image > threshold[2]) 590 | points_per_image = points_per_image[:, ipass[0]] 591 | 592 | image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), 593 | np.expand_dims(score_per_image[ipass].copy(), 1)]) 594 | mv = out0_per_image[:, ipass[0]] 595 | 596 | w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1 597 | h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1 598 | points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile( 599 | image_obj['total_boxes'][:, 0], (5, 1)) - 1 600 | points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile( 601 | image_obj['total_boxes'][:, 1], (5, 1)) - 1 602 | 603 | if image_obj['total_boxes'].shape[0] > 0: 604 | image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv)) 605 | pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min') 606 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 607 | points_per_image = points_per_image[:, pick] 608 | 609 | ret.append((image_obj['total_boxes'], points_per_image)) 610 | else: 611 | ret.append(None) 612 | 613 | i += onet_input_count 614 | 615 | return ret 616 | 617 | 618 | # function [boundingbox] = bbreg(boundingbox,reg) 619 | def bbreg(boundingbox,reg): 620 | # calibrate bounding boxes 621 | if reg.shape[1]==1: 622 | reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) 623 | 624 | w = boundingbox[:,2]-boundingbox[:,0]+1 625 | h = boundingbox[:,3]-boundingbox[:,1]+1 626 | b1 = boundingbox[:,0]+reg[:,0]*w 627 | b2 = boundingbox[:,1]+reg[:,1]*h 628 | b3 = boundingbox[:,2]+reg[:,2]*w 629 | b4 = boundingbox[:,3]+reg[:,3]*h 630 | boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) 631 | return boundingbox 632 | 633 | def generateBoundingBox(imap, reg, scale, t): 634 | # use heatmap to generate bounding boxes 635 | stride=2 636 | cellsize=12 637 | 638 | imap = np.transpose(imap) 639 | dx1 = np.transpose(reg[:,:,0]) 640 | dy1 = np.transpose(reg[:,:,1]) 641 | dx2 = np.transpose(reg[:,:,2]) 642 | dy2 = np.transpose(reg[:,:,3]) 643 | y, x = np.where(imap >= t) 644 | if y.shape[0]==1: 645 | dx1 = np.flipud(dx1) 646 | dy1 = np.flipud(dy1) 647 | dx2 = np.flipud(dx2) 648 | dy2 = np.flipud(dy2) 649 | score = imap[(y,x)] 650 | reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) 651 | if reg.size==0: 652 | reg = np.empty((0,3)) 653 | bb = np.transpose(np.vstack([y,x])) 654 | q1 = np.fix((stride*bb+1)/scale) 655 | q2 = np.fix((stride*bb+cellsize-1+1)/scale) 656 | boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) 657 | return boundingbox, reg 658 | 659 | # function pick = nms(boxes,threshold,type) 660 | def nms(boxes, threshold, 
method):
661 |     if boxes.size==0:
662 |         return np.empty((0,3))
663 |     x1 = boxes[:,0]
664 |     y1 = boxes[:,1]
665 |     x2 = boxes[:,2]
666 |     y2 = boxes[:,3]
667 |     s = boxes[:,4]
668 |     area = (x2-x1+1) * (y2-y1+1)
669 |     I = np.argsort(s)
670 |     pick = np.zeros_like(s, dtype=np.int16)
671 |     counter = 0
672 |     while I.size>0:
673 |         i = I[-1]
674 |         pick[counter] = i
675 |         counter += 1
676 |         idx = I[0:-1]
677 |         xx1 = np.maximum(x1[i], x1[idx])
678 |         yy1 = np.maximum(y1[i], y1[idx])
679 |         xx2 = np.minimum(x2[i], x2[idx])
680 |         yy2 = np.minimum(y2[i], y2[idx])
681 |         w = np.maximum(0.0, xx2-xx1+1)
682 |         h = np.maximum(0.0, yy2-yy1+1)
683 |         inter = w * h
684 |         if method == 'Min':
685 |             o = inter / np.minimum(area[i], area[idx])
686 |         else:
687 |             o = inter / (area[i] + area[idx] - inter)
688 |         I = I[np.where(o<=threshold)]
689 |     pick = pick[0:counter]
690 |     return pick
691 | 
692 | # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
693 | def pad(total_boxes, w, h):
694 |     # compute the padding coordinates (pad the bounding boxes to square)
695 |     tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
696 |     tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
697 |     numbox = total_boxes.shape[0]
698 | 
699 |     dx = np.ones((numbox), dtype=np.int32)
700 |     dy = np.ones((numbox), dtype=np.int32)
701 |     edx = tmpw.copy().astype(np.int32)
702 |     edy = tmph.copy().astype(np.int32)
703 | 
704 |     x = total_boxes[:,0].copy().astype(np.int32)
705 |     y = total_boxes[:,1].copy().astype(np.int32)
706 |     ex = total_boxes[:,2].copy().astype(np.int32)
707 |     ey = total_boxes[:,3].copy().astype(np.int32)
708 | 
709 |     tmp = np.where(ex>w)
710 |     edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
711 |     ex[tmp] = w
712 | 
713 |     tmp = np.where(ey>h)
714 |     edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
715 |     ey[tmp] = h
716 | 
717 |     tmp = np.where(x<1)
718 |     dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
719 |     x[tmp] = 1
720 | 
721 |     tmp = np.where(y<1)
722 |     dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
723 |     y[tmp] = 1
724 | 
725 |     return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
726 | 
727 | # function [bboxA] = rerec(bboxA)
728 | def rerec(bboxA):
729 |     # convert bboxA to square
730 |     h = bboxA[:,3]-bboxA[:,1]
731 |     w = bboxA[:,2]-bboxA[:,0]
732 |     l = np.maximum(w, h)
733 |     bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
734 |     bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
735 |     bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
736 |     return bboxA
737 | 
738 | def imresample(img, sz):
739 |     im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable
740 |     return im_data
741 | 
742 |     # This method is kept for debugging purpose
743 | #     h=img.shape[0]
744 | #     w=img.shape[1]
745 | #     hs, ws = sz
746 | #     dx = float(w) / ws
747 | #     dy = float(h) / hs
748 | #     im_data = np.zeros((hs,ws,3))
749 | #     for a1 in range(0,hs):
750 | #         for a2 in range(0,ws):
751 | #             for a3 in range(0,3):
752 | #                 im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
753 | #     return im_data
754 | 
755 | 
--------------------------------------------------------------------------------
/facenet.py:
--------------------------------------------------------------------------------
1 | 
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 | 
6 | import os
7 | from subprocess import Popen, PIPE
8 | import tensorflow as tf
9 | from tensorflow.python.framework import ops
10 | import numpy as np
11 | from scipy import misc
12 | from sklearn.model_selection import
KFold 13 | from scipy import interpolate 14 | from tensorflow.python.training import training 15 | import random 16 | import re 17 | from tensorflow.python.platform import gfile 18 | 19 | def triplet_loss(anchor, positive, negative, alpha): 20 | """Calculate the triplet loss according to the FaceNet paper 21 | 22 | Args: 23 | anchor: the embeddings for the anchor images. 24 | positive: the embeddings for the positive images. 25 | negative: the embeddings for the negative images. 26 | 27 | Returns: 28 | the triplet loss according to the FaceNet paper as a float tensor. 29 | """ 30 | with tf.variable_scope('triplet_loss'): 31 | pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1) 32 | neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1) 33 | 34 | basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha) 35 | loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0) 36 | 37 | return loss 38 | 39 | def decov_loss(xs): 40 | """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf 41 | 'Reducing Overfitting In Deep Networks by Decorrelating Representation' 42 | """ 43 | x = tf.reshape(xs, [int(xs.get_shape()[0]), -1]) 44 | m = tf.reduce_mean(x, 0, True) 45 | z = tf.expand_dims(x-m, 2) 46 | corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0) 47 | corr_frob_sqr = tf.reduce_sum(tf.square(corr)) 48 | corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr))) 49 | loss = 0.5*(corr_frob_sqr - corr_diag_sqr) 50 | return loss 51 | 52 | def center_loss(features, label, alfa, nrof_classes): 53 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 54 | (http://ydwen.github.io/papers/WenECCV16.pdf) 55 | """ 56 | nrof_features = features.get_shape()[1] 57 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 58 | initializer=tf.constant_initializer(0), trainable=False) 59 | label = tf.reshape(label, [-1]) 60 | centers_batch = tf.gather(centers, label) 61 | diff = (1 - alfa) * (centers_batch - features) 62 | centers = tf.scatter_sub(centers, label, diff) 63 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 64 | return loss, centers 65 | 66 | def get_image_paths_and_labels(dataset): 67 | image_paths_flat = [] 68 | labels_flat = [] 69 | for i in range(len(dataset)): 70 | image_paths_flat += dataset[i].image_paths 71 | labels_flat += [i] * len(dataset[i].image_paths) 72 | return image_paths_flat, labels_flat 73 | 74 | def shuffle_examples(image_paths, labels): 75 | shuffle_list = list(zip(image_paths, labels)) 76 | random.shuffle(shuffle_list) 77 | image_paths_shuff, labels_shuff = zip(*shuffle_list) 78 | return image_paths_shuff, labels_shuff 79 | 80 | def read_images_from_disk(input_queue): 81 | """Consumes a single filename and label as a ' '-delimited string. 82 | Args: 83 | filename_and_label_tensor: A scalar string tensor. 84 | Returns: 85 | Two tensors: the decoded image, and the string label. 
86 |     """
87 |     label = input_queue[1]
88 |     file_contents = tf.read_file(input_queue[0])
89 |     example = tf.image.decode_png(file_contents, channels=3)
90 |     return example, label
91 | 
92 | def random_rotate_image(image):
93 |     angle = np.random.uniform(low=-10.0, high=10.0)
94 |     return misc.imrotate(image, angle, 'bicubic')
95 | 
96 | def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs, 
97 |         random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True):
98 | 
99 |     images = ops.convert_to_tensor(image_list, dtype=tf.string)
100 |     labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
101 | 
102 |     # Makes an input queue
103 |     input_queue = tf.train.slice_input_producer([images, labels],
104 |         num_epochs=max_nrof_epochs, shuffle=shuffle)
105 | 
106 |     images_and_labels = []
107 |     for _ in range(nrof_preprocess_threads):
108 |         image, label = read_images_from_disk(input_queue)
109 |         if random_rotate:
110 |             image = tf.py_func(random_rotate_image, [image], tf.uint8)
111 |         if random_crop:
112 |             image = tf.random_crop(image, [image_size, image_size, 3])
113 |         else:
114 |             image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size)
115 |         if random_flip:
116 |             image = tf.image.random_flip_left_right(image)
117 |         #pylint: disable=no-member
118 |         image.set_shape((image_size, image_size, 3))
119 |         image = tf.image.per_image_standardization(image)
120 |         images_and_labels.append([image, label])
121 | 
122 |     image_batch, label_batch = tf.train.batch_join(
123 |         images_and_labels, batch_size=batch_size,
124 |         capacity=4 * nrof_preprocess_threads * batch_size,
125 |         allow_smaller_final_batch=True)
126 | 
127 |     return image_batch, label_batch
128 | 
129 | def _add_loss_summaries(total_loss):
130 |     """Add summaries for losses.
131 | 
132 |     Generates moving average for all losses and associated summaries for
133 |     visualizing the performance of the network.
134 | 
135 |     Args:
136 |         total_loss: Total loss from loss().
137 |     Returns:
138 |         loss_averages_op: op for generating moving averages of losses.
139 |     """
140 |     # Compute the moving average of all individual losses and the total loss.
141 |     loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
142 |     losses = tf.get_collection('losses')
143 |     loss_averages_op = loss_averages.apply(losses + [total_loss])
144 | 
145 |     # Attach a scalar summary to all individual losses and the total loss; do the
146 |     # same for the averaged version of the losses.
147 |     for l in losses + [total_loss]:
148 |         # Name each loss as '(raw)' and name the moving average version of the loss
149 |         # as the original loss name.
150 |         tf.summary.scalar(l.op.name +' (raw)', l)
151 |         tf.summary.scalar(l.op.name, loss_averages.average(l))
152 | 
153 |     return loss_averages_op
154 | 
155 | def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True):
156 |     # Generate moving averages of all losses and associated summaries.
157 |     loss_averages_op = _add_loss_summaries(total_loss)
158 | 
159 |     # Compute gradients.
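    # (Gating on loss_averages_op updates the loss moving averages before each
    # training step; the optimizer is then selected by name below.)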
160 | with tf.control_dependencies([loss_averages_op]): 161 | if optimizer=='ADAGRAD': 162 | opt = tf.train.AdagradOptimizer(learning_rate) 163 | elif optimizer=='ADADELTA': 164 | opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6) 165 | elif optimizer=='ADAM': 166 | opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1) 167 | elif optimizer=='RMSPROP': 168 | opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0) 169 | elif optimizer=='MOM': 170 | opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True) 171 | else: 172 | raise ValueError('Invalid optimization algorithm') 173 | 174 | grads = opt.compute_gradients(total_loss, update_gradient_vars) 175 | 176 | # Apply gradients. 177 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) 178 | 179 | # Add histograms for trainable variables. 180 | if log_histograms: 181 | for var in tf.trainable_variables(): 182 | tf.summary.histogram(var.op.name, var) 183 | 184 | # Add histograms for gradients. 185 | if log_histograms: 186 | for grad, var in grads: 187 | if grad is not None: 188 | tf.summary.histogram(var.op.name + '/gradients', grad) 189 | 190 | # Track the moving averages of all trainable variables. 191 | variable_averages = tf.train.ExponentialMovingAverage( 192 | moving_average_decay, global_step) 193 | variables_averages_op = variable_averages.apply(tf.trainable_variables()) 194 | 195 | with tf.control_dependencies([apply_gradient_op, variables_averages_op]): 196 | train_op = tf.no_op(name='train') 197 | 198 | return train_op 199 | 200 | def prewhiten(x): 201 | mean = np.mean(x) 202 | std = np.std(x) 203 | std_adj = np.maximum(std, 1.0/np.sqrt(x.size)) 204 | y = np.multiply(np.subtract(x, mean), 1/std_adj) 205 | return y 206 | 207 | def crop(image, random_crop, image_size): 208 | if image.shape[1]>image_size: 209 | sz1 = int(image.shape[1]//2) 210 | sz2 = int(image_size//2) 211 | if random_crop: 212 | diff = sz1-sz2 213 | (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) 214 | else: 215 | (h, v) = (0,0) 216 | image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] 217 | return image 218 | 219 | def flip(image, random_flip): 220 | if random_flip and np.random.choice([True, False]): 221 | image = np.fliplr(image) 222 | return image 223 | 224 | def to_rgb(img): 225 | w, h = img.shape 226 | ret = np.empty((w, h, 3), dtype=np.uint8) 227 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 228 | return ret 229 | 230 | def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): 231 | nrof_samples = len(image_paths) 232 | images = np.zeros((nrof_samples, image_size, image_size, 3)) 233 | for i in range(nrof_samples): 234 | img = misc.imread(image_paths[i]) 235 | if img.ndim == 2: 236 | img = to_rgb(img) 237 | if do_prewhiten: 238 | img = prewhiten(img) 239 | img = crop(img, do_random_crop, image_size) 240 | img = flip(img, do_random_flip) 241 | images[i,:,:,:] = img 242 | return images 243 | 244 | def get_label_batch(label_data, batch_size, batch_index): 245 | nrof_examples = np.size(label_data, 0) 246 | j = batch_index*batch_size % nrof_examples 247 | if j+batch_size<=nrof_examples: 248 | batch = label_data[j:j+batch_size] 249 | else: 250 | x1 = label_data[j:nrof_examples] 251 | x2 = label_data[0:nrof_examples-j] 252 | batch = np.vstack([x1,x2]) 253 | batch_int = batch.astype(np.int64) 254 | return batch_int 255 | 256 | def get_batch(image_data, batch_size, batch_index): 
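    # (Builds a float32 batch, wrapping around to the start of image_data when
    # batch_index*batch_size runs past the end of the array.)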
257 |     nrof_examples = np.size(image_data, 0)
258 |     j = batch_index*batch_size % nrof_examples
259 |     if j+batch_size<=nrof_examples:
260 |         batch = image_data[j:j+batch_size,:,:,:]
261 |     else:
262 |         x1 = image_data[j:nrof_examples,:,:,:]
263 |         x2 = image_data[0:nrof_examples-j,:,:,:]
264 |         batch = np.vstack([x1,x2])
265 |     batch_float = batch.astype(np.float32)
266 |     return batch_float
267 | 
268 | def get_triplet_batch(triplets, batch_index, batch_size):
269 |     ax, px, nx = triplets
270 |     a = get_batch(ax, int(batch_size/3), batch_index)
271 |     p = get_batch(px, int(batch_size/3), batch_index)
272 |     n = get_batch(nx, int(batch_size/3), batch_index)
273 |     batch = np.vstack([a, p, n])
274 |     return batch
275 | 
276 | def get_learning_rate_from_file(filename, epoch):
277 |     with open(filename, 'r') as f:
278 |         for line in f.readlines():
279 |             line = line.split('#', 1)[0]
280 |             if line:
281 |                 par = line.strip().split(':')
282 |                 e = int(par[0])
283 |                 lr = float(par[1])
284 |                 if e <= epoch:
285 |                     learning_rate = lr
286 |                 else:
287 |                     return learning_rate
288 | 
289 | class ImageClass():
290 |     "Stores the paths to images for a given class"
291 |     def __init__(self, name, image_paths):
292 |         self.name = name
293 |         self.image_paths = image_paths
294 | 
295 |     def __str__(self):
296 |         return self.name + ', ' + str(len(self.image_paths)) + ' images'
297 | 
298 |     def __len__(self):
299 |         return len(self.image_paths)
300 | 
301 | def get_dataset(paths, has_class_directories=True):
302 |     dataset = []
303 |     for path in paths.split(':'):
304 |         path_exp = os.path.expanduser(path)
305 |         classes = os.listdir(path_exp)
306 |         classes.sort()
307 |         nrof_classes = len(classes)
308 |         for i in range(nrof_classes):
309 |             class_name = classes[i]
310 |             facedir = os.path.join(path_exp, class_name)
311 |             image_paths = get_image_paths(facedir)
312 |             dataset.append(ImageClass(class_name, image_paths))
313 | 
314 |     return dataset
315 | 
316 | def get_image_paths(facedir):
317 |     image_paths = []
318 |     if os.path.isdir(facedir):
319 |         images = os.listdir(facedir)
320 |         image_paths = [os.path.join(facedir,img) for img in images]
321 |     return image_paths
322 | 
323 | def split_dataset(dataset, split_ratio, mode):
324 |     if mode=='SPLIT_CLASSES':
325 |         nrof_classes = len(dataset)
326 |         class_indices = np.arange(nrof_classes)
327 |         np.random.shuffle(class_indices)
328 |         split = int(round(nrof_classes*split_ratio))
329 |         train_set = [dataset[i] for i in class_indices[0:split]]
330 |         test_set = [dataset[i] for i in class_indices[split:-1]]
331 |     elif mode=='SPLIT_IMAGES':
332 |         train_set = []
333 |         test_set = []
334 |         min_nrof_images = 2
335 |         for cls in dataset:
336 |             paths = cls.image_paths
337 |             np.random.shuffle(paths)
338 |             split = int(round(len(paths)*split_ratio))
339 |             if split<min_nrof_images:
340 |                 continue  # Not enough images for test set. Skip class...
341 |             train_set.append(ImageClass(cls.name, paths[0:split]))
342 |             test_set.append(ImageClass(cls.name, paths[split:-1]))
343 |     else:
344 |         raise ValueError('Invalid train/test split mode "%s"' % mode)
345 |     return train_set, test_set
346 | 
347 | def load_model(model):
348 |     # Check if the model is a model directory (containing a metagraph and a checkpoint file)
349 |     #  or if it is a protobuf file with a frozen graph
350 |     model_exp = os.path.expanduser(model)
351 |     if (os.path.isfile(model_exp)):
352 |         print('Model filename: %s' % model_exp)
353 |         with gfile.FastGFile(model_exp,'rb') as f:
354 |             graph_def = tf.GraphDef()
355 |             graph_def.ParseFromString(f.read())
356 |             tf.import_graph_def(graph_def, name='')
357 |     else:
358 |         print('Model directory: %s' % model_exp)
359 |         meta_file, ckpt_file = get_model_filenames(model_exp)
360 | 
361 |         print('Metagraph file: %s' % meta_file)
362 |         print('Checkpoint file: %s' % ckpt_file)
363 | 
364 |         saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file))
365 |         saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
366 | 
367 | def get_model_filenames(model_dir):
368 |     files = os.listdir(model_dir)
369 |     meta_files = [s for s in files if s.endswith('.meta')]
370 |     if len(meta_files)==0:
371 |         raise ValueError('No meta file found in the model directory (%s)' % model_dir)
372 |     elif len(meta_files)>1:
373 |         raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
374 |     meta_file = meta_files[0]
375 |     meta_files = [s for s in files if '.ckpt' in s]
376 |     max_step = -1
377 |     for f in files:
378 |         step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
379 |         if step_str is not None and len(step_str.groups())>=2:
380 |             step = int(step_str.groups()[1])
381 |             if step > max_step:
382 |                 max_step = step
383 |                 ckpt_file = step_str.groups()[0]
384 |     return meta_file, ckpt_file
385 | 
386 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
387 |     assert(embeddings1.shape[0] == embeddings2.shape[0])
388 |     assert(embeddings1.shape[1] == embeddings2.shape[1])
389 |     nrof_pairs =
min(len(actual_issame), embeddings1.shape[0]) 390 | nrof_thresholds = len(thresholds) 391 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 392 | 393 | tprs = np.zeros((nrof_folds,nrof_thresholds)) 394 | fprs = np.zeros((nrof_folds,nrof_thresholds)) 395 | accuracy = np.zeros((nrof_folds)) 396 | 397 | diff = np.subtract(embeddings1, embeddings2) 398 | dist = np.sum(np.square(diff),1) 399 | indices = np.arange(nrof_pairs) 400 | 401 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 402 | 403 | # Find the best threshold for the fold 404 | acc_train = np.zeros((nrof_thresholds)) 405 | for threshold_idx, threshold in enumerate(thresholds): 406 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 407 | best_threshold_index = np.argmax(acc_train) 408 | for threshold_idx, threshold in enumerate(thresholds): 409 | tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set]) 410 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 411 | 412 | tpr = np.mean(tprs,0) 413 | fpr = np.mean(fprs,0) 414 | return tpr, fpr, accuracy 415 | 416 | def calculate_accuracy(threshold, dist, actual_issame): 417 | predict_issame = np.less(dist, threshold) 418 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 419 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 420 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 421 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 422 | 423 | tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn) 424 | fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn) 425 | acc = float(tp+tn)/dist.size 426 | return tpr, fpr, acc 427 | 428 | 429 | 430 | def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10): 431 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 432 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 433 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 434 | nrof_thresholds = len(thresholds) 435 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 436 | 437 | val = np.zeros(nrof_folds) 438 | far = np.zeros(nrof_folds) 439 | 440 | diff = np.subtract(embeddings1, embeddings2) 441 | dist = np.sum(np.square(diff),1) 442 | indices = np.arange(nrof_pairs) 443 | 444 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 445 | 446 | # Find the threshold that gives FAR = far_target 447 | far_train = np.zeros(nrof_thresholds) 448 | for threshold_idx, threshold in enumerate(thresholds): 449 | _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) 450 | if np.max(far_train)>=far_target: 451 | f = interpolate.interp1d(far_train, thresholds, kind='slinear') 452 | threshold = f(far_target) 453 | else: 454 | threshold = 0.0 455 | 456 | val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) 457 | 458 | val_mean = np.mean(val) 459 | far_mean = np.mean(far) 460 | val_std = np.std(val) 461 | return val_mean, val_std, far_mean 462 | 463 | 464 | def calculate_val_far(threshold, dist, actual_issame): 465 | predict_issame = np.less(dist, threshold) 466 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 467 | false_accept = np.sum(np.logical_and(predict_issame, 
np.logical_not(actual_issame)))
468 |     n_same = np.sum(actual_issame)
469 |     n_diff = np.sum(np.logical_not(actual_issame))
470 |     val = float(true_accept) / float(n_same)
471 |     far = float(false_accept) / float(n_diff)
472 |     return val, far
473 | 
474 | def store_revision_info(src_path, output_dir, arg_string):
475 | 
476 |     # Get git hash
477 |     gitproc = Popen(['git', 'rev-parse', 'HEAD'], stdout = PIPE, cwd=src_path)
478 |     (stdout, _) = gitproc.communicate()
479 |     git_hash = stdout.strip()
480 | 
481 |     # Get local changes
482 |     gitproc = Popen(['git', 'diff', 'HEAD'], stdout = PIPE, cwd=src_path)
483 |     (stdout, _) = gitproc.communicate()
484 |     git_diff = stdout.strip()
485 | 
486 |     # Store a text file in the log directory
487 |     rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
488 |     with open(rev_info_filename, "w") as text_file:
489 |         text_file.write('arguments: %s\n--------------------\n' % arg_string)
490 |         text_file.write('git hash: %s\n--------------------\n' % git_hash)
491 |         text_file.write('%s' % git_diff)
492 | 
493 | def list_variables(filename):
494 |     reader = training.NewCheckpointReader(filename)
495 |     variable_map = reader.get_variable_to_shape_map()
496 |     names = sorted(variable_map.keys())
497 |     return names
498 | 
499 | def put_images_on_grid(images, shape=(16,8)):
500 |     nrof_images = images.shape[0]
501 |     img_size = images.shape[1]
502 |     bw = 3
503 |     img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32)
504 |     for i in range(shape[1]):
505 |         x_start = i*(img_size+bw)+bw
506 |         for j in range(shape[0]):
507 |             img_index = i*shape[0]+j
508 |             if img_index>=nrof_images:
509 |                 break
510 |             y_start = j*(img_size+bw)+bw
511 |             img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :]
512 |         if img_index>=nrof_images:
513 |             break
514 |     return img
515 | 
516 | def write_arguments_to_file(args, filename):
517 |     with open(filename, 'w') as f:
518 |         for key, value in vars(args).items():
519 |             f.write('%s: %s\n' % (key, str(value)))
520 | 
--------------------------------------------------------------------------------
/getphoto.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | 
3 | # Capture frames from the default webcam and save every 10th frame that
4 | # contains exactly one face. Change 'input/xuguanyu/' to your own name;
5 | # the directory must already exist.
6 | video_capture = cv2.VideoCapture(0)
7 | # Load the Haar cascade once, rather than once per frame.
8 | classifier = cv2.CascadeClassifier("./haarcascade_frontalface_alt2.xml")
9 | c = 0
10 | while True:
11 |     ret, frame = video_capture.read()
12 | 
13 |     faceRects = classifier.detectMultiScale(frame, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
14 | 
15 |     if len(faceRects) == 1:
16 |         c += 1
17 |         if c % 10 == 0:
18 |             cv2.imwrite('input/xuguanyu/' + str(int(c/10)) + '.jpg', frame)
19 | 
20 |     cv2.imshow('frame', frame)
21 | 
22 |     # Press q to quit.
23 |     if cv2.waitKey(1) & 0xFF == ord('q'):
24 |         break
25 | 
26 | video_capture.release()
27 | cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/image/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/1.png
--------------------------------------------------------------------------------
/image/10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/10.png
--------------------------------------------------------------------------------
/image/11.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/11.png -------------------------------------------------------------------------------- /image/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/12.png -------------------------------------------------------------------------------- /image/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/13.png -------------------------------------------------------------------------------- /image/14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/14.png -------------------------------------------------------------------------------- /image/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/15.png -------------------------------------------------------------------------------- /image/16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/16.png -------------------------------------------------------------------------------- /image/17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/17.png -------------------------------------------------------------------------------- /image/18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/18.png -------------------------------------------------------------------------------- /image/19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/19.png -------------------------------------------------------------------------------- /image/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/2.png -------------------------------------------------------------------------------- /image/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/3.png -------------------------------------------------------------------------------- /image/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/4.png -------------------------------------------------------------------------------- /image/5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/5.png
--------------------------------------------------------------------------------
/image/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/6.png
--------------------------------------------------------------------------------
/image/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/7.png
--------------------------------------------------------------------------------
/image/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/8.png
--------------------------------------------------------------------------------
/image/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/9.png
--------------------------------------------------------------------------------
/image/note:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
--------------------------------------------------------------------------------
/input/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | This is the path where you put your classifier images before alignment. The structure should look like this:
3 | 
4 | * input
5 |   * class1 (for face recognition this is a person's name)
6 |     * image1 (for face recognition this is a photo of that person)
7 |     * image2
8 |     * ...
9 |   * class2
10 |     * image1
11 |     * image2
12 |     * ...
13 |   * class...
14 | 
--------------------------------------------------------------------------------
/models/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/models/det1.npy
--------------------------------------------------------------------------------
/models/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/models/det2.npy
--------------------------------------------------------------------------------
/models/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/models/det3.npy
--------------------------------------------------------------------------------
/models/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | This is the path for your model files: the MTCNN weights (det1/det2/det3.npy) and the FaceNet model, either as a checkpoint or a frozen .pb graph.
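3 | 
4 | A minimal sketch of how these files are consumed, using the facenet and detect_face modules from this repo (paths assume you run from the repo root):
5 | 
6 | ```python
7 | import tensorflow as tf
8 | import facenet
9 | import detect_face
10 | 
11 | with tf.Graph().as_default():
12 |     sess = tf.Session()
13 |     with sess.as_default():
14 |         # det1/det2/det3.npy hold the weights of the three MTCNN stages
15 |         # (PNet, RNet, ONet) that detect_face.py uses for face detection
16 |         pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/')
17 |         # restore the FaceNet embedding network from this directory
18 |         facenet.load_model('./models/')
19 | ```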
20 | 
--------------------------------------------------------------------------------
/myclassifier/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | This is the path for your own trained classifier, e.g. my_classifier.pkl
3 | 
--------------------------------------------------------------------------------
/output/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | This is the path where your classifier images are written after alignment. The structure should look like this:
3 | 
4 | * output
5 |   * class1 (for face recognition this is a person's name)
6 |     * image1 (for face recognition this is a photo of that person)
7 |     * image2
8 |     * ...
9 |   * class2
10 |     * image1
11 |     * image2
12 |     * ...
13 |   * class...
14 | 
--------------------------------------------------------------------------------
/real_time.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import tensorflow as tf
6 | from scipy import misc
7 | import cv2
8 | import numpy as np
9 | import facenet
10 | # import detect_face  # not needed here: this script detects faces with a Haar cascade
11 | import os
12 | import time
13 | import pickle
14 | 
15 | print('Creating networks and loading parameters')
16 | with tf.Graph().as_default():
17 |     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
18 |     sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
19 |     with sess.as_default():
20 |         minsize = 20  # minimum size of face (MTCNN parameter, unused in this Haar version)
21 |         threshold = [0.6, 0.7, 0.7]  # threshold for each of the three MTCNN steps (unused here)
22 |         factor = 0.709  # scale factor (unused here)
23 |         margin = 44
24 |         frame_interval = 3
25 |         batch_size = 1000
26 |         image_size = 182
27 |         input_image_size = 160
28 | 
29 |         HumanNames = ['liuzheng', 'shixing', 'xuguanyu', 'Human_h']  # class names from training, in the classifier's label order
30 | 
31 |         print('Loading feature extraction model')
32 |         modeldir = './models/'
33 |         facenet.load_model(modeldir)
34 | 
35 |         images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
36 |         embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
37 |         phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
38 |         embedding_size = embeddings.get_shape()[1]
39 | 
40 |         classifier_filename = './myclassifier/my_classifier.pkl'
41 |         classifier_filename_exp = os.path.expanduser(classifier_filename)
42 |         with open(classifier_filename_exp, 'rb') as infile:
43 |             (model, class_names) = pickle.load(infile)
44 |             print('load classifier file-> %s' % classifier_filename_exp)
45 | 
46 |         # Load the Haar cascade once instead of reloading it on every frame
47 |         classifier = cv2.CascadeClassifier("./haarcascade_frontalface_alt2.xml")
48 |         video_capture = cv2.VideoCapture(0)
49 |         c = 0  # frame counter; never incremented, so every frame is processed
50 | 
51 |         print('Start Recognition!')
52 |         prevTime = 0
53 |         while True:
54 |             ret, frame = video_capture.read()
55 | 
56 |             curTime = time.time()  # for the FPS overlay
57 |             timeF = frame_interval
58 | 
59 |             if (c % timeF == 0):
60 |                 find_results = []
61 |                 gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
62 | 
63 |                 faceRects = classifier.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
64 | 
65 |                 if len(faceRects) > 0:
66 |                     img_size = np.asarray(frame.shape)[0:2]
67 | 
68 |                     cropped = []
69 |                     scaled = []
70 |                     scaled_reshape = []
71 | 
72 |                     emb_array = np.zeros((1, embedding_size))
73 |                     for faceRect in faceRects:
74 |                         x, y, w, h = faceRect
75 |                         cv2.rectangle(frame, (x - 10, y - 10), (x + w + 10, y + h + 10), (0, 255, 0), 2)
76 | 
77 |                         cropped.append(frame[max(y - 10, 0):y + h + 10, max(x - 10, 0):x + w + 10, :])
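78 |                         # Preprocess the newest crop exactly as the training images were
79 |                         # prepared: flip (a no-op with False), resize to the 182 px alignment
80 |                         # size, then to the network's 160 px input, then prewhiten to zero
81 |                         # mean / unit variance before computing the embedding. Indexing with
82 |                         # [-1] always selects the face appended above, so every face in the
83 |                         # frame is processed, not just the first one.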
84 |                         cropped[-1] = facenet.flip(cropped[-1], False)
85 |                         scaled.append(misc.imresize(cropped[-1], (image_size, image_size), interp='bilinear'))
86 |                         scaled[-1] = cv2.resize(scaled[-1], (input_image_size, input_image_size),
87 |                                                 interpolation=cv2.INTER_CUBIC)
88 |                         scaled[-1] = facenet.prewhiten(scaled[-1])
89 |                         scaled_reshape.append(scaled[-1].reshape(-1, input_image_size, input_image_size, 3))
90 |                         feed_dict = {images_placeholder: scaled_reshape[-1], phase_train_placeholder: False}
91 |                         emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
92 | 
93 |                         predictions = model.predict_proba(emb_array)
94 |                         best_class_indices = np.argmax(predictions, axis=1)
95 |                         best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
96 |                         result_names = HumanNames[best_class_indices[0]]
97 |                         cv2.putText(frame, result_names, (x + 30, y - 30), cv2.FONT_HERSHEY_COMPLEX_SMALL,
98 |                                     1, (0, 0, 255), thickness=1, lineType=2)
99 |                 else:
100 |                     print('No face detected in this frame')
101 | 
102 |             sec = curTime - prevTime
103 |             prevTime = curTime
104 |             fps = 1 / sec
105 |             fps_text = 'FPS: %2.3f' % fps  # renamed from str, which shadowed the built-in
106 |             text_fps_x = len(frame[0]) - 150
107 |             text_fps_y = 20
108 |             cv2.putText(frame, fps_text, (text_fps_x, text_fps_y),
109 |                         cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2)
110 |             cv2.imshow('Video', frame)
111 | 
112 |             if cv2.waitKey(1) & 0xFF == ord('q'):
113 |                 break
114 | 
115 | video_capture.release()
116 | cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/realtime_facenet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import tensorflow as tf
6 | from scipy import misc
7 | import cv2
8 | import numpy as np
9 | import facenet
10 | import detect_face
11 | import os
12 | import time
13 | import pickle
14 | 
15 | print('Creating networks and loading parameters')
16 | with tf.Graph().as_default():
17 |     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
18 |     sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
19 |     with sess.as_default():
20 |         pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/')
21 | 
22 |         minsize = 20  # minimum size of face
23 |         threshold = [0.6, 0.7, 0.7]  # threshold for each of the three MTCNN stages
24 |         factor = 0.709  # scale factor for the MTCNN image pyramid
25 |         margin = 44
26 |         frame_interval = 3
27 |         batch_size = 1000
28 |         image_size = 182
29 |         input_image_size = 160
30 | 
31 |         HumanNames = ['liuzheng', 'shixing', 'xuguanyu', 'Human_h']  # class names from training, in the classifier's label order
32 | 
33 |         print('Loading feature extraction model')
34 |         modeldir = './models/'
35 |         facenet.load_model(modeldir)
36 | 
37 |         images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
38 |         embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
39 |         phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
40 |         embedding_size = embeddings.get_shape()[1]
41 | 
42 |         classifier_filename = './myclassifier/my_classifier.pkl'
43 |         classifier_filename_exp = os.path.expanduser(classifier_filename)
44 |         with open(classifier_filename_exp, 'rb') as infile:
45 |             (model, class_names) = pickle.load(infile)
46 |             print('load classifier file-> %s' % classifier_filename_exp)
47 | 
48 |         video_capture = cv2.VideoCapture(0)
49 |         c = 0
50 | 
51 |         print('Start Recognition!')
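52 |         # Main loop: each frame is passed through the three MTCNN stages
53 |         # (pnet, rnet, onet) to get face bounding boxes; every box is then
54 |         # cropped, resized to the 160x160 network input, prewhitened,
55 |         # embedded with FaceNet, and classified with the pickled classifier
56 |         # loaded above.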
57 |         prevTime = 0
58 |         while True:
59 |             ret, frame = video_capture.read()
60 | 
61 |             # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)  # resize frame (optional)
62 | 
63 |             curTime = time.time()  # for the FPS overlay
64 |             timeF = frame_interval
65 | 
66 |             if (c % timeF == 0):
67 |                 find_results = []
68 | 
69 |                 if frame.ndim == 2:
70 |                     frame = facenet.to_rgb(frame)
71 |                 frame = frame[:, :, 0:3]
72 |                 bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
73 |                 nrof_faces = bounding_boxes.shape[0]
74 |                 print('Detected_FaceNum: %d' % nrof_faces)
75 | 
76 |                 if nrof_faces > 0:
77 |                     det = bounding_boxes[:, 0:4]
78 |                     img_size = np.asarray(frame.shape)[0:2]
79 | 
80 |                     cropped = []
81 |                     scaled = []
82 |                     scaled_reshape = []
83 |                     bb = np.zeros((nrof_faces, 4), dtype=np.int32)
84 | 
85 |                     for i in range(nrof_faces):
86 |                         emb_array = np.zeros((1, embedding_size))
87 | 
88 |                         bb[i][0] = det[i][0]
89 |                         bb[i][1] = det[i][1]
90 |                         bb[i][2] = det[i][2]
91 |                         bb[i][3] = det[i][3]
92 | 
93 |                         # skip boxes that fall outside the frame
94 |                         if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
95 |                             print('face is out of range!')
96 |                             continue
97 | 
98 |                         cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
99 |                         cropped[-1] = facenet.flip(cropped[-1], False)
100 |                         scaled.append(misc.imresize(cropped[-1], (image_size, image_size), interp='bilinear'))
101 |                         scaled[-1] = cv2.resize(scaled[-1], (input_image_size, input_image_size),
102 |                                                 interpolation=cv2.INTER_CUBIC)
103 |                         scaled[-1] = facenet.prewhiten(scaled[-1])
104 |                         scaled_reshape.append(scaled[-1].reshape(-1, input_image_size, input_image_size, 3))
105 |                         feed_dict = {images_placeholder: scaled_reshape[-1], phase_train_placeholder: False}
106 |                         emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
107 | 
108 |                         predictions = model.predict_proba(emb_array)
109 |                         best_class_indices = np.argmax(predictions, axis=1)
110 |                         best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
111 |                         cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)
112 |                         text_x = bb[i][0]
113 |                         text_y = bb[i][3] + 20
114 | 
115 |                         result_names = HumanNames[best_class_indices[0]]
116 |                         cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
117 |                                     1, (0, 0, 255), thickness=1, lineType=2)
118 |                 else:
119 |                     print('No face detected in this frame')
120 | 
121 |             sec = curTime - prevTime
122 |             prevTime = curTime
123 |             fps = 1 / sec
124 |             fps_text = 'FPS: %2.3f' % fps  # renamed from str, which shadowed the built-in
125 |             text_fps_x = len(frame[0]) - 150
126 |             text_fps_y = 20
127 |             cv2.putText(frame, fps_text, (text_fps_x, text_fps_y),
128 |                         cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2)
129 |             # c += 1  # left disabled in the original, so frame_interval has no effect
130 |             cv2.imshow('Video', frame)
131 | 
132 |             if cv2.waitKey(1) & 0xFF == ord('q'):
133 |                 break
134 | 
135 | video_capture.release()
136 | # # video writer
137 | # out.release()
138 | cv2.destroyAllWindows()
139 | 
--------------------------------------------------------------------------------
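A minimal end-to-end sketch of how the pieces above fit together for one-off verification rather than real-time recognition. It reuses the squared-Euclidean distance that calculate_roc and calculate_accuracy in facenet.py operate on; the function name verify and the 1.1 threshold are illustrative choices, not part of this repo, and both inputs are assumed to be already-aligned 160x160 face crops:

```python
import numpy as np
import tensorflow as tf
from scipy import misc
import facenet

def verify(image_path1, image_path2, threshold=1.1):
    """Return (distance, same_person) for two aligned 160x160 face crops."""
    with tf.Graph().as_default():
        with tf.Session() as sess:
            facenet.load_model('./models/')
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

            # Prewhiten both images, exactly as the real-time scripts do
            imgs = np.stack([facenet.prewhiten(misc.imread(p)) for p in (image_path1, image_path2)])
            emb = sess.run(embeddings, feed_dict={images_placeholder: imgs, phase_train_placeholder: False})

            # Same squared-Euclidean distance used by calculate_roc / calculate_accuracy
            dist = np.sum(np.square(emb[0] - emb[1]))
            return dist, dist < threshold
```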