├── Make_aligndata.py ├── Make_classifier.py ├── README.md ├── detect_face.py ├── facenet.py ├── getphoto.py ├── haarcascade_frontalface_alt2.xml ├── image ├── 1.png ├── 10.png ├── 11.png ├── 12.png ├── 13.png ├── 14.png ├── 15.png ├── 16.png ├── 17.png ├── 18.png ├── 19.png ├── 2.png ├── 3.png ├── 4.png ├── 5.png ├── 6.png ├── 7.png ├── 8.png ├── 9.png └── note ├── input └── readme.md ├── models ├── det1.npy ├── det2.npy ├── det3.npy └── readme.md ├── myclassifier └── readme.md ├── output └── readme.md ├── real_time.py └── realtime_facenet.py /Make_aligndata.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from scipy import misc 6 | import os 7 | import tensorflow as tf 8 | import numpy as np 9 | import facenet 10 | import detect_face 11 | 12 | 13 | output_dir_path = './output/' 14 | output_dir = os.path.expanduser(output_dir_path) 15 | if not os.path.exists(output_dir): 16 | os.makedirs(output_dir) 17 | 18 | datadir = './input/' 19 | dataset = facenet.get_dataset(datadir) 20 | 21 | print('Creating networks and loading parameters') 22 | with tf.Graph().as_default(): 23 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) 24 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 25 | with sess.as_default(): 26 | pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/') 27 | 28 | minsize = 20 # minimum size of face 29 | threshold = [0.6, 0.7, 0.7] # three steps's threshold 30 | factor = 0.709 # scale factor 31 | margin = 44 32 | image_size = 182 33 | 34 | # Add a random key to the filename to allow alignment using multiple processes 35 | random_key = np.random.randint(0, high=99999) 36 | bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) 37 | print('Goodluck') 38 | 39 | with open(bounding_boxes_filename, "w") as text_file: 40 | nrof_images_total = 0 41 | nrof_successfully_aligned = 0 42 | for cls in dataset: 43 | output_class_dir = os.path.join(output_dir, cls.name) 44 | if not os.path.exists(output_class_dir): 45 | os.makedirs(output_class_dir) 46 | for image_path in cls.image_paths: 47 | nrof_images_total += 1 48 | filename = os.path.splitext(os.path.split(image_path)[1])[0] 49 | output_filename = os.path.join(output_class_dir, filename + '.png') 50 | print(image_path) 51 | if not os.path.exists(output_filename): 52 | try: 53 | img = misc.imread(image_path) 54 | print('read data dimension: ', img.ndim) 55 | except (IOError, ValueError, IndexError) as e: 56 | errorMessage = '{}: {}'.format(image_path, e) 57 | print(errorMessage) 58 | else: 59 | if img.ndim < 2: 60 | print('Unable to align "%s"' % image_path) 61 | text_file.write('%s\n' % (output_filename)) 62 | continue 63 | if img.ndim == 2: 64 | img = facenet.to_rgb(img) 65 | print('to_rgb data dimension: ', img.ndim) 66 | img = img[:, :, 0:3] 67 | print('after data dimension: ', img.ndim) 68 | 69 | bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) 70 | nrof_faces = bounding_boxes.shape[0] 71 | print('detected_face: %d' % nrof_faces) 72 | if nrof_faces > 0: 73 | det = bounding_boxes[:, 0:4] 74 | img_size = np.asarray(img.shape)[0:2] 75 | if nrof_faces > 1: 76 | bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) 77 | img_center = img_size / 2 78 | offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1], 79 
| (det[:, 1] + det[:, 3]) / 2 - img_center[0]]) 80 | offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) 81 | index = np.argmax(bounding_box_size - offset_dist_squared * 2.0) # some extra weight on the centering 82 | det = det[index, :] 83 | det = np.squeeze(det) 84 | bb_temp = np.zeros(4, dtype=np.int32) 85 | 86 | bb_temp[0] = det[0] 87 | bb_temp[1] = det[1] 88 | bb_temp[2] = det[2] 89 | bb_temp[3] = det[3] 90 | 91 | cropped_temp = img[bb_temp[1]:bb_temp[3], bb_temp[0]:bb_temp[2], :] 92 | scaled_temp = misc.imresize(cropped_temp, (image_size, image_size), interp='bilinear') 93 | 94 | nrof_successfully_aligned += 1 95 | misc.imsave(output_filename, scaled_temp) 96 | text_file.write('%s %d %d %d %d\n' % (output_filename, bb_temp[0], bb_temp[1], bb_temp[2], bb_temp[3])) 97 | else: 98 | print('Unable to align "%s"' % image_path) 99 | text_file.write('%s\n' % (output_filename)) 100 | 101 | print('Total number of images: %d' % nrof_images_total) 102 | print('Number of successfully aligned images: %d' % nrof_successfully_aligned) 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /Make_classifier.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | import facenet 8 | import os 9 | import math 10 | import pickle 11 | from sklearn.svm import SVC 12 | 13 | 14 | with tf.Graph().as_default(): 15 | 16 | with tf.Session() as sess: 17 | 18 | datadir = './output/' 19 | dataset = facenet.get_dataset(datadir) 20 | paths, labels = facenet.get_image_paths_and_labels(dataset) 21 | print('Number of classes: %d' % len(dataset)) 22 | print('Number of images: %d' % len(paths)) 23 | 24 | print('Loading feature extraction model') 25 | modeldir = './models/' 26 | facenet.load_model(modeldir) 27 | 28 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 29 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 30 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 31 | embedding_size = embeddings.get_shape()[1] 32 | 33 | # Run forward pass to calculate embeddings 34 | print('Calculating features for images') 35 | batch_size = 1000 36 | image_size = 160 37 | nrof_images = len(paths) 38 | nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / batch_size)) 39 | emb_array = np.zeros((nrof_images, embedding_size)) 40 | for i in range(nrof_batches_per_epoch): 41 | start_index = i * batch_size 42 | end_index = min((i + 1) * batch_size, nrof_images) 43 | paths_batch = paths[start_index:end_index] 44 | images = facenet.load_data(paths_batch, False, False, image_size) 45 | feed_dict = {images_placeholder: images, phase_train_placeholder: False} 46 | emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict) 47 | 48 | classifier_filename = './myclassifier/my_classifier.pkl' 49 | classifier_filename_exp = os.path.expanduser(classifier_filename) 50 | 51 | # Train classifier 52 | print('Training classifier') 53 | model = SVC(kernel='linear', probability=True) 54 | model.fit(emb_array, labels) 55 | 56 | # Create a list of class names 57 | class_names = [cls.name.replace('_', ' ') for cls in dataset] 58 | 59 | # Saving classifier model 60 | with open(classifier_filename_exp, 'wb') as outfile: 61 | pickle.dump((model, class_names), outfile) 62 | print('Saved classifier 
model to file "%s"' % classifier_filename_exp)
63 |         print('Goodluck')
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # face_recognition
2 | Real-time face recognition with MTCNN and FaceNet
3 | 
4 | ## Before running the code
5 | 
6 | You need to do the following:
7 | 
8 | * I have already uploaded det1.npy, det2.npy and det3.npy for MTCNN, but you still need to download a FaceNet model from [davidsandberg's
9 | github](https://github.com/davidsandberg/facenet), e.g. 20170511-185253; extract the pb file and put it in the models directory.
10 | * tensorflow-gpu 1.1.0; later versions may also work.
11 | * python 3.x
12 | 
13 | 
14 | ## Inspiration
15 | 
16 | * OpenFace
17 | * [davidsandberg's github](https://github.com/davidsandberg/facenet)
18 | * the main code is adapted from bearsprogrammer
19 | 
20 | ## Notes
21 | 
22 | `Remember to change the code wherever names appear: use your own name and your friends' names instead of mine.`
23 | 
24 | ## Run the code
25 | 
26 | Follow these steps:
27 | 
28 | * To make it easy to capture your photos and place them in the right structure (described in the input and output directories' readme.md files), I
29 | provide getphoto.py, which takes photos with OpenCV and automatically saves them to the input directory in the expected format.
30 | * Next, run Make_aligndata.py to align your photos: it crops just the face region and automatically writes the results to the output directory in the expected format. These photos will be used to train our own classifier.
31 | * Run Make_classifier.py to train our own classifier with an SVM. Of course, you can use your own classifier if you want. Afterwards you should
32 | see a my_classifier.pkl file in the myclassifier directory.
33 | * Finally, run realtime_facenet.py or real_time.py.
34 | realtime_facenet.py is the MTCNN version; real_time.py is an alternative that uses OpenCV's Haar detector instead of MTCNN.
35 | 
36 | ## Result
37 | 
38 | If everything is OK, you will see a result like this:
39 | 
40 | ![](https://github.com/cryer/face_recognition/raw/master/image/1.png)
41 | 
42 | ## More
43 | 
44 | I wrote an introduction to MTCNN and FaceNet in Chinese. [See my blog for details](https://cryer.github.io/2018/01/facerecognition/)
45 | 
--------------------------------------------------------------------------------
/detect_face.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | from six import string_types, iteritems
7 | 
8 | import numpy as np
9 | import tensorflow as tf
10 | import cv2
11 | import os
12 | 
13 | def layer(op):
14 |     '''Decorator for composable network layers.'''
15 | 
16 |     def layer_decorated(self, *args, **kwargs):
17 |         # Automatically set a name if not provided.
18 |         name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
19 |         # Figure out the layer inputs.
20 |         if len(self.terminals) == 0:
21 |             raise RuntimeError('No input variables found for layer %s.' % name)
22 |         elif len(self.terminals) == 1:
23 |             layer_input = self.terminals[0]
24 |         else:
25 |             layer_input = list(self.terminals)
26 |         # Perform the operation and get the output.
27 |         layer_output = op(self, layer_input, *args, **kwargs)
28 |         # Add to layer LUT.
29 |         self.layers[name] = layer_output
30 |         # This output is now the input for the next layer.
31 |         self.feed(layer_output)
32 |         # Return self for chained calls.
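        # (Each layer op records its output and hands back the Network object,
        # so definitions can be chained: net.feed('data').conv(...).prelu(...))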
33 | return self 34 | 35 | return layer_decorated 36 | 37 | class Network(object): 38 | 39 | def __init__(self, inputs, trainable=True): 40 | # The input nodes for this network 41 | self.inputs = inputs 42 | # The current list of terminal nodes 43 | self.terminals = [] 44 | # Mapping from layer names to layers 45 | self.layers = dict(inputs) 46 | # If true, the resulting variables are set as trainable 47 | self.trainable = trainable 48 | 49 | self.setup() 50 | 51 | def setup(self): 52 | '''Construct the network. ''' 53 | raise NotImplementedError('Must be implemented by the subclass.') 54 | 55 | def load(self, data_path, session, ignore_missing=False): 56 | '''Load network weights. 57 | data_path: The path to the numpy-serialized network weights 58 | session: The current TensorFlow session 59 | ignore_missing: If true, serialized weights for missing layers are ignored. 60 | ''' 61 | data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member 62 | 63 | for op_name in data_dict: 64 | with tf.variable_scope(op_name, reuse=True): 65 | for param_name, data in iteritems(data_dict[op_name]): 66 | try: 67 | var = tf.get_variable(param_name) 68 | session.run(var.assign(data)) 69 | except ValueError: 70 | if not ignore_missing: 71 | raise 72 | 73 | def feed(self, *args): 74 | '''Set the input(s) for the next operation by replacing the terminal nodes. 75 | The arguments can be either layer names or the actual layers. 76 | ''' 77 | assert len(args) != 0 78 | self.terminals = [] 79 | for fed_layer in args: 80 | if isinstance(fed_layer, string_types): 81 | try: 82 | fed_layer = self.layers[fed_layer] 83 | except KeyError: 84 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 85 | self.terminals.append(fed_layer) 86 | return self 87 | 88 | def get_output(self): 89 | '''Returns the current network output.''' 90 | return self.terminals[-1] 91 | 92 | def get_unique_name(self, prefix): 93 | '''Returns an index-suffixed unique name for the given prefix. 94 | This is used for auto-generating layer names based on the type-prefix. 95 | ''' 96 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 97 | return '%s_%d' % (prefix, ident) 98 | 99 | def make_var(self, name, shape): 100 | '''Creates a new TensorFlow variable.''' 101 | return tf.get_variable(name, shape, trainable=self.trainable) 102 | 103 | def validate_padding(self, padding): 104 | '''Verifies that the padding is one of the supported ones.''' 105 | assert padding in ('SAME', 'VALID') 106 | 107 | @layer 108 | def conv(self, 109 | inp, 110 | k_h, 111 | k_w, 112 | c_o, 113 | s_h, 114 | s_w, 115 | name, 116 | relu=True, 117 | padding='SAME', 118 | group=1, 119 | biased=True): 120 | # Verify that the padding is acceptable 121 | self.validate_padding(padding) 122 | # Get the number of channels in the input 123 | c_i = int(inp.get_shape()[-1]) 124 | # Verify that the grouping parameter is valid 125 | assert c_i % group == 0 126 | assert c_o % group == 0 127 | # Convolution for a given input and kernel 128 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 129 | with tf.variable_scope(name) as scope: 130 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 131 | # This is the common-case. Convolve the input without any further complications. 
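            # (Only group=1 is actually handled by this plain conv2d; the
            # MTCNN nets defined below never use grouped convolutions.)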
132 | output = convolve(inp, kernel) 133 | # Add the biases 134 | if biased: 135 | biases = self.make_var('biases', [c_o]) 136 | output = tf.nn.bias_add(output, biases) 137 | if relu: 138 | # ReLU non-linearity 139 | output = tf.nn.relu(output, name=scope.name) 140 | return output 141 | 142 | @layer 143 | def prelu(self, inp, name): 144 | with tf.variable_scope(name): 145 | i = int(inp.get_shape()[-1]) 146 | alpha = self.make_var('alpha', shape=(i,)) 147 | output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) 148 | return output 149 | 150 | @layer 151 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 152 | self.validate_padding(padding) 153 | return tf.nn.max_pool(inp, 154 | ksize=[1, k_h, k_w, 1], 155 | strides=[1, s_h, s_w, 1], 156 | padding=padding, 157 | name=name) 158 | 159 | @layer 160 | def fc(self, inp, num_out, name, relu=True): 161 | with tf.variable_scope(name): 162 | input_shape = inp.get_shape() 163 | if input_shape.ndims == 4: 164 | # The input is spatial. Vectorize it first. 165 | dim = 1 166 | for d in input_shape[1:].as_list(): 167 | dim *= int(d) 168 | feed_in = tf.reshape(inp, [-1, dim]) 169 | else: 170 | feed_in, dim = (inp, input_shape[-1].value) 171 | weights = self.make_var('weights', shape=[dim, num_out]) 172 | biases = self.make_var('biases', [num_out]) 173 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 174 | fc = op(feed_in, weights, biases, name=name) 175 | return fc 176 | 177 | 178 | """ 179 | Multi dimensional softmax, 180 | refer to https://github.com/tensorflow/tensorflow/issues/210 181 | compute softmax along the dimension of target 182 | the native softmax only supports batch_size x dimension 183 | """ 184 | @layer 185 | def softmax(self, target, axis, name=None): 186 | max_axis = tf.reduce_max(target, axis, keep_dims=True) 187 | target_exp = tf.exp(target-max_axis) 188 | normalize = tf.reduce_sum(target_exp, axis, keep_dims=True) 189 | softmax = tf.div(target_exp, normalize, name) 190 | return softmax 191 | 192 | class PNet(Network): 193 | def setup(self): 194 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 195 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 196 | .prelu(name='PReLU1') 197 | .max_pool(2, 2, 2, 2, name='pool1') 198 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 199 | .prelu(name='PReLU2') 200 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 201 | .prelu(name='PReLU3') 202 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 203 | .softmax(3,name='prob1')) 204 | 205 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 206 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 207 | 208 | class RNet(Network): 209 | def setup(self): 210 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 211 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 212 | .prelu(name='prelu1') 213 | .max_pool(3, 3, 2, 2, name='pool1') 214 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 215 | .prelu(name='prelu2') 216 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 217 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 218 | .prelu(name='prelu3') 219 | .fc(128, relu=False, name='conv4') 220 | .prelu(name='prelu4') 221 | .fc(2, relu=False, name='conv5-1') 222 | .softmax(1,name='prob1')) 223 | 224 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 225 | .fc(4, relu=False, name='conv5-2')) 226 | 227 | class ONet(Network): 228 | def setup(self): 229 | 
(self.feed('data') #pylint: disable=no-value-for-parameter, no-member 230 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 231 | .prelu(name='prelu1') 232 | .max_pool(3, 3, 2, 2, name='pool1') 233 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 234 | .prelu(name='prelu2') 235 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 236 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 237 | .prelu(name='prelu3') 238 | .max_pool(2, 2, 2, 2, name='pool3') 239 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 240 | .prelu(name='prelu4') 241 | .fc(256, relu=False, name='conv5') 242 | .prelu(name='prelu5') 243 | .fc(2, relu=False, name='conv6-1') 244 | .softmax(1, name='prob1')) 245 | 246 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 247 | .fc(4, relu=False, name='conv6-2')) 248 | 249 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 250 | .fc(10, relu=False, name='conv6-3')) 251 | 252 | def create_mtcnn(sess, model_path): 253 | if not model_path: 254 | model_path,_ = os.path.split(os.path.realpath(__file__)) 255 | 256 | with tf.variable_scope('pnet'): 257 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 258 | pnet = PNet({'data':data}) 259 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 260 | with tf.variable_scope('rnet'): 261 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 262 | rnet = RNet({'data':data}) 263 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 264 | with tf.variable_scope('onet'): 265 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 266 | onet = ONet({'data':data}) 267 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 268 | 269 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 270 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 271 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 272 | return pnet_fun, rnet_fun, onet_fun 273 | 274 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 275 | # im: input image 276 | # minsize: minimum of faces' size 277 | # pnet, rnet, onet: caffemodel 278 | # threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold 279 | # fastresize: resize img from last scale (using in high-resolution images) if fastresize==true 280 | factor_count=0 281 | total_boxes=np.empty((0,9)) 282 | points=[] 283 | h=img.shape[0] 284 | w=img.shape[1] 285 | minl=np.amin([h, w]) 286 | m=12.0/minsize 287 | minl=minl*m 288 | # creat scale pyramid 289 | scales=[] 290 | while minl>=12: 291 | scales += [m*np.power(factor, factor_count)] 292 | minl = minl*factor 293 | factor_count += 1 294 | 295 | # first stage 296 | for j in range(len(scales)): 297 | scale=scales[j] 298 | hs=int(np.ceil(h*scale)) 299 | ws=int(np.ceil(w*scale)) 300 | im_data = imresample(img, (hs, ws)) 301 | im_data = (im_data-127.5)*0.0078125 302 | img_x = np.expand_dims(im_data, 0) 303 | img_y = np.transpose(img_x, (0,2,1,3)) 304 | out = pnet(img_y) 305 | out0 = np.transpose(out[0], (0,2,1,3)) 306 | out1 = np.transpose(out[1], (0,2,1,3)) 307 | 308 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 309 | 310 | # inter-scale nms 311 | pick = nms(boxes.copy(), 0.5, 'Union') 312 | if boxes.size>0 and pick.size>0: 313 | boxes = boxes[pick,:] 314 | total_boxes = 
np.append(total_boxes, boxes, axis=0) 315 | 316 | numbox = total_boxes.shape[0] 317 | if numbox>0: 318 | pick = nms(total_boxes.copy(), 0.7, 'Union') 319 | total_boxes = total_boxes[pick,:] 320 | regw = total_boxes[:,2]-total_boxes[:,0] 321 | regh = total_boxes[:,3]-total_boxes[:,1] 322 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 323 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 324 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 325 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 326 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 327 | total_boxes = rerec(total_boxes.copy()) 328 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 329 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 330 | 331 | numbox = total_boxes.shape[0] 332 | if numbox>0: 333 | # second stage 334 | tempimg = np.zeros((24,24,3,numbox)) 335 | for k in range(0,numbox): 336 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 337 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 338 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 339 | tempimg[:,:,:,k] = imresample(tmp, (24, 24)) 340 | else: 341 | return np.empty() 342 | tempimg = (tempimg-127.5)*0.0078125 343 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 344 | out = rnet(tempimg1) 345 | out0 = np.transpose(out[0]) 346 | out1 = np.transpose(out[1]) 347 | score = out1[1,:] 348 | ipass = np.where(score>threshold[1]) 349 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 350 | mv = out0[:,ipass[0]] 351 | if total_boxes.shape[0]>0: 352 | pick = nms(total_boxes, 0.7, 'Union') 353 | total_boxes = total_boxes[pick,:] 354 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) 355 | total_boxes = rerec(total_boxes.copy()) 356 | 357 | numbox = total_boxes.shape[0] 358 | if numbox>0: 359 | # third stage 360 | total_boxes = np.fix(total_boxes).astype(np.int32) 361 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 362 | tempimg = np.zeros((48,48,3,numbox)) 363 | for k in range(0,numbox): 364 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 365 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 366 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 367 | tempimg[:,:,:,k] = imresample(tmp, (48, 48)) 368 | else: 369 | return np.empty() 370 | tempimg = (tempimg-127.5)*0.0078125 371 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 372 | out = onet(tempimg1) 373 | out0 = np.transpose(out[0]) 374 | out1 = np.transpose(out[1]) 375 | out2 = np.transpose(out[2]) 376 | score = out2[1,:] 377 | points = out1 378 | ipass = np.where(score>threshold[2]) 379 | points = points[:,ipass[0]] 380 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 381 | mv = out0[:,ipass[0]] 382 | 383 | w = total_boxes[:,2]-total_boxes[:,0]+1 384 | h = total_boxes[:,3]-total_boxes[:,1]+1 385 | points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 386 | points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 387 | if total_boxes.shape[0]>0: 388 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) 389 | pick = nms(total_boxes.copy(), 0.7, 'Min') 390 | total_boxes = total_boxes[pick,:] 391 | points = points[:,pick] 392 | 393 | return total_boxes, points 394 | 395 | 396 | def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, 
threshold, factor): 397 | # im: input image 398 | # minsize: minimum of faces' size 399 | # pnet, rnet, onet: caffemodel 400 | # threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold [0-1] 401 | 402 | all_scales = [None] * len(images) 403 | images_with_boxes = [None] * len(images) 404 | 405 | for i in range(len(images)): 406 | images_with_boxes[i] = {'total_boxes': np.empty((0, 9))} 407 | 408 | # create scale pyramid 409 | for index, img in enumerate(images): 410 | all_scales[index] = [] 411 | h = img.shape[0] 412 | w = img.shape[1] 413 | minsize = int(detection_window_size_ratio * np.minimum(w, h)) 414 | factor_count = 0 415 | minl = np.amin([h, w]) 416 | if minsize <= 12: 417 | minsize = 12 418 | 419 | m = 12.0 / minsize 420 | minl = minl * m 421 | while minl >= 12: 422 | all_scales[index].append(m * np.power(factor, factor_count)) 423 | minl = minl * factor 424 | factor_count += 1 425 | 426 | # # # # # # # # # # # # # 427 | # first stage - fast proposal network (pnet) to obtain face candidates 428 | # # # # # # # # # # # # # 429 | 430 | images_obj_per_resolution = {} 431 | 432 | # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images 433 | 434 | for index, scales in enumerate(all_scales): 435 | h = images[index].shape[0] 436 | w = images[index].shape[1] 437 | 438 | for scale in scales: 439 | hs = int(np.ceil(h * scale)) 440 | ws = int(np.ceil(w * scale)) 441 | 442 | if (ws, hs) not in images_obj_per_resolution: 443 | images_obj_per_resolution[(ws, hs)] = [] 444 | 445 | im_data = imresample(images[index], (hs, ws)) 446 | im_data = (im_data - 127.5) * 0.0078125 447 | img_y = np.transpose(im_data, (1, 0, 2)) # caffe uses different dimensions ordering 448 | images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index}) 449 | 450 | for resolution in images_obj_per_resolution: 451 | images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]] 452 | outs = pnet(images_per_resolution) 453 | 454 | for index in range(len(outs[0])): 455 | scale = images_obj_per_resolution[resolution][index]['scale'] 456 | image_index = images_obj_per_resolution[resolution][index]['index'] 457 | out0 = np.transpose(outs[0][index], (1, 0, 2)) 458 | out1 = np.transpose(outs[1][index], (1, 0, 2)) 459 | 460 | boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0]) 461 | 462 | # inter-scale nms 463 | pick = nms(boxes.copy(), 0.5, 'Union') 464 | if boxes.size > 0 and pick.size > 0: 465 | boxes = boxes[pick, :] 466 | images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'], 467 | boxes, 468 | axis=0) 469 | 470 | for index, image_obj in enumerate(images_with_boxes): 471 | numbox = image_obj['total_boxes'].shape[0] 472 | if numbox > 0: 473 | h = images[index].shape[0] 474 | w = images[index].shape[1] 475 | pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union') 476 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 477 | regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] 478 | regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] 479 | qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw 480 | qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh 481 | qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw 482 | qq4 = image_obj['total_boxes'][:, 3] + 
image_obj['total_boxes'][:, 8] * regh 483 | image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]])) 484 | image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) 485 | image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32) 486 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) 487 | 488 | numbox = image_obj['total_boxes'].shape[0] 489 | tempimg = np.zeros((24, 24, 3, numbox)) 490 | 491 | if numbox > 0: 492 | for k in range(0, numbox): 493 | tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) 494 | tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] 495 | if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: 496 | tempimg[:, :, :, k] = imresample(tmp, (24, 24)) 497 | else: 498 | return np.empty() 499 | 500 | tempimg = (tempimg - 127.5) * 0.0078125 501 | image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) 502 | 503 | # # # # # # # # # # # # # 504 | # second stage - refinement of face candidates with rnet 505 | # # # # # # # # # # # # # 506 | 507 | bulk_rnet_input = np.empty((0, 24, 24, 3)) 508 | for index, image_obj in enumerate(images_with_boxes): 509 | if 'rnet_input' in image_obj: 510 | bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0) 511 | 512 | out = rnet(bulk_rnet_input) 513 | out0 = np.transpose(out[0]) 514 | out1 = np.transpose(out[1]) 515 | score = out1[1, :] 516 | 517 | i = 0 518 | for index, image_obj in enumerate(images_with_boxes): 519 | if 'rnet_input' not in image_obj: 520 | continue 521 | 522 | rnet_input_count = image_obj['rnet_input'].shape[0] 523 | score_per_image = score[i:i + rnet_input_count] 524 | out0_per_image = out0[:, i:i + rnet_input_count] 525 | 526 | ipass = np.where(score_per_image > threshold[1]) 527 | image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), 528 | np.expand_dims(score_per_image[ipass].copy(), 1)]) 529 | 530 | mv = out0_per_image[:, ipass[0]] 531 | 532 | if image_obj['total_boxes'].shape[0] > 0: 533 | h = images[index].shape[0] 534 | w = images[index].shape[1] 535 | pick = nms(image_obj['total_boxes'], 0.7, 'Union') 536 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 537 | image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick])) 538 | image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) 539 | 540 | numbox = image_obj['total_boxes'].shape[0] 541 | 542 | if numbox > 0: 543 | tempimg = np.zeros((48, 48, 3, numbox)) 544 | image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32) 545 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) 546 | 547 | for k in range(0, numbox): 548 | tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) 549 | tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] 550 | if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: 551 | tempimg[:, :, :, k] = imresample(tmp, (48, 48)) 552 | else: 553 | return np.empty() 554 | tempimg = (tempimg - 127.5) * 0.0078125 555 | image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) 556 | 557 | i += rnet_input_count 558 | 559 | # # # # # # # # # # # # # 560 | # third stage - further refinement and facial landmarks positions with onet 561 | # # # # # # # # # # # # # 562 | 563 | bulk_onet_input = np.empty((0, 48, 48, 3)) 564 | for 
index, image_obj in enumerate(images_with_boxes): 565 | if 'onet_input' in image_obj: 566 | bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0) 567 | 568 | out = onet(bulk_onet_input) 569 | 570 | out0 = np.transpose(out[0]) 571 | out1 = np.transpose(out[1]) 572 | out2 = np.transpose(out[2]) 573 | score = out2[1, :] 574 | points = out1 575 | 576 | i = 0 577 | ret = [] 578 | for index, image_obj in enumerate(images_with_boxes): 579 | if 'onet_input' not in image_obj: 580 | ret.append(None) 581 | continue 582 | 583 | onet_input_count = image_obj['onet_input'].shape[0] 584 | 585 | out0_per_image = out0[:, i:i + onet_input_count] 586 | score_per_image = score[i:i + onet_input_count] 587 | points_per_image = points[:, i:i + onet_input_count] 588 | 589 | ipass = np.where(score_per_image > threshold[2]) 590 | points_per_image = points_per_image[:, ipass[0]] 591 | 592 | image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), 593 | np.expand_dims(score_per_image[ipass].copy(), 1)]) 594 | mv = out0_per_image[:, ipass[0]] 595 | 596 | w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1 597 | h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1 598 | points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile( 599 | image_obj['total_boxes'][:, 0], (5, 1)) - 1 600 | points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile( 601 | image_obj['total_boxes'][:, 1], (5, 1)) - 1 602 | 603 | if image_obj['total_boxes'].shape[0] > 0: 604 | image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv)) 605 | pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min') 606 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 607 | points_per_image = points_per_image[:, pick] 608 | 609 | ret.append((image_obj['total_boxes'], points_per_image)) 610 | else: 611 | ret.append(None) 612 | 613 | i += onet_input_count 614 | 615 | return ret 616 | 617 | 618 | # function [boundingbox] = bbreg(boundingbox,reg) 619 | def bbreg(boundingbox,reg): 620 | # calibrate bounding boxes 621 | if reg.shape[1]==1: 622 | reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) 623 | 624 | w = boundingbox[:,2]-boundingbox[:,0]+1 625 | h = boundingbox[:,3]-boundingbox[:,1]+1 626 | b1 = boundingbox[:,0]+reg[:,0]*w 627 | b2 = boundingbox[:,1]+reg[:,1]*h 628 | b3 = boundingbox[:,2]+reg[:,2]*w 629 | b4 = boundingbox[:,3]+reg[:,3]*h 630 | boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) 631 | return boundingbox 632 | 633 | def generateBoundingBox(imap, reg, scale, t): 634 | # use heatmap to generate bounding boxes 635 | stride=2 636 | cellsize=12 637 | 638 | imap = np.transpose(imap) 639 | dx1 = np.transpose(reg[:,:,0]) 640 | dy1 = np.transpose(reg[:,:,1]) 641 | dx2 = np.transpose(reg[:,:,2]) 642 | dy2 = np.transpose(reg[:,:,3]) 643 | y, x = np.where(imap >= t) 644 | if y.shape[0]==1: 645 | dx1 = np.flipud(dx1) 646 | dy1 = np.flipud(dy1) 647 | dx2 = np.flipud(dx2) 648 | dy2 = np.flipud(dy2) 649 | score = imap[(y,x)] 650 | reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) 651 | if reg.size==0: 652 | reg = np.empty((0,3)) 653 | bb = np.transpose(np.vstack([y,x])) 654 | q1 = np.fix((stride*bb+1)/scale) 655 | q2 = np.fix((stride*bb+cellsize-1+1)/scale) 656 | boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) 657 | return boundingbox, reg 658 | 659 | # function pick = nms(boxes,threshold,type) 660 | def nms(boxes, threshold, 
method):
661 |     if boxes.size==0:
662 |         return np.empty((0,3))
663 |     x1 = boxes[:,0]
664 |     y1 = boxes[:,1]
665 |     x2 = boxes[:,2]
666 |     y2 = boxes[:,3]
667 |     s = boxes[:,4]
668 |     area = (x2-x1+1) * (y2-y1+1)
669 |     I = np.argsort(s)
670 |     pick = np.zeros_like(s, dtype=np.int16)
671 |     counter = 0
672 |     while I.size>0:
673 |         i = I[-1]
674 |         pick[counter] = i
675 |         counter += 1
676 |         idx = I[0:-1]
677 |         xx1 = np.maximum(x1[i], x1[idx])
678 |         yy1 = np.maximum(y1[i], y1[idx])
679 |         xx2 = np.minimum(x2[i], x2[idx])
680 |         yy2 = np.minimum(y2[i], y2[idx])
681 |         w = np.maximum(0.0, xx2-xx1+1)
682 |         h = np.maximum(0.0, yy2-yy1+1)
683 |         inter = w * h
684 |         if method == 'Min':
685 |             o = inter / np.minimum(area[i], area[idx])
686 |         else:
687 |             o = inter / (area[i] + area[idx] - inter)
688 |         I = I[np.where(o<=threshold)]
689 |     pick = pick[0:counter]
690 |     return pick
691 | 
692 | # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
693 | def pad(total_boxes, w, h):
694 |     # compute the padding coordinates (pad the bounding boxes to square)
695 |     tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
696 |     tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
697 |     numbox = total_boxes.shape[0]
698 | 
699 |     dx = np.ones((numbox), dtype=np.int32)
700 |     dy = np.ones((numbox), dtype=np.int32)
701 |     edx = tmpw.copy().astype(np.int32)
702 |     edy = tmph.copy().astype(np.int32)
703 | 
704 |     x = total_boxes[:,0].copy().astype(np.int32)
705 |     y = total_boxes[:,1].copy().astype(np.int32)
706 |     ex = total_boxes[:,2].copy().astype(np.int32)
707 |     ey = total_boxes[:,3].copy().astype(np.int32)
708 | 
709 |     tmp = np.where(ex>w)
710 |     edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
711 |     ex[tmp] = w
712 | 
713 |     tmp = np.where(ey>h)
714 |     edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
715 |     ey[tmp] = h
716 | 
717 |     tmp = np.where(x<1)
718 |     dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
719 |     x[tmp] = 1
720 | 
721 |     tmp = np.where(y<1)
722 |     dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
723 |     y[tmp] = 1
724 | 
725 |     return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
726 | 
727 | # function [bboxA] = rerec(bboxA)
728 | def rerec(bboxA):
729 |     # convert bboxA to square
730 |     h = bboxA[:,3]-bboxA[:,1]
731 |     w = bboxA[:,2]-bboxA[:,0]
732 |     l = np.maximum(w, h)
733 |     bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
734 |     bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
735 |     bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
736 |     return bboxA
737 | 
738 | def imresample(img, sz):
739 |     im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable
740 |     return im_data
741 | 
742 |     # This method is kept for debugging purpose
743 | #     h=img.shape[0]
744 | #     w=img.shape[1]
745 | #     hs, ws = sz
746 | #     dx = float(w) / ws
747 | #     dy = float(h) / hs
748 | #     im_data = np.zeros((hs,ws,3))
749 | #     for a1 in range(0,hs):
750 | #         for a2 in range(0,ws):
751 | #             for a3 in range(0,3):
752 | #                 im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
753 | #     return im_data
754 | 
755 | 
--------------------------------------------------------------------------------
/facenet.py:
--------------------------------------------------------------------------------
1 | 
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 | 
6 | import os
7 | from subprocess import Popen, PIPE
8 | import tensorflow as tf
9 | from tensorflow.python.framework import ops
10 | import numpy as np
11 | from scipy import misc
12 | from sklearn.model_selection import
KFold 13 | from scipy import interpolate 14 | from tensorflow.python.training import training 15 | import random 16 | import re 17 | from tensorflow.python.platform import gfile 18 | 19 | def triplet_loss(anchor, positive, negative, alpha): 20 | """Calculate the triplet loss according to the FaceNet paper 21 | 22 | Args: 23 | anchor: the embeddings for the anchor images. 24 | positive: the embeddings for the positive images. 25 | negative: the embeddings for the negative images. 26 | 27 | Returns: 28 | the triplet loss according to the FaceNet paper as a float tensor. 29 | """ 30 | with tf.variable_scope('triplet_loss'): 31 | pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1) 32 | neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1) 33 | 34 | basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha) 35 | loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0) 36 | 37 | return loss 38 | 39 | def decov_loss(xs): 40 | """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf 41 | 'Reducing Overfitting In Deep Networks by Decorrelating Representation' 42 | """ 43 | x = tf.reshape(xs, [int(xs.get_shape()[0]), -1]) 44 | m = tf.reduce_mean(x, 0, True) 45 | z = tf.expand_dims(x-m, 2) 46 | corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0) 47 | corr_frob_sqr = tf.reduce_sum(tf.square(corr)) 48 | corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr))) 49 | loss = 0.5*(corr_frob_sqr - corr_diag_sqr) 50 | return loss 51 | 52 | def center_loss(features, label, alfa, nrof_classes): 53 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 54 | (http://ydwen.github.io/papers/WenECCV16.pdf) 55 | """ 56 | nrof_features = features.get_shape()[1] 57 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 58 | initializer=tf.constant_initializer(0), trainable=False) 59 | label = tf.reshape(label, [-1]) 60 | centers_batch = tf.gather(centers, label) 61 | diff = (1 - alfa) * (centers_batch - features) 62 | centers = tf.scatter_sub(centers, label, diff) 63 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 64 | return loss, centers 65 | 66 | def get_image_paths_and_labels(dataset): 67 | image_paths_flat = [] 68 | labels_flat = [] 69 | for i in range(len(dataset)): 70 | image_paths_flat += dataset[i].image_paths 71 | labels_flat += [i] * len(dataset[i].image_paths) 72 | return image_paths_flat, labels_flat 73 | 74 | def shuffle_examples(image_paths, labels): 75 | shuffle_list = list(zip(image_paths, labels)) 76 | random.shuffle(shuffle_list) 77 | image_paths_shuff, labels_shuff = zip(*shuffle_list) 78 | return image_paths_shuff, labels_shuff 79 | 80 | def read_images_from_disk(input_queue): 81 | """Consumes a single filename and label as a ' '-delimited string. 82 | Args: 83 | filename_and_label_tensor: A scalar string tensor. 84 | Returns: 85 | Two tensors: the decoded image, and the string label. 
86 |     """
87 |     label = input_queue[1]
88 |     file_contents = tf.read_file(input_queue[0])
89 |     example = tf.image.decode_png(file_contents, channels=3)
90 |     return example, label
91 | 
92 | def random_rotate_image(image):
93 |     angle = np.random.uniform(low=-10.0, high=10.0)
94 |     return misc.imrotate(image, angle, 'bicubic')
95 | 
96 | def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs, 
97 |         random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True):
98 | 
99 |     images = ops.convert_to_tensor(image_list, dtype=tf.string)
100 |     labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
101 | 
102 |     # Makes an input queue
103 |     input_queue = tf.train.slice_input_producer([images, labels],
104 |         num_epochs=max_nrof_epochs, shuffle=shuffle)
105 | 
106 |     images_and_labels = []
107 |     for _ in range(nrof_preprocess_threads):
108 |         image, label = read_images_from_disk(input_queue)
109 |         if random_rotate:
110 |             image = tf.py_func(random_rotate_image, [image], tf.uint8)
111 |         if random_crop:
112 |             image = tf.random_crop(image, [image_size, image_size, 3])
113 |         else:
114 |             image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size)
115 |         if random_flip:
116 |             image = tf.image.random_flip_left_right(image)
117 |         #pylint: disable=no-member
118 |         image.set_shape((image_size, image_size, 3))
119 |         image = tf.image.per_image_standardization(image)
120 |         images_and_labels.append([image, label])
121 | 
122 |     image_batch, label_batch = tf.train.batch_join(
123 |         images_and_labels, batch_size=batch_size,
124 |         capacity=4 * nrof_preprocess_threads * batch_size,
125 |         allow_smaller_final_batch=True)
126 | 
127 |     return image_batch, label_batch
128 | 
129 | def _add_loss_summaries(total_loss):
130 |     """Add summaries for losses.
131 | 
132 |     Generates moving average for all losses and associated summaries for
133 |     visualizing the performance of the network.
134 | 
135 |     Args:
136 |         total_loss: Total loss from loss().
137 |     Returns:
138 |         loss_averages_op: op for generating moving averages of losses.
139 |     """
140 |     # Compute the moving average of all individual losses and the total loss.
141 |     loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
142 |     losses = tf.get_collection('losses')
143 |     loss_averages_op = loss_averages.apply(losses + [total_loss])
144 | 
145 |     # Attach a scalar summary to all individual losses and the total loss; do the
146 |     # same for the averaged version of the losses.
147 |     for l in losses + [total_loss]:
148 |         # Name each loss as '(raw)' and name the moving average version of the loss
149 |         # as the original loss name.
150 |         tf.summary.scalar(l.op.name +' (raw)', l)
151 |         tf.summary.scalar(l.op.name, loss_averages.average(l))
152 | 
153 |     return loss_averages_op
154 | 
155 | def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True):
156 |     # Generate moving averages of all losses and associated summaries.
157 |     loss_averages_op = _add_loss_summaries(total_loss)
158 | 
159 |     # Compute gradients.
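    # (Gating on loss_averages_op updates the loss moving averages before each
    # training step; the optimizer is then selected by name below.)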
160 | with tf.control_dependencies([loss_averages_op]): 161 | if optimizer=='ADAGRAD': 162 | opt = tf.train.AdagradOptimizer(learning_rate) 163 | elif optimizer=='ADADELTA': 164 | opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6) 165 | elif optimizer=='ADAM': 166 | opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1) 167 | elif optimizer=='RMSPROP': 168 | opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0) 169 | elif optimizer=='MOM': 170 | opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True) 171 | else: 172 | raise ValueError('Invalid optimization algorithm') 173 | 174 | grads = opt.compute_gradients(total_loss, update_gradient_vars) 175 | 176 | # Apply gradients. 177 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) 178 | 179 | # Add histograms for trainable variables. 180 | if log_histograms: 181 | for var in tf.trainable_variables(): 182 | tf.summary.histogram(var.op.name, var) 183 | 184 | # Add histograms for gradients. 185 | if log_histograms: 186 | for grad, var in grads: 187 | if grad is not None: 188 | tf.summary.histogram(var.op.name + '/gradients', grad) 189 | 190 | # Track the moving averages of all trainable variables. 191 | variable_averages = tf.train.ExponentialMovingAverage( 192 | moving_average_decay, global_step) 193 | variables_averages_op = variable_averages.apply(tf.trainable_variables()) 194 | 195 | with tf.control_dependencies([apply_gradient_op, variables_averages_op]): 196 | train_op = tf.no_op(name='train') 197 | 198 | return train_op 199 | 200 | def prewhiten(x): 201 | mean = np.mean(x) 202 | std = np.std(x) 203 | std_adj = np.maximum(std, 1.0/np.sqrt(x.size)) 204 | y = np.multiply(np.subtract(x, mean), 1/std_adj) 205 | return y 206 | 207 | def crop(image, random_crop, image_size): 208 | if image.shape[1]>image_size: 209 | sz1 = int(image.shape[1]//2) 210 | sz2 = int(image_size//2) 211 | if random_crop: 212 | diff = sz1-sz2 213 | (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) 214 | else: 215 | (h, v) = (0,0) 216 | image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] 217 | return image 218 | 219 | def flip(image, random_flip): 220 | if random_flip and np.random.choice([True, False]): 221 | image = np.fliplr(image) 222 | return image 223 | 224 | def to_rgb(img): 225 | w, h = img.shape 226 | ret = np.empty((w, h, 3), dtype=np.uint8) 227 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 228 | return ret 229 | 230 | def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): 231 | nrof_samples = len(image_paths) 232 | images = np.zeros((nrof_samples, image_size, image_size, 3)) 233 | for i in range(nrof_samples): 234 | img = misc.imread(image_paths[i]) 235 | if img.ndim == 2: 236 | img = to_rgb(img) 237 | if do_prewhiten: 238 | img = prewhiten(img) 239 | img = crop(img, do_random_crop, image_size) 240 | img = flip(img, do_random_flip) 241 | images[i,:,:,:] = img 242 | return images 243 | 244 | def get_label_batch(label_data, batch_size, batch_index): 245 | nrof_examples = np.size(label_data, 0) 246 | j = batch_index*batch_size % nrof_examples 247 | if j+batch_size<=nrof_examples: 248 | batch = label_data[j:j+batch_size] 249 | else: 250 | x1 = label_data[j:nrof_examples] 251 | x2 = label_data[0:nrof_examples-j] 252 | batch = np.vstack([x1,x2]) 253 | batch_int = batch.astype(np.int64) 254 | return batch_int 255 | 256 | def get_batch(image_data, batch_size, batch_index): 
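    # (Builds a float32 batch, wrapping around to the start of image_data when
    # batch_index*batch_size runs past the end of the array.)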
257 |     nrof_examples = np.size(image_data, 0)
258 |     j = batch_index*batch_size % nrof_examples
259 |     if j+batch_size<=nrof_examples:
260 |         batch = image_data[j:j+batch_size,:,:,:]
261 |     else:
262 |         x1 = image_data[j:nrof_examples,:,:,:]
263 |         x2 = image_data[0:nrof_examples-j,:,:,:]
264 |         batch = np.vstack([x1,x2])
265 |     batch_float = batch.astype(np.float32)
266 |     return batch_float
267 | 
268 | def get_triplet_batch(triplets, batch_index, batch_size):
269 |     ax, px, nx = triplets
270 |     a = get_batch(ax, int(batch_size/3), batch_index)
271 |     p = get_batch(px, int(batch_size/3), batch_index)
272 |     n = get_batch(nx, int(batch_size/3), batch_index)
273 |     batch = np.vstack([a, p, n])
274 |     return batch
275 | 
276 | def get_learning_rate_from_file(filename, epoch):
277 |     with open(filename, 'r') as f:
278 |         for line in f.readlines():
279 |             line = line.split('#', 1)[0]
280 |             if line:
281 |                 par = line.strip().split(':')
282 |                 e = int(par[0])
283 |                 lr = float(par[1])
284 |                 if e <= epoch:
285 |                     learning_rate = lr
286 |                 else:
287 |                     return learning_rate
288 | 
289 | class ImageClass():
290 |     "Stores the paths to images for a given class"
291 |     def __init__(self, name, image_paths):
292 |         self.name = name
293 |         self.image_paths = image_paths
294 | 
295 |     def __str__(self):
296 |         return self.name + ', ' + str(len(self.image_paths)) + ' images'
297 | 
298 |     def __len__(self):
299 |         return len(self.image_paths)
300 | 
301 | def get_dataset(paths, has_class_directories=True):
302 |     dataset = []
303 |     for path in paths.split(':'):
304 |         path_exp = os.path.expanduser(path)
305 |         classes = os.listdir(path_exp)
306 |         classes.sort()
307 |         nrof_classes = len(classes)
308 |         for i in range(nrof_classes):
309 |             class_name = classes[i]
310 |             facedir = os.path.join(path_exp, class_name)
311 |             image_paths = get_image_paths(facedir)
312 |             dataset.append(ImageClass(class_name, image_paths))
313 | 
314 |     return dataset
315 | 
316 | def get_image_paths(facedir):
317 |     image_paths = []
318 |     if os.path.isdir(facedir):
319 |         images = os.listdir(facedir)
320 |         image_paths = [os.path.join(facedir,img) for img in images]
321 |     return image_paths
322 | 
323 | def split_dataset(dataset, split_ratio, mode):
324 |     if mode=='SPLIT_CLASSES':
325 |         nrof_classes = len(dataset)
326 |         class_indices = np.arange(nrof_classes)
327 |         np.random.shuffle(class_indices)
328 |         split = int(round(nrof_classes*split_ratio))
329 |         train_set = [dataset[i] for i in class_indices[0:split]]
330 |         test_set = [dataset[i] for i in class_indices[split:-1]]
331 |     elif mode=='SPLIT_IMAGES':
332 |         train_set = []
333 |         test_set = []
334 |         min_nrof_images = 2
335 |         for cls in dataset:
336 |             paths = cls.image_paths
337 |             np.random.shuffle(paths)
338 |             split = int(round(len(paths)*split_ratio))
339 |             if split<min_nrof_images:
340 |                 continue  # Not enough images for test set. Skip class...
341 |             train_set.append(ImageClass(cls.name, paths[0:split]))
342 |             test_set.append(ImageClass(cls.name, paths[split:-1]))
343 |     else:
344 |         raise ValueError('Invalid train/test split mode "%s"' % mode)
345 |     return train_set, test_set
346 | 
347 | def load_model(model):
348 |     # Check if the model is a model directory (containing a metagraph and a checkpoint file)
349 |     #  or if it is a protobuf file with a frozen graph
350 |     model_exp = os.path.expanduser(model)
351 |     if (os.path.isfile(model_exp)):
352 |         print('Model filename: %s' % model_exp)
353 |         with gfile.FastGFile(model_exp,'rb') as f:
354 |             graph_def = tf.GraphDef()
355 |             graph_def.ParseFromString(f.read())
356 |             tf.import_graph_def(graph_def, name='')
357 |     else:
358 |         print('Model directory: %s' % model_exp)
359 |         meta_file, ckpt_file = get_model_filenames(model_exp)
360 | 
361 |         print('Metagraph file: %s' % meta_file)
362 |         print('Checkpoint file: %s' % ckpt_file)
363 | 
364 |         saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file))
365 |         saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
366 | 
367 | def get_model_filenames(model_dir):
368 |     files = os.listdir(model_dir)
369 |     meta_files = [s for s in files if s.endswith('.meta')]
370 |     if len(meta_files)==0:
371 |         raise ValueError('No meta file found in the model directory (%s)' % model_dir)
372 |     elif len(meta_files)>1:
373 |         raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
374 |     meta_file = meta_files[0]
375 |     meta_files = [s for s in files if '.ckpt' in s]
376 |     max_step = -1
377 |     for f in files:
378 |         step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
379 |         if step_str is not None and len(step_str.groups())>=2:
380 |             step = int(step_str.groups()[1])
381 |             if step > max_step:
382 |                 max_step = step
383 |                 ckpt_file = step_str.groups()[0]
384 |     return meta_file, ckpt_file
385 | 
386 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
387 |     assert(embeddings1.shape[0] == embeddings2.shape[0])
388 |     assert(embeddings1.shape[1] == embeddings2.shape[1])
389 |     nrof_pairs =
min(len(actual_issame), embeddings1.shape[0]) 390 | nrof_thresholds = len(thresholds) 391 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 392 | 393 | tprs = np.zeros((nrof_folds,nrof_thresholds)) 394 | fprs = np.zeros((nrof_folds,nrof_thresholds)) 395 | accuracy = np.zeros((nrof_folds)) 396 | 397 | diff = np.subtract(embeddings1, embeddings2) 398 | dist = np.sum(np.square(diff),1) 399 | indices = np.arange(nrof_pairs) 400 | 401 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 402 | 403 | # Find the best threshold for the fold 404 | acc_train = np.zeros((nrof_thresholds)) 405 | for threshold_idx, threshold in enumerate(thresholds): 406 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 407 | best_threshold_index = np.argmax(acc_train) 408 | for threshold_idx, threshold in enumerate(thresholds): 409 | tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set]) 410 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 411 | 412 | tpr = np.mean(tprs,0) 413 | fpr = np.mean(fprs,0) 414 | return tpr, fpr, accuracy 415 | 416 | def calculate_accuracy(threshold, dist, actual_issame): 417 | predict_issame = np.less(dist, threshold) 418 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 419 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 420 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 421 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 422 | 423 | tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn) 424 | fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn) 425 | acc = float(tp+tn)/dist.size 426 | return tpr, fpr, acc 427 | 428 | 429 | 430 | def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10): 431 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 432 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 433 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 434 | nrof_thresholds = len(thresholds) 435 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 436 | 437 | val = np.zeros(nrof_folds) 438 | far = np.zeros(nrof_folds) 439 | 440 | diff = np.subtract(embeddings1, embeddings2) 441 | dist = np.sum(np.square(diff),1) 442 | indices = np.arange(nrof_pairs) 443 | 444 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 445 | 446 | # Find the threshold that gives FAR = far_target 447 | far_train = np.zeros(nrof_thresholds) 448 | for threshold_idx, threshold in enumerate(thresholds): 449 | _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) 450 | if np.max(far_train)>=far_target: 451 | f = interpolate.interp1d(far_train, thresholds, kind='slinear') 452 | threshold = f(far_target) 453 | else: 454 | threshold = 0.0 455 | 456 | val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) 457 | 458 | val_mean = np.mean(val) 459 | far_mean = np.mean(far) 460 | val_std = np.std(val) 461 | return val_mean, val_std, far_mean 462 | 463 | 464 | def calculate_val_far(threshold, dist, actual_issame): 465 | predict_issame = np.less(dist, threshold) 466 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 467 | false_accept = np.sum(np.logical_and(predict_issame, 
np.logical_not(actual_issame)))
468 |     n_same = np.sum(actual_issame)
469 |     n_diff = np.sum(np.logical_not(actual_issame))
470 |     val = float(true_accept) / float(n_same)
471 |     far = float(false_accept) / float(n_diff)
472 |     return val, far
473 | 
474 | def store_revision_info(src_path, output_dir, arg_string):
475 | 
476 |     # Get git hash
477 |     gitproc = Popen(['git', 'rev-parse', 'HEAD'], stdout = PIPE, cwd=src_path)
478 |     (stdout, _) = gitproc.communicate()
479 |     git_hash = stdout.strip()
480 | 
481 |     # Get local changes
482 |     gitproc = Popen(['git', 'diff', 'HEAD'], stdout = PIPE, cwd=src_path)
483 |     (stdout, _) = gitproc.communicate()
484 |     git_diff = stdout.strip()
485 | 
486 |     # Store a text file in the log directory
487 |     rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
488 |     with open(rev_info_filename, "w") as text_file:
489 |         text_file.write('arguments: %s\n--------------------\n' % arg_string)
490 |         text_file.write('git hash: %s\n--------------------\n' % git_hash)
491 |         text_file.write('%s' % git_diff)
492 | 
493 | def list_variables(filename):
494 |     reader = training.NewCheckpointReader(filename)
495 |     variable_map = reader.get_variable_to_shape_map()
496 |     names = sorted(variable_map.keys())
497 |     return names
498 | 
499 | def put_images_on_grid(images, shape=(16,8)):
500 |     nrof_images = images.shape[0]
501 |     img_size = images.shape[1]
502 |     bw = 3
503 |     img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32)
504 |     for i in range(shape[1]):
505 |         x_start = i*(img_size+bw)+bw
506 |         for j in range(shape[0]):
507 |             img_index = i*shape[0]+j
508 |             if img_index>=nrof_images:
509 |                 break
510 |             y_start = j*(img_size+bw)+bw
511 |             img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :]
512 |         if img_index>=nrof_images:
513 |             break
514 |     return img
515 | 
516 | def write_arguments_to_file(args, filename):
517 |     with open(filename, 'w') as f:
518 |         for key, value in vars(args).items():
519 |             f.write('%s: %s\n' % (key, str(value)))
520 | 
--------------------------------------------------------------------------------
/getphoto.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | 
3 | # Capture frames from the default webcam and save every 10th frame that
4 | # contains exactly one face. Change 'input/xuguanyu/' to your own name;
5 | # the directory must already exist.
6 | video_capture = cv2.VideoCapture(0)
7 | # Load the Haar cascade once, rather than once per frame.
8 | classifier = cv2.CascadeClassifier("./haarcascade_frontalface_alt2.xml")
9 | c = 0
10 | while True:
11 |     ret, frame = video_capture.read()
12 | 
13 |     faceRects = classifier.detectMultiScale(frame, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
14 | 
15 |     if len(faceRects) == 1:
16 |         c += 1
17 |         if c % 10 == 0:
18 |             cv2.imwrite('input/xuguanyu/' + str(int(c/10)) + '.jpg', frame)
19 | 
20 |     cv2.imshow('frame', frame)
21 | 
22 |     # Press q to quit.
23 |     if cv2.waitKey(1) & 0xFF == ord('q'):
24 |         break
25 | 
26 | video_capture.release()
27 | cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/image/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/1.png
--------------------------------------------------------------------------------
/image/10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/10.png
--------------------------------------------------------------------------------
/image/11.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/11.png -------------------------------------------------------------------------------- /image/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/12.png -------------------------------------------------------------------------------- /image/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/13.png -------------------------------------------------------------------------------- /image/14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/14.png -------------------------------------------------------------------------------- /image/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/15.png -------------------------------------------------------------------------------- /image/16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/16.png -------------------------------------------------------------------------------- /image/17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/17.png -------------------------------------------------------------------------------- /image/18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/18.png -------------------------------------------------------------------------------- /image/19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/19.png -------------------------------------------------------------------------------- /image/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/2.png -------------------------------------------------------------------------------- /image/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/3.png -------------------------------------------------------------------------------- /image/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/4.png -------------------------------------------------------------------------------- /image/5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/5.png
--------------------------------------------------------------------------------
/image/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/6.png
--------------------------------------------------------------------------------
/image/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/7.png
--------------------------------------------------------------------------------
/image/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/8.png
--------------------------------------------------------------------------------
/image/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/image/9.png
--------------------------------------------------------------------------------
/image/note:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
--------------------------------------------------------------------------------
/input/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | This is the path where you put your classifier images before alignment. The structure should look like this:
3 | 
4 | * input
5 |   * class1 (for face recognition this is a person's name)
6 |     * image1 (for face recognition this is a photo of that person)
7 |     * image2
8 |     * ...
9 |   * class2
10 |     * image1
11 |     * image2
12 |     * ...
13 |   * class...
14 | 
--------------------------------------------------------------------------------
/models/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/models/det1.npy
--------------------------------------------------------------------------------
/models/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/models/det2.npy
--------------------------------------------------------------------------------
/models/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cryer/face_recognition/6e2947c54584c58883c0cc8312bfa6c501064696/models/det3.npy
--------------------------------------------------------------------------------
/models/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | This is the path for your model files: the MTCNN weights (det1/det2/det3.npy) and the FaceNet model, either as a checkpoint or a frozen .pb graph.
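3 | 
4 | A minimal sketch of how these files are consumed, using the facenet and detect_face modules from this repo (paths assume you run from the repo root):
5 | 
6 | ```python
7 | import tensorflow as tf
8 | import facenet
9 | import detect_face
10 | 
11 | with tf.Graph().as_default():
12 |     sess = tf.Session()
13 |     with sess.as_default():
14 |         # det1/det2/det3.npy hold the weights of the three MTCNN stages
15 |         # (PNet, RNet, ONet) that detect_face.py uses for face detection
16 |         pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/')
17 |         # restore the FaceNet embedding network from this directory
18 |         facenet.load_model('./models/')
19 | ```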
20 | 
--------------------------------------------------------------------------------
/myclassifier/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | This is the path for your own trained classifier, e.g. my_classifier.pkl
3 | 
--------------------------------------------------------------------------------
/output/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | This is the path where your classifier images are written after alignment. The structure should look like this:
3 | 
4 | * output
5 |   * class1 (for face recognition this is a person's name)
6 |     * image1 (for face recognition this is a photo of that person)
7 |     * image2
8 |     * ...
9 |   * class2
10 |     * image1
11 |     * image2
12 |     * ...
13 |   * class...
14 | 
--------------------------------------------------------------------------------
/real_time.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import tensorflow as tf
6 | from scipy import misc
7 | import cv2
8 | import numpy as np
9 | import facenet
10 | # import detect_face  # not needed here: this script detects faces with a Haar cascade
11 | import os
12 | import time
13 | import pickle
14 | 
15 | print('Creating networks and loading parameters')
16 | with tf.Graph().as_default():
17 |     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
18 |     sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
19 |     with sess.as_default():
20 |         minsize = 20  # minimum size of face (MTCNN parameter, unused in this Haar version)
21 |         threshold = [0.6, 0.7, 0.7]  # threshold for each of the three MTCNN steps (unused here)
22 |         factor = 0.709  # scale factor (unused here)
23 |         margin = 44
24 |         frame_interval = 3
25 |         batch_size = 1000
26 |         image_size = 182
27 |         input_image_size = 160
28 | 
29 |         HumanNames = ['liuzheng', 'shixing', 'xuguanyu', 'Human_h']  # class names from training, in the classifier's label order
30 | 
31 |         print('Loading feature extraction model')
32 |         modeldir = './models/'
33 |         facenet.load_model(modeldir)
34 | 
35 |         images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
36 |         embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
37 |         phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
38 |         embedding_size = embeddings.get_shape()[1]
39 | 
40 |         classifier_filename = './myclassifier/my_classifier.pkl'
41 |         classifier_filename_exp = os.path.expanduser(classifier_filename)
42 |         with open(classifier_filename_exp, 'rb') as infile:
43 |             (model, class_names) = pickle.load(infile)
44 |             print('load classifier file-> %s' % classifier_filename_exp)
45 | 
46 |         # Load the Haar cascade once instead of reloading it on every frame
47 |         classifier = cv2.CascadeClassifier("./haarcascade_frontalface_alt2.xml")
48 |         video_capture = cv2.VideoCapture(0)
49 |         c = 0  # frame counter; never incremented, so every frame is processed
50 | 
51 |         print('Start Recognition!')
52 |         prevTime = 0
53 |         while True:
54 |             ret, frame = video_capture.read()
55 | 
56 |             curTime = time.time()  # for the FPS overlay
57 |             timeF = frame_interval
58 | 
59 |             if (c % timeF == 0):
60 |                 find_results = []
61 |                 gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
62 | 
63 |                 faceRects = classifier.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
64 | 
65 |                 if len(faceRects) > 0:
66 |                     img_size = np.asarray(frame.shape)[0:2]
67 | 
68 |                     cropped = []
69 |                     scaled = []
70 |                     scaled_reshape = []
71 | 
72 |                     emb_array = np.zeros((1, embedding_size))
73 |                     for faceRect in faceRects:
74 |                         x, y, w, h = faceRect
75 |                         cv2.rectangle(frame, (x - 10, y - 10), (x + w + 10, y + h + 10), (0, 255, 0), 2)
76 | 
77 |                         cropped.append(frame[max(y - 10, 0):y + h + 10, max(x - 10, 0):x + w + 10, :])
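78 |                         # Preprocess the newest crop exactly as the training images were
79 |                         # prepared: flip (a no-op with False), resize to the 182 px alignment
80 |                         # size, then to the network's 160 px input, then prewhiten to zero
81 |                         # mean / unit variance before computing the embedding. Indexing with
82 |                         # [-1] always selects the face appended above, so every face in the
83 |                         # frame is processed, not just the first one.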
84 |                         cropped[-1] = facenet.flip(cropped[-1], False)
85 |                         scaled.append(misc.imresize(cropped[-1], (image_size, image_size), interp='bilinear'))
86 |                         scaled[-1] = cv2.resize(scaled[-1], (input_image_size, input_image_size),
87 |                                                 interpolation=cv2.INTER_CUBIC)
88 |                         scaled[-1] = facenet.prewhiten(scaled[-1])
89 |                         scaled_reshape.append(scaled[-1].reshape(-1, input_image_size, input_image_size, 3))
90 |                         feed_dict = {images_placeholder: scaled_reshape[-1], phase_train_placeholder: False}
91 |                         emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
92 | 
93 |                         predictions = model.predict_proba(emb_array)
94 |                         best_class_indices = np.argmax(predictions, axis=1)
95 |                         best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
96 |                         result_names = HumanNames[best_class_indices[0]]
97 |                         cv2.putText(frame, result_names, (x + 30, y - 30), cv2.FONT_HERSHEY_COMPLEX_SMALL,
98 |                                     1, (0, 0, 255), thickness=1, lineType=2)
99 |                 else:
100 |                     print('No face detected in this frame')
101 | 
102 |             sec = curTime - prevTime
103 |             prevTime = curTime
104 |             fps = 1 / sec
105 |             fps_text = 'FPS: %2.3f' % fps  # renamed from str, which shadowed the built-in
106 |             text_fps_x = len(frame[0]) - 150
107 |             text_fps_y = 20
108 |             cv2.putText(frame, fps_text, (text_fps_x, text_fps_y),
109 |                         cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2)
110 |             cv2.imshow('Video', frame)
111 | 
112 |             if cv2.waitKey(1) & 0xFF == ord('q'):
113 |                 break
114 | 
115 | video_capture.release()
116 | cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/realtime_facenet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import tensorflow as tf
6 | from scipy import misc
7 | import cv2
8 | import numpy as np
9 | import facenet
10 | import detect_face
11 | import os
12 | import time
13 | import pickle
14 | 
15 | print('Creating networks and loading parameters')
16 | with tf.Graph().as_default():
17 |     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
18 |     sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
19 |     with sess.as_default():
20 |         pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/')
21 | 
22 |         minsize = 20  # minimum size of face
23 |         threshold = [0.6, 0.7, 0.7]  # threshold for each of the three MTCNN stages
24 |         factor = 0.709  # scale factor for the MTCNN image pyramid
25 |         margin = 44
26 |         frame_interval = 3
27 |         batch_size = 1000
28 |         image_size = 182
29 |         input_image_size = 160
30 | 
31 |         HumanNames = ['liuzheng', 'shixing', 'xuguanyu', 'Human_h']  # class names from training, in the classifier's label order
32 | 
33 |         print('Loading feature extraction model')
34 |         modeldir = './models/'
35 |         facenet.load_model(modeldir)
36 | 
37 |         images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
38 |         embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
39 |         phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
40 |         embedding_size = embeddings.get_shape()[1]
41 | 
42 |         classifier_filename = './myclassifier/my_classifier.pkl'
43 |         classifier_filename_exp = os.path.expanduser(classifier_filename)
44 |         with open(classifier_filename_exp, 'rb') as infile:
45 |             (model, class_names) = pickle.load(infile)
46 |             print('load classifier file-> %s' % classifier_filename_exp)
47 | 
48 |         video_capture = cv2.VideoCapture(0)
49 |         c = 0
50 | 
51 |         print('Start Recognition!')
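52 |         # Main loop: each frame is passed through the three MTCNN stages
53 |         # (pnet, rnet, onet) to get face bounding boxes; every box is then
54 |         # cropped, resized to the 160x160 network input, prewhitened,
55 |         # embedded with FaceNet, and classified with the pickled classifier
56 |         # loaded above.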
57 |         prevTime = 0
58 |         while True:
59 |             ret, frame = video_capture.read()
60 | 
61 |             # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)  # resize frame (optional)
62 | 
63 |             curTime = time.time()  # for the FPS overlay
64 |             timeF = frame_interval
65 | 
66 |             if (c % timeF == 0):
67 |                 find_results = []
68 | 
69 |                 if frame.ndim == 2:
70 |                     frame = facenet.to_rgb(frame)
71 |                 frame = frame[:, :, 0:3]
72 |                 bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
73 |                 nrof_faces = bounding_boxes.shape[0]
74 |                 print('Detected_FaceNum: %d' % nrof_faces)
75 | 
76 |                 if nrof_faces > 0:
77 |                     det = bounding_boxes[:, 0:4]
78 |                     img_size = np.asarray(frame.shape)[0:2]
79 | 
80 |                     cropped = []
81 |                     scaled = []
82 |                     scaled_reshape = []
83 |                     bb = np.zeros((nrof_faces, 4), dtype=np.int32)
84 | 
85 |                     for i in range(nrof_faces):
86 |                         emb_array = np.zeros((1, embedding_size))
87 | 
88 |                         bb[i][0] = det[i][0]
89 |                         bb[i][1] = det[i][1]
90 |                         bb[i][2] = det[i][2]
91 |                         bb[i][3] = det[i][3]
92 | 
93 |                         # skip boxes that fall outside the frame
94 |                         if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
95 |                             print('face is out of range!')
96 |                             continue
97 | 
98 |                         cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
99 |                         cropped[-1] = facenet.flip(cropped[-1], False)
100 |                         scaled.append(misc.imresize(cropped[-1], (image_size, image_size), interp='bilinear'))
101 |                         scaled[-1] = cv2.resize(scaled[-1], (input_image_size, input_image_size),
102 |                                                 interpolation=cv2.INTER_CUBIC)
103 |                         scaled[-1] = facenet.prewhiten(scaled[-1])
104 |                         scaled_reshape.append(scaled[-1].reshape(-1, input_image_size, input_image_size, 3))
105 |                         feed_dict = {images_placeholder: scaled_reshape[-1], phase_train_placeholder: False}
106 |                         emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
107 | 
108 |                         predictions = model.predict_proba(emb_array)
109 |                         best_class_indices = np.argmax(predictions, axis=1)
110 |                         best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
111 |                         cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)
112 |                         text_x = bb[i][0]
113 |                         text_y = bb[i][3] + 20
114 | 
115 |                         result_names = HumanNames[best_class_indices[0]]
116 |                         cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
117 |                                     1, (0, 0, 255), thickness=1, lineType=2)
118 |                 else:
119 |                     print('No face detected in this frame')
120 | 
121 |             sec = curTime - prevTime
122 |             prevTime = curTime
123 |             fps = 1 / sec
124 |             fps_text = 'FPS: %2.3f' % fps  # renamed from str, which shadowed the built-in
125 |             text_fps_x = len(frame[0]) - 150
126 |             text_fps_y = 20
127 |             cv2.putText(frame, fps_text, (text_fps_x, text_fps_y),
128 |                         cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2)
129 |             # c += 1  # left disabled in the original, so frame_interval has no effect
130 |             cv2.imshow('Video', frame)
131 | 
132 |             if cv2.waitKey(1) & 0xFF == ord('q'):
133 |                 break
134 | 
135 | video_capture.release()
136 | # # video writer
137 | # out.release()
138 | cv2.destroyAllWindows()
139 | 
--------------------------------------------------------------------------------
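A minimal end-to-end sketch of how the pieces above fit together for one-off verification rather than real-time recognition. It reuses the squared-Euclidean distance that calculate_roc and calculate_accuracy in facenet.py operate on; the function name verify and the 1.1 threshold are illustrative choices, not part of this repo, and both inputs are assumed to be already-aligned 160x160 face crops:

```python
import numpy as np
import tensorflow as tf
from scipy import misc
import facenet

def verify(image_path1, image_path2, threshold=1.1):
    """Return (distance, same_person) for two aligned 160x160 face crops."""
    with tf.Graph().as_default():
        with tf.Session() as sess:
            facenet.load_model('./models/')
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

            # Prewhiten both images, exactly as the real-time scripts do
            imgs = np.stack([facenet.prewhiten(misc.imread(p)) for p in (image_path1, image_path2)])
            emb = sess.run(embeddings, feed_dict={images_placeholder: imgs, phase_train_placeholder: False})

            # Same squared-Euclidean distance used by calculate_roc / calculate_accuracy
            dist = np.sum(np.square(emb[0] - emb[1]))
            return dist, dist < threshold
```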